html-xml-utils-6.5/0000755000175000001440000000000012265516671011274 500000000000000html-xml-utils-6.5/textwrap.e0000644000175000001440000000046312265516604013237 00000000000000 void set_indent(int n); void set_linelen(int n); void flush(); void outc(char c, _Bool preformatted); void out(string s, _Bool preformatted); void outn(string s, size_t n, _Bool preformatted); void outln(char *s, _Bool preformatted); void outbreak(); void inc_indent(void); void dec_indent(void); html-xml-utils-6.5/asc2xml.c0000644000175000001440000000640712174313455012732 00000000000000/* * * Program to convert files from ASCII or ISO-8859-1 to UTF8. * * Usage: * * asc2xml * * Reads from stdin and write to stdout. Converts from ASCII (in fact: * Latin-1) (with or without &#-escapes) to UTF8, removing all * &#-escapes, except those representing ASCII characters. * * Part of HTML-XML-utils, see: * http://www.w3.org/Tools/HTML-XML-utils/ * * Author: Bert Bos * * Copyright © 1994-2002 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * **/ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #ifndef VERSION # define VERSION 2.61 #endif /* putUTF8 -- write a character to stdout in UTF8 encoding */ static void putUTF8(long c) { if (c <= 0x7F) { /* Leave ASCII encoded */ printf("&#%ld;", c); } else if (c <= 0x07FF) { /* 110xxxxx 10xxxxxx */ putchar(0xC0 | (c >> 6)); putchar(0x80 | (c & 0x3F)); } else if (c <= 0xFFFF) { /* 1110xxxx + 2 */ putchar(0xE0 | (c >> 12)); putchar(0x80 | ((c >> 6) & 0x3F)); putchar(0x80 | (c & 0x3F)); } else if (c <= 0x1FFFFF) { /* 11110xxx + 3 */ putchar(0xF0 | (c >> 18)); putchar(0x80 | ((c >> 12) & 0x3F)); putchar(0x80 | ((c >> 6) & 0x3F)); putchar(0x80 | (c & 0x3F)); } else if (c <= 0x3FFFFFF) { /* 111110xx + 4 */ putchar(0xF8 | (c >> 24)); putchar(0x80 | ((c >> 18) & 0x3F)); putchar(0x80 | ((c >> 
12) & 0x3F)); putchar(0x80 | ((c >> 6) & 0x3F)); putchar(0x80 | (c & 0x3F)); } else if (c <= 0x7FFFFFFF) { /* 1111110x + 5 */ putchar(0xFC | (c >> 30)); putchar(0x80 | ((c >> 24) & 0x3F)); putchar(0x80 | ((c >> 18) & 0x3F)); putchar(0x80 | ((c >> 12) & 0x3F)); putchar(0x80 | ((c >> 6) & 0x3F)); putchar(0x80 | (c & 0x3F)); } else { /* Not a valid character... */ printf("&#%ld;", c); } } /* asc2xml -- copy stdin to stdout, converting ASCII XML to UTF8 XML */ static void asc2xml(void) { long n; int c; while ((c = getchar()) != EOF) { if (c > 0x7F) { /* Latin-1, non-ASCII */ putUTF8(c); } else if (c != '&') { /* Normal ASCII char */ putchar(c); } else if ((c = getchar()) == EOF) { /* '&' before EOF */ putchar('&'); } else if (c != '#') { /* '&' not followed by '#' */ putchar('&'); putchar(c); } else if ((c = getchar()) == 'x') { /* '&#x' + hexadecimal */ n = 0; while (isxdigit((c = getchar()))) { if (c <= '9') n = 16 * n + c - '0'; else if (c <= 'F') n = 16 * n + c - 'A' + 10; else n = 16 * n + c - 'a' + 10; } /* Don't check for overflow, don't check if c == ';' */ putUTF8(n); } else { /* '&#' + decimal */ n = c - '0'; while (isdigit((c = getchar()))) { n = 10 * n + c - '0'; } /* Don't check for overflow, don't check if c == ';' */ putUTF8(n); } } } /* Print usage message, then exit */ static void usage(char *progname) { fprintf(stderr, "Version %s\nUsage: %s outfile\n", VERSION, progname); exit(1); } /* main -- main body */ int main(int argc, char *argv[]) { if (argc != 1) usage(argv[0]); asc2xml(); return 0; } html-xml-utils-6.5/hxmkbib.c0000644000175000001440000003662112225023233012773 00000000000000/* * mkbib - extract database entries from a db and format them * * mkbib reads a refer-style database of bibliographic entries, a list * of keys and a pattern file and outputs a list of citations * formatted according to the pattern and optionally sorted. * * The keys must correspond to %L fields in the refer database. 
* * The pattern file has the following structure: * * pattern: PREAMBLE entry POSTAMBLE; * entry: "{L:" [ TEXT | FIELD | conditional ]* "}"; * conditional: "{" !"? F ":" [ TEXT | FIELD | conditional ]* "}"; * * In the output, the entry will be repeated as often as there are * unique keys. A FIELD is of the form "%x" and wil be replaced by * field x of the entry. * * A part of the form "{x:ZZZ}" will be replaced by ZZZ if field x * exists and by nothing otherwise. A part of the form "{!x:ZZZ}" will * be replaced by ZZZ if field x does not exist. * * Occurrences of %x in the preamble (where x is a field name) will * not be output, but serve to build up the sort order. The default * sort order is to keep entries in the order they occur in the * auxfile, but if, e.g., "%A%D%T" occurs in the preamble, entries * will be sorted on author, date and title. * * To insert a literal "{", "}" or "%" in the preamble or in an entry, * prefix them with "%": "%{", "%}" and "%%". * * Usage: mkbib [-a auxfile] bibfile [inputfile] * * bibfile is a refer-style database. * * inputfile is the file that serves as template. If absent, stdin * is read. * * -a auxfile gives the name of the list of keys. If absent, the name * will be the same as inputfile with the extension (if any) * changed to ".aux". If no inputfile is given the default auxfile * is "aux.aux". Duplicate keys will only be used once. * * Note: When the "{x:" and "}" are inside an HTML file, they may be * in places where data is not allowed. To make the input file * itself valid HTML, it may be necessary to put them inside comments: * and . If one of them is put inside a comment, * the other must be as well. * * Here is an example of an input file: * * * Bibliography * *
*
%L *
{A:%A.} {T:%T.} {D:%D. } *
* * * To do: if the template adds something like "(eds)", allow it to be * changed to "(ed)" if there is only one editor. * * Copyright © 1994-2004 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 19 March 2000 * Version: $Id: hxmkbib.c,v 1.5 2013-07-25 21:05:13 bbos Exp $ **/ #include "config.h" #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #include #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_SEARCH_H # include #else # include "hash.e" /* Use our own implementation */ #endif #include #include #include "heap.e" #include "types.e" #include "errexit.e" #define LINESIZE 32768 #define INCR 25 /* Warning: arbitrary limit! */ #define HASHSIZE 4096 /* Size of hash table */ static string prog; /* argv[0] */ static string sortorder = NULL; /* Default is unsorted */ static string separator = "; "; /* Separates authors */ static int et_al_limit = 3; /* Max # of authors to print */ static string et_al = "et al."; /* String if more authors */ /* escape -- print a string, escaping characters dangerous for XML/HTML */ static void escape(const string s, unsigned char *last) { int i; for (i = 0; s[i]; i++) switch (s[i]) { case '<': printf("<"); break; case '>': printf(">"); break; case '&': printf("&"); break; case '"': printf("""); break; default: putchar(s[i]); } if (i > 0) *last = s[i-1]; } /* put_field -- copy field field of entry with label key */ static void put_field(const string key, unsigned char field, unsigned char *last) { ENTRY *e, e1 = {key, NULL}; string *lines; int i, j, nrfields; /* ToDo: escape dangerous characters */ /* ToDo: print "et. al." 
if more than N authors */ /* ToDo: for fields other than %A and %E use only the last occurrence */ /* ToDo: interpret and pretty-print dates in a consistent manner */ if (field == '%' || field == '{' || field == '}') { /* Literal */ putchar(field); *last = '\0'; return; } /* Find the entry for key */ if (! (e = hsearch(e1, FIND))) { fprintf(stderr, "%s: entry for key %s not found\n", prog, key); return; } /* Count how many occurences of %field there are in the entry */ lines = (string*)e->data; /* Type cast */ for (i = 0, nrfields = 0; lines[i]; i++) if (lines[i][1] == field) nrfields++; /* Check that there is indeed a field */ if (nrfields == 0) { fprintf(stderr, "%s: entry %s has no field %%%c\n", prog, key, field); return; } /* Check that there are no duplicate fields, other than for A and E */ if (nrfields != 1 && ! (field == 'A' || field == 'E')) { fprintf(stderr, "%s: entry %s has duplicate field %%%c\n", prog, key, field); return; } /* Now print the field(s) */ if (nrfields > et_al_limit) { /* Print only the first */ for (i = 0; lines[i][1] != field; i++); /* Find the first */ escape(lines[i] + 3, last); /* Print with entities */ printf("%s%s", separator, et_al); *last = et_al[strlen(et_al) - 1]; } else { /* Print all fields */ for (i = 0, j = 0; lines[i]; i++) { if (lines[i][1] == field) { /* Found it */ if (j != 0) printf("%s", separator); /* Multiple fields */ escape(lines[i] + 3, last); /* Print with entities */ j++; } } } } /* get_field -- check that entry for key has a field f, return ptr to field */ static string get_field(const string key, const unsigned char f) { ENTRY *e, e1 = {key, NULL}; string *lines; int i; /* Find the entry for key */ e = hsearch(e1, FIND); assert(e != NULL); assert(e->data != NULL); /* Find a line that starts with %field */ lines = (string*)e->data; /* Type cast */ for (i = 0; lines[i] && lines[i][1] != f; i++) ; assert(! 
lines[i] || (lines[i][0] == '%' && lines[i][2] == ' ')); return lines[i]; } /* compare_keys -- return the relative sort order for two keys: -1, 0, 1 */ static int compare_keys(const void *aptr, const void *bptr) { ENTRY e, *ae, *be; int c, i; string af, bf, a = *(string*)aptr, b = *(string*)bptr; /* Get the entry for key a */ e.key = a; ae = hsearch(e, FIND); assert(ae != NULL); /* Get the entry for key b */ e.key = b; be = hsearch(e, FIND); assert(be != NULL); /* Loop over sortorder, stop as soon as entries a and b are unequal */ for (i = 0, c = 0; c == 0 && sortorder[i]; i++) { af = get_field(a, sortorder[i]); bf = get_field(b, sortorder[i]); c = strcmp(af ? af : (string)"", bf ? bf : (string)""); } return c; } /* sort_keys -- sort the keys according to the sort order given */ static void sort_keys(string *keys, const int n) { assert(sortorder != NULL); qsort(keys, n, sizeof(*keys), compare_keys); } /* conditional -- conditionally copy a %{...%} segment */ static int conditional(const string pattern, const string key, unsigned char *last) { bool on; int level, i = 1; /* Pattern starts with '{' */ assert(pattern[0] == '{' && pattern[1] != '\0'); /* Check the condition */ if (pattern[i] == '!') on = !get_field(key, pattern[++i]); else on = get_field(key, pattern[i]) != NULL; if (pattern[i+1] != ':') errexit("%s: missing ':' in pattern\n", prog); /* Skip or copy until matching '%}' */ if (! on) { /* Skip until matching '}' */ for (i += 2, level = 1; level != 0; i++) if (pattern[i] == '%') { if (pattern[++i] == '{') level++; else if (pattern[i] == '}') level--; } i--; /* i points to '}' */ } else { /* Recursively copy segment */ for (i += 2; true; i++) if (pattern[i] == '%') { if (pattern[++i] == '{') i += conditional(pattern + i, key, last); else if (pattern[i] == '}') break; else if (pattern[i] == '%') {putchar('%'); *last = '\0';} else put_field(key, pattern[i], last); } else if (*last != '.' 
|| pattern[i] != '.') { putchar(pattern[i]); *last = '\0'; } else { *last = '\0'; /* Don't print this '.' */ } } return i; /* Points at '}' */ } /* copy -- copy pattern, expanding fields. (May sort keys) */ static void copy(const string pattern, string *keys, const int n) { int j, start, end, level, slen = 0; unsigned char last = '\0'; /* Last char of field */ assert(sortorder == NULL); /* ToDo: Find a way to declare the separator in the source. Maybe {&:...} */ /* Find first '%{'. Also look for sort order */ for (start = 0; pattern[start]; start++) { if (pattern[start] == '%') { /* Special character */ if (pattern[++start] == '{') { /* Start of template */ break; } else if ('A' <= pattern[start] && pattern[start] <= 'Z') { renewarray(sortorder, slen + 2); /* Sort order */ sortorder[slen] = pattern[start]; sortorder[++slen] = '\0'; } else { putchar('%'); /* Not special */ putchar(pattern[start]); } } else { /* Normal character */ putchar(pattern[start]); } } if (!pattern[start]) { fprintf(stderr, "%s: warning: no '%%{' in input file\n", prog); return; /* Nothing more to copy */ } /* Sort the keys if there was a sort order */ if (sortorder) sort_keys(keys, n); /* Start now points to '{'. Find matching '%}' */ for (end = start + 1, level = 1; pattern[end] && level != 0; end++) { if (pattern[end] == '%') { if (pattern[++end] == '}') level--; else if (pattern[end] == '{') level++; } } if (level != 0) errexit("%s: unbalanced %{..%} in pattern\n", prog); /* End now points just after '}'. 
Loop over keys */ for (j = 0; j < n; j++) conditional(pattern + start, keys[j], &last); /* Copy postamble */ printf("%s", pattern + end); } /* in_list -- check if s is in the list of strings */ static bool in_list(const string s, const string *list, const int n) { int i; for (i = 0; i < n && strcmp(s, list[i]) != 0; i++) ; return i < n; } /* read_keys -- read the list of keys from file f */ static string *read_keys(FILE *f, int *number) { int i, e, n = 0; char line[LINESIZE]; string *keys = NULL; clearerr(f); while (fgets(line, sizeof(line), f)) { /* Remove trailing \n and other whitespace */ for (i = strlen(line); i > 0 && isspace(line[i-1]); i--) ; line[i] = '\0'; /* ToDo: linear search fast enough? Books don't have 1000's of refs... */ if (! in_list(line, keys, n)) { renewarray(keys, INCR * ((n + 1)/INCR + 1)); keys[n++] = newstring(line); } } if ((e = ferror(f))) errexit("%s: %s\n", prog, strerror(e)); *number = n; return keys; } /* check_and_store_entry -- check if we need this entry and if so store it */ static void check_and_store_entry(const string key, string *lines, int n) { ENTRY e, *e1; renewarray(lines, INCR * ((n + 1)/INCR + 1)); lines[n] = NULL; /* Mark end of entry */ if (key) { /* Does it have a key at all */ e.key = key; if ((e1 = hsearch(e, FIND))) /* Do we need this entry? */ e1->data = (char*)lines; /* Replace its data field */ } } /* read_entries -- read the relevant entries from the refer database */ static void read_entries(FILE *f, const string *keys, const int n) { char line[LINESIZE]; string *lines = NULL; string key = NULL; ENTRY e, *e1; int i, j, fe; /* First enter all keys into the hash table without any data */ for (i = 0; i < n; i++) { e.key = newstring(keys[i]); e.data = NULL; if (! 
hsearch(e, ENTER)) errexit("%s: %s\n", prog, strerror(errno)); } /* Now read entries from the database */ clearerr(f); i = 0; while (fgets(line, sizeof(line), f)) { if (line[0] != '%') { /* Separator line */ if (i != 0) { /* We were in an entry */ check_and_store_entry(key, lines, i); i = 0; /* Reset */ key = NULL; /* Reset */ lines = NULL; /* Reset */ } } else { /* This line is a field */ for (j = strlen(line); j > 0 && isspace(line[j-1]); j--) ; line[j] = '\0'; /* Remove trailing spaces */ renewarray(lines, INCR * ((i + 1)/INCR + 1)); lines[i] = newstring(line); if (strncmp(lines[i], "%L ", 3) == 0) key = lines[i] + 3; i++; } } if ((fe = ferror(f))) errexit("%s: %s\n", prog, strerror(fe)); /* Check if last entry was already stored */ if (i != 0) /* We were still in an entry */ check_and_store_entry(key, lines, i); /* Check that we found all keys */ for (i = 0; i < n; i++) { e.key = keys[i]; e1 = hsearch(e, FIND); assert(e1); if (! e1->data) errexit("%s: entry for \"%s\" not found\n", prog, keys[i]); } } /* read_pattern -- read the input file into memory */ static string read_pattern(FILE *f) { string p = NULL; int n, len = 0; /* ToDo: use ferror to check for errors */ do { renewarray(p, len + LINESIZE + 1); n = fread(p + len, sizeof(*p), LINESIZE, f); len += n; } while (! 
feof(f)); p[len] = '\0'; return p; } /* usage -- print usage message and exit */ static void usage(void) { errexit("Version %s\nUsage: %s [-a auxfile] [-s sep] [-n maxauthors] [-r moreauthors] bibfile [inputfile]\n", VERSION, prog); } /* main - main body */ int main(int argc, char *argv[]) { string auxfile = NULL, pattern, inputfile = NULL, dbfile, h; string *keys = NULL; FILE *f, *db, *aux; int c, n; /* Parse command line */ prog = argv[0]; while ((c = getopt(argc, argv, "a:s:n:r:")) != -1) { switch (c) { case 'a': auxfile = optarg; break; case 's': separator = optarg; break; case 'n': et_al_limit = atoi(optarg); break; case 'r': et_al = optarg; break; default: usage(); } } if (optind == argc || argc > optind + 2) usage(); /* First argument is refer database */ dbfile = argv[optind++]; /* Optional second argument is input file */ if (optind != argc) inputfile = argv[optind]; /* If we don't have an explicit auxfile yet, derive its name */ if (! auxfile) { if (! inputfile) { auxfile = "aux.aux"; } else { newarray(auxfile, strlen(argv[optind]) + 5); strcpy(auxfile, argv[optind]); if ((h = strrchr(auxfile, '.'))) *h = '\0'; strcat(auxfile, ".aux"); } } /* Create a hash table */ if (! hcreate(HASHSIZE)) errexit("%s: not enough memory for hash table\n", prog); /* Read keys from aux file */ if (! (aux = fopen(auxfile, "r"))) errexit("%s: %s: %s\n", prog, auxfile, strerror(errno)); keys = read_keys(aux, &n); if (fclose(aux) != 0) errexit("%s: %s: %s\n", prog, auxfile, strerror(errno)); /* Read the entries we need from the database */ if (! (db = fopen(dbfile, "r"))) errexit("%s: %s: %s\n", prog, dbfile, strerror(errno)); read_entries(db, keys, n); if (fclose(db) != 0) errexit("%s: %s: %s\n", prog, dbfile, strerror(errno)); /* Read pattern into memory */ if (! (f = inputfile ? 
fopen(inputfile, "r") : stdin)) errexit("%s: %s: %s\n", prog, inputfile, strerror(errno)); pattern = read_pattern(f); if (fclose(f) != 0) errexit("%s: %s: %s\n", prog, inputfile, strerror(errno)); /* Copy and expand the pattern */ copy(pattern, keys, n); return 0; } html-xml-utils-6.5/xml2asc.10000644000175000001440000000220111774370213012634 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "XML2ASC" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME xml2asc \- convert UTF-8 to &#nnn; entities .SH SYNOPSIS .B xml2asc .SH DESCRIPTION .LP Reads an UTF-8 encoded text from standard input and writes to standard output, converting all non-ASCII characters to &#nnn; entities, so that the result is ASCII-encoded. .LP One example use is to convert ISO-8859-1 to ASCII with &#nnn; entities, by first running .B asc2xml to convert ISO-8859-1 to UTF-8 and then pipe the result into .B xml2asc to convert to ASCII with &#nnn; entities for all accented characters. .LP To test if a file is correct UTF-8, ignore the output and test the exit code, e.g. in Bash: .d xml2asc /dev/null && echo "OK" || echo "Fail" .e .SH "DIAGNOSTICS" .B xml2asc returns with a non-zero exit code if the input was not UTF-8. .SH "SEE ALSO" .BR asc2xml (1), .BR UTF-8 " (RFC 2279)" .SH BUGS .LP Doesn't distinguish mark-up from content, so if the input uses non-ASCII characters in XML element names, they will be output with numerical entities in them, which is not legal in XML. html-xml-utils-6.5/hxcopy.c0000644000175000001440000001762512174313455012677 00000000000000/* hxcopy -- copy an HTML file and update relative URLs at the same time * * Copy an HTML file with all URLs that were relative to OLDURL * updated to be relative to NEWURL instead. (If the document has a * BASE element, only that is updated.) OLDURL and NEWURL may * themselves be relative (to the same base URL, which need not be * mentioned). 
* * Part of HTML-XML-utils, see: * http://www.w3.org/Tools/HTML-XML-utils/ * * TO DO: Should it be an option whether URL references of the form * "", "#foo" and "?bar" are replaced by "oldurl", "oldurl#foo" and * "oldurl?bar"? (See adjust_url().) * * Created: 5 Dec 2008 * Author: Bert Bos * * Copyright © 2008-2012 W3C * See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 */ #include "config.h" #include #include #include #include #include #include #if HAVE_STRING_H # include #endif #if HAVE_STRINGS_H # include #endif #include "export.h" #include "heap.e" #include "types.e" #include "html.e" #include "scan.e" #include "url.e" #include "dict.e" #include "openurl.e" #include "errexit.e" #define same(a, b) ((a) ? ((b) && eq((a), (b))) : !(b)) static bool has_errors = false; /* Enconutered errors during parsing */ static FILE *out = NULL; /* Where to write output */ static bool has_base = false; /* Document has a element */ static string newbase; /* Path from OLDURL to NEWURL */ /* path_from_url_to_url -- compute URL that is path from one URL to another */ static string path_from_url_to_url(const conststring a, const conststring b) { URL p, q; string s = NULL; char cwd[4096]; int i, j; if (!getcwd(cwd, sizeof(cwd) - 1)) return NULL; /* To do: handle long path */ strcat(cwd, "/"); s = URL_s_absolutize(cwd, a); p = URL_new(s); dispose(s); s = URL_s_absolutize(cwd, b); q = URL_new(s); dispose(s); if (p->proto && !q->proto) { errno = EACCES; /* Path from remote to local not possible */ } else if (!same(p->proto, q->proto) || !same(p->user, q->user) || !same(p->password, q->password) || !same(p->machine, q->machine) || !same(p->port, q->port)) { s = newstring(b); /* Just use the URL b */ } else { /* Find the last '/' before which both paths are the same */ for (j = i = 0; p->path[i] && q->path[i] && p->path[i] == q->path[i]; i++) if (p->path[i] == '/') j = i; /* Construct path from a to b by descending a and climbing b */ for (i = j + 1; p->path[i]; i++) 
if (p->path[i] == '/') strapp(&s, "../", NULL); strapp(&s, q->path + j + 1, NULL); } URL_dispose(p); URL_dispose(q); return s; } /* adjust_url -- return a new URL relative to newurl instead of oldurl */ static conststring adjust_url(const conststring url) { if (!url || !url[0] || url[0] == '#' || url[0] == '?') return url; /* Don't replace references to self */ else return URL_s_absolutize(newbase, url); } /* attribute_is_url -- check if the attribute is URL-valued */ static bool attribute_is_url(const conststring attrib) { return strcasecmp(attrib, "href") == 0 || strcasecmp(attrib, "src") == 0 || strcasecmp(attrib, "action") == 0 || strcasecmp(attrib, "background") == 0 || strcasecmp(attrib, "cite") == 0 || strcasecmp(attrib, "classid") == 0 || strcasecmp(attrib, "codebase") == 0 || strcasecmp(attrib, "data") == 0 || strcasecmp(attrib, "longdesc") == 0 || strcasecmp(attrib, "profile") == 0 || strcasecmp(attrib, "usemap") == 0; } /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_errors = true; } /* start -- called before the first event is reported */ void* start(void) { return NULL; } /* end -- called after the last event is reported */ void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { fprintf(out, "", commenttext); } /* handle_text -- called after a text chunk is parsed */ void handle_text(void *clientdata, string text) { fprintf(out, "%s", text); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { fprintf(out, ""); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { fprintf(out, "", pi_text); } /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist 
attribs) { conststring v; pairlist p; fprintf(out, "<%s", name); for (p = attribs; p; p = p->next) { fprintf(out, " %s", p->name); if (!p->value) v = NULL; else if (has_base) v = newstring(p->value); /* No need to adjust */ else if (attribute_is_url(p->name)) v = adjust_url(p->value); else v = newstring(p->value); /* No need to adjust */ if (v) fprintf(out, "=\"%s\"", v); dispose(v); } fprintf(out, ">"); /* If this is a tag, no further adjustments are needed */ if (strcasecmp(name, "base") == 0) has_base = true; } /* handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { conststring v; pairlist p; fprintf(out, "<%s", name); for (p = attribs; p; p = p->next) { fprintf(out, " %s", p->name); if (!p->value) v = NULL; else if (has_base) v = newstring(p->value); /* No need to adjust */ else if (attribute_is_url(p->name)) v = adjust_url(p->value); else v = newstring(p->value); /* No need to adjust */ if (v) fprintf(out, "=\"%s\"", v); dispose(v); } fprintf(out, " />"); /* If this is a tag, no further adjustments are needed */ if (strcasecmp(name, "base") == 0) has_base = true; } /* handle_endtag -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { fprintf(out, "", name); } /* usage -- print usage message and exit */ static void usage(const conststring progname) { fprintf(stderr, "Usage: %s [-v] [-i old-URL] [-o new-URL] [URL [URL]]\n", progname); exit(1); } int main(int argc, char *argv[]) { int c, status = 200; string oldurl = NULL, newurl = NULL; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); /* Parse command line */ 
while ((c = getopt(argc, argv, "i:o:v")) != -1) switch (c) { case 'o': newurl = optarg; break; case 'i': oldurl = optarg; break; case 'v': printf("Version: %s %s\n", PACKAGE, VERSION); return 0; default: usage(argv[0]); } if (argc > optind + 2) usage(argv[0]); if (argc > optind + 1) out = fopenurl(argv[optind+1], "w", NULL); else if (newurl) out = stdout; else errexit("%s: option -o is required if output is to stdout\n", argv[0]); if (!out) {perror(argv[optind+1]); exit(3);} if (argc > optind) yyin = fopenurl(argv[optind], "r", &status); else if (oldurl) yyin = stdin; else errexit("%s: option -i is required if input is from stdin\n", argv[0]); if (!yyin) {perror(argv[optind]); exit(2);} if (status != 200) errexit("%s : %s\n", argv[1], http_strerror(status)); if (!oldurl) oldurl = argv[optind]; if (!newurl) newurl = argv[optind+1]; newbase = path_from_url_to_url(newurl, oldurl); if (!newbase) errexit("%s: could not parse argument as a URL\n", argv[0]); if (yyparse() != 0) exit(4); return has_errors ? 1 : 0; } html-xml-utils-6.5/hxcite.c0000644000175000001440000002516312225033335012636 00000000000000/* * cite - adds hyperlinks to bibliographic references in HTML * * The programs looks for strings of the form [[name]] (i.e., a * bibliographic label inside a double pair of square brackets), e.g., * [[Knuth84]] or [[LieBos97]]. The label will be looked up in a * bibliography database and if it is found, the string will be * replaced by a pattern which is typically of the form [name], but the pattern can be changed * with a command line option. * * If the string is of the form {{name}}, the name will be looked up, * but the string will be copied unchanged. * * If the label is not found, a warning is printed and the string is * left unchanged. * * All labels that are found are also stored, one label per line, in a * separate file with extension .aux. 
This file can be used by mkbib * to create the bibliography by extracting the corresponding * bibliographic entries from the database. * * The bibliography database must be a refer-style database. Though * for the purposes of this program all lines that don't start with * "%L" or %K are ignored. Lines with "%L" are assumed to contain a * label. Lines with %K are assumed to contain whitespace separated * keywords, which are effectively aliases for the label. Entries must * have one %L line and one or zero %K lines. * * Options: * * -b base * Give the value for %b in the pattern. * * -p pattern * The replacement for the string [[label]]. The default is * * [%L] * * %L will be replaced by the label, %b by the value of the -b * option and %m by the marker (-m option). * * -a auxfile * The name of the file in which the list of labels will be stored. * Default is the name of the file given as argument, minus its * extension, plus ".aux". If no file is give (input comes from * stdin), the default name is "aux.aux". * * -m marker * By default, the program looks for "[[name]]", but it can be * made to look for "[[Xname]]" where X is some string, usually a * symbol such as '!' or ='. This allows references to be * classified, e.g., "[[!name]]" for normative references and * "[[name]]" for non-normative references. * * -c * Assume that every pair "" delimit a comment and * do not process any [[label]] that occurs between them. Any * "{{label}}" is processed as normal. This does not actually * parse the input as HTML or XML and thus the program will * mistake occurrences of these two strings inside CDATA sections * or attribute values for comment delimiters. 
* * Copyright 1994-2012 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 18 March 2000 * Version: $Id: hxcite.c,v 1.8 2013-10-08 16:46:21 bbos Exp $ **/ #include "config.h" #ifdef HAVE_UNISTD_H # include #endif #include #include #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRSTR # include "strstr.e" # endif #endif #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_SEARCH_H # include #else # include "hash.e" #endif #include #include #include "export.h" #include "heap.e" #include "types.e" #include "errexit.e" /* Warning: arbitrary limits! */ #define LINESIZE 32768 #define HASHSIZE 4096 /* Size of hash table */ #define WS " \t\r\n\f" /* Separates %K keywords */ static string base = ""; /* URL of bibilography */ static string mark = ""; /* Flag after "'[[" */ static size_t marklen = 0; /* Length of mark */ static string prog; /* = argv[0] */ static string pattern = "[%L]"; static FILE *aux; static bool skip_comments = false; /* Whether to skip [[ inside */ /* get_label -- get the label for the keyword, or NULL */ static string get_label(const string keyword) { ENTRY *result, e = {keyword, NULL}; result = hsearch(e, FIND); return result ? (string) result->data : NULL; } /* valid_label -- check if the label is well-formed */ static bool valid_label(const string label) { int i; for (i = 0; label[i]; i++) if (! isalnum(label[i]) && label[i] != '-' && label[i] != '_' && label[i] != '.') return false; return true; } /* expand_ref -- print the reformatted reference */ static void expand_ref(const string label) { int i; /* ToDo: somehow allow sequence numbers for references [1], [2], etc. 
*/ for (i = 0; pattern[i]; i++) { if (pattern[i] != '%') { putchar(pattern[i]); } else { switch (pattern[++i]) { case '%': putchar('%'); break; /* Literal '%' */ case 'b': printf("%s", base); break; /* Base URL */ case 'L': printf("%s", label); break; /* Label */ case 'm': printf("%s", mark); break; /* Mark (-m option) */ default: break; /* Error in pattern */ } } } } /* process_line -- look for citations in a line */ EXPORT void process_line(const string text, const string fname, int lineno, bool *in_comment) { string h = text, p, q, label = NULL, key; char c; /* Loop over occurrences of "[[" + mark + label + "]]" and "{{" + mark + label + "}}" */ while (*in_comment ? (p = strpbrk(h, "-{")) : (p = strpbrk(h, "[{<"))) { while (h != p) putchar(*(h++)); /* Print text up to here */ if (strncmp(p, "-->", 3) == 0) { /* End of comment */ putchar(*(h++)); *in_comment = false; continue; } if (strncmp(p, "... .e will be replaced by the content of the file \fIfoo.html\fP. .LP The comment is replaced by .d .e before the included text and .d .e after it. These comments make it possible to run .B hxincl on the resulting file again to update the inclusions. .PP Single quotes are allowed instead of double quotes. And if the file name contains no spaces, the quotes may also be omitted. .PP With .BR \-M , the .B hxincl command outputs a line of dependencies that is suitable for inclusion in a Makefile. The .I target is the target of the Makefile rule and .B hxincl will list after the ':' all the files that are included, recursively. E.g., the result of .d hxincl -M myfile.html inputfile .e might be .d myfile.html: foo.html bar.html .e .SH OPTIONS The following options are supported: .TP 10 .B \-x Use XML conventions: empty elements are written with a slash at the end: . .TP .BI \-b " base" Sets the base URL for resolving relative URLs. By default the file given as argument is the base URL. .TP .B \-f Removes the comments after including the files. 
This means .B hxincl cannot be run on the resulting file later to update the inclusions. (Mnemonic: .BR f inal or .BR f rozen.) .TP .BI \-s " name=substitution" Include a different file than the one mentioned in the directive. If the comment is .d .e the file .I substitution is included instead. And if the file name in the comment includes a variable called .I name delimited by %, e.g., .d .e then .RI % name % is replaced by .I substitution and thus the file .RI xxx\- substitution is included. The option .B \-s may occur multiple times. %-delimited variables are expanded recursively, i.e., if the substitution text contains a variable, that variable is expanded, too. E.g., if the two options .B \-s name=%p1%.rrr and .B \-s p1=subst are given, then the "xxx-%name%" will expand to "xxx-subst.rrr". .TP .BI \-M " target" Instead of outputting the input file with all inclusions expanded, output just the list of all files that the input includes, recursively, in the form of a rule that is suitable for a Makefile. The .I target is printed as the target of that rule. .TP .B \-G Suppress error messages if a file to include cannot be found. (Only with .BR \-M .) .SH OPERANDS The following operand is supported: .TP 10 .I file\-or\-URL The name of an HTML or XML file or the URL of one. If absent, standard input is read instead. .SH "EXIT STATUS" The following exit values are returned: .TP 10 .B 0 Successful completion. .TP .B > 0 An error occurred in the parsing of one of the HTML or XML files. .SH ENVIRONMENT To use a proxy to retrieve remote files, set the environment variables .B http_proxy or .BR ftp_proxy "." E.g., .B http_proxy="http://localhost:8080/" .SH BUGS .LP Assumes UTF-8 as input. Doesn't expand character entities. Instead pipe the input through .BR hxunent (1) and .BR asc2xml (1) to convert it to UTF-8. .LP Remote files (specified with a URL) are currently only supported for HTTP. Password-protected files or files that depend on HTTP "cookies" are not handled. 
(You can use tools such as .BR curl (1) or .BR wget (1) to retrieve such files.) .SH "SEE ALSO" .BR asc2xml (1), .BR hxnormalize (1), .BR hxnum (1), .BR hxprune (1), .BR hxtoc (1), .BR hxunent (1), .BR xml2asc (1), .BR UTF-8 " (RFC 2279)" html-xml-utils-6.5/Makefile.in0000644000175000001440000015721112265516533013265 00000000000000# Makefile.in generated by automake 1.11.6 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ # Copyright © 1994-2004 World Wide Web Consortium # See http://www.w3.org/Consortium/Legal/copyright-software # # Author: Bert Bos # Created: 31 Mar 2000 VPATH = @srcdir@ am__make_dryrun = \ { \ am__dry=no; \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ *) \ for am__flg in $$MAKEFLAGS; do \ case $$am__flg in \ *=*|--*) ;; \ *n*) am__dry=yes; break;; \ esac; \ done;; \ esac; \ test $$am__dry = yes; \ } pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : 
POST_UNINSTALL = : bin_PROGRAMS = hxaddid$(EXEEXT) hxcite$(EXEEXT) hxcount$(EXEEXT) \ hxextract$(EXEEXT) hxclean$(EXEEXT) hxcopy$(EXEEXT) \ hxprune$(EXEEXT) hxnsxml$(EXEEXT) hxincl$(EXEEXT) \ hxindex$(EXEEXT) hxmkbib$(EXEEXT) hxmultitoc$(EXEEXT) \ hxname2id$(EXEEXT) hxnormalize$(EXEEXT) hxnum$(EXEEXT) \ hxpipe$(EXEEXT) hxremove$(EXEEXT) hxselect$(EXEEXT) \ hxtabletrans$(EXEEXT) hxtoc$(EXEEXT) hxuncdata$(EXEEXT) \ hxunent$(EXEEXT) hxunpipe$(EXEEXT) hxunxmlns$(EXEEXT) \ hxwls$(EXEEXT) hxxmlns$(EXEEXT) hxref$(EXEEXT) \ xml2asc$(EXEEXT) asc2xml$(EXEEXT) noinst_PROGRAMS = cexport$(EXEEXT) subdir = . DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in $(srcdir)/config.h.in \ $(top_srcdir)/configure AUTHORS COPYING ChangeLog INSTALL NEWS \ TODO depcomp html.c html.h install-sh malloc.c missing \ realloc.c scan.c strdup.c strerror.c strstr.c tfind.c \ tsearch.c twalk.c ylwrap ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/flex-optimize.m4 \ $(top_srcdir)/m4/libcurl.m4 $(top_srcdir)/m4/optreset.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" \ "$(DESTDIR)$(man1dir)" PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) am_asc2xml_OBJECTS = asc2xml.$(OBJEXT) asc2xml_OBJECTS = $(am_asc2xml_OBJECTS) asc2xml_LDADD = $(LDADD) asc2xml_DEPENDENCIES = @LIBOBJS@ am_cexport_OBJECTS = cexport.$(OBJEXT) cexport_OBJECTS = $(am_cexport_OBJECTS) cexport_LDADD = $(LDADD) cexport_DEPENDENCIES = @LIBOBJS@ am_hxaddid_OBJECTS = hxaddid.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ dtd.$(OBJEXT) openurl.$(OBJEXT) errexit.$(OBJEXT) \ url.$(OBJEXT) connectsock.$(OBJEXT) heap.$(OBJEXT) \ tree.$(OBJEXT) 
types.$(OBJEXT) genid.$(OBJEXT) class.$(OBJEXT) \ hash.$(OBJEXT) headers.$(OBJEXT) dict.$(OBJEXT) \ fopencookie.$(OBJEXT) hxaddid_OBJECTS = $(am_hxaddid_OBJECTS) hxaddid_LDADD = $(LDADD) hxaddid_DEPENDENCIES = @LIBOBJS@ am_hxcite_OBJECTS = heap.$(OBJEXT) errexit.$(OBJEXT) hxcite.$(OBJEXT) hxcite_OBJECTS = $(am_hxcite_OBJECTS) hxcite_LDADD = $(LDADD) hxcite_DEPENDENCIES = @LIBOBJS@ am_hxclean_OBJECTS = hxclean.$(OBJEXT) html.$(OBJEXT) tree.$(OBJEXT) \ types.$(OBJEXT) heap.$(OBJEXT) dtd.$(OBJEXT) scan.$(OBJEXT) \ errexit.$(OBJEXT) hxclean_OBJECTS = $(am_hxclean_OBJECTS) hxclean_LDADD = $(LDADD) hxclean_DEPENDENCIES = @LIBOBJS@ am_hxcopy_OBJECTS = html.$(OBJEXT) scan.$(OBJEXT) types.$(OBJEXT) \ url.$(OBJEXT) openurl.$(OBJEXT) errexit.$(OBJEXT) \ dict.$(OBJEXT) headers.$(OBJEXT) heap.$(OBJEXT) \ connectsock.$(OBJEXT) hxcopy.$(OBJEXT) fopencookie.$(OBJEXT) hxcopy_OBJECTS = $(am_hxcopy_OBJECTS) hxcopy_LDADD = $(LDADD) hxcopy_DEPENDENCIES = @LIBOBJS@ am_hxcount_OBJECTS = hxcount.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ types.$(OBJEXT) errexit.$(OBJEXT) heap.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) connectsock.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxcount_OBJECTS = $(am_hxcount_OBJECTS) hxcount_LDADD = $(LDADD) hxcount_DEPENDENCIES = @LIBOBJS@ am_hxextract_OBJECTS = hxextract.$(OBJEXT) html.$(OBJEXT) \ scan.$(OBJEXT) openurl.$(OBJEXT) url.$(OBJEXT) \ connectsock.$(OBJEXT) heap.$(OBJEXT) errexit.$(OBJEXT) \ class.$(OBJEXT) headers.$(OBJEXT) dict.$(OBJEXT) \ types.$(OBJEXT) fopencookie.$(OBJEXT) hxextract_OBJECTS = $(am_hxextract_OBJECTS) hxextract_LDADD = $(LDADD) hxextract_DEPENDENCIES = @LIBOBJS@ am_hxincl_OBJECTS = hxincl.$(OBJEXT) scan.$(OBJEXT) html.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) heap.$(OBJEXT) \ errexit.$(OBJEXT) connectsock.$(OBJEXT) types.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxincl_OBJECTS = $(am_hxincl_OBJECTS) hxincl_LDADD = $(LDADD) hxincl_DEPENDENCIES = @LIBOBJS@ am_hxindex_OBJECTS = 
hxindex.$(OBJEXT) scan.$(OBJEXT) html.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) heap.$(OBJEXT) class.$(OBJEXT) \ errexit.$(OBJEXT) connectsock.$(OBJEXT) types.$(OBJEXT) \ tree.$(OBJEXT) genid.$(OBJEXT) dtd.$(OBJEXT) headers.$(OBJEXT) \ dict.$(OBJEXT) fopencookie.$(OBJEXT) hxindex_OBJECTS = $(am_hxindex_OBJECTS) hxindex_LDADD = $(LDADD) hxindex_DEPENDENCIES = @LIBOBJS@ am_hxmkbib_OBJECTS = errexit.$(OBJEXT) heap.$(OBJEXT) \ hxmkbib.$(OBJEXT) hash.$(OBJEXT) hxmkbib_OBJECTS = $(am_hxmkbib_OBJECTS) hxmkbib_LDADD = $(LDADD) hxmkbib_DEPENDENCIES = @LIBOBJS@ am_hxmultitoc_OBJECTS = hxmultitoc.$(OBJEXT) html.$(OBJEXT) \ scan.$(OBJEXT) openurl.$(OBJEXT) url.$(OBJEXT) \ connectsock.$(OBJEXT) heap.$(OBJEXT) errexit.$(OBJEXT) \ class.$(OBJEXT) headers.$(OBJEXT) dict.$(OBJEXT) \ types.$(OBJEXT) fopencookie.$(OBJEXT) hxmultitoc_OBJECTS = $(am_hxmultitoc_OBJECTS) hxmultitoc_LDADD = $(LDADD) hxmultitoc_DEPENDENCIES = @LIBOBJS@ am_hxname2id_OBJECTS = html.$(OBJEXT) scan.$(OBJEXT) dtd.$(OBJEXT) \ openurl.$(OBJEXT) errexit.$(OBJEXT) url.$(OBJEXT) \ connectsock.$(OBJEXT) heap.$(OBJEXT) tree.$(OBJEXT) \ types.$(OBJEXT) hxname2id.$(OBJEXT) headers.$(OBJEXT) \ dict.$(OBJEXT) fopencookie.$(OBJEXT) hxname2id_OBJECTS = $(am_hxname2id_OBJECTS) hxname2id_LDADD = $(LDADD) hxname2id_DEPENDENCIES = @LIBOBJS@ am_hxnormalize_OBJECTS = hxnormalize.$(OBJEXT) html.$(OBJEXT) \ scan.$(OBJEXT) openurl.$(OBJEXT) url.$(OBJEXT) tree.$(OBJEXT) \ connectsock.$(OBJEXT) heap.$(OBJEXT) dtd.$(OBJEXT) \ types.$(OBJEXT) textwrap.$(OBJEXT) errexit.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxnormalize_OBJECTS = $(am_hxnormalize_OBJECTS) hxnormalize_LDADD = $(LDADD) hxnormalize_DEPENDENCIES = @LIBOBJS@ am_hxnsxml_OBJECTS = hxnsxml.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ types.$(OBJEXT) errexit.$(OBJEXT) heap.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) connectsock.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxnsxml_OBJECTS = $(am_hxnsxml_OBJECTS) hxnsxml_LDADD = 
$(LDADD) hxnsxml_DEPENDENCIES = @LIBOBJS@ am_hxnum_OBJECTS = hxnum.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) errexit.$(OBJEXT) \ heap.$(OBJEXT) connectsock.$(OBJEXT) headers.$(OBJEXT) \ dict.$(OBJEXT) types.$(OBJEXT) class.$(OBJEXT) \ fopencookie.$(OBJEXT) hxnum_OBJECTS = $(am_hxnum_OBJECTS) hxnum_LDADD = $(LDADD) hxnum_DEPENDENCIES = @LIBOBJS@ am_hxpipe_OBJECTS = hxpipe.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ types.$(OBJEXT) errexit.$(OBJEXT) heap.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) connectsock.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxpipe_OBJECTS = $(am_hxpipe_OBJECTS) hxpipe_LDADD = $(LDADD) hxpipe_DEPENDENCIES = @LIBOBJS@ am_hxprune_OBJECTS = hxprune.$(OBJEXT) tree.$(OBJEXT) scan.$(OBJEXT) \ html.$(OBJEXT) errexit.$(OBJEXT) dtd.$(OBJEXT) heap.$(OBJEXT) \ types.$(OBJEXT) openurl.$(OBJEXT) url.$(OBJEXT) \ connectsock.$(OBJEXT) class.$(OBJEXT) headers.$(OBJEXT) \ dict.$(OBJEXT) fopencookie.$(OBJEXT) hxprune_OBJECTS = $(am_hxprune_OBJECTS) hxprune_LDADD = $(LDADD) hxprune_DEPENDENCIES = @LIBOBJS@ am_hxref_OBJECTS = html.$(OBJEXT) scan.$(OBJEXT) dtd.$(OBJEXT) \ openurl.$(OBJEXT) errexit.$(OBJEXT) url.$(OBJEXT) \ connectsock.$(OBJEXT) heap.$(OBJEXT) tree.$(OBJEXT) \ types.$(OBJEXT) genid.$(OBJEXT) hxref.$(OBJEXT) hash.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxref_OBJECTS = $(am_hxref_OBJECTS) hxref_LDADD = $(LDADD) hxref_DEPENDENCIES = @LIBOBJS@ am_hxremove_OBJECTS = hxremove.$(OBJEXT) types.$(OBJEXT) \ errexit.$(OBJEXT) heap.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ tree.$(OBJEXT) selector.$(OBJEXT) dtd.$(OBJEXT) hxremove_OBJECTS = $(am_hxremove_OBJECTS) hxremove_LDADD = $(LDADD) hxremove_DEPENDENCIES = @LIBOBJS@ am_hxselect_OBJECTS = hxselect.$(OBJEXT) types.$(OBJEXT) \ errexit.$(OBJEXT) heap.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ selector.$(OBJEXT) hxselect_OBJECTS = $(am_hxselect_OBJECTS) hxselect_LDADD = $(LDADD) hxselect_DEPENDENCIES = @LIBOBJS@ 
am_hxtabletrans_OBJECTS = hxtabletrans.$(OBJEXT) scan.$(OBJEXT) \ tree.$(OBJEXT) heap.$(OBJEXT) openurl.$(OBJEXT) html.$(OBJEXT) \ errexit.$(OBJEXT) dtd.$(OBJEXT) types.$(OBJEXT) dict.$(OBJEXT) \ connectsock.$(OBJEXT) headers.$(OBJEXT) url.$(OBJEXT) \ fopencookie.$(OBJEXT) hxtabletrans_OBJECTS = $(am_hxtabletrans_OBJECTS) hxtabletrans_LDADD = $(LDADD) hxtabletrans_DEPENDENCIES = @LIBOBJS@ am_hxtoc_OBJECTS = html.$(OBJEXT) scan.$(OBJEXT) dtd.$(OBJEXT) \ openurl.$(OBJEXT) errexit.$(OBJEXT) url.$(OBJEXT) \ class.$(OBJEXT) connectsock.$(OBJEXT) heap.$(OBJEXT) \ tree.$(OBJEXT) types.$(OBJEXT) genid.$(OBJEXT) hxtoc.$(OBJEXT) \ hash.$(OBJEXT) headers.$(OBJEXT) dict.$(OBJEXT) \ fopencookie.$(OBJEXT) hxtoc_OBJECTS = $(am_hxtoc_OBJECTS) hxtoc_LDADD = $(LDADD) hxtoc_DEPENDENCIES = @LIBOBJS@ am_hxuncdata_OBJECTS = hxuncdata.$(OBJEXT) hxuncdata_OBJECTS = $(am_hxuncdata_OBJECTS) hxuncdata_LDADD = $(LDADD) hxuncdata_DEPENDENCIES = @LIBOBJS@ am_hxunent_OBJECTS = unent.$(OBJEXT) hxunentmain.$(OBJEXT) hxunent_OBJECTS = $(am_hxunent_OBJECTS) hxunent_LDADD = $(LDADD) hxunent_DEPENDENCIES = @LIBOBJS@ am_hxunpipe_OBJECTS = hxunpipe.$(OBJEXT) heap.$(OBJEXT) \ errexit.$(OBJEXT) openurl.$(OBJEXT) url.$(OBJEXT) \ connectsock.$(OBJEXT) headers.$(OBJEXT) dict.$(OBJEXT) \ types.$(OBJEXT) fopencookie.$(OBJEXT) hxunpipe_OBJECTS = $(am_hxunpipe_OBJECTS) hxunpipe_LDADD = $(LDADD) hxunpipe_DEPENDENCIES = @LIBOBJS@ am_hxunxmlns_OBJECTS = hxunxmlns.$(OBJEXT) html.$(OBJEXT) \ scan.$(OBJEXT) openurl.$(OBJEXT) url.$(OBJEXT) \ connectsock.$(OBJEXT) heap.$(OBJEXT) errexit.$(OBJEXT) \ types.$(OBJEXT) headers.$(OBJEXT) dict.$(OBJEXT) \ fopencookie.$(OBJEXT) hxunxmlns_OBJECTS = $(am_hxunxmlns_OBJECTS) hxunxmlns_LDADD = $(LDADD) hxunxmlns_DEPENDENCIES = @LIBOBJS@ am_hxwls_OBJECTS = hxwls.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) connectsock.$(OBJEXT) \ heap.$(OBJEXT) errexit.$(OBJEXT) types.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxwls_OBJECTS = 
$(am_hxwls_OBJECTS) hxwls_LDADD = $(LDADD) hxwls_DEPENDENCIES = @LIBOBJS@ am_hxxmlns_OBJECTS = hxxmlns.$(OBJEXT) html.$(OBJEXT) scan.$(OBJEXT) \ openurl.$(OBJEXT) url.$(OBJEXT) connectsock.$(OBJEXT) \ heap.$(OBJEXT) errexit.$(OBJEXT) types.$(OBJEXT) \ headers.$(OBJEXT) dict.$(OBJEXT) fopencookie.$(OBJEXT) hxxmlns_OBJECTS = $(am_hxxmlns_OBJECTS) hxxmlns_LDADD = $(LDADD) hxxmlns_DEPENDENCIES = @LIBOBJS@ am_xml2asc_OBJECTS = xml2asc.$(OBJEXT) xml2asc_OBJECTS = $(am_xml2asc_OBJECTS) xml2asc_LDADD = $(LDADD) xml2asc_DEPENDENCIES = @LIBOBJS@ am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } SCRIPTS = $(bin_SCRIPTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ LEXCOMPILE = $(LEX) $(AM_LFLAGS) $(LFLAGS) YLWRAP = $(top_srcdir)/ylwrap YACCCOMPILE = $(YACC) $(AM_YFLAGS) $(YFLAGS) SOURCES = $(asc2xml_SOURCES) $(cexport_SOURCES) $(hxaddid_SOURCES) \ $(hxcite_SOURCES) $(hxclean_SOURCES) $(hxcopy_SOURCES) \ $(hxcount_SOURCES) $(hxextract_SOURCES) $(hxincl_SOURCES) \ $(hxindex_SOURCES) $(hxmkbib_SOURCES) $(hxmultitoc_SOURCES) \ $(hxname2id_SOURCES) $(hxnormalize_SOURCES) $(hxnsxml_SOURCES) \ $(hxnum_SOURCES) $(hxpipe_SOURCES) $(hxprune_SOURCES) \ $(hxref_SOURCES) $(hxremove_SOURCES) $(hxselect_SOURCES) \ $(hxtabletrans_SOURCES) $(hxtoc_SOURCES) $(hxuncdata_SOURCES) \ $(hxunent_SOURCES) $(hxunpipe_SOURCES) $(hxunxmlns_SOURCES) \ $(hxwls_SOURCES) $(hxxmlns_SOURCES) $(xml2asc_SOURCES) DIST_SOURCES = $(asc2xml_SOURCES) $(cexport_SOURCES) \ $(hxaddid_SOURCES) $(hxcite_SOURCES) $(hxclean_SOURCES) \ $(hxcopy_SOURCES) $(hxcount_SOURCES) $(hxextract_SOURCES) \ $(hxincl_SOURCES) $(hxindex_SOURCES) $(hxmkbib_SOURCES) \ $(hxmultitoc_SOURCES) $(hxname2id_SOURCES) \ $(hxnormalize_SOURCES) $(hxnsxml_SOURCES) $(hxnum_SOURCES) \ $(hxpipe_SOURCES) $(hxprune_SOURCES) $(hxref_SOURCES) \ $(hxremove_SOURCES) $(hxselect_SOURCES) \ $(hxtabletrans_SOURCES) $(hxtoc_SOURCES) $(hxuncdata_SOURCES) \ $(hxunent_SOURCES) $(hxunpipe_SOURCES) $(hxunxmlns_SOURCES) \ $(hxwls_SOURCES) $(hxxmlns_SOURCES) $(xml2asc_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac man1dir = $(mandir)/man1 NROFF = nroff MANS = $(man_MANS) ETAGS = etags CTAGS = 
ctags am__tty_colors = \ red=; grn=; lgn=; blu=; std= DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) am__remove_distdir = \ if test -d "$(distdir)"; then \ find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ && rm -rf "$(distdir)" \ || { sleep 5 && rm -rf "$(distdir)"; }; \ else :; fi DIST_ARCHIVES = $(distdir).tar.gz GZIP_ENV = --best distuninstallcheck_listfiles = find . -type f -print am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' distcleancheck_listfiles = find . -type f -print ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LEX = @LEX@ LEXLIB = @LEXLIB@ LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ LIBCURL = @LIBCURL@ LIBCURL_CPPFLAGS = @LIBCURL_CPPFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ OBJEXT = @OBJEXT@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VERSION = @VERSION@ YACC = @YACC@ YFLAGS = @YFLAGS@ _libcurl_config = @_libcurl_config@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ 
ac_ct_CC = @ac_ct_CC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build_alias = @build_alias@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host_alias = @host_alias@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ lex_opt_flags = @lex_opt_flags@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ bin_SCRIPTS = hxcite-mkbib hxprintlinks man_MANS = hxaddid.1 asc2xml.1 hxcite.1\ hxcite-mkbib.1 hxcopy.1\ hxcount.1 hxextract.1 hxclean.1\ hxprune.1 hxincl.1 hxindex.1 hxmkbib.1 hxmultitoc.1\ hxname2id.1 hxnormalize.1 hxnum.1 hxpipe.1\ hxprintlinks.1 hxremove.1 hxtabletrans.1\ hxtoc.1 hxuncdata.1 hxunent.1\ hxunpipe.1 hxunxmlns.1 hxwls.1 xml2asc.1 hxxmlns.1\ hxref.1 hxselect.1 hxnsxml.1 EXTRA_DIST = $(man_MANS) dtd.hash unent.hash export.h\ $(bin_SCRIPTS) $(BUILT_SOURCES) cexport.1\ $(TESTS) LDADD = @LIBOBJS@ @LIBCURL@ AM_CPPFLAGS = @LIBCURL_CPPFLAGS@ AM_YFLAGS = -d AM_LFLAGS = @lex_opt_flags@ EXPORTS = dict.e heap.e types.e headers.e connectsock.e\ dtd.e errexit.e tree.e genid.e html.e url.e\ openurl.e scan.e textwrap.e unent.e class.e\ selector.e hash.e fopencookie.e BUILT_SOURCES = $(EXPORTS) scan.c html.c html.h dtd.c unent.c asc2xml_SOURCES = asc2xml.c hxaddid_SOURCES = hxaddid.c html.y scan.l dtd.c openurl.c errexit.c\ url.c connectsock.c heap.c tree.c types.c genid.c\ class.c hash.c headers.c dict.c 
fopencookie.c cexport_SOURCES = cexport.c hxcite_SOURCES = heap.c errexit.c hxcite.c hxcount_SOURCES = hxcount.c html.y scan.l types.c errexit.c heap.c\ openurl.c url.c connectsock.c headers.c dict.c\ fopencookie.c hxextract_SOURCES = hxextract.c html.y scan.l openurl.c url.c\ connectsock.c heap.c errexit.c class.c headers.c\ dict.c types.c fopencookie.c hxclean_SOURCES = hxclean.c html.y tree.c types.c heap.c dtd.c\ scan.l errexit.c hxprune_SOURCES = hxprune.c tree.c scan.l html.y errexit.c dtd.c\ heap.c types.c openurl.c url.c connectsock.c class.c\ headers.c dict.c fopencookie.c hxincl_SOURCES = hxincl.c scan.l html.y openurl.c url.c heap.c\ errexit.c connectsock.c types.c headers.c dict.c\ fopencookie.c hxindex_SOURCES = hxindex.c scan.l html.y openurl.c url.c heap.c class.c\ errexit.c connectsock.c types.c tree.c genid.c dtd.c\ headers.c dict.c fopencookie.c hxmkbib_SOURCES = errexit.c heap.c hxmkbib.c hash.c hxmultitoc_SOURCES = hxmultitoc.c html.y scan.l openurl.c url.c\ connectsock.c heap.c errexit.c class.c headers.c\ dict.c types.c fopencookie.c hxnormalize_SOURCES = hxnormalize.c html.y scan.l openurl.c url.c\ tree.c connectsock.c heap.c dtd.c types.c\ textwrap.c errexit.c headers.c dict.c fopencookie.c hxnum_SOURCES = hxnum.c html.y scan.l openurl.c url.c errexit.c\ heap.c connectsock.c headers.c dict.c types.c class.c\ fopencookie.c hxpipe_SOURCES = hxpipe.c html.y scan.l types.c errexit.c heap.c\ openurl.c url.c connectsock.c headers.c dict.c\ fopencookie.c hxremove_SOURCES = hxremove.c types.c errexit.c heap.c html.y scan.l\ tree.c selector.c dtd.c hxselect_SOURCES = hxselect.c types.c errexit.c heap.c html.y scan.l\ selector.c hxtabletrans_SOURCES = hxtabletrans.c scan.l tree.c heap.c openurl.c html.y\ errexit.c dtd.c types.c dict.c connectsock.c\ headers.c url.c fopencookie.c hxtoc_SOURCES = html.y scan.l dtd.c openurl.c errexit.c url.c class.c\ connectsock.c heap.c tree.c types.c genid.c hxtoc.c\ hash.c headers.c dict.c fopencookie.c 
hxuncdata_SOURCES = hxuncdata.c hxunent_SOURCES = unent.c hxunentmain.c hxunpipe_SOURCES = hxunpipe.c heap.c errexit.c openurl.c url.c\ connectsock.c headers.c dict.c types.c fopencookie.c hxunxmlns_SOURCES = hxunxmlns.c html.y scan.l openurl.c url.c\ connectsock.c heap.c errexit.c types.c headers.c\ dict.c fopencookie.c hxwls_SOURCES = hxwls.c html.y scan.l openurl.c url.c\ connectsock.c heap.c errexit.c types.c headers.c\ dict.c fopencookie.c hxxmlns_SOURCES = hxxmlns.c html.y scan.l openurl.c url.c\ connectsock.c heap.c errexit.c types.c headers.c\ dict.c fopencookie.c xml2asc_SOURCES = xml2asc.c hxref_SOURCES = html.y scan.l dtd.c openurl.c errexit.c url.c\ connectsock.c heap.c tree.c types.c genid.c hxref.c\ hash.c headers.c dict.c fopencookie.c hxname2id_SOURCES = html.y scan.l dtd.c openurl.c errexit.c url.c\ connectsock.c heap.c tree.c types.c hxname2id.c\ headers.c dict.c fopencookie.c hxcopy_SOURCES = html.y scan.l types.c url.c openurl.c errexit.c\ dict.c headers.c heap.c connectsock.c hxcopy.c\ fopencookie.c hxnsxml_SOURCES = hxnsxml.c html.y scan.l types.c errexit.c heap.c\ openurl.c url.c connectsock.c headers.c dict.c\ fopencookie.c HTML_MANS = $(man_MANS:.1=.html) CLEANFILES = $(HTML_MANS) SUFFIX = .c:sC .l:sC .y:sC .e:h .h:h SUFFIXES = .1 .e .html # This is inconvenient. In automake version 1.11, $(wildcard) worked, # but not in version 1.14. 
:-( # TESTS = $(wildcard $(top_srcdir)/tests/*.sh) TESTS = tests/addid1.sh tests/addid1.sh tests/ascxml.sh\ tests/cdata1.sh tests/cite1.sh tests/cite2.sh tests/cite3.sh\ tests/cite4.sh\ tests/clean1.sh tests/copy1.sh tests/copy2.sh tests/copy3.sh\ tests/copy4.sh tests/copy5.sh tests/copy6.sh tests/extract1.sh\ tests/hxnsxml1.sh tests/hxnsxml2.sh tests/hxnsxml3.sh\ tests/hxnsxml4.sh tests/incl1.sh tests/incl10.sh\ tests/incl11.sh tests/incl2.sh tests/incl3.sh tests/incl4.sh\ tests/incl5.sh tests/incl6.sh tests/incl7.sh tests/incl8.sh\ tests/incl9.sh tests/index.sh tests/index2.sh tests/index3.sh\ tests/index4.sh tests/index5.sh\ tests/mkbib1.sh tests/normalize1.sh\ tests/pipe1.sh tests/pipe2.sh tests/pipe3.sh tests/ref1.sh\ tests/ref2.sh tests/ref3.sh tests/relurl1.sh tests/relurl2.sh\ tests/relurl3.sh tests/remove1.sh tests/remove2.sh\ tests/tabletrans1.sh tests/tabletrans2.sh tests/tabletrans3.sh\ tests/toc1.sh tests/toc2.sh tests/uncdata1.sh tests/unpipe1.sh\ tests/unpipe2.sh tests/unpipe3.sh tests/wls1.sh tests/wls2.sh\ tests/xmlasc1.sh tests/xmlasc2.sh tests/xmlasc3.sh\ tests/xmlasc4.sh tests/xmlasc5.sh tests/xmlasc6.sh\ tests/xmlasc7.sh tests/xmlns1.sh tests/xref1.sh tests/xref2.sh\ tests/xref3.sh tests/xref4.sh tests/xref5.sh tests/xref6.sh\ tests/xref7.sh all: $(BUILT_SOURCES) config.h $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .1 .e .html .c .l .o .obj .y am--refresh: Makefile @: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ echo ' $(SHELL) ./config.status'; \ $(SHELL) ./config.status;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck $(top_srcdir)/configure: $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): config.h: stamp-h1 @if test ! -f $@; then rm -f stamp-h1; else :; fi @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status config.h $(srcdir)/config.h.in: $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f stamp-h1 touch $@ distclean-hdr: -rm -f config.h stamp-h1 install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ fi; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p; \ then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ 
$(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(bindir)" && rm -f $$files clean-binPROGRAMS: -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) clean-noinstPROGRAMS: -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) asc2xml$(EXEEXT): $(asc2xml_OBJECTS) $(asc2xml_DEPENDENCIES) $(EXTRA_asc2xml_DEPENDENCIES) @rm -f asc2xml$(EXEEXT) $(LINK) $(asc2xml_OBJECTS) $(asc2xml_LDADD) $(LIBS) cexport$(EXEEXT): $(cexport_OBJECTS) $(cexport_DEPENDENCIES) $(EXTRA_cexport_DEPENDENCIES) @rm -f cexport$(EXEEXT) $(LINK) $(cexport_OBJECTS) $(cexport_LDADD) $(LIBS) html.h: html.c @if test ! -f $@; then rm -f html.c; else :; fi @if test ! 
-f $@; then $(MAKE) $(AM_MAKEFLAGS) html.c; else :; fi hxaddid$(EXEEXT): $(hxaddid_OBJECTS) $(hxaddid_DEPENDENCIES) $(EXTRA_hxaddid_DEPENDENCIES) @rm -f hxaddid$(EXEEXT) $(LINK) $(hxaddid_OBJECTS) $(hxaddid_LDADD) $(LIBS) hxcite$(EXEEXT): $(hxcite_OBJECTS) $(hxcite_DEPENDENCIES) $(EXTRA_hxcite_DEPENDENCIES) @rm -f hxcite$(EXEEXT) $(LINK) $(hxcite_OBJECTS) $(hxcite_LDADD) $(LIBS) hxclean$(EXEEXT): $(hxclean_OBJECTS) $(hxclean_DEPENDENCIES) $(EXTRA_hxclean_DEPENDENCIES) @rm -f hxclean$(EXEEXT) $(LINK) $(hxclean_OBJECTS) $(hxclean_LDADD) $(LIBS) hxcopy$(EXEEXT): $(hxcopy_OBJECTS) $(hxcopy_DEPENDENCIES) $(EXTRA_hxcopy_DEPENDENCIES) @rm -f hxcopy$(EXEEXT) $(LINK) $(hxcopy_OBJECTS) $(hxcopy_LDADD) $(LIBS) hxcount$(EXEEXT): $(hxcount_OBJECTS) $(hxcount_DEPENDENCIES) $(EXTRA_hxcount_DEPENDENCIES) @rm -f hxcount$(EXEEXT) $(LINK) $(hxcount_OBJECTS) $(hxcount_LDADD) $(LIBS) hxextract$(EXEEXT): $(hxextract_OBJECTS) $(hxextract_DEPENDENCIES) $(EXTRA_hxextract_DEPENDENCIES) @rm -f hxextract$(EXEEXT) $(LINK) $(hxextract_OBJECTS) $(hxextract_LDADD) $(LIBS) hxincl$(EXEEXT): $(hxincl_OBJECTS) $(hxincl_DEPENDENCIES) $(EXTRA_hxincl_DEPENDENCIES) @rm -f hxincl$(EXEEXT) $(LINK) $(hxincl_OBJECTS) $(hxincl_LDADD) $(LIBS) hxindex$(EXEEXT): $(hxindex_OBJECTS) $(hxindex_DEPENDENCIES) $(EXTRA_hxindex_DEPENDENCIES) @rm -f hxindex$(EXEEXT) $(LINK) $(hxindex_OBJECTS) $(hxindex_LDADD) $(LIBS) hxmkbib$(EXEEXT): $(hxmkbib_OBJECTS) $(hxmkbib_DEPENDENCIES) $(EXTRA_hxmkbib_DEPENDENCIES) @rm -f hxmkbib$(EXEEXT) $(LINK) $(hxmkbib_OBJECTS) $(hxmkbib_LDADD) $(LIBS) hxmultitoc$(EXEEXT): $(hxmultitoc_OBJECTS) $(hxmultitoc_DEPENDENCIES) $(EXTRA_hxmultitoc_DEPENDENCIES) @rm -f hxmultitoc$(EXEEXT) $(LINK) $(hxmultitoc_OBJECTS) $(hxmultitoc_LDADD) $(LIBS) hxname2id$(EXEEXT): $(hxname2id_OBJECTS) $(hxname2id_DEPENDENCIES) $(EXTRA_hxname2id_DEPENDENCIES) @rm -f hxname2id$(EXEEXT) $(LINK) $(hxname2id_OBJECTS) $(hxname2id_LDADD) $(LIBS) hxnormalize$(EXEEXT): $(hxnormalize_OBJECTS) $(hxnormalize_DEPENDENCIES) 
$(EXTRA_hxnormalize_DEPENDENCIES) @rm -f hxnormalize$(EXEEXT) $(LINK) $(hxnormalize_OBJECTS) $(hxnormalize_LDADD) $(LIBS) hxnsxml$(EXEEXT): $(hxnsxml_OBJECTS) $(hxnsxml_DEPENDENCIES) $(EXTRA_hxnsxml_DEPENDENCIES) @rm -f hxnsxml$(EXEEXT) $(LINK) $(hxnsxml_OBJECTS) $(hxnsxml_LDADD) $(LIBS) hxnum$(EXEEXT): $(hxnum_OBJECTS) $(hxnum_DEPENDENCIES) $(EXTRA_hxnum_DEPENDENCIES) @rm -f hxnum$(EXEEXT) $(LINK) $(hxnum_OBJECTS) $(hxnum_LDADD) $(LIBS) hxpipe$(EXEEXT): $(hxpipe_OBJECTS) $(hxpipe_DEPENDENCIES) $(EXTRA_hxpipe_DEPENDENCIES) @rm -f hxpipe$(EXEEXT) $(LINK) $(hxpipe_OBJECTS) $(hxpipe_LDADD) $(LIBS) hxprune$(EXEEXT): $(hxprune_OBJECTS) $(hxprune_DEPENDENCIES) $(EXTRA_hxprune_DEPENDENCIES) @rm -f hxprune$(EXEEXT) $(LINK) $(hxprune_OBJECTS) $(hxprune_LDADD) $(LIBS) hxref$(EXEEXT): $(hxref_OBJECTS) $(hxref_DEPENDENCIES) $(EXTRA_hxref_DEPENDENCIES) @rm -f hxref$(EXEEXT) $(LINK) $(hxref_OBJECTS) $(hxref_LDADD) $(LIBS) hxremove$(EXEEXT): $(hxremove_OBJECTS) $(hxremove_DEPENDENCIES) $(EXTRA_hxremove_DEPENDENCIES) @rm -f hxremove$(EXEEXT) $(LINK) $(hxremove_OBJECTS) $(hxremove_LDADD) $(LIBS) hxselect$(EXEEXT): $(hxselect_OBJECTS) $(hxselect_DEPENDENCIES) $(EXTRA_hxselect_DEPENDENCIES) @rm -f hxselect$(EXEEXT) $(LINK) $(hxselect_OBJECTS) $(hxselect_LDADD) $(LIBS) hxtabletrans$(EXEEXT): $(hxtabletrans_OBJECTS) $(hxtabletrans_DEPENDENCIES) $(EXTRA_hxtabletrans_DEPENDENCIES) @rm -f hxtabletrans$(EXEEXT) $(LINK) $(hxtabletrans_OBJECTS) $(hxtabletrans_LDADD) $(LIBS) hxtoc$(EXEEXT): $(hxtoc_OBJECTS) $(hxtoc_DEPENDENCIES) $(EXTRA_hxtoc_DEPENDENCIES) @rm -f hxtoc$(EXEEXT) $(LINK) $(hxtoc_OBJECTS) $(hxtoc_LDADD) $(LIBS) hxuncdata$(EXEEXT): $(hxuncdata_OBJECTS) $(hxuncdata_DEPENDENCIES) $(EXTRA_hxuncdata_DEPENDENCIES) @rm -f hxuncdata$(EXEEXT) $(LINK) $(hxuncdata_OBJECTS) $(hxuncdata_LDADD) $(LIBS) hxunent$(EXEEXT): $(hxunent_OBJECTS) $(hxunent_DEPENDENCIES) $(EXTRA_hxunent_DEPENDENCIES) @rm -f hxunent$(EXEEXT) $(LINK) $(hxunent_OBJECTS) $(hxunent_LDADD) $(LIBS) hxunpipe$(EXEEXT): 
$(hxunpipe_OBJECTS) $(hxunpipe_DEPENDENCIES) $(EXTRA_hxunpipe_DEPENDENCIES) @rm -f hxunpipe$(EXEEXT) $(LINK) $(hxunpipe_OBJECTS) $(hxunpipe_LDADD) $(LIBS) hxunxmlns$(EXEEXT): $(hxunxmlns_OBJECTS) $(hxunxmlns_DEPENDENCIES) $(EXTRA_hxunxmlns_DEPENDENCIES) @rm -f hxunxmlns$(EXEEXT) $(LINK) $(hxunxmlns_OBJECTS) $(hxunxmlns_LDADD) $(LIBS) hxwls$(EXEEXT): $(hxwls_OBJECTS) $(hxwls_DEPENDENCIES) $(EXTRA_hxwls_DEPENDENCIES) @rm -f hxwls$(EXEEXT) $(LINK) $(hxwls_OBJECTS) $(hxwls_LDADD) $(LIBS) hxxmlns$(EXEEXT): $(hxxmlns_OBJECTS) $(hxxmlns_DEPENDENCIES) $(EXTRA_hxxmlns_DEPENDENCIES) @rm -f hxxmlns$(EXEEXT) $(LINK) $(hxxmlns_OBJECTS) $(hxxmlns_LDADD) $(LIBS) xml2asc$(EXEEXT): $(xml2asc_OBJECTS) $(xml2asc_DEPENDENCIES) $(EXTRA_xml2asc_DEPENDENCIES) @rm -f xml2asc$(EXEEXT) $(LINK) $(xml2asc_OBJECTS) $(xml2asc_LDADD) $(LIBS) install-binSCRIPTS: $(bin_SCRIPTS) @$(NORMAL_INSTALL) @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n' \ -e 'h;s|.*|.|' \ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) { files[d] = files[d] " " $$1; \ if (++n[d] == $(am__install_max)) { \ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ else { print "f", d "/" $$4, $$1 } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-binSCRIPTS: @$(NORMAL_UNINSTALL) @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 
0; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 's,.*/,,;$(transform)'`; \ dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/malloc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/realloc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strdup.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strerror.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/strstr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/tfind.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/tsearch.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/twalk.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/asc2xml.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cexport.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/class.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/connectsock.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dict.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dtd.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/errexit.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fopencookie.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genid.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/headers.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/heap.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/html.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxaddid.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxcite.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxclean.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxcopy.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/hxcount.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxextract.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxincl.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxindex.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxmkbib.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxmultitoc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxname2id.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxnormalize.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxnsxml.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxnum.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxpipe.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxprune.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxref.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxremove.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxselect.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxtabletrans.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxtoc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxuncdata.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxunentmain.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxunpipe.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxunxmlns.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxwls.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hxxmlns.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/openurl.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scan.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/selector.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/textwrap.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tree.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/types.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unent.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/url.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml2asc.Po@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c $< .c.obj: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` .l.c: $(am__skiplex) $(SHELL) $(YLWRAP) $< $(LEX_OUTPUT_ROOT).c $@ -- $(LEXCOMPILE) .y.c: $(am__skipyacc) $(SHELL) $(YLWRAP) $< y.tab.c $@ y.tab.h $*.h y.output $*.output -- $(YACCCOMPILE) install-man1: $(man_MANS) @$(NORMAL_INSTALL) @list1=''; \ list2='$(man_MANS)'; \ test -n "$(man1dir)" \ && test -n "`echo $$list1$$list2`" \ || exit 0; \ echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ { for i in $$list1; do echo "$$i"; done; \ if test -n "$$list2"; then \ for i in $$list2; do echo "$$i"; done \ | sed -n '/\.1[a-z]*$$/p'; \ fi; \ } | while read p; do \ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; echo "$$p"; \ done | \ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ sed 'N;N;s,\n, ,g' | { \ list=; while read file base inst; do \ if test "$$base" = "$$inst"; then list="$$list $$file"; else \ echo " $(INSTALL_DATA) 
'$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ fi; \ done; \ for i in $$list; do echo "$$i"; done | $(am__base_list) | \ while read files; do \ test -z "$$files" || { \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ done; } uninstall-man1: @$(NORMAL_UNINSTALL) @list=''; test -n "$(man1dir)" || exit 0; \ files=`{ for i in $$list; do echo "$$i"; done; \ l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \ sed -n '/\.1[a-z]*$$/p'; \ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo 
$(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags check-TESTS: $(TESTS) @failed=0; all=0; xfail=0; xpass=0; skip=0; \ srcdir=$(srcdir); export srcdir; \ list=' $(TESTS) '; \ $(am__tty_colors); \ if test -n "$$list"; then \ for tst in $$list; do \ if test -f ./$$tst; then dir=./; \ elif test -f $$tst; then dir=; \ else dir="$(srcdir)/"; fi; \ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \ all=`expr $$all + 1`; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$tst[\ \ ]*) \ xpass=`expr $$xpass + 1`; \ failed=`expr $$failed + 1`; \ col=$$red; res=XPASS; \ ;; \ *) \ col=$$grn; res=PASS; \ ;; \ esac; \ elif test $$? -ne 77; then \ all=`expr $$all + 1`; \ case " $(XFAIL_TESTS) " in \ *[\ \ ]$$tst[\ \ ]*) \ xfail=`expr $$xfail + 1`; \ col=$$lgn; res=XFAIL; \ ;; \ *) \ failed=`expr $$failed + 1`; \ col=$$red; res=FAIL; \ ;; \ esac; \ else \ skip=`expr $$skip + 1`; \ col=$$blu; res=SKIP; \ fi; \ echo "$${col}$$res$${std}: $$tst"; \ done; \ if test "$$all" -eq 1; then \ tests="test"; \ All=""; \ else \ tests="tests"; \ All="All "; \ fi; \ if test "$$failed" -eq 0; then \ if test "$$xfail" -eq 0; then \ banner="$$All$$all $$tests passed"; \ else \ if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \ banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \ fi; \ else \ if test "$$xpass" -eq 0; then \ banner="$$failed of $$all $$tests failed"; \ else \ if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \ banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \ fi; \ fi; \ dashes="$$banner"; \ skipped=""; \ if test "$$skip" -ne 0; then \ 
if test "$$skip" -eq 1; then \ skipped="($$skip test was not run)"; \ else \ skipped="($$skip tests were not run)"; \ fi; \ test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \ dashes="$$skipped"; \ fi; \ report=""; \ if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \ report="Please report to $(PACKAGE_BUGREPORT)"; \ test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \ dashes="$$report"; \ fi; \ dashes=`echo "$$dashes" | sed s/./=/g`; \ if test "$$failed" -eq 0; then \ col="$$grn"; \ else \ col="$$red"; \ fi; \ echo "$${col}$$dashes$${std}"; \ echo "$${col}$$banner$${std}"; \ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \ test -z "$$report" || echo "$${col}$$report$${std}"; \ echo "$${col}$$dashes$${std}"; \ test "$$failed" -eq 0; \ else :; fi distdir: $(DISTFILES) @list='$(MANS)'; if test -n "$$list"; then \ list=`for p in $$list; do \ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \ if test -n "$$list" && \ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \ echo " typically \`make maintainer-clean' will remove them" >&2; \ exit 1; \ else :; fi; \ else :; fi $(am__remove_distdir) test -d "$(distdir)" || mkdir "$(distdir)" @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in 
$$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done -test -n "$(am__skip_mode_fix)" \ || find "$(distdir)" -type d ! -perm -755 \ -exec chmod u+rwx,go+rx {} \; -o \ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r "$(distdir)" dist-gzip: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 $(am__remove_distdir) dist-lzip: distdir tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz $(am__remove_distdir) dist-lzma: distdir tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma $(am__remove_distdir) dist-xz: distdir tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz $(am__remove_distdir) dist-tarZ: distdir tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__remove_distdir) dist-shar: distdir shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz $(am__remove_distdir) dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) $(am__remove_distdir) dist dist-all: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__remove_distdir) # This target untars the dist file 
and tries a VPATH configuration. Then # it guarantees that the distribution is self-contained by making another # tarfile. distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.lzma*) \ lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\ *.tar.lz*) \ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ *.tar.xz*) \ xz -dc $(distdir).tar.xz | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ esac chmod -R a-w $(distdir); chmod u+w $(distdir) mkdir $(distdir)/_build mkdir $(distdir)/_inst chmod a-w $(distdir) test -d $(distdir)/_build || exit 0; \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ && $(am__cd) $(distdir)/_build \ && ../configure --srcdir=.. --prefix="$$dc_install_base" \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) install \ && $(MAKE) $(AM_MAKEFLAGS) installcheck \ && $(MAKE) $(AM_MAKEFLAGS) uninstall \ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ distuninstallcheck \ && chmod -R a-w "$$dc_install_base" \ && ({ \ (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ } || { rm -rf "$$dc_destdir"; exit 1; }) \ && rm -rf "$$dc_destdir" \ && $(MAKE) $(AM_MAKEFLAGS) dist \ && rm -rf $(DIST_ARCHIVES) \ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ && cd "$$am__cwd" \ || exit 1 $(am__remove_distdir) @(echo "$(distdir) archives ready for distribution: "; \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' distuninstallcheck: @test -n '$(distuninstallcheck_dir)' || { \ echo 'ERROR: trying to run $@ with an empty' \ '$$(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ $(am__cd) '$(distuninstallcheck_dir)' || { \ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left after uninstall:" ; \ if test -n "$(DESTDIR)"; then \ echo " (check DESTDIR support)"; \ fi ; \ $(distuninstallcheck_listfiles) ; \ exit 1; } >&2 distcleancheck: distclean @if test '$(srcdir)' = . 
; then \ echo "ERROR: distcleancheck can only run from a VPATH build" ; \ exit 1 ; \ fi @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left in build directory after distclean:" ; \ $(distcleancheck_listfiles) ; \ exit 1; } >&2 check-am: all-am $(MAKE) $(AM_MAKEFLAGS) check-TESTS check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(MANS) config.h installdirs: for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
-rm -f html.c -rm -f html.h -rm -f scan.c -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) clean: clean-am clean-am: clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \ mostlyclean-am distclean: distclean-am -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(DEPDIR) ./$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-man install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-binPROGRAMS install-binSCRIPTS install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-man1 install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -rf $(DEPDIR) ./$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS uninstall-man uninstall-man: uninstall-man1 .MAKE: all check check-am install install-am install-strip .PHONY: CTAGS GTAGS all all-am am--refresh check check-TESTS check-am \ clean clean-binPROGRAMS clean-generic clean-noinstPROGRAMS \ ctags dist dist-all dist-bzip2 dist-gzip dist-lzip dist-lzma \ dist-shar dist-tarZ dist-xz dist-zip distcheck distclean \ distclean-compile distclean-generic distclean-hdr \ distclean-tags distcleancheck distdir distuninstallcheck dvi \ dvi-am html html-am info info-am install install-am \ install-binPROGRAMS install-binSCRIPTS install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-man install-man1 install-pdf \ install-pdf-am install-ps install-ps-am 
install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic pdf pdf-am ps ps-am tags uninstall \ uninstall-am uninstall-binPROGRAMS uninstall-binSCRIPTS \ uninstall-man uninstall-man1 $(EXPORTS): cexport .c.e: $(top_builddir)/cexport -I. -I$(srcdir) $(CPPFLAGS) -c "@CPP@" <$< >$@ .1.html: man2html -r $< | sed -e '1,/^$$/d' >$@ dtd.c: dtd.hash gperf -a -c -C -o -t -p -T -k '1,2,$$' -N lookup_element $< >$@ unent.c: unent.hash gperf -a -c -C -o -t -p -k '1,2,$$' -D -N lookup_entity $< >$@ # html.h: html.c scan.o: html.h html.e scan.c sel.tab.c sel.tab.h: sel.y bison -p sel_ -d -v sel.y Overview.html: $(HTML_MANS) echo "Manual pages" >>$@ # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: html-xml-utils-6.5/hxremove.10000644000175000001440000000311412041463321013113 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXREMOVE" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxremove \- remove elements from an XML file by means of a CSS selector .SH SYNOPSIS .B hxremove .RB "[\| " \-i " \|]" .RB "[\| " \-l .IR language " \|]" .I selector .SH DESCRIPTION .B hxremove reads a well-formed XML document from standard input and writes it to standard output without any elements that match the CSS selector that is given as argument. For example .d .B hxremove ol li:first-child .e removes the first li (list item in XHTML) from every ol (ordered list). .PP Assumes that class selectors (".foo") refer to an attribute called "class". And assumes that ID selectors ("#foo") refer to an attribute called "id". .PP To handle HTML files, make them well-formed XML first, e.g., with .BR "hxnormalize -x" . .PP Compare with .BR hxselect , which removes everything .I but the selected elements. 
.SH OPTIONS The following options are supported: .TP 10 .B \-i Match case-insensitively. Useful for HTML and some other SGML-based languages. .TP .BI \-l " language" Sets the default language, in case the root element doesn't have an xml:lang attribute (default: none). Example: .B \-l en .TP .B \-? Show command usage. .SH OPERANDS The following operand is supported: .TP .I selector A selector. Most selectors from CSS level 3 are supported, with the exception of pseudo-classes, pseudo-elements and selectors with \fBlast\-\fR in their name. .SH "SEE ALSO" .BR asc2xml (1), .BR xml2asc (1), .BR hxnormalize (1), .BR hxselect (1), .BR UTF-8 " (RFC 2279)" html-xml-utils-6.5/hxprintlinks.10000644000175000001440000000114411606170750014023 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXPRINTLINKS" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxprintlinks \- number links and add a table of URLs at the end of an HTML file .SH SYNOPSIS .B hxprintlinks .RB "[\| " \-b .IR base " \|]" .I file .SH DESCRIPTION .B hxprintlinks adds a numbered table of all URLs (links) found in the file to the end of the HTML file. .SH OPTIONS The following options are supported: .TP .BI \-b " base" Prefix all URLs with the given \fIbase\fR. .SH OPERANDS The following operand is supported: .TP .I file The file to work on. 
html-xml-utils-6.5/connectsock.c0000644000175000001440000000775311516104145013670 00000000000000/* connectsock.c * * Part of HTML-XML-utils, see: * http://www.w3.org/Tools/HTML-XML-utils/ * * Copyright © 1994-2011 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 12 May 1998 **/ #include "config.h" #include #ifdef HAVE_SYS_SOCKET_H # include #endif #ifdef HAVE_NETINET_IN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif #include #include #include #include #include #include "export.h" EXPORT u_short portbase = 0; /* for non-root servers */ /* connectsock -- allocate & connect a socket using TCP or UDP */ EXPORT int connectsock(const char *host, const char *service, char *protocol) { /* host = name of host to which connection is desired */ /* service = service associated with the desired port */ /* protocol = name of protocol to use ("tcp" or "udp") */ struct addrinfo hints, *result, *rp; int t, s; /* Specify what type of connection we're looking for */ memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ hints.ai_socktype = (strcmp(protocol, "udp") == 0) ? SOCK_DGRAM : SOCK_STREAM; hints.ai_flags = 0; hints.ai_protocol = 0; /* Any protocol */ /* Parse network address and service */ if (getaddrinfo(host, service, &hints, &result) != 0) return -1; /* result is a linked list of address structures. 
*/ for (s = -1, rp = result; s == -1 && rp; rp = rp->ai_next) { if ((t = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol)) != -1) { if (connect(t, rp->ai_addr, rp->ai_addrlen) != -1) s = t; else close(t); } } freeaddrinfo(result); /* Free the memory */ return s; /* If -1 no address succeeded */ } /* connectTCP -- connect to a specified UDP service on a specified host */ EXPORT int connectTCP(const char *host, const char *service) { return connectsock(host, service, "tcp"); } /* connectUDP -- connect to a specified UDP service on a specified host */ EXPORT int connectUDP(char *host, char *service) { return connectsock(host, service, "udp"); } /* passivesock -- allocate & bind a server socket using TCP or UDP */ EXPORT int passivesock(char *service, char *protocol, int qlen) { /* service = service associated with the desired port */ /* protocol = name of protocol to use ("tcp" or "udp") */ /* qlen = maximum length of the server request queue */ struct addrinfo hints, *result, *rp; int t, s; /* Specify what type of connection we're looking for */ memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ hints.ai_socktype = (strcmp(protocol, "udp") == 0) ? SOCK_DGRAM : SOCK_STREAM; hints.ai_flags = AI_PASSIVE; /* For wildcard IP address */ hints.ai_protocol = 0; /* Any protocol */ hints.ai_canonname = NULL; hints.ai_addr = NULL; hints.ai_next = NULL; /* Parse network address and service */ if (getaddrinfo(NULL, service, &hints, &result) != 0) return -1; /* result is a linked list of address structures. 
*/ for (s = -1, rp = result; s == -1 && rp; rp = rp->ai_next) { if ((t = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol)) != -1) { if (bind(t, rp->ai_addr, rp->ai_addrlen) != -1) s = t; else close(t); } } freeaddrinfo(result); /* Free the memory */ if (s == -1) return -1; /* No address succeeded */ /* If we want a TCP connection, also call listen(2) */ if (hints.ai_socktype == SOCK_STREAM && listen(s, qlen) < 0) return -1; return s; } /* passiveTCP -- creat a passive socket for use in a TCP server */ EXPORT int passiveTCP(char *service, int qlen) { /* service = service associated with thte desired port */ /* qlen = maximum server request queue length */ return passivesock(service, "tcp", qlen); } /* passiveUDP -- creat a passive socket for use in a UDP server */ EXPORT int passiveUDP(char *service) { return passivesock(service, "udp", 0); } html-xml-utils-6.5/genid.c0000644000175000001440000000632712174313455012450 00000000000000/* * Generate unique IDs. * * TO DO: Also generate "readable" IDs if the text uses non-ASCII * characters. 
* * Copyright 2000 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 4 August 2000 **/ #include "config.h" #include #include #include #ifdef HAVE_ERRNO_H # include #endif #include #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_SEARCH_H # include #else # include "search-freebsd.h" #endif #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRSTR # include "strstr.e" # endif #endif #include #include "heap.e" #include "types.e" #include "tree.e" #include "errexit.e" #define MAXIDLEN 45 /* Max len of a generated ID */ typedef int(*compar_fn_t)(const void *, const void *); static void *idtree = NULL; /* Sorted tree of IDs */ /* storeID -- remember the existence of an ID (allocates a copy of the ID) */ EXPORT void storeID(conststring id) { /* Case-insensitive: necessary for HTML, only a little wasteful for XML */ (void) tsearch(newstring(id), &idtree, (compar_fn_t)strcasecmp); } /* gen_id_r -- find some text suitable for an ID recursively */ static void gen_id_r(Tree t, string s, int *len, int maxlen) { int i; Tree h; assert(s); /* s at least maxlen long */ /* Loop over children looking for useful text */ for (h = t->children; h && *len < maxlen - 1; h = h->sister) { switch (h->tp) { case Text: for (i = 0; *len < maxlen - 1 && h->text[i]; i++) if (isalpha(h->text[i])) s[(*len)++] = tolower(h->text[i]); else if (h->text[i] == '@') {s[(*len)++] = 'a'; s[(*len)++] = 't';} else if (*len == 0) ; /* Wait for a letter first */ else if (h->text[i]=='-') s[(*len)++] = h->text[i]; else if (h->text[i]=='.') s[(*len)++] = h->text[i]; else if (h->text[i]=='_') s[(*len)++] = h->text[i]; else if (isdigit(h->text[i])) s[(*len)++] = h->text[i]; else if (isspace(h->text[i]) && s[*len-1] != '-') s[(*len)++]='-'; break; case Element: /* Recursive */ gen_id_r(h, s, len, maxlen); break; default: break; } } #if 0 /* Look for a nice break, i.e., just before 
a '-' */ while (*len > 0 && s[(*len)-1] != '-') (*len)--; if (*len > 0) (*len)--; #endif s[*len] = '\0'; } /* gen_id -- try some heuristics to generate an ID for element t */ EXPORT string gen_id(Tree t) { string s; int len = 0; if (! (s = malloc(MAXIDLEN + 1))) errexit("Out of memory\n"); assert(MAXIDLEN > 4); gen_id_r(t, s, &len, MAXIDLEN - 4); if (len == 0) { s[len++] = 'x'; /* At least one character */ s[len] = '\0'; } if (tfind(s, &idtree, (compar_fn_t)strcasecmp)) { /* No suitable text found or text is already used elsewhere */ int seqno = 0; do { /* Try adding digits */ sprintf(s + len, "%d", seqno); seqno++; } while (seqno != 10000 && tfind(s, &idtree, (compar_fn_t)strcasecmp)); if (seqno == 10000) { /* 10000 tried, giving up... */ free(s); return NULL; } } (void) tsearch(s, &idtree, (compar_fn_t)strcasecmp); /* Store it */ return s; } html-xml-utils-6.5/hxclean.c0000644000175000001440000000534412174313455013002 00000000000000/* * Clean up an HTML file: * Insert missing tags. * * Copyright 1994-2000 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * 16 September 1997 * Bert Bos * $Id: hxclean.c,v 1.2 2013-07-25 20:59:34 bbos Exp $ */ #include "config.h" #include #include #include "export.h" #include "types.e" #include "tree.e" #include "html.e" #include "scan.e" static Tree tree; /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); } /* start -- called before the first event is reported */ void* start(void) { tree = create(); return NULL; } /* end -- called after the last even is reported */ void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { tree = append_comment(tree, commenttext); } /* handle_text -- called after a tex chunk is parsed */ void handle_text(void *clientdata, string text) { tree = 
append_text(tree, text); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { tree = append_declaration(tree, gi, fpi, url); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { tree = append_procins(tree, pi_text); } /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist attribs) { tree = html_push(tree, name, attribs); } /* handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { tree = html_push(tree, name, attribs); } /* handle_pop -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { tree = html_pop(tree, name); } int main(int argc, char *argv[]) { /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); if (argc == 1) { yyin = stdin; } else if (argc == 2) { yyin = fopen(argv[1], "r"); if (yyin == NULL) { perror(argv[1]); exit(2); } } else { fprintf(stderr, "Version %s\n", VERSION); fprintf(stderr, "Usage: %s [html-file]\n", argv[0]); exit(1); } if (yyparse() != 0) { exit(3); } tree = get_root(tree); dumptree(tree); return 0; } html-xml-utils-6.5/m4/0000755000175000001440000000000012265516671011614 500000000000000html-xml-utils-6.5/m4/optreset.m40000644000175000001440000000112412174741251013632 00000000000000# CHECK_GETOPT_OPTRESET # -------------------------------------------------------------- # Set HAVE_GETOPT_OPTRESET if getopt() needs optreset to restart AC_DEFUN([CHECK_GETOPT_OPTRESET], [AC_CACHE_CHECK([whether getopt has 
optreset support], ac_cv_have_getopt_optreset, [ AC_TRY_LINK( [ #include ], [ extern int optreset; optreset = 0; ], [ ac_cv_have_getopt_optreset="yes" ], [ ac_cv_have_getopt_optreset="no" ] ) ]) if test "x$ac_cv_have_getopt_optreset" = "xyes" ; then AC_DEFINE(HAVE_GETOPT_OPTRESET, 1, [Define if your getopt(3) defines and uses optreset]) fi]) html-xml-utils-6.5/m4/flex-optimize.m40000644000175000001440000000035612174741251014567 00000000000000# AC_FLEX_OPTIMIZE # -------------------------------------------------------------- # Check whether we can use option -Cfe to optimize the lexer AC_DEFUN([AC_FLEX_OPTIMIZE], [case "$ac_cv_prog_LEX" in *flex) lex_opt_flags=-Cfe;; esac]) html-xml-utils-6.5/m4/libcurl.m40000644000175000001440000002367112225032653013430 00000000000000# LIBCURL_CHECK_CONFIG ([DEFAULT-ACTION], [MINIMUM-VERSION], # [ACTION-IF-YES], [ACTION-IF-NO]) # ---------------------------------------------------------- # David Shaw May-09-2006 # # Checks for libcurl. DEFAULT-ACTION is the string yes or no to # specify whether to default to --with-libcurl or --without-libcurl. # If not supplied, DEFAULT-ACTION is yes. MINIMUM-VERSION is the # minimum version of libcurl to accept. Pass the version as a regular # version number like 7.10.1. If not supplied, any version is # accepted. ACTION-IF-YES is a list of shell commands to run if # libcurl was successfully found and passed the various tests. # ACTION-IF-NO is a list of shell commands that are run otherwise. # Note that using --without-libcurl does run ACTION-IF-NO. # # This macro #defines HAVE_LIBCURL if a working libcurl setup is # found, and sets @LIBCURL@ and @LIBCURL_CPPFLAGS@ to the necessary # values. Other useful defines are LIBCURL_FEATURE_xxx where xxx are # the various features supported by libcurl, and LIBCURL_PROTOCOL_yyy # where yyy are the various protocols supported by libcurl. Both xxx # and yyy are capitalized. 
See the list of AH_TEMPLATEs at the top of # the macro for the complete list of possible defines. Shell # variables $libcurl_feature_xxx and $libcurl_protocol_yyy are also # defined to 'yes' for those features and protocols that were found. # Note that xxx and yyy keep the same capitalization as in the # curl-config list (e.g. it's "HTTP" and not "http"). # # Users may override the detected values by doing something like: # LIBCURL="-lcurl" LIBCURL_CPPFLAGS="-I/usr/myinclude" ./configure # # For the sake of sanity, this macro assumes that any libcurl that is # found is after version 7.7.2, the first version that included the # curl-config script. Note that it is very important for people # packaging binary versions of libcurl to include this script! # Without curl-config, we can only guess what protocols are available, # or use curl_version_info to figure it out at runtime. AC_DEFUN([LIBCURL_CHECK_CONFIG], [ AH_TEMPLATE([LIBCURL_FEATURE_SSL],[Defined if libcurl supports SSL]) AH_TEMPLATE([LIBCURL_FEATURE_KRB4],[Defined if libcurl supports KRB4]) AH_TEMPLATE([LIBCURL_FEATURE_IPV6],[Defined if libcurl supports IPv6]) AH_TEMPLATE([LIBCURL_FEATURE_LIBZ],[Defined if libcurl supports libz]) AH_TEMPLATE([LIBCURL_FEATURE_ASYNCHDNS],[Defined if libcurl supports AsynchDNS]) AH_TEMPLATE([LIBCURL_FEATURE_IDN],[Defined if libcurl supports IDN]) AH_TEMPLATE([LIBCURL_FEATURE_SSPI],[Defined if libcurl supports SSPI]) AH_TEMPLATE([LIBCURL_FEATURE_NTLM],[Defined if libcurl supports NTLM]) AH_TEMPLATE([LIBCURL_PROTOCOL_HTTP],[Defined if libcurl supports HTTP]) AH_TEMPLATE([LIBCURL_PROTOCOL_HTTPS],[Defined if libcurl supports HTTPS]) AH_TEMPLATE([LIBCURL_PROTOCOL_FTP],[Defined if libcurl supports FTP]) AH_TEMPLATE([LIBCURL_PROTOCOL_FTPS],[Defined if libcurl supports FTPS]) AH_TEMPLATE([LIBCURL_PROTOCOL_FILE],[Defined if libcurl supports FILE]) AH_TEMPLATE([LIBCURL_PROTOCOL_TELNET],[Defined if libcurl supports TELNET]) AH_TEMPLATE([LIBCURL_PROTOCOL_LDAP],[Defined if libcurl supports 
LDAP]) AH_TEMPLATE([LIBCURL_PROTOCOL_DICT],[Defined if libcurl supports DICT]) AH_TEMPLATE([LIBCURL_PROTOCOL_TFTP],[Defined if libcurl supports TFTP]) AH_TEMPLATE([LIBCURL_PROTOCOL_RTSP],[Defined if libcurl supports RTSP]) AH_TEMPLATE([LIBCURL_PROTOCOL_POP3],[Defined if libcurl supports POP3]) AH_TEMPLATE([LIBCURL_PROTOCOL_IMAP],[Defined if libcurl supports IMAP]) AH_TEMPLATE([LIBCURL_PROTOCOL_SMTP],[Defined if libcurl supports SMTP]) AC_ARG_WITH(libcurl, AC_HELP_STRING([--with-libcurl=PREFIX],[look for the curl library in PREFIX/lib and headers in PREFIX/include]), [_libcurl_with=$withval],[_libcurl_with=ifelse([$1],,[yes],[$1])]) if test "$_libcurl_with" != "no" ; then AC_PROG_AWK _libcurl_version_parse="eval $AWK '{split(\$NF,A,\".\"); X=256*256*A[[1]]+256*A[[2]]+A[[3]]; print X;}'" _libcurl_try_link=yes if test -d "$_libcurl_with" ; then LIBCURL_CPPFLAGS="-I$withval/include" _libcurl_ldflags="-L$withval/lib" AC_PATH_PROG([_libcurl_config],[curl-config],[], ["$withval/bin"]) else AC_PATH_PROG([_libcurl_config],[curl-config],[],[$PATH]) fi if test x$_libcurl_config != "x" ; then AC_CACHE_CHECK([for the version of libcurl], [libcurl_cv_lib_curl_version], [libcurl_cv_lib_curl_version=`$_libcurl_config --version | $AWK '{print $[]2}'`]) _libcurl_version=`echo $libcurl_cv_lib_curl_version | $_libcurl_version_parse` _libcurl_wanted=`echo ifelse([$2],,[0],[$2]) | $_libcurl_version_parse` if test $_libcurl_wanted -gt 0 ; then AC_CACHE_CHECK([for libcurl >= version $2], [libcurl_cv_lib_version_ok], [ if test $_libcurl_version -ge $_libcurl_wanted ; then libcurl_cv_lib_version_ok=yes else libcurl_cv_lib_version_ok=no fi ]) fi if test $_libcurl_wanted -eq 0 || test x$libcurl_cv_lib_version_ok = xyes ; then if test x"$LIBCURL_CPPFLAGS" = "x" ; then LIBCURL_CPPFLAGS=`$_libcurl_config --cflags` fi if test x"$LIBCURL" = "x" ; then LIBCURL=`$_libcurl_config --libs` # This is so silly, but Apple actually has a bug in their # curl-config script. 
Fixed in Tiger, but there are still # lots of Panther installs around. case "${host}" in powerpc-apple-darwin7*) LIBCURL=`echo $LIBCURL | sed -e 's|-arch i386||g'` ;; esac fi # All curl-config scripts support --feature _libcurl_features=`$_libcurl_config --feature` # Is it modern enough to have --protocols? (7.12.4) if test $_libcurl_version -ge 461828 ; then _libcurl_protocols=`$_libcurl_config --protocols` fi else _libcurl_try_link=no fi unset _libcurl_wanted fi if test $_libcurl_try_link = yes ; then # we didn't find curl-config, so let's see if the user-supplied # link line (or failing that, "-lcurl") is enough. LIBCURL=${LIBCURL-"$_libcurl_ldflags -lcurl"} AC_CACHE_CHECK([whether libcurl is usable], [libcurl_cv_lib_curl_usable], [ _libcurl_save_cppflags=$CPPFLAGS CPPFLAGS="$LIBCURL_CPPFLAGS $CPPFLAGS" _libcurl_save_libs=$LIBS LIBS="$LIBCURL $LIBS" AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]],[[ /* Try and use a few common options to force a failure if we are missing symbols or can't link. */ int x; curl_easy_setopt(NULL,CURLOPT_URL,NULL); x=CURL_ERROR_SIZE; x=CURLOPT_WRITEFUNCTION; x=CURLOPT_FILE; x=CURLOPT_ERRORBUFFER; x=CURLOPT_STDERR; x=CURLOPT_VERBOSE; if (x) ; ]])],libcurl_cv_lib_curl_usable=yes,libcurl_cv_lib_curl_usable=no) CPPFLAGS=$_libcurl_save_cppflags LIBS=$_libcurl_save_libs unset _libcurl_save_cppflags unset _libcurl_save_libs ]) if test $libcurl_cv_lib_curl_usable = yes ; then # Does curl_free() exist in this version of libcurl? 
# If not, fake it with free() _libcurl_save_cppflags=$CPPFLAGS CPPFLAGS="$CPPFLAGS $LIBCURL_CPPFLAGS" _libcurl_save_libs=$LIBS LIBS="$LIBS $LIBCURL" AC_CHECK_FUNC(curl_free,, AC_DEFINE(curl_free,free, [Define curl_free() as free() if our version of curl lacks curl_free.])) CPPFLAGS=$_libcurl_save_cppflags LIBS=$_libcurl_save_libs unset _libcurl_save_cppflags unset _libcurl_save_libs AC_DEFINE(HAVE_LIBCURL,1, [Define to 1 if you have a functional curl library.]) AC_SUBST(LIBCURL_CPPFLAGS) AC_SUBST(LIBCURL) for _libcurl_feature in $_libcurl_features ; do AC_DEFINE_UNQUOTED(AS_TR_CPP(libcurl_feature_$_libcurl_feature),[1]) eval AS_TR_SH(libcurl_feature_$_libcurl_feature)=yes done if test "x$_libcurl_protocols" = "x" ; then # We don't have --protocols, so just assume that all # protocols are available _libcurl_protocols="HTTP FTP FILE TELNET LDAP DICT TFTP" if test x$libcurl_feature_SSL = xyes ; then _libcurl_protocols="$_libcurl_protocols HTTPS" # FTPS wasn't standards-compliant until version # 7.11.0 (0x070b00 == 461568) if test $_libcurl_version -ge 461568; then _libcurl_protocols="$_libcurl_protocols FTPS" fi fi # RTSP, IMAP, POP3 and SMTP were added in # 7.20.0 (0x071400 == 463872) if test $_libcurl_version -ge 463872; then _libcurl_protocols="$_libcurl_protocols RTSP IMAP POP3 SMTP" fi fi for _libcurl_protocol in $_libcurl_protocols ; do AC_DEFINE_UNQUOTED(AS_TR_CPP(libcurl_protocol_$_libcurl_protocol),[1]) eval AS_TR_SH(libcurl_protocol_$_libcurl_protocol)=yes done else unset LIBCURL unset LIBCURL_CPPFLAGS fi fi unset _libcurl_try_link unset _libcurl_version_parse unset _libcurl_config unset _libcurl_feature unset _libcurl_features unset _libcurl_protocol unset _libcurl_protocols unset _libcurl_version unset _libcurl_ldflags fi if test x$_libcurl_with = xno || test x$libcurl_cv_lib_curl_usable != xyes ; then # This is the IF-NO path ifelse([$4],,:,[$4]) else # This is the IF-YES path ifelse([$3],,:,[$3]) fi unset _libcurl_with ])dnl 
html-xml-utils-6.5/hxcite-mkbib0000755000175000001440000000172011104406557013500 00000000000000#!/bin/bash # # Script that combines the functions of cite and mkbib # # Copyright 1994-2000 World Wide Web Consortium # See http://www.w3.org/Consortium/Legal/copyright-software # # Author: Bert Bos # Created: 29 Mar 2000 # Version: $Id: hxcite-mkbib,v 1.1 2008/08/17 14:00:21 bbos Exp $ USAGE="Usage: cite-mkbib [-b base] [-p pattern] [-s sep] bibfile [file]" AUX=${TMPDIR:-/tmp}/cm1-$$ TMP=${TMPDIR:-/tmp}/cm2-$$ trap "rm $AUX $TMP 2>/dev/null" 0 # usage -- print usage message and exit usage () { echo "$USAGE" >&2; exit 2; } # Parse command line while [ $# -ne 0 ]; do case "$1" in -b) base="-b '$2'"; shift 2;; -p) pattern="-p '$2'"; shift 2;; -s) sep="-s '$2'"; shift 2;; -*) usage;; --) shift; break;; *) break; esac done if [ $# -lt 1 -o $# -gt 2 ]; then usage; fi bibfile="$1" shift # Call cite and mkbib eval cite "$base" "$pattern" -a $AUX "'$bibfile'" "$@" >$TMP && eval mkbib "$sep" -a $AUX "'$bibfile'" $TMP html-xml-utils-6.5/README0000644000175000001440000000556512051455020012066 00000000000000html-xml-utils-*.tar.gz A number of simple utilities for manipulating HTML and XML files. See INSTALL for generic installation instructions. Get the source at: http://www.w3.org/Tools/HTML-XML-utils/ htmlutils-*.tar.gz Old versions (before version 0.1) Note: the names changed in version 5.0: most programs got an "hx" prefix. 
Please, uninstall any version < 5.0 before installing a version >= 5.0 cexport (1) - create headerfile of exported declarations from a C file hxaddid (1) - add ID's to selected elements hxcite (1) - replace bibliographic references by hyperlinks hxcite-mkbib (1) - expand references and create bibliography hxcopy (1) - copy an HTML file while preserving relative links hxcount (1) - count elements and attributes in HTML or XML files hxextract (1) - extract selected elements hxclean (1) - apply heuristics to correct an HTML file hxprune (1) - remove marked elements from an HTML file hxincl (1) - expand included HTML or XML files hxindex (1) - create an alphabetically sorted index hxmkbib (1) - create bibliography from a template hxmultitoc (1) - create a table of contents for a set of HTML files hxname2id - move some ID= or NAME= from A elements to their parents hxnormalize (1) - pretty-print an HTML file hxnum (1) - number section headings in an HTML file hxpipe (1) - convert XML to a format easier to parse with Perl or AWK hxprintlinks (1) - number links & add table of URLs at end of an HTML file hxremove (1) - remove selected elements from an XML file hxtabletrans (1) - transpose an HTML or XHTML table hxtoc (1) - insert a table of contents in an HTML file hxuncdata (1) - replace CDATA sections by character entities hxunent (1) - replace HTML predefined character entities to UTF-8 hxunpipe (1) - convert output of pipe back to XML format hxunxmlns (1) - replace "global names" by XML Namespace prefixes hxwls (1) - list links in an HTML file hxxmlns (1) - replace XML Namespace prefixes by "global names" asc2xml, xml2asc (1) - convert between UTF8 and &#nnn; entities hxref (1) - generate cross-references hxselect (1) - extract elements that match a (CSS) selector This package is configured with automake/autoconf. Generic instructions are in the file INSTALL. 
Here are some specific problems that may arise: 1) Error when running lex: lex scan.l && mv lex.yy.c scan.c "scan.l":line 2: Error: missing translation value The scan.l file uses features of flex that do not exist in lex. However, it is not necessary to run lex, since the file scan.c is provided in the package. Just do a "touch scan.c" to make sure "make" will not try to generate it anew. $Date: 2012/11/16 15:21:52 $ html-xml-utils-6.5/hxnsxml.c0000644000175000001440000001170312174313455013055 00000000000000/* * hxnsxml - convert output of hxxmlns back to normal XML * * To do: handle quotes in Namespace URLs. * To do: handle XML's own Namespace. * * Part of HTML-XML-utils, see: * http://www.w3.org/Tools/HTML-XML-utils/ * * Copyright 1994-2010 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 12 July 2010 * **/ #include "config.h" #include #ifdef HAVE_UNISTD_H # include #endif #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #include #include #include "export.h" #include "types.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "errexit.e" #define XML "{http://www.w3.org/XML/1998/namespace}" static bool has_error = false; static bool has_ns = false; /* true if Namespaces occur anywhere in document */ /* --------------- implements interface api.h -------------------------- */ /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_error = true; } /* start -- called before the first event is reported */ void* start(void) { return NULL; } /* end -- called after the last event is reported */ void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { printf("", commenttext); } /* 
handle_text -- called after a text chunk is parsed */ void handle_text(void *clientdata, string text) { printf("%s", text); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { printf("\n"); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { printf("", pi_text); } /* print_attrs -- print attributes */ void print_attrs(const pairlist attribs) { pairlist p; int i, j; char c = 'a'; for (p = attribs; p; p = p->next) { if (p->name[0] != '{') { i = 0; } else { for (i = 1; p->name[i] && p->name[i] != '}'; i++); if (p->name[i]) i++; } if (i > 2) { if (c > 'z') { fprintf(stderr, "Bug: hxnsxml cannot handle > 26 namespaces per element.\n"); exit(2); } printf(" xmlns:%c=\"", c); for (j = 1; j < i - 1; j++) putchar(p->name[j]); putchar('\"'); printf(" %c:", c); c++; } else { printf(" "); } printf("%s=\"%s\"", p->name + i, p->value); } } /* print_tag -- print "<" and the element name, optionally with a namespace */ static void print_tag(const conststring name) { int i, j; if (name[0] != '{') { i = 0; } else { for (i = 1; name[i] && name[i] != '}'; i++); if (name[i]) i++; } printf("<%s", name + i); if (i > 2) { /* Element has a Namespace */ printf(" xmlns=\""); for (j = 1; j < i - 1; j++) putchar(name[j]); putchar('"'); has_ns = true; } else if (has_ns) { /* Document has Namespaces, this element not */ printf(" xmlns=\"\""); } } /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist attribs) { print_tag(name); print_attrs(attribs); putchar('>'); } /* handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { print_tag(name); print_attrs(attribs); printf(" />"); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { int i; if (name[0] != '{') { i = 
0; } else { for (i = 1; name[i] && name[i] != '}'; i++); if (name[i]) i++; } printf("", name + i); } /* --------------------------------------------------------------------- */ /* usage -- print usage message and exit */ static void usage(string prog) { fprintf(stderr, "Version %s\nUsage: %s [file-or-url]\n", VERSION, prog); exit(2); } int main(int argc, char *argv[]) { int status = 200; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); if (argc > 2) usage(argv[0]); else if (argc == 2) yyin = fopenurl(argv[1], "r", &status); else yyin = stdin; if (!yyin) {perror(argv[1]); exit(1);} if (status != 200) errexit("%s : %s\n", argv[1], http_strerror(status)); if (yyparse() != 0) exit(3); return has_error ? 1 : 0; } html-xml-utils-6.5/hxwls.c0000644000175000001440000001672712174266016012534 00000000000000/* * List all links from the given document. 
* * Copyright 1994-2000 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Bert Bos * Created 31 July 1999 * $Id: hxwls.c,v 1.6 2013-06-30 20:38:41 bbos Exp $ */ #include "config.h" #include #ifdef HAVE_UNISTD_H # include #endif #include #include #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRDUP # include "strdup.e" # endif #endif #include "export.h" #include "heap.e" #include "types.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "url.e" #include "errexit.e" static bool has_error = false; static string base = NULL; static string self; static enum {Short, Long, HTML, Tuple} format = Short; /* Option -l -h -t */ static bool relative = false; /* Option -r */ /* output -- print the link (lowercases rel argument) */ static void output(const conststring type, const conststring rel, conststring url) { string h = NULL, rel1; if (url) { /* If we found a URL */ if (! relative && base) { h = URL_s_absolutize(base, url); url = h; } rel1 = newstring(rel ? rel : ""); down(rel1); switch (format) { case HTML: printf("
  • %s
  • \n", type, rel1, url, url); break; case Long: printf("%s\t%s\t%s\n", type, rel1, url); break; case Short: printf("%s\n", url); break; case Tuple: printf("%s\t%s\t%s\t%s\n", self, type, rel1, url); break; default: assert(!"Cannot happen!"); } free(rel1); free(h); } } /* --------------- implements parser interface api------------------------- */ /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_error = true; } /* start -- called before the first event is reported */ void* start(void) { if (format == HTML) { printf("\n"); printf("\n"); printf("Output of listlinks\n"); printf("\n"); printf("
      \n"); } return NULL; } /* end -- called after the last event is reported */ void end(void *clientdata) { if (format == HTML) { printf("
    \n"); printf("\n"); printf("\n"); } } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { free(commenttext); } /* handle_text -- called after a text chunk is parsed */ void handle_text(void *clientdata, string text) { /* There may be several consecutive calls to this routine. */ /* escape(text); */ free(text); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { /* skip */ if (gi) free(gi); if (fpi) free(fpi); if (url) free(url); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { if (pi_text) free(pi_text); } /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist attribs) { /* ToDo: print text of anchor, if available */ conststring h; if (strcasecmp(name, "base") == 0) { h = pairlist_get(attribs, "href"); if (h) base = strdup(h); /* Use as base from now on */ output("base", NULL, h); } else if (strcasecmp(name, "link") == 0) { output("link", pairlist_get(attribs, "rel"), pairlist_get(attribs, "href")); } else if (strcasecmp(name, "a") == 0) { output("a", pairlist_get(attribs, "rel"), pairlist_get(attribs, "href")); } else if (strcasecmp(name, "img") == 0) { output("img", NULL, pairlist_get(attribs, "src")); output("longdesc", NULL, pairlist_get(attribs, "longdesc")); } else if (strcasecmp(name, "input") == 0) { output("input", NULL, pairlist_get(attribs, "href")); } else if (strcasecmp(name, "object") == 0) { output("object", NULL, pairlist_get(attribs, "data")); } else if (strcasecmp(name, "area") == 0) { output("area", pairlist_get(attribs, "rel"), pairlist_get(attribs, "href")); } else if (strcasecmp(name, "ins") == 0) { output("ins", NULL, pairlist_get(attribs, "cite")); } else if (strcasecmp(name, "del") == 0) { output("del", NULL, pairlist_get(attribs, "cite")); } else if (strcasecmp(name, "q") == 0) 
{ output("q", NULL, pairlist_get(attribs, "cite")); } else if (strcasecmp(name, "blockquote") == 0) { output("bq", NULL, pairlist_get(attribs, "cite")); } else if (strcasecmp(name, "form") == 0) { output("form", pairlist_get(attribs, "method"), pairlist_get(attribs, "action")); } else if (strcasecmp(name, "frame") == 0) { output("frame", NULL, pairlist_get(attribs, "src")); } else if (strcasecmp(name, "iframe") == 0) { output("iframe", NULL, pairlist_get(attribs, "src")); } else if (strcasecmp(name, "head") == 0) { output("head", NULL, pairlist_get(attribs, "profile")); } else if (strcasecmp(name, "script") == 0) { output("script", NULL, pairlist_get(attribs, "src")); } else if (strcasecmp(name, "body") == 0) { output("body", NULL, pairlist_get(attribs, "background")); } /* Free memory */ pairlist_delete(attribs); free(name); } /* handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { handle_starttag(clientdata, name, attribs); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { free(name); } /* --------------------------------------------------------------------- */ /* usage -- print usage message and exit */ static void usage(string progname) { fprintf(stderr, "Version %s\nUsage: %s [-l] [-r] [-h] [-b base] [-t] [HTML-file]\n", VERSION, progname); exit(1); } int main(int argc, char *argv[]) { int c, status = 200; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); /* Parse command line arguments */ while ((c = getopt(argc, argv, "lb:rht")) != -1) { switch (c) { case 'l': format = Long; break; /* 
Long listing */ case 'b': base = strdup(optarg); break; /* Set base of URL */ case 'r': relative = true; break; /* Do not make URLs absolute */ case 'h': format = HTML; break; /* Output in HTML format */ case 't': format = Tuple; break; /* Output as 4-tuples */ default: usage(argv[0]); } } if (optind == argc) { yyin = stdin; self = "-"; } else if (optind == argc - 1) { if (!base) base = strdup(argv[optind]); if (eq(argv[optind], "-")) yyin = stdin; else yyin = fopenurl(argv[optind], "r", &status); self = argv[optind]; } else { usage(argv[0]); } if (yyin == NULL) {perror(argv[optind]); exit(1);} if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status)); if (yyparse() != 0) exit(3); if (base) free(base); return has_error ? 1 : 0; } html-xml-utils-6.5/hxincl.c0000644000175000001440000002536712174313455012654 00000000000000/* * incl - expand included files * * Searches for and expands the referenced file * in place. File may be a URL. Works recursively. Other accepted * syntaxes: * * * * * ... * ... * ... * * If there are no quotes, the file name may not include whitespace. 
* * Copyright 1994-2012 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 2 Dec 1998 * Version: $Id: hxincl.c,v 1.13 2013-07-25 21:02:59 bbos Exp $ * **/ #include "config.h" #include #ifdef HAVE_UNISTD_H # include #endif #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRSTR # include "strstr.e" # endif #endif #include #include #include #include #include "export.h" #include "types.e" #include "errexit.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "heap.e" #include "url.e" #define INCLUDE "include" #define BEGIN "begin-include" #define END "end-include" typedef struct _stack { bool skipping; struct _stack *next; } *stack; typedef enum {KNone, KIncl, KBegin, KEnd} Key; static bool do_xml = false; static bool final = false; /* Final means to remove the comments */ static bool has_error = false; static stack skipping = NULL; static Dictionary substitutions = NULL; static string target = NULL; static bool warn_missing = true; /* push -- push a skipping state on the stack */ static void push(stack *skipping, bool s) { stack h; new(h); h->next = *skipping; h->skipping = s; *skipping = h; } /* pop -- pop a skipping state off the stack */ static void pop(stack *skipping) { stack h; assert(*skipping); h = *skipping; *skipping = (*skipping)->next; dispose(h); } /* top -- return value of top of skipping stack */ static bool top(stack skipping) { assert(skipping); return skipping->skipping; } /* word_to_key -- check whether word s is one of the recognized keywords */ static Key word_to_key(const string s, int len) { if (len == sizeof(END) - 1 && strncmp(s, END, len)== 0) return KEnd; if (len == sizeof(INCLUDE) - 1 && strncmp(s, INCLUDE, len)== 0) return KIncl; if (len == sizeof(BEGIN) - 1 && strncmp(s, BEGIN, len)== 0) return KBegin; return KNone; } /* add_substitution -- add a file name 
substitution to the dictionary */ static void add_substitution(const string assignment) { string s; /* To do: handle file names containing '='. Add escapes? */ if (!substitutions) substitutions = dict_create(10); if (!substitutions) errexit("Out of memory.\n"); if (!(s = strchr(assignment, '='))) errexit("No '=' found in option -s\n"); *s = '\0'; if (!dict_add(substitutions, assignment, s + 1)) errexit("Out of memory?\n"); *s = '='; } /* expand_vars -- look for and expand %variables% in s */ static string expand_vars(const conststring s) { string h, k, sub2, var, result = NULL; conststring sub; k = strdup(s); if (!k) err(1, NULL); /* To do: avoid infinite loops */ for (h = k; h;) { /* Append the text leading up to next '%' */ result = strapp(&result, strsep(&h, "%"), NULL); /* If there is any text left, get the text until the next '%' into var */ if ((var = strsep(&h, "%"))) { if (!h) { /* No matching '%' found */ result = strapp(&result, "%", var, NULL); } else if (*var == '\0') { /* Treat "%%" as a single "%" */ result = strapp(&result, "%", NULL); } else if (substitutions && (sub = dict_find(substitutions, var))) { sub2 = expand_vars(sub); result = strapp(&result, sub2, NULL); free(sub2); } else { /* Undefined %var% */ result = strapp(&result, "%", var, "%", NULL); } } } free(k); return result; } /* --------------- implements interface api.h -------------------------- */ /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_error = true; } /* start -- called before the first event is reported */ void* start(void) { push(&skipping, false); /* Start by not skipping */ return NULL; } /* end -- called after the last event is reported */ void end(void *clientdata) { assert(clientdata == NULL); assert(top(skipping) == false); /* skip */ } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { /* 
A push() occurs at and at include_file() */ /* A pop() occurs at and at ENDINCL */ int i, j, status; conststring s, url; FILE *f; Key key; i = strspn(commenttext, " \t\n\r\f"); /* Skip whitespace */ j = strcspn(commenttext + i, " \t\n\r\f"); /* First word */ key = word_to_key(commenttext + i, j); if (key == KEnd) { /* */ /* Don't print anything, just pop a level */ pop(&skipping); } else if (top(skipping)) { /* Are we already skipping? */ /* Don't print anything; push a level if this is a begin-include */ if (key == KBegin) push(&skipping, true); } else if (key == KNone) { /* Unrecognized comment? */ /* Print the comment verbatim */ if (!target) printf("", commenttext); } else { /* include or begin-include */ /* Push a level if this is a begin-include */ if (key == KBegin) push(&skipping, true); /* Find start of file name */ i += j; i += strspn(commenttext + i, " \t\n\r\f"); /* Skip whitespace */ /* Accept either "...", '...', or any string without spaces */ if (commenttext[i] == '"') { j = strcspn(commenttext + i + 1, "\""); url = newnstring(commenttext + i + 1, j); } else if (commenttext[i] == '\'') { j = strcspn(commenttext + (++i), "'"); url = newnstring(commenttext + i + 1, j); } else { j = strcspn(commenttext + i, " \t\n\r\f"); url = newnstring(commenttext + i, j); } /* If we have a substitution for it, use that instead */ if (substitutions && (s = dict_find(substitutions, url))) url = s; /* Expand any %variables% in the url */ url = expand_vars(url); /* Get the file and recursively parse it */ assert(get_yyin_name()); s = URL_s_absolutize(get_yyin_name(), url); if (target) printf(" \\\n %s", s); /* To do: escape spaces in s */ if (!(f = fopenurl(s, "r", &status))) { if (!target || warn_missing) perror(url); } else if (status != 200) { if (!target || warn_missing) fprintf(stderr, "%s : %s\n", url, http_strerror(status)); } else { if (!final && !target) printf("", BEGIN, commenttext + i); push(&skipping, false); include_file(f, s); } dispose(url); dispose(s); 
} free(commenttext); } /* handle_text -- called after a text chunk is parsed */ void handle_text(void *clientdata, string text) { if (top(skipping) == false && !target) printf("%s", text); free(text); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { if (!target) { printf(""); } free(gi); free(fpi); free(url); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { if (top(skipping) == false && !target) printf("", pi_text); free(pi_text); } /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist attribs) { pairlist p; if (top(skipping) == false && !target) { printf("<%s", name); for (p = attribs; p; p = p->next) { if (p->value != NULL) printf(" %s=\"%s\"", p->name, p->value); else if (do_xml) printf(" %s=\"%s\"", p->name, p->name); else printf(" %s", p->name); } printf(">"); } free(name); pairlist_delete(attribs); } /* handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { pairlist p; if (top(skipping) == false && !target) { printf("<%s", name); for (p = attribs; p; p = p->next) { if (p->value != NULL) printf(" %s=\"%s\"", p->name, p->value); else if (do_xml) printf(" %s=\"%s\"", p->name, p->name); else printf(" %s", p->name); } printf(do_xml ? 
" />" : ">"); } free(name); pairlist_delete(attribs); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { if (top(skipping) == false && !target) printf("", name); free(name); } /* handle_endincl -- called after the end of an included file is reached */ void handle_endincl(void *clientdata) { pop(&skipping); /* Mark the end of the inclusion */ if (!final && !target) printf("", END); } /* --------------------------------------------------------------------- */ /* usage -- print usage message and exit */ static void usage(string prog) { fprintf(stderr, "Usage: %s [-v] [-x] [-b base] [-s name=subst ...] [-M target [-G]] [file-or-url]\n", prog); exit(2); } int main(int argc, char *argv[]) { int c, status = 200; string base = NULL; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); set_endincl_handler(handle_endincl); /* Parse command line arguments */ while ((c = getopt(argc, argv, ":xb:fs:M:Gv")) != -1) switch (c) { case 'x': do_xml = true; break; case 'b': base = optarg; break; case 'f': final = true; break; case 's': add_substitution(optarg); break; case 'M': target = optarg; break; case 'G': warn_missing = false; break; case 'v': printf("Version: %s %s\n", PACKAGE, VERSION); return 0; default: usage(argv[0]); } if (optind == argc) { set_yyin(stdin, base ? base : "stdin"); } else if (optind == argc - 1) { set_yyin(fopenurl(argv[optind], "r", &status), base ? 
base : argv[optind]); } else { usage(argv[0]); } if (yyin == NULL) {perror(argv[optind]); exit(1);} if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status)); if (target) printf("%s:", target); if (yyparse() != 0) exit(3); if (target) printf("\n"); return has_error ? 1 : 0; } html-xml-utils-6.5/hxtoc.c0000644000175000001440000003233712174266016012507 00000000000000/* * Insert an active ToC between "" and "", * or replacing the comment "" * * Headers with class "no-toc" will not be listed in the ToC. * * The ToC links to elements with ID attributes as well as with * empty elements. * * Tags for a with class "index" are assumed to be used by * a cross-reference generator and will not be copied to the ToC. * * Similarly, DFN tags are not copied to the ToC (but the element's * content is). * * Any tags with a class of "bctarget" are not copied, but * regenerated. They are assumed to be backwards-compatible versions * of ID attributes on their parent elements. With the option -t or -x * they are removed. * * Copyright 1994-2013 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created Sep 1997 * Version: $Id: hxtoc.c,v 1.9 2013-06-30 20:40:57 bbos Exp $ * **/ #include "config.h" #include #include #include #include #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRSTR # include "strstr.e" # endif #endif #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_SEARCH_H # include #else # include "search-freebsd.h" #endif #include "export.h" #include "types.e" #include "heap.e" #include "tree.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "errexit.e" #include "genid.e" #include "class.e" #define BEGIN_TOC "begin-toc" /* */ #define END_TOC "end-toc" /* */ #define TOC "toc" /* */ #define NO_TOC "no-toc" /* CLASS="... no-toc..." */ #define INDEX "index" /* CLASS="... index..." 
*/ #define TARGET "bctarget" /* CLASS="...bctarget..." */ #define EXPAND true #define NO_EXPAND false #define KEEP_ANCHORS true #define REMOVE_ANCHORS false #define INDENT " " /* Amount to indent ToC per level */ static Tree tree; static int toc_low = 1, toc_high = 6; /* Which headers to include */ static bool xml = false; /* Use convention */ static bool bctarget = true; /* Generate after IDs */ static string toc_class = "toc"; /*
      */ static bool use_div = false; /* Option -d */ /* handle_error -- called when a parse error occurred */ static void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); } /* start -- called before the first event is reported */ static void* start(void) { tree = create(); return NULL; } /* end -- called after the last event is reported */ static void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ static void handle_comment(void *clientdata, string commenttext) { tree = append_comment(tree, commenttext); } /* handle_text -- called after a tex chunk is parsed */ static void handle_text(void *clientdata, string text) { tree = append_text(tree, text); } /* handle_declaration -- called after a declaration is parsed */ static void handle_decl(void *clientdata, string gi, string fpi, string url) { tree = append_declaration(tree, gi, fpi, url); } /* handle_proc_instr -- called after a PI is parsed */ static void handle_pi(void *clientdata, string pi_text) { tree = append_procins(tree, pi_text); } /* handle_starttag -- called after a start tag is parsed */ static void handle_starttag(void *clientdata, string name, pairlist attribs) { conststring id; tree = html_push(tree, name, attribs); /* If it has an ID, store it (so we don't accidentally generate it) */ if ((id = pairlist_get(attribs, "id"))) storeID(id); } /* handle_emptytag -- called after an empty tag is parsed */ static void handle_emptytag(void *clientdata, string name, pairlist attribs) { handle_starttag(clientdata, name, attribs); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ static void handle_endtag(void *clientdata, string name) { tree = html_pop(tree, name); } /* indent -- print level times a number of spaces */ static void indent(int level) { for (; level > 0; level--) printf(INDENT); } /* is_div -- t is a division (DIV, SECTION, ARTICLE, ASIDE or NAV) */ static bool is_div(Tree 
t) { assert(t->tp == Element); return eq(t->name, "div") || eq(t->name, "section") || /* HTML5 */ eq(t->name, "article") || /* HTML5 */ eq(t->name, "aside") || /* HTML5 */ eq(t->name, "nav"); /* HTML5 */ } /* heading_level -- return 1..6 if t is H1..H6, else 0 */ static int heading_level(Tree t) { assert(t->tp == Element); if (has_class(t->attribs, NO_TOC)) return 0; if (eq(t->name, "h1")) return 1; if (eq(t->name, "h2")) return 2; if (eq(t->name, "h3")) return 3; if (eq(t->name, "h4")) return 4; if (eq(t->name, "h5")) return 5; if (eq(t->name, "h6")) return 6; return 0; } /* div_parent -- if t is the first child of a section elt, return that elt */ static Tree div_parent(Tree t) { Tree h, result = NULL; assert(t->tp == Element); assert(t->parent); if (t->parent->tp != Element) return NULL; if (has_class(t->parent->attribs, NO_TOC)) return NULL; if (is_div(t->parent)) result = t->parent; else if (!eq(t->parent->name, "hgroup")) return NULL; else if (!(result = div_parent(t->parent))) return NULL; for (h = t->parent->children; h != t; h = h->sister) { if (h->tp == Element) return NULL; if (h->tp == Text && !only_space(h->text)) return NULL; } return result; } /* first_child_is_heading -- true if first child is a Hn or HGROUP */ static bool first_child_is_heading(Tree t) { Tree h; assert(t->tp == Element); for (h = t->children; h; h = h->sister) { switch (h->tp) { case Element: return eq(h->name, "hgroup") || heading_level(h) > 0; case Text: if (!only_space(h->text)) return false; break; default: break; } } return false; } static void expand(Tree t, bool *write, bool exp, bool keep_anchors, int div_depth); /* toc -- create a table of contents */ static void toc(Tree t, int *curlevel, bool *item_is_open, int div_depth) { conststring val, id; int level; Tree h, div = NULL; bool write = true; switch (t->tp) { case Text: break; case Comment: break; case Declaration: break; case Procins: break; case Element: if (use_div && is_div(t) && first_child_is_heading(t)) { /* It's 
a section element with a heading as first child */ div_depth++; level = 0; } else { /* Check if the element is a heading and what its level is */ level = heading_level(t); if (level && use_div && (div = div_parent(t))) level = div_depth; } /* If it's a header for the ToC, create a list item for it */ if (level >= toc_low && level <= toc_high) { /* Ensure there is an ID to point to */ h = use_div && div ? div : t; if (! (id = get_attrib(h, "id"))) { id = gen_id(h); set_attrib(h, "id", id); } assert(*curlevel <= level || *item_is_open); while (*curlevel > level) { printf(xml ? "\n" : "\n"); indent(*curlevel - toc_low); printf("
    "); (*curlevel)--; } if (*curlevel == level && *item_is_open) { printf(xml ? "\n" : "\n"); } else if (*item_is_open) { printf("\n"); (*curlevel)++; indent(*curlevel - toc_low); printf("
    "); } /* Open a list if needed */ if (term->nrkeys > globalprevious->nrkeys && globalprevious->nrkeys == i) { indent(i); printf("
      "); } /* Print new subterms, if any */ for (j = i; j < term->nrkeys; j++) { indent(j); printf("
    • %s", term->terms[j]); if (j != term->nrkeys - 1) { indent(j + 1); printf("
        "); } } #if 0 int listmode = 0; if (folding_cmp(globalprevious->sortkeys, globalprevious->nrkeys, term->sortkeys, term->nrkeys) == 0) if (globalurlprevious) { string d = strchr(globalurlprevious,'#'); if (d) listmode = strncmp(globalurlprevious, term->url, d - globalurlprevious); else listmode = strcmp(globalurlprevious, term->url); } #endif /* Print a link */ switch (term->importance) { case 1: #if 0 printf("%s url); print_full_term(term); printf("\">%s", use_secno ? term->secno : "#"); #else printf(", "); printf("url); printf("\""); if (use_secno) print_title(term); printf(">"); if (!use_secno) putchar('#'); else if (term->secno) print_escaped(term->secno); else print_escaped(unknown_name); printf(""); #endif break; case 2: #if 0 printf("%s url); print_full_term(term); printf("\">%s", use_secno ? term->secno : "#"); #else printf(", "); printf("url); printf("\""); if (use_secno) print_title(term); printf(">"); if (!use_secno) putchar('#'); else if (term->secno) print_escaped(term->secno); else print_escaped(unknown_name); printf(""); #endif break; default: assert(! "Cannot happen\n"); } /* Remember this term */ globalprevious = term; globalurlprevious = term->url; } /* mkindex -- write out an index */ static void mkindex(Indexterm terms) { int i; printf("
          "); /* Initialize globalprevious to a term with an unlikely sortkey */ new(globalprevious); globalprevious->nrkeys = 1; newarray(globalprevious->sortkeys, globalprevious->nrkeys); newarray(globalprevious->sortkeys[0], 15); wcscpy(globalprevious->sortkeys[0], L"zzzzzzzzzzzzzz"); twalk(terms, write_index_item); /* Close all open lists */ for (i = 0; i < globalprevious->nrkeys; i++) printf("\n
        "); } /* expand -- write the tree, add if needed and replace */ static void expand(Tree t, bool *write, Indexterm terms) { conststring val; Tree h; pairlist a; string s; bool do_tag; for (h = t->children; h != NULL; h = h->sister) { switch (h->tp) { case Text: if (*write) printf("%s", h->text); break; case Comment: s = newstring(h->text); trim(s); if (eq(s, INDEX) || eq(s, BEGIN_INDEX)) { if (!final) printf("\n", BEGIN_INDEX); mkindex(terms); if (!final) printf("", END_INDEX); if (eq(s, BEGIN_INDEX)) *write = false; /* Skip old index */ } else if (eq(s, END_INDEX)) { *write = true; } else { printf("", h->text); } dispose(s); break; case Declaration: printf("name); if (h->text) printf(" PUBLIC \"%s\"", h->text); if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url); printf(">"); break; case Procins: if (*write) printf("", h->text); break; case Element: if (*write) { /* If an was inserted by index itself, remove it */ do_tag = !eq(h->name, "a") || !has_class(h->attribs, TARGET); if (do_tag) { printf("<%s", h->name); for (a = h->attribs; a != NULL; a = a->next) { printf(" %s", a->name); if (a->value != NULL) printf("=\"%s\"", a->value); } assert(! is_empty(h->name) || h->children == NULL); printf(xml && is_empty(h->name) ? " />" : ">"); /* Insert an if element has an ID and is not */ if (bctarget && is_mixed(h->name) && (val = get_attrib(h, "id")) && !eq(h->name, "a") && ! xml) printf("", TARGET, val); } expand(h, write, terms); if (do_tag && ! is_empty(h->name)) printf("", h->name); } break; case Root: assert(! "Cannot happen"); break; default: assert(! 
"Cannot happen"); } } } /* termcmp -- comparison routine for Indexterms */ static int termcmp(const void *a1, const void *b1) { Indexterm a = (Indexterm)a1, b = (Indexterm)b1; int r; assert(a); assert(b); assert(a->sortkeys); assert(b->sortkeys); assert(a->nrkeys > 0); assert(b->nrkeys > 0); r = folding_cmp(a->sortkeys, a->nrkeys, b->sortkeys, b->nrkeys); if (r != 0) return r; return strcmp(a->url, b->url); /* Terms are equal, compare URL instead */ } /* copy_contents -- recursively expand contents of element t into a string */ static void copy_contents(Tree t, string *s) { Tree h; int i; pairlist a; string p; for (h = t->children; h != NULL; h = h->sister) { switch (h->tp) { case Text: i = *s ? strlen(*s) : 0; renewarray(*s, i + strlen(h->text) + 1); /* Copy, but transform all whitespace to spaces */ for (p = h->text; *p; p++, i++) (*s)[i] = isspace(*p) ? ' ' : *p; (*s)[i] = '\0'; break; case Comment: break; case Declaration: break; case Procins: break; case Element: /* Only certain tags are retained */ if (eq(h->name, "span") || eq(h->name, "code") || eq(h->name, "tt") || eq(h->name, "acronym") || eq(h->name, "abbr") || eq(h->name, "bdo") || eq(h->name, "kbd") || eq(h->name, "samp") || eq(h->name, "sub") || eq(h->name, "sup") || eq(h->name, "var")) { strapp(s, "<", h->name, NULL); for (a = h->attribs; a != NULL; a = a->next) { if (! a->value) strapp(s, " ", a->name, NULL); else strapp(s, " ", a->name, "=\"", a->value, "\"", NULL); } assert(! is_empty(h->name) || h->children == NULL); if (is_empty(h->name)) { strapp(s, xml ? " />" : ">", NULL); } else { strapp(s, ">", NULL); copy_contents(h, s); strapp(s, "name, ">", NULL); } } else { /* Ignore tag, copy contents */ copy_contents(h, s); } break; case Root: assert(! "Cannot happen"); break; default: assert(! 
"Cannot happen"); } } } /* copy_to_index -- copy the contents of element h to the index db */ static void copy_to_index(Tree t, Indexterm *terms, int importance, conststring secno, conststring doctitle) { conststring id, title; string h; Indexterm term; int i, n; id = get_attrib(t, "id"); #ifdef USE_DATA_ATTRIBUTE if (! (title = get_attrib(t, "data-index"))) #endif title = get_attrib(t, "title"); /* Get term either from title attribute or contents */ if (title) { i = 0; while (title[i]) { n = strcspn(title + i, "|"); /* Find | or \0 */ new(term); term->importance = importance; term->secno = secno ? newstring(secno) : NULL; term->doctitle = newstring(doctitle); term->url = NULL; strapp(&term->url, base, "#", id, NULL); h = newnstring(title + i, n); parse_subterms(term, h); if (! tsearch(term, (void**)terms, termcmp)) errx(1, "Out of memory while parsing term %s\n", h); i += n; if (title[i]) i++; /* Skip '|' */ } if (final) /* Remove used attribute */ #ifdef USE_DATA_ATTRIBUTE if (!delete_attrib(t, "data-index")) #endif delete_attrib(t, "title"); } else { /* Recursively copy contents */ h = NULL; copy_contents(t, &h); if (h) { /* Non-empty contents */ new(term); term->importance = importance; term->secno = secno ? newstring(secno) : NULL; term->doctitle = newstring(doctitle); term->url = NULL; strapp(&term->url, base, "#", id, NULL); parse_subterms(term, h); if (! 
tsearch(term, (void**)terms, termcmp)) errx(1, "Out of memory while parsing term %s", h); } } } /* collect -- collect index terms, add IDs where needed */ static void collect(Tree t, Indexterm *terms, string *secno, string *doctitle) { int importance; Tree h; for (h = t->children; h != NULL; h = h->sister) { switch (h->tp) { case Text: case Comment: case Declaration: case Procins: break; case Element: if (eq(h->name, "title")) { dispose(*doctitle); copy_contents(h, doctitle); } if (has_class(h->attribs, SECNO)) { dispose(*secno); copy_contents(h, secno); trim(*secno); } else if (has_class(h->attribs, NO_NUM)) { dispose(*secno); *secno = newstring(unknown_name); } if (eq(h->name, "dfn")) importance = 2; else if (has_class(h->attribs,INDEX)||has_class(h->attribs,INDEX_INST)) importance = 1; else if (userclassnames && has_class_in_list(h->attribs, userclassnames)) importance = 1; else if (has_class(h->attribs, INDEX_DEF)) importance = 2; else importance = 0; if (importance != 0) { /* Give it an ID, if it doesn't have one */ if (! get_attrib(h, "id")) set_attrib(h, "id", gen_id(h)); copy_to_index(h, terms, importance, *secno, *doctitle); } else { collect(h, terms, secno, doctitle); } break; case Root: assert(! "Cannot happen"); break; default: assert(! "Cannot happen"); } } } /* load_index -- read persistent term db from file */ static void load_index(const string indexdb, Indexterm *terms) { FILE *f; int n1, n2, n3, n4, n5; char line[MAXSTR]; Indexterm term; string h; if (! (f = fopen(indexdb, "r"))) return; /* Assume file not found... 
*/ while (fgets(line, sizeof(line), f)) { #if 1 n1 = strcspn(line, "\t"); if (line[n1] != '\t') errx(1, "Illegal syntax in %s", indexdb); n2 = n1 + 1 + strcspn(line + n1 + 1, "\t"); if (line[n2] != '\t') errx(1, "Illegal syntax in %s", indexdb); n3 = n2 + 1 + strcspn(line + n2 + 1, "\t"); if (line[n3] != '\t') errx(1, "Illegal syntax in %s", indexdb); n4 = n3 + 1 + strcspn(line + n3 + 1, "\t"); if (line[n4] != '\t') errx(1, "Illegal syntax in %s", indexdb); n5 = n4 + 1 + strcspn(line + n4 + 1, "\t\n"); if (line[n5] != '\n') errx(1, "Illegal syntax in %s", indexdb); new(term); h = newnstring(line, n1); switch (line[n1 + 1]) { case '1': term->importance = 1; break; case '2': term->importance = 2; break; default: errx(1, "Error in %s (column 2 must be '1' or '2')", indexdb); } term->url = newnstring(line + n2 + 1, n3 - n2 - 1); term->secno = newnstring(line + n3 + 1, n4 - n3 - 1); term->doctitle = newnstring(line + n4 + 1, n5 - n4 - 1); parse_subterms(term, h); if (! tsearch(term, (void**)terms, termcmp)) errx(1, "Out of memory while loading %s", indexdb); #else chomp(line); n = strcspn(line, "\t"); if (line[n] != '\t') errx(1, "Illegal syntax in %s", indexdb); new(term); h = newnstring(line, n); switch (line[n + 1]) { case '1': term->importance = 1; break; case '2': term->importance = 2; break; default: errx(1, "Error in %s (column 2 must be '1' or '2')", indexdb); } if (line[n+2] != '\t') errx(1, "Illegal syntax in %s", indexdb); n3 = n + 3 + strcspn(line + n + 3, "\t"); if (line[n3] != '\t') errx(1, "Illegal syntax in %s", indexdb); n4 = n3 + 1 + strcspn(line + n3 + 1, "\t"); if (line[n4] != '\t') errx(1, "Illegal syntax in %s", indexdb); term->url = newnstring(line + n + 3, n3 - n - 3); term->secno = newnstring(line + n3 + 1, n4 - n3 - 1); term->doctitle = newstring(line + n4 + 1); parse_subterms(term, h); if (! 
tsearch(term, (void**)terms, termcmp)) errx(1, "Out of memory while loading %s", indexdb); #endif } fclose(f); } /* save_a_term -- write one term to globalfile */ static void save_a_term(const void *term1, const VISIT which, const int dp) { Indexterm term = *(Indexterm*)term1; int i; if (which == endorder || which == leaf) { for (i = 0; i < term->nrkeys; i++) { if (i > 0) fprintf(globalfile, "!!"); fprintf(globalfile, "%s", term->terms[i]); } fprintf(globalfile, "\t%d\t%s\t%s\t%s\n", term->importance, term->url, term->secno ? term->secno : (use_secno ? unknown_name : "#"), term->doctitle); } } /* save_index -- write terms to file */ static void save_index(const string indexdb, Indexterm terms) { if (! (globalfile = fopen(indexdb, "w"))) errx(1, "%s: %s", indexdb, strerror(errno)); twalk(terms, save_a_term); fclose(globalfile); } /* usage -- print usage message and exit */ static void usage(string name) { errx(1, "Version %s\nUsage: %s [-i indexdb] [-b base] [-x] [-t] [-n] [-c userclass] [-s template] [-u phrase] [html-file]", VERSION, name); } /* tokenize -- split string s into tokens at each comma, return an array */ static string * tokenize(string s) { string * t; int i, n; assert(s && s[0]); for (t = NULL, n = 0; *s; s += i + 1, n++) { i = strcspn(s, ","); renewarray(t, n + 1); t[n] = newnstring(s, i); } renewarray(t, n + 1); /* Make final item NULL */ t[n] = NULL; return t; } /* main */ int main(int argc, char *argv[]) { bool write = true; Indexterm termtree = NULL; /* Sorted tree of terms */ string secno, doctitle; int c, status = 200; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); yyin = NULL; while ((c = getopt(argc, argv, 
"txb:i:cnfrs:u:")) != -1) switch (c) { case 't': bctarget = false; break; /* Don't write after each ID */ case 'x': xml = true; break; /* Output as XML */ case 'b': base = newstring(optarg); break; /* Set base of URL */ case 'i': indexdb = newstring(optarg); break; /* Set name of index db */ case 'c': userclassnames = tokenize(optarg); break; /* Set class names */ case 'n': use_secno = true; break; /* Print section numbers instead of "#" */ case 'f': final = true; break; /* "Final": remove used attributes */ case 'r': trim_punct = false; break; /* Do not remove trailing punctuation */ case 's': section_name = newstring(optarg); break; case 'u': unknown_name = newstring(optarg); break; default: usage(argv[0]); } if (optind == argc) yyin = stdin; else if (argc > optind + 1) usage(argv[0]); else if (eq(argv[optind], "-")) yyin = stdin; else yyin = fopenurl(argv[optind], "r", &status); if (yyin == NULL) {perror(argv[optind]); exit(1);} if (status != 200) errx(1, "%s : %s", argv[optind], http_strerror(status)); if (!base) base = newstring(""); if (!section_name) section_name = newstring("section %s"); if (!unknown_name) unknown_name = newstring("??"); /* Apply user's locale */ setlocale(LC_ALL, ""); /* Read the index DB into memory */ if (indexdb) load_index(indexdb, &termtree); /* Parse, build tree, collect existing IDs */ if (yyparse() != 0) exit(3); /* Scan for index terms, add them to the tree, add IDs where needed */ secno = NULL; doctitle = newstring(""); collect(get_root(tree), &termtree, &secno, &doctitle); /* Write out the document, adding and replacing */ expand(get_root(tree), &write, termtree); /* Store terms to file */ if (indexdb) save_index(indexdb, termtree); fclose(yyin); #if 0 tree_delete(tree); /* Just to test memory mgmt */ #endif return 0; } html-xml-utils-6.5/hxextract.10000644000175000001440000000346211606170750013305 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. 
.TH "HXEXTRACT" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxextract \- extract selected elements from a HTML or XML file .SH SYNOPSIS .B hxextract .RB "[\| " \-h .RB "| " \-? " \|]" .RB "[\| " \-x " \|]" .RB "[\| " \-s .IR text " \|]" .RB "[\| " \-e .IR text " \|]" .RB "[\| " \-b .IR base " \|]" .I element-or-class .RB "[\| " \-c .IR "configfile" " | " .IR file\-or\-URL " \|]" .SH DESCRIPTION .B hxextract outputs all elements with a certain name and/or class. .PP Input must be well-formed, since no HTML heuristics are applied. .SH OPTIONS The following options are supported: .TP 10 .B \-x Use XML format conventions. .TP 10 .BI \-s " text" Insert .I text at the start of the output. .TP 10 .BI \-e " text" Insert .I text at the end of the output. .TP 10 .BI \-b " base" URL base .TP 10 .BI \-c " configfile" Read @chapter lines from .I configfile (lines must be of the form "@chapter filename") and extract elements from each of those files. .TP 10 .BR \-h ", " \-? Print command usage. .SH OPERANDS The following operands are supported: .TP 10 .I element-or-class The name of an element to extract (e.g., "H2"), or the name of a class preceded by "." (e.g., ".example") or a combination of both (e.g., "H2.example"). .TP .I file-or-URL A file name or a URL. To read from standard input, use "-". .SH ENVIRONMENT To use a proxy to retrieve remote files, set the environment variables .B http_proxy and .BR ftp_proxy "." E.g., .B http_proxy="http://localhost:8080/" .SH BUGS .LP Remote files (specified with a URL) are currently only supported for HTTP. Password-protected files or files that depend on HTTP "cookies" are not handled. (You can use tools such as .BR curl (1) or .BR wget (1) to retrieve such files.) 
.SH "SEE ALSO" .BR hxselect (1) html-xml-utils-6.5/cexport.c0000644000175000001440000003036612174313455013046 00000000000000/* cexport.c -- create header file of EXPORT'ed declarations from c files */ /* * Author: Bert Bos * Created: before 1995 * * C files are scanned for the keyword EXPORT. Any declaration that * follows it is copied to a file with the extension .e. It works for * typedefs, #defines, variables and functions, but only if ANSI * prototypes are used. Macros are exported with EXPORTDEF(.) * * Examples: * * EXPORT typedef int * IntPtr -- export IntPtr * * EXPORT void walkTree(Tree t) -- export walkTree() * * #define max(a,b) ((a)>(b)?(a):(b)) * EXPORTDEF(max(a,b)) -- export max(a,b) * * Files are first piped through the C preprocessor cpp. * * Command line options: * -c : use instead of cpp * -e : use instead of '.e' * other options are passed to cpp * * The program is not very smart about C syntax, but it doesn't have * to be, as long as the input is correct ANSI C. If it is not, no * warnings will be given (except possibly for unmatched braces, * quotes and paretheses), but the output will not be correct C, * either. * * TO DO: an option to check if the new .e file is different any * existing one and to keep the old one in that case. (Useful to save * unnecessary recompilations.) 
*/ #include "config.h" #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #include #ifndef CPP #define CPP "cc -E" #endif #define LINELEN BUFSIZ static int err = 0; /* Global error counter */ static char *cppcmd = CPP; static char *extension = ".e"; static FILE *in, *out; static int eof; static int lineno; static char line[LINELEN]; static char *curname; /*************************************************************************** * get_line -- read next line, return 0 if eof ***************************************************************************/ static int get_line() { static char buf[BUFSIZ]; char *s; int i; do { if (eof) return 0; else if (! fgets(line, LINELEN, in)) { eof = 1; return 0; } else if (line[0] != '#') { lineno++; return 1; } else if (line[1] == ' ') { i = 2; while (isspace(line[i])) i++; if (! isdigit(line[i])) { lineno++; return 1; } else { lineno = strtol(line + i, &s, 0) - 1; if (*(s+1) != '"') { strcpy(buf, s + 1); buf[strlen(buf)-1] = '\0'; } else { strcpy(buf, s + 2); for (i = 2; buf[i] != '"'; i++) ; buf[i] = '\0'; } if (buf[0]) curname = buf; } } else if (line[1] == 'l' && strncmp(line, "#line", 5) == 0) { lineno = strtol(line + 5, &s, 0) - 1; if (*(s+1) != '"') { strcpy(buf, s + 1); buf[strlen(buf)-1] = '\0'; } else { strcpy(buf, s + 2); for (i = 2; buf[i] != '"'; i++) ; buf[i] = '\0'; } if (buf[0]) curname = buf; } else { lineno++; return 1; } } while (1); } /*************************************************************************** * exportdef -- copy a #define to output ***************************************************************************/ static void exportdef(i) int i; { unsigned int len; /* * TO DO: encountering an end of file should produce a suitable error * message: end of file in middle of macro definition. 
*/ fputs("#define ", out); /* EXPORTDEF -> #define */ /* Unquote the following string */ for (i += 10; line[i] && line[i] != '"'; i++) ; for (i++; line[i] && line[i] != '"'; i++) putc(line[i], out); putc(' ', out); fputs(line + i + 1, out); /* Write rest of line */ len = strlen(line); /* Continuation lines? */ while (len >= 2 && line[len-2] == '\\') { if (! get_line()) break; fputs(line, out); len = strlen(line); } } /*************************************************************************** * export -- copy next declaration to output ***************************************************************************/ static void export(i) int *i; { int brace, paren, squote, dquote, comment, stop, is_typedef; /* * TO DO: End of file while any of the variables is still * non-null is also an error. */ *i += 6; /* Skip "EXPORT" */ comment = 0; squote = 0; dquote = 0; paren = 0; brace = 0; stop = 0; is_typedef = 0; do { switch (line[*i]) { case '\\': if (line[*i+1]) (*i)++; /* Skip next char */ break; case '{': if (!comment && !squote && !dquote && !paren) brace++; break; case '}': if (!comment && !squote && !dquote && !paren) brace--; if (brace < 0) { fprintf(stderr, "%s:%d: syntax error (too many '}'s)\n", curname, lineno); err++; brace = 0; } break; case '"': if (!comment && !squote) dquote = !dquote; break; case '\'': if (!comment && !dquote) squote = !squote; break; case '*': if (!comment && !dquote && !squote && *i > 0 && line[*i-1] == '/') comment = 1; /* Start of comment */ break; case '/': /* Possible end of comment */ if (comment && *i > 0 && line[*i-1] == '*') comment = 0; break; case '(': if (!comment && !dquote && !squote && !brace) paren++; break; case ')': if (!comment && !dquote && !squote && !brace) { paren--; if (paren == 0 && !is_typedef) { putc(')', out); putc(';', out); putc('\n', out); stop = 1; } } break; case ';': if (!comment && !dquote && !squote && !paren && !brace) { putc(';', out); putc('\n', out); stop = 1; } break; case '=': if (!comment && 
!dquote && !squote && !brace && !paren) { putc(';', out); /* End of variable decl. */ putc('\n', out); stop = 1; } break; case '\n': if (dquote) { fprintf(stderr, "%s:%d: syntax error (string didn't end)\n", curname, lineno); err++; dquote = 0; } if (squote) { fprintf(stderr, "%s:%d: syntax error (char const didn't end)\n", curname, lineno); err++; squote = 0; } break; case '\0': if (! get_line()) stop = 1; else *i = -1; break; case 't': if (!comment && !squote && !dquote && paren == 0 && brace == 0 && strncmp("typedef", &line[*i], 7) == 0) is_typedef = 1; } if (! stop) { if (*i >= 0) putc(line[*i], out); (*i)++; } } while (! stop); } /*************************************************************************** * process -- scan file and write exported declarations ***************************************************************************/ static void process(file, cpp) char *file, *cpp; { char cmd[1024], *s, outname[1024]; int brace, paren, dquote, squote, comment, i; strcpy(cmd, cppcmd); /* Build cpp command line */ strcat(cmd, cpp); strcat(cmd, file ? file : "-"); eof = 0; lineno = 0; in = popen(cmd, "r"); /* Pipe file through cpp */ if (! in) { perror(cmd); err++; return; } if (file) { strcpy(outname, file); /* Construct output file */ s = strrchr(outname, '.'); /* Extension becomes .e */ if (! s) s = outname + strlen(outname); strcpy(s, extension); out = fopen(outname, "w"); if (! out) { perror(outname); err++; return; } } else { out = stdout; /* No file name, use stdout */ } if (file) curname = file; else curname = ""; /* * If the word EXPORT is found and it is not inside a comment, between * quotes, parentheses or braces, the export() function is called to copy * the declaration to the out file. When the export() function ends, `line' * may have changed, but `i' points to the last copied character. * * If the word EXPORTDEF is found at the start of a line and it * is not inside a comment or between quotes, exportdef is called. 
*/ comment = 0; dquote = 0; squote = 0; paren = 0; brace = 0; while (get_line()) { for (i = 0; line[i]; i++) { switch (line[i]) { case '\\': if (line[i+1]) i++; /* Skip next char */ break; case '{': if (!comment && !dquote && !squote) brace++; break; case '}': if (!comment && !dquote && !squote) brace--; if (brace < 0) { fprintf(stderr, "%s:%d: syntax error (too many '}'s)\n", curname, lineno); err++; brace = 0; } break; case '(': if (!comment && !dquote && !squote) paren++; break; case ')': if (!comment && !dquote && !squote) paren--; if (paren < 0) { fprintf(stderr, "%s:%d: syntax error (too many ')'s)\n", curname, lineno); err++; paren = 0; } break; case '\'': if (!comment && !dquote) squote = !squote; break; case '"': if (!comment && !squote) dquote = !dquote; break; case '\n': if (dquote) { fprintf(stderr, "%s:%d: syntax error (string didn't end)\n", curname, lineno); err++; dquote = 0; } if (squote) { fprintf(stderr, "%s:%d: syntax error (char const didn't end)\n", curname, lineno); err++; squote = 0; } break; case '*': if (!comment && !dquote && !squote && i > 0 && line[i-1] == '/') comment = 1; /* Start of comment */ break; case '/': /* Possible end of comment */ if (comment && i > 0 && line[i-1] == '*') comment = 0; break; case 'E': if (comment || dquote || squote || paren != 0 || brace != 0) ; else if (strncmp(&line[i], "EXPORT", 6) == 0 && (i == 0 || !isalnum(line[i-1])) && !isalnum(line[i+6])) export(&i); else if (strncmp(&line[i], "EXPORTDEF ", 10) == 0 && (i == 0 || !isalnum(line[i-1]))) { exportdef(i); i = strlen(line) - 1; } break; } } } if (comment) { fprintf(stderr, "%s:%d: syntax error (comment didn't end)\n", curname, lineno); err++; } if (dquote) { fprintf(stderr, "%s:%d: syntax error (string didn't end)\n", curname, lineno); err++; } if (squote) { fprintf(stderr, "%s:%d: syntax error (char const didn't end)\n", curname, lineno); err++; } if (file) fclose(out); fclose(in); } static void usage(s) char *s; { fprintf(stderr, "Usage: %s 
{-Idir|-Dsym} [-h] [-c cppcmd] [-e ext] {file}\n", s); err++; } int main(argc, argv) int argc; char *argv[]; { char cpp[BUFSIZ]; /* Max. cmd. line length */ int nfiles, i; strcpy(cpp, " -D__export "); nfiles = 0; for (i = 1; i < argc; i++) { if (!strncmp(argv[i], "-c", 2)) { /* Replace cpp command */ if (argv[i][2]) cppcmd = argv[i] + 2; else cppcmd = argv[++i]; } else if (!strncmp(argv[i], "-e", 2)) { /* Extension instead of .e */ if (argv[i][2]) extension = argv[i] + 2; else extension = argv[++i]; } else if (!strncmp(argv[i], "-h", 2)) { /* -h: help */ usage(argv[0]); } else if (argv[i][0] == '-' || argv[i][0] == '+') { strcat(cpp, argv[i]); /* Pass options to cpp */ strcat(cpp, " "); } else { /* Not option, must be file */ nfiles++; process(argv[i], cpp); } } if (nfiles == 0) /* no arguments, use stdin */ process(NULL, cpp); return err; } html-xml-utils-6.5/textwrap.c0000644000175000001440000001137412174303405013230 00000000000000/* * Routines to wrap lines and indent them. * * Copyright 1994-2000 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * To do: count characters, not bytes * * Bert Bos * Created 10 May 1998 * $Id: textwrap.c,v 1.28 2013-07-25 20:13:57 bbos Exp $ */ #include "config.h" #include #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #include #include "export.h" #include "types.e" #include "errexit.e" #include "heap.e" /* To do: XML 1.1 allows , so the following isn't safe anymore */ #define NBSP 1 /* Marks non-break-space */ static unsigned char *buf = NULL; static int buflen = 0; /* Size of buf */ static int len = 0; /* Length of buf */ static int linelen = 0; /* Length of printed line */ static int level = 0; /* Indentation level */ static int indent = 2; /* # of spaces per indent */ static int maxlinelen = 72; /* Desired line length */ static unsigned char prev = NBSP; /* Previously added char */ /* set_indent 
-- set the amount of indent per level */ EXPORT void set_indent(int n) {indent = n;} /* set_linelen -- set the maximum length of a line */ EXPORT void set_linelen(int n) {maxlinelen = n;} /* flush -- print word in buf */ EXPORT void flush() { int i, j; assert(len <= buflen); while (len != 0 && linelen + len >= maxlinelen) { /* Line needs break */ /* Find last space before maxlinelen */ for (i = maxlinelen - linelen - 1; i >= 0 && buf[i] != ' '; i--) ; /* If none, find first space after maxlinelen, or end of buffer */ if (i < 0) for (i = linelen <= maxlinelen ? maxlinelen - linelen : 0; i < len && buf[i] != ' '; i++) ; if (i == len) break; /* No breakpoint */ assert(i >= 0); /* Found a breakpoint at i */ assert(buf[i] == ' '); /* Print up to breakpoint (removing non-break-space markers) */ for (j = 0; j < i; j++) putchar(buf[j] != NBSP ? buf[j] : ' '); putchar('\n'); /* Break line */ linelen = 0; assert(level >= 0); assert(len >= 0); assert(i <= len); i++; /* Skip the breakpoint */ len -= i; if (len != 0) { /* If anything left, insert the indent */ memmove(buf + level * indent, buf + i, len); for (j = 0; j < level * indent; j++) buf[j] = NBSP; /* Indent */ len += level * indent; } } /* Print rest, if any (removing non-break-space markers) */ /* First remove spaces at end of line */ while (len > 0 && buf[len-1] == ' ') len--; for (j = 0; j < len; j++) putchar(buf[j] != NBSP ? 
buf[j] : ' '); linelen += len; len = 0; } /* outc -- add one character to output buffer */ EXPORT void outc(char c, bool preformatted) { if (c == '\n' && !preformatted) c = ' '; /* Newline is just a space */ if (c == '\r' && !preformatted) c = ' '; /* CR is just a space */ if (c == '\t' && !preformatted) c = ' '; /* Tab is just a space */ if (c == '\f' && !preformatted) c = ' '; /* Formfeed is just a space */ if (c == ' ' && preformatted) c = NBSP; /* Non-break-space marker */ if (c == ' ' && prev == ' ') return; /* Don't add another space */ if (c == ' ' && linelen + len >= maxlinelen) flush(); /* Empty the buf */ if (c == '\n' || c == '\r' || c == '\f') flush(); /* Empty the buf */ if (c == ' ' && linelen + len == 0) return; /* No ins at BOL */ while (level * indent >= buflen) {buflen += 1024; renewarray(buf, buflen);} if (linelen + len == 0) while (len < level * indent) buf[len++] = NBSP; if (c == ' ' && len && buf[len-1] == ' ') return; /* Skip multiple spaces */ while (len >= buflen) {buflen += 1024; renewarray(buf, buflen);} buf[len++] = c; /* Finally, insert c */ prev = c; /* Remember for next round */ } /* out -- add text to current output line, print line if getting too long */ EXPORT void out(string s, bool preformatted) { if (s) for (; *s; s++) outc(*s, preformatted); } /* outn -- add n chars to current output, print line if getting too long */ EXPORT void outn(string s, size_t n, bool preformatted) { size_t i; for (i = 0; i < n; i++) outc(s[i], preformatted); } /* outln -- add string to output buffer, followed by '\n' */ EXPORT void outln(char *s, bool preformatted) { out(s, preformatted); flush(); assert(len == 0); putchar('\n'); linelen = 0; } /* outbreak -- conditional new line; make sure next text starts on new line */ EXPORT void outbreak() { flush(); assert(len == 0); if (linelen != 0) { putchar('\n'); linelen = 0; } } /* inc_indent -- increase indentation level by 1 */ EXPORT void inc_indent(void) { flush(); level++; } /* decc_indent -- decrease 
indentation level by 1 */ EXPORT void dec_indent(void) { flush(); level--; } html-xml-utils-6.5/xml2asc.c0000644000175000001440000000545312174266016012732 00000000000000/* * * Program to convert files from UTF-8 to ASCII, using the * &#-escapes from XML to escape non-ASCII characters. * * Usage: * * xml2asc * * Reads from stdin and write to stdout. Converts from UTF8 (with or * without &#-escapes) to ASCII, inserting &#-escapes for all * non-ASCII characters. * * Version: $Revision: 1.7 $ ($Date: 2013-06-30 20:28:19 $) * Author: Bert Bos * * Copyright 1994-2002 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * **/ #include "config.h" #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #define NOT_A_CHAR 2097152 /* One more than the largest code point */ static int nerrors = 0; /* getUTF8 -- read UTF8 encoded char from stdin, return NOT_A_CHAR on error */ static long getUTF8() { int b; long c; /* 0 = 0000 1 = 0001 2 = 0010 3 = 0011 4 = 0100 5 = 0101 6 = 0110 7 = 0111 8 = 1000 9 = 1001 A = 1010 B = 1011 C = 1100 D = 1101 E = 1110 F = 1111 */ if ((b = getchar()) == EOF) return EOF; /* EOF */ if ((b & 0x80) == 0) return b; /* 0xxxxxxx = ASCII */ if ((b & 0xE0) == 0xC0) { /* 110xxxxx + 10xxxxxx */ c = b & 0x1F; if ((b = getchar()) == EOF) {ungetc(EOF, stdin); return NOT_A_CHAR;} c = (c << 6) | (b & 0x3F); return c <= 0x7F ? NOT_A_CHAR : c; } if ((b & 0xF0) == 0xE0) { /* 1110xxxx + (2) */ c = b & 0x0F; if ((b = getchar()) == EOF) {ungetc(EOF, stdin); return NOT_A_CHAR;} c = (c << 6) | (b & 0x3F); if ((b = getchar()) == EOF) {ungetc(EOF, stdin); return NOT_A_CHAR;} c = (c << 6) | (b & 0x3F); if (0xD800 <= c && c <= 0xDFFF) return NOT_A_CHAR; /* Surrogate pair */ return c <= 0x7FF ? 
NOT_A_CHAR : c; } if ((b & 0xF8) == 0xF0) { /* 11110xxx + (3) */ c = b & 0x07; if ((b = getchar()) == EOF) {ungetc(EOF, stdin); return NOT_A_CHAR;} c = (c << 6) | (b & 0x3F); if ((b = getchar()) == EOF) {ungetc(EOF, stdin); return NOT_A_CHAR;} c = (c << 6) | (b & 0x3F); if ((b = getchar()) == EOF) {ungetc(EOF, stdin); return NOT_A_CHAR;} c = (c << 6) | (b & 0x3F); return c <= 0xFFFF ? NOT_A_CHAR : c; } return NOT_A_CHAR; } /* xml2asc -- copy stdin to stdout, converting UTF8 XML to ASCII XML */ static void xml2asc(void) { long c; while ((c = getUTF8()) != EOF) { if (c == NOT_A_CHAR) nerrors++; else if (c <= 127) putchar(c); else printf("&#%ld;", c); } } /* Print usage message, then exit */ static void usage(char *progname) { fprintf(stderr, "Version %s\nUsage: %s outfile\n", VERSION, progname); exit(1); } /* main -- main body */ int main(int argc, char *argv[]) { if (argc != 1) usage(argv[0]); xml2asc(); return nerrors; } html-xml-utils-6.5/hxnormalize.c0000644000175000001440000002165012174313455013716 00000000000000/* * Format an HTML source in a consistent manner. 
* * Copyright 1994-2012 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 * * Created 9 May 1998 * Bert Bos * $Id: hxnormalize.c,v 1.16 2013-07-25 21:08:00 bbos Exp $ */ #include "config.h" #include #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_STRING_H # include #elif HAVE_STRINGS_H # include #endif #include #include #include "export.h" #include "types.e" #include "tree.e" #include "html.e" #include "scan.e" #include "textwrap.e" #include "dict.e" #include "openurl.e" #include "errexit.e" static Tree tree; static bool do_xml = false; static bool do_endtag = false; static bool has_errors = false; static bool do_doctype = true; static bool clean_span = false; static string long_comment = NULL; static bool do_lang = false; /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_errors = true; } /* start -- called before the first event is reported */ void* start(void) { tree = create(); return NULL; } /* end -- called after the last event is reported */ void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { tree = append_comment(tree, commenttext); } /* handle_text -- called after a text chunk is parsed */ void handle_text(void *clientdata, string text) { tree = append_text(tree, text); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { tree = append_declaration(tree, gi, fpi, url); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { tree = append_procins(tree, pi_text); } /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist attribs) { tree = html_push(tree, name, attribs); } /* 
handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { tree = html_push(tree, name, attribs); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { tree = html_pop(tree, name); free(name); } /* insert -- insert an attribute into a sorted list of attributes */ static pairlist insert(pairlist x, pairlist list) { if (! list) { /* Empty list */ x->next = NULL; return x; } else if (strcmp(x->name, list->name) <= 0) { /* Insert at head */ x->next = list; return x; } else { /* Insert not at head */ list->next = insert(x, list->next); return list; } } /* sort_list -- sort a linked list of attributes, return reordered list */ static pairlist sort_list(pairlist list) { /* Insertion sort should be fast enough... */ if (! list) return NULL; else return insert(list, sort_list(list->next)); } /* next_ambiguous -- check if omitting end changes the meaning */ static bool next_ambiguous(Node *n) { Node *h = n; /* Skip text nodes with only white space */ while (h->sister && h->sister->tp == Text && only_space(h->sister->text)) h = h->sister; if (h->sister == NULL) return false; if (h->sister->tp == Text) return true; if (h->sister->tp == Comment) return true; if (h->sister->tp == Procins) return true; if (h->sister->tp == Declaration) return false; /* Should not occur */ assert(h->sister->tp == Element); /* Cannot be Root */ return has_parent(h->sister->name, n->name); } /* needs_quotes -- check if the attribute value can be printed unquoted */ static bool needs_quotes(const string s) { int i; assert(s); if (!s[0]) return true; /* Empty string */ for (i = 0; s[i]; i++) if (!isalnum(s[i]) && (s[i] != '-') && (s[i] != '.')) return true; return false; } /* pp -- print the document normalized */ static void pp(Tree n, bool preformatted, bool allow_text, conststring lang) { bool pre, mixed;\ conststring lang2; string s; pairlist h; size_t i; 
Tree l; switch (n->tp) { case Text: if (!allow_text) { assert(only_space(n->text)); } else { s = n->text; i = strlen(s); outn(s, i, preformatted); } break; case Comment: if (long_comment && strstr(n->text, long_comment) && !preformatted) { /* Found a comment that should have an empty line before it */ outbreak(); outln(NULL, true); } out("", true); else outln("-->", preformatted); break; case Declaration: if (do_doctype) { out("name, false); if (n->text) { out(" PUBLIC \"", false); out(n->text, false); out("\"", false); } if (n->url) { if (!n->text) out(" SYSTEM", false); out(" \"", false); out(n->url, false); out("\"", false); } outln(">", false); } break; case Procins: out("text, true); if (allow_text || preformatted) out(">", false); else outln(">", false); break; case Element: if (clean_span && eq(n->name, "span") && ! n->attribs) { /* Omit start and end tags, print just the children. */ for (l = n->children; l != NULL; l = l->sister) pp(l, preformatted, true, lang); break; } /* Determine language, remove redundant language attribute */ if (do_lang) { if ((lang2 = pairlist_get(n->attribs, "lang")) || (lang2 = pairlist_get(n->attribs, "xml:lang"))) { if (lang && eq(lang, lang2)) { pairlist_unset(&n->attribs, "lang"); pairlist_unset(&n->attribs, "xml:lang"); } lang = lang2; } } if (!preformatted && break_before(n->name)) outln(NULL, false); out("<", preformatted); out(n->name, preformatted); if (break_before(n->name)) inc_indent(); n->attribs = sort_list(n->attribs); for (h = n->attribs; h != NULL; h = h->next) { out(" ", false); out(h->name, false); if (do_xml) { out("=\"", false); out(h->value ? 
h->value : h->name, true); outc('"', false); } else if (h->value == NULL) { /* The h->name *is* the value (and the attribute name is implicit) */ } else if (!needs_quotes(h->value)) { out("=", false); /* Omit the quotes */ out(h->value, true); } else { out("=\"", false); out(h->value, true); outc('"', false); } } if (is_empty(n->name)) { assert(n->children == NULL); out(do_xml ? " />" : ">", true); if (break_before(n->name)) dec_indent(); if (!preformatted && break_after(n->name)) outln(NULL, false); } else { out(">", preformatted); pre = preformatted || is_pre(n->name); mixed = is_mixed(n->name); for (l = n->children; l != NULL; l = l->sister) pp(l, pre, mixed, lang); if (break_before(n->name)) dec_indent(); if (do_xml || do_endtag || need_etag(n->name) || next_ambiguous(n)) { out("name, preformatted); out(">", preformatted); } if (!preformatted && break_after(n->name)) outbreak(); } break; default: assert(!"Cannot happen"); } } /* prettyprint -- print the tree normalized */ static void prettyprint(Tree t) { Tree h; assert(t->tp == Root); for (h = t->children; h != NULL; h = h->sister) pp(h, false, false, NULL); flush(); } /* usage -- print usage message and exit */ static void usage(string prog) { fprintf(stderr, "%s version %s\n\ Usage: %s [-e] [-d] [-x] [-L] [-i indent] [-l linelen] [-c commentmagic] [file_or_url]\n", prog, VERSION, prog); exit(1); } /* main -- main body */ int main(int argc, char *argv[]) { int c, status = 200; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); while ((c = getopt(argc, argv, "edxi:l:sc:L")) != -1) switch (c) { case 'e': do_endtag = true; break; case 'x': do_xml = true; break; case 'd': do_doctype = false; 
break; case 'i': set_indent(atoi(optarg)); break; case 'l': set_linelen(atoi(optarg)); break; case 's': clean_span = true; break; case 'c': long_comment = optarg; break; case 'L': do_lang = true; break; default: usage(argv[0]); } if (optind == argc) yyin = stdin; else if (optind == argc - 1) yyin = fopenurl(argv[optind], "r", &status); else usage(argv[0]); if (yyin == NULL) {perror(argv[optind]); exit(2);} if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status)); if (yyparse() != 0) {exit(3);} tree = get_root(tree); prettyprint(tree); return has_errors ? 1 : 0; } html-xml-utils-6.5/hxxmlns.c0000644000175000001440000001631012174266016013054 00000000000000/* * hxxmlns - expand XML Namespace prefixes * * Expand all element and attribute names to "global names" by * expanding the prefix. All names will be printed as "{URL}name". * Attribute names without a prefix will have an empty namespace part: * "{}name". * * Copyright 1994-2000 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 22 Mar 2000 * Version: $Id: hxxmlns.c,v 1.7 2013-06-30 20:37:30 bbos Exp $ * **/ #include "config.h" #include #ifdef HAVE_UNISTD_H # include #endif #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #include #include #include "export.h" #include "types.e" #include "heap.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "errexit.e" extern int yylineno; /* From scan.l */ /* The symbol table is a chain of prefix/uri pairs. Every time an * element starts, the prefixes defined by it are added at the end. To * expand a prefix, the most recently added prefix/uri pair is used. * When en element ends, the chain is reduced to what it was when the * element started. The stack keeps track of where the chain ended at * the start of the element. * * ToDo: should we hash the prefixes? 
or is linear search good enough? **/ typedef struct _Symbol { string prefix; string uri; struct _Symbol *next; } Symbol, *SymbolTable; typedef struct _StackElt { Symbol *frame; struct _StackElt *next; } *Stack; static Symbol xml = {"xml", "http://www.w3.org/XML/1998/namespace", NULL}; static bool has_error = false; static SymbolTable symtable = &xml; static Stack stack = NULL; static bool do_decls = true; /* Print decl, comment, PI? */ /* print_globalname -- print a name with expanded prefix */ static void print_globalname(string name, bool use_default) { string h, prefix, local; Symbol *s; /* Split the name */ h = strchr(name, ':'); if (!h && !use_default) { /* No prefix & no default ns */ printf("%s", name); return; } if (h) { *h = '\0'; prefix = name; local = h + 1; } else { prefix = ""; local = name; } /* Find the prefix in the symbol table */ for (s = symtable; s && !eq(prefix, s->prefix); s = s->next) ; if (!s && !eq(prefix, "")) { fprintf(stderr, "%d: prefix \"%s\" not defined\n", yylineno, prefix); has_error = true; /* To do: do we report anything if the default prefix is undefined? */ } /* ToDo: check that any '}' in uri is escaped */ printf("{%s}%s", s ? s->uri : (string)"", local); } /* do_tag -- print a start or empty tag expanded */ static void do_tag(string name, pairlist attribs, bool empty) { Stack h; pairlist p; Symbol *sym; /* Mark the current end of the symbol table */ new(h); h->next = stack; h->frame = symtable; stack = h; /* Scan the attributes for namespace definitions and store them */ for (p = attribs; p; p = p->next) { if (strncmp(p->name, "xmlns", 5) == 0) { new(sym); sym->prefix = newstring(p->name + (p->name[5] ? 
6 : 5)); sym->uri = newstring(p->value); sym->next = symtable; symtable = sym; } } /* Print the tag with prefixes expanded */ putchar('<'); print_globalname(name, true); for (p = attribs; p; p = p->next) { if (strncmp(p->name, "xmlns", 5) != 0) { putchar(' '); print_globalname(p->name, false); printf("=\"%s\"", p->value); } } printf(empty ? "/>" : ">"); } /* pop_symboltable -- unwind the symbol table to previous mark */ static void pop_symboltable(string name) { Symbol *h; Stack p; if (!stack) { if (! has_error) fprintf(stderr, "%d: too many end tags\n", yylineno); has_error = true; return; } /* Remove entries from symbol table chain until last mark */ while (symtable != stack->frame) { h = symtable; symtable = symtable->next; dispose(h->prefix); dispose(h->uri); dispose(h); } /* Pop stack itself */ p = stack; stack = stack->next; dispose(p); } /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_error = true; } /* start -- called before the first event is reported */ void* start(void) { return NULL; } /* end -- called after the last event is reported */ void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { if (do_decls) printf("", commenttext); free(commenttext); } /* handle_text -- called after a text chunk is parsed */ void handle_text(void *clientdata, string text) { printf("%s", text); free(text); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { if (do_decls) { printf("", fpi); if (url) printf(" %s\"%s\">", fpi ? 
"" : "SYSTEM ", url); printf(">"); } free(gi); if (fpi) free(fpi); if (url) free(url); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { if (do_decls) printf("", pi_text); free(pi_text); } /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist attribs) { do_tag(name, attribs, false); free(name); pairlist_delete(attribs); } /* handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { do_tag(name, attribs, true); pop_symboltable(name); free(name); pairlist_delete(attribs); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { /* Printf the end tag */ printf("'); /* Unwind the symbol table */ pop_symboltable(name); free(name); } /* usage -- print usage message and exit */ static void usage(string prog) { fprintf(stderr, "Version %s\nUsage: %s [-d] [xml-file-or-url]\n", VERSION, prog); exit(2); } int main(int argc, char *argv[]) { int i, status = 200; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); /* Parse command line arguments */ for (i = 1; i < argc && argv[i][0] == '-' && !eq(argv[i], "--"); i++) { switch (argv[i][1]) { case 'd': do_decls = false; break; default: usage(argv[0]); } } if (i < argc && eq(argv[i], "--")) i++; if (i == argc) yyin = stdin; else if (i == argc - 1) yyin = fopenurl(argv[i], "r", &status); else usage(argv[0]); if (yyin == NULL) {perror(argv[i]); exit(1);} if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status)); if (yyparse() != 0) exit(3); return 
has_error ? 1 : 0; } html-xml-utils-6.5/hash.c0000644000175000001440000000630711131725771012303 00000000000000#ifndef HAVE_SEARCH_H /* * hsearch() on Mac OS X 10.1.2 appears to be broken: there is no * search.h; there is a search() in the C library, but it doesn't work * properly. We include some hash functions here, protected by * HAVE_SEARCH_H. Hopefully when search.h appears in Mac OS X, * hsearch() will be fixed at the same time... * * Part of HTML-XML-utils, see: * http://www.w3.org/Tools/HTML-XML-utils/ */ #include "config.h" #include #include #include #include "export.h" #include "heap.e" EXPORT typedef struct entry {char *key; void *data;} ENTRY; EXPORT typedef enum {FIND, ENTER} ACTION; static ENTRY *htab; static int *htab_index1, *htab_index2; static unsigned int htab_size = 0; static unsigned int htab_inited; /* isprime -- test if n is a prime number */ static int isprime(unsigned int n) { /* Simplistic algorithm, probably good enough for now */ unsigned int i; assert(n % 2); /* n not even */ for (i = 3; i * i < n; i += 2) if (n % i == 0) return 0; return 1; } /* hcreate -- create a hash table for at least nel entries */ EXPORT int hcreate(size_t nel) { /* Change nel to next higher prime */ for (nel |= 1; !isprime(nel); nel += 2) ; /* Allocate hash table and array to keep track of initialized entries */ newarray(htab, nel); newarray(htab_index1, nel); newarray(htab_index2, nel); if (!htab || !htab_index1 || !htab_index2) { dispose(htab); dispose(htab_index1); dispose(htab_index2); return 0; /* Out of memory */ } htab_inited = 0; htab_size = nel; return 1; } /* hdestroy -- deallocate hash table */ EXPORT void hdestroy(void) { assert(htab_size); dispose(htab_index1); dispose(htab_index2); dispose(htab); htab_size = 0; } /* hsearch -- search for and/or insert an entry in the hash table */ EXPORT ENTRY *hsearch(ENTRY item, ACTION action) { unsigned int hval, i; char *p; assert(htab_size); /* There must be a hash table */ /* Compute a hash value */ #if 1 /* 
This function suggested by Dan Bernstein */ for (hval = 5381, p = item.key; *p; p++) hval = (hval * 33) ^ *p; #else i = hval = strlen(item.key); do {i--; hval = (hval << 1) + item.key[i];} while (i > 0); #endif hval %= htab_size; /* if (action == ENTER) debug("%d\n", hval); */ /* Look for either an empty slot or an entry with the wanted key */ i = hval; while (htab_index1[i] < htab_inited && htab_index2[htab_index1[i]] == i && strcmp(htab[i].key, item.key) != 0) { i = (i + 1) % htab_size; /* "Open" hash method */ if (i == hval) return NULL; /* Made full round */ } /* Now we either have an empty slot or an entry with the same key */ if (action == ENTER) { htab[i].key = item.key; /* Put the item in this slot */ htab[i].data = item.data; if (htab_index1[i] >= htab_inited || htab_index2[htab_index1[i]] != i) { /* Item was not yet used, mark it as used */ htab_index1[i] = htab_inited; htab_index2[htab_inited] = i; htab_inited++; } return &htab[i]; } else if (htab_index1[i] < htab_inited && htab_index2[htab_index1[i]] == i) return &htab[i]; /* action == FIND, found key */ return NULL; /* Found empty slot */ } #endif /* HAVE_SEARCH_H */ html-xml-utils-6.5/hxcount.c0000644000175000001440000001051012174313455013037 00000000000000/* * Count elements and attributes. * * This counts occurrences of elements and element/attribute pairs. * This is just an example of how to use the parser. * No attempt is made to count efficiently. 
* * Copyright 1994-2000 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Bert Bos * Created Nov 1998 * $Id: hxcount.c,v 1.4 2013-07-25 21:00:38 bbos Exp $ */ #include "config.h" #include #ifdef HAVE_UNISTD_H # include #endif #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRdup # include "strdup.e" # endif #endif #include #include #include #include "export.h" #include "types.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "errexit.e" typedef struct _pair { char *name; int count; } pair; static pair *freq = NULL; static int nrelems = 0; static bool has_errors = false; /* countstring -- add 1 to number of occurences for s (case-insensitively) */ static void countstring(char *s) { int i; i = 0; while (i < nrelems && strcasecmp(freq[i].name, s) != 0) i++; if (i == nrelems) { nrelems++; freq = realloc(freq, nrelems * sizeof(freq[0])); if (freq == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);} freq[i].name = strdup(s); freq[i].count = 0; } freq[i].count++; } /* count -- count element types and their attributes */ static void count(char *name, pairlist attribs) { /* Count element name */ countstring(name); /* Count attribute names (or rather, the strings "elem/attrib") */ for (; attribs != NULL; attribs = attribs->next) { char *s = malloc(strlen(name) + strlen(attribs->name) + 2); if (s == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);} strcat(strcat(strcpy(s, name), "/"), attribs->name); countstring(s); free(s); } } /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_errors = true; } /* start -- called before the first event is reported */ void* start(void) {return NULL;} /* end -- called after the last event is reported */ void end(void *clientdata) {} /* handle_comment -- called after 
a comment is parsed */ void handle_comment(void *clientdata, string commenttext) {} /* handle_text -- called after a tex chunk is parsed */ void handle_text(void *clientdata, string text) {} /* handle_declaration -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) {} /* handle_proc_instr -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) {} /* handle_starttag -- called after a start tag is parsed */ void handle_starttag(void *clientdata, string name, pairlist attribs) { count(name, attribs); } /* handle_emptytag -- called after am empty tag is parsed */ extern void handle_emptytag(void *clientdata, string name, pairlist attribs) { count(name, attribs); } /* handle_pop -- called after an endtag is parsed (name may be "") */ extern void handle_endtag(void *clientdata, string name) {} /* usage -- print usage message and exit */ static void usage(string prog) { fprintf(stderr, "Version %s\n", VERSION); fprintf(stderr, "Usage: %s [html-file]\n", prog); exit(2); } /* main -- parse input, count elements and attributes of each type */ int main(int argc, char *argv[]) { int i, status = 200; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); if (argc == 1) yyin = stdin; else if (argc == 2) yyin = fopenurl(argv[1], "r", &status); else usage(argv[0]); if (yyin == NULL) {perror(argv[1]); exit(1);} if (status != 200) errexit("%s : %s\n", argv[1], http_strerror(status)); /* Parse input */ if (yyparse() != 0) exit(3); /* Print results */ for (i = 0; i < nrelems; i++) printf("%6d\t%s\n", freq[i].count, freq[i].name); return has_errors ? 
1 : 0; } html-xml-utils-6.5/hxmkbib.10000644000175000001440000001746111606170750012723 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXMKBIB" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxmkbib \- create bibliography from a template .SH SYNOPSIS .B hxmkbib .RB "[\| " \-s .IR separator " \|]" .RB "[\| " \-a .IR auxfile " \|]" .RB "[\| " \-n .IR maxauthors " \|]" .RB "[\| " \-r .IR moreauthors " \|]" .IR bibfile " [\| " templatefile " \|]" .SH DESCRIPTION .LP The .B hxmkbib commands reads a list of bibliographic keys (labels) from .IR auxfile , finds the corresponding entries in .I bibfile and creates a bibliography, using .I templatefile as a model. The .I auxfile may, e.g., have been created by .BR hxcite (1). It consists of labels, one per line. The .I bibfile is a .BR refer (1) style database. .B hxmkbib looks for entries with a .B %L field equal to a key in the .IR auxfile . .PP The .I templatefile consists of three parts: .TP 10 .B preamble The preamble is the part up to the first occurrence of .BR %{ . The preamble is copied to the output unchanged, except for occurrences of .BR % . To create a single % in the output, there must be two in the preamble (%%). All other occurrences of % followed by another letter are not copied, but are collected into a string called the "sort order." and use to sort the entries, as explained below. .TP .B template The template starts with .B %{L: and ends with a matching .BR %} . The text in between is copied as often as there are bibliographic entries in .I bibfile that correspond to keys in .IR auxfile . Variables in the template are replaced by the corresponding field in the bibliographic entry: all occurrences of .BI % x will be replaced by the field .BI % x of the entry. Parts of the text may be enclosed in .BI %{ x : and .BR %} . This means that the text in between should only be output if the current entry has a field .IR x . Text that is enclosed in .BI %{! 
x : and .B %} will only be output if the entry does .B not have a field .IR x . Both kinds of conditional sections may also be nested. .TP .B postamble The text after the .B %} is copied unchanged to the output, after all bibliographic entries have been processed. .PP By default bibliographic entries are copied to the output in the order of the keys in .IR auxfile , except that keys that occur more than once are only used once. If the preamble contains occurrences of .BI % x (where .I x is neither "%" nor "{") then these together determine the sort order. E.g., if the preamble contains %A%D then the entries will be sorted first on field A (author) and then on field D (date). .PP Here is an example of template file that creates a bibliography in HTML format: .d Bibliography
        %{L:
        %{A:A%}%{!A:%{E:E%}%{!E:%{Q:Q%}%{!Q:-%}%}%}
        %{B:"%T" in: %{E:%E (eds) %}%B.%{V: %V.%} %}%{J:"%T" in: %{E:%E (eds) %}%J.%{V: %V.%}%{N: %N.%}%{P: pp. %P.%} %}%{!B:%{!J:%T. %}%}%{I:%I. %}%{D:%D. %}%{C:%C. %}%{R:%R. %}%{S:%S. %}%{O:%O %}%{U:%U %}
        %}
        .e This template starts with four lines of preamble, including the sort string %A%D on line 3. The sort string itself will not be output, but the rest of the comment will. .PP From the line .B %{L: to the line .B %} is the template. E.g., the line that starts with .B
        Gosling, James; Joy, Bill; Steele, Guy
        The Java language specification. Addison-Wesley. 1998. http://java.sun.com/docs/books/jls/index.html
        .e .SH OPTIONS The following options are supported: .TP 10 .BI \-a " auxfile" The file that contains the list of keys (labels) for which bibliographic entries should be printed. If the option is absent, the name of this file is formed from the .I templatefile argument by removing the last extension and adding .BR .aux . If no .I templatefile is given, the default .I auxfile is .BR aux.aux . .TP .BI \-s " separator" If there are multiple authors or editors in an entry, their names will be listed with a separator in between. By default the separator is "; " (i.e., a semicolon and a space). With this option the separator can be changed. .TP .BI \-n " maxauthors" If there are more than .I maxauthors authors in an entry, only the first author will be printed and the others will be replaced by the string .IR moreauthors . The default is 3. .TP .BI \-r " moreauthors" The string to print if there are more than .I maxauthors authors. The default is "et al.". .SH OPERANDS The following operands are supported: .TP 10 .I bibfile The name of a bibliographic database must be given. It must be a file in .BR refer (1) format and every entry must have at least a .B %L field, which is used as key. (Entries without such a field will be ignored.) .TP .I templatefile The name of the input file is optional. If absent, .B hxmkbib will read the template from stdin. .SH "DIAGNOSTICS" The following exit values are returned: .TP 10 .B 0 Successful completion. .TP .B > 0 An error occurred. Usually this is because a file could not be opened or because the %{ and %} pairs are not properly nested. Very rarely it may also be an out of memory error. Some of the possible error messages: .TP .I missing ':' in pattern .B hxmkbib found a %{ but the second or third letter after it was not a colon. .TP .I no '%{' in template file The template file is unusable, because it contains no template. .TP .I unbalanced %{..%} in pattern There are more %{ than %}. 
.SH "SEE ALSO" .BR asc2xml (1), .BR hxcite (1), .BR hxnormalize (1), .BR hxnum (1), .BR hxprune (1), .BR hxtoc (1), .BR hxunent (1), .BR xml2asc (1), .BR UTF-8 " (RFC 2279)" .SH BUGS Sorting is primitive: the program doesn't parse dates or names and simply sorts "Jan 2000" under the letter "J" and "Albert Camus" under the letter "A". For the moment the only work-around is to put names in the .I bibfile as "Camus, Albert". .PP The program simply lists all authors or editors. There is no way to generate an "et. al." after the third one. The work-around is to put the "et. al." in the .IR bibfile . Putting commas between the first authors and the word "and" before the final one is also not possible. .PP The program doesn't try to interpret names of authors or editors and they cannot be reformatted. It is impossible to write a name that is specified as "Sartre, Jean-Paul" in the .I bibfile as "J. Sartre" or as "Jean-Paul Sartre" in the output. .PP There is no way to suppress a period after a field if the field already ends with a period. E.g., the template "%{A:A.%}" may generate "A. Person Jr.." if the author is "A. Person Jr." The only option is to either not put periods in the .IR bibfile or not put periods in the template. .PP Entries in the .I bibfile can only be used if they have a .B %L (label) field. The program cannot find entries by searching for keywords, like .BR refer (1). .PP .B hxmkbib will replace any ampersands (&) and less-than (<) and greater-than (>) signs that occur in the .I bibfile by their XML entities & < > on the assumption that the template is HTML/XML. This may not be appropriate for other formats. 
html-xml-utils-6.5/openurl.c0000644000175000001440000005237712225047246013053 00000000000000/* * Routines to open a URL instead of a local file * * int openurl(const char *path) * FILE *fopenurl(const char *path) * * ToDo: set CURLOPT_FAILONERROR, check return codes and then return * NULL from fopenurl2() with a proper error code, instead of just * passing the body of the HTTP error message. * ToDo: Add arguments for PUT, POST; parse and return headers. * * Todo: authentication, use relevant fields in url. * * Uses http_proxy and ftp_proxy environment variables. * * Author: Bert Bos * Created: 7 March 1999 * * Copyright © 1994-2011 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * * The write_cb() and wait_for_data() functions are inspired by * http://curl.haxx.se/libcurl/c/fopen.html which has the following * copyright: * * Copyright (c) 2003 Simtec Electronics * * Re-implemented by Vincent Sanders with * extensive reference to original curl example code * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include "config.h" #define _GNU_SOURCE /* Try to get fopencookie() from stdio.h */ #include #include #include #include #include #include #include #include #include "export.h" #if HAVE_LIBCURL && !HAVE_FOPENCOOKIE # include "fopencookie.e" /* Use our own fopencookie() */ #endif #include "dict.e" #include "heap.e" #include "types.e" #include "errexit.e" #define MAXREDIRECTS 10 /* Maximum # of 30x redirects to follow */ #ifdef DEBUG /* debug -- print debugging info */ static void debug(char *format,...) { va_list ap; va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); } #else #define debug(...) 
#endif /* http_strerror -- return a string describing the status code */ EXPORT conststring http_strerror(int code) { switch(code) { case 100: return "Continue"; case 101: return "Switching Protocols"; case 200: return "OK"; case 201: return "Created"; case 202: return "Accepted"; case 203: return "Non-Authoritative Information"; case 204: return "No Content"; case 205: return "Reset Content"; case 206: return "Partial Content"; case 300: return "Multiple Choices"; case 301: return "Moved Permanently"; case 302: return "Found"; case 303: return "See Other"; case 304: return "Not Modified"; case 305: return "Use Proxy"; case 306: return "(Unused)"; case 307: return "Temporary Redirect"; case 400: return "Bad Request"; case 401: return "Unauthorized"; case 402: return "Payment Required"; case 403: return "Forbidden"; case 404: return "Not Found"; case 405: return "Method Not Allowed"; case 406: return "Not Acceptable"; case 407: return "Proxy Authentication Required"; case 408: return "Request Timeout"; case 409: return "Conflict"; case 410: return "Gone"; case 411: return "Length Required"; case 412: return "Precondition Failed"; case 413: return "Request Entity Too Large"; case 414: return "Request-URI Too Long"; case 415: return "Unsupported Media Type"; case 416: return "Requested Range Not Satisfiable"; case 417: return "Expectation Failed"; case 500: return "Internal Server Error"; case 501: return "Not Implemented"; case 502: return "Bad Gateway"; case 503: return "Service Unavailable"; case 504: return "Gateway Timeout"; case 505: return "HTTP Version Not Supported"; default: return "(Unknown status code)"; } } #if HAVE_LIBCURL #include typedef struct fcurl_data { CURL *curl; char *buffer; /* buffer to store cached data */ size_t buffer_len; /* currently allocated buffers length */ size_t buffer_pos; /* end of data in buffer */ int still_running; /* is background url fetch still in progress */ Dictionary headers; /* response headers */ struct curl_slist *req; 
/* extra or overridden request headers */ int *statusptr; /* ptr to var with received HTTP status code */ } URL_FILE; CURLM *multi_handle = NULL; /* We use a global one for convenience */ int libcurl_is_initialized = 0; /* header_cb -- libcurl calls this once for every protocol header line */ static size_t header_cb(char *buf, size_t size, size_t nmemb, URL_FILE *file) { char *p, *header, *value, *url; size_t i, j; /* This routine currently only handles HTTP headers */ if (curl_easy_getinfo(file->curl, CURLINFO_EFFECTIVE_URL, &url) != CURLE_OK || (strncasecmp("http:", url, 5) != 0 && strncasecmp("https:", url, 6) != 0)) return size * nmemb; if (size * nmemb == 2) { /* CRLF signals end of headers */ assert(buf[0] == '\r' && buf[1] == '\n'); } else if (!(p = memchr(buf, ':', size * nmemb))) { /* Must be status code */ debug("+ < %s", buf); *file->statusptr = atoi(buf + strcspn(buf, " ")); if (file->headers) dict_destroy_all(file->headers); /* Clear old headers */ } else if (file->headers) { /* Normal header */ header = down(newnstring(buf, p - buf)); i = strspn(p + 1, " \t") + 1; /* Skip white space */ j = strcspn(p + i, "\r\n"); /* End before CR-LF */ assert(j < size * nmemb); value = newnstring(p + i, j); debug("+ < %s: %s\n", header, value); if (!dict_add(file->headers, header, value)) return 0; /* Memory error */ dispose(header); dispose(value); } return size * nmemb; } /* write_cb -- curl calls this routine when it has read some data */ static size_t write_cb(char *buf, size_t size, size_t nitems, void *userdata) { URL_FILE *file = (URL_FILE*)userdata; size *= nitems; if (file->buffer_len < file->buffer_pos + size) { /* Need bigger buffer */ file->buffer_len = file->buffer_pos + size; file->buffer = realloc(file->buffer, file->buffer_len); if (!file->buffer) return 0; /* Out of memory */ } memcpy(file->buffer + file->buffer_pos, buf, size); file->buffer_pos += size; return size; } /* wait_for_data -- fill the read buffer up to requested # of bytes */ static 
CURLMcode wait_for_data(URL_FILE *file, size_t want) { fd_set fdread, fdwrite, fdexcep; struct timeval timeout; CURLMcode rc; int maxfd, n; CURLMsg *msg; long curl_timeout; while (1) { do rc = curl_multi_perform(multi_handle, &file->still_running); while (rc == CURLM_CALL_MULTI_PERFORM); if (rc != CURLM_OK) return rc; /* Error */ /* Stop if the connection is closed, after checking why */ if (!file->still_running) { if (!(msg = curl_multi_info_read(multi_handle, &n))) return CURLM_OK; else return msg->msg == CURLMSG_DONE ? msg->data.result : CURLM_OK; } /* Stop when we have enough data */ if (file->buffer_pos >= want) return CURLM_OK; /* Determine how long to wait in select(), max is 1 second */ curl_multi_timeout(multi_handle, &curl_timeout); if (curl_timeout < 0 || curl_timeout >= 1000) { timeout.tv_sec = 1; /* Set 1 second timeout */ timeout.tv_usec = 0; } else { /* Use precise timeout */ timeout.tv_sec = 0; timeout.tv_usec = curl_timeout * 1000; } /* Get file descriptors from the transfers */ FD_ZERO(&fdread); FD_ZERO(&fdwrite); FD_ZERO(&fdexcep); maxfd = -1; rc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd); if (rc != CURLM_OK) return rc; /* Call select() to wait for either some data or a timeout */ if (select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout) < 0) errexit("select error: %s\n", strerror(errno)); } return CURLM_OK; } /* free_file -- decommission an easy_handle and free memory */ static void free_file(URL_FILE *file) { debug("+ free_file()\n"); (void) curl_multi_remove_handle(multi_handle, file->curl); curl_easy_cleanup(file->curl); if (file->buffer) free(file->buffer); if (file->req) curl_slist_free_all(file->req); free(file); } /* close_cb -- callback called when close(2) is called on our connection */ static int close_cb(void *cookie) { debug("+ close_cb()\n"); (void)free_file((URL_FILE*)cookie); return 0; } /* read_cb -- callback called when read(2) is called on our connection */ static ssize_t read_cb(void *cookie, char 
*buf, size_t n) { URL_FILE *file = (URL_FILE*)cookie; /* Todo: set errno to something corresponding to the error */ if (wait_for_data(file, n) != CURLM_OK) {errno = EIO; return -1;} if (file->buffer_pos < n) n = file->buffer_pos; if (!buf) {errno = EFAULT; return -1;} memcpy(buf, file->buffer, n); file->buffer_pos -= n; memmove(file->buffer, file->buffer + n, file->buffer_pos); return n; } /* cleanup -- callback for exit(3) to clean up libcurl connections */ static void cleanup(void) { if (multi_handle) { curl_multi_cleanup(multi_handle); multi_handle = NULL; } if (libcurl_is_initialized) { curl_global_cleanup(); libcurl_is_initialized = 0; } } /* fopenurl3 -- like fopenurl2, but uses method instead of GET */ EXPORT FILE *fopenurl3(const conststring method, const conststring url, const conststring mode, const Dictionary request, Dictionary response, int maxredirs, int *status) { const char *h, *v, *headerline; cookie_io_functions_t iofuncs; URL_FILE *file; CURLMcode rc; int dummy; size_t n; debug("+ fopenurl3(%s, %s, %s,..., %d,...)\n", method, url, mode, maxredirs); if (!status) status = &dummy; *status = 200; /* In case the url isn't a full URL, assume it is a local file */ assert(url != NULL); n = strspn(url, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+-."); if (url[n] != ':') return fopen(url, mode); debug("+ Not a local file, set up libcurl...\n"); if (!libcurl_is_initialized) { if (curl_global_init(CURL_GLOBAL_ALL) != 0) {errno = EIO; return NULL;} (void) atexit(cleanup); libcurl_is_initialized = 1; } new(file); file->still_running = 1; file->buffer = NULL; file->buffer_len = 0; file->buffer_pos = 0; file->headers = response; file->req = NULL; file->statusptr = status; /* Construct the extra request headers, if any */ if (request) for (h = dict_next(request, NULL); h; h = dict_next(request, h)) { v = dict_find(request, h); headerline = strapp(NULL, h, ": ", v, NULL); debug("+ > %s\n", headerline); file->req = curl_slist_append(file->req, 
headerline); dispose(headerline); } file->curl = curl_easy_init(); #ifdef DEBUG curl_easy_setopt(file->curl, CURLOPT_VERBOSE, 1L); #endif curl_easy_setopt(file->curl, CURLOPT_CUSTOMREQUEST, method); curl_easy_setopt(file->curl, CURLOPT_URL, url); curl_easy_setopt(file->curl, CURLOPT_WRITEDATA, file); curl_easy_setopt(file->curl, CURLOPT_WRITEFUNCTION, write_cb); curl_easy_setopt(file->curl, CURLOPT_HEADERFUNCTION, header_cb); curl_easy_setopt(file->curl, CURLOPT_HEADERDATA, file); curl_easy_setopt(file->curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(file->curl, CURLOPT_MAXREDIRS, (long)maxredirs); curl_easy_setopt(file->curl, CURLOPT_HTTPHEADER, file->req); #if LIBCURL_VERSION_NUM >= 0x071506 /* needs libcurl >= 7.21.6 */ curl_easy_setopt(file->curl, CURLOPT_TRANSFER_ENCODING, 1L); #endif curl_easy_setopt(file->curl, CURLOPT_ENCODING, ""); /* Let curl decode it */ if (!multi_handle && !(multi_handle = curl_multi_init())) { free_file(file); errno = EIO; return NULL; } rc = curl_multi_add_handle(multi_handle, file->curl); debug("+ added to multi_handle -> %d\n", rc); if (rc != CURLM_OK) {free_file(file); errno = EIO; return NULL;} /* Todo: make errno more specific, if possible */ debug("+ Set up the connection...\n"); /* Set up the connection */ (void)curl_multi_perform(multi_handle, &file->still_running); #if 0 if (file->buffer_pos == 0 && !file->still_running) { int n; CURLMsg *m = curl_multi_info_read(multi_handle, &n); if (m->msg == CURLMSG_DONE && m->data.result == CURLE_UNSUPPORTED_PROTOCOL) errno = EPROTONOSUPPORT; else errno = EIO; /* Todo: be more specific, if possible */ debug("+ closed before the first data\n"); free_file(file); return NULL; } #endif /* Get the first data, i.e., after any headers */ rc = wait_for_data(file, 1); if (rc != CURLM_OK) { free_file(file); if (rc == CURLE_TOO_MANY_REDIRECTS) errno = EMLINK; else if (rc == CURLE_UNSUPPORTED_PROTOCOL) errno = EPROTONOSUPPORT; else errno = EIO; return NULL;} /* Todo: make errno more specific, 
if possible */ iofuncs.read = read_cb; iofuncs.write = NULL; iofuncs.seek = NULL; iofuncs.close = close_cb; return fopencookie(file, mode, iofuncs); } /* fopenurl2 -- like fopenurl, but sends and returns HTTP headers */ EXPORT FILE *fopenurl2(const conststring url, const conststring mode, const Dictionary request, Dictionary response, int maxredirs, int *status) { assert(url != NULL); return fopenurl3("GET", url, mode, request, response, maxredirs, status); } /* fopenurl -- like fopen, but takes a URL; HTTP headers are parsed */ EXPORT FILE *fopenurl(const conststring path, const conststring mode, int *status) { assert(path != NULL); debug("+ fopenurl(\"%s\", \"%s\")\n", path, mode); return fopenurl2(path, mode, NULL, NULL, MAXREDIRECTS, status); } #else /* HAVE_LIBCURL */ #if HAVE_SYS_TYPES_H # include #endif #if HAVE_SYS_STAT_H # include #endif #if HAVE_FCNTL_H # include #endif #include #if HAVE_SYS_SOCKET_H # include #endif #if HAVE_STRINGS_H # include #endif #include #include "url.e" #include "connectsock.e" #include "headers.e" #define BUFLEN 4096 /* Max len of header lines */ static URL http_proxy = NULL, ftp_proxy = NULL; static int http_proxy_init = 0, ftp_proxy_init = 0; /* Forward declaration */ FILE *fopenurl2(const conststring path, const conststring mode, const Dictionary request, Dictionary response, int maxredirs, int *status); /* open_http2 -- open resource via HTTP; return file pointer or NULL */ static FILE *open_http2(const conststring machine, const conststring port, const conststring path, Dictionary request, Dictionary response, int maxredirs, int *status) { int delete_response = !response; conststring h, v; char buf[BUFLEN]; int fd, n, i; string s, t; FILE *f; assert(machine); assert(port); assert(path); debug("+ open_http2(\"%s\", \"%s\", \"%s\",...)\n", machine, port, path); /* Too many redirects? 
*/ if (maxredirs < 0) {errno = EMLINK; return NULL;} /* Connect */ if ((fd = connectTCP(machine, port)) < 0) return NULL; /* Construct the request */ t = strapp(NULL, "GET ", path, " HTTP/1.1\r\nHost: ", machine, NULL); if (!eq(port, "80")) strapp(&t, ":", port, NULL); strapp(&t, "\r\n", NULL); /* Add other headers */ if (request) for (h = dict_next(request, NULL); h; h = dict_next(request, h)) strapp(&t, h, ": ", dict_find(request, h), "\r\n", NULL); strapp(&t, "\r\n", NULL); /* Send the request, end with n = 0 (success) or n = -1 (failure) */ n = strlen(t); while (n > 0) { i = write(fd, t, n); if (i < 0) n = -1; else n -= i; } dispose(t); if (n < 0) return NULL; /* No more output to server */ (void)shutdown(fd, 1); /* Create FILE* */ if (!(f = fdopen(fd, "r"))) return NULL; /* Check protocol version, read status code */ if (!fgets(buf, sizeof(buf), f) || !(hasprefix(buf, "HTTP/1.1 ") || hasprefix(buf, "HTTP/1.0 "))) { (void)fclose(f); return NULL; } *status = atoi(buf + strcspn(buf, " ")); debug("+ Status = %d\n", *status); /* Read response headers */ if (!response) response = dict_create(50); if (!read_mail_headers(f, response)) { (void)fclose(f); f = NULL; } else if (hasprefix(buf+9, "301") || hasprefix(buf+9, "302") || hasprefix(buf+9, "303") || hasprefix(buf+9, "307")) { (void)fclose(f); if (!(v = dict_find(response, "location"))) { errno = 121; /* EREMOTEIO */ f = NULL; /* Redirect without a location!? */ } else { s = newstring(v); /* Because we'll delete the response dict. 
*/ dict_destroy_all(response); f = fopenurl2(s, "r", request, response, maxredirs - 1, status); dispose(s); } } /* To do: handle 305 Use Proxy */ /* Return the body of the stream */ if (response && delete_response) dict_delete(response); return f; } /* open_http -- open resource via HTTP; return file pointer or NULL */ static FILE *open_http(const URL url, Dictionary request, Dictionary response, int maxredirs, int *status) { string s, machine, port; FILE *f; /* Initialize proxy from environment variable, if not already done */ if (! http_proxy_init) { if ((s = getenv("http_proxy"))) http_proxy = URL_new(s); http_proxy_init = 1; } /* What server do we connect to: a proxy or the end server? */ machine = (http_proxy ? http_proxy : url)->machine; port = (http_proxy ? http_proxy : url)->port; if (!port) port = "80"; if (http_proxy) f = open_http2(machine, port, url->full, request,response,maxredirs,status); else { s = NULL; if (url->path) strapp(&s, url->path, NULL); if (url->query) strapp(&s, "?", url->query, NULL); if (url->fragment) strapp(&s, "#", url->fragment, NULL); if (!s || !*s) strapp(&s, "/", NULL); debug("+ path = \"%s\"\n", s); f = open_http2(machine, port, s, request, response, maxredirs, status); dispose(s); } return f; } /* open_ftp -- open resource via FTP; return file pointer or NULL */ static FILE *open_ftp(const URL url, Dictionary request, Dictionary response, int maxredirs) { string proxy; int dummy; if (! ftp_proxy_init) { if ((proxy = getenv("ftp_proxy"))) ftp_proxy = URL_new(proxy); ftp_proxy_init = 1; } /* Can only work via proxy for now... */ if (!ftp_proxy) {errno = ENOSYS; return NULL;} return open_http2(ftp_proxy->machine, ftp_proxy->port ? ftp_proxy->port : "80", url->full, request, response, maxredirs, &dummy); } /* open_file -- open resource as local file or FTP; return file ptr or NULL */ static FILE *open_file(const URL url, const conststring mode, Dictionary request, Dictionary response, int maxredirs) { FILE *f = NULL; if (! 
url->machine || eq(url->machine, "localhost")) { f = fopen(url->path, mode); } if (! f) { if (! eq(mode, "r")) errno = EACCES; /* Not yet supported */ else f = open_ftp(url, request, response, maxredirs); } return f; } /* fopenurl3 -- like fopenurl2, but with a method other than GET */ EXPORT FILE *fopenurl3(const conststring method, const conststring path, const conststring mode, const Dictionary request, Dictionary response, int maxredirs, int *status) { FILE *f = NULL; int dummy; URL url; if (!status) status = &dummy; *status = 200; if (strcmp(method, "GET") != 0) {errno = EPROTONOSUPPORT; return NULL;} url = URL_new(path); if (! url) { errno = EACCES; /* Invalid URL */ } else if (! url->proto) { f = fopen(path, mode); /* Assume it's a local file */ } else if (eq(url->proto, "http")) { if (! eq(mode, "r")) errno = ENOSYS; /* Not yet supported */ else f = open_http(url, request, response, maxredirs, status); } else if (eq(url->proto, "ftp")) { if (! eq(mode, "r")) errno = ENOSYS; /* Not yet supported */ else f = open_ftp(url, request, response, maxredirs); } else if (eq(url->proto, "file")) { f = open_file(url, mode, request, response, maxredirs); } else { errno = EPROTONOSUPPORT; /* Unimplemented protocol */ } URL_dispose(url); return f; } /* fopenurl2 -- like fopenurl, but sends and returns HTTP headers */ EXPORT FILE *fopenurl2(const conststring url, const conststring mode, const Dictionary request, Dictionary response, int maxredirs, int *status) { return fopenurl3("GET", url, mode, request, response, maxredirs, status); } /* fopenurl -- like fopen, but takes a URL; HTTP headers are parsed */ EXPORT FILE *fopenurl(const conststring url, const conststring mode, int *status) { return fopenurl3("GET", url, mode, NULL, NULL, MAXREDIRECTS, status); } #endif /* HAVE_LIBCURL */ html-xml-utils-6.5/url.c0000644000175000001440000002224412174266016012160 00000000000000/* * Routines and data structures to parse URLs * * Assumes the strings are encoded in UTF-8 * * Bug: 
URL_s_absolutize("foo/bar", "../") yields "" but should return "./" * * Copyright 1994-2011 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 7 March 1999 */ #include "config.h" #include #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRSTR # include "strstr.e" # endif #endif #include #include #include "export.h" #include "heap.e" #include "types.e" EXPORT typedef struct { string full; /* Full URL as a string */ string proto; /* Protocol */ string user; /* User name */ string password; /* User's password */ string machine; /* Domain name or IP number */ string port; /* Port number or service */ string path; /* Path part of URL */ string query; /* Query part of URL */ string fragment; /* Fragment ID part of URL */ } *URL; /* utf8tohex -- convert UTF-8 to %HH hex encoding, allocate on heap */ static string utf8tohex(const conststring s) { static string hex = "0123456789ABCDEF"; string h; int i, j; newarray(h, 3 * strlen(s) + 1); /* Usually too much */ for (i = 0, j = 0; s[i]; i++) { if (s[i] & 0x80) { /* Not ASCII */ h[j++] = '%'; h[j++] = hex[s[i]/16]; h[j++] = hex[s[i]%16]; } else if (s[i] == ' ') { /* Also escape spaces */ h[j++] = '%'; h[j++] = '2'; h[j++] = '0'; } else h[j++] = s[i]; } h[j] = '\0'; return h; } /* URL_dispose -- free the memory used by a URL struct */ EXPORT void URL_dispose(URL url) { if (url) { dispose(url->full); dispose(url->proto); dispose(url->user); dispose(url->password); dispose(url->machine); dispose(url->port); dispose(url->path); dispose(url->query); dispose(url->fragment); dispose(url); } } /* URL_new -- create a new URL struct; return NULL if not a valid URL */ EXPORT URL URL_new(const conststring url) { #define PROTO "([^:/?#]+)" /* 2--------2 */ #define USER "([^/?#@:[]*)" /* 5----------5 */ #define PASSWORD "([^/?#@[]*)" /* 7---------7 */ #define HOST 
"(([^/?#:[]+)|\\[([0-9a-fA-F:]*)])?" /* 89---------9----A-------------A-8 */ #define PORT "([^/?#]*)" /* C-------C */ #define AUTH "(" USER "(:" PASSWORD ")?@)?" HOST "(:" PORT ")?" /* 4--5--5--6---7------7--6--4 8--8 B---C--C--B */ #define PATH "([^?#[]*)" /* D------D */ #define QUERY "([^#]*)" /* F---F */ #define FRAGM "(.*)" /* H--H */ #define PAT "(" PROTO ":)?(//" AUTH ")?" PATH "(\\?" QUERY ")?(#" FRAGM ")?" /* 1 2---2 1 3 3 D--D E F---F E G H---H G */ /* * 2 = proto, 5 = user, 7 = password, 9/A = machine, C = port, D = path, * F = query, H = fragment */ # define MAXSUB 18 static regex_t re; static int initialized = 0; regmatch_t pm[MAXSUB]; URL result; assert(url != NULL); /* Compile the regexp, only once */ if (! initialized) { assert(regcomp(&re, PAT, REG_EXTENDED) == 0); /* Could be memory... */ initialized = 1; } /* Match the URL against the pattern; return NULL if no match */ if (regexec(&re, url, MAXSUB, pm, 0) != 0) return NULL; /* Store the various parts */ new(result); result->full = utf8tohex(url); result->proto = pm[2].rm_so == -1 ? NULL : down(newnstring(url, pm[2].rm_eo)); result->user = pm[5].rm_so == -1 ? NULL : newnstring(url + pm[5].rm_so, pm[5].rm_eo - pm[5].rm_so); result->password = pm[7].rm_so == -1 ? NULL : newnstring(url + pm[7].rm_so, pm[7].rm_eo - pm[7].rm_so); result->machine = pm[9].rm_so != -1 ? newnstring(url + pm[9].rm_so, pm[9].rm_eo - pm[9].rm_so) : pm[10].rm_so != -1 ? newnstring(url + pm[10].rm_so, pm[10].rm_eo - pm[10].rm_so) : NULL; result->port = pm[12].rm_so == -1 ? NULL : newnstring(url + pm[12].rm_so, pm[12].rm_eo - pm[12].rm_so); result->path = pm[13].rm_so == -1 ? NULL : newnstring(url + pm[13].rm_so, pm[13].rm_eo - pm[13].rm_so); result->query = pm[15].rm_so == -1 ? NULL : newnstring(url + pm[15].rm_so, pm[15].rm_eo - pm[15].rm_so); result->fragment = pm[17].rm_so == -1 ? 
NULL : newnstring(url + pm[17].rm_so, pm[17].rm_eo - pm[17].rm_so);
  return result;
} /* tail of URL_parse, whose beginning lies before this excerpt */

/* merge -- merge a base path and a relative path (RFC 3986, section 5.2.3) */
static string merge(const URL base, const string path)
{
  string s;
  int j;

  if (base->machine && (!base->path || !base->path[0])) {
    /* Base has an authority but an empty path: result is "/" + path */
    newarray(s, strlen(path) + 2);
    s[0] = '/';
    strcpy(s + 1, path);
  } else if (!base->path) {
    s = newstring(path);
  } else {
    /* Replace everything after the last "/" of the base path by path.
       j ends up just past that "/" (or at 0 if there is none). */
    for (j = strlen(base->path); j > 0 && base->path[j-1] != '/'; j--);
    newarray(s, j + strlen(path) + 1);
    memmove(s, base->path, j);
    strcpy(s + j, path);
  }
  return s;
}

/* remove_dot_segments -- remove /./ and /foo/../ (RFC 3986, section 5.2.4)
   Edits path in place. len tracks strlen(path + i), i.e. the length of the
   still-unprocessed suffix; i is the current scan position. */
static void remove_dot_segments(string path)
{
  int i = 0, len = strlen(path), j;

  while (len) {
    if (hasprefix(path + i, "/../")) {
      len -= 3;
      if (i == 0) {
        /* Nothing to pop: collapse "/../" to "/" */
        memmove(path + 1, path + 4, len);
      } else {
        /* Find the start of the preceding segment */
        for (j = i - 1; j > 0 && path[j-1] != '/'; j--) ;
        if (!hasprefix(path + j, "../")) {
          /* Pop that segment together with the "/../" */
          memmove(path + j, path + i + 4, len);
          i = j != 0 ? j - 1 : 0;
        } else {
          /* Preceding segment is itself "..": keep it, move past */
          i += 3;
        }
      }
    } else if (eq(path + i, "/..")) {
      /* Trailing "/.." -- same as above but nothing follows it */
      len = 0;
      if (i == 0) {
        path[1] = '\0';
        i = 1;
      } else {
        for (j = i - 1; j > 0 && path[j-1] != '/'; j--) ;
        if (!hasprefix(path + j, "../")) {
          path[j] = '\0';
          i = j;
        } else {
          i += 3;
        }
      }
    } else if (hasprefix(path + i, "/./")) {
      /* Collapse "/./" to "/" (copies the terminating NUL as well) */
      memmove(path + i, path + i + 2, len - 1);
      len -= 2;
    } else if (eq(path + i, "/.")) {
      /* Trailing "/." becomes "/" */
      path[i+1] = '\0';
      len--;
    } else {
      i++;
      len--;
    }
  }
}

/* URL_absolutize -- make a relative URL absolute
   Builds a new URL object from url resolved against base, then composes
   its full text form into abs->full.
   NOTE(review): several newstring() calls receive possibly-NULL members;
   presumably heap_newstring() maps NULL to NULL -- confirm in heap.c. */
EXPORT URL URL_absolutize(const URL base, const URL url)
{
  URL abs;
  string s;

  new(abs);
  /* RFC 3986, section 5.2.2 */
  if (url->proto) {
    /* url is already absolute: copy it wholesale */
    abs->proto = newstring(url->proto);
    abs->user = newstring(url->user);
    abs->password = newstring(url->password);
    abs->machine = newstring(url->machine);
    abs->port = newstring(url->port);
    abs->path = newstring(url->path);
    remove_dot_segments(abs->path);
    abs->query = newstring(url->query);
  } else {
    if (url->machine) {
      /* Network-path reference: take authority and path from url */
      abs->user = newstring(url->user);
      abs->password = newstring(url->password);
      abs->machine = newstring(url->machine);
      abs->port = newstring(url->port);
      abs->path = newstring(url->path);
      remove_dot_segments(abs->path);
      abs->query = newstring(url->query);
    } else {
      if (!url->path || !url->path[0]) {
        /* Empty path: keep base path; url query wins if present */
        abs->path = newstring(base->path);
        if (url->query) {
          abs->query = newstring(url->query);
        } else {
          abs->query = newstring(base->query);
        }
      } else {
        if (url->path[0] == '/') {
          /* Absolute-path reference */
          abs->path = newstring(url->path);
          remove_dot_segments(abs->path);
        } else {
          /* Relative-path reference: merge with base path */
          abs->path = merge(base, url->path);
          remove_dot_segments(abs->path);
        }
        abs->query = newstring(url->query);
      }
      abs->user = newstring(base->user);
      abs->password = newstring(base->password);
      abs->machine = newstring(base->machine);
      abs->port = newstring(base->port);
    }
    abs->proto = newstring(base->proto);
  }
  abs->fragment = newstring(url->fragment);

  /* Compose the textual form. The "+ 4" for machine covers "//" plus the
     optional "[" "]" around an IPv6 literal; the other "+ 1"s cover the
     one-character separators (":", "@", "?", "#"). */
  newarray(s, (abs->proto ? strlen(abs->proto) + 1 : 0)
    + (abs->user ? strlen(abs->user) + 1 : 0)
    + (abs->password ? strlen(abs->password) + 1 : 0)
    + (abs->machine ? strlen(abs->machine) + 4 : 0)
    + (abs->port ? strlen(abs->port) + 1 : 0)
    + (abs->path ? strlen(abs->path) : 0)
    + (abs->query ? strlen(abs->query) + 1 : 0)
    + (abs->fragment ? strlen(abs->fragment) + 1 : 0)
    + 1);
  sprintf(s, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
    abs->proto ? abs->proto : (string) "",
    abs->proto ? (string) ":" : (string) "",
    abs->machine ? (string) "//" : (string) "",
    abs->user ? abs->user : (string) "",
    abs->password ? (string) ":" : (string) "",
    abs->password ? abs->password : (string) "",
    abs->user ? (string) "@" : (string) "",
    abs->machine && strchr(abs->machine, ':') ? "[" : "",
    abs->machine ? abs->machine : (string) "",
    abs->machine && strchr(abs->machine, ':') ? "]" : "",
    abs->port ? (string) ":" : (string) "",
    abs->port ? abs->port : (string) "",
    abs->path ? abs->path : (string) "",
    abs->query ? "?" : (string) "",
    abs->query ? abs->query : (string) "",
    abs->fragment ? (string) "#" : (string) "",
    abs->fragment ? abs->fragment : (string) "");
  /* Instead of strchr() above, we could have an IPv6 flag. Necessary? */
  /* utf8tohex presumably %-escapes bytes that need escaping -- TODO confirm */
  abs->full = utf8tohex(s);
  dispose(s);
  return abs;
}

/* URL_s_absolutize -- make a relative URL absolute
   String-to-string convenience wrapper around URL_absolutize().
   Caller owns (and must free) the returned string. */
EXPORT string URL_s_absolutize(const conststring base, const conststring url)
{
  URL url1 = URL_new(url), base1 = URL_new(base);
  URL abs = URL_absolutize(base1, url1);
  string result = newstring(abs->full);
  URL_dispose(abs);
  URL_dispose(url1);
  URL_dispose(base1);
  return result;
}
html-xml-utils-6.5/html.y0000644000175000001440000001162012174313455012344 00000000000000%{
/*
 * Simple XML grammar, with call-back functions.
 *
 * Part of HTML-XML-utils, see:
 * http://www.w3.org/Tools/HTML-XML-utils/
 *
 * Copyright © 1994-2000 World Wide Web Consortium
 * See http://www.w3.org/Consortium/Legal/copyright-software
 *
 * Author: Bert Bos
 * Created: 1997
 **/

/* NOTE(review): the angle-bracket header names of the three bare #include
   lines below were stripped by text extraction -- restore from the
   distributed html.y before compiling. */
#include "config.h"
#include
#include
#include
#include "export.h"
#include "types.e"

/* The types of the various callback routines */
EXPORT typedef void (*html_handle_error_fn) (void *clientdata, const string s, int lineno);
EXPORT typedef void* (*html_handle_start_fn) (void);
EXPORT typedef void (*html_handle_end_fn) (void *clientdata);
EXPORT typedef void (*html_handle_comment_fn) (void *clientdata, const string commenttext);
EXPORT typedef void (*html_handle_text_fn) (void *clientdata, const string text);
EXPORT typedef void (*html_handle_decl_fn) (void *clientdata, const string gi, const string fpi, const string url);
EXPORT typedef void (*html_handle_pi_fn) (void *clientdata, const string pi_text);
EXPORT typedef void (*html_handle_starttag_fn) (void *clientdata, const string name, pairlist attribs);
EXPORT typedef void (*html_handle_emptytag_fn) (void *clientdata, const string name, pairlist attribs);
EXPORT typedef void (*html_handle_endtag_fn) (void *clientdata, const string name);
EXPORT typedef void (*html_handle_endincl_fn) (void *clientdata);

/* yyparse -- entry point for the parser */
EXPORT extern int yyparse(void);

/* Store client data */
static void *data;

/* All callback routines */
static
struct {
  html_handle_error_fn error;
  html_handle_start_fn start;
  html_handle_end_fn end;
  html_handle_comment_fn comment;
  html_handle_text_fn text;
  html_handle_decl_fn decl;
  html_handle_pi_fn pi;
  html_handle_starttag_fn starttag;
  html_handle_emptytag_fn emptytag;
  html_handle_endtag_fn endtag;
  html_handle_endincl_fn endincl;
} h = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};

/* Routines to bind concrete routines to the callbacks */
EXPORT void set_error_handler(html_handle_error_fn f) {h.error = f;}
EXPORT void set_start_handler(html_handle_start_fn f) {h.start = f;}
EXPORT void set_end_handler(html_handle_end_fn f) {h.end = f;}
EXPORT void set_comment_handler(html_handle_comment_fn f) {h.comment = f;}
EXPORT void set_text_handler(html_handle_text_fn f) {h.text = f;}
EXPORT void set_decl_handler(html_handle_decl_fn f) {h.decl = f;}
EXPORT void set_pi_handler(html_handle_pi_fn f) {h.pi = f;}
EXPORT void set_starttag_handler(html_handle_starttag_fn f){h.starttag = f;}
EXPORT void set_emptytag_handler(html_handle_emptytag_fn f){h.emptytag = f;}
EXPORT void set_endtag_handler(html_handle_endtag_fn f) {h.endtag = f;}
EXPORT void set_endincl_handler(html_handle_endincl_fn f) {h.endincl = f;}

extern int yylex(void);
EXPORT int lineno = 1; /* Line number in input file */
static int nrerrors = 0;
#define MAX_ERRORS_REPORTED 20

/* yyerror -- report parse error
   NOTE(review): unlike the call() macro below, this invokes h.error
   unconditionally -- presumably an error handler is always installed;
   confirm before parsing without one. */
static void yyerror(const string s)
{
  nrerrors++;
  if (nrerrors < MAX_ERRORS_REPORTED) h.error(data, s, lineno);
  else if (nrerrors == MAX_ERRORS_REPORTED) h.error(data, "too many errors", lineno);
  else ; /* don't report any more errors */
}

/* call -- if the function exists, call it with the given arguments */
#define call(fn, args) do {if (fn) (fn)args;} while (0)

%}

%union {
  string s;
  pairlist p;
}

/* NOTE(review): the <s>/<p> type tags of the %token and %type declarations
   were stripped by text extraction -- restore from the distributed html.y. */
%token TEXT COMMENT START END NAME STRING PROCINS
%token EMPTYEND DOCTYPE ENDINCL
%type
attribute attributes

%%

start
  : {data = h.start ? h.start() : NULL;} document {call(h.end, (data));}
  ;
document
  : document COMMENT {call(h.comment, (data, $2));}
  | document TEXT {call(h.text, (data, $2));}
  | document starttag
  | document endtag
  | document decl
  | document PROCINS {call(h.pi, (data, $2));}
  | document ENDINCL {call(h.endincl, (data));}
  | document error
  | /* empty */
  ;
starttag
  : START attributes '>' {call(h.starttag, (data, $1, $2));}
  | START attributes EMPTYEND {call(h.emptytag, (data, $1, $2));}
  ;
attributes
  : attribute attributes {$$ = $1; $$->next = $2;}
  | /* empty */ {$$ = NULL;}
  ;
attribute
  : NAME {pairlist h = malloc(sizeof(*h)); assert(h != NULL); h->name = $1; h->value=NULL; $$ = h;}
  | NAME '=' NAME {pairlist h = malloc(sizeof(*h)); assert(h != NULL); h->name = $1; h->value = $3; $$ = h;}
  | NAME '=' STRING {pairlist h = malloc(sizeof(*h)); assert(h != NULL); h->name = $1; h->value = $3; $$ = h;}
  ;
endtag
  : END '>' {call(h.endtag, (data, $1));}
  ;
decl
  : DOCTYPE NAME NAME STRING STRING '>' {call(h.decl, (data, $2, $4, $5));}
  | DOCTYPE NAME NAME STRING '>' {if (strcasecmp($3, "public") == 0) call(h.decl, (data, $2, $4, NULL)); else /* "system" */ call(h.decl, (data, $2, NULL, $4));}
  | DOCTYPE NAME '>' {call(h.decl, (data, $2, NULL, NULL));}
  ;
html-xml-utils-6.5/cexport.10000644000175000001440000000062511111474036012750 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH CEXPORT 1 "31 Mar 2000" .SH NAME cexport \- create header file with exported declarations from a C file .SH SYNOPSIS .B cexport .RB "[\| " \-c .IR cpp\-command " \|]" .RB "[\| " \-e .IR extension " \|]" .RB "[\| " \-h " \|]" .RI "[\| " cc\-options " \|]" .RI "[\| " file " [\| " file... " \|] \|]" .SH DESCRIPTION [ToDo]
html-xml-utils-6.5/hxtoc.10000644000175000001440000000650612123434146012417 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp ..
.TH "HXTOC" "1" "10 Jul 2011" "6.x" "HTML-XML-utils"
.SH NAME
hxtoc \- insert a table of contents in an HTML file
.SH SYNOPSIS
.B hxtoc
.RB "[\| " \-x " \|]"
.RB "[\| " \-l
.IR low " \|]"
.RB "[\| " \-h
.IR high " \|]"
.RI "[\| " file " \|]"
.RB "[\| " \-t " \|]"
.RB "[\| " \-d " \|]"
.RB "[\| " \-c
.IR class " \|]"
.SH DESCRIPTION
.LP
The
.B hxtoc
command reads an HTML file, inserts missing ID attributes in all H1 to
H6 elements between the levels
.B \-l
and
.B \-h
(unless the option
.B \-d
is in effect, see below) and also inserts A elements with NAME
attributes, so old browsers will recognize the H1 to H6 headers as
target anchors as well (unless the option
.B \-t
is in effect). The output is written to stdout.
.LP
If there is a comment of the form
.d
.e
or a pair of comments
.d
\&...
.e
then the comment, or the pair with everything in between, will be
replaced by a table of contents, consisting of a list (UL) of links to
all headers in the document.
.LP
The text of headers is copied to this table of contents, including any
inline markup, except that DFN tags and SPAN tags with a CLASS of
"index" are omitted (but the element's content is copied).
.LP
If a header has a CLASS attribute with the keyword "no-toc" as its
value (or as one of its values), then that header will not appear in
the table of contents.
.SH OPTIONS
The following options are supported:
.TP 10
.B \-x
Use XML conventions: empty elements are written with a slash at the
end:
.TP
.BI \-l " low"
Sets the lowest numbered header to appear in the table of contents.
Default is 1 (i.e., H1).
.TP
.BI \-h " high"
Sets the highest numbered header to appear in the table of contents.
Default is 6 (i.e., H6).
.TP
.B \-t
Normally,
.B hxtoc
adds both ID attributes and empty A elements with a NAME attribute and
CLASS="bctarget", so that older browsers that do not understand ID
will still find the target. With this option, the A elements will not
be generated.
.TP
.BI \-c " class"
The generated UL elements in the table of contents will have a CLASS
attribute with the value
.I class.
The default is "toc".
.TP
.B \-d
Tries to use sectioning elements as targets in the table of contents
instead of H1 to H6. A sectioning element is a DIV, SECTION, ARTICLE,
ASIDE or NAV element whose first child is a heading element (H1 to H6)
or an HGROUP. The sectioning element will be given an ID if it doesn't
have one yet. With this option, the level of any H1 to H6 that is the
first child of a sectioning element (or of an HGROUP that is itself
the first child of a sectioning element) is not determined by its
name, but by the nesting depth of the sectioning elements. (Any H1 to
H6 that are not the first child of a sectioning element still have
their level implied by their name.)
.SH OPERANDS
The following operand is supported:
.TP 10
.I file
The name of an HTML file. If absent, standard input is read instead.
.SH "DIAGNOSTICS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the parsing of the HTML file.
.B hxtoc
will try to correct the error and produce output anyway.
.SH "SEE ALSO"
.BR asc2xml (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR xml2asc (1)
.SH BUGS
.LP
The error recovery for incorrect HTML is primitive.
html-xml-utils-6.5/heap.e0000644000175000001440000000151212265516603012271 00000000000000
/* heap.e -- allocation helpers that abort via fatal3() on out-of-memory,
   recording the allocation site through __FILE__/__LINE__. */

/* fatal -- report a fatal error, tagged with the caller's location */
#define fatal(msg) fatal3(msg, __FILE__, __LINE__)
/* new -- allocate *p or die. The lone ';' is an empty "then" branch, so
   failure falls into the else; the whole expansion is one if-statement
   and "new(p);" remains safe before a following "else". */
#define new(p) if (((p)=malloc(sizeof(*(p))))); else fatal3("out of memory", __FILE__, __LINE__)
/* dispose -- free p (no-op when already NULL) and reset it to NULL,
   guarding against double free and use-after-free */
#define dispose(p) if (!(p)) ; else (free((void*)p), (p) = (void*)0)
/* heapmax -- fixed placeholder for the maximum allocation size here */
#define heapmax(p) 9999999
/* newstring/newnstring -- strdup-like copies that record the call site */
#define newstring(s) heap_newstring(s, __FILE__, __LINE__)
#define newnstring(s,n) heap_newnstring(s, n, __FILE__, __LINE__)
/* newarray/renewarray -- allocate/resize n elements of *p or die */
#define newarray(p,n) if (((p)=malloc((n)*sizeof(*(p))))); else fatal3("out of memory", __FILE__, __LINE__)
#define renewarray(p,n) if (((p)=realloc(p,(n)*sizeof(*(p))))); else fatal3("out of memory", __FILE__, __LINE__)

/* NOTE(review): fatal3 takes an unsigned int line but the heap_* functions
   take int -- harmless with __LINE__, but inconsistent. */
void fatal3(const char *s, const char *file, const unsigned int line);
char * heap_newstring(const char *s, const char *file, const int line);
char * heap_newnstring(const char *s, const size_t n, const char *file, const int line);
html-xml-utils-6.5/hxwls.10000644000175000001440000000314611606170750012437 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXWLS" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxwls \- list links in an HTML file .SH SYNOPSIS .B hxwls .RB "[\| " \-l " \|]" .RB "[\| " \-t " \|]" .RB "[\| " \-r " \|]" .RB "[\| " \-h " \|]" .RB "[\| " \-b .IR " base" " \|]" .RI "[\| " file " \|]" .SH DESCRIPTION .LP The .B hxwls command reads an HTML file (standard input by default) and prints out all links it finds. The output is written to stdout. .SH OPTIONS The following options are supported: .TP 10 .B \-l Produce a long listing. Instead of just the URI, .B hxwls prints three columns: the element name, the value of the REL attribute, and the target URI. .TP .B \-t Produce a tuple listing. .B hxwls prints four columns: the URI of the document itself, the element name, the value of the REL attribute, and the target URI. .TP .BI \-r Print relative URLs as they are, without converting them to absolute URLs.
.TP
.BI \-b " base"
Use
.I base
as the initial base URL. If there is a BASE element in the document,
it will override the \-b option.
.TP
.BI \-h
Output as HTML. The output will be listed in the form of A elements.
.SH OPERANDS
The following operand is supported:
.TP 10
.I file
The name or the URL of an HTML file. If absent, standard input is read
instead.
.SH "DIAGNOSTICS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the parsing of the HTML file.
.B hxwls
will try to correct the error and produce output anyway.
.SH "SEE ALSO"
.BR asc2xml (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR xml2asc (1)
html-xml-utils-6.5/hxunent.10000644000175000001440000000310111606170750012752 00000000000000
.de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXUNENT" "1" "10 Jul 2011" "6.x" "HTML-XML-utils"
.SH NAME
hxunent \- replace HTML predefined character entities by UTF-8
.SH SYNOPSIS
.B hxunent
.RB "[\| " \-b " \|]"
.RB "[\| " \-f " \|]"
.RI "[\| " file " \|]"
.SH DESCRIPTION
.LP
The
.B hxunent
command reads the
.I file
(or standard input) and copies it to standard output with &-entities
replaced by their equivalent character (encoded as UTF-8). E.g.,
&quot; is replaced by " and &lt; is replaced by <.
.SH OPTIONS
The following options are supported:
.TP 10
.B -b
The five builtin entities of XML (&lt; &gt; &quot; &apos; &amp;) are
not replaced but copied unchanged. This is necessary if the output has
to be valid XML or SGML.
.TP
.B -f
This option changes how unknown entities or lone ampersands are
handled. Normally they are copied unchanged, but this option tries to
"fix" them by replacing ampersands by &amp;. Often such stray
ampersands are the result of copy and paste of URLs into a document
and then this option indeed fixes them and makes the document valid.
.SH "DIAGNOSTICS"
The program's exit value is 0 if all went well, otherwise:
.TP 10
.B 1
The input couldn't be read (file not found, file not readable...)
.TP .B 2 Wrong command line arguments. .SH "SEE ALSO" .BR asc2xml (1), .BR xml2asc (1), .BR UTF-8 " (RFC 2279)" .SH BUGS .LP The program assumes entities are as defined by HTML. It doesn't read a document's DTD to find the actual definitions in use in a document. With .BR \-f , it will even remove all entities that are not HTML entities. html-xml-utils-6.5/INSTALL0000644000175000001440000003660011777117217012252 00000000000000Installation Instructions ************************* Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without warranty of any kind. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. Some packages provide this `INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). 
It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and documentation. When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular user, and only the `make install' phase executed with root privileges. 5. Optionally, type `make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a regular user, particularly if the prior `make install' required root privileges, verifies that the installation completed correctly. 6. 
You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 7. Often, you can also type `make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. 8. Some packages, particularly those that use Automake, provide `make distcheck', which can by used by developers to test that all other targets like `make install' and `make uninstall' work correctly. This target is generally not run by end users. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. This is known as a "VPATH" build. 
With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. On MacOS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or "universal" binaries--by specifying multiple `-arch' options to the compiler but only a single `-arch' option to the preprocessor. Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CPP="gcc -E" CXXCPP="g++ -E" This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results using the `lipo' tool if you have problems. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. In general, the default for these options is expressed in terms of `${prefix}', so that specifying just `--prefix' will affect all of the other directory specifications that were not explicitly provided. 
The most portable way to affect installation locations is to pass the correct locations to `configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the `make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each affected directory. For example, `make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of `${prefix}'. Any directories that were specified during `configure', but not in terms of `${prefix}', must each be overridden at install time for the entire installation to be relocated. The approach of makefile variable overrides for each directory variable is required by the GNU Coding Standards, and ideally causes no recompilation. However, some platforms have known limitations with the semantics of shared libraries that end up requiring recompilation when using this method, particularly noticeable in packages that use GNU Libtool. The second method involves providing the `DESTDIR' variable. For example, `make install DESTDIR=/alternate/directory' will prepend `/alternate/directory' before all installation names. The approach of `DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. On the other hand, it does better at avoiding recompilation issues, and works well even when some directory options were not specified in terms of `${prefix}' at `configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. 
They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the execution of `make' will be. For these packages, running `./configure --enable-silent-rules' sets the default to minimal output, which can be overridden with `make V=1'; while running `./configure --disable-silent-rules' sets the default to verbose, which can be overridden with `make V=0'. Particular systems ================== On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. HP-UX `make' updates targets which have the same time stamps as their prerequisites, which makes it generally unusable when shipped generated files such as `configure' are involved. Use GNU `make' instead. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `' header file. The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended to try ./configure CC="cc" and if that doesn't work, try ./configure CC="cc -nodtk" On Solaris, don't put `/usr/ucb' early in your `PATH'. This directory contains several dysfunctional programs; working variants of these programs are available in `/usr/bin'. So, if you need `/usr/ucb' in your `PATH', put it _after_ `/usr/bin'. On Haiku, software installed for all users goes in `/boot/common', not `/usr/local'. 
It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. 
In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of all of the options to `configure', and exit. `--help=short' `--help=recursive' Print a summary of the options unique to this package's `configure', and exit. The `short' variant lists options used only in the top level, while the `recursive' variant lists options also present in any nested packages. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `--prefix=DIR' Use DIR as the installation prefix. *note Installation Names:: for more details, including other options available for fine-tuning the installation locations. `--no-create' `-n' Run the configure checks, but stop before creating any output files. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. 
html-xml-utils-6.5/NEWS0000644000175000001440000000000007071133506011670 00000000000000html-xml-utils-6.5/TODO0000644000175000001440000000144211323651757011704 00000000000000Suggestions from Werner Heuser - `printlinks' is a great help, but I would like some links not to be touched, e.g. some graphics. - please include a download URL into the source. Imagine somebody likes the program and wants to come back and get the current version Proprietary extensions: Microsoft Internet Explorer accepts a proprietary extension to HTML that looks like malformed mark-up: "" or "" (where xxx is text that doesn't contain "]"). The parser currently reports (correctly) a syntax error. Should there be a mode in which this is accepted? If so, as what? as TEXT? Let hxindex recognize DATA-INDEX attributes in addition to TITLE? (These new DATA- attributes are suggested by HTML5.) Let hxindex look at locale for sorting order? html-xml-utils-6.5/hxnormalize.10000644000175000001440000000650011606170750013627 00000000000000.TH "HXNORMALIZE" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxnormalize \- pretty-print an HTML file .SH SYNOPSIS .B hxnormalize .RB "[\| " \-x " \|]" .RB "[\| " \-e " \|]" .RB "[\| " \-d " \|]" .RB "[\| " \-s " \|]" .RB "[\| " \-L " \|]" .RB "[\| " \-i .IR indent " \|]" .RB "[\| " \-l .IR line\-length " \|]" .RB "[\| " \-c .IR commentmagic " \|]" .RI "[\| " file-or-URL " \|]" .SH DESCRIPTION .LP The .B hxnormalize command pretty-prints an HTML file, and also tries to fix small errors. The output is the same HTML, but with a maximum line length and with optional indentation to indicate the nesting level of each line. .SH OPTIONS The following options are supported: .TP 10 .B \-x Use XML conventions: empty elements are written with a slash at the end: . Implies .BR \-e . .TP .B \-e Always insert endtags, even if HTML does not require them (for example:

        and ). .TP .B \-d Omit the DOCTYPE from the output. .TP .BI \-i " indent" Set the number of spaces to indent each nesting level. Default is 2. Not all elements cause an indent. In general, elements that can occur in a block environment are started on a new line and cause an indent, but inline elements, such as EM and SPAN do not cause an indent. .TP .BI \-l " line\-length" Sets the maximum length of lines. .B hxnormalize will wrap lines so that all lines are as long as possible, but no longer than this length. Default is 72. Words that are longer than the line length will not be broken, and will extend past this length. A \"word\" is a sequence of characters delimited by white space.) The content of the STYLE, SCRIPT and PRE elements will not be line-wrapped. .TP .B \-s Omit tags that don't have any attributes. .TP .B \-L Remove redundant "lang" and "xml:lang" attributes. (I.e., those whose value is the same as the language inherited from the parent element.) .TP .BI \-c " commentmagic" Comments are normally placed right after the preceding text. That is usually correct for short comments, but some comments are meant to be on a separate line. .I commentmagic is a string and when that string occurs inside a comment, .B hxnormalize will output an empty line before that comment. E.g. \fB\-c "===="\fR can be used to put all comments that contain "====" on a separate line, preceded by an empty line. By default, no comments are treated that way. .SH OPERANDS The following operand is supported: .TP 10 .I file-or-URL The name or URL of an HTML file. If absent, standard input is read instead. .SH "EXIT STATUS" The following exit values are returned: .TP 10 .B 0 Successful completion. .TP .B > 0 An error occurred in the parsing of the HTML file. .B hxnormalize will try to correct the error and produce output anyway. .SH ENVIRONMENT To use a proxy to retrieve remote files, set the environment variables .B http_proxy and .BR ftp_proxy "." 
E.g., .B http_proxy="http://localhost:8080/" .SH BUGS .LP The error recovery for incorrect HTML is primitive. .LP .B hxnormalize will not omit an endtag if the white space after it could possibly be significant. E.g., it will not remove the first

        from "

        text

        text

        ". .LP .B hxnormalize can currently only retrieve remote files over HTTP. It doesn't handle password-protected files, nor files whose content depends on HTTP "cookies." .SH "SEE ALSO" .BR asc2xml (1), .BR xml2asc (1), .BR UTF-8 " (RFC 2279)" html-xml-utils-6.5/hxunentmain.c0000644000175000001440000001006011421033240013666 00000000000000/* unent -- expand HTML entities * * Author: Bert Bos * Created: 10 Aug 2008 */ #include "config.h" #include #include #include #include #include "export.h" #include "unent.e" static int leave_builtin = 0; /* Leave standard entities untouched */ static int fix_ampersands = 0; /* Replace lone and unrecognized & by & */ /* append_utf8 -- append the UTF-8 sequence for code n */ static void append_utf8(const int n) { if (n <= 0x7F) { putchar(n); } else if (n <= 0x7FF) { putchar(0xC0 | (n >> 6)); putchar(0x80 | (n & 0x3F)); } else if (n <= 0xFFFF) { putchar(0xE0 | (n >> 12)); putchar(0x80 | ((n >> 6) & 0x3F)); putchar(0x80 | (n & 0x3F)); } else if (n <= 0x1FFFFF) { putchar(0xF0 | (n >> 18)); putchar(0x80 | ((n >> 12) & 0x3F)); putchar(0x80 | ((n >> 6) & 0x3F)); putchar(0x80 | (n & 0x3F)); } else if (n <= 0x3FFFFFF) { putchar(0xF0 | (n >> 24)); putchar(0x80 | ((n >> 18) & 0x3F)); putchar(0x80 | ((n >> 12) & 0x3F)); putchar(0x80 | ((n >> 6) & 0x3F)); putchar(0x80 | (n & 0x3F)); } else { putchar(0xF0 | (n >> 30)); putchar(0x80 | ((n >> 24) & 0x3F)); putchar(0x80 | ((n >> 18) & 0x3F)); putchar(0x80 | ((n >> 12) & 0x3F)); putchar(0x80 | ((n >> 6) & 0x3F)); putchar(0x80 | (n & 0x3F)); } } /* expand -- print string, expanding entities to UTF-8 sequences */ static void expand(const char *s) { const struct _Entity *e; int i, n; for (i = 0; s[i];) { if (s[i] != '&') { /* Literal character */ putchar(s[i++]); } else if (s[i+1] != '#') { /* Named entity, eg. é */ for (i++, n = 0; isalnum(s[i+n]); n++) ; if (! 
(e = lookup_entity(s + i, n))) { /* Unknown entity */ if (fix_ampersands) fputs("&", stdout); else putchar('&'); } else if (leave_builtin && (e->code == 38 || e->code == 39 || e->code == 60 || e->code == 62 || e->code == 34)) { /* Keep it */ putchar('&'); for (; isalnum(s[i]); i++) putchar(s[i]); if (s[i] != ';') putchar(';'); /* Make sure the ; is there */ } else { /* Expand to UTF-8 */ append_utf8(e->code); i += n; if (s[i] == ';') i++; } } else if (s[i+2] != 'x') { /* Decimal entity, eg. F */ for (n = 0, i += 2; isdigit(s[i]); i++) n = 10 * n + s[i] - '0'; if (leave_builtin && (n == 38 || n == 60 || n == 62 || n == 34)) printf("&#%d;", n); else append_utf8(n); if (s[i] == ';') i++; } else { /* Hex entity, eg. _ */ for (n = 0, i += 3; isxdigit(s[i]); i++) if (isdigit(s[i])) n = 16 * n + s[i] - '0'; else n = 16 * n + toupper(s[i]) - 'A' + 10; if (leave_builtin && (n == 38 || n == 60 || n == 62 || n == 34)) printf("&#x%x;", n); else append_utf8(n); if (s[i] == ';') i++; } } /* SGML says also that a record-end (i.e., an end-of-line) may be * used instead of a semicolon to end an entity reference. But the * record-end is not suppressed in HTML and such an entity reference * is invalid in XML, so we don't implement that rule here. Instead, * the end-of-line is treated as any other character (other than * semicolon) and left in the document. 
*/ } static void usage(const char *prog) #if __GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 5 __attribute__((__noreturn__)) #endif ; /* usage -- print usage message and exit */ static void usage(const char *prog) { fprintf(stderr, "Version %s\nUsage: %s [-b] [-f] [file]\n", VERSION, prog); exit(2); } /* main -- read input, expand entities, write out again */ int main(int argc, char *argv[]) { char buf[4096]; FILE *infile; int c; while ((c = getopt(argc, argv, "bf")) != -1) switch (c) { case 'b': leave_builtin = 1; break; case 'f': fix_ampersands = 1; break; default: usage(argv[0]); } if (optind == argc) infile = stdin; else if (optind == argc - 1) infile = fopen(argv[optind], "r"); else usage(argv[0]); if (infile == NULL) {perror(argv[optind]); exit(1);} while (fgets(buf, sizeof(buf), infile)) expand(buf); fclose(infile); return 0; } html-xml-utils-6.5/hxmultitoc.10000644000175000001440000000073611606170750013474 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXMULTITOC" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxmultitoc \- create a table of content for a set of HTML files .SH SYNOPSIS .B hxmultitoc .RB "[\| " \-x " \|]" .RB "[\| " \-s .IR preample " \|]" .RB "[\| " \-e .IR postample " \|]" .RB "[\| " \-l .IR low .RB " | " \-h .IR high .RB " | " \-b .IR base .RB " | " \-c .IR configfile .RI " | " file " \|] ..." .SH DESCRIPTION [ToDo] html-xml-utils-6.5/install-sh0000755000175000001440000003325611777117217013231 00000000000000#!/bin/sh # install - install a program, script, or datafile scriptversion=2011-01-19.21; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the # following copyright and license. 
# # Copyright (C) 1994 X Consortium # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # Except as contained in this notice, the name of the X Consortium shall not # be used in advertising or otherwise to promote the sale, use or other deal- # ings in this Software without prior written authorization from the X Consor- # tium. # # # FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # `make' implicit rules from creating a file called install from it # when there is no Makefile. # # This script is compatible with the BSD install script, but was written # from scratch. nl=' ' IFS=" "" $nl" # set DOITPROG to echo to test this script # Don't use :- since 4.3BSD and earlier shells don't like it. doit=${DOITPROG-} if test -z "$doit"; then doit_exec=exec else doit_exec=$doit fi # Put in absolute file names if you don't have them in your path; # or use environment vars. 
chgrpprog=${CHGRPPROG-chgrp} chmodprog=${CHMODPROG-chmod} chownprog=${CHOWNPROG-chown} cmpprog=${CMPPROG-cmp} cpprog=${CPPROG-cp} mkdirprog=${MKDIRPROG-mkdir} mvprog=${MVPROG-mv} rmprog=${RMPROG-rm} stripprog=${STRIPPROG-strip} posix_glob='?' initialize_posix_glob=' test "$posix_glob" != "?" || { if (set -f) 2>/dev/null; then posix_glob= else posix_glob=: fi } ' posix_mkdir= # Desired mode of installed file. mode=0755 chgrpcmd= chmodcmd=$chmodprog chowncmd= mvcmd=$mvprog rmcmd="$rmprog -f" stripcmd= src= dst= dir_arg= dst_arg= copy_on_change=false no_target_directory= usage="\ Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE or: $0 [OPTION]... SRCFILES... DIRECTORY or: $0 [OPTION]... -t DIRECTORY SRCFILES... or: $0 [OPTION]... -d DIRECTORIES... In the 1st form, copy SRCFILE to DSTFILE. In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. In the 4th, create DIRECTORIES. Options: --help display this help and exit. --version display version info and exit. -c (ignored) -C install only if different (preserve the last data modification time) -d create directories instead of installing files. -g GROUP $chgrpprog installed files to GROUP. -m MODE $chmodprog installed files to MODE. -o USER $chownprog installed files to USER. -s $stripprog installed files. -t DIRECTORY install into DIRECTORY. -T report an error if DSTFILE is a directory. Environment variables override the default commands: CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG " while test $# -ne 0; do case $1 in -c) ;; -C) copy_on_change=true;; -d) dir_arg=true;; -g) chgrpcmd="$chgrpprog $2" shift;; --help) echo "$usage"; exit $?;; -m) mode=$2 case $mode in *' '* | *' '* | *' '* | *'*'* | *'?'* | *'['*) echo "$0: invalid mode: $mode" >&2 exit 1;; esac shift;; -o) chowncmd="$chownprog $2" shift;; -s) stripcmd=$stripprog;; -t) dst_arg=$2 # Protect names problematic for `test' and other utilities. 
case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac shift;; -T) no_target_directory=true;; --version) echo "$0 $scriptversion"; exit $?;; --) shift break;; -*) echo "$0: invalid option: $1" >&2 exit 1;; *) break;; esac shift done if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then # When -d is used, all remaining arguments are directories to create. # When -t is used, the destination is already specified. # Otherwise, the last argument is the destination. Remove it from $@. for arg do if test -n "$dst_arg"; then # $@ is not empty: it contains at least $arg. set fnord "$@" "$dst_arg" shift # fnord fi shift # arg dst_arg=$arg # Protect names problematic for `test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac done fi if test $# -eq 0; then if test -z "$dir_arg"; then echo "$0: no input file specified." >&2 exit 1 fi # It's OK to call `install-sh -d' without argument. # This can happen when creating conditional directories. exit 0 fi if test -z "$dir_arg"; then do_exit='(exit $ret); exit $ret' trap "ret=129; $do_exit" 1 trap "ret=130; $do_exit" 2 trap "ret=141; $do_exit" 13 trap "ret=143; $do_exit" 15 # Set umask so as not to create temps with too-generous modes. # However, 'strip' requires both read and write access to temps. case $mode in # Optimize common cases. *644) cp_umask=133;; *755) cp_umask=22;; *[0-7]) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac fi for src do # Protect names problematic for `test' and other utilities. case $src in -* | [=\(\)!]) src=./$src;; esac if test -n "$dir_arg"; then dst=$src dstdir=$dst test -d "$dstdir" dstdir_status=$? else # Waiting for this to be detected by the "$cpprog $src $dsttmp" command # might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if test ! 
-f "$src" && test ! -d "$src"; then echo "$0: $src does not exist." >&2 exit 1 fi if test -z "$dst_arg"; then echo "$0: no destination specified." >&2 exit 1 fi dst=$dst_arg # If destination is a directory, append the input filename; won't work # if double slashes aren't ignored. if test -d "$dst"; then if test -n "$no_target_directory"; then echo "$0: $dst_arg: Is a directory" >&2 exit 1 fi dstdir=$dst dst=$dstdir/`basename "$src"` dstdir_status=0 else # Prefer dirname, but fall back on a substitute if dirname fails. dstdir=` (dirname "$dst") 2>/dev/null || expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$dst" : 'X\(//\)[^/]' \| \ X"$dst" : 'X\(//\)$' \| \ X"$dst" : 'X\(/\)' \| . 2>/dev/null || echo X"$dst" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q' ` test -d "$dstdir" dstdir_status=$? fi fi obsolete_mkdir_used=false if test $dstdir_status != 0; then case $posix_mkdir in '') # Create intermediate dirs using mode 755 as modified by the umask. # This is like FreeBSD 'install' as of 1997-10-28. umask=`umask` case $stripcmd.$umask in # Optimize common cases. *[2367][2367]) mkdir_umask=$umask;; .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; *[0-7]) mkdir_umask=`expr $umask + 22 \ - $umask % 100 % 40 + $umask % 20 \ - $umask % 10 % 4 + $umask % 2 `;; *) mkdir_umask=$umask,go-w;; esac # With -d, create the new directory with the user-specified mode. # Otherwise, rely on $mkdir_umask. if test -n "$dir_arg"; then mkdir_mode=-m$mode else mkdir_mode= fi posix_mkdir=false case $umask in *[123567][0-7][0-7]) # POSIX mkdir -p sets u+wx bits regardless of umask, which # is incompatible with FreeBSD 'install' when (umask & 300) != 0. 
;; *) tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 if (umask $mkdir_umask && exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writeable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. ls_ld_tmpdir=`ls -ld "$tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && $mkdirprog -m$different_mode -p -- "$tmpdir" && { ls_ld_tmpdir_1=`ls -ld "$tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi rmdir "$tmpdir/d" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null fi trap '' 0;; esac;; esac if $posix_mkdir && ( umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else # The umask is ridiculous, or mkdir does not conform to POSIX, # or it failed possibly due to a race condition. Create the # directory the slow way, step by step, checking for races as we go. case $dstdir in /*) prefix='/';; [-=\(\)!]*) prefix='./';; *) prefix='';; esac eval "$initialize_posix_glob" oIFS=$IFS IFS=/ $posix_glob set -f set fnord $dstdir shift $posix_glob set +f IFS=$oIFS prefixes= for d do test X"$d" = X && continue prefix=$prefix$d if test -d "$prefix"; then prefixes= else if $posix_mkdir; then (umask=$mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break # Don't fail if two instances are running concurrently. test -d "$prefix" || exit 1 else case $prefix in *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; *) qprefix=$prefix;; esac prefixes="$prefixes '$qprefix'" fi fi prefix=$prefix/ done if test -n "$prefixes"; then # Don't fail if two instances are running concurrently. 
(umask $mkdir_umask && eval "\$doit_exec \$mkdirprog $prefixes") || test -d "$dstdir" || exit 1 obsolete_mkdir_used=true fi fi fi if test -n "$dir_arg"; then { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 else # Make a couple of temp file names in the proper directory. dsttmp=$dstdir/_inst.$$_ rmtmp=$dstdir/_rm.$$_ # Trap to clean up those temp files at exit. trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 # Copy the file name to the temp name. (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && # and set any options; do chmod last to preserve setuid bits. # # If any of these fail, we abort the whole thing. If we want to # ignore errors from any of these, just make sure not to ignore # errors from the above "$doit $cpprog $src $dsttmp" command. # { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && # If -C, don't bother to copy if it wouldn't change the file. if $copy_on_change && old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && eval "$initialize_posix_glob" && $posix_glob set -f && set X $old && old=:$2:$4:$5:$6 && set X $new && new=:$2:$4:$5:$6 && $posix_glob set +f && test "$old" = "$new" && $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 then rm -f "$dsttmp" else # Rename the file to the real destination. $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || # The rename failed, perhaps because mv can't rename something else # to itself, or perhaps because mv is so ancient that it does not # support -f. { # Now remove or move aside any old file at destination location. 
# We try this two ways since rm can't unlink itself on some # systems and the destination file might be busy for other # reasons. In this case, the final cleanup might fail but the new # file should still install successfully. { test ! -f "$dst" || $doit $rmcmd -f "$dst" 2>/dev/null || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } } || { echo "$0: cannot unlink or rename $dst" >&2 (exit 1); exit 1 } } && # Now rename the file to the real destination. $doit $mvcmd "$dsttmp" "$dst" } fi || exit 1 trap '' 0 fi done # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: html-xml-utils-6.5/tests/0000755000175000001440000000000012265516671012436 500000000000000html-xml-utils-6.5/tests/ref2.sh0000755000175000001440000000075311526231611013543 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF term 1 text... other EOF # The echo adds a newline at the end of the file # (./hxref $TMP1; echo) >$TMP2 cat >$TMP3 <<-EOF

        term 1 text... other

        EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/incl7.sh0000755000175000001440000000100412040262563013711 00000000000000: trap 'rm -rf $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp -d /tmp/tmp.XXXXXXXXXX` || exit 1 mkdir -p $TMP3/dir1/dir2 echo 'Test' >$TMP3/dir1/test1 echo '' >$TMP3/dir1/dir2/test2 echo '' | ./hxincl -b $TMP3/. >$TMP1 (echo 'Test' echo '' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/index2.sh0000755000175000001440000000242512051517030014070 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF

        Heading 0

        Heading 1

        A-term

        Heading 2

        <M-term>

        Heading 3

        Z-term

        Index

        Remove this. EOF # The echo adds a newline at the end of the file # (./hxnum $TMP1 | LC_ALL=C ./hxindex -t -n -f; echo) >$TMP2 cat >$TMP3 <

        1. Heading 0

        Heading 1

        A-term

        1.1. Heading 2

        <M-term>

        1.1.1. Heading 3

        Z-term

        2. Index

        EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/copy5.sh0000755000175000001440000000102011471054406013734 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF . . . EOF cat >$TMP3 <<-EOF . . . EOF ./hxcopy -i http://example.com/foo1/bar -o foo2/bar $TMP1 $TMP2 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/pipe1.sh0000755000175000001440000000045611471054406013727 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxpipe -l >$TMP1 <<-EOF EOF cat >$TMP2 <<-EOF L1 (style L1 -/*\n/*]]>*/ L3 )style L4 -\n EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/tabletrans3.sh0000755000175000001440000000106212123434146015123 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo '
        head 1head 2
        head AABC1A2
        head BB2
        head CC2
        ' | ./hxtabletrans >$TMP1 echo '
        head A head B head C
        head 1 ABC1
        head 2 A2 B2 C2
        ' >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/cite4.sh0000755000175000001440000000127012225024070013704 00000000000000: trap 'rm $TMP1 $TMP2 $TMP4' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 # Make a Refer database # cat >$TMP1 <<-EOF %L label1 %K key-a key-b %K key-c key-d %L label2 EOF # The expected auxiliary file # cat >$TMP2 <<-EOF label1 label1 label2 label2 EOF # Run hxcite # ./hxcite -c -a $TMP4 $TMP1 >/dev/null <<-EOF Here is a reference that uses the label directly: [[label1]] Here is a reference that uses a key: [[key-a]] Here is another one: [[key-d]] And on that uses a key not expanded: {{key-c}} EOF cp $TMP4 tmp.tmp # Compare the generated auxiliary file # cmp -s $TMP2 $TMP4 html-xml-utils-6.5/tests/incl2.sh0000755000175000001440000000061411471054406013714 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo "Test" >$TMP3 (echo '' echo 'Ignore this' echo '' ) | ./hxincl >$TMP1 (echo 'Test' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/normalize1.sh0000755000175000001440000000055511625244345014776 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF

        EOF cat >$TMP2 <<-EOF

        EOF ./hxnormalize -i 0 -L $TMP1 >$TMP3 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/incl8.sh0000755000175000001440000000132512040261274013716 00000000000000: trap 'rm -r $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp -d /tmp/tmp.XXXXXXXXXX` || exit 1 echo 'Test1' >$TMP3/%v% echo 'Test2' >$TMP3/%v (echo '' echo '' echo '' echo '' ) | ./hxincl -s sub=%v% -s v=$TMP3 >$TMP1 (echo 'Test1' echo '' echo 'Test2' echo '' echo 'Test2' echo '' echo 'Test2' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/hxnsxml3.sh0000755000175000001440000000033711471054406014473 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF EOF ./hxnsxml $TMP1 >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xref6.sh0000755000175000001440000000070612035004522013730 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxref -l >$TMP1 <<-EOF

        @foo and foo @foo, foo. EOF # Add a newline at the end: echo >>$TMP1 cat >$TMP2 <<-EOF

        @foo and foo @foo, foo.

        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlns1.sh0000755000175000001440000000100611471054406014123 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxxmlns >$TMP1 <<-EOF EOF cat >$TMP2 <<-EOF <{}outer> <{x:y}a f="f"> <{p:q}b g="g"> <{x:y}a h="h"/> <{p:q}b/> <{http://www.w3.org/XML/1998/namespace}c/> EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlasc6.sh0000755000175000001440000000015411307205721014255 00000000000000: # Illegal UTF-8 sequence (surrogate pair) echo -e "\0355\0240\0200" | ./xml2asc >/dev/null [[ $? != 0 ]] html-xml-utils-6.5/tests/pipe3.sh0000755000175000001440000000032411624743151013725 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF test 1 test 2 EOF ./hxpipe file:$TMP1 | ./hxunpipe >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xref1.sh0000755000175000001440000000100311471054406013723 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxref >$TMP1 <<-EOF

        term is referenced twice: term and here. EOF # Add a newline at the end: echo >>$TMP1 cat >$TMP2 <<-EOF

        term is referenced twice: term and here.

        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/incl10.sh0000755000175000001440000000075712040303222013765 00000000000000: trap 'rm -r $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp -d /tmp/tmp.XXXXXXXXXX` || exit 1 p=$PWD cd $TMP3 mkdir dir echo 'Test1' >file1 echo '' >dir/file2 echo 'Test3' >file3 (echo '' echo '' ) >file0 $p/hxincl -M target file0 >$TMP1 (echo "target: \\" echo " file1 \\" echo " dir/file2 \\" echo " file3" ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlasc5.sh0000755000175000001440000000013311307204663014255 00000000000000: # Illegal UTF-8 sequence echo -e "\0340\0200\0274" | ./xml2asc >/dev/null [[ $? != 0 ]] html-xml-utils-6.5/tests/xref7.sh0000755000175000001440000000150012047235373013737 00000000000000: trap 'rm -r $DIR' 0 DIR=`mktemp -d /tmp/tmp.XXXXXXXXXX` || exit 1 p=$PWD cd $DIR echo '

        foo1 and foo2

        bar1 and bar2' >in1 echo '

        bar1 and bar2

        foo1 and foo2' >in2 $p/hxref -i index -b out1 in1 >/dev/null $p/hxref -i index -b out2 in2 >out2 $p/hxref -i index -b out1 in1 >out1 # Add a newline echo '' >>out1 echo '' >>out2 echo '

        foo1 and foo2

        bar1 and bar2

        ' >ref1 echo '

        bar1 and bar2

        foo1 and foo2

        ' >ref2 cmp -s out1 ref1 && cmp -s out2 ref2 html-xml-utils-6.5/tests/extract1.sh0000755000175000001440000000114112035010145014421 00000000000000#!/bin/bash # This test can only run if netcat is present # if ! type nc >/dev/null; then exit; fi PORT=54325 # Some port that is unlikely to be in use # Start 11 "servers" that redirect to each other # for ((i = 0; i < 11; i++)); do ((p = PORT + i)) ((q = p + 1)) echo -e "HTTP/1.1 302\r\nLocation: http://localhost:$q/\r\n\r" |\ nc -l $p >/dev/null & done sleep 1 # Give the servers time to start ./hxextract body http://localhost:$PORT/ 2>&1 | grep -q 'Too many links' code=$? kill %1 kill %2 kill %3 kill %4 kill %5 kill %6 kill %7 kill %8 kill %9 kill %10 kill %11 wait exit $code html-xml-utils-6.5/tests/xref2.sh0000755000175000001440000000100311726234776013741 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxref >$TMP1 <<-EOF

        term is referenced twice: here and term. EOF # Add a newline at the end: echo >>$TMP1 cat >$TMP2 <<-EOF

        term is referenced twice: here and term.

        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/ref3.sh0000755000175000001440000000074511526231774013557 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF other text... term EOF # The echo adds a newline at the end of the file # (./hxref $TMP1; echo) >$TMP2 cat >$TMP3 <<-EOF

        other text... term

        EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/tabletrans1.sh0000755000175000001440000000107412051454306015124 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo '
        head 1head 2
        head AA1A2
        head BB1B2
        head CC1C2
        ' | ./hxtabletrans >$TMP1 echo '
        head A head B head C
        head 1 A1 B1 C1
        head 2 A2 B2 C2
        ' >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlasc2.sh0000755000175000001440000000017511471054406014260 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo -e "\0200" >$TMP1 ! ./xml2asc <$TMP1 >/dev/null html-xml-utils-6.5/tests/relurl2.sh0000755000175000001440000000261211515360104014266 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxwls -l -b "http://example.org/remove/this" >$TMP1 <<-EOF link1 link2 link3 query query self self link4 link5 img link6 link7 link8 EOF cat >$TMP2 <<-EOF link rel http://example.org/link0 base http://user:pass@[1234:abcd]:90/base/segm?query1 a http://user:pass@[1234:abcd]:90/base/link1 a http://user:pass@[1234:abcd]:90/link2 a http://other.com/base3/link3 a http://user:pass@[1234:abcd]:90/base/query?query2 a http://user:pass@[1234:abcd]:90/base/segm?query3 a http://user:pass@[1234:abcd]:90/base/segm?query1 a http://user:pass@[1234:abcd]:90/base/segm?query1#fragment a http://user:pass@[1234:abcd]:90/link4 a http://user:pass@[1234:abcd]:90/link5 img http://user:pass@[1234:abcd]:90/base/img.png a http://user:pass@[1234:abcd]:90/base/ a http://user:pass@[1234:abcd]:90/base/link7 a http://user:pass@[1234:abcd]:90/link7 EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/cite1.sh0000755000175000001440000000165612225025066013717 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF %L label1 %L label2 EOF cat >$TMP2 <<-EOF aaa [label1] bbb [label1] ccc aaa [label1] bbb [label1] ccc aaa {{label1]] bbb {label1} ccc aaa aaa[label1]bbb EOF ./hxcite -c -a $TMP4 $TMP1 >$TMP3 <<-EOF aaa [[label1]] bbb [[label1]] ccc aaa [[label1]] bbb [label1] ccc aaa {{label1]] bbb {label1} ccc aaa aaa[[label1]]bbb EOF cmp -s $TMP2 $TMP3 
html-xml-utils-6.5/tests/incl4.sh0000755000175000001440000000054212040007523013705 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo "Test" >$TMP3 echo '' | ./hxincl -s sub='%var%' -s var=$TMP3 >$TMP1 (echo 'Test' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/incl5.sh0000755000175000001440000000055112040241513013705 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo "Test" >$TMP3 echo '' | ./hxincl -s sub='%v%' -s v='%u%' -s u=$TMP3 >$TMP1 (echo 'Test' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/incl6.sh0000755000175000001440000000077212040245234013717 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo 'Test' >$TMP4 echo '' >$TMP3 echo '' | ./hxincl -s sub=$TMP3 -s sub2=$TMP4 -b /tmp/here >$TMP1 (echo 'Test' echo '' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/toc2.sh0000755000175000001440000000150312224775212013554 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxtoc -d -t >$TMP1 <<-EOF Test

        Document heading

        Second-level heading

        Third-level heading

        EOF echo >>$TMP1 # Add newline cat >$TMP2 <<-EOF Test

        Document heading

        Second-level heading

        Third-level heading

        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/pipe2.sh0000755000175000001440000000056311471054406013727 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxpipe >$TMP1 <<-EOF text1 <_foo>text2 text3 EOF cat >$TMP2 <<-EOF Afoo1 CDATA bar1 Afoo2 CDATA bar2 bar2 (abc -\ntext1\n |def -\n (_foo -text2 )_foo -\ntext3\n )abc -\n EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlasc3.sh0000755000175000001440000000026211471054406014256 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo "<" | ./xml2asc >$TMP1 echo "<" >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlasc4.sh0000755000175000001440000000012611307204437014255 00000000000000: # Illegal UTF-8 sequence echo -e "\0300\0274" | ./xml2asc >/dev/null [[ $? != 0 ]] html-xml-utils-6.5/tests/hxnsxml1.sh0000755000175000001440000000105711471054406014471 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4 $TMP5' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP5=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF EOF # Running hxxmlns and hxnsxml twice should not change anything: # ./hxxmlns $TMP1 >$TMP2 ./hxnsxml $TMP2 >$TMP3 ./hxxmlns $TMP3 >$TMP4 ./hxnsxml $TMP4 >$TMP5 cmp -s $TMP2 $TMP4 && cmp -s $TMP3 $TMP5 html-xml-utils-6.5/tests/incl9.sh0000755000175000001440000000106212040303436013712 00000000000000: trap 'rm -r $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp -d /tmp/tmp.XXXXXXXXXX` || exit 1 echo 'Test1' >$TMP3/file1 echo 'Test2' >$TMP3/file2 echo 'Test3' >$TMP3/file3 echo '' >$TMP3/file0 echo '' >>$TMP3/file0 echo '' >>$TMP3/file0 ./hxincl -M foo -s sub=$TMP3 $TMP3/file0 >$TMP1 echo "foo: \\" >$TMP2 echo " $TMP3/file1 \\" >>$TMP2 echo " 
$TMP3/file2 \\" >>$TMP2 echo " $TMP3/file3" >>$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/incl1.sh0000755000175000001440000000052211471054406013711 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo "Test" >$TMP3 echo '' | ./hxincl >$TMP1 (echo 'Test' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/uncdata1.sh0000755000175000001440000000052511471054406014406 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxuncdata >$TMP1 <<-EOF EOF cat >$TMP2 <<-EOF EOF # if cmp -s $TMP1 $TMP2; then echo Pass; else echo Fail; exit 2; fi cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlasc7.sh0000755000175000001440000000012611307213570014256 00000000000000: # Illegal UTF-8 sequence echo -e "\0301\0277" | ./xml2asc >/dev/null [[ $? != 0 ]] html-xml-utils-6.5/tests/toc1.sh0000755000175000001440000000170612123434135013552 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxtoc -d -t >$TMP1 <<-EOF Test

        Document heading

        Second-level heading

        Third-level heading
        EOF echo >>$TMP1 # Add newline cat >$TMP2 <<-EOF Test

        Document heading

        Second-level heading

        Third-level heading
        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/cite3.sh0000755000175000001440000000162312225023325013707 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF %L label1 %L label2 EOF cat >$TMP2 <<-EOF aaa [label1] bbb [label1] ccc aaa [label1] bbb [label1] ccc aaa {{label1]] bbb {label1} ccc aaa aaa[label1]bbb EOF ./hxcite -a $TMP4 $TMP1 >$TMP3 <<-EOF aaa [[label1]] bbb [[label1]] ccc aaa [[label1]] bbb [label1] ccc aaa {{label1]] bbb {label1} ccc aaa aaa[[label1]]bbb EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/unpipe3.sh0000755000175000001440000000077611606170707014304 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxunpipe >$TMP1 <<-EOF ?processing instruction -\n !root "fpi" si -\n *comment L3 -\n Afoo CDATA bar\012 Abar IMPLIED Aid TOKEN x12 (x -line 1\nline 2\n \012111 )x |empty -\n C EOF cat >$TMP2 <<-EOF line 1 line 2 111 EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/index.sh0000755000175000001440000000240012051516743014011 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF

        Heading 0

        Heading 1

        A-term

        Heading 2

        <M-term>

        Heading 3

        Z-term

        Index

        Remove this. EOF # The echo adds a newline at the end of the file # (./hxnum $TMP1 | LC_ALL=C ./hxindex -t -n; echo) >$TMP2 cat >$TMP3 <

        1. Heading 0

        Heading 1

        A-term

        1.1. Heading 2

        <M-term>

        1.1.1. Heading 3

        Z-term

        2. Index

        EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/wls1.sh0000755000175000001440000000076411471054406013601 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF . EOF cat >$TMP2 <<-EOF http://example.org/style.css ../othersub/foo.html ../othersub/bar/foo.png ../bar/foo.png EOF ./hxwls -b ../othersub/base $TMP1 >$TMP3 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/hxnsxml2.sh0000755000175000001440000000040711471054406014470 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF EOF # No namespaces in input then none in output # ./hxnsxml $TMP1 >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/cdata1.sh0000755000175000001440000000045711471054406014047 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxnormalize -i 0 >$TMP1 <<-EOF EOF cat >$TMP2 <<-EOF EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xref4.sh0000755000175000001440000000103411716467774013754 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxref -l >$TMP1 <<-EOF

        boss is referenced twice: bosses and here. EOF # Add a newline at the end: echo >>$TMP1 cat >$TMP2 <<-EOF

        boss is referenced twice: bosses and here.

        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/mkbib1.sh0000755000175000001440000000203412225024102014034 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4 $TMP5' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP5=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 # Make a Refer database with bibliographic data # cat >$TMP1 <<-EOF %L LABEL1 %T Title One %A Author One %L LABEL2 %T Title Two %A Author Two %A Another Author %D 2013 EOF # Make an auxiliary file with a list of labels # cat >$TMP2 <<-EOF LABEL2 LABEL2 LABEL1 EOF # Make a template for the generated bibliography # cat >$TMP3 <<-EOF Bibliography # %L sorted by label %{L:%L %A %T %{D:%D%}%{!D:no date%} %}End EOF # The expected output of hxmkbib # cat >$TMP4 <<-EOF Bibliography # sorted by label LABEL1 Author One Title One no date LABEL2 Author Two; Another Author Title Two 2013 End EOF # Run hxmkbib with the above three files # ./hxmkbib -a $TMP2 $TMP1 $TMP3 >$TMP5 # Compare to the expected output # cmp -s $TMP4 $TMP5 html-xml-utils-6.5/tests/unpipe2.sh0000755000175000001440000000046611606170707014277 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF Text EOF ./hxpipe $TMP1 | ./hxunpipe >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/hxnsxml4.sh0000755000175000001440000000113211471054406014466 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4 $TMP5' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP5=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF EOF # Running hxxmlns and hxnsxml twice should not change anything, # including escaped characters. 
# ./hxxmlns $TMP1 >$TMP2 ./hxnsxml $TMP2 >$TMP3 ./hxxmlns $TMP3 >$TMP4 ./hxnsxml $TMP4 >$TMP5 cmp -s $TMP2 $TMP4 && cmp -s $TMP3 $TMP5 html-xml-utils-6.5/tests/clean1.sh0000755000175000001440000000061612073323655014056 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo '' >$TMP3 echo '

        <!- this is not a comment -->' >$TMP2 echo '

        ' >>$TMP2 ./hxclean $TMP3 >$TMP1 echo >>$TMP1 # Add a newline at the end cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/copy2.sh0000755000175000001440000000070711471054406013744 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF . . . EOF cat >$TMP3 <<-EOF . . . EOF ./hxcopy -i foo1/bar -o foo2/bar $TMP1 $TMP2 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/relurl3.sh0000755000175000001440000000045211515361523014275 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxwls -l -b "http://example.org/remove/this" >$TMP1 <<-EOF EOF cat >$TMP2 <<-EOF link rel http:/path EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xref5.sh0000755000175000001440000000105611716472052013742 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxref -l >$TMP1 <<-EOF

        bounty is referenced twice: bounties and here. EOF # Add a newline at the end: echo >>$TMP1 cat >$TMP2 <<-EOF

        bounty is referenced twice: bounties and here.

        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/index5.sh0000755000175000001440000000146112051510712014072 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4 $TMP5' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP5=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 # Make a file to index echo 'This documentterm1 term2' >$TMP1 # Initialize a database with some terms cat >$TMP2 <$TMP3 <>$TMP3 # Call hxindex ./hxindex -i $TMP2 -b foo $TMP1 >/dev/null # Check. The order of the terms in the database may differ. sort $TMP2 >$TMP4 sort $TMP3 >$TMP5 cmp -s $TMP4 $TMP5 html-xml-utils-6.5/tests/addid1.sh0000755000175000001440000000143212124540544014031 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/addidXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/addidXXXXXXXX` || exit 1 ./hxaddid p >$TMP1 <<-EOF

        This is a paragraph in English.

        Voilà un écrit en français.

        АБВГҐ ДЂЃЕЁ ЄЖЗЗ́Ѕ ИІЇЙЈ КЛЉМФ ХЦЧЏШ ЩЪЫЬЭ ЮЯ

        αβγδε ζηθικ λμνοπ ρςστ υφχψω EOF echo >>$TMP1 # Add newline cat >$TMP2 <<-EOF

        This is a paragraph in English.

        Voilà un écrit en français.

        АБВГҐ ДЂЃЕЁ ЄЖЗЗ́Ѕ ИІЇЙЈ КЛЉМФ ХЦЧЏШ ЩЪЫЬЭ ЮЯ

        αβγδε ζηθικ λμνοπ ρςστ υφχψω

        EOF cp $TMP1 tmp.html cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/cite2.sh0000755000175000001440000000107612225023326013711 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3 $TMP4' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP4=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF %L label1 %L label2 EOF cat >$TMP2 <<-EOF label1 label1 label1 label2 label1 EOF ./hxcite -c -a $TMP4 $TMP1 >$TMP3 <<-EOF aaa [[label1]] bbb [[label1]] ccc aaa [[label1]] bbb [label1] ccc aaa {{label1]] bbb {label1} ccc aaa aaa[[label1]]bbb EOF cmp -s $TMP2 $TMP4 html-xml-utils-6.5/tests/ref1.sh0000755000175000001440000000072311526231574013547 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF term 1 text... term 1 EOF # The echo adds a newline at the end of the file # (./hxref $TMP1; echo) >$TMP2 cat >$TMP3 <<-EOF

        term 1 text... term 1

        EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/ascxml.sh0000755000175000001440000000057411471054406014201 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./asc2xml <<-EOF | ./xml2asc >$TMP1 abc123  ¡¢ “ „ „ ☀☁☂ ☃ EOF cat >$TMP2 <<-EOF abc123  ¡¢ “ „ „ ☀☁☂ ☃ EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/index4.sh0000755000175000001440000000340212051517030014066 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF

        Heading 0

        Heading 1

        A-term

        Z-term

        Heading 2

        <M-term>

        A-term

        Heading 3

        Z-term

        M-term

        Index

        Remove this. EOF # The echo adds a newline at the end of the file # (./hxnum $TMP1 | LC_ALL=C ./hxindex -t -n -f -r; echo) >$TMP2 cat >$TMP3 <

        1. Heading 0

        Heading 1

        A-term

        Z-term

        1.1. Heading 2

        <M-term>

        A-term

        1.1.1. Heading 3

        Z-term

        M-term

        2. Index

        EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/relurl1.sh0000755000175000001440000000232411471054406014273 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxwls -l -b "http://example.org/remove/this" >$TMP1 <<-EOF link1 link2 link3 query query self self link4 link5 img link6 link7 link8 EOF cat >$TMP2 <<-EOF link rel http://example.org/link0 base http://example.org/base/segm?query1 a http://example.org/base/link1 a http://example.org/link2 a http://other.com/base3/link3 a http://example.org/base/query?query2 a http://example.org/base/segm?query3 a http://example.org/base/segm?query1 a http://example.org/base/segm?query1#fragment a http://example.org/link4 a http://example.org/link5 img http://example.org/base/img.png a http://example.org/base/ a http://example.org/base/link7 a http://example.org/link7 EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/unpipe1.sh0000755000175000001440000000060511606170707014271 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxunpipe >$TMP1 <<-EOF !root "test1" test2 -\n !root "" test2 -\n !root "test1" -\n !root "" -\n EOF cat >$TMP2 <<-EOF EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xmlasc1.sh0000755000175000001440000000035211471054406014254 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo -e "abcdefghijklmnopqrstuvwxyz\0000\0002\0003\0175\0176\0177" >$TMP1 ./xml2asc <$TMP1 >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/copy1.sh0000755000175000001440000000040411471054406013735 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF . . . 
EOF ./hxcopy $TMP1 $TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/wls2.sh0000755000175000001440000000106411471054406013574 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF . EOF cat >$TMP2 <<-EOF http://example.org/style.css http://example.org/othersub/foo.html http://example.org/othersub/bar/foo.png http://example.org/bar/foo.png EOF ./hxwls -b http://example.org/othersub/base $TMP1 >$TMP3 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/remove2.sh0000755000175000001440000000073112041544241014257 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXX` || exit 1 cat >$TMP1 < EOF cat >$TMP2 < EOF ./hxremove ':lang(fr)' <$TMP1 >$TMP3 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/incl11.sh0000755000175000001440000000033012051553150013762 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 printf "Test\n" >$TMP2 printf '' | ./hxincl -f >$TMP1 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/copy3.sh0000755000175000001440000000071111471054406013740 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF . . . EOF cat >$TMP3 <<-EOF . . . EOF ./hxcopy -i foo1/bar -o foo2/bar <$TMP1 >$TMP2 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/index3.sh0000755000175000001440000000357612124535367014117 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF

        Heading 0

        Heading 1

        A-term

        Z-term

        Heading 2

        <M-term>

        A-term

        Heading 3

        Z-term

        M-term

        term!!M.

        Index

        Remove this. EOF # The echo adds a newline at the end of the file # (./hxnum $TMP1 | LC_ALL=C ./hxindex -t -n -f; echo) >$TMP2 cat >$TMP3 <

        1. Heading 0

        Heading 1

        A-term

        Z-term

        1.1. Heading 2

        <M-term>

        A-term

        1.1.1. Heading 3

        Z-term

        M-term

        term!!M.

        2. Index

        EOF cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/copy4.sh0000755000175000001440000000077411471054406013752 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 cat >$TMP1 <<-EOF . . . EOF cat >$TMP3 <<-EOF . . . EOF ./hxcopy -i http://example.com/foo1/bar -o foo2/bar <$TMP1 >$TMP2 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/tabletrans2.sh0000755000175000001440000000113612041545077015131 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo '
        head 1head 2
        head AA1A2
        head BB1B2
        head CC1C2
        ' | ./hxtabletrans | ./hxtabletrans | ./hxtabletrans >$TMP1 echo '
        head A head B head C
        head 1 A1 B1 C1
        head 2 A2 B2 C2
        ' >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/xref3.sh0000755000175000001440000000103611716466153013743 00000000000000: trap 'rm $TMP1 $TMP2' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 ./hxref -l >$TMP1 <<-EOF

        term is referenced twice: term and plural. EOF # Add a newline at the end: echo >>$TMP1 cat >$TMP2 <<-EOF

        term is referenced twice: term and plural.

        EOF cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/incl3.sh0000755000175000001440000000052311471054406013714 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1 echo "Test" >$TMP3 echo '' | ./hxincl -s sub=$TMP3 >$TMP1 (echo 'Test' echo '' ) >$TMP2 cmp -s $TMP1 $TMP2 html-xml-utils-6.5/tests/remove1.sh0000755000175000001440000000065212041563331014261 00000000000000: trap 'rm $TMP1 $TMP2 $TMP3' 0 TMP1=`mktemp /tmp/tmp.XXXXXXXX` || exit 1 TMP2=`mktemp /tmp/tmp.XXXXXXXX` || exit 1 TMP3=`mktemp /tmp/tmp.XXXXXXXX` || exit 1 cat >$TMP1 < EOF cat >$TMP2 < EOF ./hxremove foo <$TMP1 >$TMP3 cmp -s $TMP2 $TMP3 html-xml-utils-6.5/tests/copy6.sh0000755000175000001440000000047511471054406013752 00000000000000: trap 'rm -rf $TMP' 0 TMP=`mktemp -d /tmp/tmp.XXXXXXXXXX` || exit 1 EXEDIR=$PWD cd -P $TMP || exit 1 # -P to avoid symlinks cat >aaa <<-EOF . . . EOF # Copy from relative path to absolute path $EXEDIR/hxcopy aaa $PWD/bbb cmp -s aaa bbb html-xml-utils-6.5/hxunxmlns.c0000644000175000001440000001320712174266016013421 00000000000000/* * unxmlns - convert <{namespace}foo> to * * This program is the reverse of xmlns. * * To do: optimize, i.e., reuse inherited namespace declaration, * instead of declaring them again on every element. 
* * Copyright 2005 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 8 November 2005 * Version: $Id: hxunxmlns.c,v 1.6 2013-06-30 20:39:10 bbos Exp $ * **/ #include "config.h" #include #ifdef HAVE_UNISTD_H # include #endif #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif #endif #include #include #include #include "export.h" #include "types.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "errexit.e" #define XMLNS "{http://www.w3.org/XML/1998/namespace}" const static size_t XMLNSLEN = 39; /* strlen(XMLNS) */ extern int yylineno; /* From scan.l */ static bool has_error = false; /* --------------- implements interface api.h -------------------------- */ /* handle_error -- called when a parse error occurred */ void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); has_error = true; } /* start -- called before the first event is reported */ void* start(void) { return NULL; } /* end -- called after the last event is reported */ void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ void handle_comment(void *clientdata, string commenttext) { printf("", commenttext); } /* handle_text -- called after a text chunk is parsed */ void handle_text(void *clientdata, string text) { fputs(text, stdout); } /* handle_decl -- called after a declaration is parsed */ void handle_decl(void *clientdata, string gi, string fpi, string url) { printf(""); } /* handle_pi -- called after a PI is parsed */ void handle_pi(void *clientdata, string pi_text) { printf("", pi_text); } /* print_attrs -- print attributes and declare their namespaces, if any */ static void print_attrs(const pairlist attribs) { string h, s; pairlist p; int n = 0; for (p = attribs; p; p = p->next) { if (p->name[0] != '{') { printf(" %s=\"%s\"", p->name, 
p->value); } else if (p->name[1] == '}') { printf(" %s=\"%s\"", p->name + 2, p->value); } else if (strncmp(p->name, XMLNS, XMLNSLEN) == 0) { printf(" xml:%s=\"%s\"", p->name + XMLNSLEN, p->value); } else if (! (h = strchr(p->name, '}'))) { fprintf(stderr, "%d: Unmatched \"{\" in attribute name (\"%s\")\n", yylineno, p->name); has_error = true; } else { printf(" xmlns:x%d=\"", n); for (s = p->name + 1; s != h; s++) putchar(*s); printf("\" x%d:%s=\"%s\"", n, h + 1, p->value); } } } /* print_tag_start -- print "'); } /* handle_emptytag -- called after an empty tag is parsed */ void handle_emptytag(void *clientdata, string name, pairlist attribs) { print_tag_start(name); print_attrs(attribs); printf(" />"); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ void handle_endtag(void *clientdata, string name) { string h; if (name[0] != '{') { printf("", name); } else if (! (h = strchr(name, '}'))) { fprintf(stderr, "%d: Unmatched \"{\" in tag name (\"%s\")\n", yylineno, name); has_error = true; } else { printf("", h + 1); } } /* --------------------------------------------------------------------- */ /* usage -- print usage message and exit */ static void usage(string prog) { fprintf(stderr, "Version %s\nUsage: %s [html-file-or-url]\n", VERSION, prog); exit(2); } int main(int argc, char *argv[]) { int i, status = 200; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); /* Parse command line arguments */ for (i = 1; i < argc && argv[i][0] == '-' && !eq(argv[i], "--"); i++) { switch (argv[i][1]) { default: usage(argv[0]); } } if (i < argc && eq(argv[i], "--")) i++; if (i == argc) yyin = stdin; else if (i == argc - 1 && eq(argv[i], 
"-")) yyin = stdin; else if (i == argc - 1) yyin = fopenurl(argv[i], "r", &status); else usage(argv[0]); if (yyin == NULL) {perror(argv[i]); exit(1);} if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status)); if (yyparse() != 0) exit(3); return has_error ? 1 : 0; } html-xml-utils-6.5/hxname2id.c0000644000175000001440000001410212174313455013227 00000000000000/* * Move target anchors to the element they belong to, i.e., look for * and and replace it with * * There is no attempt to check if the name is a valid SGML/XML token * or whether it is unique. The replacement is syntactical only. * * Copyright 2004 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: Dec 2004 * Version: $Id: hxname2id.c,v 1.6 2013-07-25 21:06:22 bbos Exp $ * **/ #include "config.h" #include #include #include #include #include #include #if STDC_HEADERS # include #else # ifndef HAVE_STRCHR # define strchr index # define strrchr rindex # endif # ifndef HAVE_STRSTR # include "strstr.e" # endif #endif #ifdef HAVE_ERRNO_H # include #endif #ifdef HAVE_SEARCH_H # include #else # include "search-freebsd.h" #endif #include "export.h" #include "types.e" #include "heap.e" #include "tree.e" #include "html.e" #include "scan.e" #include "dict.e" #include "openurl.e" #include "errexit.e" static Tree tree; static bool xml = false; /* Use convention */ /* handle_error -- called when a parse error occurred */ static void handle_error(void *clientdata, const string s, int lineno) { fprintf(stderr, "%d: %s\n", lineno, s); } /* start -- called before the first event is reported */ static void* start(void) { tree = create(); return NULL; } /* end -- called after the last event is reported */ static void end(void *clientdata) { /* skip */ } /* handle_comment -- called after a comment is parsed */ static void handle_comment(void *clientdata, string commenttext) { tree = append_comment(tree, commenttext); } /* handle_text -- called after a 
tex chunk is parsed */ static void handle_text(void *clientdata, string text) { tree = append_text(tree, text); } /* handle_declaration -- called after a declaration is parsed */ static void handle_decl(void *clientdata, string gi, string fpi, string url) { tree = append_declaration(tree, gi, fpi, url); } /* handle_proc_instr -- called after a PI is parsed */ static void handle_pi(void *clientdata, string pi_text) { tree = append_procins(tree, pi_text); } /* handle_starttag -- called after a start tag is parsed */ static void handle_starttag(void *clientdata, string name, pairlist attribs) { tree = html_push(tree, name, attribs); } /* handle_emptytag -- called after an empty tag is parsed */ static void handle_emptytag(void *clientdata, string name, pairlist attribs) { handle_starttag(clientdata, name, attribs); } /* handle_endtag -- called after an endtag is parsed (name may be "") */ static void handle_endtag(void *clientdata, string name) { tree = html_pop(tree, name); } /* has_anchor_child -- check if the first thing in the element is an */ static bool has_anchor_child(Tree t, conststring *nameval) { Tree h; /* Loop until either text or an element is found */ for (h = t->children; h != NULL; h = h->sister) { switch (h->tp) { case Comment: /* Skip these */ case Procins: break; case Text: /* Skip if whitespace, otherwise return false */ if (! only_space(h->text)) return false; break; case Element: /* true if or , else false */ return eq(h->name, "a") && ((*nameval = get_attrib(h, "id")) || (*nameval = get_attrib(h, "name"))); default: assert(! 
"Cannot happen"); } } return false; } /* process -- write the tree, add IDs at elements with an child */ static void process(Tree t, bool remove_anchor) { Tree h; conststring nameval; bool remove_next_anchor = false; pairlist a; for (h = t->children; h != NULL; h = h->sister) { switch (h->tp) { case Text: printf("%s", h->text); break; case Comment: printf("", h->text); break; case Declaration: printf("name); if (h->text) printf(" PUBLIC \"%s\"", h->text); if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url); printf(">"); break; case Procins: printf("", h->text); break; case Element: if (!get_attrib(h, "id") && has_anchor_child(h, &nameval)) { /* Put the anchor on this element and remove it from the child */ set_attrib(h, "id", nameval); remove_next_anchor = true; } printf("<%s", h->name); for (a = h->attribs; a != NULL; a = a->next) { /* Print attribs, except id/name that the parent wants us to remove */ if (!remove_anchor || (!eq(a->name, "id") && !eq(a->name, "name"))) { printf(" %s", a->name); if (a->value != NULL) printf("=\"%s\"", a->value); } } if (is_empty(h->name)) { assert(h->children == NULL); printf(xml ? " />" : ">"); } else { printf(">"); process(h, remove_next_anchor); printf("", h->name); } break; case Root: assert(! "Cannot happen"); break; default: assert(! 
"Cannot happen"); } } } /* usage -- print usage message and exit */ static void usage(string name) { errexit("Version %s\nUsage: %s [-x] [html-file]\n", VERSION, name); } int main(int argc, char *argv[]) { int i, status; /* Bind the parser callback routines to our handlers */ set_error_handler(handle_error); set_start_handler(start); set_end_handler(end); set_comment_handler(handle_comment); set_text_handler(handle_text); set_decl_handler(handle_decl); set_pi_handler(handle_pi); set_starttag_handler(handle_starttag); set_emptytag_handler(handle_emptytag); set_endtag_handler(handle_endtag); yyin = stdin; for (i = 1; i < argc; i++) { if (eq(argv[i], "-x")) { xml = true; } else if (eq(argv[i], "-?")) { usage(argv[0]); } else if (eq(argv[i], "-")) { /* yyin = stdin; */ } else { yyin = fopenurl(argv[i], "r", &status); if (yyin == NULL) {perror(argv[1]); exit(2);} if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status)); } } if (yyparse() != 0) exit(3); tree = get_root(tree); process(tree, false); tree_delete(tree); /* Just to test memory mgmt */ return 0; } html-xml-utils-6.5/hxunxmlns.10000644000175000001440000000146611606170750013341 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXUNXMLNS" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxunxmlns \- replace XML "global names" by Namespace prefixes .SH SYNOPSIS .B hxunxmlns .RB "[\| " \-\- " \|]" .RI "[\| " file " \|]" .SH DESCRIPTION .B hxunxmlns is the reverse of .BR hxxmlns (1). It converts element and attribute names of the form .d <{URL1}foo {URL2}bar="..."> .e to .d .e .PP The predefined XML Namespace is recognized and replaced by the prefix "xml:". .SH "DIAGNOSTICS" The following exit values are returned: .TP 10 .B 0 Successful completion. .TP .B > 0 One or more errors occurred in the parsing of the file. .SH "SEE ALSO" .BR hxxmlns (1) .SH BUGS .LP The error recovery for incorrect XML is primitive. 
html-xml-utils-6.5/hxunpipe.10000644000175000001440000000212711606170750013130 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXUNPIPE" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxunpipe \- convert output of hxpipe back to XML format .SH SYNOPSIS .B hxunpipe .RI "[\| " file-or-URL " \|]" .SH DESCRIPTION .B hxunpipe takes the output of .BR hxpipe (1) (or of .BR onsgmls (1)) and turns it back into XML/SGML mark-up. .SH OPERANDS The following operand is supported: .TP 10 .I file-or-URL The name or URL of an HTML file. If absent, standard input is read instead. .SH "EXIT STATUS" The following exit values are returned: .TP 10 .B 0 Successful completion. .TP .B > 0 An error occurred in the input. .SH ENVIRONMENT To use a proxy to retrieve remote files, set the environment variables .B http_proxy and .BR ftp_proxy "." E.g., .B http_proxy="http://localhost:8080/" .SH BUGS .LP Not all syntax errors in the input are recognized. .LP .B hxunpipe can currently only retrieve remote files over HTTP. It doesn't handle password-protected files, nor files whose content depends on HTTP "cookies." .SH "SEE ALSO" .BR hxpipe (1), .BR onsgmls (1). 
html-xml-utils-6.5/types.c0000644000175000001440000001070312174266016012517 00000000000000/* * Copyright 1994-2000 World Wide Web Consortium * See http://www.w3.org/Consortium/Legal/copyright-software * * Author: Bert Bos * Created: 1997 **/ #include "config.h" #include #include #ifdef HAVE_STRING_H # include #elif HAVE_STRINGS_H # include #endif #include #include #include "export.h" #include "heap.e" EXPORT typedef char *string; EXPORT typedef const char *conststring; EXPORT typedef struct _pairlist { string name; string value; struct _pairlist *next; } *pairlist; EXPORT typedef unsigned int MediaSet; EXPORT enum _Media { MediaNone = 0, MediaPrint = (1 << 0), MediaScreen = (1 << 1), MediaTTY = (1 << 2), MediaBraille = (1 << 3), MediaTV = (1 << 4), MediaProjection = (1 << 5), MediaEmbossed = (1 << 6), MediaAll = 0xFF }; #define eq(s, t) (*(s) == *(t) && strcmp(s, t) == 0) EXPORTDEF(eq(s, t)) #define hexval(c) ((c) <= '9' ? (c)-'0' : (c) <= 'F' ? 10+(c)-'A' : 10+(c)-'a') EXPORTDEF(hexval(c)) /* pairlist_delete -- free all memory occupied by a pairlist */ EXPORT void pairlist_delete(pairlist p) { if (p) { pairlist_delete(p->next); dispose(p->name); dispose(p->value); dispose(p); } } /* pairlist_copy -- make a deep copy of a pairlist */ EXPORT pairlist pairlist_copy(const pairlist p) { pairlist h = NULL; if (p) { new(h); h->name = newstring(p->name); h->value = newstring(p->value); h->next = pairlist_copy(p->next); } return h; } /* pairlist_get -- get value corresponding to name, or NULL */ EXPORT conststring pairlist_get(pairlist p, const conststring name) { for (; p && strcasecmp(p->name, name) != 0; p = p->next); return p ? 
p->value : NULL; } /* pairlist_set -- add or change a name/value pair */ EXPORT void pairlist_set(pairlist *p, const conststring name, const conststring val) { pairlist h; for (h = *p; h && strcasecmp(h->name, name) != 0; h = h->next); if (h) { free(h->value); h->value = newstring(val); } else { new(h); h->name = newstring(name); h->value = newstring(val); h->next = *p; *p = h; } } /* pairlist_unset -- remove a name/value pair from list, false if not found */ EXPORT bool pairlist_unset(pairlist *p, const conststring name) { pairlist h, h1; if (! *p) return false; if (strcasecmp((*p)->name, name) == 0) { /* Remove first pair in list */ h = *p; free(h->name); free(h->value); *p = h->next; free(h); return true; } for (h = *p; h->next && strcasecmp(h->next->name, name) != 0; h = h->next); if (! h->next) return false; /* Not found */ free(h->next->name); free(h->next->value); h1 = h->next; h->next = h->next->next; free(h1); return true; } /* strapp -- append to a string, re-allocating memory; last arg must be 0 */ EXPORT string strapp(string *s,...) { va_list ap; int i, j; conststring h; va_start(ap, s); if (!s) {new(s); *s = NULL;} i = *s ? strlen(*s) : 0; while ((h = va_arg(ap, conststring))) { j = strlen(h); renewarray(*s, i + j + 1); strcpy(*s + i, h); i += j; } va_end(ap); return *s; } /* chomp -- remove trailing \n or \r\n (if any) from string */ EXPORT void chomp(string s) { int i; if (s && (i = strlen(s)) != 0 && s[i-1] == '\n') { s[i-1] = '\0'; if (i > 1 && s[i-2] == '\r') s[i-2] = '\0'; } } EXPORT inline int min(int a, int b) { return a < b ? a : b; } EXPORT inline int max(int a, int b) { return a > b ? 
a : b; } /* down -- convert a string to lowercase, return pointer to arg */ EXPORT string down(const string s) { string t; for (t = s; *t; t++) *t = tolower(*t); return s; } /* hasprefix -- true if s starts with prefix */ EXPORT bool hasprefix(conststring s, conststring prefix) { if (!prefix) return true; /* NULL is prefix of everything */ if (!s) return !prefix; /* Only NULL is prefix of NULL */ while (*prefix && *prefix == *s) prefix++, s++; return *prefix == '\0'; } /* hasaffix -- true if s ends with affix */ EXPORT bool hasaffix(conststring s, conststring affix) { size_t i, j; if (!affix) return true; if (!s) return !affix; i = strlen(s); j = strlen(affix); if (i < j) return false; s = s + i - j; while (*affix && *affix == *s) affix++, s++; return *affix == '\0'; } /* only_space -- check if s contains only whitespace */ EXPORT bool only_space(conststring s) { while (*s == ' ' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\f') s++; return *s == '\0'; } html-xml-utils-6.5/hxindex.10000644000175000001440000002212112051521643012727 00000000000000.de d \" begin display .sp .in +4 .nf .. .de e \" end display .in -4 .fi .sp .. .TH "HXINDEX" "1" "10 Jul 2011" "6.x" "HTML-XML-utils" .SH NAME hxindex \- insert an index into an HTML document .SH SYNOPSIS .B hxindex .RB "[\| " \-t " \|]" .RB "[\| " \-x " \|]" .RB "[\| " \-n " \|]" .RB "[\| " \-f " \|]" .RB "[\| " \-r " \|]" .RB "[\| " \-c .IR classes " \|]" .RB "[\| " \-b .IR base " \|]" .RB "[\| " \-i .IR indexdb " \|]" .RB "[\| " \-s .IR template " \|]" .RB "[\| " \-u .IR phrase " \|]" .RB "[--]" .RI "[\| " file\-or\-URL " \|]" .SH DESCRIPTION .LP The .B hxindex looks for terms to be indexed in a document, collects them, turns them into target anchors and creates a sorted index as an HTML list, which is inserted at the place of a placeholder in the document. The resulting document is written to standard output. 
.LP The index is inserted at the place of a comment of the form .d .e or between two comments of the form .d \&... .e In the latter case, all existing content between the two comments is removed first. .LP Index terms are either elements of type \fI\fP or elements with a class attribute of \fI"index".\fP (For backward compatibility, also class attributes \fI"index-inst"\fP and \fI"index-def"\fP are recognized.) \fI\fP elements (and class \fI"index-def"\fP) are considered more important than elements with class \fI"index"\fP and will appear in bold in the generated index. .LP The option .B \-c adds additional classes, that are aliases for \fI"index"\fP. .LP By default, the contents of the element are taken as the index term. Here are two examples of occurrences of the index term "shoe": .d A shoe is a piece of clothing that... completed by a leather shoe... .e .LP If the term to be indexed is not equal to the contents of the element, the \fItitle\fP attribute can be used to give the correct term: .d \&... Shoes are pieces of clothing that... \&... with two leather shoes... .e .LP The \fItitle\fP attribute must also be used when the index term is a subterm of another. Subterms appear indented in the index, under their head term. To define a subterm, use a \fItitle\fP attribute with two exclamation marks ("!!") between the term and the subterm, like this: .d ... ... ... .e As the last example above shows, there can be multiple levels of sub-subterms. .LP The \fItitle\fP attribute also allows multiple index terms to be associated with a single occurrence. The multiple terms are separated with a vertical bar ("|"). Compare the following examples with the ones above: .d ... ... .e These two elements both insert two terms into the index. Note that the second example above combines subterms and multiple terms. .LP It is possible to run index on a file that already has an index. The old target anchors and the old index will be removed before being re-generated. 
.SH OPTIONS The following options are supported: .TP 10 .B \-t By default, .B hxindex adds an ID attribute to the element that contains the occurrence of a term and .I also inserts an \fI\fP element inside it with a \fIname\fP attribute equal to the ID. This is to allow old browsers that ignore ID attributes, such as Netscape 4, to find the target as well. The .B \-t option suppresses the \fI\fP element. .TP .B \-x This option turns on XML syntax conventions: empty elements will end in \fI/>\fP instead of \fI>\fP as in HTML. .B \-x implies .B \-t. .TP .BI \-i " indexdb" .B hxindex can read an initial index from a file and write the merged collection of index terms back to that file. This allows an index to span several documents. The .B \-i option is used to give the name of the file that contains the index. .TP .BI \-b " base" This option is useful in combination with .B \-i to give the base URL reference of the document. By default, .B hxindex will store links to occurrences in the \fIindexdb\fP file in the form \fI#anchor\fP, but when .B \-b is given, the links will look like \fIbase#anchor\fP instead. .IP When used in combination with .BR \-n , the title attributes of the links will contain the title of the document that contains the term. The title is inserted before the .I template (see option .BR \-s ) and separated from it with a comma and a space. E.g., if .B hxindex is called with .d hxindex -i termdb -n -base myfile.html myfile.html .e and the termdb already contains an entry for "foo" in in section "3.1" of a document called "file2.html" with title "The foos", then the generated index will contain an entry such as this: .d foo, 3.1 .e .TP .BI \-c " class[,class[,...]]" Normal index terms are recognized because they have a class of \fI"index".\fP The .B \-c option adds additional, comma-separated class names that will be considered aliases for \fI"index"\fP. E.g., .B \-c instance will make sure that .B term is recognized as a term for the index. 
.TP .B \-n By default, the index consists of links with "#" as the anchor text. Option .B \-n causes the link text to consist of the section numbers of the sections in which the terms occur, falling back to "without number" (see option .B \-u below) if no section number could be found. Section numbers are found by looking for the nearest preceding start tag with a class of "secno" or "no-num". In the case of "secno", the contents of that element are taken as the section number. In the case of "no-num" the section is assumed to have no number and .B hxindex will print "without number" instead. These classes are also used by .BR hxnum (1), so it is useful to run .B hxindex .I after .BR hxnum ", e.g.," .d hxnum myfile.html | hxindex \-n >mynewfile.html .e .TP .BI \-s " template" When option .B \-n is used, the link will have a title attribute and the .I template determines what it contains. The default is "section %s", where the %s is a placeholder for the section number. In other words, the index will contain entries like this: .d term, 7.8 .e .IP Some examples: .d hxindex -n -s 'chapter %s' hxindex -n -s 'part %s' hxindex -n -s 'hoofdstuk %s' -u 'zonder nummer' .e .IP This option is only useful in combination with .B \-n .TP .BI \-u " phrase" When option .B \-n is used to display section numbers, references for which no section number can be found are shown as .I phrase instead. The default is "??". .IP This option is only useful in combination with .B \-n .TP .B \-f Remove \fItitle\fP attributes that were used for the index as well as the comments that delimit the inserted index. This avoids that browsers display these attributes. Note that \fBhxindex\fP cannot be run again on its own output if this option is used. (Mnemonic: "\fBf\fPreeze" or "\fBf\fPinal".) .TP .B \-r Do not ignore trailing punctuation when sorting index terms. E.g., if two terms are written as .d foo,... 
foo .e .B hxindex will normally ignore the comma and treat them as the same term, but with .B \-r, they are treated as different. This affects trailing commas (,), semicolons (;), colons (:), exclamations mark (!), question marks (?) and full stops (.). A final full stop is never ignored if there are two or more in the term, to protect abbreviations ("B.C.") and ellipsis ("more..."). This does .I not affect how the index term is printed (it is always printed as it appears in the text), only how it is compared to similar terms. (Mnemonic: "\fBr\fPaw".) .SH OPERANDS The following operand is supported: .TP 10 .I file\-or\-URL The name of an HTML or XML file or the URL of one. If absent, or if the file is "-", standard input is read instead. .SH "EXIT STATUS" The following exit values are returned: .TP 10 .B 0 Successful completion. .TP .B >0 An error occurred in parsing the HTML file. .SH ENVIRONMENT The input is assumed to be in UTF-8, but the current locale is used to determine the sorting order of the index terms. I.e., .B hxindex looks at the LANG, LC_ALL and/or LC_COLLATE environment variables. See .BR locale (1). .LP To use a proxy to retrieve remote files, set the environment variables .B http_proxy or .BR ftp_proxy "." E.g., .B http_proxy="http://localhost:8080/" .SH BUGS .LP Assumes UTF-8 as input. Doesn't expand character entities (apart from the standard ones: "&", "<", ">" and """). Instead, pipe the input through .BR hxunent (1) and, if needed, .BR asc2xml (1) to convert it to UTF-8. .LP Remote files (specified with a URL) are currently only supported for HTTP. Password-protected files or files that depend on HTTP "cookies" are not handled. (You can use tools such as .BR curl (1) or .BR wget (1) to retrieve such files.) .LP The accessibility of an index, even when generated with option .BR \-n , is poor. 
.SH "SEE ALSO" .BR asc2xml (1), .BR hxnormalize (1), .BR hxnum (1), .BR hxprune (1), .BR hxtoc (1), .BR hxunent (1), .BR xml2asc (1), .BR locale (1), .BR UTF-8 " (RFC 2279)" html-xml-utils-6.5/unent.e0000644000175000001440000000017412265516604012511 00000000000000 struct _Entity {char *name; unsigned int code;}; const struct _Entity *lookup_entity (const char *str, unsigned int len); html-xml-utils-6.5/ChangeLog0000644000175000001440000007227712225220044012763 000000000000002013-10-08 Bert Bos * openurl.c: Removed a forgotten occurrence of "Boolean" in the code path for compilations without libcurl. * hxcite.1: Documented the use of %K lines. * hxcite.c: Now looks for labels also on %K lines. 2013-07-27 Bert Bos * Makefile.am: No longer uses $(wildcard) to find all tests, which doesn't work anymore in automake since version 1.14. * hkmkbib.c: Now uses getopt() so that invalid options no longer cause a crash. * hkwls.c: Now uses getopt() so that invalid options no longer cause a crash. * All programs now include "config.h" instead of . Somebody found it easier to compile them that way. * configure.ac: Renamed configure.in to configure.ac. Removed acinclude.m4 and put all macro definitions in separate files in directory m4. * hxnum.c: now using getopt() to parse and verify the command line. 2013-07-25 Bert Bos * hxindex.c: fixed crash when argument to trim() is NULL. * acinclude.m4: removed definition of LIBCURL_CHECK_CONFIG, there is one already in /usr/share/aclocal/libcurl.m4 2013-04-19 Bert Bos * Released 6.4 2013-03-23 Bert Bos * toc1.sh: Added a first test for hxtoc. * Removed type Boolean in favor of bool from stdbool.h. * hxtoc.c: Implemented the HTML5 algorithm which says that a Hn that is the first child of a sectioning element (or of a HGROUP that is itself the first child of a sectioning element) doesn't have level n, but a level equal to the number of ancestors that are sectioning elements. 
Where a sectioning element is a DIV, SECTION, ARTICLE, ASIDE or NAV whose first child is either an HGROUP or an Hn. 2013-03-18 Bert Bos * hxtoc.c: option -d not only looks for DIV elements, but now also for ARTICLE, ASIDE, NAV and SECTION. 2013-01-09 Bert Bos * Released 6.3 * scan.l: A lonely "<" is allowed in SGML (although not in XML). Accept it and turn it into a "<". 2012-11-16 Bert Bos * tests: Added incl10.sh, incl11.sh and index5.sh. * hxindex.c: Fixed bug in command line parsing that caused hxindex not to accept the -b option. Added options -s and -u to control the title attribute that is added on links when -n is used. The title attribute now contains something like "section 3.2" or, if -b is also used, something like "Working with cork, section 3.2". The format of the index database for option -i changed and now includes the document title. The program now uses errx() instead of errexit(), so that error messages include the program name. 2012-10-23 Bert Bos * openurl.c: Added some assert() statements. * hxincl.c: Added options -v and -M. * hxaddid.c hxcite.c hxcopy.c hxextract.c hxpipe.c hxref.c hxremove.c hxselect.c: Added option -v to print the version number. * hxtabletrans.c: Added. 2012-10-21 Bert Bos * selector.c: Renamed Root to RootSel to avoid clash with Root in tree.c * hxremove.c: Added * tree.c: Added some routines to add nodes to a tree without checking against the HTML DTD (useful for generic XML documents and used by hxremove) 2012-10-19 Bert Bos * scan.l: added set_yyin() and get_yyin_name() to store/retrieve the name of the file that is going to parsed. Also added the file name as a parameter to include_file(). This is used by hxincl.c * hxincl.c: Now remembers the name of the included file so that the path of a recursively included file is relative to the file that includes it, rather than to the top level file. 2012-10-18 Bert Bos * hxincl.c: Now allows %-delimited variables in the file name that is to be included. 
2012-10-09 Bert Bos * extract1.sh: Added a "sleep 1" after starting the servers to allow them more time to start and open some sockets. * hxref.c: "@", "(", ")" and "_" are now treated as significant letters when comparing terms. * genid.c: "@" is now used in the generated ID replaced by the letters "at" and "_" is kept as is. 2012-10-02 Bert Bos * Released version 6.2 * html.y: fixed type error in call() macro: void != NULL. (Bug found by Michael Tautschnig of Debian) 2012-08-31 Bert Bos * hxnormalize.c: When checking whether an optional end tag can be omitted, skip any text nodes with only spaces. In HTML mode, don't try to guess the attribute name for a given attribute value (i.e., dont replace by
        ), but leave the attribute value alone. * textwrap.c: fixed bug that created lines indented with spaces and no further content. * dtd.hash: insert line break before , and