html2text-1.3.2a/0040755000000000000000000000000010001250432012276 5ustar rootroothtml2text-1.3.2a/Area.C0100644000000000000000000002563507760112171013301 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Wed Jul 2 21:56:45 CEST 2003: ported to g++ 3.3 */ /***************************************************************************/ #include #include #include #include "Area.h" #include "string.h" #define LATIN1_nbsp 160 /* ------------------------------------------------------------------------- */ #define malloc_array(type, size)\ ((type *) malloc(sizeof(type) * (size))) #define realloc_array(array, type, size) \ ((array) = (type *) realloc((array), sizeof(type) * (size))) #define copy_array(from, to, type, count) \ ((void) memcpy((to), (from), (count) * sizeof(type))) /* ------------------------------------------------------------------------- */ Line::Line(size_type l) : length_(l), cells_(malloc_array(Cell, l)) { Cell *p, *end = cells_ + l; for (p = cells_; p != end; p++) p->clear(); } Line::Line(const char *p) : length_(strlen(p)), cells_(malloc_array(Cell, length_)) { 
Cell *q = cells_, *end = q + length_; while (q != end) { q->character = *p++; q->attribute = Cell::NONE; q++; } } Line::Line(const string &s) : length_(s.length()), cells_(malloc_array(Cell, length_)) { const char *p = s.c_str(); Cell *q = cells_, *end = q + length_; while (q != end) { q->character = *p++; q->attribute = Cell::NONE; q++; } } Line::~Line() { free(cells_); } /* ------------------------------------------------------------------------- */ void Line::resize(size_type l) { if (l == length()) return; realloc_array(cells_, Cell, l); for (size_type x = length(); x < l; x++) cells_[x].clear(); length_ = l; } void Line::insert(const Line &l, size_type x) { enlarge(x + l.length()); const Cell *p = l.cells_, *end = p + l.length(); Cell *q = cells_ + x; while (p != end) *q++ = *p++; } void Line::insert(const char *p, size_type x) { enlarge(x + strlen(p)); Cell *q = cells_ + x; while (*p) q++->character = *p++; } void Line::insert(const string &s, size_type x) { insert(s.c_str(), x); } void Line::append(char c) { size_type x = length_; resize(x + 1); cells_[x].character = c; cells_[x].attribute = Cell::NONE; } void Line::append(const Line &l) { size_type x = length_; enlarge(x + l.length_); const Cell *p = l.cells_, *end = p + l.length(); Cell *q = cells_ + x; while (p != end) *q++ = *p++; } void Line::append(const char *p) { size_type x = length_; enlarge(x + strlen(p)); Cell *q = cells_ + x; for (; *p; ++p, ++q) { q->character = *p; q->attribute = Cell::NONE; } } void Line::add_attribute(char addition) { Cell *p = cells_, *end = cells_ + length_; while(p != end) p++->attribute |= addition; } /* ------------------------------------------------------------------------- */ bool Area::use_backspaces = true; /* ------------------------------------------------------------------------- */ Area::Area() : width_(0), height_(0), cells_(malloc_array(Cell *, 0)) { } Area::Area( size_type w /*= 0*/ , size_type h /*= 0*/ , char c /*= ' '*/ , char a /*= Cell::NONE*/ ) : 
width_(w), height_(h), cells_(malloc_array(Cell *, h)) { for (size_type y = 0; y < h; y++) { Cell *p = cells_[y] = malloc_array(Cell, w), *end = p + w; while (p != end) { p->character = c; p->attribute = a; p++; } } } Area::Area(const char *p) : width_(strlen(p)), height_(1), cells_(malloc_array(Cell *, 1)) { cells_[0] = malloc_array(Cell, width_); Cell *q = cells_[0], *end = q + width_; while (q != end) { q->character = *p++; q->attribute = Cell::NONE; q++; } } Area::Area(const string &s) : width_(s.length()), height_(1), cells_(malloc_array(Cell *, 1)) { cells_[0] = malloc_array(Cell, width_); Cell *q = cells_[0]; for (string::size_type i = 0; i < s.length(); ++i) { q->character = s[i]; q->attribute = Cell::NONE; q++; } } Area::Area(const Line &l) : width_(l.length_), height_(1), cells_(malloc_array(Cell *, 1)) { cells_[0] = malloc_array(Cell, width_); copy_array(l.cells_, cells_[0], Cell, width_); } Area::~Area() { for (size_type y = 0; y < height(); y++) free(cells_[y]); free(cells_); } /* ------------------------------------------------------------------------- */ const Area & Area::operator>>=(size_type rs) { if (rs > 0) { resize(width_ + rs, height_); for (size_type y = 0; y < height_; y++) { Cell *c = cells_[y]; memmove(c + rs, c, (width_ - rs) * sizeof(Cell)); for (size_type x = 0; x < rs; x++) { c[x].character = ' '; c[x].attribute = Cell::NONE; } } } return *this; } void Area::resize(size_type w, size_type h) { size_type y_max = h < height() ? 
h : height(); if (w > width()) { for (size_type y = 0; y < y_max; y++) { realloc_array(cells_[y], Cell, w); Cell *p = cells_[y] + width(), *end = cells_[y] + w; while (p != end) p++->clear(); } } else if (w < width()) { for (size_type y = 0; y < y_max; y++) { realloc_array(cells_[y], Cell, w); } } if (h > height()) { realloc_array(cells_, Cell *, h); for (size_type y = height(); y < h; y++) { Cell *p = cells_[y] = malloc_array(Cell, w), *end = p + w; while (p != end) p++->clear(); } } else if (h < height()) { for (size_type y = h; y < height(); y++) free(cells_[y]); realloc_array(cells_, Cell *, h); } width_ = w; height_ = h; } void Area::enlarge(size_type w, size_type h) { if (w > width() || h > height()) { resize(w > width() ? w : width(), h > height() ? h : height()); } } void Area::insert(const Area &a, size_type x, size_type y) { enlarge(x + a.width(), y + a.height()); for (size_type i = 0; i < a.height(); i++) { const Cell *p = a.cells_[i], *end = p + a.width(); Cell *q = cells_[y + i] + x; while (p != end) *q++ = *p++; } } void Area::insert( const Area &a, size_type x, size_type y, size_type w, size_type h, int halign, int valign ) { if (halign != LEFT && a.width() < w) x += ( halign == CENTER ? (w - a.width()) / 2 : halign == RIGHT ? w - a.width() : 0 ); if (valign != TOP && a.height() < h) y += ( valign == MIDDLE ? (h - a.height()) / 2 : valign == BOTTOM ? 
h - a.height() : 0 ); insert(a, x, y); } void Area::insert(const Cell &c, size_type x, size_type y) { enlarge(x + 1, y + 1); cells_[y][x] = c; } void Area::fill(const Cell &c, size_type x, size_type y, size_type w, size_type h) { enlarge(x + w, y + h); for (size_type yy = y; yy < y + h; yy++) { Cell *p = &cells_[yy][x]; for (size_type i = 0; i < w; i++) *p++ = c; } } void Area::insert(const Cell *p, size_type count, size_type x, size_type y) { enlarge(x + count, y + 1); Cell *q = &cells_[y][x]; while (count--) *q++ = *p++; } void Area::insert(char c, size_type x, size_type y) { enlarge(x + 1, y + 1); cells_[y][x].character = c; } void Area::insert(const string &s, size_type x, size_type y) { enlarge(x + s.length(), y + 1); Cell *cell = &cells_[y][x]; for (string::size_type i = 0; i < s.length(); i++) { cell->character = s[i]; cell->attribute = Cell::NONE; cell++; } } void Area::prepend(int n) { if (n <= 0) return; realloc_array(cells_, Cell *, height() + n); memmove(cells_ + n, cells_, height() * sizeof(*cells_)); for (int y = 0; y < n; ++y) { Cell *p = cells_[y] = malloc_array(Cell, width()), *end = p + width(); while (p != end) p++->clear(); } height_ += n; } const Area & Area::operator+=(const Area &x) { insert(x, 0, height()); return *this; } void Area::fill(char c, size_type x, size_type y, size_type w, size_type h) { enlarge(x + w, y + h); for (size_type yy = y; yy < y + h; yy++) { Cell *p = &cells_[yy][x]; for (size_type i = 0; i < w; i++) p++->character = c; } } void Area::add_attribute(char addition) { for (size_type y = 0; y < height(); y++) { Cell *p = cells_[y], *end = p + width(); while (p != end && p->character == ' ') ++p; Cell *q = p; while (p != end) { if (p++->character != ' ') { while (q < p) q++->attribute |= addition; } } } } void Area::add_attribute( char addition, size_type x, size_type y, size_type w, size_type h ) { enlarge(x + w, y + h); for (size_type yy = y; yy < y + h; yy++) { Cell *p = &cells_[yy][x], *end = p + w; while (p != end) 
p++->attribute |= addition; } } /* ------------------------------------------------------------------------- */ ostream &backspace(ostream &os) { return os << '\b'; } ostream & operator<<(ostream &os, const Area &a) { for (Area::size_type y = 0; y < a.height(); y++) { const Cell *cell = a.cells_[y], *end = cell + a.width(); while ( end != cell && end[-1].character == ' ' && (end[-1].attribute & (Cell::UNDERLINE | Cell::STRIKETHROUGH)) == 0 ) end--; for (const Cell *p = cell; p != end; p++) { char c = p->character; char a = p->attribute; if (c == (char) LATIN1_nbsp) c = ' '; if (a == Cell::NONE) { os << c; } else { if (Area::use_backspaces) { /* * No LESS / terminal combination that I know of supports * dash-backspace-character as "strikethrough". Pity. */ if (a & Cell::STRIKETHROUGH) os << '-' << backspace; /* * No LESS that I know of can combine underlining and boldface. In * practice, boldface always takes precedence. * * It's not a good idea to optimize an underlined space as a single * underscore (as opposed to underscore-backspace-space) -- this * would not look nice next to an underlined character. */ if ((a & Cell::UNDERLINE) ) os << '_' << backspace; if ((a & Cell::BOLD ) && c != ' ') os << c << backspace; os << c; } else { os << (c == ' ' && (a & Cell::UNDERLINE) ? '_' : c); } } } os << std::endl; } return os; } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/Area.h0100644000000000000000000001230707760112171013336 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. 
* * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Thu Oct 4 21:21:10 CEST 2001: ported to g++ 3.0 * Wed Jul 2 21:59:41 CEST 2003: ported to g++ 3.3 */ /***************************************************************************/ #ifndef __Area_h_INCLUDED__ /* { */ #define __Area_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #include #include #include #ifdef BOOL_DEFINITION BOOL_DEFINITION #undef BOOL_DEFINITION #endif using std::string; using std::ostream; /* ------------------------------------------------------------------------- */ struct Cell { char character; char attribute; enum { NONE = 0, UNDERLINE = 1, BOLD = 2, STRIKETHROUGH = 4 }; void clear() { character = ' '; attribute = NONE; } }; /* ------------------------------------------------------------------------- */ class Line { public: typedef size_t size_type; Line(size_type l = 0); Line(const char *); Line(const string &); ~Line(); size_type length() const { return length_; } bool empty() const { return length_ == 0; } const Cell &operator[](size_type x) const { return cells_[x]; } Cell &operator[](size_type x) { return cells_[x]; } const Cell *cells() const { return cells_; } void resize(size_type l); void enlarge(size_type l) { if (l > length_) resize(l); } void insert(const Line &, size_type x); void 
insert(const char *, size_type x); void insert(const string &, size_type x); void append(char c ); void append(const Line &l); void append(const char *p); const Line &operator+=(char c ) { append(c); return *this; } const Line &operator+=(const Line &l) { append(l); return *this; } const Line &operator+=(const char *p) { append(p); return *this; } void add_attribute(char addition); private: Line(const Line &); const Line &operator=(const Line &); size_type length_; Cell *cells_; friend class Area; }; /* ------------------------------------------------------------------------- */ class Area { public: typedef size_t size_type; enum { LEFT, CENTER, RIGHT, TOP, MIDDLE, BOTTOM }; Area(); Area(size_type w, size_type h = 0, char = ' ', char = Cell::NONE); Area(const char *); Area(const string &); Area(const Line &); ~Area(); size_type width() const { return width_; } size_type height() const { return height_; } const Cell *operator[](size_type y) const { return cells_[y]; } Cell *operator[](size_type y) { return cells_[y]; } const Area &operator>>=(size_type rs); void resize(size_type w, size_type h); void enlarge(size_type w, size_type h); void insert(const Line &l, size_type x, size_type y) { insert(l.cells_, l.length_, x, y); } void insert(const Area &, size_type x, size_type y); void insert( const Area &, size_type x, size_type y, size_type w, size_type h, int halign, int valign ); void insert(const Cell &, size_type x, size_type y); void insert(const Cell *, size_type count, size_type x, size_type y); void insert(char, size_type x, size_type y); void insert(const string &, size_type x, size_type y); void prepend(int n); // Prepend blank lines at top void append(int n) // Append blank lines at bottom { enlarge(width(), height() + n); } const Area &operator+=(const Area &); // Append at bottom! 
const Area &operator+=(int n) { append(n); return *this; } void fill(const Cell &, size_type x, size_type y, size_type w, size_type h); void fill(char, size_type x, size_type y, size_type w, size_type h); void add_attribute(char addition); // ...but not to left and right free areas void add_attribute( char addition, size_type x, size_type y, size_type w, size_type h ); static bool use_backspaces; // "true" by default. private: Area(const Area &); const Area &operator=(const Area &); size_type width_; size_type height_; Cell **cells_; friend ostream &operator<<(ostream &, const Area &); }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/HTMLControl.C0100644000000000000000000003641307760112171014532 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. 
*/ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Fre Jun 8 17:20:38 CEST 2001: fixed coredump * Sun Apr 7 11:31:10 CEST 2002: fixed parser errors * Wed Jul 2 22:00:21 CEST 2003: ported to g++ 3.3 * Sun Nov 16 22:13:50 CET 2003: better rendering of XHTML */ /***************************************************************************/ #include #include #include #include #include "html.h" #include "HTMLControl.h" #include "sgml.h" #include "cmp_nocase.h" #ifndef nelems #define nelems(array) (sizeof(array) / sizeof((array)[0])) #endif enum { NOT_A_TAG, START_TAG, BLOCK_START_TAG, END_TAG, BLOCK_END_TAG, NON_CONTAINER_TAG }; /* ------------------------------------------------------------------------- */ /* * Effectively, this method simply invokes "yylex2()", but it does some * postprocessing on PCDATA tokens that would be difficult to do in "yylex2()". */ int HTMLControl::yylex(yy_HTMLParser_stype *value_return) { for (;;) { // Notice the "return" at the end of the body! int token, tag_type; if (next_token == EOF) { token = yylex2(value_return, &tag_type); } else { token = next_token; *value_return = next_token_value; tag_type = next_token_tag_type; next_token = EOF; } /* * Switch on/off "literal mode" on "
" and "
". */ if (token == PRE) { literal_mode = true; /* * Swallow '\n' immediately following "
".
       */
      if (next_token == EOF) {
        next_token = yylex2(&next_token_value, &next_token_tag_type);
      }
      if (next_token == PCDATA) {
        string &s(*next_token_value.strinG);
        if (!s.empty() && s[0] == '\n') s.erase(0, 1);
      }
    }

    if (token == END_PRE) literal_mode = false;

    if (token == PCDATA) {

      /*
       * In order to post-process the PCDATA token, we need to look ahead one
       * token...
       */
      if (next_token == EOF) {
        next_token = yylex2(&next_token_value, &next_token_tag_type);
      }

      /*
       * Erase " '\n' { ' ' } " immediately before "
". */ if (next_token == END_PRE) { string &s(*value_return->strinG); string::size_type x = s.length(); while (x > 0 && s[x - 1] == ' ') --x; if (x > 0 && s[x - 1] == '\n') s.erase(x - 1, string::npos); } else /* * Erase whitespace before end tag or block start tag. */ if (!literal_mode && ( next_token_tag_type == END_TAG || next_token_tag_type == BLOCK_END_TAG || next_token_tag_type == BLOCK_START_TAG )) { string &s(*value_return->strinG); string::size_type x = s.length(); while (x > 0 && isspace(s[x - 1])) --x; s.erase(x, string::npos); } /* * Collate sequences of whitespace, if not in "literal mode". */ if (!literal_mode) { string &s(*value_return->strinG); // bool whitespace_only = true; for (string::size_type x = 0; x < s.length(); ++x) { if (isspace(s[x])) { string::size_type y; for (y = x + 1; y < s.length() && isspace(s[y]); ++y); s.replace(x, y - x, " "); } else { // whitespace_only = false; } } if (s.empty()) { delete value_return->strinG; continue; } } } /* * Erase whitespace after start tag or block end tag, if not in "literal * mode". */ if (!literal_mode && ( ( tag_type == START_TAG || tag_type == BLOCK_START_TAG || tag_type == BLOCK_END_TAG || token == BR || token == HR ) && token != SCRIPT && token != STYLE )) { if (next_token == EOF) { next_token = yylex2(&next_token_value, &next_token_tag_type); } if (next_token == PCDATA) { string &s(*next_token_value.strinG); string::size_type x; for (x = 0; x < s.length() && isspace(s[x]); ++x); if (x > 0) s.erase(0, x); if (s.empty()) { delete next_token_value.strinG; next_token = EOF; } } } return token; } } /* ------------------------------------------------------------------------- */ /* * Keep this array sorted alphabetically! 
*/ static const struct TextToIntP { char name[11]; char block_tag; const int *start_tag_code; const int *end_tag_code; } tag_names[] = { #define pack1(tag) { #tag, 0, &HTMLParser::tag, 0 } #define pack2(tag) { #tag, 0, &HTMLParser::tag, &HTMLParser::END_##tag } #define pack3(tag) { #tag, 1, &HTMLParser::tag, &HTMLParser::END_##tag } pack2(A), pack3(ADDRESS), pack2(APPLET), pack1(AREA), pack2(B), pack1(BASE), pack1(BASEFONT), pack2(BIG), pack3(BLOCKQUOTE), pack3(BODY), pack1(BR), pack3(CAPTION), pack3(CENTER), pack3(CITE), pack2(CODE), pack3(DD), pack2(DFN), pack3(DIR), pack3(DIV), pack3(DL), pack3(DT), pack2(EM), pack2(FONT), pack3(FORM), pack3(H1), pack3(H2), pack3(H3), pack3(H4), pack3(H5), pack3(H6), pack3(HEAD), pack1(HR), pack3(HTML), pack2(I), pack1(IMG), pack1(INPUT), pack1(ISINDEX), pack2(KBD), pack3(LI), pack1(LINK), pack2(MAP), pack3(MENU), pack1(META), pack2(NOBR), pack3(OL), pack3(OPTION), pack3(P), pack1(PARAM), pack3(PRE), pack2(SAMP), pack3(SCRIPT), pack2(SELECT), pack2(SMALL), pack2(STRIKE), pack2(STRONG), pack3(STYLE), pack2(SUB), pack2(SUP), pack3(TABLE), pack3(TD), pack2(TEXTAREA), pack3(TH), pack3(TITLE), pack3(TR), pack2(TT), pack2(U), pack3(UL), pack2(VAR), #undef pack }; /* ------------------------------------------------------------------------- */ int HTMLControl::yylex2(yy_HTMLParser_stype *value_return, int *tag_type_return) { int c; *tag_type_return = NOT_A_TAG; for (;;) { // Notice the "return" at the end of this loop. /* * Get the first character of the token. */ c = get_char(); if (c == EOF) return EOF; if (c == '<') { /* * Examine the first character of the tag. */ c = get_char(); if (c == '!') { c = get_char(); if (c == '-') { c = get_char(); if (c != '-') return SCAN_ERROR; /* * This is a comment... skip it! * * * * * * EXTENSION: Allow "-->" as the terminator of a multi-line comment. 
*/ int state = 0; do { c = get_char(); if (c == EOF) return SCAN_ERROR; switch (state) { case 0: if (c == '-') state = 1; break; case 1: state = c == '-' ? 2 : 0; break; case 2: state = c == '>' ? 3 : c == '-' ? 2 : 0; break; } } while (state != 3); continue; // Start over } /* * Scan "" tag. */ if (!isalpha(c)) return SCAN_ERROR; string tag_name(1, '!'); tag_name += c; for (;;) { c = get_char(); if (!isalnum(c) && c != '-') break; tag_name += c; } if (cmp_nocase(tag_name, "!DOCTYPE") != 0) return SCAN_ERROR; while (c != '>') { c = get_char(); if (c == EOF) return SCAN_ERROR; // Let newline not close the DOCTYPE tag - Arno } return DOCTYPE; } if (c == '/' || isalpha(c) || c == '_') { string tag_name; bool is_end_tag = false; if (c == '/') { is_end_tag = true; c = get_char(); } if (!isalpha(c) && c != '_') return SCAN_ERROR; tag_name += c; for (;;) { c = get_char(); if (!isalnum(c) && c != '-' && c != '_' && c != ':') break; // Tolerate colons in tags for MS-Word's sake - Arno tag_name += c; } while (isspace(c)) c = get_char(); /* * Scan tag attributes (only for opening tags). Create the * "tag_attributes" only on demand; this saves a lot of overhead. */ auto_ptr > tag_attributes; if (!is_end_tag) { while (isalpha(c) || c == '_') { TagAttribute attribute; /* * Scan attribute name. */ attribute.first = c; for (;;) { c = get_char(); if (!isalpha(c) && c != '-' && c != '_' && c != ':') break; // Same as in line 352 - Arno attribute.first += c; } while (isspace(c)) c = get_char(); // Skip WS after attribute name /* * Scan (optional) attribute value. */ if (c == '=') { c = get_char(); while (isspace(c)) c = get_char(); if (c == '"' || c == '\'') { int closing_quote = c; // Same as opening quote! for (;;) { c = get_char(); if (c == EOF) return SCAN_ERROR; // Accept multiple-line elements - Arno if (c == closing_quote) break; /* * Do *not* interpret "ä" and consorts here! This * would ruin tag attributes like "HREF=hhh?a=1&b=2". 
*/ attribute.second += c; } c = get_char(); // Get next char after closing quote. } else while (c != '>' && (unsigned char) c > (unsigned char) ' ') { // This is for non-ACSII chars - Arno if (c == EOF) return SCAN_ERROR; // Same as in line 390 attribute.second += c; c = get_char(); } while (isspace(c)) c = get_char(); // Skip WS after attr value } /* * Store the attribute. */ if (!tag_attributes.get()) { tag_attributes.reset(new list); } tag_attributes->push_back(attribute); } } // accept XHTML tags like
- Alexander Solovey if (c != '>') { if ( c == '/' ) { c = get_char(); if (c != '>') { return SCAN_ERROR; } } else { return SCAN_ERROR; } } if (debug_scanner) { std::cerr << "Scanned tag \"<" << (is_end_tag ? "/" : "") << tag_name; if (!is_end_tag && tag_attributes.get()) { const list &ta(*tag_attributes); list::const_iterator j; for (j = ta.begin(); j != ta.end(); ++j) { std::cerr << " " << (*j).first << "=\"" << (*j).second << "\""; } } std::cerr << ">\"" << std::endl; } /* * Look up the tag in the table of recognized tags. */ static int (*const f)(const char *, const char *) = cmp_nocase; const TextToIntP *tag = (const TextToIntP *) bsearch( tag_name.c_str(), tag_names, nelems(tag_names), sizeof(TextToIntP), (int (*)(const void *, const void *)) f ); if (tag == NULL) { /* EXTENSION: Swallow unknown tags. */ if (debug_scanner) { std::cerr << "Tag unknown -- swallowed." << std::endl; } continue; } /* * Return the BISON token code for the tag. */ if (is_end_tag) { if (!tag->end_tag_code) { if (debug_scanner) { std::cerr << "Non-container end tag scanned." << std::endl; } continue; } *tag_type_return = tag->block_tag ? BLOCK_END_TAG : END_TAG; return *tag->end_tag_code; } else { *tag_type_return = ( !tag->end_tag_code ? NON_CONTAINER_TAG : tag->block_tag ? BLOCK_START_TAG : START_TAG ); value_return->tag_attributes = tag_attributes.release(); return *tag->start_tag_code; } } /* * EXTENSION: This tag did not match "= (unsigned char) ' ') { // Same as in line 402 string *s = value_return->strinG = new string; while (c != EOF) { /* * Accept literal '<' in some cases. */ if (c == '<') { int c2; unget_char(c2 = get_char()); if (c2 == '!' || c2 == '/' || isalpha(c2)) { unget_char(c); break; } } *s += c; c = get_char(); } replace_sgml_entities(s); // Replace "ä" and consorts. /* * Swallow empty PCDATAs. 
*/ if (s->empty()) { delete s; continue; } if (debug_scanner) std::cerr << "Scanned PCDATA \"" << *s << "\"" << std::endl; return PCDATA; } return SCAN_ERROR; } } /* ------------------------------------------------------------------------- */ bool HTMLControl::read_cdata(const char *terminal, string *value_return) { string &s(*value_return); int c; int state = 0; for (;;) { c = get_char(); if (c == EOF) return false; if (toupper(c) == terminal[state]) { state++; if (terminal[state] == '\0') { // s.erase(s.length() - state); // caused core dump on empty STYLE and SCRIPT elements - Johannes Geiger s.erase(s.length() - state + 1); return true; } } else { state = 0; } s += c; } } /* ------------------------------------------------------------------------- */ int HTMLControl::get_char() { if (number_of_ungotten_chars > 0) { return ungotten_chars[--number_of_ungotten_chars]; } int c = is.get(); while (c == '\r') c = is.get(); if (c == EOF) { ; } else if (c == '\n') { current_line++; current_column = 0; } else { current_column++; } return c; } /* ------------------------------------------------------------------------- */ void HTMLControl::unget_char(int c) { if (number_of_ungotten_chars == nelems(ungotten_chars)) { yyerror("Too many chars ungotten"); return; } ungotten_chars[number_of_ungotten_chars++] = c; } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/HTMLControl.h0100644000000000000000000000522207760112171014571 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. 
* * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Thu Oct 4 21:25:07 CEST 2001: ported to g++ 3.0 * Wed Jul 2 22:01:12 CEST 2003: ported to g++ 3.3 */ /***************************************************************************/ #ifndef __HTMLControl_h_INCLUDED__ /* { */ #define __HTMLControl_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #include "HTMLParser.h" #include "urlistream.h" #include using std::istream; /* ------------------------------------------------------------------------- */ class HTMLControl : public HTMLParser { public: HTMLControl(urlistream &is_, bool debug_scanner_, bool debug_parser_) : HTMLParser(), current_line(1), current_column(0), literal_mode(false), next_token(EOF), debug_scanner(debug_scanner_), is(is_), number_of_ungotten_chars(0) { yydebug = debug_parser_; } int current_line; int current_column; private: /* * Implementing virtual methods of "HTMLParser". */ /*virtual*/ int yylex(yy_HTMLParser_stype *value_return); /*virtual*/ bool read_cdata(const char *terminal, string *value_return); /* * Helpers. 
*/ int yylex2(yy_HTMLParser_stype *value_return, int *tag_type_return); bool literal_mode; int next_token; yy_HTMLParser_stype next_token_value; int next_token_tag_type; int get_char(); void unget_char(int); bool debug_scanner; urlistream &is; int ungotten_chars[5]; int number_of_ungotten_chars; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/HTMLParser.C0100644000000000000000000033746307760112171014357 0ustar rootroot#define YY_HTMLParser_h_included /* A Bison parser, made from HTMLParser.y */ #line 1 "/usr/local/lib/bison.cc" /* -*-C-*- Note some compilers choke on comments on `#line' lines. */ /* Skeleton output parser for bison, Copyright (C) 1984, 1989, 1990 Bob Corbett and Richard Stallman This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* HEADER SECTION */ #ifndef _MSDOS #ifdef MSDOS #define _MSDOS #endif #endif /* turboc */ #ifdef __MSDOS__ #ifndef _MSDOS #define _MSDOS #endif #endif #ifndef alloca #ifdef __GNUC__ #define alloca __builtin_alloca #else /* not GNU C. 
*/ #if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) #include #else /* not sparc */ #if defined (_MSDOS) #include #ifndef __TURBOC__ /* MS C runtime lib */ #define alloca _alloca #endif #else /* not MSDOS, or __TURBOC__ */ #if defined(_AIX) #include #pragma alloca #endif /* not _AIX */ #endif /* not MSDOS, or __TURBOC__ */ #endif /* not sparc. */ #endif /* not GNU C. */ #endif /* alloca not defined. */ #ifdef c_plusplus #ifndef __cplusplus #define __cplusplus #endif #endif #ifdef __cplusplus #ifndef YY_USE_CLASS #define YY_USE_CLASS #endif #else #ifndef __STDC__ #define const #endif #endif #include #define YYBISON 1 /* #line 70 "/usr/local/lib/bison.cc" */ #define YY_HTMLParser_PURE #define YY_HTMLParser_DEBUG 1 #line 40 "HTMLParser.y" /* ------------------------------------------------------------------------- */ #ident "$Id: HTMLParser.y,v 1.14 1999/10/26 10:56:55 arno Exp $" #include "html.h" #include "HTMLParser.h" // MIPS machines don't have "alloca()", so disable stack realloc'ing. 
#ifdef mips #define yyoverflow yyerror("parser stack overflow"), (void) #endif /* ------------------------------------------------------------------------- */ #define YY_HTMLParser_LEX_BODY = 0 #define YY_HTMLParser_ERROR_BODY = 0 #define YY_HTMLParser_MEMBERS \ virtual ~HTMLParser(); \ virtual void process(const Document &) = 0;\ virtual bool read_cdata(const char *terminal, string *) = 0;\ int list_nesting; #define YY_HTMLParser_CONSTRUCTOR_INIT : list_nesting(0) #line 69 "HTMLParser.y" typedef union { Document *document; Element *element; list > *element_list; PCData *pcdata; string *strinG; list *tag_attributes; int inT; list > *table_rows; list > *table_cells; ListItem *list_item; list > *list_items; Caption *caption; Heading *heading; list > *option_list; Option *option; DefinitionList *definition_list; list > *definition_list_item_list; TermName *term_name; TermDefinition *term_definition; Preformatted *preformatted; Address *address; list > > *tag_attributes_list; } yy_HTMLParser_stype; #define YY_HTMLParser_STYPE yy_HTMLParser_stype #line 70 "/usr/local/lib/bison.cc" /* %{ and %header{ and %union, during decl */ #define YY_HTMLParser_BISON 1 #ifndef YY_HTMLParser_COMPATIBILITY #ifndef YY_USE_CLASS #define YY_HTMLParser_COMPATIBILITY 1 #else #define YY_HTMLParser_COMPATIBILITY 0 #endif #endif #if YY_HTMLParser_COMPATIBILITY != 0 /* backward compatibility */ #ifdef YYLTYPE #ifndef YY_HTMLParser_LTYPE #define YY_HTMLParser_LTYPE YYLTYPE #endif #endif #ifdef YYSTYPE #ifndef YY_HTMLParser_STYPE #define YY_HTMLParser_STYPE YYSTYPE #endif #endif #ifdef YYDEBUG #ifndef YY_HTMLParser_DEBUG #define YY_HTMLParser_DEBUG YYDEBUG #endif #endif #ifdef YY_HTMLParser_STYPE #ifndef yystype #define yystype YY_HTMLParser_STYPE #endif #endif #endif #ifndef YY_HTMLParser_PURE /* #line 105 "/usr/local/lib/bison.cc" */ #line 105 "/usr/local/lib/bison.cc" /* YY_HTMLParser_PURE */ #endif /* section apres lecture def, avant lecture grammaire S2 */ /* #line 109 
"/usr/local/lib/bison.cc" */ #line 109 "/usr/local/lib/bison.cc" /* prefix */ #ifndef YY_HTMLParser_DEBUG /* #line 111 "/usr/local/lib/bison.cc" */ #line 111 "/usr/local/lib/bison.cc" /* YY_HTMLParser_DEBUG */ #endif #ifndef YY_HTMLParser_LSP_NEEDED /* #line 116 "/usr/local/lib/bison.cc" */ #line 116 "/usr/local/lib/bison.cc" /* YY_HTMLParser_LSP_NEEDED*/ #endif /* DEFAULT LTYPE*/ #ifdef YY_HTMLParser_LSP_NEEDED #ifndef YY_HTMLParser_LTYPE typedef struct yyltype { int timestamp; int first_line; int first_column; int last_line; int last_column; char *text; } yyltype; #define YY_HTMLParser_LTYPE yyltype #endif #endif /* DEFAULT STYPE*/ /* We used to use `unsigned long' as YY_HTMLParser_STYPE on MSDOS, but it seems better to be consistent. Most programs should declare their own type anyway. */ #ifndef YY_HTMLParser_STYPE #define YY_HTMLParser_STYPE int #endif /* DEFAULT MISCELANEOUS */ #ifndef YY_HTMLParser_PARSE #define YY_HTMLParser_PARSE yyparse #endif #ifndef YY_HTMLParser_LEX #define YY_HTMLParser_LEX yylex #endif #ifndef YY_HTMLParser_LVAL #define YY_HTMLParser_LVAL yylval #endif #ifndef YY_HTMLParser_LLOC #define YY_HTMLParser_LLOC yylloc #endif #ifndef YY_HTMLParser_CHAR #define YY_HTMLParser_CHAR yychar #endif #ifndef YY_HTMLParser_NERRS #define YY_HTMLParser_NERRS yynerrs #endif #ifndef YY_HTMLParser_DEBUG_FLAG #define YY_HTMLParser_DEBUG_FLAG yydebug #endif #ifndef YY_HTMLParser_ERROR #define YY_HTMLParser_ERROR yyerror #endif #ifndef YY_HTMLParser_PARSE_PARAM #ifndef __STDC__ #ifndef __cplusplus #ifndef YY_USE_CLASS #define YY_HTMLParser_PARSE_PARAM #ifndef YY_HTMLParser_PARSE_PARAM_DEF #define YY_HTMLParser_PARSE_PARAM_DEF #endif #endif #endif #endif #ifndef YY_HTMLParser_PARSE_PARAM #define YY_HTMLParser_PARSE_PARAM void #endif #endif /* TOKEN C */ #if YY_HTMLParser_COMPATIBILITY != 0 /* backward compatibility */ #ifdef YY_HTMLParser_LTYPE #ifndef YYLTYPE #define YYLTYPE YY_HTMLParser_LTYPE #else /* WARNING obsolete !!! 
user defined YYLTYPE not reported into generated header */ #endif #endif #ifndef YYSTYPE #define YYSTYPE YY_HTMLParser_STYPE #else /* WARNING obsolete !!! user defined YYSTYPE not reported into generated header */ #endif #ifdef YY_HTMLParser_PURE #ifndef YYPURE #define YYPURE YY_HTMLParser_PURE #endif #endif #ifdef YY_HTMLParser_DEBUG #ifndef YYDEBUG #define YYDEBUG YY_HTMLParser_DEBUG #endif #endif #ifndef YY_HTMLParser_ERROR_VERBOSE #ifdef YYERROR_VERBOSE #define YY_HTMLParser_ERROR_VERBOSE YYERROR_VERBOSE #endif #endif #ifndef YY_HTMLParser_LSP_NEEDED #ifdef YYLSP_NEEDED #define YY_HTMLParser_LSP_NEEDED YYLSP_NEEDED #endif #endif /* #line 223 "/usr/local/lib/bison.cc" */ #define DOCTYPE 258 #define PCDATA 259 #define SCAN_ERROR 260 #define A 261 #define ADDRESS 262 #define APPLET 263 #define AREA 264 #define B 265 #define BASE 266 #define BASEFONT 267 #define BIG 268 #define BLOCKQUOTE 269 #define BODY 270 #define BR 271 #define CAPTION 272 #define CENTER 273 #define CITE 274 #define CODE 275 #define DD 276 #define DFN 277 #define DIR 278 #define DIV 279 #define DL 280 #define DT 281 #define EM 282 #define FONT 283 #define FORM 284 #define H1 285 #define H2 286 #define H3 287 #define H4 288 #define H5 289 #define H6 290 #define HEAD 291 #define HR 292 #define HTML 293 #define I 294 #define IMG 295 #define INPUT 296 #define ISINDEX 297 #define KBD 298 #define LI 299 #define LINK 300 #define MAP 301 #define MENU 302 #define META 303 #define NOBR 304 #define OL 305 #define OPTION 306 #define P 307 #define PARAM 308 #define PRE 309 #define SAMP 310 #define SCRIPT 311 #define SELECT 312 #define SMALL 313 #define STRIKE 314 #define STRONG 315 #define STYLE 316 #define SUB 317 #define SUP 318 #define TABLE 319 #define TD 320 #define TEXTAREA 321 #define TH 322 #define TITLE 323 #define TR 324 #define TT 325 #define U 326 #define UL 327 #define VAR 328 #define END_A 329 #define END_ADDRESS 330 #define END_APPLET 331 #define END_B 332 #define END_BIG 333 #define 
END_BLOCKQUOTE 334 #define END_BODY 335 #define END_CAPTION 336 #define END_CENTER 337 #define END_CITE 338 #define END_CODE 339 #define END_DD 340 #define END_DFN 341 #define END_DIR 342 #define END_DIV 343 #define END_DL 344 #define END_DT 345 #define END_EM 346 #define END_FONT 347 #define END_FORM 348 #define END_H1 349 #define END_H2 350 #define END_H3 351 #define END_H4 352 #define END_H5 353 #define END_H6 354 #define END_HEAD 355 #define END_HTML 356 #define END_I 357 #define END_KBD 358 #define END_LI 359 #define END_MAP 360 #define END_MENU 361 #define END_NOBR 362 #define END_OL 363 #define END_OPTION 364 #define END_P 365 #define END_PRE 366 #define END_SAMP 367 #define END_SCRIPT 368 #define END_SELECT 369 #define END_SMALL 370 #define END_STRIKE 371 #define END_STRONG 372 #define END_STYLE 373 #define END_SUB 374 #define END_SUP 375 #define END_TABLE 376 #define END_TD 377 #define END_TEXTAREA 378 #define END_TH 379 #define END_TITLE 380 #define END_TR 381 #define END_TT 382 #define END_U 383 #define END_UL 384 #define END_VAR 385 #line 223 "/usr/local/lib/bison.cc" /* #defines tokens */ #else /* CLASS */ #ifndef YY_HTMLParser_CLASS #define YY_HTMLParser_CLASS HTMLParser #endif #ifndef YY_HTMLParser_INHERIT #define YY_HTMLParser_INHERIT #endif #ifndef YY_HTMLParser_MEMBERS #define YY_HTMLParser_MEMBERS #endif #ifndef YY_HTMLParser_LEX_BODY #define YY_HTMLParser_LEX_BODY #endif #ifndef YY_HTMLParser_ERROR_BODY #define YY_HTMLParser_ERROR_BODY #endif #ifndef YY_HTMLParser_CONSTRUCTOR_PARAM #define YY_HTMLParser_CONSTRUCTOR_PARAM #endif #ifndef YY_HTMLParser_CONSTRUCTOR_CODE #define YY_HTMLParser_CONSTRUCTOR_CODE #endif #ifndef YY_HTMLParser_CONSTRUCTOR_INIT #define YY_HTMLParser_CONSTRUCTOR_INIT #endif class YY_HTMLParser_CLASS YY_HTMLParser_INHERIT { public: /* static const int token ... 
*/ /* #line 254 "/usr/local/lib/bison.cc" */ static const int DOCTYPE; static const int PCDATA; static const int SCAN_ERROR; static const int A; static const int ADDRESS; static const int APPLET; static const int AREA; static const int B; static const int BASE; static const int BASEFONT; static const int BIG; static const int BLOCKQUOTE; static const int BODY; static const int BR; static const int CAPTION; static const int CENTER; static const int CITE; static const int CODE; static const int DD; static const int DFN; static const int DIR; static const int DIV; static const int DL; static const int DT; static const int EM; static const int FONT; static const int FORM; static const int H1; static const int H2; static const int H3; static const int H4; static const int H5; static const int H6; static const int HEAD; static const int HR; static const int HTML; static const int I; static const int IMG; static const int INPUT; static const int ISINDEX; static const int KBD; static const int LI; static const int LINK; static const int MAP; static const int MENU; static const int META; static const int NOBR; static const int OL; static const int OPTION; static const int P; static const int PARAM; static const int PRE; static const int SAMP; static const int SCRIPT; static const int SELECT; static const int SMALL; static const int STRIKE; static const int STRONG; static const int STYLE; static const int SUB; static const int SUP; static const int TABLE; static const int TD; static const int TEXTAREA; static const int TH; static const int TITLE; static const int TR; static const int TT; static const int U; static const int UL; static const int VAR; static const int END_A; static const int END_ADDRESS; static const int END_APPLET; static const int END_B; static const int END_BIG; static const int END_BLOCKQUOTE; static const int END_BODY; static const int END_CAPTION; static const int END_CENTER; static const int END_CITE; static const int END_CODE; static const int END_DD; 
static const int END_DFN; static const int END_DIR; static const int END_DIV; static const int END_DL; static const int END_DT; static const int END_EM; static const int END_FONT; static const int END_FORM; static const int END_H1; static const int END_H2; static const int END_H3; static const int END_H4; static const int END_H5; static const int END_H6; static const int END_HEAD; static const int END_HTML; static const int END_I; static const int END_KBD; static const int END_LI; static const int END_MAP; static const int END_MENU; static const int END_NOBR; static const int END_OL; static const int END_OPTION; static const int END_P; static const int END_PRE; static const int END_SAMP; static const int END_SCRIPT; static const int END_SELECT; static const int END_SMALL; static const int END_STRIKE; static const int END_STRONG; static const int END_STYLE; static const int END_SUB; static const int END_SUP; static const int END_TABLE; static const int END_TD; static const int END_TEXTAREA; static const int END_TH; static const int END_TITLE; static const int END_TR; static const int END_TT; static const int END_U; static const int END_UL; static const int END_VAR; #line 254 "/usr/local/lib/bison.cc" /* decl const */ public: int YY_HTMLParser_PARSE (YY_HTMLParser_PARSE_PARAM); virtual void YY_HTMLParser_ERROR(char *msg) YY_HTMLParser_ERROR_BODY; #ifdef YY_HTMLParser_PURE #ifdef YY_HTMLParser_LSP_NEEDED virtual int YY_HTMLParser_LEX (YY_HTMLParser_STYPE *YY_HTMLParser_LVAL,YY_HTMLParser_LTYPE *YY_HTMLParser_LLOC) YY_HTMLParser_LEX_BODY; #else virtual int YY_HTMLParser_LEX (YY_HTMLParser_STYPE *YY_HTMLParser_LVAL) YY_HTMLParser_LEX_BODY; #endif #else virtual int YY_HTMLParser_LEX() YY_HTMLParser_LEX_BODY; YY_HTMLParser_STYPE YY_HTMLParser_LVAL; #ifdef YY_HTMLParser_LSP_NEEDED YY_HTMLParser_LTYPE YY_HTMLParser_LLOC; #endif int YY_HTMLParser_NERRS; int YY_HTMLParser_CHAR; #endif #if YY_HTMLParser_DEBUG != 0 int YY_HTMLParser_DEBUG_FLAG; /* nonzero means print parse 
trace */ #endif public: YY_HTMLParser_CLASS(YY_HTMLParser_CONSTRUCTOR_PARAM); public: YY_HTMLParser_MEMBERS }; /* other declare folow */ /* #line 282 "/usr/local/lib/bison.cc" */ const int YY_HTMLParser_CLASS::DOCTYPE=258; const int YY_HTMLParser_CLASS::PCDATA=259; const int YY_HTMLParser_CLASS::SCAN_ERROR=260; const int YY_HTMLParser_CLASS::A=261; const int YY_HTMLParser_CLASS::ADDRESS=262; const int YY_HTMLParser_CLASS::APPLET=263; const int YY_HTMLParser_CLASS::AREA=264; const int YY_HTMLParser_CLASS::B=265; const int YY_HTMLParser_CLASS::BASE=266; const int YY_HTMLParser_CLASS::BASEFONT=267; const int YY_HTMLParser_CLASS::BIG=268; const int YY_HTMLParser_CLASS::BLOCKQUOTE=269; const int YY_HTMLParser_CLASS::BODY=270; const int YY_HTMLParser_CLASS::BR=271; const int YY_HTMLParser_CLASS::CAPTION=272; const int YY_HTMLParser_CLASS::CENTER=273; const int YY_HTMLParser_CLASS::CITE=274; const int YY_HTMLParser_CLASS::CODE=275; const int YY_HTMLParser_CLASS::DD=276; const int YY_HTMLParser_CLASS::DFN=277; const int YY_HTMLParser_CLASS::DIR=278; const int YY_HTMLParser_CLASS::DIV=279; const int YY_HTMLParser_CLASS::DL=280; const int YY_HTMLParser_CLASS::DT=281; const int YY_HTMLParser_CLASS::EM=282; const int YY_HTMLParser_CLASS::FONT=283; const int YY_HTMLParser_CLASS::FORM=284; const int YY_HTMLParser_CLASS::H1=285; const int YY_HTMLParser_CLASS::H2=286; const int YY_HTMLParser_CLASS::H3=287; const int YY_HTMLParser_CLASS::H4=288; const int YY_HTMLParser_CLASS::H5=289; const int YY_HTMLParser_CLASS::H6=290; const int YY_HTMLParser_CLASS::HEAD=291; const int YY_HTMLParser_CLASS::HR=292; const int YY_HTMLParser_CLASS::HTML=293; const int YY_HTMLParser_CLASS::I=294; const int YY_HTMLParser_CLASS::IMG=295; const int YY_HTMLParser_CLASS::INPUT=296; const int YY_HTMLParser_CLASS::ISINDEX=297; const int YY_HTMLParser_CLASS::KBD=298; const int YY_HTMLParser_CLASS::LI=299; const int YY_HTMLParser_CLASS::LINK=300; const int YY_HTMLParser_CLASS::MAP=301; const int 
YY_HTMLParser_CLASS::MENU=302; const int YY_HTMLParser_CLASS::META=303; const int YY_HTMLParser_CLASS::NOBR=304; const int YY_HTMLParser_CLASS::OL=305; const int YY_HTMLParser_CLASS::OPTION=306; const int YY_HTMLParser_CLASS::P=307; const int YY_HTMLParser_CLASS::PARAM=308; const int YY_HTMLParser_CLASS::PRE=309; const int YY_HTMLParser_CLASS::SAMP=310; const int YY_HTMLParser_CLASS::SCRIPT=311; const int YY_HTMLParser_CLASS::SELECT=312; const int YY_HTMLParser_CLASS::SMALL=313; const int YY_HTMLParser_CLASS::STRIKE=314; const int YY_HTMLParser_CLASS::STRONG=315; const int YY_HTMLParser_CLASS::STYLE=316; const int YY_HTMLParser_CLASS::SUB=317; const int YY_HTMLParser_CLASS::SUP=318; const int YY_HTMLParser_CLASS::TABLE=319; const int YY_HTMLParser_CLASS::TD=320; const int YY_HTMLParser_CLASS::TEXTAREA=321; const int YY_HTMLParser_CLASS::TH=322; const int YY_HTMLParser_CLASS::TITLE=323; const int YY_HTMLParser_CLASS::TR=324; const int YY_HTMLParser_CLASS::TT=325; const int YY_HTMLParser_CLASS::U=326; const int YY_HTMLParser_CLASS::UL=327; const int YY_HTMLParser_CLASS::VAR=328; const int YY_HTMLParser_CLASS::END_A=329; const int YY_HTMLParser_CLASS::END_ADDRESS=330; const int YY_HTMLParser_CLASS::END_APPLET=331; const int YY_HTMLParser_CLASS::END_B=332; const int YY_HTMLParser_CLASS::END_BIG=333; const int YY_HTMLParser_CLASS::END_BLOCKQUOTE=334; const int YY_HTMLParser_CLASS::END_BODY=335; const int YY_HTMLParser_CLASS::END_CAPTION=336; const int YY_HTMLParser_CLASS::END_CENTER=337; const int YY_HTMLParser_CLASS::END_CITE=338; const int YY_HTMLParser_CLASS::END_CODE=339; const int YY_HTMLParser_CLASS::END_DD=340; const int YY_HTMLParser_CLASS::END_DFN=341; const int YY_HTMLParser_CLASS::END_DIR=342; const int YY_HTMLParser_CLASS::END_DIV=343; const int YY_HTMLParser_CLASS::END_DL=344; const int YY_HTMLParser_CLASS::END_DT=345; const int YY_HTMLParser_CLASS::END_EM=346; const int YY_HTMLParser_CLASS::END_FONT=347; const int YY_HTMLParser_CLASS::END_FORM=348; const 
int YY_HTMLParser_CLASS::END_H1=349; const int YY_HTMLParser_CLASS::END_H2=350; const int YY_HTMLParser_CLASS::END_H3=351; const int YY_HTMLParser_CLASS::END_H4=352; const int YY_HTMLParser_CLASS::END_H5=353; const int YY_HTMLParser_CLASS::END_H6=354; const int YY_HTMLParser_CLASS::END_HEAD=355; const int YY_HTMLParser_CLASS::END_HTML=356; const int YY_HTMLParser_CLASS::END_I=357; const int YY_HTMLParser_CLASS::END_KBD=358; const int YY_HTMLParser_CLASS::END_LI=359; const int YY_HTMLParser_CLASS::END_MAP=360; const int YY_HTMLParser_CLASS::END_MENU=361; const int YY_HTMLParser_CLASS::END_NOBR=362; const int YY_HTMLParser_CLASS::END_OL=363; const int YY_HTMLParser_CLASS::END_OPTION=364; const int YY_HTMLParser_CLASS::END_P=365; const int YY_HTMLParser_CLASS::END_PRE=366; const int YY_HTMLParser_CLASS::END_SAMP=367; const int YY_HTMLParser_CLASS::END_SCRIPT=368; const int YY_HTMLParser_CLASS::END_SELECT=369; const int YY_HTMLParser_CLASS::END_SMALL=370; const int YY_HTMLParser_CLASS::END_STRIKE=371; const int YY_HTMLParser_CLASS::END_STRONG=372; const int YY_HTMLParser_CLASS::END_STYLE=373; const int YY_HTMLParser_CLASS::END_SUB=374; const int YY_HTMLParser_CLASS::END_SUP=375; const int YY_HTMLParser_CLASS::END_TABLE=376; const int YY_HTMLParser_CLASS::END_TD=377; const int YY_HTMLParser_CLASS::END_TEXTAREA=378; const int YY_HTMLParser_CLASS::END_TH=379; const int YY_HTMLParser_CLASS::END_TITLE=380; const int YY_HTMLParser_CLASS::END_TR=381; const int YY_HTMLParser_CLASS::END_TT=382; const int YY_HTMLParser_CLASS::END_U=383; const int YY_HTMLParser_CLASS::END_UL=384; const int YY_HTMLParser_CLASS::END_VAR=385; #line 282 "/usr/local/lib/bison.cc" /* const YY_HTMLParser_CLASS::token */ /*apres const */ YY_HTMLParser_CLASS::YY_HTMLParser_CLASS(YY_HTMLParser_CONSTRUCTOR_PARAM) YY_HTMLParser_CONSTRUCTOR_INIT { #if YY_HTMLParser_DEBUG != 0 YY_HTMLParser_DEBUG_FLAG=0; #endif YY_HTMLParser_CONSTRUCTOR_CODE; }; #endif /* #line 292 "/usr/local/lib/bison.cc" */ #define YYFINAL 
286 #define YYFLAG -32768 #define YYNTBASE 131 #define YYTRANSLATE(x) ((unsigned)(x) <= 385 ? yytranslate[x] : 209) static const short yytranslate[] = { 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130 }; #if YY_HTMLParser_DEBUG != 0 static const short yyprhs[] = { 0, 0, 2, 3, 6, 9, 12, 15, 18, 21, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 64, 65, 68, 71, 74, 77, 80, 83, 86, 90, 92, 96, 97, 100, 103, 106, 108, 110, 112, 116, 120, 124, 128, 130, 135, 136, 141, 142, 147, 148, 153, 154, 159, 160, 163, 166, 170, 172, 174, 180, 186, 187, 189, 192, 195, 199, 205, 209, 215, 217, 220, 223, 225, 227, 229, 233, 237, 238, 241, 246, 247, 250, 255, 261, 264, 268, 270, 273, 276, 279, 282, 285, 288, 293, 297, 301, 305, 309, 313, 317, 
321, 325, 329, 333, 337, 341, 345, 349, 353, 357, 361, 366, 368, 372, 376, 378, 380, 384, 385, 388, 391, 392, 395, 398, 400, 404, 408, 410, 413, 417, 419, 421, 423, 425, 427, 429, 431, 433, 435, 437, 439, 441, 442, 444, 445, 447, 448, 450, 451, 453, 454, 456, 457, 459, 460, 462, 463, 465, 466, 468, 469, 471, 472, 474, 475, 477, 478, 480, 481, 483, 484, 486, 487, 489, 490, 492, 493, 495, 496, 498, 499, 501, 502, 504, 505, 507, 508, 510, 511, 513, 514, 516, 517, 519, 520, 522, 523, 525, 526, 528, 529, 531, 532, 534, 535, 537, 538, 540, 541, 543, 544, 546, 547, 549, 550, 552, 553, 555, 556, 558, 559 }; #endif static const short yyrhs[] = { 132, 0, 0, 132, 1, 0, 132, 3, 0, 132, 38, 0, 132, 101, 0, 132, 36, 0, 132, 100, 0, 132, 68, 169, 202, 0, 132, 42, 0, 132, 11, 0, 132, 48, 0, 132, 45, 0, 132, 56, 0, 132, 61, 0, 132, 15, 0, 132, 80, 0, 132, 157, 0, 132, 135, 0, 132, 136, 0, 132, 156, 0, 4, 0, 0, 134, 1, 0, 134, 56, 0, 134, 61, 0, 134, 157, 0, 134, 135, 0, 134, 136, 0, 134, 156, 0, 167, 137, 168, 0, 138, 0, 52, 137, 192, 0, 0, 137, 1, 0, 137, 157, 0, 137, 138, 0, 139, 0, 152, 0, 146, 0, 24, 134, 183, 0, 18, 134, 179, 0, 14, 134, 177, 0, 29, 134, 187, 0, 37, 0, 64, 170, 154, 121, 0, 0, 50, 140, 144, 108, 0, 0, 72, 141, 144, 206, 0, 0, 23, 142, 144, 87, 0, 0, 47, 143, 144, 106, 0, 0, 144, 1, 0, 144, 145, 0, 44, 172, 190, 0, 136, 0, 157, 0, 25, 172, 208, 146, 184, 0, 25, 172, 208, 147, 89, 0, 0, 147, 0, 147, 148, 0, 147, 149, 0, 26, 172, 208, 0, 26, 172, 90, 174, 208, 0, 21, 172, 208, 0, 21, 172, 85, 174, 208, 0, 151, 0, 150, 1, 0, 150, 151, 0, 158, 0, 135, 0, 136, 0, 54, 171, 193, 0, 17, 171, 81, 0, 0, 154, 1, 0, 154, 69, 155, 203, 0, 0, 155, 1, 0, 155, 65, 134, 200, 0, 155, 67, 134, 201, 200, 0, 155, 41, 0, 7, 171, 75, 0, 158, 0, 157, 158, 0, 133, 208, 0, 159, 208, 0, 160, 208, 0, 161, 208, 0, 164, 208, 0, 49, 171, 107, 208, 0, 70, 171, 204, 0, 39, 171, 188, 0, 10, 171, 176, 0, 71, 171, 205, 0, 59, 171, 196, 0, 13, 171, 178, 0, 58, 171, 195, 0, 62, 171, 198, 0, 63, 
171, 199, 0, 27, 171, 185, 0, 60, 171, 197, 0, 22, 171, 182, 0, 20, 171, 181, 0, 55, 171, 194, 0, 43, 171, 189, 0, 73, 171, 207, 0, 19, 171, 180, 0, 6, 173, 172, 175, 0, 40, 0, 8, 162, 76, 0, 28, 172, 186, 0, 12, 0, 16, 0, 46, 163, 105, 0, 0, 162, 158, 0, 162, 53, 0, 0, 163, 1, 0, 163, 9, 0, 41, 0, 57, 165, 114, 0, 66, 133, 123, 0, 166, 0, 165, 166, 0, 51, 133, 191, 0, 30, 0, 31, 0, 32, 0, 33, 0, 34, 0, 35, 0, 94, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 0, 133, 0, 0, 153, 0, 0, 157, 0, 0, 150, 0, 0, 44, 0, 0, 52, 0, 0, 74, 0, 0, 77, 0, 0, 79, 0, 0, 78, 0, 0, 82, 0, 0, 83, 0, 0, 84, 0, 0, 86, 0, 0, 88, 0, 0, 89, 0, 0, 91, 0, 0, 92, 0, 0, 93, 0, 0, 102, 0, 0, 103, 0, 0, 104, 0, 0, 109, 0, 0, 110, 0, 0, 111, 0, 0, 112, 0, 0, 115, 0, 0, 116, 0, 0, 117, 0, 0, 119, 0, 0, 120, 0, 0, 122, 0, 0, 124, 0, 0, 125, 0, 0, 126, 0, 0, 127, 0, 0, 128, 0, 0, 129, 0, 0, 130, 0, 0, 1, 0 }; #if YY_HTMLParser_DEBUG != 0 static const short yyrline[] = { 0, 273, 304, 309, 312, 315, 319, 322, 326, 329, 333, 336, 339, 342, 345, 353, 361, 365, 368, 373, 376, 379, 384, 392, 396, 399, 407, 415, 420, 423, 426, 431, 442, 446, 454, 458, 461, 466, 471, 475, 478, 481, 487, 493, 499, 505, 510, 519, 520, 527, 527, 534, 534, 541, 541, 550, 554, 557, 563, 570, 575, 582, 591, 600, 604, 607, 611, 617, 623, 631, 637, 645, 650, 653, 658, 662, 665, 670, 678, 686, 690, 693, 701, 705, 708, 714, 721, 727, 737, 742, 747, 749, 750, 751, 752, 753, 761, 763, 764, 765, 766, 767, 768, 769, 770, 773, 775, 776, 777, 778, 779, 780, 781, 784, 795, 800, 808, 814, 819, 824, 832, 836, 840, 848, 852, 855, 861, 867, 873, 881, 886, 891, 901, 903, 904, 905, 906, 907, 910, 912, 913, 914, 915, 916, 921, 921, 922, 922, 923, 923, 924, 924, 926, 926, 927, 927, 929, 929, 930, 930, 931, 931, 932, 932, 933, 933, 934, 934, 935, 935, 936, 936, 937, 937, 938, 938, 939, 939, 940, 940, 941, 941, 942, 942, 943, 943, 944, 944, 945, 945, 946, 946, 947, 947, 948, 948, 949, 949, 950, 950, 951, 951, 952, 952, 953, 953, 954, 954, 955, 955, 956, 
956, 957, 957, 958, 958, 959, 959, 960, 960, 961, 961, 963, 963 }; static const char * const yytname[] = { "$","error","$illegal.","DOCTYPE", "PCDATA","SCAN_ERROR","A","ADDRESS","APPLET","AREA","B","BASE","BASEFONT","BIG", "BLOCKQUOTE","BODY","BR","CAPTION","CENTER","CITE","CODE","DD","DFN","DIR","DIV", "DL","DT","EM","FONT","FORM","H1","H2","H3","H4","H5","H6","HEAD","HR","HTML", "I","IMG","INPUT","ISINDEX","KBD","LI","LINK","MAP","MENU","META","NOBR","OL", "OPTION","P","PARAM","PRE","SAMP","SCRIPT","SELECT","SMALL","STRIKE","STRONG", "STYLE","SUB","SUP","TABLE","TD","TEXTAREA","TH","TITLE","TR","TT","U","UL", "VAR","END_A","END_ADDRESS","END_APPLET","END_B","END_BIG","END_BLOCKQUOTE", "END_BODY","END_CAPTION","END_CENTER","END_CITE","END_CODE","END_DD","END_DFN", "END_DIR","END_DIV","END_DL","END_DT","END_EM","END_FONT","END_FORM","END_H1", "END_H2","END_H3","END_H4","END_H5","END_H6","END_HEAD","END_HTML","END_I","END_KBD", "END_LI","END_MAP","END_MENU","END_NOBR","END_OL","END_OPTION","END_P","END_PRE", "END_SAMP","END_SCRIPT","END_SELECT","END_SMALL","END_STRIKE","END_STRONG","END_STYLE", "END_SUB","END_SUP","END_TABLE","END_TD","END_TEXTAREA","END_TH","END_TITLE", "END_TR","END_TT","END_U","END_UL","END_VAR","document","document_","pcdata", "body_content","heading","block","paragraph_content","block_except_p","list", "@1","@2","@3","@4","list_content","list_item","definition_list","definition_list_content", "term_name","term_definition","flow","flow_","preformatted","caption","table_rows", "table_cells","address","texts","text","font","phrase","special","applet_content", "map_content","form","select_content","option","HX","END_HX","opt_pcdata","opt_caption", "opt_texts","opt_flow","opt_LI","opt_P","opt_END_A","opt_END_B","opt_END_BLOCKQUOTE", "opt_END_BIG","opt_END_CENTER","opt_END_CITE","opt_END_CODE","opt_END_DFN","opt_END_DIV", "opt_END_DL","opt_END_EM","opt_END_FONT","opt_END_FORM","opt_END_I","opt_END_KBD", 
"opt_END_LI","opt_END_OPTION","opt_END_P","opt_END_PRE","opt_END_SAMP","opt_END_SMALL", "opt_END_STRIKE","opt_END_STRONG","opt_END_SUB","opt_END_SUP","opt_END_TD","opt_END_TH", "opt_END_TITLE","opt_END_TR","opt_END_TT","opt_END_U","opt_END_UL","opt_END_VAR", "opt_error","" }; #endif static const short yyr1[] = { 0, 131, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 133, 134, 134, 134, 134, 134, 134, 134, 134, 135, 136, 136, 137, 137, 137, 137, 138, 138, 138, 138, 138, 138, 138, 138, 138, 140, 139, 141, 139, 142, 139, 143, 139, 144, 144, 144, 145, 145, 145, 146, 146, 147, 147, 147, 147, 148, 148, 149, 149, 150, 150, 150, 151, 151, 151, 152, 153, 154, 154, 154, 155, 155, 155, 155, 155, 156, 157, 157, 158, 158, 158, 158, 158, 158, 159, 159, 159, 159, 159, 159, 159, 159, 159, 160, 160, 160, 160, 160, 160, 160, 160, 161, 161, 161, 161, 161, 161, 161, 162, 162, 162, 163, 163, 163, 164, 164, 164, 165, 165, 166, 167, 167, 167, 167, 167, 167, 168, 168, 168, 168, 168, 168, 169, 169, 170, 170, 171, 171, 172, 172, 173, 173, 174, 174, 175, 175, 176, 176, 177, 177, 178, 178, 179, 179, 180, 180, 181, 181, 182, 182, 183, 183, 184, 184, 185, 185, 186, 186, 187, 187, 188, 188, 189, 189, 190, 190, 191, 191, 192, 192, 193, 193, 194, 194, 195, 195, 196, 196, 197, 197, 198, 198, 199, 199, 200, 200, 201, 201, 202, 202, 203, 203, 204, 204, 205, 205, 206, 206, 207, 207, 208, 208 }; static const short yyr2[] = { 0, 1, 0, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 2, 2, 2, 3, 1, 3, 0, 2, 2, 2, 1, 1, 1, 3, 3, 3, 3, 1, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 2, 2, 3, 1, 1, 5, 5, 0, 1, 2, 2, 3, 5, 3, 5, 1, 2, 2, 1, 1, 1, 3, 3, 0, 2, 4, 0, 2, 4, 5, 2, 3, 1, 2, 2, 2, 2, 2, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 1, 3, 3, 1, 1, 3, 0, 2, 2, 0, 2, 2, 1, 3, 3, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }; static const short yydefact[] = { 2, 0, 3, 4, 22, 152, 148, 120, 148, 11, 117, 148, 23, 16, 118, 23, 148, 148, 148, 51, 23, 150, 148, 150, 23, 132, 133, 134, 135, 136, 137, 7, 45, 5, 148, 114, 126, 10, 148, 13, 123, 53, 12, 148, 47, 34, 148, 148, 14, 0, 148, 148, 148, 15, 148, 148, 146, 0, 144, 148, 148, 49, 148, 17, 8, 6, 0, 19, 20, 32, 38, 40, 39, 21, 18, 88, 0, 0, 0, 0, 34, 153, 150, 149, 0, 0, 158, 162, 0, 0, 166, 168, 170, 55, 0, 75, 76, 0, 71, 74, 0, 176, 178, 0, 182, 184, 0, 55, 0, 55, 0, 192, 194, 0, 0, 129, 196, 198, 200, 202, 204, 148, 147, 79, 0, 145, 210, 214, 216, 55, 220, 223, 90, 89, 91, 92, 93, 94, 0, 156, 87, 122, 115, 121, 159, 98, 163, 101, 24, 25, 26, 161, 28, 29, 30, 27, 43, 165, 42, 167, 112, 169, 108, 171, 107, 0, 173, 41, 72, 73, 63, 177, 105, 179, 116, 181, 44, 183, 97, 185, 110, 124, 125, 119, 0, 0, 0, 35, 191, 37, 36, 33, 193, 77, 195, 109, 188, 127, 130, 197, 102, 199, 100, 201, 106, 203, 103, 205, 104, 0, 0, 128, 211, 9, 215, 96, 217, 99, 0, 221, 111, 138, 139, 140, 141, 142, 143, 31, 157, 113, 56, 150, 52, 59, 57, 60, 174, 0, 54, 95, 48, 189, 131, 78, 80, 82, 46, 219, 50, 186, 175, 61, 150, 150, 62, 65, 66, 0, 187, 58, 0, 0, 83, 86, 23, 23, 213, 81, 154, 69, 154, 67, 0, 0, 155, 0, 0, 207, 84, 209, 206, 70, 68, 85, 0, 0, 0 }; static const short yydefgoto[] = { 284, 1, 66, 88, 95, 96, 110, 69, 70, 109, 129, 93, 107, 165, 234, 71, 237, 255, 256, 97, 98, 72, 122, 210, 257, 154, 83, 75, 76, 77, 78, 85, 106, 79, 114, 115, 80, 227, 126, 123, 84, 100, 82, 275, 229, 145, 156, 147, 158, 160, 162, 164, 167, 251, 172, 174, 176, 178, 180, 259, 242, 191, 193, 195, 200, 202, 204, 206, 208, 278, 280, 213, 267, 215, 217, 248, 220, 132 }; static const short yypact[] = {-32768, 1740,-32768,-32768,-32768, -24, 2148,-32768, 2148,-32768,-32768, 2148,-32768,-32768,-32768,-32768, 2148, 2148, 2148,-32768,-32768, 2078, 
2148, 2078,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -32768,-32768,-32768, 2148,-32768,-32768,-32768, 2148,-32768,-32768, -32768,-32768, 2148,-32768,-32768, 2148, 2148,-32768, -26, 2148, 2148, 2148,-32768, 2148, 2148, 14, 28, 28, 2148, 2148, -32768, 2148,-32768,-32768,-32768, 283,-32768,-32768,-32768,-32768, -32768,-32768,-32768, 2148,-32768, 283, 283, 283, 283,-32768, -32768, 2078, 2148, -42, 2007, -43, -41, 414, 545, -48, -45, -46,-32768, 676,-32768,-32768, 807,-32768,-32768, 21, -50, -40, 938, -58, -52, 8,-32768, -51,-32768, 1069, -53, -49, 28, -32,-32768, -56, -55, -47, -57, -39, 2148,-32768,-32768, -59,-32768, -36, -60, -54,-32768, -44, -32768,-32768,-32768,-32768,-32768,-32768,-32768, 1838, -2,-32768, -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -32768,-32768,-32768,-32768, 2148,-32768,-32768,-32768,-32768,-32768, -32768,-32768,-32768,-32768, 1937,-32768,-32768,-32768,-32768, 50, -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -32768,-32768,-32768, 1660, 283, 1587,-32768,-32768,-32768, 2148, -32768,-32768,-32768,-32768,-32768, -31,-32768,-32768,-32768,-32768, -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -4, 7, -32768,-32768,-32768,-32768,-32768,-32768,-32768, 1200,-32768,-32768, -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, 2078,-32768,-32768,-32768, 2148, -6, -5,-32768,-32768,-32768, -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -19,-32768, -32768, 2078, 2078,-32768,-32768,-32768, 4,-32768,-32768, 29, 17,-32768,-32768,-32768,-32768,-32768,-32768, 40,-32768, 40, -32768, 1454, 1330,-32768, 27, 27,-32768,-32768,-32768, -28, -32768,-32768,-32768, 95, 99,-32768 }; static const short yypgoto[] = {-32768, -32768, -34, -13, 9, 2, 20, -84,-32768,-32768,-32768, -32768,-32768, -80,-32768, -69,-32768,-32768,-32768,-32768, 11, -32768,-32768,-32768,-32768, 103, -1, -17,-32768,-32768,-32768, -32768,-32768,-32768,-32768, -3,-32768,-32768,-32768,-32768, 1527, -22,-32768, 
-155,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, -163,-32768, -32768,-32768,-32768,-32768,-32768,-32768, -64 }; #define YYLAST 2221 static const short yytable[] = { 74, 102, 89, 68, 99, 262, 99, 94, 244, 181, 67, 103, 134, 135, 136, 137, 252, 182, 131, 113, 81, 253, 131, 124, 125, 113, 189, 184, 131, 186, 131, 121, 4, 140, 144, 159, 170, 146, -222, 161, 163, 171, -222, -222, 177, 263, -222, -222, -222, 218, -222, 179, 173, -222, 189, -222, 185, 133, 192, 199, 139, 201, 205, 194, 211, 99, 133, 214, 143, 264, 203, 265, 228, -212, 216, 21, 245, 243, 241, 196, 99, 207, 197, 250, 254, 258, 219, 155, 155, 212, 153, 153, 274, 155, 277, 285, 153, 152, 152, 286, 138, 236, 155, 152, 73, 153, -222, 270, 169, 190, -222, 198, 152, 183, 268, 276, -222, 283, -222, 0, 0, 239, 0, 0, 0, -212, 0, 0, 246, 0, 266, 0, 0, 0, 0, 0, 0, 190, 133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 235, 0, 0, 233, 0, 0, 0, 0, 0, 133, 0, 0, 0, 0, 0, 0, 0, 0, 0, 235, 0, 235, 233, 0, 233, 0, 0, 0, 0, 0, 0, 0, 269, 271, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 0, 281, 282, 0, 99, 0, 0, 235, 133, 0, 233, 0, 0, 0, 0, 0, 0, 0, 0, 0, 260, 261, 0, 0, 0, 99, 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 272, 273, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 155, 155, 0, 153, 153, 0, 0, 0, 0, 0, 152, 152, -222, 131, 0, -222, -222, 0, -222, -222, -222, 0, -222, -222, -222, -222, -222, -222, -222, 0, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, 0, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, 
-222, -222, -222, -222, -222, -222, -222, -222, -222, -222, -222, 0, -222, -222, -222, 0, -222, -222, -222, 0, 0, -222, -222, -222, 0, -222, -222, -222, -222, 0, -222, 0, -222, -222, -222, -222, -222, -160, 148, 0, -160, 4, 0, 5, 6, 7, 0, 8, -160, 10, 11, 12, -160, 14, 0, 15, 16, 17, -160, 18, 19, 20, 21, -160, 22, 23, 24, 25, 26, 27, 28, 29, 30, -160, 32, -160, 34, 35, 36, -160, 38, -160, -160, 40, 41, -160, 43, 44, 0, 45, -160, 46, 47, 149, 49, 50, 51, 52, 150, 54, 55, 56, -160, 57, -160, -160, -160, 59, 60, 61, 62, -160, -160, -160, -160, -160, 151, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, -160, 0, -160, -160, -160, 0, -160, -160, -160, 0, 0, -160, -160, -160, 0, -160, -160, -160, -160, 0, -160, 0, -160, -160, -160, -160, -160, -164, 148, 0, -164, 4, 0, 5, 6, 7, 0, 8, -164, 10, 11, 12, -164, 14, 0, 15, 16, 17, -164, 18, 19, 20, 21, -164, 22, 23, 24, 25, 26, 27, 28, 29, 30, -164, 32, -164, 34, 35, 36, -164, 38, -164, -164, 40, 41, -164, 43, 44, 0, 45, -164, 46, 47, 149, 49, 50, 51, 52, 150, 54, 55, 56, -164, 57, -164, -164, -164, 59, 60, 61, 62, -164, -164, -164, -164, -164, -164, -164, -164, 157, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, -164, 0, -164, -164, -164, 0, -164, -164, -164, 0, 0, -164, -164, -164, 0, -164, -164, -164, -164, 0, -164, 0, -164, -164, -164, -164, -164, -172, 148, 0, -172, 4, 0, 5, 6, 7, 0, 8, -172, 10, 11, 12, -172, 14, 0, 15, 16, 17, -172, 18, 19, 20, 21, -172, 22, 23, 24, 25, 26, 27, 28, 29, 30, -172, 32, -172, 34, 35, 36, -172, 38, -172, -172, 40, 41, -172, 43, 44, 0, 45, -172, 46, 47, 149, 49, 50, 51, 52, 150, 54, 55, 56, -172, 57, -172, -172, -172, 59, 60, 61, 62, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, 166, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, -172, 0, 
-172, -172, -172, 0, -172, -172, -172, 0, 0, -172, -172, -172, 0, -172, -172, -172, -172, 0, -172, 0, -172, -172, -172, -172, -172, -151, 168, 0, -151, 4, 0, 5, -151, 7, 0, 8, -151, 10, 11, 12, -151, 14, 0, 15, 16, 17, -151, 18, 19, 20, 21, -151, 22, 23, 24, 25, 26, 27, 28, 29, 30, -151, 32, -151, 34, 35, 36, -151, 38, -151, -151, 40, 41, -151, 43, 44, 0, 45, -151, 46, 47, -151, 49, 50, 51, 52, -151, 54, 55, 56, -151, 57, -151, -151, -151, 59, 60, 61, 62, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, -151, 0, -151, -151, -151, 0, -151, -151, -151, 0, 0, -151, -151, -151, 0, -151, -151, -151, -151, 0, -151, 0, -151, -151, -151, -151, -151, -180, 148, 0, -180, 4, 0, 5, 6, 7, 0, 8, -180, 10, 11, 12, -180, 14, 0, 15, 16, 17, -180, 18, 19, 20, 21, -180, 22, 23, 24, 25, 26, 27, 28, 29, 30, -180, 32, -180, 34, 35, 36, -180, 38, -180, -180, 40, 41, -180, 43, 44, 0, 45, -180, 46, 47, 149, 49, 50, 51, 52, 150, 54, 55, 56, -180, 57, -180, -180, -180, 59, 60, 61, 62, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, 175, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, -180, 0, -180, -180, -180, 0, -180, -180, -180, 0, 0, -180, -180, -180, 0, -180, -180, -180, -180, 0, -180, 0, -180, -180, -180, -180, -180, -190, 187, 0, -190, 4, 0, 5, -190, 7, 0, 8, -190, 10, 11, 12, -190, 14, 0, 15, 16, 17, -190, 18, 19, 20, 21, -190, 22, 23, 24, -190, -190, -190, -190, -190, -190, -190, 32, -190, 34, 35, 36, -190, 38, -190, -190, 40, 41, -190, 43, 44, 0, -190, -190, 46, 47, -190, 49, 50, 51, 52, -190, 54, 55, 56, -190, 57, -190, -190, -190, 59, 60, 61, 62, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, -190, 0, -190, -190, -190, 0, 188, -190, -190, 0, 0, 
-190, -190, -190, 0, -190, -190, -190, -190, 0, -190, 0, -190, -190, -190, -190, -190, -218, 230, 0, -218, 4, 0, 5, -218, 7, 0, 8, -218, 10, 11, 12, -218, 14, 0, 15, 16, 17, -218, 18, 19, 20, 21, -218, 22, 23, 24, -218, -218, -218, -218, -218, -218, -218, 32, -218, 34, 35, 36, -218, 38, 231, -218, 40, 41, -218, 43, 44, 0, 45, -218, 46, 47, -218, 49, 50, 51, 52, -218, 54, 55, 56, -218, 57, -218, -218, -218, 59, 60, 61, 62, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, -218, 0, -218, -218, -218, 0, -218, -218, -218, 0, 0, -218, -218, -218, 0, -218, -218, -218, -218, 0, -218, 0, -218, -218, -218, 247, -218, 148, 0, 0, 4, 0, 5, 6, 7, 0, 8, 0, 10, 11, 12, 0, 14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 32, 0, 34, 35, 36, 0, 38, 0, 0, 40, 41, 0, 43, 44, 0, 45, 0, 46, 47, 149, 49, 50, 51, 52, 150, 54, 55, 56, -208, 57, -208, 0, -208, 59, 60, 61, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -208, -208, 0, 279, 148, -208, 0, 4, 0, 5, 6, 7, 0, 8, 0, 10, 11, 12, 0, 14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 32, 0, 34, 35, 36, 0, 38, 0, 0, 40, 41, 0, 43, 44, 0, 45, 0, 46, 47, 149, 49, 50, 51, 52, 150, 54, 55, 56, -206, 57, -206, 0, -206, 59, 60, 61, 62, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 87, 0, 0, 0, 0, 90, 91, 92, 0, 0, 0, 101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 104, 0, 0, 0, 105, 0, 0, 0, 0, 108, 0, 0, 111, 112, -206, 277, 116, 117, 118, -206, 119, 120, 0, 0, 0, 127, 128, 230, 130, 0, 4, 0, 5, 0, 7, 0, 8, 0, 10, 11, 12, 0, 14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 0, 22, 23, 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 34, 35, 36, 0, 38, 231, 0, 40, 41, 0, 43, 44, 0, 45, 0, 46, 47, 0, 49, 50, 51, 52, 209, 54, 55, 56, 0, 57, 0, 0, 0, 59, 60, 61, 62, 230, 0, 0, 4, 0, 5, 0, 7, 0, 8, 0, 10, 11, 12, 0, 
14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 0, 22, 23, 24, 0, 0, 0, 0, 0, 240, 0, 32, 0, 34, 35, 36, 0, 38, 231, 0, 40, 41, 0, 43, 44, 0, 45, 0, 46, 47, 0, 49, 50, 51, 52, 0, 54, 55, 56, 0, 57, 0, 0, 0, 59, 60, 61, 62, 0, 0, 0, 0, 0, 0, -1, 2, 0, 3, 4, 0, 5, 6, 7, 0, 8, 9, 10, 11, 12, 13, 14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 238, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 0, 39, 40, 41, 42, 43, 44, 0, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 0, 57, 0, 58, 0, 59, 60, 61, 62, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 187, 64, 65, 4, 0, 5, 0, 7, 0, 8, 0, 10, 11, 12, 0, 14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 0, 22, 23, 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 34, 35, 36, 0, 38, 0, 0, 40, 41, 0, 43, 44, 0, 0, 0, 46, 47, 0, 49, 50, 51, 52, 0, 54, 55, 56, 0, 57, 0, 0, 0, 59, 60, 61, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 221, 222, 223, 224, 225, 226, 230, 0, 0, 4, 0, 5, 0, 7, 0, 8, 0, 10, 11, 12, 0, 14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 0, 22, 23, 24, 0, 0, 0, 0, 0, 0, 0, 32, 0, 34, 35, 36, 0, 38, 231, 0, 40, 41, 0, 43, 44, 0, 45, 0, 46, 47, 0, 49, 50, 51, 52, 0, 54, 55, 56, 0, 57, 0, 0, 0, 59, 60, 61, 62, 4, 0, 5, 0, 7, 0, 8, 0, 10, 11, 0, 0, 14, 232, 0, 16, 17, 0, 18, 0, 0, 0, 0, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 35, 36, 0, 38, 0, 0, 40, 0, 0, 43, 0, 0, 0, 141, 0, 47, 0, 49, 50, 51, 52, 0, 54, 55, 0, 0, 57, 0, 0, 0, 59, 60, 0, 62, 0, 4, 142, 5, 0, 7, 0, 8, 0, 10, 11, 12, 0, 14, 0, 15, 16, 17, 0, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 32, 0, 34, 35, 36, 0, 38, 0, 0, 40, 41, 0, 43, 44, 0, 45, 0, 46, 47, 0, 49, 50, 51, 52, 0, 54, 55, 56, 0, 57, 0, 0, 0, 59, 60, 61, 62, 4, 0, 5, 0, 7, 0, 8, 0, 10, 11, 0, 0, 14, 0, 0, 16, 17, 0, 18, 0, 0, 0, 0, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 35, 36, 0, 38, 0, 0, 40, 0, 0, 43, 0, 0, 0, 0, 0, 47, 0, 49, 50, 51, 52, 0, 54, 55, 0, 0, 57, 0, 0, 0, 59, 60, 0, 62 }; static const short yycheck[] = { 1, 23, 15, 1, 21, 1, 23, 20, 1, 1, 
1, 24, 76, 77, 78, 79, 21, 9, 1, 51, 44, 26, 1, 57, 58, 51, 110, 107, 1, 109, 1, 17, 4, 75, 77, 83, 100, 78, 21, 84, 86, 91, 21, 26, 102, 41, 25, 26, 21, 129, 21, 103, 92, 26, 138, 26, 107, 74, 111, 115, 82, 116, 119, 112, 123, 82, 83, 127, 85, 65, 117, 67, 74, 69, 128, 25, 69, 81, 109, 113, 97, 120, 114, 89, 89, 104, 130, 88, 89, 125, 88, 89, 52, 94, 122, 0, 94, 88, 89, 0, 80, 170, 103, 94, 1, 103, 89, 90, 97, 110, 89, 114, 103, 105, 85, 270, 89, 280, 89, -1, -1, 185, -1, -1, -1, 121, -1, -1, 121, -1, 126, -1, -1, -1, -1, -1, -1, 138, 155, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 165, -1, -1, 165, -1, -1, -1, -1, -1, 190, -1, -1, -1, -1, -1, -1, -1, -1, -1, 184, -1, 186, 184, -1, 186, -1, -1, -1, -1, -1, -1, -1, 260, 261, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 231, -1, 275, 276, -1, 231, -1, -1, 218, 235, -1, 218, -1, -1, -1, -1, -1, -1, -1, -1, -1, 252, 253, -1, -1, -1, 252, 253, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 264, 265, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 272, 273, -1, 272, 273, -1, -1, -1, -1, -1, 272, 273, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 
37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, -1, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, -1, 106, 107, 108, -1, 110, 111, 112, -1, -1, 115, 116, 117, -1, 119, 120, 121, 122, -1, 124, -1, 126, 127, 128, 129, 130, 1, -1, -1, 4, -1, 6, 7, 8, -1, 10, -1, 12, 13, 14, -1, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, -1, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, 37, -1, 39, 40, 41, -1, 43, -1, -1, 46, 47, -1, 49, 50, -1, 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, -1, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 121, 122, -1, 124, 1, 126, -1, 4, -1, 6, 
7, 8, -1, 10, -1, 12, 13, 14, -1, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, -1, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, 37, -1, 39, 40, 41, -1, 43, -1, -1, 46, 47, -1, 49, 50, -1, 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, -1, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, -1, 8, -1, -1, 11, -1, -1, -1, -1, 16, 17, 18, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 34, -1, -1, -1, 38, -1, -1, -1, -1, 43, -1, -1, 46, 47, 121, 122, 50, 51, 52, 126, 54, 55, -1, -1, -1, 59, 60, 1, 62, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, 13, 14, -1, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, -1, 27, 28, 29, -1, -1, -1, -1, -1, -1, -1, 37, -1, 39, 40, 41, -1, 43, 44, -1, 46, 47, -1, 49, 50, -1, 52, -1, 54, 55, -1, 57, 58, 59, 60, 121, 62, 63, 64, -1, 66, -1, -1, -1, 70, 71, 72, 73, 1, -1, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, 13, 14, -1, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, -1, 27, 28, 29, -1, -1, -1, -1, -1, 108, -1, 37, -1, 39, 40, 41, -1, 43, 44, -1, 46, 47, -1, 49, 50, -1, 52, -1, 54, 55, -1, 57, 58, 59, 60, -1, 62, 63, 64, -1, 66, -1, -1, -1, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, 0, 1, -1, 3, 4, -1, 6, 7, 8, -1, 10, 11, 12, 13, 14, 15, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, 106, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, -1, 45, 46, 47, 48, 49, 50, -1, 52, -1, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, -1, 66, -1, 68, -1, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, 80, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 100, 101, 4, -1, 6, -1, 8, -1, 10, -1, 12, 13, 14, -1, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, -1, 27, 28, 29, -1, -1, -1, -1, -1, -1, -1, 37, -1, 39, 40, 41, -1, 43, -1, -1, 46, 47, -1, 49, 50, -1, -1, -1, 54, 55, -1, 57, 58, 59, 60, -1, 62, 63, 64, -1, 66, -1, -1, -1, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 94, 95, 96, 97, 98, 99, 1, -1, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, 13, 14, -1, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, -1, 27, 
28, 29, -1, -1, -1, -1, -1, -1, -1, 37, -1, 39, 40, 41, -1, 43, 44, -1, 46, 47, -1, 49, 50, -1, 52, -1, 54, 55, -1, 57, 58, 59, 60, -1, 62, 63, 64, -1, 66, -1, -1, -1, 70, 71, 72, 73, 4, -1, 6, -1, 8, -1, 10, -1, 12, 13, -1, -1, 16, 87, -1, 19, 20, -1, 22, -1, -1, -1, -1, 27, 28, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, 40, 41, -1, 43, -1, -1, 46, -1, -1, 49, -1, -1, -1, 53, -1, 55, -1, 57, 58, 59, 60, -1, 62, 63, -1, -1, 66, -1, -1, -1, 70, 71, -1, 73, -1, 4, 76, 6, -1, 8, -1, 10, -1, 12, 13, 14, -1, 16, -1, 18, 19, 20, -1, 22, 23, 24, 25, -1, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, 37, -1, 39, 40, 41, -1, 43, -1, -1, 46, 47, -1, 49, 50, -1, 52, -1, 54, 55, -1, 57, 58, 59, 60, -1, 62, 63, 64, -1, 66, -1, -1, -1, 70, 71, 72, 73, 4, -1, 6, -1, 8, -1, 10, -1, 12, 13, -1, -1, 16, -1, -1, 19, 20, -1, 22, -1, -1, -1, -1, 27, 28, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 39, 40, 41, -1, 43, -1, -1, 46, -1, -1, 49, -1, -1, -1, -1, -1, 55, -1, 57, 58, 59, 60, -1, 62, 63, -1, -1, 66, -1, -1, -1, 70, 71, -1, 73 }; #line 292 "/usr/local/lib/bison.cc" /* fattrs + tables */ /* parser code folow */ /* This is the parser code that is written into each bison parser when the %semantic_parser declaration is not specified in the grammar. It was written by Richard Stallman by simplifying the hairy parser used when %semantic_parser is specified. */ /* Note: dollar marks section change the next is replaced by the list of actions, each action as one case of the switch. */ #define yyerrok (yyerrstatus = 0) #define yyclearin (YY_HTMLParser_CHAR = YYEMPTY) #define YYEMPTY -2 #define YYEOF 0 #define YYACCEPT return(0) #define YYABORT return(1) #define YYERROR goto yyerrlab1 /* Like YYERROR except do call yyerror. This remains here temporarily to ease the transition to the new meaning of YYERROR, for GCC. Once GCC version 2 has supplanted version 1, this can go. 
*/ #define YYFAIL goto yyerrlab #define YYRECOVERING() (!!yyerrstatus) #define YYBACKUP(token, value) \ do \ if (YY_HTMLParser_CHAR == YYEMPTY && yylen == 1) \ { YY_HTMLParser_CHAR = (token), YY_HTMLParser_LVAL = (value); \ yychar1 = YYTRANSLATE (YY_HTMLParser_CHAR); \ YYPOPSTACK; \ goto yybackup; \ } \ else \ { YY_HTMLParser_ERROR ("syntax error: cannot back up"); YYERROR; } \ while (0) #define YYTERROR 1 #define YYERRCODE 256 #ifndef YY_HTMLParser_PURE /* UNPURE */ #define YYLEX YY_HTMLParser_LEX() #ifndef YY_USE_CLASS /* If nonreentrant, and not class , generate the variables here */ int YY_HTMLParser_CHAR; /* the lookahead symbol */ YY_HTMLParser_STYPE YY_HTMLParser_LVAL; /* the semantic value of the */ /* lookahead symbol */ int YY_HTMLParser_NERRS; /* number of parse errors so far */ #ifdef YY_HTMLParser_LSP_NEEDED YY_HTMLParser_LTYPE YY_HTMLParser_LLOC; /* location data for the lookahead */ /* symbol */ #endif #endif #else /* PURE */ #ifdef YY_HTMLParser_LSP_NEEDED #define YYLEX YY_HTMLParser_LEX(&YY_HTMLParser_LVAL, &YY_HTMLParser_LLOC) #else #define YYLEX YY_HTMLParser_LEX(&YY_HTMLParser_LVAL) #endif #endif #ifndef YY_USE_CLASS #if YY_HTMLParser_DEBUG != 0 int YY_HTMLParser_DEBUG_FLAG; /* nonzero means print parse trace */ /* Since this is uninitialized, it does not stop multiple parsers from coexisting. */ #endif #endif /* YYINITDEPTH indicates the initial size of the parser's stacks */ #ifndef YYINITDEPTH #define YYINITDEPTH 200 #endif /* YYMAXDEPTH is the maximum size the stacks can grow to (effective only if the built-in stack extension method is used). */ #if YYMAXDEPTH == 0 #undef YYMAXDEPTH #endif #ifndef YYMAXDEPTH #define YYMAXDEPTH 10000 #endif #if __GNUC__ > 1 /* GNU C and GNU C++ define this. */ #define __yy_bcopy(FROM,TO,COUNT) __builtin_memcpy(TO,FROM,COUNT) #else /* not GNU C or C++ */ /* This is the most reliable way to avoid incompatibilities in available built-in functions on various systems. 
*/ #ifdef __cplusplus static void __yy_bcopy (char *from, char *to, int count) #else #ifdef __STDC__ static void __yy_bcopy (char *from, char *to, int count) #else static void __yy_bcopy (from, to, count) char *from; char *to; int count; #endif #endif { register char *f = from; register char *t = to; register int i = count; while (i-- > 0) *t++ = *f++; } #endif int #ifdef YY_USE_CLASS YY_HTMLParser_CLASS:: #endif YY_HTMLParser_PARSE(YY_HTMLParser_PARSE_PARAM) #ifndef __STDC__ #ifndef __cplusplus #ifndef YY_USE_CLASS /* parameter definition without protypes */ YY_HTMLParser_PARSE_PARAM_DEF #endif #endif #endif { register int yystate; register int yyn; register short *yyssp; register YY_HTMLParser_STYPE *yyvsp; int yyerrstatus; /* number of tokens to shift before error messages enabled */ int yychar1; /* lookahead token as an internal (translated) token number */ short yyssa[YYINITDEPTH]; /* the state stack */ YY_HTMLParser_STYPE yyvsa[YYINITDEPTH]; /* the semantic value stack */ short *yyss = yyssa; /* refer to the stacks thru separate pointers */ YY_HTMLParser_STYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ #ifdef YY_HTMLParser_LSP_NEEDED YY_HTMLParser_LTYPE yylsa[YYINITDEPTH]; /* the location stack */ YY_HTMLParser_LTYPE *yyls = yylsa; YY_HTMLParser_LTYPE *yylsp; #define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) #else #define YYPOPSTACK (yyvsp--, yyssp--) #endif int yystacksize = YYINITDEPTH; #ifdef YY_HTMLParser_PURE int YY_HTMLParser_CHAR; YY_HTMLParser_STYPE YY_HTMLParser_LVAL; int YY_HTMLParser_NERRS; #ifdef YY_HTMLParser_LSP_NEEDED YY_HTMLParser_LTYPE YY_HTMLParser_LLOC; #endif #endif YY_HTMLParser_STYPE yyval; /* the variable used to return */ /* semantic values from the action */ /* routines */ int yylen; #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Starting parse\n"); #endif yystate = 0; yyerrstatus = 0; YY_HTMLParser_NERRS = 0; YY_HTMLParser_CHAR = YYEMPTY; /* Cause a token to be read. 
*/ /* Initialize stack pointers. Waste one element of value and location stack so that they stay on the same level as the state stack. The wasted elements are never initialized. */ yyssp = yyss - 1; yyvsp = yyvs; #ifdef YY_HTMLParser_LSP_NEEDED yylsp = yyls; #endif /* Push a new state, which is found in yystate . */ /* In all cases, when you get here, the value and location stacks have just been pushed. so pushing a state here evens the stacks. */ yynewstate: *++yyssp = yystate; if (yyssp >= yyss + yystacksize - 1) { /* Give user a chance to reallocate the stack */ /* Use copies of these so that the &'s don't force the real ones into memory. */ YY_HTMLParser_STYPE *yyvs1 = yyvs; short *yyss1 = yyss; #ifdef YY_HTMLParser_LSP_NEEDED YY_HTMLParser_LTYPE *yyls1 = yyls; #endif /* Get the current used size of the three stacks, in elements. */ int size = yyssp - yyss + 1; #ifdef yyoverflow /* Each stack pointer address is followed by the size of the data in use in that stack, in bytes. */ yyoverflow("parser stack overflow", &yyss1, size * sizeof (*yyssp), &yyvs1, size * sizeof (*yyvsp), #ifdef YY_HTMLParser_LSP_NEEDED &yyls1, size * sizeof (*yylsp), #endif &yystacksize); yyss = yyss1; yyvs = yyvs1; #ifdef YY_HTMLParser_LSP_NEEDED yyls = yyls1; #endif #else /* no yyoverflow */ /* Extend the stack our own way. 
*/ if (yystacksize >= YYMAXDEPTH) { YY_HTMLParser_ERROR("parser stack overflow"); return 2; } yystacksize *= 2; if (yystacksize > YYMAXDEPTH) yystacksize = YYMAXDEPTH; yyss = (short *) alloca (yystacksize * sizeof (*yyssp)); __yy_bcopy ((char *)yyss1, (char *)yyss, size * sizeof (*yyssp)); yyvs = (YY_HTMLParser_STYPE *) alloca (yystacksize * sizeof (*yyvsp)); __yy_bcopy ((char *)yyvs1, (char *)yyvs, size * sizeof (*yyvsp)); #ifdef YY_HTMLParser_LSP_NEEDED yyls = (YY_HTMLParser_LTYPE *) alloca (yystacksize * sizeof (*yylsp)); __yy_bcopy ((char *)yyls1, (char *)yyls, size * sizeof (*yylsp)); #endif #endif /* no yyoverflow */ yyssp = yyss + size - 1; yyvsp = yyvs + size - 1; #ifdef YY_HTMLParser_LSP_NEEDED yylsp = yyls + size - 1; #endif #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Stack size increased to %d\n", yystacksize); #endif if (yyssp >= yyss + yystacksize - 1) YYABORT; } #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Entering state %d\n", yystate); #endif goto yybackup; yybackup: /* Do appropriate processing given the current state. */ /* Read a lookahead token if we need one and don't already have one. */ /* yyresume: */ /* First try to decide what to do without reference to lookahead token. */ yyn = yypact[yystate]; if (yyn == YYFLAG) goto yydefault; /* Not known => get a lookahead token if don't already have one. */ /* yychar is either YYEMPTY or YYEOF or a valid token in external form. */ if (YY_HTMLParser_CHAR == YYEMPTY) { #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Reading a token: "); #endif YY_HTMLParser_CHAR = YYLEX; } /* Convert token to internal form (in yychar1) for indexing tables with */ if (YY_HTMLParser_CHAR <= 0) /* This means end of input. 
*/ { yychar1 = 0; YY_HTMLParser_CHAR = YYEOF; /* Don't call YYLEX any more */ #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Now at end of input.\n"); #endif } else { yychar1 = YYTRANSLATE(YY_HTMLParser_CHAR); #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) { fprintf (stderr, "Next token is %d (%s", YY_HTMLParser_CHAR, yytname[yychar1]); /* Give the individual parser a way to print the precise meaning of a token, for further debugging info. */ #ifdef YYPRINT YYPRINT (stderr, YY_HTMLParser_CHAR, YY_HTMLParser_LVAL); #endif fprintf (stderr, ")\n"); } #endif } yyn += yychar1; if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) goto yydefault; yyn = yytable[yyn]; /* yyn is what to do for this token type in this state. Negative => reduce, -yyn is rule number. Positive => shift, yyn is new state. New state is final state => don't bother to shift, just return success. 0, or most negative number => error. */ if (yyn < 0) { if (yyn == YYFLAG) goto yyerrlab; yyn = -yyn; goto yyreduce; } else if (yyn == 0) goto yyerrlab; if (yyn == YYFINAL) YYACCEPT; /* Shift the lookahead token. */ #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Shifting token %d (%s), ", YY_HTMLParser_CHAR, yytname[yychar1]); #endif /* Discard the token being shifted unless it is eof. */ if (YY_HTMLParser_CHAR != YYEOF) YY_HTMLParser_CHAR = YYEMPTY; *++yyvsp = YY_HTMLParser_LVAL; #ifdef YY_HTMLParser_LSP_NEEDED *++yylsp = YY_HTMLParser_LLOC; #endif /* count tokens shifted since error; after three, turn off error status. */ if (yyerrstatus) yyerrstatus--; yystate = yyn; goto yynewstate; /* Do the default action for the current state. */ yydefault: yyn = yydefact[yystate]; if (yyn == 0) goto yyerrlab; /* Do a reduction. yyn is the number of a rule to reduce with. 
*/ yyreduce: yylen = yyr2[yyn]; yyval = yyvsp[1-yylen]; /* implement default value of the action */ #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) { int i; fprintf (stderr, "Reducing via rule %d (line %d), ", yyn, yyrline[yyn]); /* Print the symbols being reduced, and their result. */ for (i = yyprhs[yyn]; yyrhs[i] > 0; i++) fprintf (stderr, "%s ", yytname[yyrhs[i]]); fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]); } #endif /* #line 697 "/usr/local/lib/bison.cc" */ switch (yyn) { case 1: #line 274 "HTMLParser.y" { process(*yyvsp[0].document); delete yyvsp[0].document; ; break;} case 2: #line 305 "HTMLParser.y" { yyval.document = new Document; yyval.document->body.content.reset(new list >); ; break;} case 3: #line 309 "HTMLParser.y" { yyval.document = yyvsp[-1].document; ; break;} case 4: #line 312 "HTMLParser.y" { yyval.document = yyvsp[-1].document; ; break;} case 5: #line 315 "HTMLParser.y" { yyval.document->attributes.reset(yyvsp[0].tag_attributes); yyval.document = yyvsp[-1].document; ; break;} case 6: #line 319 "HTMLParser.y" { yyval.document = yyvsp[-1].document; ; break;} case 7: #line 322 "HTMLParser.y" { delete yyvsp[0].tag_attributes; yyval.document = yyvsp[-1].document; ; break;} case 8: #line 326 "HTMLParser.y" { yyval.document = yyvsp[-1].document; ; break;} case 9: #line 329 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; // Ignore attributes (yyval.document = yyvsp[-3].document)->head.title.reset(yyvsp[-1].pcdata); ; break;} case 10: #line 333 "HTMLParser.y" { (yyval.document = yyvsp[-1].document)->head.isindex_attributes.reset(yyvsp[0].tag_attributes); ; break;} case 11: #line 336 "HTMLParser.y" { (yyval.document = yyvsp[-1].document)->head.base_attributes.reset(yyvsp[0].tag_attributes); ; break;} case 12: #line 339 "HTMLParser.y" { (yyval.document = yyvsp[-1].document)->head.meta_attributes.reset(yyvsp[0].tag_attributes); ; break;} case 13: #line 342 "HTMLParser.y" { (yyval.document = 
yyvsp[-1].document)->head.link_attributes.reset(yyvsp[0].tag_attributes); ; break;} case 14: #line 345 "HTMLParser.y" { auto_ptr<Script> s(new Script); s->attributes.reset(yyvsp[0].tag_attributes); if (!read_cdata("</SCRIPT>", &s->text)) { yyerror("CDATA terminal not found"); } (yyval.document = yyvsp[-1].document)->head.scripts.push_back(s); ; break;} case 15: #line 353 "HTMLParser.y" { auto_ptr<Style> s(new Style); s->attributes.reset(yyvsp[0].tag_attributes); if (!read_cdata("</STYLE>", &s->text)) { yyerror("CDATA terminal not found"); } (yyval.document = yyvsp[-1].document)->head.styles.push_back(s); ; break;} case 16: #line 361 "HTMLParser.y" { delete yyvsp[0].tag_attributes; yyval.document = yyvsp[-1].document; ; break;} case 17: #line 365 "HTMLParser.y" { yyval.document = yyvsp[-1].document; ; break;} case 18: #line 368 "HTMLParser.y" { Paragraph *p = new Paragraph; p->texts.reset(yyvsp[0].element_list); (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(p)); ; break;} case 19: #line 373 "HTMLParser.y" { (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].heading)); ; break;} case 20: #line 376 "HTMLParser.y" { (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 21: #line 379 "HTMLParser.y" { (yyval.document = yyvsp[-1].document)->body.content->push_back(auto_ptr<Element>(yyvsp[0].address)); ; break;} case 22: #line 385 "HTMLParser.y" { yyval.pcdata = new PCData; yyval.pcdata->text = *yyvsp[0].strinG; delete yyvsp[0].strinG; ; break;} case 23: #line 393 "HTMLParser.y" { yyval.element_list = new list<auto_ptr<Element> >; ; break;} case 24: #line 396 "HTMLParser.y" { yyval.element_list = yyvsp[-1].element_list; ; break;} case 25: #line 399 "HTMLParser.y" { auto_ptr<Script> s(new Script); s->attributes.reset(yyvsp[0].tag_attributes); if (!read_cdata("</SCRIPT>", &s->text)) { yyerror("CDATA terminal not found"); } // ($$ = 
$1)->head.scripts.push_back(s); ; break;} case 26: #line 407 "HTMLParser.y" { auto_ptr<Style> s(new Style); s->attributes.reset(yyvsp[0].tag_attributes); if (!read_cdata("</STYLE>", &s->text)) { yyerror("CDATA terminal not found"); } // ($$ = $1)->head.styles.push_back(s); ; break;} case 27: #line 415 "HTMLParser.y" { Paragraph *p = new Paragraph; p->texts = auto_ptr<list<auto_ptr<Element> > >(yyvsp[0].element_list); (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(p)); ; break;} case 28: #line 420 "HTMLParser.y" { (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].heading)); ; break;} case 29: #line 423 "HTMLParser.y" { (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 30: #line 426 "HTMLParser.y" { (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].address)); ; break;} case 31: #line 432 "HTMLParser.y" { /* EXTENSION: Allow paragraph content in heading, not only texts */ if (yyvsp[-2].heading->level != yyvsp[0].inT) { yyerror ("Levels of opening and closing headings don't match"); } yyval.heading = yyvsp[-2].heading; yyval.heading->content.reset(yyvsp[-1].element_list); ; break;} case 32: #line 443 "HTMLParser.y" { yyval.element = yyvsp[0].element; ; break;} case 33: #line 446 "HTMLParser.y" { Paragraph *p = new Paragraph; p->attributes.reset(yyvsp[-2].tag_attributes); p->texts.reset(yyvsp[-1].element_list); yyval.element = p; ; break;} case 34: #line 455 "HTMLParser.y" { yyval.element_list = new list<auto_ptr<Element> >; ; break;} case 35: #line 458 "HTMLParser.y" { yyval.element_list = yyvsp[-1].element_list; ; break;} case 36: #line 461 "HTMLParser.y" { yyval.element_list = yyvsp[-1].element_list; yyval.element_list->splice(yyval.element_list->end(), *yyvsp[0].element_list); delete yyvsp[0].element_list; ; break;} case 37: #line 466 "HTMLParser.y" { (yyval.element_list = 
yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 38: #line 472 "HTMLParser.y" { yyval.element = yyvsp[0].element; ; break;} case 39: #line 475 "HTMLParser.y" { yyval.element = yyvsp[0].preformatted; ; break;} case 40: #line 478 "HTMLParser.y" { yyval.element = yyvsp[0].definition_list; ; break;} case 41: #line 481 "HTMLParser.y" { Division *p = new Division; p->attributes.reset(yyvsp[-2].tag_attributes); p->body_content.reset(yyvsp[-1].element_list); yyval.element = p; ; break;} case 42: #line 487 "HTMLParser.y" { Center *p = new Center; delete yyvsp[-2].tag_attributes; // CENTER has no attributes. p->body_content.reset(yyvsp[-1].element_list); yyval.element = p; ; break;} case 43: #line 493 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; // BLOCKQUOTE has no attributes! BlockQuote *bq = new BlockQuote; bq->content.reset(yyvsp[-1].element_list); yyval.element = bq; ; break;} case 44: #line 499 "HTMLParser.y" { Form *f = new Form; f->attributes.reset(yyvsp[-2].tag_attributes); f->content.reset(yyvsp[-1].element_list); yyval.element = f; ; break;} case 45: #line 505 "HTMLParser.y" { HorizontalRule *h = new HorizontalRule; h->attributes.reset(yyvsp[0].tag_attributes); yyval.element = h; ; break;} case 46: #line 510 "HTMLParser.y" { Table *t = new Table; t->attributes.reset(yyvsp[-3].tag_attributes); t->caption.reset(yyvsp[-2].caption); t->rows.reset(yyvsp[-1].table_rows); yyval.element = t; ; break;} case 47: #line 520 "HTMLParser.y" { ++list_nesting; ; break;} case 48: #line 520 "HTMLParser.y" { OrderedList *ol = new OrderedList; ol->attributes.reset(yyvsp[-3].tag_attributes); ol->items.reset(yyvsp[-1].list_items); ol->nesting = --list_nesting; yyval.element = ol; ; break;} case 49: #line 527 "HTMLParser.y" { ++list_nesting; ; break;} case 50: #line 527 "HTMLParser.y" { UnorderedList *ul = new UnorderedList; ul->attributes.reset(yyvsp[-3].tag_attributes); ul->items.reset(yyvsp[-1].list_items); ul->nesting = --list_nesting; 
yyval.element = ul; ; break;} case 51: #line 534 "HTMLParser.y" { ++list_nesting; ; break;} case 52: #line 534 "HTMLParser.y" { Dir *d = new Dir; d->attributes.reset(yyvsp[-3].tag_attributes); d->items.reset(yyvsp[-1].list_items); d->nesting = --list_nesting; yyval.element = d; ; break;} case 53: #line 541 "HTMLParser.y" { ++list_nesting; ; break;} case 54: #line 541 "HTMLParser.y" { Menu *m = new Menu; m->attributes.reset(yyvsp[-3].tag_attributes); m->items.reset(yyvsp[-1].list_items); m->nesting = --list_nesting; yyval.element = m; ; break;} case 55: #line 551 "HTMLParser.y" { yyval.list_items = 0; ; break;} case 56: #line 554 "HTMLParser.y" { yyval.list_items = yyvsp[-1].list_items; ; break;} case 57: #line 557 "HTMLParser.y" { yyval.list_items = yyvsp[-1].list_items ? yyvsp[-1].list_items : new list<auto_ptr<ListItem> >; yyval.list_items->push_back(auto_ptr<ListItem>(yyvsp[0].list_item)); ; break;} case 58: #line 564 "HTMLParser.y" { ListNormalItem *lni = new ListNormalItem; lni->attributes.reset(yyvsp[-2].tag_attributes); lni->flow.reset(yyvsp[-1].element_list); yyval.list_item = lni; ; break;} case 59: #line 570 "HTMLParser.y" { /* EXTENSION: Handle a "block" in a list as an indented block. */ ListBlockItem *lbi = new ListBlockItem; lbi->block.reset(yyvsp[0].element); yyval.list_item = lbi; ; break;} case 60: #line 575 "HTMLParser.y" { /* EXTENSION: Treat "texts" in a list as an "<LI>". 
*/ ListNormalItem *lni = new ListNormalItem; lni->flow.reset(yyvsp[0].element_list); yyval.list_item = lni; ; break;} case 61: #line 585 "HTMLParser.y" { delete yyvsp[-4].tag_attributes; delete yyvsp[-3].element_list; /* Kludge */ yyval.definition_list = yyvsp[-1].definition_list; ; break;} case 62: #line 591 "HTMLParser.y" { DefinitionList *dl = new DefinitionList; dl->attributes.reset(yyvsp[-4].tag_attributes); dl->preamble.reset(yyvsp[-3].element_list); dl->items.reset(yyvsp[-1].definition_list_item_list); yyval.definition_list = dl; ; break;} case 63: #line 601 "HTMLParser.y" { yyval.definition_list_item_list = 0; ; break;} case 64: #line 604 "HTMLParser.y" { yyval.definition_list_item_list = yyvsp[0].definition_list_item_list; ; break;} case 65: #line 607 "HTMLParser.y" { yyval.definition_list_item_list = yyvsp[-1].definition_list_item_list ? yyvsp[-1].definition_list_item_list : new list<auto_ptr<DefinitionListItem> >; yyval.definition_list_item_list->push_back(auto_ptr<DefinitionListItem>(yyvsp[0].term_name)); ; break;} case 66: #line 611 "HTMLParser.y" { yyval.definition_list_item_list = yyvsp[-1].definition_list_item_list ? 
yyvsp[-1].definition_list_item_list : new list<auto_ptr<DefinitionListItem> >; yyval.definition_list_item_list->push_back(auto_ptr<DefinitionListItem>(yyvsp[0].term_definition)); ; break;} case 67: #line 618 "HTMLParser.y" { /* EXTENSION: Allow "flow" instead of "texts" */ delete yyvsp[-2].tag_attributes; yyval.term_name = new TermName; yyval.term_name->flow.reset(yyvsp[-1].element_list); ; break;} case 68: #line 623 "HTMLParser.y" {/* EXTENSION: Ignore <P> after </DT> */ delete yyvsp[-4].tag_attributes; delete yyvsp[-1].tag_attributes; yyval.term_name = new TermName; yyval.term_name->flow.reset(yyvsp[-3].element_list); ; break;} case 69: #line 632 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.term_definition = new TermDefinition; yyval.term_definition->flow.reset(yyvsp[-1].element_list); ; break;} case 70: #line 637 "HTMLParser.y" {/* EXTENSION: Ignore <P> after </DD> */ delete yyvsp[-4].tag_attributes; delete yyvsp[-1].tag_attributes; yyval.term_definition = new TermDefinition; yyval.term_definition->flow.reset(yyvsp[-3].element_list); ; break;} case 71: #line 646 "HTMLParser.y" { yyval.element_list = new list<auto_ptr<Element> >; yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 72: #line 650 "HTMLParser.y" { yyval.element_list = yyvsp[-1].element_list; ; break;} case 73: #line 653 "HTMLParser.y" { (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 74: #line 659 "HTMLParser.y" { yyval.element = yyvsp[0].element; ; break;} case 75: #line 662 "HTMLParser.y" { /* EXTENSION: Allow headings in "flow", i.e. 
in lists */ yyval.element = yyvsp[0].heading; ; break;} case 76: #line 665 "HTMLParser.y" { yyval.element = yyvsp[0].element; ; break;} case 77: #line 671 "HTMLParser.y" { yyval.preformatted = new Preformatted; yyval.preformatted->attributes.reset(yyvsp[-2].tag_attributes); yyval.preformatted->texts.reset(yyvsp[-1].element_list); ; break;} case 78: #line 679 "HTMLParser.y" { yyval.caption = new Caption; yyval.caption->attributes.reset(yyvsp[-2].tag_attributes); yyval.caption->texts.reset(yyvsp[-1].element_list); ; break;} case 79: #line 687 "HTMLParser.y" { yyval.table_rows = new list<auto_ptr<TableRow> >; ; break;} case 80: #line 690 "HTMLParser.y" { yyval.table_rows = yyvsp[-1].table_rows; ; break;} case 81: #line 693 "HTMLParser.y" { TableRow *tr = new TableRow; tr->attributes.reset(yyvsp[-2].tag_attributes); tr->cells.reset(yyvsp[-1].table_cells); (yyval.table_rows = yyvsp[-3].table_rows)->push_back(auto_ptr<TableRow>(tr)); ; break;} case 82: #line 702 "HTMLParser.y" { yyval.table_cells = new list<auto_ptr<TableCell> >; ; break;} case 83: #line 705 "HTMLParser.y" { yyval.table_cells = yyvsp[-1].table_cells; ; break;} case 84: #line 708 "HTMLParser.y" { TableCell *tc = new TableCell; tc->attributes.reset(yyvsp[-2].tag_attributes); tc->content.reset(yyvsp[-1].element_list); (yyval.table_cells = yyvsp[-3].table_cells)->push_back(auto_ptr<TableCell>(tc)); ; break;} case 85: #line 714 "HTMLParser.y" { /* EXTENSION: Allow "</TD>" in place of "</TH>". */ TableHeadingCell *thc = new TableHeadingCell; thc->attributes.reset(yyvsp[-3].tag_attributes); thc->content.reset(yyvsp[-2].element_list); (yyval.table_cells = yyvsp[-4].table_cells)->push_back(auto_ptr<TableCell>(thc)); ; break;} case 86: #line 721 "HTMLParser.y" { /* EXTENSION: Ignore <INPUT> between table cells. */ delete yyvsp[0].tag_attributes; yyval.table_cells = yyvsp[-1].table_cells; ; break;} case 87: #line 728 "HTMLParser.y" { /* Should be "address_content"... 
*/ delete yyvsp[-2].tag_attributes; yyval.address = new Address; yyval.address->content.reset(yyvsp[-1].element_list); ; break;} case 88: #line 738 "HTMLParser.y" { yyval.element_list = new list<auto_ptr<Element> >; yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 89: #line 742 "HTMLParser.y" { (yyval.element_list = yyvsp[-1].element_list)->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 90: #line 748 "HTMLParser.y" { yyval.element = yyvsp[-1].pcdata; ; break;} case 91: #line 749 "HTMLParser.y" { yyval.element = yyvsp[-1].element; ; break;} case 92: #line 750 "HTMLParser.y" { yyval.element = yyvsp[-1].element; ; break;} case 93: #line 751 "HTMLParser.y" { yyval.element = yyvsp[-1].element; ; break;} case 94: #line 752 "HTMLParser.y" { yyval.element = yyvsp[-1].element; ; break;} case 95: #line 753 "HTMLParser.y" { /* EXTENSION: NS 1.1 / IE 2.0 */ NoBreak *nb = new NoBreak; delete yyvsp[-3].tag_attributes; nb->content.reset(yyvsp[-2].element_list); yyval.element = nb; ; break;} case 96: #line 762 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(TT, yyvsp[-1].element_list); ; break;} case 97: #line 763 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(I, yyvsp[-1].element_list); ; break;} case 98: #line 764 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(B, yyvsp[-1].element_list); ; break;} case 99: #line 765 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(U, yyvsp[-1].element_list); ; break;} case 100: #line 766 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(STRIKE, yyvsp[-1].element_list); ; break;} case 101: #line 767 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(BIG, yyvsp[-1].element_list); ; break;} case 102: #line 768 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SMALL, yyvsp[-1].element_list); ; break;} case 103: #line 769 
"HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SUB, yyvsp[-1].element_list); ; break;} case 104: #line 770 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Font(SUP, yyvsp[-1].element_list); ; break;} case 105: #line 774 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(EM, yyvsp[-1].element_list); ; break;} case 106: #line 775 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(STRONG, yyvsp[-1].element_list); ; break;} case 107: #line 776 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(DFN, yyvsp[-1].element_list); ; break;} case 108: #line 777 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(CODE, yyvsp[-1].element_list); ; break;} case 109: #line 778 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(SAMP, yyvsp[-1].element_list); ; break;} case 110: #line 779 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(KBD, yyvsp[-1].element_list); ; break;} case 111: #line 780 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(VAR, yyvsp[-1].element_list); ; break;} case 112: #line 781 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; yyval.element = new Phrase(CITE, yyvsp[-1].element_list); ; break;} case 113: #line 788 "HTMLParser.y" { delete yyvsp[-2].tag_attributes; Anchor *a = new Anchor; a->attributes.reset(yyvsp[-3].tag_attributes); a->texts.reset(yyvsp[-1].element_list); yyval.element = a; ; break;} case 114: #line 795 "HTMLParser.y" { Image *i = new Image; i->attributes.reset(yyvsp[0].tag_attributes); yyval.element = i; ; break;} case 115: #line 800 "HTMLParser.y" { Applet *a = new Applet; a->attributes.reset(yyvsp[-2].tag_attributes); a->content.reset(yyvsp[-1].element_list); yyval.element = a; ; break;} case 116: #line 808 "HTMLParser.y" { Font2 *f2 = new Font2; f2->attributes.reset(yyvsp[-2].tag_attributes); 
f2->elements.reset(yyvsp[-1].element_list); yyval.element = f2; ; break;} case 117: #line 814 "HTMLParser.y" { BaseFont *bf = new BaseFont; bf->attributes.reset(yyvsp[0].tag_attributes); yyval.element = bf; ; break;} case 118: #line 819 "HTMLParser.y" { LineBreak *lb = new LineBreak; lb->attributes.reset(yyvsp[0].tag_attributes); yyval.element = lb; ; break;} case 119: #line 824 "HTMLParser.y" { Map *m = new Map; m->attributes.reset(yyvsp[-2].tag_attributes); m->areas.reset(yyvsp[-1].tag_attributes_list); yyval.element = m; ; break;} case 120: #line 833 "HTMLParser.y" { yyval.element_list = 0; ; break;} case 121: #line 836 "HTMLParser.y" { yyval.element_list = yyvsp[-1].element_list ? yyvsp[-1].element_list : new list<auto_ptr<Element> >; yyval.element_list->push_back(auto_ptr<Element>(yyvsp[0].element)); ; break;} case 122: #line 840 "HTMLParser.y" { yyval.element_list = yyvsp[-1].element_list ? yyvsp[-1].element_list : new list<auto_ptr<Element> >; Param *p = new Param; p->attributes.reset(yyvsp[0].tag_attributes); yyval.element_list->push_back(auto_ptr<Element>(p)); ; break;} case 123: #line 849 "HTMLParser.y" { yyval.tag_attributes_list = 0; ; break;} case 124: #line 852 "HTMLParser.y" { yyval.tag_attributes_list = yyvsp[-1].tag_attributes_list; ; break;} case 125: #line 855 "HTMLParser.y" { yyval.tag_attributes_list = yyvsp[-1].tag_attributes_list ? 
yyvsp[-1].tag_attributes_list : new list<auto_ptr<list<TagAttribute> > >; yyval.tag_attributes_list->push_back(auto_ptr<list<TagAttribute> >(yyvsp[0].tag_attributes)); ; break;} case 126: #line 862 "HTMLParser.y" { Input *i = new Input; i->attributes.reset(yyvsp[0].tag_attributes); yyval.element = i; ; break;} case 127: #line 867 "HTMLParser.y" { Select *s = new Select; s->attributes.reset(yyvsp[-2].tag_attributes); s->content.reset(yyvsp[-1].option_list); yyval.element = s; ; break;} case 128: #line 873 "HTMLParser.y" { TextArea *ta = new TextArea; ta->attributes.reset(yyvsp[-2].tag_attributes); ta->pcdata.reset(yyvsp[-1].pcdata); yyval.element = ta; ; break;} case 129: #line 882 "HTMLParser.y" { yyval.option_list = new list<auto_ptr<Option> >; yyval.option_list->push_back(auto_ptr<Option>(yyvsp[0].option)); ; break;} case 130: #line 886 "HTMLParser.y" { (yyval.option_list = yyvsp[-1].option_list)->push_back(auto_ptr<Option>(yyvsp[0].option)); ; break;} case 131: #line 892 "HTMLParser.y" { yyval.option = new Option; yyval.option->attributes.reset(yyvsp[-2].tag_attributes); yyval.option->pcdata.reset(yyvsp[-1].pcdata); ; break;} case 132: #line 902 "HTMLParser.y" { yyval.heading = new Heading; yyval.heading->level = 1; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; break;} case 133: #line 903 "HTMLParser.y" { yyval.heading = new Heading; yyval.heading->level = 2; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; break;} case 134: #line 904 "HTMLParser.y" { yyval.heading = new Heading; yyval.heading->level = 3; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; break;} case 135: #line 905 "HTMLParser.y" { yyval.heading = new Heading; yyval.heading->level = 4; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; break;} case 136: #line 906 "HTMLParser.y" { yyval.heading = new Heading; yyval.heading->level = 5; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; break;} case 137: #line 907 "HTMLParser.y" { yyval.heading = 
new Heading; yyval.heading->level = 6; yyval.heading->attributes.reset(yyvsp[0].tag_attributes); ; break;} case 138: #line 911 "HTMLParser.y" { yyval.inT = 1; ; break;} case 139: #line 912 "HTMLParser.y" { yyval.inT = 2; ; break;} case 140: #line 913 "HTMLParser.y" { yyval.inT = 3; ; break;} case 141: #line 914 "HTMLParser.y" { yyval.inT = 4; ; break;} case 142: #line 915 "HTMLParser.y" { yyval.inT = 5; ; break;} case 143: #line 916 "HTMLParser.y" { yyval.inT = 6; ; break;} case 144: #line 921 "HTMLParser.y" { yyval.pcdata = 0; ; break;} case 145: #line 921 "HTMLParser.y" { yyval.pcdata = yyvsp[0].pcdata; ; break;} case 146: #line 922 "HTMLParser.y" { yyval.caption = 0; ; break;} case 147: #line 922 "HTMLParser.y" { yyval.caption = yyvsp[0].caption; ; break;} case 148: #line 923 "HTMLParser.y" { yyval.element_list = 0; ; break;} case 149: #line 923 "HTMLParser.y" { yyval.element_list = yyvsp[0].element_list; ; break;} case 150: #line 924 "HTMLParser.y" { yyval.element_list = 0; ; break;} case 151: #line 924 "HTMLParser.y" { yyval.element_list = yyvsp[0].element_list; ; break;} case 152: #line 926 "HTMLParser.y" { yyval.tag_attributes = 0; ; break;} case 153: #line 926 "HTMLParser.y" { yyval.tag_attributes = yyvsp[0].tag_attributes; ; break;} case 154: #line 927 "HTMLParser.y" { yyval.tag_attributes = 0; ; break;} case 155: #line 927 "HTMLParser.y" { yyval.tag_attributes = yyvsp[0].tag_attributes; ; break;} } #line 697 "/usr/local/lib/bison.cc" /* the action file gets copied in in place of this dollarsign */ yyvsp -= yylen; yyssp -= yylen; #ifdef YY_HTMLParser_LSP_NEEDED yylsp -= yylen; #endif #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) { short *ssp1 = yyss - 1; fprintf (stderr, "state stack now"); while (ssp1 != yyssp) fprintf (stderr, " %d", *++ssp1); fprintf (stderr, "\n"); } #endif *++yyvsp = yyval; #ifdef YY_HTMLParser_LSP_NEEDED yylsp++; if (yylen == 0) { yylsp->first_line = YY_HTMLParser_LLOC.first_line; yylsp->first_column = 
YY_HTMLParser_LLOC.first_column; yylsp->last_line = (yylsp-1)->last_line; yylsp->last_column = (yylsp-1)->last_column; yylsp->text = 0; } else { yylsp->last_line = (yylsp+yylen-1)->last_line; yylsp->last_column = (yylsp+yylen-1)->last_column; } #endif /* Now "shift" the result of the reduction. Determine what state that goes to, based on the state we popped back to and the rule number reduced by. */ yyn = yyr1[yyn]; yystate = yypgoto[yyn - YYNTBASE] + *yyssp; if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) yystate = yytable[yystate]; else yystate = yydefgoto[yyn - YYNTBASE]; goto yynewstate; yyerrlab: /* here on detecting error */ if (! yyerrstatus) /* If not already recovering from an error, report this error. */ { ++YY_HTMLParser_NERRS; #ifdef YY_HTMLParser_ERROR_VERBOSE yyn = yypact[yystate]; if (yyn > YYFLAG && yyn < YYLAST) { int size = 0; char *msg; int x, count; count = 0; for (x = 0; x < (sizeof(yytname) / sizeof(char *)); x++) if (yycheck[x + yyn] == x) size += strlen(yytname[x]) + 15, count++; msg = (char *) malloc(size + 15); if (msg != 0) { strcpy(msg, "parse error"); if (count < 5) { count = 0; for (x = 0; x < (sizeof(yytname) / sizeof(char *)); x++) if (yycheck[x + yyn] == x) { strcat(msg, count == 0 ? ", expecting `" : " or `"); strcat(msg, yytname[x]); strcat(msg, "'"); count++; } } YY_HTMLParser_ERROR(msg); free(msg); } else YY_HTMLParser_ERROR ("parse error; also virtual memory exceeded"); } else #endif /* YY_HTMLParser_ERROR_VERBOSE */ YY_HTMLParser_ERROR("parse error"); } goto yyerrlab1; yyerrlab1: /* here on error raised explicitly by an action */ if (yyerrstatus == 3) { /* if just tried and failed to reuse lookahead token after an error, discard it. 
*/ /* return failure if at end of input */ if (YY_HTMLParser_CHAR == YYEOF) YYABORT; #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Discarding token %d (%s).\n", YY_HTMLParser_CHAR, yytname[yychar1]); #endif YY_HTMLParser_CHAR = YYEMPTY; } /* Else will try to reuse lookahead token after shifting the error token. */ yyerrstatus = 3; /* Each real token shifted decrements this */ goto yyerrhandle; yyerrdefault: /* current state does not do anything special for the error token. */ #if 0 /* This is wrong; only states that explicitly want error tokens should shift them. */ yyn = yydefact[yystate]; /* If its default is to accept any token, ok. Otherwise pop it.*/ if (yyn) goto yydefault; #endif yyerrpop: /* pop the current state because it cannot handle the error token */ if (yyssp == yyss) YYABORT; yyvsp--; yystate = *--yyssp; #ifdef YY_HTMLParser_LSP_NEEDED yylsp--; #endif #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) { short *ssp1 = yyss - 1; fprintf (stderr, "Error: state stack now"); while (ssp1 != yyssp) fprintf (stderr, " %d", *++ssp1); fprintf (stderr, "\n"); } #endif yyerrhandle: yyn = yypact[yystate]; if (yyn == YYFLAG) goto yyerrdefault; yyn += YYTERROR; if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) goto yyerrdefault; yyn = yytable[yyn]; if (yyn < 0) { if (yyn == YYFLAG) goto yyerrpop; yyn = -yyn; goto yyreduce; } else if (yyn == 0) goto yyerrpop; if (yyn == YYFINAL) YYACCEPT; #if YY_HTMLParser_DEBUG != 0 if (YY_HTMLParser_DEBUG_FLAG) fprintf(stderr, "Shifting error token, "); #endif *++yyvsp = YY_HTMLParser_LVAL; #ifdef YY_HTMLParser_LSP_NEEDED *++yylsp = YY_HTMLParser_LLOC; #endif yystate = yyn; goto yynewstate; } /* END */ /* #line 891 "/usr/local/lib/bison.cc" */ #line 965 "HTMLParser.y" /* } */ /* * Some C++ compilers (e.g. 
EGCS 2.91.66) have problems if all virtual * methods of a class are inline or pure virtual, so we define the destructor, * which is the only virtual method, non-inline, although it is empty. */ HTMLParser::~HTMLParser() { } /* ------------------------------------------------------------------------- */ �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/HTMLParser.h�����������������������������������������������������������������������0100644�0000000�0000000�00000027765�07525767057�014447� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#ifndef YY_HTMLParser_h_included #define YY_HTMLParser_h_included #line 1 "/usr/local/lib/bison.h" /* before anything */ #ifdef c_plusplus #ifndef __cplusplus #define __cplusplus #endif #endif #ifdef __cplusplus #ifndef YY_USE_CLASS #define YY_USE_CLASS #endif #else #endif #include <stdio.h> /* #line 14 "/usr/local/lib/bison.h" */ #define YY_HTMLParser_PURE #define YY_HTMLParser_DEBUG 1 #define YY_HTMLParser_LEX_BODY = 0 #define YY_HTMLParser_ERROR_BODY = 0 #define YY_HTMLParser_MEMBERS \ virtual ~HTMLParser(); \ virtual void process(const Document &) = 0;\ virtual bool read_cdata(const char *terminal, string *) = 0;\ int list_nesting; #define YY_HTMLParser_CONSTRUCTOR_INIT : list_nesting(0) #line 69 "HTMLParser.y" typedef union { Document *document; Element *element; list<auto_ptr<Element> > *element_list; PCData *pcdata; string *strinG; list<TagAttribute> *tag_attributes; int inT; list<auto_ptr<TableRow> > *table_rows; list<auto_ptr<TableCell> > *table_cells; 
ListItem *list_item; list<auto_ptr<ListItem> > *list_items; Caption *caption; Heading *heading; list<auto_ptr<Option> > *option_list; Option *option; DefinitionList *definition_list; list<auto_ptr<DefinitionListItem> > *definition_list_item_list; TermName *term_name; TermDefinition *term_definition; Preformatted *preformatted; Address *address; list<auto_ptr<list<TagAttribute> > > *tag_attributes_list; } yy_HTMLParser_stype; #define YY_HTMLParser_STYPE yy_HTMLParser_stype #line 14 "/usr/local/lib/bison.h" /* %{ and %header{ and %union, during decl */ #ifndef YY_HTMLParser_COMPATIBILITY #ifndef YY_USE_CLASS #define YY_HTMLParser_COMPATIBILITY 1 #else #define YY_HTMLParser_COMPATIBILITY 0 #endif #endif #if YY_HTMLParser_COMPATIBILITY != 0 /* backward compatibility */ #ifdef YYLTYPE #ifndef YY_HTMLParser_LTYPE #define YY_HTMLParser_LTYPE YYLTYPE /* WARNING obsolete !!! user defined YYLTYPE not reported into generated header */ /* use %define LTYPE */ #endif #endif #ifdef YYSTYPE #ifndef YY_HTMLParser_STYPE #define YY_HTMLParser_STYPE YYSTYPE /* WARNING obsolete !!! user defined YYSTYPE not reported into generated header */ /* use %define STYPE */ #endif #endif #ifdef YYDEBUG #ifndef YY_HTMLParser_DEBUG #define YY_HTMLParser_DEBUG YYDEBUG /* WARNING obsolete !!! 
user defined YYDEBUG not reported into generated header */ /* use %define DEBUG */ #endif #endif #ifdef YY_HTMLParser_STYPE #ifndef yystype #define yystype YY_HTMLParser_STYPE #endif #endif #endif #ifndef YY_HTMLParser_PURE /* #line 54 "/usr/local/lib/bison.h" */ #line 54 "/usr/local/lib/bison.h" /* YY_HTMLParser_PURE */ #endif /* #line 56 "/usr/local/lib/bison.h" */ #line 56 "/usr/local/lib/bison.h" /* prefix */ #ifndef YY_HTMLParser_DEBUG /* #line 58 "/usr/local/lib/bison.h" */ #line 58 "/usr/local/lib/bison.h" /* YY_HTMLParser_DEBUG */ #endif #ifndef YY_HTMLParser_LSP_NEEDED /* #line 61 "/usr/local/lib/bison.h" */ #line 61 "/usr/local/lib/bison.h" /* YY_HTMLParser_LSP_NEEDED*/ #endif /* DEFAULT LTYPE*/ #ifdef YY_HTMLParser_LSP_NEEDED #ifndef YY_HTMLParser_LTYPE /* yyltype: fallback location record, declared only when YY_HTMLParser_LSP_NEEDED is defined and no YY_HTMLParser_LTYPE was supplied. The parser copies first_line/first_column from the lookahead location for empty rules and takes last_line/last_column from the last right-hand-side symbol otherwise; it sets text to 0 for empty rules. The purpose of timestamp is not visible in the parser code -- NOTE(review): confirm against the scanner. */ typedef struct yyltype { int timestamp; int first_line; int first_column; int last_line; int last_column; char *text; } yyltype; #define YY_HTMLParser_LTYPE yyltype #endif #endif /* DEFAULT STYPE*/ /* Semantic values fall back to plain int when no %union / STYPE was declared. */ #ifndef YY_HTMLParser_STYPE #define YY_HTMLParser_STYPE int #endif /* DEFAULT MISCELLANEOUS */ /* Each YY_HTMLParser_* name below maps to the classic yacc identifier unless it was already defined. */ #ifndef YY_HTMLParser_PARSE #define YY_HTMLParser_PARSE yyparse #endif #ifndef YY_HTMLParser_LEX #define YY_HTMLParser_LEX yylex #endif #ifndef YY_HTMLParser_LVAL #define YY_HTMLParser_LVAL yylval #endif #ifndef YY_HTMLParser_LLOC #define YY_HTMLParser_LLOC yylloc #endif #ifndef YY_HTMLParser_CHAR #define YY_HTMLParser_CHAR yychar #endif #ifndef YY_HTMLParser_NERRS #define YY_HTMLParser_NERRS yynerrs #endif #ifndef YY_HTMLParser_DEBUG_FLAG #define YY_HTMLParser_DEBUG_FLAG yydebug #endif #ifndef YY_HTMLParser_ERROR #define YY_HTMLParser_ERROR yyerror #endif /* Parameter list of the parse function: left empty for pre-ANSI C without classes, otherwise void. */ #ifndef YY_HTMLParser_PARSE_PARAM #ifndef __STDC__ #ifndef __cplusplus #ifndef YY_USE_CLASS #define YY_HTMLParser_PARSE_PARAM #ifndef YY_HTMLParser_PARSE_PARAM_DEF #define YY_HTMLParser_PARSE_PARAM_DEF #endif #endif #endif #endif #ifndef YY_HTMLParser_PARSE_PARAM #define YY_HTMLParser_PARSE_PARAM void #endif #endif /* TOKEN C */ #ifndef YY_USE_CLASS #ifndef
YY_HTMLParser_PURE extern YY_HTMLParser_STYPE YY_HTMLParser_LVAL; #endif /* #line 134 "/usr/local/lib/bison.h" */ #define DOCTYPE 258 #define PCDATA 259 #define SCAN_ERROR 260 #define A 261 #define ADDRESS 262 #define APPLET 263 #define AREA 264 #define B 265 #define BASE 266 #define BASEFONT 267 #define BIG 268 #define BLOCKQUOTE 269 #define BODY 270 #define BR 271 #define CAPTION 272 #define CENTER 273 #define CITE 274 #define CODE 275 #define DD 276 #define DFN 277 #define DIR 278 #define DIV 279 #define DL 280 #define DT 281 #define EM 282 #define FONT 283 #define FORM 284 #define H1 285 #define H2 286 #define H3 287 #define H4 288 #define H5 289 #define H6 290 #define HEAD 291 #define HR 292 #define HTML 293 #define I 294 #define IMG 295 #define INPUT 296 #define ISINDEX 297 #define KBD 298 #define LI 299 #define LINK 300 #define MAP 301 #define MENU 302 #define META 303 #define NOBR 304 #define OL 305 #define OPTION 306 #define P 307 #define PARAM 308 #define PRE 309 #define SAMP 310 #define SCRIPT 311 #define SELECT 312 #define SMALL 313 #define STRIKE 314 #define STRONG 315 #define STYLE 316 #define SUB 317 #define SUP 318 #define TABLE 319 #define TD 320 #define TEXTAREA 321 #define TH 322 #define TITLE 323 #define TR 324 #define TT 325 #define U 326 #define UL 327 #define VAR 328 #define END_A 329 #define END_ADDRESS 330 #define END_APPLET 331 #define END_B 332 #define END_BIG 333 #define END_BLOCKQUOTE 334 #define END_BODY 335 #define END_CAPTION 336 #define END_CENTER 337 #define END_CITE 338 #define END_CODE 339 #define END_DD 340 #define END_DFN 341 #define END_DIR 342 #define END_DIV 343 #define END_DL 344 #define END_DT 345 #define END_EM 346 #define END_FONT 347 #define END_FORM 348 #define END_H1 349 #define END_H2 350 #define END_H3 351 #define END_H4 352 #define END_H5 353 #define END_H6 354 #define END_HEAD 355 #define END_HTML 356 #define END_I 357 #define END_KBD 358 #define END_LI 359 #define END_MAP 360 #define END_MENU 361 #define END_NOBR 
362 #define END_OL 363 #define END_OPTION 364 #define END_P 365 #define END_PRE 366 #define END_SAMP 367 #define END_SCRIPT 368 #define END_SELECT 369 #define END_SMALL 370 #define END_STRIKE 371 #define END_STRONG 372 #define END_STYLE 373 #define END_SUB 374 #define END_SUP 375 #define END_TABLE 376 #define END_TD 377 #define END_TEXTAREA 378 #define END_TH 379 #define END_TITLE 380 #define END_TR 381 #define END_TT 382 #define END_U 383 #define END_UL 384 #define END_VAR 385 #line 134 "/usr/local/lib/bison.h" /* #defines token */ /* after #define tokens, before const tokens S5*/ #else #ifndef YY_HTMLParser_CLASS #define YY_HTMLParser_CLASS HTMLParser #endif #ifndef YY_HTMLParser_INHERIT #define YY_HTMLParser_INHERIT #endif #ifndef YY_HTMLParser_MEMBERS #define YY_HTMLParser_MEMBERS #endif #ifndef YY_HTMLParser_LEX_BODY #define YY_HTMLParser_LEX_BODY #endif #ifndef YY_HTMLParser_ERROR_BODY #define YY_HTMLParser_ERROR_BODY #endif #ifndef YY_HTMLParser_CONSTRUCTOR_PARAM #define YY_HTMLParser_CONSTRUCTOR_PARAM #endif class YY_HTMLParser_CLASS YY_HTMLParser_INHERIT { public: /* static const int token ... 
*/ /* #line 160 "/usr/local/lib/bison.h" */ static const int DOCTYPE; static const int PCDATA; static const int SCAN_ERROR; static const int A; static const int ADDRESS; static const int APPLET; static const int AREA; static const int B; static const int BASE; static const int BASEFONT; static const int BIG; static const int BLOCKQUOTE; static const int BODY; static const int BR; static const int CAPTION; static const int CENTER; static const int CITE; static const int CODE; static const int DD; static const int DFN; static const int DIR; static const int DIV; static const int DL; static const int DT; static const int EM; static const int FONT; static const int FORM; static const int H1; static const int H2; static const int H3; static const int H4; static const int H5; static const int H6; static const int HEAD; static const int HR; static const int HTML; static const int I; static const int IMG; static const int INPUT; static const int ISINDEX; static const int KBD; static const int LI; static const int LINK; static const int MAP; static const int MENU; static const int META; static const int NOBR; static const int OL; static const int OPTION; static const int P; static const int PARAM; static const int PRE; static const int SAMP; static const int SCRIPT; static const int SELECT; static const int SMALL; static const int STRIKE; static const int STRONG; static const int STYLE; static const int SUB; static const int SUP; static const int TABLE; static const int TD; static const int TEXTAREA; static const int TH; static const int TITLE; static const int TR; static const int TT; static const int U; static const int UL; static const int VAR; static const int END_A; static const int END_ADDRESS; static const int END_APPLET; static const int END_B; static const int END_BIG; static const int END_BLOCKQUOTE; static const int END_BODY; static const int END_CAPTION; static const int END_CENTER; static const int END_CITE; static const int END_CODE; static const int END_DD; 
static const int END_DFN; static const int END_DIR; static const int END_DIV; static const int END_DL; static const int END_DT; static const int END_EM; static const int END_FONT; static const int END_FORM; static const int END_H1; static const int END_H2; static const int END_H3; static const int END_H4; static const int END_H5; static const int END_H6; static const int END_HEAD; static const int END_HTML; static const int END_I; static const int END_KBD; static const int END_LI; static const int END_MAP; static const int END_MENU; static const int END_NOBR; static const int END_OL; static const int END_OPTION; static const int END_P; static const int END_PRE; static const int END_SAMP; static const int END_SCRIPT; static const int END_SELECT; static const int END_SMALL; static const int END_STRIKE; static const int END_STRONG; static const int END_STYLE; static const int END_SUB; static const int END_SUP; static const int END_TABLE; static const int END_TD; static const int END_TEXTAREA; static const int END_TH; static const int END_TITLE; static const int END_TR; static const int END_TT; static const int END_U; static const int END_UL; static const int END_VAR; #line 160 "/usr/local/lib/bison.h" /* decl const */ public: int YY_HTMLParser_PARSE(YY_HTMLParser_PARSE_PARAM); virtual void YY_HTMLParser_ERROR(char *) YY_HTMLParser_ERROR_BODY; #ifdef YY_HTMLParser_PURE #ifdef YY_HTMLParser_LSP_NEEDED virtual int YY_HTMLParser_LEX(YY_HTMLParser_STYPE *YY_HTMLParser_LVAL,YY_HTMLParser_LTYPE *YY_HTMLParser_LLOC) YY_HTMLParser_LEX_BODY; #else virtual int YY_HTMLParser_LEX(YY_HTMLParser_STYPE *YY_HTMLParser_LVAL) YY_HTMLParser_LEX_BODY; #endif #else virtual int YY_HTMLParser_LEX() YY_HTMLParser_LEX_BODY; YY_HTMLParser_STYPE YY_HTMLParser_LVAL; #ifdef YY_HTMLParser_LSP_NEEDED YY_HTMLParser_LTYPE YY_HTMLParser_LLOC; #endif int YY_HTMLParser_NERRS; int YY_HTMLParser_CHAR; #endif #if YY_HTMLParser_DEBUG != 0 public: int YY_HTMLParser_DEBUG_FLAG; /* nonzero means print parse 
trace */ #endif public: YY_HTMLParser_CLASS(YY_HTMLParser_CONSTRUCTOR_PARAM); public: YY_HTMLParser_MEMBERS }; /* other declare folow */ #endif #if YY_HTMLParser_COMPATIBILITY != 0 /* backward compatibility */ #ifndef YYSTYPE #define YYSTYPE YY_HTMLParser_STYPE #endif #ifndef YYLTYPE #define YYLTYPE YY_HTMLParser_LTYPE #endif #ifndef YYDEBUG #ifdef YY_HTMLParser_DEBUG #define YYDEBUG YY_HTMLParser_DEBUG #endif #endif #endif /* END */ /* #line 209 "/usr/local/lib/bison.h" */ #endif �����������html2text-1.3.2a/HTMLParser.k�����������������������������������������������������������������������0100644�0000000�0000000�00000027765�07525767074�014451� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#ifndef YY_HTMLParser_h_included #define YY_HTMLParser_h_included #line 1 "/usr/local/lib/bison.h" /* before anything */ #ifdef c_plusplus #ifndef __cplusplus #define __cplusplus #endif #endif #ifdef __cplusplus #ifndef YY_USE_CLASS #define YY_USE_CLASS #endif #else #endif #include <stdio.h> /* #line 14 "/usr/local/lib/bison.h" */ #define YY_HTMLParser_PURE #define YY_HTMLParser_DEBUG 1 #define YY_HTMLParser_LEX_BODY = 0 #define YY_HTMLParser_ERROR_BODY = 0 #define YY_HTMLParser_MEMBERS \ virtual ~HTMLParser(); \ virtual void process(const Document &) = 0;\ virtual bool read_cdata(const char *terminal, string *) = 0;\ int list_nesting; #define YY_HTMLParser_CONSTRUCTOR_INIT : list_nesting(0) #line 69 "HTMLParser.y" typedef union { Document *document; Element *element; list<auto_ptr<Element> > *element_list; PCData *pcdata; string *strinG; list<TagAttribute> *tag_attributes; int inT; list<auto_ptr<TableRow> > *table_rows; list<auto_ptr<TableCell> > *table_cells; ListItem 
*list_item; list<auto_ptr<ListItem> > *list_items; Caption *caption; Heading *heading; list<auto_ptr<Option> > *option_list; Option *option; DefinitionList *definition_list; list<auto_ptr<DefinitionListItem> > *definition_list_item_list; TermName *term_name; TermDefinition *term_definition; Preformatted *preformatted; Address *address; list<auto_ptr<list<TagAttribute> > > *tag_attributes_list; } yy_HTMLParser_stype; #define YY_HTMLParser_STYPE yy_HTMLParser_stype #line 14 "/usr/local/lib/bison.h" /* %{ and %header{ and %union, during decl */ #ifndef YY_HTMLParser_COMPATIBILITY #ifndef YY_USE_CLASS #define YY_HTMLParser_COMPATIBILITY 1 #else #define YY_HTMLParser_COMPATIBILITY 0 #endif #endif #if YY_HTMLParser_COMPATIBILITY != 0 /* backward compatibility */ #ifdef YYLTYPE #ifndef YY_HTMLParser_LTYPE #define YY_HTMLParser_LTYPE YYLTYPE /* WARNING obsolete !!! user defined YYLTYPE not reported into generated header */ /* use %define LTYPE */ #endif #endif #ifdef YYSTYPE #ifndef YY_HTMLParser_STYPE #define YY_HTMLParser_STYPE YYSTYPE /* WARNING obsolete !!! user defined YYSTYPE not reported into generated header */ /* use %define STYPE */ #endif #endif #ifdef YYDEBUG #ifndef YY_HTMLParser_DEBUG #define YY_HTMLParser_DEBUG YYDEBUG /* WARNING obsolete !!! 
user defined YYDEBUG not reported into generated header */ /* use %define DEBUG */ #endif #endif #ifdef YY_HTMLParser_STYPE #ifndef yystype #define yystype YY_HTMLParser_STYPE #endif #endif #endif #ifndef YY_HTMLParser_PURE /* #line 54 "/usr/local/lib/bison.h" */ #line 54 "/usr/local/lib/bison.h" /* YY_HTMLParser_PURE */ #endif /* #line 56 "/usr/local/lib/bison.h" */ #line 56 "/usr/local/lib/bison.h" /* prefix */ #ifndef YY_HTMLParser_DEBUG /* #line 58 "/usr/local/lib/bison.h" */ #line 58 "/usr/local/lib/bison.h" /* YY_HTMLParser_DEBUG */ #endif #ifndef YY_HTMLParser_LSP_NEEDED /* #line 61 "/usr/local/lib/bison.h" */ #line 61 "/usr/local/lib/bison.h" /* YY_HTMLParser_LSP_NEEDED*/ #endif /* DEFAULT LTYPE*/ #ifdef YY_HTMLParser_LSP_NEEDED #ifndef YY_HTMLParser_LTYPE typedef struct yyltype { int timestamp; int first_line; int first_column; int last_line; int last_column; char *text; } yyltype; #define YY_HTMLParser_LTYPE yyltype #endif #endif /* DEFAULT STYPE*/ #ifndef YY_HTMLParser_STYPE #define YY_HTMLParser_STYPE int #endif /* DEFAULT MISCELANEOUS */ #ifndef YY_HTMLParser_PARSE #define YY_HTMLParser_PARSE yyparse #endif #ifndef YY_HTMLParser_LEX #define YY_HTMLParser_LEX yylex #endif #ifndef YY_HTMLParser_LVAL #define YY_HTMLParser_LVAL yylval #endif #ifndef YY_HTMLParser_LLOC #define YY_HTMLParser_LLOC yylloc #endif #ifndef YY_HTMLParser_CHAR #define YY_HTMLParser_CHAR yychar #endif #ifndef YY_HTMLParser_NERRS #define YY_HTMLParser_NERRS yynerrs #endif #ifndef YY_HTMLParser_DEBUG_FLAG #define YY_HTMLParser_DEBUG_FLAG yydebug #endif #ifndef YY_HTMLParser_ERROR #define YY_HTMLParser_ERROR yyerror #endif #ifndef YY_HTMLParser_PARSE_PARAM #ifndef __STDC__ #ifndef __cplusplus #ifndef YY_USE_CLASS #define YY_HTMLParser_PARSE_PARAM #ifndef YY_HTMLParser_PARSE_PARAM_DEF #define YY_HTMLParser_PARSE_PARAM_DEF #endif #endif #endif #endif #ifndef YY_HTMLParser_PARSE_PARAM #define YY_HTMLParser_PARSE_PARAM void #endif #endif /* TOKEN C */ #ifndef YY_USE_CLASS #ifndef 
YY_HTMLParser_PURE extern YY_HTMLParser_STYPE YY_HTMLParser_LVAL; #endif /* #line 134 "/usr/local/lib/bison.h" */ #define DOCTYPE 258 #define PCDATA 259 #define SCAN_ERROR 260 #define A 261 #define ADDRESS 262 #define APPLET 263 #define AREA 264 #define B 265 #define BASE 266 #define BASEFONT 267 #define BIG 268 #define BLOCKQUOTE 269 #define BODY 270 #define BR 271 #define CAPTION 272 #define CENTER 273 #define CITE 274 #define CODE 275 #define DD 276 #define DFN 277 #define DIR 278 #define DIV 279 #define DL 280 #define DT 281 #define EM 282 #define FONT 283 #define FORM 284 #define H1 285 #define H2 286 #define H3 287 #define H4 288 #define H5 289 #define H6 290 #define HEAD 291 #define HR 292 #define HTML 293 #define I 294 #define IMG 295 #define INPUT 296 #define ISINDEX 297 #define KBD 298 #define LI 299 #define LINK 300 #define MAP 301 #define MENU 302 #define META 303 #define NOBR 304 #define OL 305 #define OPTION 306 #define P 307 #define PARAM 308 #define PRE 309 #define SAMP 310 #define SCRIPT 311 #define SELECT 312 #define SMALL 313 #define STRIKE 314 #define STRONG 315 #define STYLE 316 #define SUB 317 #define SUP 318 #define TABLE 319 #define TD 320 #define TEXTAREA 321 #define TH 322 #define TITLE 323 #define TR 324 #define TT 325 #define U 326 #define UL 327 #define VAR 328 #define END_A 329 #define END_ADDRESS 330 #define END_APPLET 331 #define END_B 332 #define END_BIG 333 #define END_BLOCKQUOTE 334 #define END_BODY 335 #define END_CAPTION 336 #define END_CENTER 337 #define END_CITE 338 #define END_CODE 339 #define END_DD 340 #define END_DFN 341 #define END_DIR 342 #define END_DIV 343 #define END_DL 344 #define END_DT 345 #define END_EM 346 #define END_FONT 347 #define END_FORM 348 #define END_H1 349 #define END_H2 350 #define END_H3 351 #define END_H4 352 #define END_H5 353 #define END_H6 354 #define END_HEAD 355 #define END_HTML 356 #define END_I 357 #define END_KBD 358 #define END_LI 359 #define END_MAP 360 #define END_MENU 361 #define END_NOBR 
362 #define END_OL 363 #define END_OPTION 364 #define END_P 365 #define END_PRE 366 #define END_SAMP 367 #define END_SCRIPT 368 #define END_SELECT 369 #define END_SMALL 370 #define END_STRIKE 371 #define END_STRONG 372 #define END_STYLE 373 #define END_SUB 374 #define END_SUP 375 #define END_TABLE 376 #define END_TD 377 #define END_TEXTAREA 378 #define END_TH 379 #define END_TITLE 380 #define END_TR 381 #define END_TT 382 #define END_U 383 #define END_UL 384 #define END_VAR 385 #line 134 "/usr/local/lib/bison.h" /* #defines token */ /* after #define tokens, before const tokens S5*/ #else #ifndef YY_HTMLParser_CLASS #define YY_HTMLParser_CLASS HTMLParser #endif #ifndef YY_HTMLParser_INHERIT #define YY_HTMLParser_INHERIT #endif #ifndef YY_HTMLParser_MEMBERS #define YY_HTMLParser_MEMBERS #endif #ifndef YY_HTMLParser_LEX_BODY #define YY_HTMLParser_LEX_BODY #endif #ifndef YY_HTMLParser_ERROR_BODY #define YY_HTMLParser_ERROR_BODY #endif #ifndef YY_HTMLParser_CONSTRUCTOR_PARAM #define YY_HTMLParser_CONSTRUCTOR_PARAM #endif class YY_HTMLParser_CLASS YY_HTMLParser_INHERIT { public: /* static const int token ... 
*/ /* #line 160 "/usr/local/lib/bison.h" */ static const int DOCTYPE; static const int PCDATA; static const int SCAN_ERROR; static const int A; static const int ADDRESS; static const int APPLET; static const int AREA; static const int B; static const int BASE; static const int BASEFONT; static const int BIG; static const int BLOCKQUOTE; static const int BODY; static const int BR; static const int CAPTION; static const int CENTER; static const int CITE; static const int CODE; static const int DD; static const int DFN; static const int DIR; static const int DIV; static const int DL; static const int DT; static const int EM; static const int FONT; static const int FORM; static const int H1; static const int H2; static const int H3; static const int H4; static const int H5; static const int H6; static const int HEAD; static const int HR; static const int HTML; static const int I; static const int IMG; static const int INPUT; static const int ISINDEX; static const int KBD; static const int LI; static const int LINK; static const int MAP; static const int MENU; static const int META; static const int NOBR; static const int OL; static const int OPTION; static const int P; static const int PARAM; static const int PRE; static const int SAMP; static const int SCRIPT; static const int SELECT; static const int SMALL; static const int STRIKE; static const int STRONG; static const int STYLE; static const int SUB; static const int SUP; static const int TABLE; static const int TD; static const int TEXTAREA; static const int TH; static const int TITLE; static const int TR; static const int TT; static const int U; static const int UL; static const int VAR; static const int END_A; static const int END_ADDRESS; static const int END_APPLET; static const int END_B; static const int END_BIG; static const int END_BLOCKQUOTE; static const int END_BODY; static const int END_CAPTION; static const int END_CENTER; static const int END_CITE; static const int END_CODE; static const int END_DD; 
static const int END_DFN; static const int END_DIR; static const int END_DIV; static const int END_DL; static const int END_DT; static const int END_EM; static const int END_FONT; static const int END_FORM; static const int END_H1; static const int END_H2; static const int END_H3; static const int END_H4; static const int END_H5; static const int END_H6; static const int END_HEAD; static const int END_HTML; static const int END_I; static const int END_KBD; static const int END_LI; static const int END_MAP; static const int END_MENU; static const int END_NOBR; static const int END_OL; static const int END_OPTION; static const int END_P; static const int END_PRE; static const int END_SAMP; static const int END_SCRIPT; static const int END_SELECT; static const int END_SMALL; static const int END_STRIKE; static const int END_STRONG; static const int END_STYLE; static const int END_SUB; static const int END_SUP; static const int END_TABLE; static const int END_TD; static const int END_TEXTAREA; static const int END_TH; static const int END_TITLE; static const int END_TR; static const int END_TT; static const int END_U; static const int END_UL; static const int END_VAR; #line 160 "/usr/local/lib/bison.h" /* decl const */ public: int YY_HTMLParser_PARSE(YY_HTMLParser_PARSE_PARAM); virtual void YY_HTMLParser_ERROR(char *) YY_HTMLParser_ERROR_BODY; #ifdef YY_HTMLParser_PURE #ifdef YY_HTMLParser_LSP_NEEDED virtual int YY_HTMLParser_LEX(YY_HTMLParser_STYPE *YY_HTMLParser_LVAL,YY_HTMLParser_LTYPE *YY_HTMLParser_LLOC) YY_HTMLParser_LEX_BODY; #else virtual int YY_HTMLParser_LEX(YY_HTMLParser_STYPE *YY_HTMLParser_LVAL) YY_HTMLParser_LEX_BODY; #endif #else virtual int YY_HTMLParser_LEX() YY_HTMLParser_LEX_BODY; YY_HTMLParser_STYPE YY_HTMLParser_LVAL; #ifdef YY_HTMLParser_LSP_NEEDED YY_HTMLParser_LTYPE YY_HTMLParser_LLOC; #endif int YY_HTMLParser_NERRS; int YY_HTMLParser_CHAR; #endif #if YY_HTMLParser_DEBUG != 0 public: int YY_HTMLParser_DEBUG_FLAG; /* nonzero means print parse 
trace */ #endif public: YY_HTMLParser_CLASS(YY_HTMLParser_CONSTRUCTOR_PARAM); public: YY_HTMLParser_MEMBERS }; /* other declare folow */ #endif #if YY_HTMLParser_COMPATIBILITY != 0 /* backward compatibility */ #ifndef YYSTYPE #define YYSTYPE YY_HTMLParser_STYPE #endif #ifndef YYLTYPE #define YYLTYPE YY_HTMLParser_LTYPE #endif #ifndef YYDEBUG #ifdef YY_HTMLParser_DEBUG #define YYDEBUG YY_HTMLParser_DEBUG #endif #endif #endif /* END */ /* #line 209 "/usr/local/lib/bison.h" */ #endif �����������html2text-1.3.2a/HTMLParser.y�����������������������������������������������������������������������0100644�0000000�0000000�00000063463�07760112171�014441� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig <arno@unkrig.de> */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. 
*/ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de> * Dates and reasons of modifications: * Sun Apr 7 11:54:06 CEST 2002: Make some closing tags optional * Mon Jul 22 13:42:13 CEST 2002: Don't insert Paragraphs to other block elements * Mon Aug 12 17:14:57 CEST 2002: Make even more closing tags optional */ /***************************************************************************/ %name HTMLParser %define PURE %define DEBUG 1 %{ /* ------------------------------------------------------------------------- */ #ident "$Id: HTMLParser.y,v 1.14 1999/10/26 10:56:55 arno Exp $" #include "html.h" #include "HTMLParser.h" // MIPS machines don't have "alloca()", so disable stack realloc'ing. #ifdef mips #define yyoverflow yyerror("parser stack overflow"), (void) #endif /* ------------------------------------------------------------------------- */ %} /* ------------------------------------------------------------------------- */ %define LEX_BODY = 0 %define ERROR_BODY = 0 %define MEMBERS\ virtual ~HTMLParser(); \ virtual void process(const Document &) = 0;\ virtual bool read_cdata(const char *terminal, string *) = 0;\ int list_nesting; %define CONSTRUCTOR_INIT : list_nesting(0) %union { Document *document; Element *element; list<auto_ptr<Element> > *element_list; PCData *pcdata; string *strinG; list<TagAttribute> *tag_attributes; int inT; list<auto_ptr<TableRow> > *table_rows; list<auto_ptr<TableCell> > *table_cells; ListItem *list_item; list<auto_ptr<ListItem> > *list_items; Caption *caption; Heading *heading; list<auto_ptr<Option> > *option_list; Option *option; DefinitionList *definition_list; list<auto_ptr<DefinitionListItem> > *definition_list_item_list; TermName *term_name; TermDefinition *term_definition; Preformatted *preformatted; Address *address; list<auto_ptr<list<TagAttribute> > > *tag_attributes_list; } %type <document> document_ %type <pcdata> 
pcdata %type <pcdata> opt_pcdata %type <element_list> body_content %type <heading> heading %type <heading> HX %type <inT> END_HX %type <element> block %type <element> block_except_p %type <element> text %type <element_list> texts %type <element_list> opt_texts %type <element> font %type <element> phrase %type <element> special %type <element> form %type <table_rows> table_rows %type <table_cells> table_cells %type <caption> caption %type <caption> opt_caption %type <element_list> applet_content %type <definition_list> definition_list %type <definition_list_item_list>definition_list_content %type <term_name> term_name %type <term_definition> term_definition %type <option_list> select_content %type <option> option %type <element> list %type <list_items> list_content %type <list_item> list_item %type <preformatted> preformatted %type <element_list> opt_flow %type <element_list> flow %type <element> flow_ %type <element_list> paragraph_content %type <address> address %type <tag_attributes_list> map_content %type <tag_attributes> opt_LI %type <tag_attributes> opt_P %token DOCTYPE %token <strinG> PCDATA %token SCAN_ERROR %token <tag_attributes> A %token <tag_attributes> ADDRESS %token <tag_attributes> APPLET %token <tag_attributes> AREA %token <tag_attributes> B %token <tag_attributes> BASE %token <tag_attributes> BASEFONT %token <tag_attributes> BIG %token <tag_attributes> BLOCKQUOTE %token <tag_attributes> BODY %token <tag_attributes> BR %token <tag_attributes> CAPTION %token <tag_attributes> CENTER %token <tag_attributes> CITE %token <tag_attributes> CODE %token <tag_attributes> DD %token <tag_attributes> DFN %token <tag_attributes> DIR %token <tag_attributes> DIV %token <tag_attributes> DL %token <tag_attributes> DT %token <tag_attributes> EM %token <tag_attributes> FONT %token <tag_attributes> FORM %token <tag_attributes> H1 %token <tag_attributes> H2 %token <tag_attributes> H3 %token <tag_attributes> H4 %token <tag_attributes> H5 %token <tag_attributes> H6 %token 
<tag_attributes> HEAD %token <tag_attributes> HR %token <tag_attributes> HTML %token <tag_attributes> I %token <tag_attributes> IMG %token <tag_attributes> INPUT %token <tag_attributes> ISINDEX %token <tag_attributes> KBD %token <tag_attributes> LI %token <tag_attributes> LINK %token <tag_attributes> MAP %token <tag_attributes> MENU %token <tag_attributes> META %token <tag_attributes> NOBR %token <tag_attributes> OL %token <tag_attributes> OPTION %token <tag_attributes> P %token <tag_attributes> PARAM %token <tag_attributes> PRE %token <tag_attributes> SAMP %token <tag_attributes> SCRIPT %token <tag_attributes> SELECT %token <tag_attributes> SMALL %token <tag_attributes> STRIKE %token <tag_attributes> STRONG %token <tag_attributes> STYLE %token <tag_attributes> SUB %token <tag_attributes> SUP %token <tag_attributes> TABLE %token <tag_attributes> TD %token <tag_attributes> TEXTAREA %token <tag_attributes> TH %token <tag_attributes> TITLE %token <tag_attributes> TR %token <tag_attributes> TT %token <tag_attributes> U %token <tag_attributes> UL %token <tag_attributes> VAR %token END_A %token END_ADDRESS %token END_APPLET %token END_B %token END_BIG %token END_BLOCKQUOTE %token END_BODY %token END_CAPTION %token END_CENTER %token END_CITE %token END_CODE %token END_DD %token END_DFN %token END_DIR %token END_DIV %token END_DL %token END_DT %token END_EM %token END_FONT %token END_FORM %token END_H1 %token END_H2 %token END_H3 %token END_H4 %token END_H5 %token END_H6 %token END_HEAD %token END_HTML %token END_I %token END_KBD %token END_LI %token END_MAP %token END_MENU %token END_NOBR %token END_OL %token END_OPTION %token END_P %token END_PRE %token END_SAMP %token END_SCRIPT %token END_SELECT %token END_SMALL %token END_STRIKE %token END_STRONG %token END_STYLE %token END_SUB %token END_SUP %token END_TABLE %token END_TD %token END_TEXTAREA %token END_TH %token END_TITLE %token END_TR %token END_TT %token END_U %token END_UL %token END_VAR /* 
------------------------------------------------------------------------- */

%start document

%%

/* { */

/* Start symbol: hand the completed document to process(), then free it. */
document:
  document_ {
    process(*$1);
    delete $1;
  }
  ;

/*
 * Well... actually, an HTML document should look like
 *
 *   <!DOCTYPE ...>
 *   <HTML>
 *     <HEAD>
 *       ...
 *     </HEAD>
 *     <BODY>
 *       ...
 *     </BODY>
 *   </HTML>
 *
 * but...
 *
 * (A) All seven tags are optional
 * (B) The contents of the HEAD and the BODY section can be distinguished
 * (C) Most people out there do not know which element to put before, into,
 *     or after which section...
 *
 * so... let's just forget about the structure of an HTML document, discard
 * the seven tags, and process the remainder as a series of sections.
 */

document_:
  /* empty */ {
    $$ = new Document;
    $$->body.content.reset(new list<auto_ptr<Element> >);
  }
  | document_ error {
    $$ = $1;
  }
  | document_ DOCTYPE {
    $$ = $1;
  }
  | document_ HTML {
    /* Assign $$ before dereferencing it; do not rely on $$ being */
    /* pre-initialised with $1 by the parser skeleton.            */
    ($$ = $1)->attributes.reset($2);
  }
  | document_ END_HTML {
    $$ = $1;
  }
  | document_ HEAD {
    delete $2;
    $$ = $1;
  }
  | document_ END_HEAD {
    $$ = $1;
  }
  | document_ TITLE opt_pcdata opt_END_TITLE {
    delete $2; /* Ignore <TITLE> attributes */
    ($$ = $1)->head.title.reset($3);
  }
  | document_ ISINDEX {
    ($$ = $1)->head.isindex_attributes.reset($2);
  }
  | document_ BASE {
    ($$ = $1)->head.base_attributes.reset($2);
  }
  | document_ META {
    ($$ = $1)->head.meta_attributes.reset($2);
  }
  | document_ LINK {
    ($$ = $1)->head.link_attributes.reset($2);
  }
  | document_ SCRIPT {
    auto_ptr<Script> s(new Script);
    s->attributes.reset($2);
    if (!read_cdata("</SCRIPT>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.scripts.push_back(s);
  }
  | document_ STYLE {
    auto_ptr<Style> s(new Style);
    s->attributes.reset($2);
    if (!read_cdata("</STYLE>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.styles.push_back(s);
  }
  | document_ BODY {
    delete $2;
    $$ = $1;
  }
  | document_ END_BODY {
    $$ = $1;
  }
  | document_ texts {
    /* Loose texts at document level are wrapped into a paragraph. */
    Paragraph *p = new Paragraph;
    p->texts.reset($2);
    ($$ = $1)->body.content->push_back(auto_ptr<Element>(p));
  }
  | document_ heading {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ block {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ address {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  ;

/* Copy the scanned text into a new PCData element and free the token value. */
pcdata:
  PCDATA {
    $$ = new PCData;
    $$->text = *$1;
    delete $1;
  }
  ;

/* Content of DIV, CENTER, BLOCKQUOTE, FORM and table cells. */
body_content:
  /* empty */ {
    $$ = new list<auto_ptr<Element> >;
  }
  | body_content error {
    $$ = $1;
  }
  | body_content SCRIPT {
    /* The script is read so that its CDATA is consumed, but it is not */
    /* stored anywhere; "s" frees it at the end of the action.         */
    auto_ptr<Script> s(new Script);
    s->attributes.reset($2);
    if (!read_cdata("</SCRIPT>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    $$ = $1;
  }
  | body_content STYLE {
    /* Same as SCRIPT above: consume the CDATA and discard the style. */
    auto_ptr<Style> s(new Style);
    s->attributes.reset($2);
    if (!read_cdata("</STYLE>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    $$ = $1;
  }
  | body_content texts {
    Paragraph *p = new Paragraph;
    p->texts.reset($2);
    ($$ = $1)->push_back(auto_ptr<Element>(p));
  }
  | body_content heading {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content block {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content address {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

heading:
  HX paragraph_content END_HX {
    /* EXTENSION: Allow paragraph content in heading, not only texts */
    /* A level mismatch is reported, but the heading is still built  */
    /* with the level of the opening tag.                            */
    if ($1->level != $3) {
      yyerror ("Levels of opening and closing headings don't match");
    }
    $$ = $1;
    $$->content.reset($2);
  }
  ;

block:
  block_except_p {
    $$ = $1;
  }
  | P paragraph_content opt_END_P {
    Paragraph *p = new Paragraph;
    p->attributes.reset($1);
    p->texts.reset($2);
    $$ = p;
  }
  ;

paragraph_content:
  /* EXTENSION: Allow blocks (except "<P>") in paragraphs.
*/ /* empty */ { $$ = new list<auto_ptr<Element> >; } | paragraph_content error { $$ = $1; } | paragraph_content texts { $$ = $1; $$->splice($$->end(), *$2); delete $2; } | paragraph_content block_except_p { ($$ = $1)->push_back(auto_ptr<Element>($2)); } ; block_except_p: list { $$ = $1; } | preformatted { $$ = $1; } | definition_list { $$ = $1; } | DIV body_content opt_END_DIV { Division *p = new Division; p->attributes.reset($1); p->body_content.reset($2); $$ = p; } | CENTER body_content opt_END_CENTER { Center *p = new Center; delete $1; // CENTER has no attributes. p->body_content.reset($2); $$ = p; } | BLOCKQUOTE body_content opt_END_BLOCKQUOTE { delete $1; // BLOCKQUOTE has no attributes! BlockQuote *bq = new BlockQuote; bq->content.reset($2); $$ = bq; } | FORM body_content opt_END_FORM { Form *f = new Form; f->attributes.reset($1); f->content.reset($2); $$ = f; } | HR { HorizontalRule *h = new HorizontalRule; h->attributes.reset($1); $$ = h; } | TABLE opt_caption table_rows END_TABLE { Table *t = new Table; t->attributes.reset($1); t->caption.reset($2); t->rows.reset($3); $$ = t; } ; list: OL { ++list_nesting; } list_content END_OL { OrderedList *ol = new OrderedList; ol->attributes.reset($1); ol->items.reset($3); ol->nesting = --list_nesting; $$ = ol; } | UL { ++list_nesting; } list_content opt_END_UL { UnorderedList *ul = new UnorderedList; ul->attributes.reset($1); ul->items.reset($3); ul->nesting = --list_nesting; $$ = ul; } | DIR { ++list_nesting; } list_content END_DIR { Dir *d = new Dir; d->attributes.reset($1); d->items.reset($3); d->nesting = --list_nesting; $$ = d; } | MENU { ++list_nesting; } list_content END_MENU { Menu *m = new Menu; m->attributes.reset($1); m->items.reset($3); m->nesting = --list_nesting; $$ = m; } ; list_content: /* empty */ { $$ = 0; } | list_content error { $$ = $1; } | list_content list_item { $$ = $1 ? 
$1 : new list<auto_ptr<ListItem> >; $$->push_back(auto_ptr<ListItem>($2)); } ; list_item: LI opt_flow opt_END_LI { ListNormalItem *lni = new ListNormalItem; lni->attributes.reset($1); lni->flow.reset($2); $$ = lni; } | block { /* EXTENSION: Handle a "block" in a list as an indented block. */ ListBlockItem *lbi = new ListBlockItem; lbi->block.reset($1); $$ = lbi; } | texts { /* EXTENSION: Treat "texts" in a list as an "<LI>". */ ListNormalItem *lni = new ListNormalItem; lni->flow.reset($1); $$ = lni; } ; definition_list: /* EXTENSION: Allow nested <DL>s. */ /* EXTENSION: "</DL>" optional. */ DL opt_flow opt_error definition_list opt_END_DL { delete $1; delete $2; /* Kludge */ $$ = $4; } /* EXTENSION: Accept a "preamble" in the DL */ | DL opt_flow opt_error definition_list_content END_DL { DefinitionList *dl = new DefinitionList; dl->attributes.reset($1); dl->preamble.reset($2); dl->items.reset($4); $$ = dl; } ; definition_list_content: /* empty */ { $$ = 0; } | definition_list_content { $$ = $1; } | definition_list_content term_name { $$ = $1 ? $1 : new list<auto_ptr<DefinitionListItem> >; $$->push_back(auto_ptr<DefinitionListItem>($2)); } | definition_list_content term_definition { $$ = $1 ? 
$1 : new list<auto_ptr<DefinitionListItem> >; $$->push_back(auto_ptr<DefinitionListItem>($2)); } ; term_name: DT opt_flow opt_error { /* EXTENSION: Allow "flow" instead of "texts" */ delete $1; $$ = new TermName; $$->flow.reset($2); } | DT opt_flow END_DT opt_P opt_error {/* EXTENSION: Ignore <P> after </DT> */ delete $1; delete $4; $$ = new TermName; $$->flow.reset($2); } ; term_definition: DD opt_flow opt_error { delete $1; $$ = new TermDefinition; $$->flow.reset($2); } | DD opt_flow END_DD opt_P opt_error {/* EXTENSION: Ignore <P> after </DD> */ delete $1; delete $4; $$ = new TermDefinition; $$->flow.reset($2); } ; flow: flow_ { $$ = new list<auto_ptr<Element> >; $$->push_back(auto_ptr<Element>($1)); } | flow error { $$ = $1; } | flow flow_ { ($$ = $1)->push_back(auto_ptr<Element>($2)); } ; flow_: text { $$ = $1; } | heading { /* EXTENSION: Allow headings in "flow", i.e. in lists */ $$ = $1; } | block { $$ = $1; } ; preformatted: PRE opt_texts opt_END_PRE { $$ = new Preformatted; $$->attributes.reset($1); $$->texts.reset($2); } ; caption: CAPTION opt_texts END_CAPTION { $$ = new Caption; $$->attributes.reset($1); $$->texts.reset($2); } ; table_rows: /* empty */ { $$ = new list<auto_ptr<TableRow> >; } | table_rows error { $$ = $1; } | table_rows TR table_cells opt_END_TR { TableRow *tr = new TableRow; tr->attributes.reset($2); tr->cells.reset($3); ($$ = $1)->push_back(auto_ptr<TableRow>(tr)); } ; table_cells: /* empty */ { $$ = new list<auto_ptr<TableCell> >; } | table_cells error { $$ = $1; } | table_cells TD body_content opt_END_TD { TableCell *tc = new TableCell; tc->attributes.reset($2); tc->content.reset($3); ($$ = $1)->push_back(auto_ptr<TableCell>(tc)); } | table_cells TH body_content opt_END_TH opt_END_TD { /* EXTENSION: Allow "</TD>" in place of "</TH>". 
*/ TableHeadingCell *thc = new TableHeadingCell; thc->attributes.reset($2); thc->content.reset($3); ($$ = $1)->push_back(auto_ptr<TableCell>(thc)); } | table_cells INPUT { /* EXTENSION: Ignore <INPUT> between table cells. */ delete $2; $$ = $1; } ; address: ADDRESS opt_texts END_ADDRESS { /* Should be "address_content"... */ delete $1; $$ = new Address; $$->content.reset($2); } ; /* ------------------------------------------------------------------------- */ texts: text { $$ = new list<auto_ptr<Element> >; $$->push_back(auto_ptr<Element>($1)); } | texts text { ($$ = $1)->push_back(auto_ptr<Element>($2)); } ; text: pcdata opt_error { $$ = $1; } | font opt_error { $$ = $1; } | phrase opt_error { $$ = $1; } | special opt_error { $$ = $1; } | form opt_error { $$ = $1; } | NOBR opt_texts END_NOBR opt_error { /* EXTENSION: NS 1.1 / IE 2.0 */ NoBreak *nb = new NoBreak; delete $1; nb->content.reset($2); $$ = nb; } ; font: TT opt_texts opt_END_TT { delete $1; $$ = new Font(TT, $2); } | I opt_texts opt_END_I { delete $1; $$ = new Font(I, $2); } | B opt_texts opt_END_B { delete $1; $$ = new Font(B, $2); } | U opt_texts opt_END_U { delete $1; $$ = new Font(U, $2); } | STRIKE opt_texts opt_END_STRIKE { delete $1; $$ = new Font(STRIKE, $2); } | BIG opt_texts opt_END_BIG { delete $1; $$ = new Font(BIG, $2); } | SMALL opt_texts opt_END_SMALL { delete $1; $$ = new Font(SMALL, $2); } | SUB opt_texts opt_END_SUB { delete $1; $$ = new Font(SUB, $2); } | SUP opt_texts opt_END_SUP { delete $1; $$ = new Font(SUP, $2); } ; phrase: EM opt_texts opt_END_EM { delete $1; $$ = new Phrase(EM, $2); } | STRONG opt_texts opt_END_STRONG { delete $1; $$ = new Phrase(STRONG, $2); } | DFN opt_texts opt_END_DFN { delete $1; $$ = new Phrase(DFN, $2); } | CODE opt_texts opt_END_CODE { delete $1; $$ = new Phrase(CODE, $2); } | SAMP opt_texts opt_END_SAMP { delete $1; $$ = new Phrase(SAMP, $2); } | KBD opt_texts opt_END_KBD { delete $1; $$ = new Phrase(KBD, $2); } | VAR opt_texts opt_END_VAR { delete $1; 
$$ = new Phrase(VAR, $2); } | CITE opt_texts opt_END_CITE { delete $1; $$ = new Phrase(CITE, $2); } ; special: /* EXTENSION: Allow "flow" in <A>, not only "texts". */ /* EXTENSION: Allow useless <LI> in anchor. */ /* EXTENSION: "</A>" optional.*/ A opt_LI opt_flow opt_END_A { delete $2; Anchor *a = new Anchor; a->attributes.reset($1); a->texts.reset($3); $$ = a; } | IMG { Image *i = new Image; i->attributes.reset($1); $$ = i; } | APPLET applet_content END_APPLET { Applet *a = new Applet; a->attributes.reset($1); a->content.reset($2); $$ = a; } /* EXTENSION: "flow" in <FONT> allowed, not only "texts". */ /* EXTENSION: "</FONT>" optional. */ | FONT opt_flow opt_END_FONT { Font2 *f2 = new Font2; f2->attributes.reset($1); f2->elements.reset($2); $$ = f2; } | BASEFONT { BaseFont *bf = new BaseFont; bf->attributes.reset($1); $$ = bf; } | BR { LineBreak *lb = new LineBreak; lb->attributes.reset($1); $$ = lb; } | MAP map_content END_MAP { Map *m = new Map; m->attributes.reset($1); m->areas.reset($2); $$ = m; } ; applet_content: /* empty */ { $$ = 0; } | applet_content text { $$ = $1 ? $1 : new list<auto_ptr<Element> >; $$->push_back(auto_ptr<Element>($2)); } | applet_content PARAM { $$ = $1 ? $1 : new list<auto_ptr<Element> >; Param *p = new Param; p->attributes.reset($2); $$->push_back(auto_ptr<Element>(p)); } ; map_content: /* empty */ { $$ = 0; } | map_content error { $$ = $1; } | map_content AREA { $$ = $1 ? 
$1 : new list<auto_ptr<list<TagAttribute> > >; $$->push_back(auto_ptr<list<TagAttribute> >($2)); } ; form: INPUT { Input *i = new Input; i->attributes.reset($1); $$ = i; } | SELECT select_content END_SELECT { Select *s = new Select; s->attributes.reset($1); s->content.reset($2); $$ = s; } | TEXTAREA pcdata END_TEXTAREA { TextArea *ta = new TextArea; ta->attributes.reset($1); ta->pcdata.reset($2); $$ = ta; } ; select_content: option { $$ = new list<auto_ptr<Option> >; $$->push_back(auto_ptr<Option>($1)); } | select_content option { ($$ = $1)->push_back(auto_ptr<Option>($2)); } ; option: OPTION pcdata opt_END_OPTION { $$ = new Option; $$->attributes.reset($1); $$->pcdata.reset($2); } ; /* ------------------------------------------------------------------------- */ HX: H1 { $$ = new Heading; $$->level = 1; $$->attributes.reset($1); } | H2 { $$ = new Heading; $$->level = 2; $$->attributes.reset($1); } | H3 { $$ = new Heading; $$->level = 3; $$->attributes.reset($1); } | H4 { $$ = new Heading; $$->level = 4; $$->attributes.reset($1); } | H5 { $$ = new Heading; $$->level = 5; $$->attributes.reset($1); } | H6 { $$ = new Heading; $$->level = 6; $$->attributes.reset($1); } ; END_HX: END_H1 { $$ = 1; } | END_H2 { $$ = 2; } | END_H3 { $$ = 3; } | END_H4 { $$ = 4; } | END_H5 { $$ = 5; } | END_H6 { $$ = 6; } ; /* ------------------------------------------------------------------------- */ opt_pcdata: /* empty */ { $$ = 0; } | pcdata { $$ = $1; }; opt_caption: /* empty */ { $$ = 0; } | caption { $$ = $1; }; opt_texts: /* empty */ { $$ = 0; } | texts { $$ = $1; }; opt_flow: /* empty */ { $$ = 0; } | flow { $$ = $1; }; opt_LI: /* empty */ { $$ = 0; } | LI { $$ = $1; }; opt_P: /* empty */ { $$ = 0; } | P { $$ = $1; }; opt_END_A: /* empty */ | END_A; opt_END_B: /* empty */ | END_B; opt_END_BLOCKQUOTE: /* empty */ | END_BLOCKQUOTE; opt_END_BIG: /* empty */ | END_BIG; opt_END_CENTER: /* empty */ | END_CENTER; opt_END_CITE: /* empty */ | END_CITE; opt_END_CODE: /* empty */ | END_CODE; 
opt_END_DFN: /* empty */ | END_DFN; opt_END_DIV: /* empty */ | END_DIV; opt_END_DL: /* empty */ | END_DL; opt_END_EM: /* empty */ | END_EM; opt_END_FONT: /* empty */ | END_FONT; opt_END_FORM: /* empty */ | END_FORM; opt_END_I: /* empty */ | END_I; opt_END_KBD: /* empty */ | END_KBD; opt_END_LI: /* empty */ | END_LI; opt_END_OPTION: /* empty */ | END_OPTION; opt_END_P: /* empty */ | END_P; opt_END_PRE: /* empty */ | END_PRE; opt_END_SAMP: /* empty */ | END_SAMP; opt_END_SMALL: /* empty */ | END_SMALL; opt_END_STRIKE: /* empty */ | END_STRIKE; opt_END_STRONG: /* empty */ | END_STRONG; opt_END_SUB: /* empty */ | END_SUB; opt_END_SUP: /* empty */ | END_SUP; opt_END_TD: /* empty */ | END_TD; opt_END_TH: /* empty */ | END_TH; opt_END_TITLE: /* empty */ | END_TITLE; opt_END_TR: /* empty */ | END_TR; opt_END_TT: /* empty */ | END_TT; opt_END_U: /* empty */ | END_U; opt_END_UL: /* empty */ | END_UL; opt_END_VAR: /* empty */ | END_VAR; opt_error: /* empty */ | error; %% /* } */ /* * Some C++ compilers (e.g. EGCS 2.91.66) have problems if all virtual * methods of a class are inline or pure virtual, so we define the destructor, * which is the only virtual method, non-inline, although it is empty. 
*/ HTMLParser::~HTMLParser() { } /* ------------------------------------------------------------------------- */ �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/INSTALL����������������������������������������������������������������������������0100644�0000000�0000000�00000017306�07534114105�013350� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## INSTALL - How to compile and install Sat Aug 31 12:04:42 CEST 2002 ## =========================================================================== # # For the impatient: 'cd' into the html2text distribution directory ./configure (in some shells 'sh ./configure') make (or in some cases 'gmake') 'mv' the html2text executable to its installation directory (e.g. /usr/local/bin) ## --------------------------------------------------------------------------- # # html2text can be compiled on many UNIX-like platforms, in particular those # that provide a GNU compiler (g++). 1. Untar the distribution package: $ gunzip html2text-1.3.X.tar.gz $ tar xf html2text-1.3.X.tar 2. Change into the html2text distribution directory: $ cd html2text-1.3.X 3. To generate the make files, issue: $ ./configure If you're using a csh on an old version of System V, you might need to type 'sh ./configure' to prevent the csh from trying to execute 'configure' itself. While the script is running, it prints some messages telling which features it is checking for, alike (the exact output depends on your platform): | Checking C++ compiler... use "g++" | Checking <sys/poll.h>... 
OK | Checking for socket libraries... no extra libraries required | Checking "bool"... built-in | Checking "explicit"... built-in | Checking Standard C++ library... works; no need to make "./libstd" | Checking "auto_ptr"... not defined or not working, use | "./libstd/include/auto_ptr.h" | Checking "makedepend" includes... use "-I/usr/include -I/usr/include/bits | -I/usr/include/g++ -I/usr/include/gnu -I/usr/include/sys | -I/usr/lib/gcc-lib/i486-suse-linux/2.95.3/include " | Creating "./Makefile" from "./Makefile.in"... done | | Preparing completed. You may now run "make" (or "gmake"). 4. Then compile html2text with: $ make On non-GNU systems and/or if 'make' fails, you might need to use 'gmake' instead of 'make', in order to force a compilation by g++. Compiling 'html2text' takes awhile. While 'make' is running, it will print some messages (and, hopefully, no errors), alike: | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g html2text.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g html.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g HTMLControl.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g HTMLParser.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g Area.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g format.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g sgml.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g table.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g urlistream.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g Properties.C | g++ -c -DVERSION=1.3.1 -DAUTO_PTR_BROKEN -O2 -g cmp_nocase.C | g++ -O2 -g html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o | sgml.o table.o urlistream.o Properties.o cmp_nocase.o -o html2text | | Compilation completed. You may now move "html2text", "html2text.1.gz" | and "html2textrc.5.gz" to their installation directories (e.g. | "/usr/local/bin", "/usr/local/man/man1" and "/usr/local/man/man5"). 5. No automatic installation is provided by now. Be root. 
Then move the html2text executable and the manual pages to their respective installation directories (e.g. /usr/local/bin, /usr/local/man/man1 and /usr/local/man/man5). Be sure you set the execution rights accordingly and the files are owned by root or whoever owns binaries on your system. E.g.: # install -s -m 755 html2text /usr/local/bin # install -m 644 html2text.1.gz /usr/local/man/man1 # install -m 644 html2textrc.5.gz /usr/local/man/man5 After this, create a directory for the documentation files at an appropriate place (e.g. /usr/doc/html2text or /usr/share/doc/html2text), and move all documentation files into that folder. E.g.: # install -d -m 755 /usr/share/doc/html2text # install -b -p -m 644 *[ABD-Z] /usr/share/doc/html2text Be sure you read the README file! ## --------------------------------------------------------------------------- # # If you prefer to install the program as an RPM package: Place the original source tarball in rpm's build directory, e.g. /usr/src/packages/SOURCES Then, download the spec file from the program's homepage and start the build with the command rpm -bb html2text.spec After successful compilation, rpm will print a message like | Wrote: /usr/src/packages/RPMS/i386/html2text-1.3.1-1.i386.rpm This package can now be installed with rpm, e.g. rpm -Uhv /usr/src/packages/RPMS/i386/html2text-1.3.1-1.i386.rpm ## =========================================================================== # # Hints for porting html2text to other platforms and for solving other problems: On g++ version 3, the 'istream.h' header file from the compiler's 'backward' directory is used. You might need to point to that directory in the Makefile's line beginning with 'LIBSTDCXX_INCLUDES', e.g. LIBSTDCXX_INCLUDES = -I/usr/local/include/g++-v3/backward Some compilers have weird built-in rules which collide with the rules in the make files. If 'make' fails, then try to disable the built-in rules.
For most 'make' utilities, this can be achieved by specifying the '-r' command line option, i.e. 'make -r'. From version 1.02, html2text requires the "socket()" function and its friends, which live in different libraries for the different systems. "configure" tries several sets of "-l" linker options to find the right libraries. Some C++ compilers lack a built-in "bool" data type. "configure" attempts to find a suitable definition which it passes to the compiler with "-DBOOL_DEFINITION=...". html2text requires a standard C++ library. Since some old C++ compilers do not come with a standard C++ library, I have implemented parts of the library in the 'libstd' subdirectory; the exported header files are in 'libstd/include'. For platforms which come with a (working) standard C++ library, the "home-grown" library in 'libstd' is not compiled and linked. However, on some platforms, these libraries did not work. I don't know if this is because the compiler or our 'libstd' library is broken, or because we have some syntax errors in the source code files. Some old GNU Standard C++ Library versions falsely re-names "list::erase()" as "list::remove()". I fixed this with a "-DSTRING_ERASE=remove" on the "g++" command line. Some compilers do come with a Standard C++ Library, but the definition of the "auto_ptr" template is missing from "<memory>", or is not standard-compliant. This condition is checked by "configure" and "./libstd/include/auto_ptr.h" is included if necessary (preprocessor symbol "HAS_WORKING_AUTO_PTR"). "make depend" attempts to run the MAKEDEPEND utility (not included in this package). Unfortunately, most C++ compilers implicitly use some "secret" include directories, which MAKEDEPEND doesn't know of; as a result, MAKEDEPEND may issue some "could not find..." errors, which shouldn't worry you. The "configure" script attempts to guess the "secret include directory" and passes it with "-I" to MAKEDEPEND.
To generate 'HTMLParser.C' and 'HTMLParser.h' from 'HTMLParser.y' and 'HTMLParker.k' (which can be archived with the 'make bison-local' directive), you need bison++ version 2.2, written by Alain Coetmeur. Unfortunately, this version is completely outdated and thus not longer being maintained or supported in any way. As you will need it if you want to hack into the program, you can find the program's sources on html2text's homepage. However, the already generated 'HTMLParser.C' and 'HTMLParser.h' files are included into html2text's source code package, enableing anybody to compile html2text without having to install bison++-2.2 before. ## =========================================================================== Martin Bayer <mbayer@zedat.fu-berlin.de> ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/configure��������������������������������������������������������������������������0100755�0000000�0000000�00000015131�10000540606�014207� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#! /bin/sh # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License in the file COPYING for more details.

# Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
# Dates and reasons of modifications:
#   Son Mar 18 21:55:24 CET 2001
#   Fre Jun  8 18:46:58 CEST 2001
#   Thu Oct  4 21:54:50 CEST 2001
#   Sun Apr  7 12:04:48 CEST 2002
#   Tue Nov 11 21:30:26 CET 2003

# All progress messages go through "$echo" so that the trailing "\c"
# (suppress newline) is interpreted.
echo='/bin/echo -e'

# Scratch directory for the probe programs; "$tmp_file" is the common stem
# of the probe source ("$tmp_file.C") and the probe executable.
rm -rf configure-tmp || exit 1;
mkdir configure-tmp || exit 1;
tmp_file="configure-tmp/xxx";

# Directories for which a Makefile is generated from Makefile.in.
makedirs=".";

#
# $CXX
#
# Pick the first candidate compiler that can compile a trivial iostream
# program.
#
$echo 'Checking C++ compiler... \c';
cat <<EOF >$tmp_file.C;
#include <iostream>
int main(int, char **) { std::cout << "hello" << std::endl; return 0; }
EOF
CXX=unknown;
for i in "CC" "g++" "cc" "$CC"; do
  if $i -c $tmp_file.C 2>/dev/null; then
    CXX="$i";
    break;
  fi;
done;
if test "$CXX" = unknown; then
  $echo "Error: Could not find a working C++ compiler.";
  exit 1;
fi;
$echo "use \"$CXX\"";

#
# $SYS_POLL_MISSING
#
# Try <sys/poll.h> first; fall back to the hand-written declarations that
# are enabled by -DSYS_POLL_MISSING.
#
$echo 'Checking <sys/poll.h>... \c';
SYS_POLL_MISSING=unknown;
cat <<EOF >$tmp_file.C;
#ifdef SYS_POLL_MISSING /* { */
struct pollfd { int fd; short events; short revents; };
extern "C" int poll(struct pollfd *ufds, unsigned int nfds, int timeout);
#define POLLIN 0x0001
#define POLLPRI 0x0002
#define POLLOUT 0x0004
#define POLLERR 0x0008
#define POLLHUP 0x0010
#define POLLNVAL 0x0020
#else /* } { */
#include <sys/poll.h>
#endif /* } */
int main() { struct pollfd fds[3]; return poll(fds, 3, 700); }
EOF
for i in "" -DSYS_POLL_MISSING; do
  if $CXX $tmp_file.C $i -o $tmp_file 2>/dev/null; then
    SYS_POLL_MISSING="$i";
    break;
  fi;
done;
case "$SYS_POLL_MISSING" in
unknown)
  $echo 'Error: Could not get "poll()" to work.';
  exit 1;;
"")
  $echo "OK";;
*)
  $echo "use \"$SYS_POLL_MISSING\"";;
esac;

#
# $SOCKET_LIBRARIES
#
# Find the set of "-l" options that lets a program using socket() and
# gethostbyname() link.
#
$echo 'Checking for socket libraries... \c';
SOCKET_LIBRARIES=unknown;
cat >$tmp_file.C <<EOF;
extern "C" int socket();
extern "C" void gethostbyname();
int main() { socket(); gethostbyname(); return 0; }
EOF
for i in "" "-lbsocket" "-lbsocket -lnsl" "-lsocket" "-lsocket -lnsl"; do
  if $CXX $tmp_file.C $i -o $tmp_file 2>/dev/null; then
    SOCKET_LIBRARIES="$i";
    break;
  fi;
done;
if test "$SOCKET_LIBRARIES" = unknown; then
  $echo "Error: Could not determine the library for the socket API.";
  exit 1;
fi;
if test "$SOCKET_LIBRARIES" = ""; then
  $echo "no extra libraries required";
else
  $echo "use \"$SOCKET_LIBRARIES\"";
fi;

#
# $BOOL_DEFINITION
#
# Use the built-in "bool" if there is one; otherwise try two substitute
# definitions passed on the command line.
#
$echo 'Checking "bool"... \c';
BOOL_DEFINITION=unknown;
cat <<EOF >$tmp_file.C;
#ifdef BOOL_DEFINITION
BOOL_DEFINITION
#endif
int main(int argc, char **) { bool x = argc == 3; x = !x; if (x && argc == 7) x = false; return 0; }
EOF
for i in \
  '' \
  '-DBOOL_DEFINITION="typedef unsigned char bool;const bool false=0,true=1;"' \
  '-DBOOL_DEFINITION="enum bool{false,true};"'; \
do
  if eval "$CXX $tmp_file.C $i -o $tmp_file 2>/dev/null"; then
    BOOL_DEFINITION="$i";
    break;
  fi;
done;
case "$BOOL_DEFINITION" in
unknown)
  $echo 'Error: Could not find a suitable definition for "bool".';
  exit 1;;
"")
  $echo "built-in";;
*)
  $echo "use '$BOOL_DEFINITION'";;
esac;

#
# $EXPLICIT
#
# Use the "explicit" keyword if the compiler knows it; otherwise define it
# away with "-Dexplicit=".
#
$echo 'Checking "explicit"... \c';
EXPLICIT=unknown;
cat <<EOF >$tmp_file.C;
struct C { explicit C(int) {} };
int main(int, char **) { C x(7); return 0; }
EOF
for i in \
  '' \
  '-Dexplicit='; \
do
  if eval "$CXX $tmp_file.C $i -o $tmp_file 2>/dev/null"; then
    EXPLICIT="$i";
    break;
  fi;
done;
case "$EXPLICIT" in
unknown)
  $echo 'Error: Could not find a suitable definition for "explicit".';
  exit 1;;
"")
  $echo "built-in";;
*)
  $echo "use '$EXPLICIT'";;
esac;

#
# $LIBSTDCXX_INCLUDES, $LIBSTDCXX_LIBS
#
# If the compiler's own standard library headers do not compile, build and
# use the substitute library in "./libstd".
#
$echo 'Checking Standard C++ library... \c';
cat <<EOF >$tmp_file.C;
#include <string>
#include <list>
#include <memory>
#include <utility>
#include <map>
#include <set>
#include <new>
#include <vector>
using namespace std;
void func() { map<string, string> x; }
EOF
if $CXX -c $tmp_file.C 2>/dev/null; then
  LIBSTDCXX_INCLUDES="";
  LIBSTDCXX_LIBS="";
  $echo 'works; no need to make "./libstd"';
else
  LIBSTDCXX_INCLUDES='-Ilibstd/include';
  LIBSTDCXX_LIBS='libstd/libstd.a';
  $echo 'not available or not working; use "./libstd"';
  makedirs="$makedirs ./libstd";
fi;

#
# $AUTO_PTR_BROKEN
#
# Check that "auto_ptr" from <memory> offers the operations used by the
# probe below; otherwise request the replacement header.
#
AUTO_PTR_BROKEN="";
$echo 'Checking "auto_ptr"... \c';
cat <<EOF >$tmp_file.C;
#include <memory>
#include <string>
#include <list>
using namespace std;
int main(int, char**) {
  auto_ptr<string> x(new string("hello"));
  *x = "world";
  (void) x.get();
  (void) x.release();
  x.reset(0); // egcs-2.91.66 lacks "reset()"!

  // G++ 2.95.1 on AIX 4.2 cannot compile this:
  auto_ptr<int> api;
  list<auto_ptr<int> > lapi;
  lapi.push_back(api);

  return 0;
}
EOF
if eval "$CXX -c $LIBSTDCXX_INCLUDES $EXPLICIT $BOOL_DEFINITION $tmp_file.C" 2>/dev/null; then
  $echo 'defined in <memory>, good';
else
  $echo 'not defined or not working, use "./libstd/include/auto_ptr.h"';
  AUTO_PTR_BROKEN="-DAUTO_PTR_BROKEN";
fi;

#
# $MAKEDEPEND_INCLUDES
#
# Collect the absolute directories named in the preprocessor's line markers
# and turn each into a "-I" option.
#
MAKEDEPEND_INCLUDES="";
$echo 'Checking "makedepend" includes... \c';
echo "#include <iostream>" >$tmp_file.C;
MAKEDEPEND_INCLUDES=`$CXX -E $tmp_file.C 2>/dev/null |
sed -n \
  -e 's/^#line .*"\(\/.*\)\/.*".*/-I\1/p' \
  -e 's/^# [1-9][0-9]* "\(\/.*\)\/.*".*/-I\1/p' |
sort -u |
tr '\n' ' '`;
if test "$MAKEDEPEND_INCLUDES" = ""; then
  $echo none;
else
  $echo "use \"$MAKEDEPEND_INCLUDES\"";
fi;

#
# Create "Makefile" from "Makefile.in".
# rm -f Makefile libstd/Makefile; cmd=sed; for i in \ SYS_POLL_MISSING \ SOCKET_LIBRARIES \ CXX \ BOOL_DEFINITION \ EXPLICIT \ LIBSTDCXX_INCLUDES \ LIBSTDCXX_LIBS \ AUTO_PTR_BROKEN \ MAKEDEPEND_INCLUDES; \ do cmd="$cmd -e \"s|@$i@|\$$i|g\""; done; for dir in $makedirs; do $echo "Creating \"$dir/Makefile\" from \"$dir/Makefile.in\"... \\c"; cat <<EOF >$dir/Makefile; # # This make file was generated from "Makefile.in" by "./configure" on # `date` -- all your changes will be lost if you # run "./configure" again! # EOF eval "$cmd" <$dir/Makefile.in >>$dir/Makefile; $echo 'done'; if test -f $dir/Dependencies; then true; else >$dir/Dependencies; fi; done; # # Clean up. # rm -rf configure-tmp; rm -f xxx.o; cat <<EOF; Preparing completed. You may now run "make" (or "gmake"). EOF ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/Properties.C�����������������������������������������������������������������������0100644�0000000�0000000�00000010100�07760112171�014542� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. 
* * Author: Arno Unkrig <arno@unkrig.de> */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de> * Dates and reasons of modifications: * Fre Jun 8 17:24:35 CEST 2001: new method * Wed Jul 2 22:02:51 CEST 2003: ported to g++ 3.3 */ /***************************************************************************/ #include <ctype.h> #include <iostream> #include "Properties.h" /* ------------------------------------------------------------------------- */ const char * Properties::getProperty(const char *key, const char *dflt) const { map<string, string>::const_iterator i; i = property_map.find(key); return i == property_map.end() ? dflt : (*i).second.c_str(); } // neue Methode fuer leere Attribute - Johannes Geiger const char * Properties::getProperty(const char *key) const { map<string, string>::const_iterator i; i = property_map.find(key); return i == property_map.end() ? NULL : (*i).second.c_str(); } /* ------------------------------------------------------------------------- */ void Properties::load(istream &is) { string key, value; while (readProperty(is, &key, &value)) setProperty(key, value); } /* ------------------------------------------------------------------------- */ /* * Expand the escape sequence at "line[pos]". Backslash-Newline reads another * line from "is". 
*/ static void expandEscape(string *line_in_out, string::size_type *pos_in_out, istream &is) { for (;;) { if (line_in_out->at(*pos_in_out) != '\\') { ++*pos_in_out; return; } if (*pos_in_out != line_in_out->size() - 1) break; string tmp; if (!getline(is, tmp)) {++*pos_in_out; return; } int j; for (j = 0; j < tmp.size() && isspace(tmp[j]); ++j); line_in_out->replace( *pos_in_out, string::npos, tmp, j, string::npos ); } char c = line_in_out->at(*pos_in_out + 1); switch (c) { case 't': c = '\t'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; } line_in_out->replace(*pos_in_out, 2, &c, 1); ++*pos_in_out; } /*static*/ bool Properties::readProperty(istream &is, string *key_return, string *value_return) { string line; string::size_type i, l; // Skip empty and comment lines. for (;;) { if (!getline(is, line)) return false; l = line.size(); // Skip leading white-space. for (i = 0; i < l && isspace(line[i]); ++i); if (i == l) continue; // Line contains only white-space. // Ignore comment lines. if (line[i] == '#' || line[i] == '!') continue; // Comment line. break; } // Parse key. string::size_type bok = i; while (i < line.size()) { char c = line[i]; if (isspace(c) || c == '=' || c == ':') break; expandEscape(&line, &i, is); } string::size_type eok = i; // Skip key terminator. while (i < l && isspace(line[i])) ++i; if (i < l && (line[i] == '=' || line[i] == ':')) { for (++i; i < l && isspace(line[i]); ++i); } // Substitute escape sequences in value. string::size_type bov = i; while (i < line.size()) expandEscape(&line, &i, is); // Return key and value. 
key_return->assign(line, bok, eok - bok); value_return->assign(line, bov, string::npos); return true; } /* ------------------------------------------------------------------------- */ ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/Properties.h�����������������������������������������������������������������������0100644�0000000�0000000�00000005302�07760112171�014617� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig <arno@unkrig.de> */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. 
*/

/***************************************************************************/

/*
 * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
 * Dates and reasons of modifications:
 *   Fre Jun  8 17:29:50 CEST 2001
 *   Thu Oct  4 22:38:59 CEST 2001: ported to g++ 3.0
 *   Wed Jul  2 22:03:29 CEST 2003: ported to g++ 3.3
 */

/***************************************************************************/

#ifndef __Properties_h_INCLUDED__ /* { */
#define __Properties_h_INCLUDED__

/* ------------------------------------------------------------------------- */

/*
 * "configure" passes -DBOOL_DEFINITION=... for compilers without a built-in
 * "bool". Expand the substitute definition once, then remove the macro.
 */
#ifdef BOOL_DEFINITION
BOOL_DEFINITION
#undef BOOL_DEFINITION
#endif

#include <string>
#include <map>
#include <istream>

using std::string;
using std::map;
using std::istream;

/* ------------------------------------------------------------------------- */

/*
 * A set of string-valued properties, addressed by string keys.
 */
class Properties {
public:

  // Return the value stored for "key" as a C string, or "dflt" if the key
  // is not present.
  const char *getProperty(const char *key, const char *dflt) const;

  // Return the value stored for "key" as a C string, or NULL if the key is
  // not present.
  const char *getProperty(const char *key) const;

  // Store "value" under "key", replacing any value already stored there.
  // The four overloads accept every combination of "string" and C string.
  void setProperty(const string &key, const string &value)
  { property_map[key] = value; }
  void setProperty(const char *key, const string &value)
  { property_map[key] = value; }
  void setProperty(const string &key, const char *value)
  { property_map[key] = value; }
  void setProperty(const char *key, const char *value)
  { property_map[key] = value; }

  // Read properties from the given stream (e.g. an opened file).
void load(istream &); private: static bool readProperty( istream &is, string *key_return, string *value_return ); map<string, string> property_map; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/README�����������������������������������������������������������������������������0100644�0000000�0000000�00000020020�10001242725�013153� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������## This is the README file for html2text Wed Jan 14 14:35:57 CET 2004 ## =========================================================================== html2text is a command line utility, written in C++, that converts HTML documents into plain text. It was written up to version 1.2.2 for and is copyrighted by GMRS Software GmbH, Unterschleißheim. html2text reads HTML documents from standard input or a (local or remote) URI, and formats them into a stream of plain text characters that is written to standard output or into an output-file, preserving the original positions of table fields. ISO 8859-1 is used for output by default, plain-ASCII output can be chosen by setting the "-ascii" command line option. Type "html2text -help" for an overview of all command line options. 
Examples: html2text <file> | less html2text -o outfile.txt -ascii -nobs <file> The rendering is largely customisable through the "html2textrc" file and the "-style" command line option, which may be used to quickly change some formatting defaults. See the html2textrc(5) manual page for details. Although html2text was written for the conversion of HTML 3.2 documents, most constructs of HTML 4 are rendered as well, including most SGML entities, provided that they are written as "named entities" and not as a numeric value. The program tries to parse even XHTML documents and the HTML produced by word processors, but this is not always as successful as with other HTML parsers, because html2text is, as already said, for all that an HTML 3.2 converter. The program also accepts syntactically incorrect input, attempting to interpret it "reasonably". If the output is however not satisfactory, or if rendering fails completely, and you have the possibility to correct the HTML source code, you may want to use the "-unparse" or "-check" options to find out what exactly html2text's problem is. This program was written because GMRS was looking for a good, free HTML-to-text converter for UNIX, and they couldn't find one on the net. The best they could find was lynx, i.e. "lynx -dump", but lynx could not cope with tables. # ---------------------------------------------------------------------------- # For information on compiling and installing the package on your system, # please refer to the file INSTALL. html2text was developed and is tested under Linux. However, it uses no O/S-specific features and should be easily portable to other platforms (at least to other UNIX-ish platforms).
It is reported to compile and work on the following platforms: + AIX 4.3/g++ 2.95.1 + AIX 4.3.2.0/g++ 2.95.2.1 + CYGWIN_NT-5.0 1.5.4/gcc 3.2 + FreeBSD 5.1/gcc 3.2.1 + IRIX64 6.5/MIPS 7.41 + Linux 2.2.18/g++ 2.95.2 + Linux 2.4.16/g++ 2.95.3 + Linux 2.4.22/gcc 3.3.2 + NetBSD 1.6.1/gcc 2.95 + SINIX/CDS++ 2.0A00 You will find some hints for porting it to other platforms at the end of the file "INSTALL". Note for version 1.3.2(a): Version 1.3.2 is distributed in two "flavours": 1.3.2A contains changes needed for g++ 3.3 and later, which are not backwards-compatible. Thus, if you use an older (or other) compiler, please use version 1.3.2 (without 'a'); if you have g++ 3.3 (and up) installed, please use version 1.3.2A. Cross-patches (from 1.3.2 to 1.3.2a and vice versa) are included in the source code packages of both flavours. # ---------------------------------------------------------------------------- # Published under the terms of the GNU General Public License. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # ---------------------------------------------------------------------------- # GMRS agreed to change the program's license terms to GPL.
Message-ID: <01c401c10f72$d11c3660$12c8a8c0@jag> Reply-To: "David Geffen" <geffen@one4net.com> From: "David Geffen" <david@one4net.com> To: <mbayer@zedat.fu-berlin.de> Date: Wed, 18 Jul 2001 12:17:14 +0200 Organization: GMRS Software GmbH Hallo Herr Bayer, html2text darf unter die GPL veroeffentlicht werden, solange one4net keinerlei Nachteile oder Verpflichtungen dadurch entstehen. Mit freundlichen Gruessen David Geffen ----- Original Message ----- From: "Martin Bayer" <mbayer@zedat.fu-berlin.de> To: <geffen@one4net.com> Sent: Thursday, July 12, 2001 5:39 PM Subject: Re: Lizenzbedingungen von 'html2text' > Guten Tag! > > On Mon, Jun 25, 2001 at 03:23:31PM +0200, David Geffen wrote: > > > Aus diesem Grunde möchte ich Sie herzlich bitten, zu überlegen, ob es > > > für GMRS nicht möglich wäre, 'html2text' nachträglich unter die GPL zu > > > stellen. > > > > ich bin erst heute zurueck aus dem Urlaub gekommen. > > > > Ich werde mich in den naechsten paar Tage dazu melden. > > Darf ich Sie fragen, ob Sie in dieser Angelegenheit bereits zu einem > Entschluss gekommen sind? Es ist mittlerweile gelungen, das Programm nach > g++3 zu portieren, und da wäre es schön, wenn bereits diese neue Version > unter GPL veröffentlicht werden könne. > > Mit den besten Grüßen > -- > Martin Bayer > c.ne Ostiense, 212/E/15 > E-Mail: mail@mbayer.de I-00154 Roma > WWW: http://www.mbayer.de GSM: +39 3476605285 # ---------------------------------------------------------------------------- # This program is not provided nor supported by GMRS any longer. Since GMRS decided not to develop nor to support this program any longer, they also did not provide its source code any more. With this, I realised, the source code of this program was hardly to obtain, as most archives included at best a precompiled version. 
Because I liked the features, I offered a webspace where this program now is living at, http://userpage.fu-berlin.de/~mbayer/tools/html2text.html I'm afraid in this way I've become the maintainer of this package, even if I actually don't have time free to spend on working on the program by myself. Please keep this in mind if you are going to write me. :-) The source code can also be obtained from the Ibiblio network at [ftp|http]://ftp.ibiblio.org/pub/linux/apps/www/converters/ If you are going to retrieve the source code from within automated scripts, e.g. by a software packaging manager, please prefer downloading it from the Ibiblio server or one of its mirrors. # ---------------------------------------------------------------------------- # »We accept patches.« Please include in all your messages information on · the version of html2text you are referring to (`html2text -version`), if you obtained the program in binary form, the version number as supplied by your package manager (e.g. `rpm -q html2text`); · name and version of your operating system (`uname -a`); · name and version of your compiler (`cc -v`). If you think you found a possible security impact, please let _me_ know _first_. If you think you found a bug, please try first to find out its possible reason by yourself, using the "-unparse", "-check", "-debug-scanner", and "-debug-parser" command line options, in order to save other people's time. I will not consider any "bug report" that just claims "your program is buggy!!!!!1", nor will I answer to any mail asking me O/S-specific questions. I will include into the TODO list any sensible feature request. And, last but not least, patches are always very welcome. :-) Martin Bayer <mbayer@zedat.fu-berlin.de> For all e-mails, use of PGP (GPG) is encouraged. You will find my public key (ID: 0xCB537B60) on my homepage and on keyservers. The key's fingerprint is: "46A1 B556 41CD C77A 0261 D22F 41A6 EB90 CB53 7B60". 
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/auto_aptr.h������������������������������������������������������������������������0100644�0000000�0000000�00000006273�07357140447�014502� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig <arno@unkrig.de> * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * "This product includes software developed by GMRS Software GmbH." * The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de> * Dates and reasons of modifications: * Thu Oct 4 21:29:40 CEST 2001: ported to g++ 3.0 */ /***************************************************************************/ #ifndef __auto_aptr_h_INCLUDED__ /* { */ #define __auto_aptr_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: auto_aptr.h,v 1.2 1999/07/09 09:08:40 arno Exp $" /* ------------------------------------------------------------------------- */ /* * This template class is very similar to the standard "auto_ptr", but it is * used for *array* pointers rather than *object* pointers, i.e. the pointer * passed to it must have been allocated with "new[]", and "auto_aptr" will * delete it with "delete[]". */ /* ------------------------------------------------------------------------- */ #include <stdlib.h> template <class T> class auto_aptr { public: // Constructor/copy/destroy auto_aptr() : p(0) {} auto_aptr(T *x) : p(x) {} auto_aptr(const auto_aptr<T> &x) : p(x.p) { ((auto_aptr<T> *) &x)->p = 0; } void operator=(const auto_aptr<T> &x) { delete[] p; p = x.p; ((auto_aptr<T> *) &x)->p = 0; } // Extension: "operator=(T *)" is identical to "auto_aptr::reset(T *)". void operator=(T *x) { delete[] p; p = x; } ~auto_aptr() { delete[] p; } // Members T &operator[](size_t idx) const { if (!p) abort(); return p[idx]; } T *get() const { return (T *) p; } T *release() { T *tmp = p; p = 0; return tmp; } void reset(T *x = 0) { delete[] p; p = x; } // These would make a nice extension, but are not provided by many other // implementations. 
//operator const void *() const { return p; } //int operator!() const { return p == 0; } private: T *p; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/cmp_nocase.C�����������������������������������������������������������������������0100644�0000000�0000000�00000005067�06771645270�014551� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ident "$Id: cmp_nocase.C,v 1.1 1999/09/21 09:07:04 arno Exp $" #include <ctype.h> #include "cmp_nocase.h" /* ------------------------------------------------------------------------- */ int _cmp_nocase(const char *s1, size_t l1, const char *s2, size_t l2) { const char *e1 = s1 + l1; const char *e2 = s2 + l2; while (s1 != e1 && s2 != e2) { int c1 = toupper(*s1); int c2 = toupper(*s2); if (c1 < c2) return -1; if (c1 > c2) return 1; ++s1, ++s2; } return s1 != e1 ? 1 : s2 != e2 ? 
-1 : 0; } /* ------------------------------------------------------------------------- */ �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/cmp_nocase.h�����������������������������������������������������������������������0100644�0000000�0000000�00000005367�07357142174�014615� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig <arno@unkrig.de> * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * "This product includes software developed by GMRS Software GmbH." * The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de> * Dates and reasons of modifications: * Thu Oct 4 21:31:58 CEST 2001: ported to g++ 3.0 */ /***************************************************************************/ #ifndef __cmp_nocase_h_INCLUDED__ /* { */ #define __cmp_nocase_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #include <string.h> #include <string> using std::string; /* ------------------------------------------------------------------------- */ /* * The Standard C++ library is lacking a case-insensitive string comparison * function... so I define my own, adapting Stroustrup's ("The C++ Programming * Language", 3rd edition). 
*/ // Helper extern int _cmp_nocase(const char *s1, size_t l1, const char *s2, size_t l2); // -1: s1 < s2; 0: s1 == s2, 1: s1 > s2 inline int cmp_nocase(const string &s1, const string &s2) { return _cmp_nocase(s1.data(), s1.length(), s2.data(), s2.length()); } inline int cmp_nocase(const char *s1, const string &s2) { return _cmp_nocase(s1, strlen(s1), s2.data(), s2.length()); } inline int cmp_nocase(const string &s1, const char *s2) { return _cmp_nocase(s1.data(), s1.length(), s2, strlen(s2)); } inline int cmp_nocase(const char *s1, const char *s2) { return _cmp_nocase(s1, strlen(s1), s2, strlen(s2)); } /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/Makefile.in������������������������������������������������������������������������0100644�0000000�0000000�00000010311�10001244126�014337� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� # ----------------------------------------------------------------------------- # # Portions Copyright (c) 1999 GMRS Software GmbH # Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de # All rights reserved. 
# # Author: Arno Unkrig <arno@unkrig.de> # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License in the file # COPYING for more details. # # ----------------------------------------------------------------------------- # # Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de> # # ----------------------------------------------------------------------------- VERSION=1.3.2a BISONXX = bison++ YFLAGS = INSTALLER = install BINDIR = /usr/local/bin MANDIR = /usr/local/man DOCDIR = /usr/share/doc/html2text CXX = @CXX@ BOOL_DEFINITION = @BOOL_DEFINITION@ EXPLICIT = @EXPLICIT@ SYS_POLL_MISSING = @SYS_POLL_MISSING@ SOCKET_LIBRARIES = @SOCKET_LIBRARIES@ LIBSTDCXX_INCLUDES = @LIBSTDCXX_INCLUDES@ LIBSTDCXX_LIBS = @LIBSTDCXX_LIBS@ AUTO_PTR_BROKEN = @AUTO_PTR_BROKEN@ MAKEDEPEND_INCLUDES = @MAKEDEPEND_INCLUDES@ DEBUG=-O2 -g INCLUDES = $(LIBSTDCXX_INCLUDES) DEFINES = -DVERSION=$(VERSION) $(SYS_POLL_MISSING) $(BOOL_DEFINITION) $(EXPLICIT) $(AUTO_PTR_BROKEN) CPPFLAGS = $(INCLUDES) $(DEFINES) CXXFLAGS = $(CPPFLAGS) $(DEBUG) LDFLAGS = $(DEBUG) LOADLIBES = $(LIBSTDCXX_LIBS) $(SOCKET_LIBRARIES) .SUFFIXES : .C .o .C.o : $(CXX) -c $(CXXFLAGS) $*.C # ----------------------------------------------------------------------------- default : all all : html2text @echo ; @echo 'Compilation completed. 
You may now move "html2text", "html2text.1.gz"'; @echo 'and "html2textrc.5.gz" to their installation directories (e.g.'; @echo '"/usr/local/bin", "/usr/local/man/man1" and "/usr/local/man/man5").'; @echo OBJS = html2text.o html.o HTMLControl.o HTMLParser.o Area.o format.o sgml.o table.o urlistream.o Properties.o cmp_nocase.o html2text : $(OBJS) $(LIBSTDCXX_LIBS) $(CXX) $(LDFLAGS) $(OBJS) $(LOADLIBES) $(LDLIBS) -o $@ libstd/libstd.a : cd libstd && $(MAKE) # ----------------------------------------------------------------------------- # Since it is very unlikely that bison++-2.2 is installed (available on # html2text's homepage), HTMLParser.h and HTMLParser.C are only built when # 'make bison-local' is issued. bison-local : cmp -s HTMLParser.h HTMLParser.k || cp HTMLParser.k HTMLParser.h; $(BISONXX) $(YFLAGS) -o HTMLParser.C -d -h HTMLParser.k HTMLParser.y # ----------------------------------------------------------------------------- # This is mostly intended for RPM builds and users that don't read the documentation. install : $(INSTALLER) -s -m 755 html2text $(BINDIR); $(INSTALLER) -m 644 html2text.1.gz $(MANDIR)/man1; $(INSTALLER) -m 644 html2textrc.5.gz $(MANDIR)/man5; $(INSTALLER) -d -m 755 $(DOCDIR); $(INSTALLER) -p -m 644 CHANGES COPYING CREDITS KNOWN_BUGS README RELEASE_NOTES TODO $(DOCDIR) # ----------------------------------------------------------------------------- SUBDIRS = libstd # "./configure" creates "Makefile"s only in the subdirectories that need to # be built, so we check for the existence of these "Makefile".
clean clobber depend : @for i in $(SUBDIRS); do \ if test -r $$i/Makefile; then \ ( \ cd $$i && echo "*** make $@ in `pwd`" && $(MAKE) $@ || \ { echo "*** make $@ error in `pwd`" && false; } \ ) || exit 1; \ echo "*** Back in `pwd`"; \ fi; \ done; clean : local-clean local-clean : rm -f *.o *~ core html2text; clobber : local-clobber local-clobber : local-clean rm -f html2text depend : local-depend local-depend : HTMLParser.h @>Dependencies makedepend -f Dependencies $(CPPFLAGS) $(MAKEDEPEND_INCLUDES) *.C @rm -f Dependencies.bak # ----------------------------------------------------------------------------- include Dependencies �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/format.C���������������������������������������������������������������������������0100644�0000000�0000000�00000126450�07760112171�013716� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig <arno@unkrig.de> */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de> * Dates and reasons of modifications: * Sun Mar 18 19:51:54 CET 2001: fixed segfault * Fre Jun 8 17:37:12 CEST 2001: new image handling * Thu Oct 4 21:34:26 CEST 2001: ported to g++ 3.0 * Mon Jul 29 13:09:26 CEST 2002: fixed runtime increment * Wed Jul 2 22:04:08 CEST 2003: ported to g++ 3.3 */ /***************************************************************************/ #include <sstream> #include <stdlib.h> #include <ctype.h> #include <vector> #include <map> #include "html.h" #include "HTMLParser.h" #include "sgml.h" #include "cmp_nocase.h" #include "format.h" #include "Properties.h" using std::endl; using std::flush; #ifndef nelems #define nelems(array) (sizeof(array) / sizeof((array)[0])) #endif /* ------------------------------------------------------------------------- */ static Line *line_format(const list<auto_ptr<Element> > *elements); static Area *make_up(const Line &line, Area::size_type w, int halign); static Area *format( const list<auto_ptr<Element> > *elements, Area::size_type w, int halign ); static void format( const list<auto_ptr<Element> > *elements, Area::size_type indent_left, Area::size_type w, int halign, ostream &os ); /* ------------------------------------------------------------------------- */ /* * Helper class that retrieves several block-formatting properties in one * go. 
*/

struct BlockFormat {
  // Vertical space in front of the block: handed to Area::prepend() by the
  // Area-returning formatters, and emitted as that many "endl"s by the
  // stream-writing formatters below.
  Area::size_type vspace_before;
  // Vertical space behind the block: handed to Area::append(), or emitted
  // as that many "endl"s.
  Area::size_type vspace_after;
  // Left indentation: applied with "Area::operator>>=", or added to the
  // caller's "indent_left" in the stream-writing formatters.
  Area::size_type indent_left;
  // Not read directly in the code visible here -- presumably consumed by
  // "effective_width()"; its definition is not visible here (TODO confirm).
  Area::size_type indent_right;

  // "item_name" is the name of the formatted item (e.g. "DOCUMENT", "BODY").
  // The remaining arguments are the defaults for the four members above;
  // how they are overridden is decided by the constructor, which is defined
  // elsewhere.
  BlockFormat(
    const char      *item_name,
    Area::size_type default_vspace_before = 0,
    Area::size_type default_vspace_after  = 0,
    Area::size_type default_indent_left   = 0,
    Area::size_type default_indent_right  = 0
  );

  // Maps the width offered by the caller to the width that is passed on to
  // the block's content (see the callers below). Defined elsewhere.
  Area::size_type effective_width(Area::size_type) const;
};

/*
 * Helper class that retrieves several list-formatting properties in one
 * go.
 */
struct ListFormat {
  // Appended once to the result Area before the first formatted list item.
  Area::size_type           vspace_before;
  // Appended to the result Area between two formatted list items.
  Area::size_type           vspace_between;
  // Appended to the result Area after the last formatted list item.
  Area::size_type           vspace_after;
  // Presumably the per-nesting-level indents and bullet type names that
  // back "get_indent()" and "get_default_type()" -- the code that fills
  // them is not visible here (TODO confirm).
  auto_ptr<vector<int> >    indents;
  auto_ptr<vector<string> > default_types;

  ListFormat(
    const char      *item_name,
    Area::size_type default_vspace_before  = 0,
    Area::size_type default_vspace_between = 0,
    Area::size_type default_vspace_after   = 0,
    const char      *default_indents       = "6",
    const char      *default_default_types = "DISC CIRCLE SQUARE"
  );

  // The result is passed as the "indent" argument to "ListItem::format()".
  Area::size_type get_indent(int nesting) const;
  const string &get_default_type(int nesting) const;
  // The result is one of the bullet type enumerators (NO_BULLET ... CUSTOM3)
  // and is passed as the "type" argument to "ListItem::format()".
  int get_type(
    const list<TagAttribute> *attributes,
    int                      nesting,
    int                      default_default_type
  ) const;
};

/* ------------------------------------------------------------------------- */

// Attributes: VERSION (ignored)

/*
 * Format the document into a newly allocated Area. Returns 0 if the body
 * yields no Area; otherwise ownership of the returned Area passes to the
 * caller (it is released from the local "auto_ptr").
 */
Area *
Document::format(Area::size_type w, int halign) const
{
  // Function-local static: the "DOCUMENT" block properties are looked up
  // once, on the first call.
  static BlockFormat bf("DOCUMENT");

  auto_ptr<Area> res(body.format(bf.effective_width(w), halign));
  if (!res.get()) return 0;

  *res >>= bf.indent_left;
  res->prepend(bf.vspace_before);
  res->append(bf.vspace_after);

  return res.release();
}

/*
 * Stream variant: write "vspace_before" line ends to "os", let the body
 * write itself with the left indentation increased by "bf.indent_left", then
 * write "vspace_after" line ends.
 */
void
Document::format(
  Area::size_type indent_left,
  Area::size_type w,
  int             halign,
  ostream         &os
) const
{
  static BlockFormat bf("DOCUMENT");

  for (int i = 0; i < bf.vspace_before; ++i) os << endl;

  body.format(
    indent_left + bf.indent_left,
    bf.effective_width(w),
    halign,
    os
  );

  for (int j = 0; j < bf.vspace_after; ++j) os << endl;
}

/* ------------------------------------------------------------------------- */

// Attributes: BACKGROUND BGCOLOR TEXT LINK VLINK ALINK (ignored)

/*
 * Format the body's content into a newly allocated Area. Returns 0 if the
 * content yields no Area; otherwise ownership of the returned Area passes to
 * the caller.
 */
Area *
Body::format(Area::size_type w, int halign) const
{
  static BlockFormat bf("BODY");

  // "::format" selects the file-level helper for element lists; an
  // unqualified "format" would name this member function.
  auto_ptr<Area> res(
    ::format(content.get(), bf.effective_width(w), halign)
  );
  if (!res.get()) return 0;

  *res >>= bf.indent_left;
  res->prepend(bf.vspace_before);
  res->append(bf.vspace_after);

  return res.release();
}

/*
 * Stream variant of the above: same vertical spacing and indentation
 * handling as "Document::format()", applied to the body's content.
 */
void
Body::format(
  Area::size_type indent_left,
  Area::size_type w,
  int             halign,
  ostream         &os
) const
{
  static BlockFormat bf("BODY");

  for (int i = 0; i < bf.vspace_before; ++i) os << endl;

  ::format(
    content.get(),
    indent_left + bf.indent_left,
    bf.effective_width(w),
    halign,
    os
  );

  for (int j = 0; j < bf.vspace_after; ++j) os << endl;
}

/* ------------------------------------------------------------------------- */

// Bullet/numbering types of list items. These are the values returned by
// "ListFormat::get_type()" and passed as "type" to "ListItem::format()".
enum {
  NO_BULLET, ARABIC_NUMBERS, LOWER_ALPHA, UPPER_ALPHA, LOWER_ROMAN, UPPER_ROMAN,
  DISC, SQUARE, CIRCLE, CUSTOM1, CUSTOM2, CUSTOM3
};

// Attributes: TYPE (processed) COMPACT (ignored)

/*
 * Format all items of the ordered list and stack the results into one Area.
 * Returns 0 if the list has no items or if no item yields an Area; otherwise
 * ownership of the returned Area passes to the caller.
 */
Area *
OrderedList::format(Area::size_type w, int /*halign*/ ) const
{
  if (!items.get()) return 0;

  static ListFormat lf("OL", 0, 0, 0, "6", "1");

  int type = lf.get_type(attributes.get(), nesting, ARABIC_NUMBERS);

  auto_ptr<Area> res;

  const list<auto_ptr<ListItem> >           &il(*items);
  list<auto_ptr<ListItem> >::const_iterator i;
  // Running item number, handed to every item by address. This loop never
  // changes it itself.
  // NOTE(review): presumably the item formatter advances it -- confirm.
  int                                       number = 1;
  for (i = il.begin(); i != il.end(); ++i) {
    auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting), &number));
    if (a.get()) {
      if (res.get()) {
        res->append(lf.vspace_between);
      } else {
        // First item that produced output: create the result Area lazily, so
        // that a list without any output still returns 0.
        res.reset(new Area);
        res->append(lf.vspace_before);
      }

      *res += *a;
    }
  }
  if (res.get()) res->append(lf.vspace_after);

  return res.release();
}

/*
 * <UL>, <DIR> and <MENU> are currently formatted totally identically, because
 * this is what Netscape does, and the HTML 3.2 spec and "HTML -- The
 * Definitive Guide" give no clear indication as to how to format them.
*/ // Attributes: TYPE (processed) COMPACT (ignored) Area * UnorderedList::format(Area::size_type w, int /*halign*/ ) const { if (!items.get()) return 0; static ListFormat lf("UL"); int type = lf.get_type(attributes.get(), nesting, SQUARE); auto_ptr<Area> res; const list<auto_ptr<ListItem> > &il(*items); list<auto_ptr<ListItem> >::const_iterator i; for (i = il.begin(); i != il.end(); ++i) { auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting))); if (a.get()) { if (res.get()) { res->append(lf.vspace_between); } else { res.reset(new Area); res->append(lf.vspace_before); } *res += *a; } } if (res.get()) res->append(lf.vspace_after); return res.release(); } // Attributes: TYPE (extension, processed) COMPACT (ignored) Area * Dir::format(Area::size_type w, int /*halign*/ ) const { if (!items.get()) return 0; static ListFormat lf("DIR"); int type = lf.get_type(attributes.get(), nesting, SQUARE); auto_ptr<Area> res; const list<auto_ptr<ListItem> > &il(*items); list<auto_ptr<ListItem> >::const_iterator i; for (i = il.begin(); i != il.end(); ++i) { auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting))); if (a.get()) { if (res.get()) { res->append(lf.vspace_between); } else { res.reset(new Area); res->append(lf.vspace_before); } *res += *a; } } if (res.get()) res->append(lf.vspace_after); return res.release(); } // Attributes: TYPE (extension, processed) COMPACT (ignored) Area * Menu::format(Area::size_type w, int /*halign*/ ) const { if (!items.get()) return 0; static ListFormat lf("MENU", 0, 0, 0, "2", "NO_BULLET"); int type = lf.get_type(attributes.get(), nesting, NO_BULLET); auto_ptr<Area> res; const list<auto_ptr<ListItem> > &il(*items); list<auto_ptr<ListItem> >::const_iterator i; for (i = il.begin(); i != il.end(); ++i) { auto_ptr<Area> a((*i)->format(w, type, lf.get_indent(nesting))); if (a.get()) { if (res.get()) { res->append(lf.vspace_between); } else { res.reset(new Area); res->append(lf.vspace_before); } *res += *a; } } if (res.get()) 
res->append(lf.vspace_after);

  return res.release();
}

// Attributes: TYPE (ignored) VALUE (processed)

/*
 * Format one list item whose content is a text flow.
 *
 *   w              Total width available for bullet plus text.
 *   type           Bullet style (one of the NO_BULLET ... CUSTOM3 constants).
 *   indent         Column at which the item text starts.
 *   number_in_out  If non-null: current item number; overridden by the VALUE
 *                  attribute if present, and incremented after the item was
 *                  formatted successfully.
 *
 * Returns 0 for an item without any formattable content.
 */
Area *
ListNormalItem::format(
  Area::size_type w,
  int             type,
  Area::size_type indent,
  int             *number_in_out /*= 0*/
) const
{
  int number = 0;
  if (number_in_out) {
    // VALUE (if given) replaces the running number, for this item and for
    // the caller's counter.
    number = *number_in_out = get_attribute(
      attributes.get(), "VALUE", *number_in_out
    );
  }

  // Bullet strings are configurable; each is looked up once.
  static const char *disc_bullet    = Formatting::getString("LI.disc_bullet", "*");
  static const char *square_bullet  = Formatting::getString("LI.square_bullet", "#");
  static const char *circle_bullet  = Formatting::getString("LI.circle_bullet", "o");
  static const char *custom1_bullet = Formatting::getString("LI.custom1_bullet", "+");
  static const char *custom2_bullet = Formatting::getString("LI.custom2_bullet", "-");
  static const char *custom3_bullet = Formatting::getString("LI.custom3_bullet", "~");

  string bullet;
  switch (type) {
  case NO_BULLET:
    break;
  case DISC:
    bullet = disc_bullet;
    break;
  case SQUARE:
    bullet = square_bullet;
    break;
  case CIRCLE:
    bullet = circle_bullet;
    break;
  case CUSTOM1:
    bullet = custom1_bullet;
    break;
  case CUSTOM2:
    bullet = custom2_bullet;
    break;
  case CUSTOM3:
    bullet = custom3_bullet;
    break;
  case ARABIC_NUMBERS:
    {
      std::ostringstream oss;
      oss << number << '.'; // << std::ends;
      bullet = oss.str();
      // oss.rdbuf()->freeze(0);
    }
    break;
  case LOWER_ALPHA:
    // Numbers above 26 all map to 'z'.
    // NOTE(review): numbers below 1 yield characters before 'a'.
    bullet = number <= 26 ? (char) (number - 1 + 'a') : 'z';
    bullet += '.';
    break;
  case UPPER_ALPHA:
    // Same scheme as LOWER_ALPHA, with upper-case letters.
    bullet = number <= 26 ? (char) (number - 1 + 'A') : 'Z';
    bullet += '.';
    break;
  case LOWER_ROMAN:
    {
      // Table lookup; numbers outside the table are shown as "???".
      static const char *lower_roman[] = {
        "0", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x",
        "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii", "xix",
        "xx","xxi","xxii","xxiii","xxiv","xxv","xxvi","xxvii","xxviii","xxix"
      };
      const char *p = (
        number >= 0 && number < (int) nelems(lower_roman) ?
        lower_roman[number] :
        "???"
      
);
      bullet = p;
      bullet += '.';
    }
    break;
  case UPPER_ROMAN:
    {
      // Table lookup; numbers outside the table are shown as "???".
      static const char *upper_roman[] = {
        "0", "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
        "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX",
        "XX","XXI","XXII","XXIII","XXIV","XXV","XXVI","XXVII","XXVIII","XXIX"
      };
      const char *p = (
        number >= 0 && number < (int) nelems(upper_roman) ?
        upper_roman[number] :
        "???"
      );
      bullet = p;
      bullet += '.';
    }
    break;
  }

  // Widen the indent if the bullet (plus one separating column) does not fit.
  if (bullet.length() >= indent) indent = bullet.length() + 1;

  // NOTE(review): "w - indent" is not guarded against indent > w -- confirm
  // that callers always pass a sufficiently large width.
  auto_ptr<Area> res(::format(flow.get(), w - indent, Area::LEFT));

  // KLUDGE: Some people write "<UL> <B><LI>Bla</B>Bla </UL>", which actually
  // defines a bold and empty list item before "Bla Bla". This is very
  // difficult to handle... so... let's just ignore empty list items.
  if (!res.get()) return 0;

  *res >>= indent;

  // Place the bullet so that one column separates it from the item text
  // (the last argument is presumably the row -- confirm against Area.h).
  res->insert(bullet, indent - bullet.length() - 1, 0);

  // Only items that were actually formatted advance the numbering.
  if (number_in_out) (*number_in_out)++;

  return res.release();
}

/*
 * Format one list item whose content is a block: the block is formatted to
 * the width remaining after "indent" and shifted right by "indent". The
 * bullet type and the item number are not used. Returns 0 if there is no
 * block or the block yields nothing.
 */
Area *
ListBlockItem::format(
  Area::size_type w,
  int             /*type*/,
  Area::size_type indent,
  int             * /*number_in_out*/ /*= 0*/
) const
{
  if (!block.get()) return 0;

  auto_ptr<Area> res(block->format(w - indent, Area::LEFT));
  if (!res.get()) return 0;

  /*
   * Hm... shouldn't there be a bullet before the item?
   
*/ *res >>= indent; return res.release(); } /* ------------------------------------------------------------------------- */ // Attributes: COMPACT (ignored) Area * DefinitionList::format(Area::size_type w, int halign) const { static struct DefinitionListFormat { const Area::size_type vspace_before; const Area::size_type vspace_between; const Area::size_type vspace_after; DefinitionListFormat() : vspace_before (Formatting::getInt("DL.vspace.before", 0)), vspace_between(Formatting::getInt("DL.vspace.between", 0)), vspace_after (Formatting::getInt("DL.vspace.after", 0)) {} } dlf; auto_ptr<Area> res; if (preamble.get()) { res.reset(::format(preamble.get(), w, halign)); if (res.get()) res->prepend(dlf.vspace_before); } if (items.get()) { const list<auto_ptr<DefinitionListItem> > &il(*items); list<auto_ptr<DefinitionListItem> >::const_iterator i; for (i = il.begin(); i != il.end(); ++i) { auto_ptr<Area> a((*i)->format(w, halign)); if (!a.get()) continue; if (res.get()) { res->append(dlf.vspace_between); *res += *a; } else { res = a; res->prepend(dlf.vspace_before); } } } if (res.get()) res->append(dlf.vspace_after); return res.release(); } Area * TermName::format(Area::size_type w, int halign) const { static BlockFormat bf("DT", 0, 0, 2); auto_ptr<Area> res(::format(flow.get(), bf.effective_width(w), halign)); if (!res.get()) return 0; *res >>= bf.indent_left; res->prepend(bf.vspace_before); res->append(bf.vspace_after); return res.release(); } Area * TermDefinition::format(Area::size_type w, int halign) const { static BlockFormat bf("DD", 0, 0, 6); auto_ptr<Area> res(::format(flow.get(), bf.effective_width(w), halign)); if (!res.get()) return 0; *res >>= bf.indent_left; res->prepend(bf.vspace_before); res->append(bf.vspace_after); return res.release(); } /* ------------------------------------------------------------------------- */ // Attributes: ALIGN NOSHADE SIZE WIDTH (ignored) Area * HorizontalRule::format(Area::size_type w, int /*halign*/ ) const { static const 
char *marker = Formatting::getString("HR.marker", "=");
  static BlockFormat bf("HR");

  // One row filled with the first marker character; an empty marker string
  // results in a row of blanks.
  Area *res = new Area(bf.effective_width(w), 1, *marker ? *marker : ' ');
  *res >>= bf.indent_left;
  res->prepend(bf.vspace_before);
  res->append(bf.vspace_after);
  return res;
}

/* ------------------------------------------------------------------------- */

// Attributes: ALIGN (processed)

/*
 * Format a heading. If the content can be line-formatted, it is wrapped in
 * the configured "Hn.prefix" / "Hn.suffix" strings; otherwise the content is
 * block-formatted without prefix and suffix. In both cases the configured
 * "Hn.attributes" cell attributes and "Hn.vspace.*" are applied.
 *
 * All per-level tables are indexed by "level" (presumably 1..6 -- the
 * tables have 7 entries and index 0 is not a heading level).
 */
Area *
Heading::format(Area::size_type w, int halign) const
{
  halign = get_attribute(
    attributes.get(), "ALIGN", halign,
    "LEFT",   Area::LEFT,
    "CENTER", Area::CENTER,
    "RIGHT",  Area::RIGHT,
    0
  );

  // Entry 0 serves as the "table is initialized" flag.
  static char cell_attributes[7];
  if (!cell_attributes[0]) {
    cell_attributes[0] = 1;
    cell_attributes[1] = Formatting::getAttributes("H1.attributes", Cell::BOLD);
    cell_attributes[2] = Formatting::getAttributes("H2.attributes", Cell::BOLD);
    cell_attributes[3] = Formatting::getAttributes("H3.attributes", Cell::BOLD);
    cell_attributes[4] = Formatting::getAttributes("H4.attributes", Cell::BOLD);
    cell_attributes[5] = Formatting::getAttributes("H5.attributes", Cell::BOLD);
    cell_attributes[6] = Formatting::getAttributes("H6.attributes", Cell::BOLD);
  }

  auto_ptr<Area> res;

  auto_ptr<Line> line(::line_format(content.get()));
  if (line.get()) {
    // Entry 1 doubles as the "initialized" flag.
    // NOTE(review): assumes getString() with a default never returns null.
    static const char *prefixes[7];
    if (!prefixes[1]) {
      prefixes[1] = Formatting::getString("H1.prefix", "****** ");
      prefixes[2] = Formatting::getString("H2.prefix", "***** " );
      prefixes[3] = Formatting::getString("H3.prefix", "**** " );
      prefixes[4] = Formatting::getString("H4.prefix", "*** " );
      prefixes[5] = Formatting::getString("H5.prefix", "** " );
      prefixes[6] = Formatting::getString("H6.prefix", "* " );
    }
    // Prefix first, then the heading text directly behind it.
    auto_ptr<Line> l(new Line(prefixes[level]));
    l->insert(*line, l->length());

    // Same lazy-initialization scheme as for the prefixes.
    static const char *suffixes[7];
    if (!suffixes[1]) {
      suffixes[1] = Formatting::getString("H1.suffix", " ******");
      suffixes[2] = Formatting::getString("H2.suffix", " *****" );
      suffixes[3] = Formatting::getString("H3.suffix", " ****" );
      suffixes[4] = Formatting::getString("H4.suffix", " ***" );
      suffixes[5] = 
Formatting::getString("H5.suffix", " **" );
      suffixes[6] = Formatting::getString("H6.suffix", " *" );
    }
    l->append(suffixes[level]);

    // The attributes apply to prefix, text and suffix alike.
    l->add_attribute(cell_attributes[level]);

    res.reset(make_up(*l, w, halign));
    if (!res.get()) return 0;
  } else {

    /*
     * Hm. Heading is not line-formattable...
     */
    res.reset(::format(content.get(), w, halign));
    if (!res.get()) return 0;
    res->add_attribute(cell_attributes[level]);
  }

  // Entry 0 serves as the "table is initialized" flag.
  static int vspace_before[7];
  if (vspace_before[0] == 0) {
    vspace_before[0] = 1;
    vspace_before[1] = Formatting::getInt("H1.vspace.before", 0);
    vspace_before[2] = Formatting::getInt("H2.vspace.before", 0);
    vspace_before[3] = Formatting::getInt("H3.vspace.before", 0);
    vspace_before[4] = Formatting::getInt("H4.vspace.before", 0);
    vspace_before[5] = Formatting::getInt("H5.vspace.before", 0);
    vspace_before[6] = Formatting::getInt("H6.vspace.before", 0);
  }
  res->prepend(vspace_before[level]);

  // Same scheme for the space after the heading.
  static int vspace_after[7];
  if (vspace_after[0] == 0) {
    vspace_after[0] = 1;
    vspace_after[1] = Formatting::getInt("H1.vspace.after", 0);
    vspace_after[2] = Formatting::getInt("H2.vspace.after", 0);
    vspace_after[3] = Formatting::getInt("H3.vspace.after", 0);
    vspace_after[4] = Formatting::getInt("H4.vspace.after", 0);
    vspace_after[5] = Formatting::getInt("H5.vspace.after", 0);
    vspace_after[6] = Formatting::getInt("H6.vspace.after", 0);
  }
  res->append(vspace_after[level]);

  return res.release();
}

// Attributes: WIDTH (processed)

/*
 * Format a <PRE> element with the "PRE" block format. The WIDTH attribute,
 * if given, replaces the available width "w". Line-formatting is attempted
 * first; block-formatting is the fallback. Returns 0 if both yield nothing.
 */
Area *
Preformatted::format(Area::size_type w, int halign) const
{
  w = get_attribute(attributes.get(), "WIDTH", w);

  static BlockFormat bf("PRE");

  /*
   * Attempt to line-format the <PRE>.
   */
  auto_ptr<Area> res;
  auto_ptr<Line> line(::line_format(texts.get()));
  if (line.get()) {
    res.reset(make_up(*line, bf.effective_width(w), halign));
  }

  /*
   * Failed; block-format it.
   
*/ if (!res.get()) { res.reset(::format(texts.get(), bf.effective_width(w), halign)); if (!res.get()) return 0; } *res >>= bf.indent_left; res->prepend(bf.vspace_before); res->append(bf.vspace_after); return res.release(); } // Attributes: ALIGN (processed) Area * Paragraph::format(Area::size_type w, int halign) const { if (!texts.get()) return 0; halign = get_attribute( attributes.get(), "ALIGN", halign, "LEFT", Area::LEFT, "CENTER", Area::CENTER, "RIGHT", Area::RIGHT, 0 ); static BlockFormat bf("P"); Area *res = ::format(texts.get(), bf.effective_width(w), halign); if (!res) return 0; *res >>= bf.indent_left; res->prepend(bf.vspace_before); res->append(bf.vspace_after); return res; } // Attributes: SRC ALT (processed) ALIGN HEIGHT WIDTH BORDER HSPACE VSPACE // USEMAP ISMAP (ignored) Line * Image::line_format() const { // new image handling - Johannes Geiger static const char *repl_all = Formatting::getString("IMG.replace.all"); static const char *repl_noalt = Formatting::getString("IMG.replace.noalt"); static const char *alt_prefix = Formatting::getString("IMG.alt.prefix", "["); static const char *alt_suffix = Formatting::getString("IMG.alt.suffix", "]"); if (repl_all) { return new Line(repl_all); } { bool ex; string alt(get_attribute(attributes.get(), "ALT", &ex)); if (ex) { if (!alt.empty()) { replace_sgml_entities(&alt); return new Line(alt_prefix + alt + alt_suffix); } else { return NULL; } } } if (repl_noalt) { return new Line(repl_noalt); } { string src(get_attribute(attributes.get(), "SRC", "")); if (!src.empty()) return new Line('[' + src + ']'); } return new Line("[Image]"); } // Attributes: CODEBASE CODE (ignored) ALT (processed) NAME WIDTH HEIGHT // (ignored) ALIGN (processed) HSPACE VSPACE (ignored) Area * Applet::format(Area::size_type w, int /*halign*/ ) const { if (content.get()) { int halign = get_attribute( attributes.get(), "ALIGN", Area::CENTER, "LEFT", Area::LEFT, "MIDDLE", Area::CENTER, "RIGHT", Area::RIGHT, 0 ); Area *a = 
::format(content.get(), w, halign); if (a) return a; } { string alt(get_attribute(attributes.get(), "ALT", "")); if (!alt.empty()) return new Area("[Java Applet: " + alt + ']'); } { string code(get_attribute(attributes.get(), "CODE", "")); if (!code.empty()) return new Area("[Java Applet " + code + ']'); } return new Area("[Java Applet]"); } Line * Applet::line_format() const { if (content.get()) { Line *l = ::line_format(content.get()); if (l) return l; } { string alt(get_attribute(attributes.get(), "ALT", "")); if (!alt.empty()) return new Line("[Java Applet: " + alt + ']'); } { string code(get_attribute(attributes.get(), "CODE", "")); if (!code.empty()) return new Line("[Java Applet " + code + ']'); } return new Line("[Java Applet]"); } // Attributes: NAME HREF REL REV TITLE (ignored) // Attributes: ALIGN (processed) Area * Division::format(Area::size_type w, int halign) const { return ::format(body_content.get(), w, get_attribute( attributes.get(), "ALIGN", halign, "LEFT", Area::LEFT, "CENTER", Area::CENTER, "RIGHT", Area::RIGHT, 0 )); } Area * Center::format(Area::size_type w, int /*halign*/ ) const { return ::format(body_content.get(), w, Area::CENTER); } Area * BlockQuote::format(Area::size_type w, int halign) const { static BlockFormat bf("BLOCKQUOTE", 0, 0, 5, 5); auto_ptr<Area> res(::format( content.get(), bf.effective_width(w), halign )); if (!res.get()) return 0; *res >>= bf.indent_left; res->prepend(bf.vspace_before); res->append(bf.vspace_after); return res.release(); } Area * Address::format(Area::size_type w, int halign) const { static BlockFormat bf("ADDRESS", 0, 0, 5, 5); auto_ptr<Area> res(::format( content.get(), bf.effective_width(w), halign )); if (!res.get()) return 0; *res >>= bf.indent_left; res->prepend(bf.vspace_before); res->append(bf.vspace_after); return res.release(); } /* ------------------------------------------------------------------------- */ // Attributes: ACTION METHOD ENCTYPE (ignored) Area * Form::format(Area::size_type w, 
int halign) const { return content.get() ? ::format(content.get(), w, halign) : 0; } // Attributes: TYPE (processed) NAME (ignored) VALUE CHECKED SIZE (processed) // MAXLENGTH (ignored) SRC (processed) ALIGN (ignored) Line * Input::line_format() const { string type = get_attribute(attributes.get(), "TYPE", "TEXT"); string name = get_attribute(attributes.get(), "NAME", ""); string value = get_attribute(attributes.get(), "VALUE", ""); bool checked = get_attribute(attributes.get(), "CHECKED", "0") != "0"; int size = get_attribute(attributes.get(), "SIZE", -1); string src = get_attribute(attributes.get(), "SRC", ""); string res; if (cmp_nocase(type, "TEXT") == 0) { if (size == -1) size = 20; if (value.empty()) value = name; // if ((int) value.length() > size) { value.erase(size); } else if ((int) value.length() < size) value.append(size - value.length(), ' '); res = '[' + value + ']'; } else if (cmp_nocase(type, "PASSWORD") == 0) { if (size == -1) size = 20; res = '[' + string(size, '*') + ']'; } else if (cmp_nocase(type, "CHECKBOX") == 0) { res = checked ? '*' : LATIN1_ordm; // "ordm" looks like a superscript zero. } else if (cmp_nocase(type, "RADIO") == 0) { res = checked ? '#' : 'o'; } else if (cmp_nocase(type, "SUBMIT") == 0) { res = value.empty() ? string("[Submit]") : '[' + value + ']'; } else if (cmp_nocase(type, "IMAGE") == 0) { res = "[Submit " + src + ']'; } else if (cmp_nocase(type, "RESET") == 0) { res = value.empty() ? string("[Reset]") : '[' + value + ']'; } else if (cmp_nocase(type, "FILE") == 0) { res = "[File]"; } else if (cmp_nocase(type, "HIDDEN") == 0) { return 0; } else { res = "[Unknown INPUT type]"; } return new Line(res); } // Attributes: NAME SIZE (ignored) MULTIPLE (processed) Line * Select::line_format() const { if (!content.get() || content->empty()) return new Line("[Empty selection]"); bool multiple = get_attribute(attributes.get(), "MULTIPLE", "0") != "0"; auto_ptr<Line> res(new Line(multiple ? 
"[One or more of " : "[One of: "));

  const list<auto_ptr<Option> > &c(*content);
  list<auto_ptr<Option> >::const_iterator i;
  for (i = c.begin(); i != c.end(); ++i) {
    // Null options are skipped.
    if (!(*i).get()) continue;
    // Options are separated by '/'.
    // NOTE(review): if the first list entry is null, the output starts with
    // a '/'.
    if (i != c.begin()) *res += '/';
    // NOTE(review): assumes every option has non-null "pcdata" -- confirm
    // against the parser.
    auto_ptr<Line> l((*i)->pcdata->line_format());
    *res += *l;
  }
  *res += ']';

  return res.release();
}

// Attributes: NAME ROWS COLS

/*
 * Format the text of a <TEXTAREA> like ordinary running text.
 * NOTE(review): assumes "pcdata" is non-null -- confirm against the parser.
 */
Area *
TextArea::format(Area::size_type w, int halign) const
{
  auto_ptr<Line> line(pcdata->line_format());
  return line.get() ? make_up(*line, w, halign) : 0;
}

/* ------------------------------------------------------------------------- */

/*
 * Plain character data: a new Line holding "text". The caller owns it.
 */
Line *
PCData::line_format() const
{
  return new Line(text);
}

//   Item:                        Default cell attribute:
//   <TT> <I>                     => NONE
//   <B>                          => BOLD
//   <U>                          => UNDERLINE
//   <STRIKE>                     => STRIKETHROUGH
//   <BIG> <SMALL> <SUB> <SUP>    => NONE

/*
 * Map a font-style token (HTMLParser::TT etc.) to the cell attributes
 * configured through the "<TAG>.attributes" property. Each property is read
 * once, the first time its tag is requested. Unknown tokens yield
 * Cell::NONE.
 */
static char
get_font_cell_attributes(int attribute)
{
  if (attribute == HTMLParser::TT) {
    static char a = Formatting::getAttributes("TT.attributes", Cell::NONE);
    return a;
  } else
  if (attribute == HTMLParser::I) {
    static char a = Formatting::getAttributes("I.attributes", Cell::NONE);
    return a;
  } else
  if (attribute == HTMLParser::B) {
    static char a = Formatting::getAttributes("B.attributes", Cell::BOLD);
    return a;
  } else
  if (attribute == HTMLParser::U) {
    static char a = Formatting::getAttributes("U.attributes", Cell::UNDERLINE);
    return a;
  } else
  if (attribute == HTMLParser::STRIKE) {
    static char a = Formatting::getAttributes("STRIKE.attributes", Cell::STRIKETHROUGH);
    return a;
  } else
  if (attribute == HTMLParser::BIG) {
    static char a = Formatting::getAttributes("BIG.attributes", Cell::NONE);
    return a;
  } else
  if (attribute == HTMLParser::SMALL) {
    static char a = Formatting::getAttributes("SMALL.attributes", Cell::NONE);
    return a;
  } else
  if (attribute == HTMLParser::SUB) {
    static char a = Formatting::getAttributes("SUB.attributes", Cell::NONE);
    return a;
  } else
  if (attribute == HTMLParser::SUP) {
    static char a = Formatting::getAttributes("SUP.attributes", Cell::NONE);
    return a;
  
} return Cell::NONE; } Line * Font::line_format() const { auto_ptr<Line> res(::line_format(texts.get())); if (!res.get()) return 0; char a = get_font_cell_attributes(attribute); if (a != Cell::NONE) res->add_attribute(a); return res.release(); } // Item: Default cell attribute: Area * Font::format(Area::size_type w, int halign) const { auto_ptr<Area> res(::format(texts.get(), w, halign)); if (!res.get()) return 0; char a = get_font_cell_attributes(attribute); if (a != Cell::NONE) res->add_attribute(a); return res.release(); } // Item: Default cell attribute: // <EM> <STRONG> => BOLD // <DFN> <CODE> <SAMP> <KBD> <VAR> <CITE> => NONE static char get_phrase_cell_attributes(int attribute) { if (attribute == HTMLParser::EM) { static char a = Formatting::getAttributes("EM.attributes", Cell::BOLD); return a; } else if (attribute == HTMLParser::STRONG) { static char a = Formatting::getAttributes("STRONG.attributes", Cell::BOLD); return a; } else if (attribute == HTMLParser::DFN) { static char a = Formatting::getAttributes("DFN.attributes", Cell::NONE); return a; } else if (attribute == HTMLParser::CODE) { static char a = Formatting::getAttributes("CODE.attributes", Cell::NONE); return a; } else if (attribute == HTMLParser::SAMP) { static char a = Formatting::getAttributes("SAMP.attributes", Cell::NONE); return a; } else if (attribute == HTMLParser::KBD) { static char a = Formatting::getAttributes("KBD.attributes", Cell::NONE); return a; } else if (attribute == HTMLParser::VAR) { static char a = Formatting::getAttributes("VAR.attributes", Cell::NONE); return a; } else if (attribute == HTMLParser::CITE) { static char a = Formatting::getAttributes("CITE.attributes", Cell::NONE); return a; } return Cell::NONE; } Line * Phrase::line_format() const { auto_ptr<Line> res(::line_format(texts.get())); if (!res.get()) return 0; char a = get_phrase_cell_attributes(attribute); if (a != Cell::NONE) res->add_attribute(a); return res.release(); } // EM STRONG => BOLD // DFN CODE SAMP KBD 
VAR CITE => (nothing) Area * Phrase::format(Area::size_type w, int halign) const { auto_ptr<Area> res(::format(texts.get(), w, halign)); if (!res.get()) return 0; char a = get_phrase_cell_attributes(attribute); if (a != Cell::NONE) res->add_attribute(a); return res.release(); } // Attributes: SIZE COLOR (ignored) Area * Font2::format(Area::size_type w, int halign) const { return ::format(elements.get(), w, halign); } // Attributes: SIZE COLOR (ignored) Line * Font2::line_format() const { return ::line_format(elements.get()); } static char get_link_cell_attributes(const string &href) { if (href.at(0) == '#') { static const char internal_link_attributes = Formatting::getAttributes("A.attributes.internal_link", Cell::UNDERLINE); return internal_link_attributes; } else { static const char external_link_attributes = Formatting::getAttributes("A.attributes.external_link", Cell::UNDERLINE); return external_link_attributes; } } // Attributes: NAME HREF REL REV TITLE (ignored) Line * Anchor::line_format() const { auto_ptr<Line> res(::line_format(texts.get())); if (!res.get()) return 0; string href(get_attribute(attributes.get(), "HREF", "")); if (!href.empty()) res->add_attribute(get_link_cell_attributes(href)); return res.release(); } Area * Anchor::format(Area::size_type w, int halign) const { auto_ptr<Area> res(::format(texts.get(), w, halign)); if (!res.get()) return 0; string href(get_attribute(attributes.get(), "HREF", "")); if (!href.empty()) res->add_attribute(get_link_cell_attributes(href)); return res.release(); } // Attributes: CLEAR (ignored) Line * LineBreak::line_format() const { return new Line("\n"); } Area * TableHeadingCell::format(Area::size_type w, int halign) const { Area *a = TableCell::format(w, halign); if (a) a->add_attribute(Cell::BOLD); return a; } Area * Caption::format(Area::size_type w, int halign) const { auto_ptr<Line> l(::line_format(texts.get())); return l.get() ? 
make_up(*l, w, halign) : 0;
}

// Attributes: (none)

/*
 * Line-format the content and replace every blank by LATIN1_nbsp, so that
 * make_up() finds no blank to break the line at. Returns 0 if the content
 * cannot be line-formatted.
 */
Line *
NoBreak::line_format() const
{
  Line *l(::line_format(content.get()));
  if (!l) return 0;

  for (Line::size_type i = 0; i < l->length(); ++i) {
    Cell &c((*l)[i]);
    if (c.character == ' ') c.character = LATIN1_nbsp;
  }

  return l;
}

/* ------------------------------------------------------------------------- */

/*
 * Make up "line" into an Area. Attempt to return an Area no wider than "w".
 *
 * Returns 0 for an empty line. A row may exceed "w" if no break position
 * was found within it. The caller owns the returned Area.
 */

static Area *
make_up(const Line &line, Area::size_type w, int halign)
{
  //{
  //  cout << "make_up(\"";
  //  for (Line::size_type i = 0; i < line.length(); i++) {
  //    if (isprint(line[i].character)) cout << line[i].character;
  //    else cout << "[" << (int) line[i].character << "]";
  //  }
  //  cout << "\")" << endl;
  //}

  if (line.empty()) return 0;

  auto_ptr<Area> res(new Area);

  // "from" is the index of the first cell of the row being built.
  Line::size_type from = 0;
  while (from < line.length()) {

    /*
     * A sole newline character has a special meaning: Append a blank line.
     */
    if (line[from].character == '\n') {
      res->resize(res->width(), res->height() + 1);
      from++;
      continue;
    }

    // "to" is one past the last cell of the row being built.
    Line::size_type to = from + 1;
    Line::size_type lbp = (Line::size_type) -1; // "Last break position".

    /*
     * Determine the line break position.
     */
    while (to < line.length()) {
      // A newline always ends the row.
      if (line[to].character == '\n') {
        break;
      }

      // A break is possible before a blank or an opening bracket, and after
      // '-', '/' or ':' unless a punctuation character follows.
      char c1 = line[to].character, c2 = line[to - 1].character;
      if (c1 == ' ' || c1 == '('/*)*/ || c1 == '['/*]*/ || c1 == '{'/*}*/ || (
        (
          c2 == '-' ||
          c2 == '/' ||
          c2 == ':'
        ) &&
        c1 != ',' &&
        c1 != '.' &&
        c1 != ';' &&
        c1 != ':'
      )) {
        // Remember the break position, then step over the blanks behind it.
        lbp = to++;
        while (to < line.length() && line[to].character == ' ') to++;
      } else {
        to++;
      }

      // Row too wide: fall back to the last break position, if there is one.
      if (to - from > w && lbp != (Area::size_type) -1) { to = lbp; break; }
    }

    /*
     * Copy the "from...to" range from the "line" to the bottom of the "res"
     * Area.
     
*/ Area::size_type x = 0; Area::size_type len = to - from; if (halign == Area::LEFT || len >= w) { ; } else if (halign == Area::CENTER) { x += (w - len) / 2; } else if (halign == Area::RIGHT) { x += w - len; } res->insert(line.cells() + from, len, x, res->height()); /* * Determine the beginnning of the next line. */ if (to == line.length()) break; from = to; if (line[from].character == '\n') { ++from; } else if (line[from].character == ' ') { do { ++from; } while (from < line.length() && line[from].character == ' '); } } return res.release(); } /* ------------------------------------------------------------------------- */ /* * Attempt to line-format all "elements". If one of the elements can only be * area-formatted, return null. In that case, "::format()" (below) will * probably work. */ static Line * line_format(const list<auto_ptr<Element> > *elements) { auto_ptr<Line> res; if (elements) { list<auto_ptr<Element> >::const_iterator i; for (i = elements->begin(); i != elements->end(); ++i) { auto_ptr<Line> l((*i)->line_format()); if (!l.get()) return 0; if (res.get()) { *res += *l; } else { res = l; } } } return res.release(); } /* ------------------------------------------------------------------------- */ /* * Basically, a list of "Text"s is a stream of words that has to be formatted * into an area. But... as an extension to HTML 3.2 we want to allow "Block"s * be embedded in "Text", e.g. * * <FONT COLOR=red><P>Bla</P><P>Bloh</P></FONT> * * Attempt to line-format the "Text". This will fail if there is a "Block" * inside the "Text". * * The "Text" could not be line-formatted, so... append a line-break and * the area-formatted "Text". 
*/ static Area * format( const list<auto_ptr<Element> > *elements, Area::size_type w, int halign ) { if (!elements) return 0; auto_ptr<Area> res; auto_ptr<Line> line; list<auto_ptr<Element> >::const_iterator i; for (i = elements->begin(); i != elements->end(); ++i) { if (!(*i).get()) continue; auto_ptr<Line> l((*i)->line_format()); if (l.get()) { if (line.get()) { *line += *l; } else { line = l; } continue; } auto_ptr<Area> a((*i)->format(w, halign)); if (a.get()) { if (line.get()) { auto_ptr<Area> a2(make_up(*line, w, halign)); if (a2.get()) { if (res.get()) { *res += *a2; } else { res = a2; } } line.reset(); } if (res.get()) { *res += *a; } else { res = a; } } } if (line.get()) { auto_ptr<Area> a2(make_up(*line, w, halign)); if (a2.get()) { if (res.get()) { *res += *a2; } else { res = a2; }\ } } return res.release(); } /* * A copy of the above function, but the formatted text is printed to "os" * rather than into an Area. */ static void format( const list<auto_ptr<Element> > *elements, Area::size_type indent_left, Area::size_type w, int halign, ostream &os ) { if (!elements) return; auto_ptr<Line> line; list<auto_ptr<Element> >::const_iterator i; for (i = elements->begin(); i != elements->end(); ++i) { if (!(*i).get()) continue; auto_ptr<Line> l((*i)->line_format()); if (l.get()) { if (line.get()) { *line += *l; } else { line = l; } continue; } auto_ptr<Area> a((*i)->format(w, halign)); if (a.get()) { if (line.get()) { auto_ptr<Area> a2(make_up(*line, w, halign)); if (a2.get()) { *a2 >>= indent_left; os << *a2 << flush; } line.reset(); } *a >>= indent_left; os << *a << flush; } } if (line.get()) { auto_ptr<Area> a2(make_up(*line, w, halign)); if (a2.get()) { *a2 >>= indent_left; os << *a2 << flush; } } } /* ------------------------------------------------------------------------- */ static Properties formatting_properties; /* ----------------------- */ /*static*/ void Formatting::setProperty(const char *key, const char *value) { 
formatting_properties.setProperty(key, value); } /* ----------------------- */ /*static*/ void Formatting::loadProperties(istream &is) { formatting_properties.load(is); } /* ----------------------- */ /*static*/ const char * Formatting::getString(const char *key, const char *dflt) { return formatting_properties.getProperty(key, dflt); } // neue Methode fuer leere Attribute - Johannes Geiger const char * Formatting::getString(const char *key) { return formatting_properties.getProperty(key); } /* ----------------------- */ /* * Property not set => 0 * Property contains only white-space => 0 * Property conains one non-white-space character => { "x" } */ /*static*/ vector<string> * Formatting::getStringVector(const char *key, const char *dflt) { const char *p = formatting_properties.getProperty(key, dflt); if (!p) return 0; vector<string> *res = 0; for (;;) { while (isspace(*p)) ++p; if (!*p) break; const char *q = p + 1; while (*q && !isspace(*q)) ++q; if (!res) res = new vector<string>; res->push_back(string(p, q - p)); p = q; } return res; } /* ----------------------- */ /*static*/ int Formatting::getInt(const char *key, int dflt) { const char *p = formatting_properties.getProperty(key, 0); return p ? 
atoi(p) : dflt; } /* ----------------------- */ /*static*/ vector<int> * Formatting::getIntVector(const char *key, const char *dflt) { const char *p = formatting_properties.getProperty(key, dflt); if (!p) return 0; vector<int> *res = 0; for (;;) { while (isspace(*p)) ++p; if (!*p) break; if (!res) res = new vector<int>; res->push_back(atoi(p)); ++p; while (*p && !isspace(*p)) ++p; } return res; } /* ----------------------- */ /*static*/ char Formatting::getAttributes(const char *key, char dflt) { auto_ptr<vector<string> > v(getStringVector(key, 0)); if (!v.get() || v->empty()) return dflt; char res = Cell::NONE; for (vector<string>::const_iterator i = v->begin(); i != v->end(); ++i) { if (!cmp_nocase(*i, "NONE")) res = Cell::NONE; else if (!cmp_nocase(*i, "BOLD")) res |= Cell::BOLD; else if (!cmp_nocase(*i, "UNDERLINE")) res |= Cell::UNDERLINE; else if (!cmp_nocase(*i, "STRIKETHROUGH")) res |= Cell::STRIKETHROUGH; else ; } return res; } /* ------------------------------------------------------------------------- */ BlockFormat::BlockFormat( const char *item_name, Area::size_type default_vspace_before /* = 0 */ , Area::size_type default_vspace_after /* = 0 */ , Area::size_type default_indent_left /* = 0 */ , Area::size_type default_indent_right /* = 0 */ ) { char lb[80]; sprintf(lb, "%s.vspace.before", item_name); vspace_before = Formatting::getInt(lb, default_vspace_before); sprintf(lb, "%s.vspace.after", item_name); vspace_after = Formatting::getInt(lb, default_vspace_after); sprintf(lb, "%s.indent.left", item_name); indent_left = Formatting::getInt(lb, default_indent_left); sprintf(lb, "%s.indent.right", item_name); indent_right = Formatting::getInt(lb, default_indent_right); } Area::size_type BlockFormat::effective_width(Area::size_type w) const { /* * No problem if "w" is wide enough... */ if (indent_left + 10 + indent_right <= w) { return w - indent_left - indent_right; } /* * Does reducing the right indent help? 
*/ if (indent_left + 10 <= w) return 10; /* * Do it with right indent == 0. */ if (indent_left + 1 <= w) return w - indent_left; /* * Even that doesn't help, return "1". */ return 1; } /* ------------------------------------------------------------------------- */ ListFormat::ListFormat( const char *item_name, Area::size_type default_vspace_before /* = 0 */ , Area::size_type default_vspace_between /* = 0 */ , Area::size_type default_vspace_after /* = 0 */ , const char *default_indents /* = "6" */ , const char *default_default_types /* = "DISC CIRCLE SQUARE" */ ) { char lb[80]; sprintf(lb, "%s.vspace.before", item_name); vspace_before = Formatting::getInt(lb, default_vspace_before ); sprintf(lb, "%s.vspace.between", item_name); vspace_between = Formatting::getInt(lb, default_vspace_between); sprintf(lb, "%s.vspace.after", item_name); vspace_after = Formatting::getInt(lb, default_vspace_after ); sprintf(lb, "%s.indents", item_name); indents.reset(Formatting::getIntVector(lb, default_indents)); sprintf(lb, "%s.default_types", item_name); default_types.reset(Formatting::getStringVector(lb, default_default_types)); } Area::size_type ListFormat::get_indent(int nesting) const { return ( (!indents.get() || indents.get()->empty()) ? 6 : nesting < indents->size() ? (*indents)[nesting] : indents->back() ); } int ListFormat::get_type( const list<TagAttribute> *attributes, int nesting, int default_default_type ) const { const char *default_type = ( !default_types.get() || default_types->empty() ? 0 : nesting < default_types->size() ? 
(*default_types)[nesting].c_str() : default_types->back().c_str() ); return get_attribute( attributes, "TYPE", default_type, // dflt1 default_default_type, // dflt2, if dflt1 fails "NO_BULLET", NO_BULLET, "DISC", DISC, "SQUARE", SQUARE, "CIRCLE", CIRCLE, "CUSTOM1", CUSTOM1, "CUSTOM2", CUSTOM2, "CUSTOM3", CUSTOM3, "1", ARABIC_NUMBERS, "a", LOWER_ALPHA, "A", UPPER_ALPHA, "i", LOWER_ROMAN, "I", UPPER_ROMAN, 0 ); } /* ------------------------------------------------------------------------- */ ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/format.h���������������������������������������������������������������������������0100644�0000000�0000000�00000004610�07760112171�013754� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig <arno@unkrig.de> */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
 * GNU General Public License in the file COPYING for more details.
 */

/***************************************************************************/

/*
 * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
 * Dates and reasons of modifications:
 * Fre Jun  8 17:49:39 CEST 2001: new method
 * Thu Oct  4 21:36:51 CEST 2001: ported to g++ 3.0
 * Wed Jul  2 22:05:32 CEST 2003: ported to g++ 3.3
 */

/***************************************************************************/

#ifndef __format_h_INCLUDED__ /* { */
#define __format_h_INCLUDED__

/* ------------------------------------------------------------------------- */

#include <string>
#include <vector>
#include <istream>

using std::string;
using std::vector;
using std::istream;

/* ------------------------------------------------------------------------- */

/*
 * Static-only interface to the formatting properties. All members are
 * static and the constructor is private, so the class is used purely as a
 * scope for these functions.
 */
class Formatting {
public:
  /*
   * NOTE(review): presumably stores "value" under "key" and reads
   * properties from a stream, respectively - the implementations are not
   * visible from this header; confirm against format.C.
   */
  static void               setProperty(const char *key, const char *value);
  static void               loadProperties(istream &is);

  /*
   * NOTE(review): presumably returns the string value of "key", or "dflt"
   * if it is not set - confirm against format.C.
   */
  static const char         *getString(const char *key, const char *dflt);
  // New method for empty attributes - Johannes Geiger
  static const char         *getString(const char *key);

  /*
   * NOTE(review): presumably the string counterpart of "getIntVector()";
   * "getAttributes()" stores the result in an "auto_ptr" and tests it for
   * null, so the result is heap-allocated, owned by the caller, and may
   * be 0.
   */
  static vector<string>     *getStringVector(const char *key, const char *dflt);

  /*
   * NOTE(review): presumably the value of "key" converted with "atoi()",
   * or "dflt" if it is not set - confirm against format.C.
   */
  static int                getInt(const char *key, int dflt);

  /*
   * Parses the value of "key" (or "dflt") as a whitespace-separated list of
   * integers. The result is allocated with "new" and owned by the caller;
   * it is 0 if there is no value or the value contains no token.
   */
  static vector<int>        *getIntVector(const char *key, const char *dflt);

  /*
   * Combines the attribute names listed under "key" ("NONE", "BOLD",
   * "UNDERLINE", "STRIKETHROUGH", compared case-insensitively) into a
   * "Cell" attribute mask. Returns "dflt" if the property is missing or
   * empty.
   */
  static char               getAttributes(const char *key, char dflt);

private:
  Formatting(); // Do not instantiate me!
}; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ ������������������������������������������������������������������������������������������������������������������������html2text-1.3.2a/html.C�����������������������������������������������������������������������������0100644�0000000�0000000�00000040364�07760112171�013371� 0����������������������������������������������������������������������������������������������������ustar �root����������������������������root������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig <arno@unkrig.de> */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. 
*/

/***************************************************************************/

/*
 * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
 * Dates and reasons of modifications:
 * Fre Jun  8 17:43:02 CEST 2001: new method
 * Wed Jul  2 22:07:12 CEST 2003: ported to g++ 3.3
 */

/***************************************************************************/

#include <stdlib.h>
#include <stdarg.h>

#include <iostream>

#include "html.h"
#include "HTMLParser.h"
#include "cmp_nocase.h"

/* ------------------------------------------------------------------------- */

/*
 * Define some helpers.
 */

/*
 * "define_foreach(T, args, action)" expands to the definition of a function
 * "foreach" with the parameter list "args". The body walks the container
 * "l" of type "T" from "begin()" to "end()" with a const iterator "i" and
 * executes "action" once per element; "args" must therefore declare a
 * parameter named "l", and "action" may refer to "i".
 */
#define define_foreach(T, args, action) \
void foreach args { \
  for (T::const_iterator i = l.begin(); i != l.end(); ++i) { \
    action; \
  } \
}

/*
 * "pack(T)" uses "define_foreach" to define
 * "foreach(const list<auto_ptr<T> > &l, ostream &os,
 * ostream_manipulator separator)", which calls "unparse(os, separator)" on
 * every element of the list. The "static" in front of each use below gives
 * the generated overload internal linkage.
 */
#define pack(T) \
define_foreach(list<auto_ptr<T> >, ( \
  const list<auto_ptr<T> > &l, \
  ostream                  &os, \
  ostream_manipulator      separator \
), (*i)->unparse(os, separator))

/* One "foreach" overload per list element type. */
static pack(Element)
static pack(TableCell)
static pack(TableRow)
static pack(ListItem)
static pack(Option)
static pack(DefinitionListItem)
static pack(Script)
static pack(Style)

#undef pack

/*
 * Special helper for "const auto_ptr<list<TagAttribute> > &".
 *
 * Writes every attribute of the list as ' first="second"' (with a leading
 * blank); writes nothing if the pointer is null. The attribute value is
 * written as it is, without any escaping.
 */
static ostream &operator<<(ostream &os, const auto_ptr<list<TagAttribute> > &a)
{
  if (a.get()) {
    const list<TagAttribute> &al(*a);
    list<TagAttribute>::const_iterator i;
    for (i = al.begin(); i != al.end(); ++i) {
      os << " " << (*i).first << "=\"" << (*i).second << "\"";
    }
  }
  return os;
}

/* ------------------------------------------------------------------------- */

/*
 * Brothers of "endl".
 */

/* A manipulator that inserts nothing: it returns the stream untouched. */
static ostream &none(ostream &os) { return os; }

/* ------------------------------------------------------------------------- */

/*
 * Some C++ compilers (e.g. EGCS 2.91.66) have problems if all virtual
 * methods of a class are inline or pure virtual, so we define the virtual
 * "Element::~Element()", which is the only virtual method, non-inline,
 * although it is empty.
*/ Element::~Element() { } /* ------------------------------------------------------------------------- */ void Document::unparse(ostream &os, ostream_manipulator separator) const { os << "<HTML" << attributes << ">" << separator; head.unparse(os, separator); body.unparse(os, separator); os << "</HTML>" << separator; } void Head::unparse(ostream &os, ostream_manipulator separator) const { os << "<HEAD>" << separator; if (title.get()) { os << "<TITLE>" << separator; title->unparse(os, separator); os << "" << separator; } if (isindex_attributes.get()) { os << "" << std::endl; } if (base_attributes.get()) os << "" << std::endl; foreach(scripts, os, separator); foreach(styles, os, separator); if (meta_attributes.get()) os << "" << std::endl; if (link_attributes.get()) os << "" << std::endl; os << "" << separator; } void Script::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator << text << "" << separator; } void Style::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator << text << "" << separator; } void Body::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << separator; } void PCData::unparse(ostream &os, ostream_manipulator separator) const { for (string::size_type j = 0; j < text.length(); ++j) { char c = text[j]; switch (((int) c) & 255) { case LATIN1_nbsp: os << " "; break; case '&': os << "&"; break; case '<': os << "<"; break; case '>': os << ">"; break; case '"': os << """; break; default: if (c & 0x80) { os << "&#" << (((int) c) & 255) << ";"; } else { os << c; } break; } } os << separator; } void Heading::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << separator; } void Paragraph::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (texts.get()) foreach(*texts, os, 
separator); os << "

" << separator; } void Image::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; } void Applet::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << separator; } void Param::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; } void Division::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (body_content.get()) foreach(*body_content, os, separator); os << "" << separator; } void Center::unparse(ostream &os, ostream_manipulator separator) const { os << "
" << separator; if (body_content.get()) foreach(*body_content, os, separator); os << "
" << separator; } void BlockQuote::unparse(ostream &os, ostream_manipulator separator) const { os << "
" << separator; if (content.get()) foreach(*content, os, separator); os << "
" << separator; } void Address::unparse(ostream &os, ostream_manipulator separator) const { os << "
" << separator; if (content.get()) foreach(*content, os, separator); os << "
" << separator; } void Form::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << separator; } void Preformatted::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (texts.get()) { foreach(*texts, os, none); os << separator; } os << "" << separator; } void HorizontalRule::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; } void Input::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; } void Option::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (pcdata.get()) pcdata->unparse(os, separator); os << "" << std::endl; } void Select::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << std::endl; } void TextArea::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (pcdata.get()) pcdata->unparse(os, separator); os << "" << std::endl; } /* ------------------------------------------------------------------------- */ void OrderedList::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (items.get()) foreach(*items, os, separator); os << "" << separator; } void UnorderedList::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (items.get()) foreach(*items, os, separator); os << "" << separator; } void Dir::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (items.get()) foreach(*items, os, separator); os << "" << separator; } void Menu::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (items.get()) foreach(*items, os, separator); os << "" << separator; } void ListNormalItem::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (flow.get()) 
foreach(*flow, os, separator); os << "" << separator; } void ListBlockItem::unparse(ostream &os, ostream_manipulator separator) const { if (block.get()) block->unparse(os, separator); } /* ------------------------------------------------------------------------- */ void DefinitionList::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (items.get()) foreach(*items, os, separator); os << "" << separator; } void TermName::unparse(ostream &os, ostream_manipulator separator) const { os << "
" << separator; if (flow.get()) foreach(*flow, os, separator); os << "
" << separator; } void TermDefinition::unparse(ostream &os, ostream_manipulator separator) const { os << "
" << separator; if (flow.get()) foreach(*flow, os, separator); os << "
" << separator; } /* ------------------------------------------------------------------------- */ void Table::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (caption.get()) caption->unparse(os, separator); if (rows.get()) foreach(*rows, os, separator); os << "" << separator; } void NoBreak::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << separator; } void Font::unparse(ostream &os, ostream_manipulator separator) const { const char *tag = ( attribute == HTMLParser::TT ? "TT" : attribute == HTMLParser::I ? "I" : attribute == HTMLParser::B ? "B" : attribute == HTMLParser::U ? "U" : attribute == HTMLParser::STRIKE ? "STRIKE" : attribute == HTMLParser::BIG ? "BIG" : attribute == HTMLParser::SMALL ? "SMALL" : attribute == HTMLParser::SUB ? "SUB" : attribute == HTMLParser::SUP ? "SUP" : "???" ); os << "<" << tag << ">" << separator; if (texts.get()) foreach(*texts, os, separator); os << "" << separator; } void Phrase::unparse(ostream &os, ostream_manipulator separator) const { const char *tag = ( attribute == HTMLParser::EM ? "EM" : attribute == HTMLParser::STRONG ? "STRONG" : attribute == HTMLParser::DFN ? "DFN" : attribute == HTMLParser::CODE ? "CODE" : attribute == HTMLParser::SAMP ? "SAMP" : attribute == HTMLParser::KBD ? "KBD" : attribute == HTMLParser::VAR ? "VAR" : attribute == HTMLParser::CITE ? "CITE" : "???" 
); os << "<" << tag << ">" << separator; if (texts.get()) foreach(*texts, os, separator); os << "" << separator; } void Anchor::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (texts.get()) foreach(*texts, os, separator); os << "" << separator; } void BaseFont::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; } void LineBreak::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; } void Map::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (areas.get()) { const list > > &al(*areas); list > >::const_iterator i; for (i = al.begin(); i != al.end(); ++i) { os << "" << separator; } } os << "" << separator; } void Font2::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (elements.get()) foreach(*elements, os, separator); os << "" << separator; } void TableRow::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (cells.get()) foreach(*cells, os, separator); os << "" << separator; } void TableCell::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << separator; } void TableHeadingCell::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (content.get()) foreach(*content, os, separator); os << "" << separator; } void Caption::unparse(ostream &os, ostream_manipulator separator) const { os << "" << separator; if (texts.get()) foreach(*texts, os, separator); os << "" << separator; } /* ------------------------------------------------------------------------- */ string get_attribute( const list *as, const char *name, const char *dflt ) { if (as) { list::const_iterator i; for (i = as->begin(); i != as->end(); ++i) { if (cmp_nocase((*i).first, name) == 0) return string((*i).second); } } return string(dflt); } // *exists is set to false if attribute 
*name does not exist - Johannes Geiger string get_attribute( const list *as, const char *name, bool *exists ) { *exists = true; if (as) { list::const_iterator i; for (i = as->begin(); i != as->end(); ++i) { if (cmp_nocase((*i).first, name) == 0) return string((*i).second); } } *exists = false; return string(""); } int get_attribute( const list *as, const char *name, int dflt ) { if (as) { list::const_iterator i; for (i = as->begin(); i != as->end(); ++i) { if (cmp_nocase((*i).first, name) == 0) return atoi((*i).second.c_str()); } } return dflt; } int get_attribute( const list *as, const char *name, int dflt, const char *s1, int v1, ... ) { if (as) { list::const_iterator i; for (i = as->begin(); i != as->end(); ++i) { if (cmp_nocase((*i).first, name) == 0) { const char *s = s1; int v = v1; va_list va; va_start(va, v1); for (;;) { if (cmp_nocase(s, (*i).second) == 0) break; s = va_arg(va, const char *); if (!s) { v = dflt; break; } v = va_arg(va, int); } va_end(va); return v; } } } return dflt; } int get_attribute( const list *as, const char *name, // Attribute name const char *dflt1, // If attribute not specified int dflt2, // If string value does not match s1, ... const char *s1, int v1, ... ) { if (as) { list::const_iterator i; for (i = as->begin(); i != as->end(); ++i) { if (cmp_nocase((*i).first, name) == 0) { dflt1 = (*i).second.c_str(); break; } } } if (!dflt1) return dflt2; const char *s = s1; int v = v1; va_list va; va_start(va, v1); for (;;) { if (cmp_nocase(s, dflt1) == 0) break; s = va_arg(va, const char *); if (!s) break; v = va_arg(va, int); } va_end(va); return s ? v : dflt2; } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/html.h0100644000000000000000000004674707357140252013454 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 
38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * "This product includes software developed by GMRS Software GmbH." * The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Fre Jun 8 17:46:31 CEST 2001: new method * Thu Oct 4 21:38:47 CEST 2001: ported to g++ 3.0 */ /***************************************************************************/ #ifndef __html_h_INCLUDED__ /* { */ #define __html_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #include #include #ifdef AUTO_PTR_BROKEN /* { */ # define auto_ptr broken_auto_ptr # include # undef auto_ptr # include "libstd/include/auto_ptr.h" #else /* } { */ # include #endif /* } */ #include #include "Area.h" /* ------------------------------------------------------------------------- */ #define LATIN1_nbsp 160 #define LATIN1_iexcl 161 #define LATIN1_cent 162 #define LATIN1_pound 163 #define LATIN1_curren 164 #define LATIN1_yen 165 #define LATIN1_brvbar 166 #define LATIN1_sect 167 #define LATIN1_uml 168 #define LATIN1_copy 169 #define 
LATIN1_ordf 170 #define LATIN1_laquo 171 #define LATIN1_not 172 #define LATIN1_shy 173 #define LATIN1_reg 174 #define LATIN1_macr 175 #define LATIN1_deg 176 #define LATIN1_plusmn 177 #define LATIN1_sup2 178 #define LATIN1_sup3 179 #define LATIN1_acute 180 #define LATIN1_micro 181 #define LATIN1_para 182 #define LATIN1_middot 183 #define LATIN1_cedil 184 #define LATIN1_sup1 185 #define LATIN1_ordm 186 #define LATIN1_raquo 187 #define LATIN1_frac14 188 #define LATIN1_frac12 189 #define LATIN1_frac34 190 #define LATIN1_iquest 191 #define LATIN1_Agrave 192 #define LATIN1_Aacute 193 #define LATIN1_Acirc 194 #define LATIN1_Atilde 195 #define LATIN1_Auml 196 #define LATIN1_Aring 197 #define LATIN1_AElig 198 #define LATIN1_Ccedil 199 #define LATIN1_Egrave 200 #define LATIN1_Eacute 201 #define LATIN1_Ecirc 202 #define LATIN1_Euml 203 #define LATIN1_Igrave 204 #define LATIN1_Iacute 205 #define LATIN1_Icirc 206 #define LATIN1_Iuml 207 #define LATIN1_ETH 208 #define LATIN1_Ntilde 209 #define LATIN1_Ograve 210 #define LATIN1_Oacute 211 #define LATIN1_Ocirc 212 #define LATIN1_Otilde 213 #define LATIN1_Ouml 214 #define LATIN1_times 215 #define LATIN1_Oslash 216 #define LATIN1_Ugrave 217 #define LATIN1_Uacute 218 #define LATIN1_Ucirc 219 #define LATIN1_Uuml 220 #define LATIN1_Yacute 221 #define LATIN1_THORN 222 #define LATIN1_szlig 223 #define LATIN1_agrave 224 #define LATIN1_aacute 225 #define LATIN1_acirc 226 #define LATIN1_atilde 227 #define LATIN1_auml 228 #define LATIN1_aring 229 #define LATIN1_aelig 230 #define LATIN1_ccedil 231 #define LATIN1_egrave 232 #define LATIN1_eacute 233 #define LATIN1_ecirc 234 #define LATIN1_euml 235 #define LATIN1_igrave 236 #define LATIN1_iacute 237 #define LATIN1_icirc 238 #define LATIN1_iuml 239 #define LATIN1_eth 240 #define LATIN1_ntilde 241 #define LATIN1_ograve 242 #define LATIN1_oacute 243 #define LATIN1_ocirc 244 #define LATIN1_otilde 245 #define LATIN1_ouml 246 #define LATIN1_divide 247 #define LATIN1_oslash 248 #define LATIN1_ugrave 
249 #define LATIN1_uacute 250 #define LATIN1_ucirc 251 #define LATIN1_uuml 252 #define LATIN1_yacute 253 #define LATIN1_thorn 254 #define LATIN1_yuml 255 /* ------------------------------------------------------------------------- */ using std::string; using std::pair; using std::list; typedef pair TagAttribute; string get_attribute( const list *, const char *name, const char *dflt ); // neue Methode fuer leere Attribute - Johannes Geiger string get_attribute( const list *, const char *name, bool *exists ); int get_attribute( const list *, const char *name, int dflt ); int get_attribute( const list *, const char *name, int dflt, const char *s1, int v1, ... /* ... NULL */ ); int get_attribute( const list *, const char *name, const char *dflt1, int dflt2, const char *s1, int v1, ... /* ... NULL */ ); /* ------------------------------------------------------------------------- */ typedef ostream &(*ostream_manipulator)(ostream &); struct Element { virtual ~Element(); virtual void unparse(ostream &, ostream_manipulator separator) const = 0; /* * Attempt to line-format the element. If the element contains "Block"s, * then it cannot be line-formatted, and 0 will be returned. However, it * is still possible to try "format()" (see below). */ virtual Line *line_format() const { return 0; } /* * Format the element into a rectangular area. Attempt to not exceed "width". 
*/ virtual Area *format( Area::size_type /*width*/, int /*halign*/ ) const { return 0; } virtual struct PCData *to_PCData() { return 0; } }; /* ------------------------------------------------------------------------- */ struct PCData : public Element { string text; /*virtual*/ PCData *to_PCData() { return this; } /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; }; struct Font : public Element { int attribute; // TT I B U STRIKE BIG SMALL // SUB SUP auto_ptr > > texts; Font(int a, list > *t = 0) : attribute(a), texts(t) {} /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Phrase : public Element { int attribute; // EM STRONG DFN CODE SAMP // KBD VAR CITE auto_ptr > > texts; Phrase(int a, list > *t = 0) : attribute(a), texts(t) {} /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Font2 : public Element { auto_ptr > attributes; // SIZE COLOR auto_ptr > > elements; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Anchor : public Element { auto_ptr > attributes; // NAME HREF REL REV TITLE auto_ptr > > texts; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct BaseFont : public Element { auto_ptr > attributes; // SIZE /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; }; struct LineBreak : public Element { auto_ptr > attributes; // CLEAR /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; }; 
struct Map : public Element { auto_ptr > attributes; // NAME auto_ptr > > > areas; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; }; struct Paragraph : public Element { auto_ptr > attributes; // ALIGN auto_ptr > > texts; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Image : public Element { auto_ptr > attributes; // SRC ALT ALIGN WIDTH HEIGHT // BORDER HSPACE VSPACE USEMAP // ISMAP /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; }; struct Applet : public Element { auto_ptr > attributes; // CODEBASE CODE ALT NAME // WIDTH HEIGHT ALIGN HSPACE // VSPACE auto_ptr > > content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Param : public Element { auto_ptr > attributes; // NAME VALUE /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; }; struct Division : public Element { auto_ptr > attributes; // ALIGN auto_ptr > > body_content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Center : public Element { // No attributes specified for
! auto_ptr > > body_content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct BlockQuote : public Element { // No attributes specified for
! auto_ptr > > content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Address : public Element { // No attributes specified for
! auto_ptr > > content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; /* ------------------------------------------------------------------------- */ struct Form : public Element { auto_ptr > attributes; // ACTION METHOD ENCTYPE auto_ptr > > content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Input : public Element { auto_ptr > attributes; // TYPE NAME VALUE CHECKED SIZE // MAXLENGTH SRC ALIGN /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; }; struct Option { auto_ptr > attributes; // SELECTED VALUE auto_ptr pcdata; void unparse(ostream &, ostream_manipulator separator) const; }; struct Select : public Element { auto_ptr > attributes; // NAME SIZE MULTIPLE auto_ptr > > content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; }; struct TextArea : public Element { auto_ptr > attributes; // NAME ROWS COLS auto_ptr pcdata; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; /* ------------------------------------------------------------------------- */ struct Preformatted : public Element { auto_ptr > attributes; // WIDTH auto_ptr > > texts; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Body { auto_ptr > attributes; // BACKGROUND BGCOLOR TEXT // LINK VLINK ALINK auto_ptr > > content; virtual ~Body() {} virtual void unparse(ostream &, ostream_manipulator separator) const; virtual Area *format(Area::size_type w, int halign) const; void format( Area::size_type indent_left, Area::size_type w, int halign, ostream &os ) const; }; struct Script { auto_ptr > attributes; 
// LANGUAGE, ??? string text; void unparse(ostream &, ostream_manipulator separator) const; }; struct Style { auto_ptr > attributes; // ??? string text; void unparse(ostream &, ostream_manipulator separator) const; }; struct Head { auto_ptr title; auto_ptr > isindex_attributes; // PROMPT auto_ptr > base_attributes; // HREF list > scripts; list > styles; auto_ptr > meta_attributes; // HTTP-EQUIV NAME CONTENT auto_ptr > link_attributes; // HREF REL REV TITLE void unparse(ostream &, ostream_manipulator separator) const; }; struct Document { auto_ptr > attributes; // VERSION Head head; Body body; void unparse(ostream &, ostream_manipulator separator) const; Area *format(Area::size_type w, int halign) const; void format( Area::size_type indent_left, Area::size_type w, int halign, ostream &os ) const; }; struct Heading : public Element { int level; auto_ptr > attributes; // ALIGN auto_ptr > > content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct TableCell : public Body { /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; }; struct TableHeadingCell : public TableCell { /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct TableRow { auto_ptr > attributes; // ALIGN VALIGN auto_ptr > > cells; void unparse(ostream &, ostream_manipulator separator) const; }; struct Caption { auto_ptr > attributes; // ALIGN auto_ptr > > texts; void unparse(ostream &, ostream_manipulator separator) const; Area *format(Area::size_type w, int halign) const; }; struct Table : public Element { auto_ptr > attributes; // ALIGN WIDTH BORDER // CELLSPACING CELLPADDING auto_ptr caption; auto_ptr > > rows; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct NoBreak : public Element { auto_ptr > > 
content; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Line *line_format() const; }; struct HorizontalRule : public Element { auto_ptr > attributes; // ALIGN NOSHADE SIZE WIDTH /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; /* ------------------------------------------------------------------------- */ struct ListItem { virtual ~ListItem() {} virtual void unparse(ostream &, ostream_manipulator separator) const = 0; virtual Area *format( Area::size_type w, int style, Area::size_type indent, int *number_in_out = 0 ) const = 0; }; struct ListNormalItem : public ListItem { auto_ptr > attributes; // TYPE VALUE auto_ptr > > flow; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format( Area::size_type w, int style, Area::size_type indent, int *number_in_out ) const; }; struct ListBlockItem : public ListItem { auto_ptr block; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format( Area::size_type w, int style, Area::size_type indent, int * ) const; }; struct OrderedList : public Element { auto_ptr > attributes; // TYPE START COMPACT auto_ptr > > items; int nesting; // Item indentation depends on on the list nesting level. 
/*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct UnorderedList : public Element { auto_ptr > attributes; // TYPE COMPACT auto_ptr > > items; int nesting; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Dir : public Element { auto_ptr > attributes; // COMPACT auto_ptr > > items; int nesting; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct Menu : public Element { auto_ptr > attributes; // COMPACT auto_ptr > > items; int nesting; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; /* ------------------------------------------------------------------------- */ struct DefinitionListItem { virtual ~DefinitionListItem() {} virtual void unparse(ostream &, ostream_manipulator separator) const = 0; virtual Area *format(Area::size_type w, int halign) const = 0; }; struct TermName : public DefinitionListItem { auto_ptr > > flow; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct TermDefinition : public DefinitionListItem { auto_ptr > > flow; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; struct DefinitionList : public Element { auto_ptr > attributes; // COMPACT auto_ptr > > preamble; auto_ptr > > items; /*virtual*/ void unparse(ostream &, ostream_manipulator separator) const; /*virtual*/ Area *format(Area::size_type w, int halign) const; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ 
html2text-1.3.2a/html2text.C0100644000000000000000000002613307760112171014356 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Fre Jun 8 19:00:26 CEST 2001: new image handling * Thu Oct 4 21:42:24 CEST 2001: ported to g++ 3.0, bugfix for '-' as synonym for STDIN * Mon Jul 22 13:48:26 CEST 2002: Made finaly reading from STDIN work. 
* Sat Sep 14 15:04:09 CEST 2002: Added plain ASCII output patch by Bela Lubkin * Wed Jul 2 22:08:45 CEST 2003: ported to g++ 3.3 */ /***************************************************************************/ #include #include #include #include "html.h" #include "HTMLControl.h" #include "urlistream.h" #include "format.h" #define stringify(x) stringify2(x) #define stringify2(x) #x /* ------------------------------------------------------------------------- */ class MyParser : public HTMLControl { public: enum { PRINT_AS_ASCII, UNPARSE, SYNTAX_CHECK }; MyParser( urlistream &is_, bool debug_scanner_, bool debug_parser_, ostream &os_, int mode_, int width_, const char *file_name_ ) : HTMLControl(is_, debug_scanner_, debug_parser_), os(os_), mode(mode_), width(width_), file_name(file_name_) {} private: /*virtual*/ void yyerror(char *); /*virtual*/ void process(const Document &); ostream &os; int mode; int width; string file_name; }; /*virtual*/ void MyParser::yyerror(char *p) { /* * Swallow parse error messages if not in "syntax check" mode. */ if (mode != SYNTAX_CHECK && !strcmp(p, "parse error")) return; std::cerr << "File \"" << file_name << "\", line " << current_line << ", column " << current_column << ": " << p << std::endl; } /*virtual*/ void MyParser::process(const Document &document) { switch (mode) { case PRINT_AS_ASCII: document.format(/*indent_left*/ 0, width, Area::LEFT, os); break; case UNPARSE: document.unparse(os, std::endl); break; case SYNTAX_CHECK: break; default: std::cerr << "??? Invalid mode " << mode << " ??? 
" << std::endl; exit(1); break; } } /* ------------------------------------------------------------------------- */ static const char *usage = "\ Usage:\n\ html2text -help\n\ html2text -version\n\ html2text [ -unparse | -check ] [ -debug-scanner ] [ -debug-parser ] \\\n\ [ -rcfile ] [ -style ( compact | pretty ) ] [ -width ] \\\n\ [ -o ] [ -nobs ] [ -ascii ] [ ] ...\n\ Formats HTML document(s) read from or STDIN and generates ASCII\n\ text.\n\ -help Print this text and exit\n\ -version Print program version and copyright notice\n\ -unparse Generate HTML instead of ASCII output\n\ -check Do syntax checking only\n\ -debug-scanner Report parsed tokens on STDERR (debugging)\n\ -debug-parser Report parser activity on STDERR (debugging)\n\ -rcfile Read instead of \"$HOME/.html2textrc\"\n\ -style compact Create a \"compact\" output format (default)\n\ -style pretty Insert some vertical space for nicer output\n\ -width Optimize for screen widths other than 79\n\ -o Redirect output into \n\ -nobs Do not use backspaces for boldface and underlining\n\ -ascii Use plain ASCII for output instead of ISO-8859-1\n\ "; int use_iso8859 = 1; int main(int argc, char **argv) { if (argc == 2 && !strcmp(argv[1], "-help")) { std::cout << "This is html2text, version " stringify(VERSION) << std::endl << std::endl << usage; exit(0); } if (argc == 2 && !strcmp(argv[1], "-version")) { std::cout << "This is html2text, version " stringify(VERSION) << std::endl << std::endl << "The latest version can be found at http://userpage.fu-berlin.de/~mbayer/tools/" << std::endl << std::endl << "This program is distributed in the hope that it will be useful, but WITHOUT" << std::endl << "ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS" << std::endl << "FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details." 
<< std::endl << std::endl; exit(0); } bool mode = MyParser::PRINT_AS_ASCII; bool debug_scanner = false; bool debug_parser = false; const char *home = getenv("HOME"); string rcfile = string(home ? home : "") + "/.html2textrc"; const char *style = "compact"; int width = 79; const char *output_file_name = "-"; bool use_backspaces = true; int i; for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) { const char *arg = argv[i]; if (!strcmp(arg, "-unparse" )) { mode = MyParser::UNPARSE; } else if (!strcmp(arg, "-check" )) { mode = MyParser::SYNTAX_CHECK; } else if (!strcmp(arg, "-debug-scanner")) { debug_scanner = true; } else if (!strcmp(arg, "-debug-parser" )) { debug_parser = true; } else if (!strcmp(arg, "-rcfile" )) { rcfile = argv[++i]; } else if (!strcmp(arg, "-style" )) { style = argv[++i]; } else if (!strcmp(arg, "-width" )) { width = atoi(argv[++i]); } else if (!strcmp(arg, "-o" )) { output_file_name = argv[++i]; } else if (!strcmp(arg, "-nobs" )) { use_backspaces = false; } else if (!strcmp(arg, "-ascii" )) { use_iso8859 = false; } else { std::cerr << "Unrecognized command line option \"" << arg << "\", try \"-help\"." << std::endl; exit(1); } } if (i > argc) { std::cerr << "Error: Required parameter after \"" << argv[argc - 1] << "\" missing." << std::endl; exit(1); } const char *const *input_urls; int number_of_input_urls; if (i >= argc) { static const char *const x = "-"; input_urls = &x; number_of_input_urls = 1; } else { input_urls = argv + i; number_of_input_urls = argc - i; } /* * Set up formatting: First, set some formatting properties depending on * the "-style" command line option. */ if (!strcmp(style, "compact")) { ; } else if (!strcmp(style, "pretty")) { /* * The "pretty" style was kindly supplied by diligent user Rolf Niepraschk. 
*/ static const struct { const char *key; const char *value; } properties[] = { { "OL.TYPE", "1" }, { "OL.vspace.before", "1" }, { "OL.vspace.after", "1" }, { "OL.indents", "5" }, { "UL.vspace.before", "1" }, { "UL.vspace.after", "1" }, { "UL.indents", "2" }, { "DL.vspace.before", "1" }, { "DL.vspace.after", "1" }, { "DT.vspace.before", "1" }, { "DIR.vspace.before", "1" }, { "DIR.indents", "2" }, { "MENU.vspace.before", "1" }, { "MENU.vspace.after", "1" }, { "DT.indent", "2" }, { "DD.indent", "6" }, { "HR.marker", "-" }, { "H1.prefix", "" }, { "H2.prefix", "" }, { "H3.prefix", "" }, { "H4.prefix", "" }, { "H5.prefix", "" }, { "H6.prefix", "" }, { "H1.suffix", "" }, { "H2.suffix", "" }, { "H3.suffix", "" }, { "H4.suffix", "" }, { "H5.suffix", "" }, { "H6.suffix", "" }, { "H1.vspace.before", "2" }, { "H2.vspace.before", "1" }, { "H3.vspace.before", "1" }, { "H4.vspace.before", "1" }, { "H5.vspace.before", "1" }, { "H6.vspace.before", "1" }, { "H1.vspace.after", "1" }, { "H2.vspace.after", "1" }, { "H3.vspace.after", "1" }, { "H4.vspace.after", "1" }, { "H5.vspace.after", "1" }, { "H6.vspace.after", "1" }, { "TABLE.vspace.before", "1" }, { "TABLE.vspace.after", "1" }, { "CODE.vspace.before", "0" }, { "CODE.vspace.after", "0" }, { "BLOCKQUOTE.vspace.before", "1" }, { "BLOCKQUOTE.vspace.after", "1" }, { "PRE.vspace.before", "1" }, { "PRE.vspace.after", "1" }, { "PRE.indent.left", "2" }, { "IMG.replace.noalt", "" }, { "IMG.alt.prefix", " " }, { "IMG.alt.suffix", " " }, { 0, 0 } }, *p; for (p = properties; p->key; ++p) { Formatting::setProperty(p->key, p->value); } } else { std::cerr << "Unknown style \"" << style << "\" specified -- try \"-help\"." << std::endl; ::exit(1); } { std::ifstream ifs(rcfile.c_str()); if (!ifs.rdbuf()->is_open()) ifs.open("/etc/html2textrc"); if (ifs.rdbuf()->is_open()) { Formatting::loadProperties(ifs); } } /* * Set up printing. 
*/ Area::use_backspaces = use_backspaces; ostream *osp; std::ofstream ofs; if (!strcmp(output_file_name, "-")) { osp = &std::cout; } else { ofs.open(output_file_name, std::ios::out); if (!ofs) { std::cerr << "Could not open output file \"" << output_file_name << "\"." << std::endl; exit(1); } osp = &ofs; } for (i = 0; i < number_of_input_urls; ++i) { const char *input_url = input_urls[i]; if (number_of_input_urls != 1) { *osp << "###### " << input_url << " ######" << std::endl; } istream *isp; urlistream uis; uis.open(input_url); if (!uis.is_open()) { std::cerr << "Opening input URL \"" << input_url << "\": " << uis.open_error() << std::endl; exit(1); } MyParser parser( uis, debug_scanner, debug_parser, *osp, mode, width, input_url ); if (parser.yyparse() != 0) exit(1); } return 0; } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/COPYING0100644000000000000000000004311407357143740013360 0ustar rootroot GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. 
This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. 
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) 19yy <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19yy name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. 
html2text-1.3.2a/pretty.style0100644000000000000000000000201107310172645014720 0ustar rootroot /* these are the defaults uses in '-style pretty' mode */ OL.TYPE = 1 OL.vspace.before = 1 OL.vspace.after = 1 OL.indents = 5 UL.vspace.before = 1 UL.vspace.after = 1 UL.indents = 2 DL.vspace.before = 1 DL.vspace.after = 1 DT.vspace.before = 1 DIR.vspace.before = 1 DIR.indents = 2 MENU.vspace.before = 1 MENU.vspace.after = 1 DT.indent = 2 DD.indent = 6 HR.marker = - H1.prefix = H2.prefix = H3.prefix = H4.prefix = H5.prefix = H6.prefix = H1.suffix = H2.suffix = H3.suffix = H4.suffix = H5.suffix = H6.suffix = H1.vspace.before = 2 H2.vspace.before = 1 H3.vspace.before = 1 H4.vspace.before = 1 H5.vspace.before = 1 H6.vspace.before = 1 H1.vspace.after = 1 H2.vspace.after = 1 H3.vspace.after = 1 H4.vspace.after = 1 H5.vspace.after = 1 H6.vspace.after = 1 TABLE.vspace.before = 1 TABLE.vspace.after = 1 CODE.vspace.before = 0 CODE.vspace.after = 0 BLOCKQUOTE.vspace.before = 1 BLOCKQUOTE.vspace.after = 1 PRE.vspace.before = 1 PRE.vspace.after = 1 PRE.indent.left = 2 IMG.replace.noalt = IMG.alt.prefix = \ IMG.alt.suffix = \ html2text-1.3.2a/sgml.C0100644000000000000000000004003107760112527013363 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. 
*/ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Thu Oct 4 22:14:38 CEST 2001: included EURO-sign * Sat Sep 14 15:23:25 CEST 2002: Added plain ASCII output patch by Bela Lubkin * Thu Nov 20 18:23:59 CET 2003: SGML entities array revised */ /***************************************************************************/ #include #include #include #include "html.h" #include "sgml.h" #ifndef nelems #define nelems(array) (sizeof(array) / sizeof((array)[0])) #endif /* ------------------------------------------------------------------------- */ /* * Selected SGML entities, with translations to ISO-8859-1 and to * plain ASCII. */ /* Straight-ASCII and extra entities partially * added by Bela Lubkin . */ /* * Keep this array sorted alphabetically! */ static const struct TextToInt { char name[8]; int iso8859code; char *asciistr; } entities[] = { { "AElig", LATIN1_AElig, "AE" }, { "AMP", 0, "&" }, { "Aacute", LATIN1_Aacute, "A'" }, { "Acirc", LATIN1_Acirc, "A^" }, { "Agrave", LATIN1_Agrave, "A`" }, { "Alpha", 0, "A" }, { "Aring", LATIN1_Aring, "AA" }, { "Atilde", LATIN1_Atilde, "A~" }, { "Auml", LATIN1_Auml, "A\"" }, { "Beta", 0, "B" }, { "Ccedil", LATIN1_Ccedil, "C," }, { "Chi", 0, "H" }, { "Dagger", 0, "++" }, { "Delta", 0, "D" }, { "ETH", LATIN1_ETH, "D-" }, { "Eacute", LATIN1_Eacute, "E'" }, { "Ecirc", LATIN1_Ecirc, "E^" }, { "Egrave", LATIN1_Egrave, "E`" }, { "Epsilon", 0, "E" }, { "Eta", 0, "E" }, { "Euml", LATIN1_Euml, "E\"" }, { "GT", 0, ">" }, { "Gamma", 0, "G" }, { "Iacute", LATIN1_Iacute, "I'" }, { "Icirc", LATIN1_Icirc, "I^" }, { "Igrave", LATIN1_Igrave, "I`" }, { "Iota", 0, "I" }, { "Iuml", LATIN1_Iuml, "I\"" }, { "Kappa", 0, "K" }, { "LT", 0, "<" }, { "Lambda", 0, "L" }, { "Mu", 0, "M" }, { "Ntilde", LATIN1_Ntilde, "N~" }, { "Nu", 0, "N" }, { "OElig", 0, "OE" }, { "Oacute", LATIN1_Oacute, "O'" }, { "Ocirc", LATIN1_Ocirc, "O^" }, { 
"Ograve", LATIN1_Ograve, "O`" }, { "Omega", 0, "O" }, { "Omicron", 0, "O" }, { "Oslash", LATIN1_Oslash, "O/" }, { "Otilde", LATIN1_Otilde, "O~" }, { "Ouml", LATIN1_Ouml, "O\"" }, { "Phi", 0, "F" }, { "Pi", 0, "P" }, { "Prime", 0, "''" }, { "Psi", 0, "PS" }, { "QUOT", 0, "\"" }, { "Rho", 0, "R" }, { "Scaron", 0, "S" }, { "Sigma", 0, "S" }, { "THORN", LATIN1_THORN, "TH" }, { "Tau", 0, "T" }, { "Theta", 0, "TH" }, { "Uacute", LATIN1_Uacute, "U'" }, { "Ucirc", LATIN1_Ucirc, "U^" }, { "Ugrave", LATIN1_Ugrave, "U`" }, { "Upsilon", 0, "U" }, { "Uuml", LATIN1_Uuml, "U\"" }, { "Xi", 0, "X" }, { "Yacute", LATIN1_Yacute, "Y'" }, { "Yuml", 0, "Y\"" }, { "Zeta", 0, "Z" }, { "aacute", LATIN1_aacute, "a'" }, { "acirc", LATIN1_acirc, "a^" }, { "acute", LATIN1_acute, "'" }, { "aelig", LATIN1_aelig, "ae" }, { "agrave", LATIN1_agrave, "a`" }, { "alefsym", 0, "Aleph" }, { "alpha", 0, "a" }, { "amp", 0, "&" }, { "and", 0, "AND" }, { "ang", 0, "-V" }, { "apos", 0, "'" }, { "aring", LATIN1_aring, "aa" }, { "asymp", 0, "~=" }, { "atilde", LATIN1_atilde, "a~" }, { "auml", LATIN1_auml, "a\"" }, { "bdquo", 0, "\"" }, { "beta", 0, "b" }, { "brvbar", LATIN1_brvbar, "|" }, { "bull", 0, " o " }, { "cap", 0, "(U" }, { "ccedil", LATIN1_ccedil, "c," }, { "cedil", LATIN1_cedil, "," }, { "cent", LATIN1_cent, "-c-" }, { "chi", 0, "h" }, { "circ", 0, "^" }, // { "clubs", 0, "[clubs]" }, { "cong", 0, "?=" }, { "copy", LATIN1_copy, "(c)" }, { "crarr", 0, "<-'" }, { "cup", 0, ")U" }, { "curren", LATIN1_curren, "CUR" }, { "dArr", 0, "vv" }, { "dagger", 0, "+" }, { "darr", 0, "v" }, { "deg", LATIN1_deg, "DEG" }, { "delta", 0, "d" }, // { "diams", 0, "[diamonds]" }, { "divide", LATIN1_divide, "/" }, { "eacute", LATIN1_eacute, "e'" }, { "ecirc", LATIN1_ecirc, "e^" }, { "egrave", LATIN1_egrave, "e`" }, { "empty", 0, "{}" }, { "epsilon", 0, "e" }, { "equiv", 0, "==" }, { "eta", 0, "e" }, { "eth", LATIN1_eth, "d-" }, { "euml", LATIN1_euml, "e\"" }, { "euro", 0, "EUR" }, { "exist", 0, "TE" }, { "fnof", 0, "f" }, 
{ "forall", 0, "FA" }, { "frac12", LATIN1_frac12, " 1/2" }, { "frac14", LATIN1_frac14, " 1/4" }, { "frac34", LATIN1_frac34, " 3/4" }, { "frasl", 0, "/" }, { "gamma", 0, "g" }, { "ge", 0, ">=" }, { "gt", 0, ">" }, { "hArr", 0, "<=>" }, { "harr", 0, "<->" }, // { "hearts", 0, "[hearts]" }, { "hellip", 0, "..." }, { "iacute", LATIN1_iacute, "i'" }, { "icirc", LATIN1_icirc, "i^" }, { "iexcl", LATIN1_iexcl, "!" }, { "igrave", LATIN1_igrave, "i`" }, { "image", 0, "Im" }, { "infin", 0, "oo" }, { "int", 0, "INT" }, { "iota", 0, "i" }, { "iquest", LATIN1_iquest, "?" }, { "isin", 0, "(-" }, { "iuml", LATIN1_iuml, "i\"" }, { "kappa", 0, "k" }, { "lArr", 0, "<=" }, { "lambda", 0, "l" }, { "lang", 0, "" }, { "lsaquo", 0, "<" }, { "lsquo", 0, "`" }, { "lt", 0, "<" }, { "macr", LATIN1_macr, "-" }, { "mdash", 0, "--" }, { "micro", LATIN1_micro, "my" }, { "middot", LATIN1_middot, "." }, { "minus", 0, "-" }, { "mu", 0, "m" }, { "nabla", 0, "Nabla" }, { "nbsp", LATIN1_nbsp, " " }, { "ndash", 0, "-" }, { "ne", 0, "!=" }, { "ni", 0, "-)" }, { "not", LATIN1_not, "NOT" }, { "notin", 0, "!(-" }, { "nsub", 0, "!(C" }, { "ntilde", LATIN1_ntilde, "n~" }, { "nu", 0, "n" }, { "oacute", LATIN1_oacute, "o'" }, { "ocirc", LATIN1_ocirc, "o^" }, { "oelig", 0, "oe" }, { "ograve", LATIN1_ograve, "o`" }, { "oline", LATIN1_macr, "-" }, { "omega", 0, "o" }, { "omicron", 0, "o" }, { "oplus", 0, "(+)" }, { "or", 0, "OR" }, { "ordf", LATIN1_ordf, "-a" }, { "ordm", LATIN1_ordm, "-o" }, { "oslash", LATIN1_oslash, "o/" }, { "otilde", LATIN1_otilde, "o~" }, { "otimes", 0, "(x)" }, { "ouml", LATIN1_ouml, "o\"" }, { "para", LATIN1_para, "P:" }, { "part", 0, "PART" }, { "permil", 0, " 0/00" }, { "perp", 0, "-T" }, { "phi", 0, "f" }, { "pi", 0, "p" }, { "piv", 0, "Pi" }, { "plusmn", LATIN1_plusmn, "+/-" }, { "pound", LATIN1_pound, "-L-" }, { "prime", 0, "'" }, { "prod", 0, "PROD" }, { "prop", 0, "0(" }, { "psi", 0, "ps" }, { "quot", 0, "\"" }, { "rArr", 0, "=>" }, { "radic", 0, "SQRT" }, { "rang", 0, "/>" }, { 
"raquo", LATIN1_raquo, ">>" }, { "rarr", 0, "->" }, // { "rceil", 0, ">|" }, { "rdquo", 0, "\"" }, { "real", 0, "Re" }, { "reg", LATIN1_reg, "(R)" }, // { "rfloor", 0, "|>" }, { "rho", 0, "r" }, { "rsaquo", 0, ">" }, { "rsquo", 0, "'" }, { "sbquo", 0, "'" }, { "scaron", 0, "s" }, { "sdot", 0, "DOT" }, { "sect", LATIN1_sect, "S:" }, { "shy", LATIN1_shy, "" }, { "sigma", 0, "s" }, { "sigmaf", 0, "s" }, { "sim", 0, "~" }, // { "spades", 0, "[spades]" }, { "sub", 0, "(C" }, { "sube", 0, "(_" }, { "sum", 0, "SUM" }, { "sup", 0, ")C" }, { "sup1", LATIN1_sup1, "^1" }, { "sup2", LATIN1_sup2, "^2" }, { "sup3", LATIN1_sup3, "^3" }, { "supe", 0, ")_" }, { "szlig", LATIN1_szlig, "ss" }, { "tau", 0, "t" }, { "there4", 0, ".:" }, { "theta", 0, "th" }, { "thorn", LATIN1_thorn, "th" }, { "tilde", 0, "~" }, { "times", LATIN1_times, "x" }, { "trade", 0, "[TM]" }, { "uArr", 0, "^^" }, { "uacute", LATIN1_uacute, "u'" }, { "uarr", 0, "^" }, { "ucirc", LATIN1_ucirc, "u^" }, { "ugrave", LATIN1_ugrave, "u`" }, { "uml", LATIN1_uml, "\"" }, { "upsilon", 0, "u" }, { "uuml", LATIN1_uuml, "u\"" }, { "weierp", 0, "P" }, { "xi", 0, "x" }, { "yacute", LATIN1_yacute, "y'" }, { "yen", LATIN1_yen, "YEN" }, { "yuml", LATIN1_yuml, "y\"" }, { "zeta", 0, "z" }, }; extern int use_iso8859; /* ------------------------------------------------------------------------- */ void replace_sgml_entities(string *s) { string::size_type j = 0; for (;;) { string::size_type l = s->length(); /* * Skip characters before ampersand. */ while (j < l && s->at(j) != '&') ++j; /* * We could convert high-bit chars to "é" here if use_iso8859 * is off, then let them be translated or not. Is the purpose of * !use_iso8859 to allow SGML entities to be seen, or to strongly * filter against high-ASCII chars that might blow up a terminal * that doesn't speak ISO8859? For the moment, "allow SGML entities * to be seen" -- no filtering here. */ if (j >= l) break; /* * So we have an ampersand... 
*/ /* * Don't process the last three characters; an SGML entity wouldn't fit * in anyway! */ if (j + 3 >= l) break; // Watch out! Unsigned arithmetics! string::size_type beg = j++; // Skip the ampersand; /* * Look at the next character. */ char c = s->at(j++); if (c == '#') { /* * Decode entities like "é". * Some authors forget the ";", but we tolerate this. */ c = s->at(j++); if (isdigit(c)) { int x = c - '0'; for (; j < l; ++j) { c = s->at(j); if (c == ';') { ++j; break; } if (!isdigit(c)) break; x = 10 * x + c - '0'; } if (use_iso8859 || (x < 128)) { s->replace(beg, j - beg, 1, (char) x); j = beg + 1; } else { for (int i = 0; i < nelems(entities); i++) { if (x == entities[i].iso8859code) { s->replace(beg, j - beg, entities[i].asciistr); break; } } /* else don't replace it at all, we don't have a translation */ } } } else if (isalpha(c)) { /* * Decode entities like " ". * Some authors forget the ";", but we tolerate this. */ char name[8]; name[0] = c; size_t i = 1; for (; j < l; ++j) { c = s->at(j); if (c == ';') { ++j; break; } if (!isalnum(c)) break; if (i < sizeof(name) - 1) name[i++] = c; } name[i] = '\0'; const TextToInt *entity = (const TextToInt *) bsearch( name, entities, nelems(entities), sizeof(TextToInt), (int (*)(const void *, const void *)) strcmp ); if (entity != NULL) { if (use_iso8859 && entity->iso8859code) { s->replace(beg, j - beg, 1, (char) entity->iso8859code); j = beg + 1; } else if (entity->asciistr) { s->replace(beg, j - beg, entity->asciistr); j = beg + 1; } /* else don't replace it at all, we don't have a translation */ } } else { ; /* EXTENSION: Allow literal '&' sometimes. */ } } } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/sgml.h0100644000000000000000000000507007005304410013416 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. 
* http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* ------------------------------------------------------------------------- */ #ifndef __sgml_h_INCLUDED__ /* { */ #define __sgml_h_INCLUDED__ /* { */ /* ------------------------------------------------------------------------- */ #ident "$Id: sgml.h,v 1.4 1999/10/26 10:57:12 arno Exp $" #include /* ------------------------------------------------------------------------- */ /* * Replace SGML entities like "ä" and "é" within "*s". */ extern void replace_sgml_entities(string *s); /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/table.C0100644000000000000000000004452507516766542013537 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. 
*/ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Mon Jul 22 13:30:32 CEST 2002: fixed segfault */ /***************************************************************************/ /* ------------------------------------------------------------------------- */ /* * "Table::format()" has been taken out of "format.C" because it is way more * complex than the "format()" methods of the other HTML elements. */ /* ------------------------------------------------------------------------- */ #ident "$Id: table.C,v 1.15 1999/11/12 18:57:35 arno Exp $" #include "html.h" #include "auto_aptr.h" #include "format.h" /* ------------------------------------------------------------------------- */ // Should be local to "Table::format()", but CFRONT can't handle this. struct LogicalCell { const TableCell *cell; // Points to the parsed cell.. int x, y; // Position of cell in table. int w, h; // COLSPAN/ROWSPAN of cell. int halign; int valign; Area::size_type width; // Current contents width. bool minimized; // Cannot be narrowed any more. auto_ptr area; // Formatted cell -- computed at a late stage. }; /* ------------------------------------------------------------------------- */ /* * Correct x and y of the logical cells according to the other cells' ROWSPAN * and COLSPAN. 
*/ static void correct_xy( list > *const lcs_in_out, int *const number_of_rows_in_out, int *const number_of_columns_in_out ) { list >::iterator i; for (i = lcs_in_out->begin(); i != lcs_in_out->end(); ++i) { const LogicalCell *const p = (*i).get(); if (p->w != 1 || p->h != 1) { list >::iterator j; for (j = i, ++j; j != lcs_in_out->end(); ++j) { LogicalCell *const q = (*j).get(); if (q->y != p->y) break; q->x += p->w - 1; } for (; j != lcs_in_out->end(); ++j) { LogicalCell *const q = (*j).get(); if (q->y >= p->y + p->h) break; if (q->x >= p->x) q->x += p->w; } } if (p->x + p->w > *number_of_columns_in_out) *number_of_columns_in_out = p->x + p->w; if (p->y + p->h > *number_of_rows_in_out ) *number_of_rows_in_out = p->y + p->h; } } static void create_lcs( const Table &t, const Area::size_type w, const Area::size_type left_border_width, const Area::size_type right_border_width, const Area::size_type column_spacing, list > *const lcs_return, int *const number_of_rows_return, int *const number_of_columns_return ) { *number_of_rows_return = 0; *number_of_columns_return = 0; const list > &rl(*t.rows); list >::const_iterator ri; int y; for (ri = rl.begin(), y = 0; ri != rl.end(); ++ri, ++y) { if (!(*ri).get()) continue; const TableRow &row(**ri); int row_halign = get_attribute( row.attributes.get(), "ALIGN", Area::LEFT, "LEFT", Area::LEFT, "CENTER", Area::CENTER, "RIGHT", Area::RIGHT, 0 ); int row_valign = get_attribute( row.attributes.get(), "VALIGN", Area::MIDDLE, "TOP", Area::LEFT, "MIDDLE", Area::MIDDLE, "BOTTOM", Area::BOTTOM, 0 ); const list > &cl(*row.cells); list >::const_iterator ci; int x; for (ci = cl.begin(), x = 0; ci != cl.end(); ++ci, ++x) { if (!(*ci).get()) continue; const TableCell &cell(**ci); auto_ptr p(new LogicalCell); p->cell = &cell; p->x = x; p->y = y; p->w = get_attribute(cell.attributes.get(), "COLSPAN", 1); p->h = get_attribute(cell.attributes.get(), "ROWSPAN", 1); if (p->w < 1) p->w = 1; if (p->h < 1) p->h = 1; if (x + p->w > 
*number_of_columns_return) { *number_of_columns_return = x + p->w; } if (y + p->h > *number_of_rows_return) { *number_of_rows_return = y + p->h; } p->halign = get_attribute( cell.attributes.get(), "ALIGN", row_halign, "LEFT", Area::LEFT, "CENTER", Area::CENTER, "RIGHT", Area::RIGHT, 0 ); p->valign = get_attribute( cell.attributes.get(), "VALIGN", row_valign, "TOP", Area::TOP, "MIDDLE", Area::MIDDLE, "BOTTOM", Area::BOTTOM, 0 ); { auto_ptr tmp(cell.format( w - left_border_width - right_border_width - (*number_of_columns_return - 1) * (column_spacing + 0), Area::LEFT // Yields better results than "p->halign"! )); p->width = tmp.get() ? tmp->width() : 0; } p->minimized = false; lcs_return->push_back(p); } } correct_xy(lcs_return, number_of_rows_return, number_of_columns_return); } static void compute_widths( const list > &lcs, const int number_of_columns, const Area::size_type column_spacing, const Area::size_type left_border_width, const Area::size_type right_border_width, Area::size_type *const table_width_return, Area::size_type *const column_widths_return ) { /* * Compute the column widths. */ { for (int i = 0; i < number_of_columns; i++) column_widths_return[i] = 0; } for (int x = 0; x < number_of_columns; x++) { list >::const_iterator i; for (i = lcs.begin(); i != lcs.end(); ++i) { const LogicalCell &lc(**i); if (x != lc.x + lc.w - 1) continue; // Cell not in column? Area::size_type width = lc.width; for (int j = lc.x; j < x; j++) { // Beware! "width" is unsigned! if (column_widths_return[j] + column_spacing >= width) { width = 0; } else { width -= column_widths_return[j] + column_spacing; } } if (width >= column_widths_return[x]) column_widths_return[x] = width; } } /* * Compute the table width. 
*/ *table_width_return = ( left_border_width + (number_of_columns - 1) * column_spacing + right_border_width ); { for (int x = 0; x < number_of_columns; x++) { *table_width_return += column_widths_return[x]; } } } /* * Examine the table for the widest column that can be narrowed. (A column * cannot be narrowed if its widest cell cannot be narrowed.) Return "false" * if none of the columns can be narrowed. Otherwise, attempt to narrow that * column and update "lcs_in_out", "column_widths_in_out", and * "table_width_in_out". */ static bool narrow_table( list > *const lcs_in_out, const int number_of_columns, const Area::size_type column_spacing, Area::size_type *const column_widths_in_out, Area::size_type *const table_width_in_out ) { /* * Seek for the widest column that can still be narrowed. */ int widest_column = -1; for (int x = 0; x < number_of_columns; x++) { // Zero width columns cannot be narrowed. if (column_widths_in_out[x] == 0) continue; // Is this the widest column so far? if ( widest_column != -1 && column_widths_in_out[x] <= column_widths_in_out[widest_column] ) continue; // Yes; can the widest cell(s) in this column be narrowed? const list > &lcl(*lcs_in_out); list >::const_iterator i; for (i = lcl.begin(); i != lcl.end(); ++i) { const LogicalCell &lc = **i; if (lc.x + lc.w - 1 != x) continue; // Not in this column. if (!lc.minimized) continue; Area::size_type left_of_cell = 0; for (int j = lc.x; j < x; j++) { left_of_cell += column_widths_in_out[j] + column_spacing; } if (lc.width >= left_of_cell + column_widths_in_out[x]) { // Minimized cell is as wide as the column; cannot narrow this // column. break; } } if (i == lcl.end()) widest_column = x; } /* * Give up if there is no more cell that can be narrowed. */ if (widest_column == -1) return false; /* * Attempt to narrow the "widest_column" by one character. 
*/ const Area::size_type old_column_width = column_widths_in_out[widest_column]; list >::iterator i; Area::size_type new_column_width = 0; for (i = lcs_in_out->begin(); i != lcs_in_out->end(); ++i) { LogicalCell &lc = **i; if (lc.x + lc.w - 1 != widest_column) continue; // Not in this column. Area::size_type left_of_column = 0; for (int j = lc.x; j < widest_column; ++j) { left_of_column += column_widths_in_out[j] + column_spacing; } Area::size_type w = lc.width; if (w >= left_of_column + old_column_width) { auto_ptr tmp(lc.cell->format( left_of_column + old_column_width - 1, Area::LEFT // Yields better results than "lc.halign"! )); w = tmp->width(); if (w >= left_of_column + old_column_width) lc.minimized = true; } if (w > left_of_column + new_column_width) { new_column_width = w - left_of_column; } } column_widths_in_out[widest_column] = new_column_width; *table_width_in_out -= old_column_width - new_column_width; //cerr // << "Narrowed column " // << widest_column // << " from " // << old_column_width // << " to " // << new_column_width // << endl; //cerr << "table_width=" << *table_width_in_out << endl; //for (int z = 0; z < number_of_columns; ++z) { // cerr << "column_widths[" << z << "]=" << column_widths_in_out[z] << endl; //} return true; } /* * Compute the heights of each row. Take into account the cells of the row, * plus the cells above that "hang" into the row. * * As a side effect, format the table cells. 
*/
static void
compute_row_heights(
  list<auto_ptr<LogicalCell> > *lcs_in_out,
  const int                    number_of_rows,
  const Area::size_type        row_spacing,
  Area::size_type              *const row_heights_return,
  const int                    column_spacing,
  const Area::size_type        *column_widths
)
{
  { for (int y = 0; y < number_of_rows; y++) row_heights_return[y] = 0; }

  // The cells are visited in reverse list order.
  list<auto_ptr<LogicalCell> >::reverse_iterator i;
  for (i = lcs_in_out->rbegin(); i != lcs_in_out->rend(); ++i) {
    LogicalCell &lc(**i);

    // Width available to the cell: all columns it spans plus the spacing
    // between them.
    Area::size_type w = (lc.w - 1) * column_spacing;
    for (int x = lc.x; x < lc.x + lc.w; ++x) w += column_widths[x];

    lc.area.reset(lc.cell->format(w, lc.halign));
    if (!lc.area.get()) continue;

//    cerr << "lc.halign=" << lc.halign << ", w=" << w << endl;
//    cerr
//      << "***" << *lc.area << "***" << endl <<
//      lc.area->width() << "x" << lc.area->height() << endl;

    // Height currently provided by the rows the cell spans.
    Area::size_type h = (lc.h - 1) * row_spacing;
    { for (int y = lc.y; y < lc.y + lc.h; ++y) h += row_heights_return[y]; }

    // If the cell is taller than that, enlarge the last row it spans.
    if (lc.area->height() > h) {
      row_heights_return[lc.y + lc.h - 1] += lc.area->height() - h;
    }
  }
}

/* ------------------------------------------------------------------------- */

// <TABLE> Attributes: ALIGN (processed) WIDTH (ignored) BORDER (processed)
//                     CELLSPACING CELLPADDING (ignored)
// <TR>    Attributes: ALIGN VALIGN (processed)
// <TD>
Attributes: NOWRAP (ignored) ROWSPAN COLSPAN ALIGN VALIGN // (processed) WIDTH HEIGHT (ignored) Area * Table::format(Area::size_type w, int halign) const { halign = get_attribute( attributes.get(), "ALIGN", halign, "LEFT", Area::LEFT, "CENTER", Area::CENTER, "RIGHT", Area::RIGHT, 0 ); // => default => no border //
=> "" => draw border //
=> "0" => no border //
=> "1" => draw border bool draw_border = get_attribute(attributes.get(), "BORDER", "0") != "0"; static const Area::size_type column_spacing = 1; static const Area::size_type row_spacing = 0; Area::size_type left_border_width = draw_border ? 1 : 0; Area::size_type right_border_width = draw_border ? 1 : 0; static const Area::size_type top_border_width = 0; static const Area::size_type bottom_border_width = 0; /* * Iterate through the table's cells and create a list of "LogicalCell"s. * Compute the positions and sizes of all cells, format their contents, and * compute the number of rows and columns. */ list > lcs; int number_of_rows, number_of_columns; create_lcs( *this, w, left_border_width, right_border_width, column_spacing, &lcs, &number_of_rows, &number_of_columns ); /* * The code below relies on that a table has 1 or more rows and one or * more columns. Arno Unkrig 2002-07-21. */ if (number_of_rows == 0 || number_of_columns == 0) return 0; /* * Now compute the column widths and the table width. */ auto_aptr column_widths = new Area::size_type[number_of_columns]; Area::size_type table_width; compute_widths( lcs, number_of_columns, column_spacing, left_border_width, right_border_width, &table_width, column_widths.get() ); /* * Narrow the widest column that can be narrowed, until the entire table is * narrow enough. */ while (table_width > w) { if (!narrow_table( &lcs, /* in/out */ number_of_columns, column_spacing, column_widths.get(), /* in/out */ &table_width /* in/out */ )) break; } /* * At this point, all cells are formatted such that the table width fits * into "w" (if possible). */ /* * Compute row heights. 
*/ auto_aptr row_heights = new Area::size_type[number_of_rows]; compute_row_heights( &lcs, number_of_rows, row_spacing, row_heights.get(), column_spacing, column_widths.get() ); Area::size_type table_height = ( top_border_width + (number_of_rows - 1) * row_spacing + bottom_border_width ); { for (int y = 0; y < number_of_rows; y++) table_height += row_heights[y]; } /* * Everything is prepared... start drawing! */ auto_ptr res(new Area); { static int vspace_before = Formatting::getInt("TABLE.vspace.before", 0); res->prepend(vspace_before); } Area::size_type x0 = 0; if (halign != Area::LEFT && table_width < w) { if (halign == Area::CENTER) x0 += (w - table_width) / 2; else if (halign == Area::RIGHT) x0 += w - table_width; } /* * Draw the caption, if any. */ if (caption.get()) { auto_ptr cap(caption->format(table_width, Area::CENTER)); if (cap.get() && cap->height() >= 1) { cap->add_attribute(Cell::BOLD); res->insert(*cap, x0, 0); } } /* * Draw the top and the left border. */ Area::size_type y0 = res->height(); if (draw_border) { if (y0 == 0) y0 = 1; res->fill('|', x0, y0, left_border_width, table_height); // Some trickery: The top border underline is easily masked by the bold // caption, so remove the boldness where possible in favor of the // underline. { Cell *cells = (*res)[y0 - 1]; for (Area::size_type x = 0; x < res->width(); x++) { if (cells[x].character == ' ') cells[x].attribute = Cell::NONE; } } res->add_attribute( Cell::UNDERLINE, x0 + left_border_width, y0 - 1, table_width - left_border_width - right_border_width, 1 ); } /* * Draw the cells and their bottom and right borders. */ { const list > &lcl(lcs); list >::const_iterator i; for (i = lcl.begin(); i != lcl.end(); ++i) { const LogicalCell &lc = **i; // Calculate cell position. 
Area::size_type x = x0 + left_border_width, y = y0 + top_border_width; { for (int j = 0; j < lc.x; j++) x += column_widths[j] + column_spacing;} { for (int j = 0; j < lc.y; j++) y += row_heights [j] + row_spacing; } // Calculate cell dimensions. Area::size_type w = (lc.w - 1) * column_spacing; { for (int j = lc.x; j < lc.x + lc.w; j++) w += column_widths[j]; } Area::size_type h = (lc.h - 1) * row_spacing; { for (int j = lc.y; j < lc.y + lc.h; j++) h += row_heights[j]; } // Draw cell contents and borders. if (lc.area.get()) { res->insert(*lc.area, x, y, w, h, lc.halign, lc.valign); } if (draw_border) { // If the right neighbor cell bottom is flush with this cell's bottom, // then also underline the border between the two cells. bool underline_column_separator = false; { int lx = lc.x + lc.w, ly = lc.y + lc.h; list >::const_iterator j; for (j = lcl.begin(); j != lcl.end(); ++j) { const LogicalCell &lc2 = **j; if (lc2.x == lx && lc2.y + lc2.h == ly) { underline_column_separator = true; break; } } } res->add_attribute( Cell::UNDERLINE, x, y + h - 1, // x, y w + underline_column_separator, 1 // w, h ); res->fill('|', x + w, y, 1, h); } } } { static int vspace_after = Formatting::getInt("TABLE.vspace.after", 0); res->append(vspace_after); } return res.release(); } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/urlistream.C0100644000000000000000000002267207516771060014625 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Thu Oct 4 21:49:09 CEST 2001: ported to g++ 3.0 * Sun Apr 7 11:59:03 CEST 2002: Handle URLs with missing node * Mon Jul 22 13:53:02 CEST 2002: Made finaly reading from STDIN work. */ /***************************************************************************/ #include #include #include #include #include #include #include #include #include #ifdef SYS_POLL_MISSING /* { */ struct pollfd { int fd; /* file descriptor */ short events; /* requested events */ short revents; /* returned events */ }; extern "C" int poll(struct pollfd *ufds, unsigned int nfds, int timeout); #define POLLIN 0x0001 /* There is data to read */ #define POLLPRI 0x0002 /* There is urgent data to read */ #define POLLOUT 0x0004 /* Writing now will not block */ #define POLLERR 0x0008 /* Error condition */ #define POLLHUP 0x0010 /* Hung up */ #define POLLNVAL 0x0020 /* Invalid request: fd not open */ #else /* } { */ #include #endif /* } */ #include #include #include #include #include "urlistream.h" using std::ios; /* ------------------------------------------------------------------------- */ /* * Compile with * * CC -DTESTING urlistream.C -o urlcat */ /* ------------------------------------------------------------------------- */ void urlistream::open( const char *url, int timeout /* = default_timeout */ // Milliseconds ) { if (is_open()) close(); /* * "http:"
URL? */ if (!memcmp(url, "http:", 5)) { http_open(url + 5, timeout); return; } /* * "file:" URL? */ if (!memcmp(url, "file:", 5)) { file_open(url + 5); return; } /* * is the URL a bare file name? */ if (strchr(url, ':') == NULL) { file_open(url); return; } open_error_ = "Unknown protocol (only \"file:\" and \"http:\" allowed)"; } void urlistream::open( const string &url, int timeout /* = default_timeout */ // Milliseconds ) { open(url.c_str(), timeout); } /* ------------------------------------------------------------------------- */ void urlistream::http_open( const char *address, // The URL portion after "http:" int timeout // Milliseconds ) { /* * Break up the HTTP address: * * "//" [ ":" ] * * A missing node is interpreted as node "/" - Arno */ char host_name[100]; char port_name[100]; char node_name[1000]; { const char *p = address; char *q; if (*p++ != '/' || *p++ != '/') { open_error_ = "HTTP address does not begin with \"//\""; return; } for (q = host_name; *p && *p != ':' && *p != '/'; ++p) { if (q < host_name + sizeof(host_name) - 1) *q++ = *p; } *q = '\0'; if (*p == ':') { ++p; for (q = port_name; *p && *p != '/'; ++p) { if (q < port_name + sizeof(port_name) - 1) *q++ = *p; } *q = '\0'; } else { strcpy(port_name, "80"); } for (q = node_name; *p && *p != '#'; ++p) { if (q < node_name + sizeof(node_name) - 1) *q++ = *p; } *q = '\0'; if (!node_name[0]) { strcpy(node_name, "/"); } } struct sockaddr_in soc_address; soc_address.sin_family = AF_INET; /* * Parse the host name. 
*/ { const char *p; int dot_count = 0; for (p = host_name; *p; ++p) { if (*p == '.') { ++dot_count; } else if (!isdigit(*p)) break; } if (*p == '\0' && dot_count == 3) { soc_address.sin_addr.s_addr = inet_addr(host_name); } else { struct hostent *h = gethostbyname(host_name); if ( h == 0 || h->h_addrtype != AF_INET || h->h_length != sizeof(struct in_addr) ) { open_error_ = "Could not resolve host name"; return; } soc_address.sin_addr = *(struct in_addr *) h->h_addr; } } /* * Parse the port name. */ if (isdigit(port_name[0])) { soc_address.sin_port = htons(atoi(port_name)); } else { struct servent *s = getservbyname(port_name, 0); soc_address.sin_port = htons(s ? s->s_port : 80); } /* * Strip the "#anchor" suffix from the node name. */ { char *p = strchr(node_name, '#'); if (p) *p = '\0'; } /* * On-the-fly definition of "FileHandle" which closes a UNIX file descriptor * on destruction. */ class FileHandle { public: FileHandle() : fd(-1) {} ~FileHandle() { if (fd != -1) ::close(fd); } operator int() { return fd; } int operator=(int x) { return (fd = x); } private: int fd; } fd; fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (fd == -1) { open_error_ = strerror(errno); return; } /* * Make the socket non-blocking, so the "connect()" can be canceled. This * means that when we issue the "connect()" we should NOT have to wait for * the accept on the other end. */ if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { open_error_ = strerror(errno); return; } /* * Issue the "connect()". Since the server can't do an instantaneous * "accept()" and we are non-blocking, this will almost certainly return * a negative status. */ if (connect( fd, (struct sockaddr *) &soc_address, sizeof(soc_address) ) == -1) { if (errno != EINPROGRESS && errno != EAGAIN) { open_error_ = strerror(errno); return; } for (;;) { struct pollfd p; int res; p.fd = fd; p.events = POLLOUT; res = poll(&p, 1, timeout); /* * Interrupted "poll()"? */ if (res == -1 && errno == EINTR) continue; /* * Check for errors. 
*/ if (res == -1 && errno != EALREADY) { open_error_ = strerror(errno); return; } /* * Check for timeout. */ if (res == 0) { open_error_ = "\"connect()\" timed out"; return; } /* * Extra check here for connection success, if we try to * connect again, and get EISCONN, it means we have a * successful connection. * * Notice: On SINIX 5.43 B2000, the "poll()" returns "1" when the * timeout occurs (!?). When we call "poll()", we encounter a SIGPIPE * (!?). If we ignore it, "poll()" returns EINVAL (22) (!?). */ void (*sigpipe_handler)(int) = signal(SIGPIPE, SIG_IGN); /* { */ res = connect(fd, (struct sockaddr *) &soc_address, sizeof(soc_address)); (void) signal(SIGPIPE, sigpipe_handler); /* } */ if (res == -1 && errno == EISCONN) break; if (res == -1 && errno != EALREADY) { open_error_ = strerror(errno); return; } } } /* * Make the socket blocking again on good "connect()". */ if (fcntl(fd, F_SETFL, 0) == -1) { open_error_ = strerror(errno); return; } /* * Issue the HTTP request. * * Notice: "GET xyz" means "return the document without a header". */ char command[4 + (sizeof(node_name) - 1) + 4 + 1]; sprintf(command, "GET %s\r\n\r\n", node_name); ssize_t command_length = strlen(command); if (::write(fd, command, command_length) != command_length) { open_error_ = "Error sending HTTP GET request"; return; } /* * Attach the file descriptor to the ifstream. */ fd_ = fd; fd = -1; // ...so that it is not implicitly "::close()"'d. } /* ------------------------------------------------------------------------- */ void urlistream::file_open(const char *file_name) { fd_ = !strcmp(file_name, "-") ? ::dup(0) : ::open(file_name, O_RDONLY); open_error_ = strerror(errno); } /* ------------------------------------------------------------------------- */ const char * urlistream::open_error() const { return open_error_ ? 
open_error_ : "No error"; } /* ------------------------------------------------------------------------- */ int urlistream::get() { char ch; int ret = ::read(fd_, &ch, 1); return (ret > 0 ? ch : -1); } #ifdef TESTING /* { */ int main(int argc, char **argv) { if (argc < 2) { cerr << "Usage: urlcat [ ... ]" << endl; exit(1); } for (int i = 1; i < argc; ++i) { urlistream uis(argv[i]); if (!uis.is_open()) { cerr << "Opening \"" << argv[i] << "\": " << uis.open_error() << endl; exit(1); } for (;;) { int c = uis.get(); if (c == EOF) break; cout << (char) c; } } return 0; } #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/urlistream.h0100644000000000000000000000541307760121351014655 0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. 
*/ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Thu Oct 4 21:50:51 CEST 2001: ported to g++ 3.0 * Sun Apr 7 12:00:50 CEST 2002: Handle URLs with missing node * Wed Jul 2 22:08:05 CEST 2003: ported to g++ 3.3 * Tue Nov 11 21:11:51 CET 2003: fd_ might be uninitialized */ /***************************************************************************/ #ifndef __urlistream_h_INCLUDED__ /* { */ #define __urlistream_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #include #include #include using std::string; /* ------------------------------------------------------------------------- */ /* * Similar to an "ifstream", but "open()" is overloaded: Pass an URL instead * of a file name. */ class urlistream /* : public ifstream */ { public: enum { default_timeout = 10000 }; urlistream() : open_error_(0), fd_(-1) {} urlistream( const char *url, int timeout = default_timeout // Milliseconds ) : open_error_(0), fd_(-1) { open(url, timeout); } urlistream( const string &url, int timeout = default_timeout // Milliseconds ) : open_error_(0), fd_(-1) { open(url, timeout); } void open( const char *url, int timeout = default_timeout // Milliseconds ); void open( const string &url, int timeout = default_timeout // Milliseconds ); int is_open() const { return (fd_ > 0); } void close() { ::close(fd_); } const char *open_error() const; int get(); private: void http_open(const char *address, int timeout); void file_open(const char *file_name); const char *open_error_; int fd_; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/0040755000000000000000000000000007161241364013600 5ustar rootroothtml2text-1.3.2a/libstd/Makefile.in0100644000000000000000000000513607016566223015652 
0ustar rootroot # # Copyright (c) 1999 # GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. # http://www.gmrs.de # All rights reserved. # Author: Arno Unkrig (arno.unkrig@gmrs.de) # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. All advertising materials mentioning features or use of this software # must display the following acknowledgement: # This product includes software developed by GMRS Software GmbH. # 4. The name of GMRS Software GmbH may not be used to endorse or promote # products derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. 
# # $Id: Makefile.in,v 1.5 1999/11/23 19:35:47 arno Exp $ CXX = @CXX@ BOOL_DEFINITION = @BOOL_DEFINITION@ EXPLICIT = @EXPLICIT@ AR = ar MAKEDEPEND_INCLUDES = @MAKEDEPEND_INCLUDES@ DEBUG = -O INCLUDES = -I./include DEFINES = $(BOOL_DEFINITION) $(EXPLICIT) CPPFLAGS = $(INCLUDES) $(DEFINES) CXXFLAGS = $(CPPFLAGS) $(DEBUG) .SUFFIXES : .y .C .o .C.o: $(CXX) -c $(CXXFLAGS) $*.C # ----------------------------------------------------------------------------- default: all all: libstd.a OBJS = string.o rb_tree.o vector_base.o libstd.a: $(OBJS) $(AR) rv $@ $? # ----------------------------------------------------------------------------- clean : rm -f $(OBJS) clobber : clean rm -f libstd.a depend : @>Dependencies makedepend -f Dependencies $(CPPFLAGS) $(MAKEDEPEND_INCLUDES) *.C @rm -f Dependencies.bak include Dependencies html2text-1.3.2a/libstd/rb_tree.C0100644000000000000000000005176607014622325015337 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. 
The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ident "$Id: rb_tree.C,v 1.2 1999/11/17 21:57:41 arno Exp $" #include #include "rb_tree.h" #define ASSERT(x) if (!(x)) cerr << __FILE__ << ":" << __LINE__ << ": Assertion failed: " << #x << endl; else /* ------------------------------------------------------------------------- */ rb_tree::rb_tree( const node_type *from, const node_type *to, node_type *(*node_copier)(const node_type *) ) { while (from && from != to) { insert(node_copier(from)); from = successor(from); } } /* * Returns "end()" if tree is empty. */ rb_tree::node_type * rb_tree::begin() { node_type *n = end(); while (n->left) n = n->left; return n; } const rb_tree::node_type * rb_tree::begin() const { const node_type *n = end(); while (n->left) n = n->left; return n; } /* * Finds the first node that matches. * Returns "end()" if key cannot be found. */ rb_tree::node_type * rb_tree::find_first(value_pointer vp) { node_type *n = lower_bound(vp); return n == end() || node_less_than(vp, n) ? end() : n; } /* * Finds any of the nodes that match. 
 * Returns "end()" if key cannot be found.
 * Notice that this function is faster than "find_first()".
 */
rb_tree::node_type *
rb_tree::find_any(value_pointer vp)
{
  // Plain binary-search descent; stops at the first node that is neither
  // less than nor greater than "vp".
  node_type *n = root;
  while (n) {
    if (node_less_than(vp, n)) {
      n = n->left;
    } else
    if (node_less_than(n, vp)) {
      n = n->right;
    } else
    {
      return n;
    }
  }
  return end();
}

/*
 * Number of nodes in the subtree rooted at "n" ("n" must not be 0).
 * Visits every node of the subtree.
 */
/*static*/ rb_tree::size_type
rb_tree::size(const node_type *n)
{
  return (
    (n->left ? size(n->left ) : 0) +
    1 +
    (n->right ? size(n->right) : 0)
  );
}

/*
 * Number of nodes in the subtree rooted at "n" ("n" must not be 0) that
 * are neither less than nor greater than "vp".
 */
rb_tree::size_type
rb_tree::count(value_pointer vp, const node_type *n) const
{
  if (node_less_than(n, vp)) return n->right ? count(vp, n->right) : 0;
  if (node_less_than(vp, n)) return n->left ? count(vp, n->left ) : 0;
  // "n" matches; further matches may sit on either side of it.
  return (
    1 +
    (n->left ? count(vp, n->left ) : 0) +
    (n->right ? count(vp, n->right) : 0)
  );
}

/*
 * Returns the first node that is not less than "vp".
 * Returns "end()" if every node of the tree is less than "vp".
 */
rb_tree::node_type *
rb_tree::lower_bound(value_pointer vp)
{
  if (!root) return end();

  node_type *n = root;
  node_type *res = end(); // Best candidate found so far.
  for (;;) {
    // Skip to the right over nodes that are less than "vp".
    while (node_less_than(n, vp)) {
      n = n->right;
      if (!n) return res;
    }
    // "n" is not less than "vp": look for an earlier candidate to its left.
    for (;;) {
      if (!n->left) return n;
      if (node_less_than(n->left, vp)) {
        res = n;
        n = n->left;
        break;
      }
      n = n->left;
    }
  }
}

/*
 * Returns the first node that "vp" is less than.
 * Returns "end()" if "vp" is not less than any node of the tree.
 */
rb_tree::node_type *
rb_tree::upper_bound(value_pointer vp)
{
  if (!root) return end();

  node_type *n = root;
  node_type *res = end(); // Best candidate found so far.
  for (;;) {
    // Skip to the right over nodes that "vp" is not less than.
    while (!node_less_than(vp, n)) {
      n = n->right;
      if (!n) return res;
    }
    // "vp" is less than "n": look for an earlier candidate to its left.
    for (;;) {
      if (!n->left) return n;
      if (!node_less_than(vp, n->left)) {
        res = n;
        n = n->left;
        break;
      }
      n = n->left;
    }
  }
}

/*
 * Consistency check of the subtree rooted at "n" (which may be 0).
 * Violations are reported as text lines on "os"; nothing is repaired.
 *
 * "*depth_return" receives the height of the subtree,
 * "*black_depth_return" the number of black nodes counted along one path
 * down from "n", and "*count_in_out" is incremented once per visited node.
 */
void
rb_tree::check(
  int             *depth_return,
  int             *black_depth_return,
  int             *count_in_out,
  const node_type *n,
  ostream         &os
) const
{
  if (!n) {
    *depth_return = *black_depth_return = 0;
    return;
  }

  if (n->parent == end()) {
    if (root != n) os << "Inconsistent root pointer" << endl;
    if (n->color != node_type::BLACK) os << "Top node is not black" << endl;
  }

  // "n->parent->left == n" should also be true for the top node!
  if (n->parent->left != n && n->parent->right != n) {
    os << "Node is not child of its parent" << endl;
  }

  (*count_in_out)++;

  // Notice: "lbd" / "rbd" are only assigned when the respective child
  // exists; every read below is guarded accordingly.
  int ld, rd, lbd, rbd;

  if (n->left) {
    if (n->color == node_type::RED && n->left->color == node_type::RED) {
      os << "Red node has red left child" << endl;
    }
    check(&ld, &lbd, count_in_out, n->left, os);
  } else {
    ld = 0;
  }

  if (n->right) {
    if (n->color == node_type::RED && n->right->color == node_type::RED) {
      os << "Red node has red right child" << endl;
    }
    check(&rd, &rbd, count_in_out, n->right, os);
  } else {
    rd = 0;
  }

  *depth_return = 1 + (ld > rd ? ld : rd);

  if (n->left && n->right && lbd != rbd) {
    os << "Inconsistent black depth" << endl;
  }
  *black_depth_return = (
    (n->color == node_type::BLACK) +
    (n->left ? lbd : n->right ? rbd : 0)
  );
}

/*
 * This function has two modes of operation:
 *
 * (A) indent == -1:
 *     Print like
 *
 *       nodevalue1, nodevalue2, nodevalue3
 *
 *     Notice the ascending order.
 *     Notice that only the node *values* are printed, not the other node
 *     members like "left", "right", "parent", "color".
 *
 * (B) indent >= 0:
 *     Print a tree-like structure; one line per node, each node indented
 *     by its depth, like:
 *
 *           node1
 *         node2
 *           node3
 *       node4
 *           node5
 *         node6
 *           node7
 *
 *     Notice that the entire nodes are printed, i.e. "left", "right",
 *     "parent", "color", "value".
 *     Notice that this only yields nice results if "operator<<(os, value)"
 *     does not generate any newline characters.
 */
void
rb_tree::print_subtree(
  const node_type *n,
  ostream         &os,
  int             indent,
  void            *closure
) const
{
  ASSERT(n);

  if (indent == -1) {
    if (n->left) {
      print_subtree(n->left, os, -1, closure);
      os << ", ";
    }
    print_node_value(*n, os, closure);
    if (n->right) {
      os << ", ";
      print_subtree(n->right, os, -1, closure);
    }
  } else {
    if (n->left) print_subtree(n->left, os, indent + 1, closure);
    for (int i = 0; i < indent; ++i) os << " ";
    print_node(*n, os, closure);
    os << endl;
    if (n->right) print_subtree(n->right, os, indent + 1, closure);
  }
}

/*
 * Warning: Calls pure virtual method "copy_node()".
*/ rb_tree::node_type * rb_tree::copy_subtree(const node_type *n1, node_type *pa) const { ASSERT(n1); node_type *n2 = copy_node(n1); n2->left = n1->left ? copy_subtree(n1->left, n2) : 0; n2->right = n1->right ? copy_subtree(n1->right, n2) : 0; n2->parent = pa; n2->color = n1->color; return n2; } /* * Warning: Calls pure virtual method "delete_node()". * Notice: Does not invalidate the "n->parent"'s reference to "n". */ void rb_tree::delete_subtree(node_type *n) { if (n->left ) delete_subtree(n->left ); if (n->right) delete_subtree(n->right); delete_node(n); } /* * May be called from the destructor, because no pure virtual functions are * called. * Notice: Does not invalidate the "n->parent"'s reference to "n". */ /*static*/ void rb_tree::delete_subtree(node_type *n, void (*node_deletor)(node_type *)) { if (n->left ) delete_subtree(n->left, node_deletor); if (n->right) delete_subtree(n->right, node_deletor); node_deletor(n); } /* * Only "node_less_than()" must work for "x", i.e. "x"'s value must be set; * all other member are explicitly set here. */ /* * Case 1: * * BLACK x-> RED (Continue with new "x".) * / \ / \ * RED RED ===> BLACK BLACK * | | * x-> RED RED * * -------------------------------------------------------------------- * * Case 2: * * F-BLACK D-BLACK (Done.) * / \ / \ * B-RED G-BLACK-opt ===> B-RED F-RED * / \ / \ / \ * A-opt D-RED <-x A-opt C-opt E-opt G-BLACK-opt * / \ * C-opt E-opt * * -------------------------------------------------------------------- * * Case 3: * * F-BLACK D-BLACK (Done.) * / \ / \ * D-RED G-BLACK-opt ===> B-RED F-RED * / \ / \ / \ * x-> B-RED E-opt A-opt C-opt E-opt G-BLACK-opt * / \ * A-opt C-opt * * (These cases apply for "parent(x) == left_child(grandparent(x))"; otherwise * mirror the diagrams.) 
 */
/*
 * Links "x" into the tree (after any nodes it is not less than) and
 * restores the red-black coloring. Returns "x".
 */
rb_tree::node_type *
rb_tree::insert(node_type *x)
{
  x->left = 0;
  x->right = 0;

  // First node: it becomes the black top node, hanging below "end()".
  if (empty()) {
    x->parent = end();
    x->color = node_type::BLACK;
    root = x;
    return x;
  }

  x->color = node_type::RED;

  // Ordinary binary-search-tree descent down to a free child slot.
  node_type *y = root;
  for (;;) {
    if (node_less_than(x, y)) {
      if (!y->left) {
        y->left = x;
        x->parent = y;
        break;
      }
      y = y->left;
    } else {
      if (!y->right) {
        y->right = x;
        x->parent = y;
        break;
      }
      y = y->right;
    }
  }

  // "x" is re-pointed during rebalancing; remember the inserted node.
  node_type *saved_x = x;

  /*
   * Rebalance the tree from "x" to "root".
   */
  while (x->parent->color == node_type::RED) {
    ASSERT(x);
    ASSERT(x != end());
    ASSERT(x->color == node_type::RED);
    ASSERT(x->parent != end());
    ASSERT(x->parent->color == node_type::RED);
    ASSERT(x->parent->parent != end());
    ASSERT(x->parent->parent->color == node_type::BLACK);
    node_type *pa = x->parent;
    node_type *gp = pa->parent;

    ASSERT(x ->color == node_type::RED);
    ASSERT(pa->color == node_type::RED);
    ASSERT(gp->color == node_type::BLACK);

    node_type *uncle;
    if (pa == gp->left) {

      /*
       * Parent is left child of grandparent.
       */
      uncle = gp->right; // May be 0
      if (!uncle || uncle->color == node_type::BLACK) {
        if (pa->right == x) {

          /*
           * X is right child of parent.
           */
          if (x->left) x->left->parent = pa; // Case 2
          pa->right = x->left;
          x->left = pa;
          pa->parent = x;
          pa = x;
        }
        if (pa->right) pa->right->parent = gp; // Case 3
        pa->parent = gp->parent;
        if (gp == gp->parent->left) {
          gp->parent->left = pa;
        } else {
          gp->parent->right = pa;
        }
        gp->parent = pa;
        gp->left = pa->right;
        pa->right = gp;
        pa->color = node_type::BLACK;
        gp->color = node_type::RED;
        break;
      }
    } else {

      /*
       * Parent is right child of grandparent.
       */
      uncle = gp->left; // May be 0
      if (!uncle || uncle->color == node_type::BLACK) {
        if (pa->left == x) {

          /*
           * X is left child of parent.
           */
          if (x->right) x->right->parent = pa; // Case 2
          pa->left = x->right;
          x->right = pa;
          pa->parent = x;
          pa = x;
        }
        if (pa->left) pa->left->parent = gp; // Case 3
        pa->parent = gp->parent;
        if (gp == gp->parent->left) {
          gp->parent->left = pa;
        } else {
          gp->parent->right = pa;
        }
        gp->parent = pa;
        gp->right = pa->left;
        pa->left = gp;
        pa->color = node_type::BLACK;
        gp->color = node_type::RED;
        break;
      }
    }

    // Only reached with a red uncle: recolor and continue at the grandparent.
    pa ->color = node_type::BLACK; // Case 1
    uncle->color = node_type::BLACK;
    if (gp == root) break;
    gp->color = node_type::RED;
    x = gp;
  }

  return saved_x;
}

/*
 * Insert copies (made by the virtual "copy_node()") of the nodes in the
 * range ["from", "to").
 */
void
rb_tree::insert(const node_type *from, const node_type *to)
{
  while (from && from != to) {
    insert(copy_node(from));
    from = successor(from);
  }
}

/*
 * Erase all matching nodes. Return number of nodes erased.
 */
rb_tree::size_type
rb_tree::erase_all(value_pointer vp)
{
  size_type count = 0;
  // "erase()" returns the successor of the erased node, so the loop walks
  // the run of matching nodes starting at the lower bound.
  for (
    node_type *z = lower_bound(vp);
    z != end() && !node_less_than(vp, z);
    z = erase(z), ++count
  );
  return count;
}

/*
 * Erase one of the matching nodes. Return "false" if no node matches.
 */
bool
rb_tree::erase_one(value_pointer vp)
{
  node_type *z = find_any(vp);
  if (z == end()) return false;
  erase(z);
  return true;
}

/*
 * Unlinks "z" from the tree, hands it to the virtual "delete_node()" and
 * restores the red-black coloring. Returns the in-order successor of "z"
 * (possibly "end()"); erasing "end()" is a no-op that returns "end()".
 */
rb_tree::node_type *
rb_tree::erase(node_type *z)
{
  if (z == end()) return end();

  ASSERT(z);
  ASSERT(z != end());

  node_type *succ = successor(z); // May be "end()"

  // "y" is the node that is physically unlinked: "z" itself if it has at
  // most one child, otherwise its successor.
  node_type *y = z->left && z->right ? succ : z;

  ASSERT(y);
  ASSERT(y != end());
  ASSERT(!(y->left && y->right));

  node_type *x = y->left ? y->left : y->right; // May be 0.
  node_type *x_parent = y->parent; // May be "end()"

  /*
   * Isolate "y".
   */
  if (x) x->parent = x_parent;
  if (x_parent->left == y) {
    x_parent->left = x;
  } else {
    x_parent->right = x;
  }

  // Color of the node that left its position; decides whether fixup runs.
  node_type::color_type old_color = y->color;

  /*
   * Implant "y" in place of "z"; this isolates "z".
   */
  if (y != z) {

    /*
     * Instead of copying the value, change all the links.
     */
    y->parent = z->parent;
    y->left = z->left;
    y->right = z->right;
    y->color = z->color;
    if (z->parent->left == z) {
      z->parent->left = y;
    } else {
      z->parent->right = y;
    }
    if (z->left) z->left->parent = y;
    if (z->right) z->right->parent = y;

    if (x_parent == z) x_parent = y;
  }

  delete_node(z);

  /*
   * Check if fixup is necessary...
   */
  if (old_color == node_type::RED) return succ;

  // "x" may be 0 here, which is why its parent is tracked separately in
  // "x_parent".
  while (x != root && (!x || x->color == node_type::BLACK)) {
    ASSERT(x_parent != end());
    if (x == x_parent->left) {
      node_type *sibling = x_parent->right;
      ASSERT(sibling);
      if (sibling->color == node_type::RED) { // Case 1
        sibling->color = node_type::BLACK;
        x_parent->color = node_type::RED;
        left_rotate(x_parent);
        ASSERT(x_parent);
        ASSERT(x_parent != end());
        sibling = x_parent->right;
      }
      ASSERT(sibling);
      ASSERT(sibling != end());
      if (
        (!sibling->left || sibling->left->color == node_type::BLACK) &&
        (!sibling->right || sibling->right->color == node_type::BLACK)
      ) { // Case 2
        sibling->color = node_type::RED;
        x = x_parent;
        ASSERT(x_parent);
        ASSERT(x_parent != end());
        x_parent = x_parent->parent;
      } else {
        if (
          !sibling->right ||
          sibling->right->color == node_type::BLACK
        ) { // Case 3
          ASSERT(sibling);
          ASSERT(sibling != end());
          ASSERT(sibling->left);
          /*if (sibling->left)*/ sibling->left->color = node_type::BLACK;
          sibling->color = node_type::RED;
          right_rotate(sibling);
          ASSERT(x_parent);
          ASSERT(x_parent != end());
          sibling = x_parent->right;
        }
        ASSERT(x_parent);
        ASSERT(x_parent != end());
        sibling->color = x_parent->color; // Case 4
        x_parent->color = node_type::BLACK;
        ASSERT(sibling);
        ASSERT(sibling->right);
        /*if (sibling->right)*/ sibling->right->color = node_type::BLACK;
        left_rotate(x_parent);
        break;
      }
    } else { // x->parent()->right == x
      node_type *sibling = x_parent->left;
      ASSERT(sibling);
      if (sibling->color == node_type::RED) { // Case 1
        sibling->color = node_type::BLACK;
        x_parent->color = node_type::RED;
        right_rotate(x_parent);
        ASSERT(x_parent);
        ASSERT(x_parent != end());
        sibling = x_parent->left;
      }
      ASSERT(sibling);
      if (
        (!sibling->right || sibling->right->color == node_type::BLACK) &&
        (!sibling->left || sibling->left->color == node_type::BLACK)
      ) { // Case 2
        sibling->color = node_type::RED;
        x = x_parent;
        ASSERT(x_parent);
        ASSERT(x_parent != end());
        x_parent = x_parent->parent;
      } else {
        ASSERT(sibling);
        ASSERT(sibling != end());
        if (
          !sibling->left ||
          sibling->left->color == node_type::BLACK
        ) { // Case 3
          ASSERT(sibling->right);
          /*if (sibling->right)*/ sibling->right->color = node_type::BLACK;
          sibling->color = node_type::RED;
          left_rotate(sibling);
          ASSERT(x_parent);
          ASSERT(x_parent != end());
          sibling = x_parent->left;
        }
        ASSERT(x_parent);
        ASSERT(x_parent != end());
        sibling->color = x_parent->color; // Case 4
        x_parent->color = node_type::BLACK;
        ASSERT(sibling);
        ASSERT(sibling != end());
        ASSERT(sibling->left);
        /*if (sibling->left)*/ sibling->left->color = node_type::BLACK;
        right_rotate(x_parent);
        break;
      }
    }
  }
  ASSERT(x != end());
  if (x) x->color = node_type::BLACK;

  return succ;
}

/*
 * Erase all nodes from "n1" (inclusive) to "n2" (exclusive). "n1" may only be
 * "end()" if "n2" is also "end()".
 */
rb_tree::node_type *
rb_tree::erase(node_type *n1, node_type *n2)
{
  while (n1 != n2) n1 = erase(n1);
  return n1;
}

/*
 * Exchange the contents of this tree and "x" without copying any node.
 */
void
rb_tree::swap(rb_tree &x)
{

  /*
   * Swap pointers to root node.
   */
  node_type *tmp = root;
  root = x.root;
  x.root = tmp;

  /*
   * Fix up parent pointers of root nodes.
   */
  if (root) root->parent = end();
  if (x.root) x.root->parent = x.end();
}

/*
 * Returns "end()" if "x" was the last node.
 *
 * In-order successor: the leftmost node of the right subtree if there is
 * one, otherwise the nearest ancestor that is reached from its left side.
 */
/*static*/ const rb_tree::node_type *
rb_tree::successor(const node_type *x)
{
  if (x->right) {
    x = x->right;
    while (x->left) x = x->left;
    return x;
  } else {
    // Climb until "x" is a left child. For the top node this test
    // succeeds against "end()" (see the "n->parent->left == n" remark in
    // "check()"), which is what ends the climb for the last node.
    for (;;) {
      if (x == x->parent->left) return x->parent;
      x = x->parent;
    }
  }
}

/*
 * List-like print-out of all node values in ascending order, enclosed in
 * braces.
 */
void
rb_tree::print(ostream &os, void *closure) const
{
  if (root) {
    os << "{ ";
    print_subtree(root, os, -1, closure);
    os << " }";
  } else {
    os << "{}";
  }
}

/*
 * Tree-like print-out of the entire nodes, one line per node; prints
 * nothing for an empty tree.
 */
void
rb_tree::debug_print(ostream &os, void *closure) const
{
  if (root) print_subtree(root, os, 0, closure);
}

// "x" must have a right child, but not necessarily a parent.
// The right child "y" takes the place of "x"; "x" becomes the left child
// of "y" and adopts the former left subtree of "y" as its right subtree.
void
rb_tree::left_rotate(node_type *x)
{
  ASSERT(x);
  ASSERT(x->right);

  node_type *y = x->right; // set temporary y

  x->right = y->left; // y's left is x's right
  if (y->left) y->left->parent = x; // link y->left's parent to x

  y->parent = x->parent; // link x's parent to y
  if (x == x->parent->left) {
    x->parent->left = y;
  } else {
    x->parent->right = y;
  }

  y->left = x; // x is left child of y
  x->parent = y;
}

// "x" must have a left child, but not necessarily a parent.
// Mirror image of "left_rotate()": the left child "y" takes the place of
// "x"; "x" becomes the right child of "y" and adopts the former right
// subtree of "y" as its left subtree.
void
rb_tree::right_rotate(node_type *x)
{
  ASSERT(x);
  ASSERT(x->left);

  node_type* y = x->left; // set temporary y

  x->left = y->right; // x's left is y's right
  if (y->right) y->right->parent = x; // link y->right's parent to x

  y->parent = x->parent; // link y's parent to x
  if (x == x->parent->left) {
    x->parent->left = y;
  } else {
    x->parent->right = y;
  }

  y->right = x; // x is right child of y
  x->parent = y;
}

/*
 * Default full-node print-out used by "debug_print()": the node's value,
 * the values of its children ("0" if absent), its color, and the value of
 * its parent ("TOP" for the top node), all on one line.
 */
/*virtual*/ void
rb_tree::print_node(const node_type &n, ostream &os, void *closure) const
{
  os << "value=";
  print_node_value(n, os, closure);

  os << ", left=";
  if (n.left) {
    print_node_value(*n.left, os, closure);
  } else {
    os << "0";
  }

  os << ", right=";
  if (n.right) {
    print_node_value(*n.right, os, closure);
  } else {
    os << "0";
  }

  os << ", color=" << (n.color == node_type::RED ? "RED" : "BLACK");

  os << ", parent=";
  if (n.parent == end()) {
    os << "TOP";
  } else {
    print_node_value(*n.parent, os, closure);
  }
}

/* ------------------------------------------------------------------------- */
html2text-1.3.2a/libstd/rb_tree.h0100644000000000000000000001657607012075731015405 0ustar rootroot
/* ------------------------------------------------------------------------- */

/*
 * Copyright (c) 1999
 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany.
 * http://www.gmrs.de
 * All rights reserved.
 * Author: Arno Unkrig (arno.unkrig@gmrs.de)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 * This product includes software developed by GMRS Software GmbH.
 * 4. The name of GMRS Software GmbH may not be used to endorse or promote
 * products derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __rb_tree_h_INCLUDED__ /* { */ #define __rb_tree_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: rb_tree.h,v 1.2 1999/11/09 19:54:01 arno Exp $" #include #include "include/utility" // For "pair" #ifdef BOOL_DEFINITION BOOL_DEFINITION #undef BOOL_DEFINITION #endif /* ------------------------------------------------------------------------- */ class ostream; class rb_tree { protected: // Protected types struct node_type { enum color_type { RED, BLACK }; // Notice: "left" must be the first member, because the "parent" pointer // of the top node points to the "root" member of the "rb_tree" object, // so that "top->parent->left == top"! This is tricky, but this way // "parent" is always non-zero... node_type *left; node_type *right; node_type *parent; color_type color; }; typedef void *value_pointer; // Types public: typedef size_t size_type; // Construct/Copy/Destruct protected: rb_tree() : root(0) {} rb_tree( const node_type *, const node_type *, node_type *(*node_copier)(const node_type *) ); rb_tree(const rb_tree &x) : root(x.root ? 
x.copy_subtree(x.root, end()) : 0) {} //~rb_tree() // {} const rb_tree &operator=(const rb_tree &); // Iterators node_type *begin(); const node_type *begin() const; node_type *end() { return (node_type *) &root; } const node_type *end() const { return (const node_type *) &root; } // Capacity public: bool empty() const { return root == 0; } size_type size() const { return root ? size(root) : 0; } size_type max_size() const { return (size_type) -1; } // Modifiers node_type *insert(node_type *); //node_type *insert(node_type *where, node_type *); void insert(const node_type *from, const node_type *to); size_type erase_all(value_pointer); bool erase_one(value_pointer); node_type *erase(node_type *); node_type *erase(node_type *, node_type *); void swap(rb_tree &x); void clear() { if (root) { delete_subtree(root); root = 0; } } // Operations. node_type *find_first(value_pointer); const node_type *find_first(value_pointer vp) const { return ((rb_tree *) this)->find_first(vp); } node_type *find_any(value_pointer); const node_type *find_any(value_pointer vp) const { return ((rb_tree *) this)->find_any(vp); } size_type count(value_pointer vp) const { return root ? count(vp, root) : 0; } node_type *lower_bound(value_pointer vp); const node_type *lower_bound(value_pointer vp) const { return ((rb_tree *) this)->lower_bound(vp); } node_type *upper_bound(value_pointer vp); const node_type *upper_bound(value_pointer vp) const { return ((rb_tree *) this)->upper_bound(vp); } bool operator==(const rb_tree &x) const; bool operator<(const rb_tree &x) const; void check( int *depth_return, int *black_depth_return, int *count_return, ostream &os ) const { *count_return = 0; check(depth_return, black_depth_return, count_return, root, os); } // Protected member functions. 
protected: void clear(void (*node_deletor)(node_type *)) { if (root) { delete_subtree(root, node_deletor); root = 0; } } public: static node_type *successor(node_type *x) { return (node_type *) successor((const node_type *) x); } static const node_type *successor(const node_type *); protected: // List-like print-out: "{ nodevalue1, nodevalue2, nodevalue3 }". Calls // pure virtual "print_node_value()". void print(ostream &os, void *closure) const; // Tree-like print-out; rather for debugging. Calls "print_node()", which // calls pure virtual "print_node_value()"; void debug_print(ostream &os, void *closure) const; // Virtual member functions to be implemented by derived class private: virtual bool node_less_than(const node_type *x, const node_type *y) const = 0; virtual bool node_less_than(value_pointer x, const node_type *y) const = 0; virtual bool node_less_than(const node_type *x, value_pointer y ) const = 0; virtual void print_node(const node_type &, ostream &, void *closure) const; virtual void print_node_value( const node_type &, ostream &, void *closure ) const = 0; virtual node_type *copy_node(const node_type *) const = 0; virtual void delete_node(node_type *) const = 0; // Private member functions static size_type size(const node_type *); size_type count(value_pointer, const node_type *) const; node_type *copy_subtree(const node_type *n, node_type *pa) const; void delete_subtree(node_type *); static void delete_subtree(node_type *, void (*)(node_type *)); void print_subtree( const node_type *, ostream &, int indent, void *closure ) const; void left_rotate(node_type *); void right_rotate(node_type *); void check( int *depth_return, int *black_depth_return, int *count_in_out, const node_type *n, ostream &os ) const; // Private members. 
private: node_type *root; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/string.C0100644000000000000000000003410507005566321015212 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ident "$Id: string.C,v 1.10 1999/10/27 12:15:13 arno Exp $" /* * Uncomment "cerr" for debugging, */ //#define DBG(x) x #include #include #include #include #ifndef DBG #define DBG(x) #endif /* ------------------------------------------------------------------------- */ /*static*/ const string::size_type string::npos = (string::size_type) -1; /*static*/ char string::null = 0; /* ------------------------------------------------------------------------- */ string::string(const string &x) : p(*x.p ? strdup(x.p) : &null) { DBG(cerr << "string::string(string &\"" << x.p << "\")" << endl); } /* ------------------------------------------------------------------------- */ string::string(const char *x, size_type n) { if (!n) { p = &null; return; } p = (char *) malloc(n + 1); memcpy(p, x, n); p[n] = '\0'; } /* ------------------------------------------------------------------------- */ string::string(const char *x) : p(*x ? strdup(x) : &null) { DBG(cerr << "string::string(\"" << x << "\")" << endl); } /* ------------------------------------------------------------------------- */ string::string(size_type n, char c) { if (n == 0) { p = &null; } else { p = (char *) malloc(n + 1); memset(p, c, n); p[n] = '\0'; } } /* ------------------------------------------------------------------------- */ // Internal use only: Create an uninitialized string. string::string(size_type x) : p(x ? 
(char *) malloc(x + 1) : &null) { } /* ------------------------------------------------------------------------- */ string::~string() { DBG(cerr << "string::~string(\"" << p << "\")" << endl); if (p != &null) free(p); p = (char *) 0xa5a5a5a5; // For a fast core dump. } /* ------------------------------------------------------------------------- */ const string & string::operator=(const string &x) { if (p != &null) free(p); p = *x.p ? strdup(x.p) : &null; return *this; } /* ------------------------------------------------------------------------- */ const string & string::operator=(const char *x) { if (p != &null) free(p); p = *x ? strdup(x) : &null; return *this; } /* ------------------------------------------------------------------------- */ const string & string::operator=(char c) { if (p != &null) free(p); p = (char *) malloc(2); p[0] = c; p[1] = '\0'; return *this; } /* ------------------------------------------------------------------------- */ string::iterator string::end() { return strchr(p, '\0'); } string::const_iterator string::end() const { return strchr(p, '\0'); } string::reverse_iterator string::rbegin() { return strchr(p, '\0'); } string::const_reverse_iterator string::rbegin() const { return strchr(p, '\0'); } /* ------------------------------------------------------------------------- */ const string & string::operator+=(char c) { DBG(cerr << "string::operator+=(\"" << p << "\", '" << c << "')" << endl); if (p == &null) { p = (char *) malloc(2); p[0] = c; p[1] = '\0'; } else { size_t len = strlen(p); p = (char *) realloc(p, len + 2); p[len] = c; p[len + 1] = '\0'; } return *this; } /* ------------------------------------------------------------------------- */ const string & string::operator+=(const char *x) { if (p == &null) { p = strdup(x); } else { size_t len = strlen(p); p = (char *) realloc(p, len + strlen(x) + 1); strcpy(p + len, x); } return *this; } /* ------------------------------------------------------------------------- */ void 
string::resize(size_type n, char c /*= ends*/ ) { if (n == 0) { if (p != &null) free(p); p = &null; return; } if (p == &null) { p = (char *) malloc(n + 1); memset(p, c, n); } else { size_type len = strlen(p); if (n > len) { p = (char *) realloc(p, n + 1); memset(p + len, c, n - len); } } p[n] = '\0'; } /* ------------------------------------------------------------------------- */ string & string::append(size_type n, char c) { if (p == &null) { p = (char *) malloc(n + 1); memset(p, c, n); p[n] = '\0'; } else { size_type len = strlen(p); p = (char *) realloc(p, len + n + 1); memset(p + len, c, n); p[len + n] = '\0'; } return *this; } /* ------------------------------------------------------------------------- */ string & string::assign(const string &s, size_type pos, size_type n) { if (p != &null) free(p); if (n == 0) { p = &null; return *this; } size_type sl = s.size(); if (pos >= sl) { p = &null; return *this; } if (n == npos || n > sl - pos) n = sl - pos; p = (char *) malloc(n + 1); memcpy(p, s.p + pos, n); p[n] = '\0'; return *this; } /* ------------------------------------------------------------------------- */ string & string::insert(size_type pos, size_type n, char c) { if (n) { if (p == &null) { // ASSERT that "pos == 0". p = (char *) malloc(n + 1); memset(p, c, n); p[n] = '\0'; } else { size_type len = strlen(p); // ASSERT that "pos <= len". p = (char *) realloc(p, len + n + 1); memmove(p + pos + n, p + pos, len - pos + 1); memset(p + pos, c, n); } } return *this; } /* ------------------------------------------------------------------------- */ string & string::erase(size_type pos /*= 0*/, size_type n /*= npos*/ ) { DBG(cerr << "\"" << p << "\"::erase(" << pos << ", " << n << ") -> \""); size_type len = strlen(p); // ASSERT on that "pos <= len". 
if (n == npos || pos + n >= len) { p[pos] = '\0'; } else { strcpy(p + pos, p + pos + n); } DBG(cerr << p << "\"" << endl); return *this; } /* ------------------------------------------------------------------------- */ string & string::replace( size_type pos1, size_type n1, const string &s, size_type pos2, size_type n2 ) { size_type l1 = size(); size_type l2 = s.size(); if (pos1 > l1 || pos2 > l2) return *this; if (n1 == npos || pos1 + n1 > l1) n1 = l1 - pos1; if (n2 == npos || pos2 + n2 > l2) n2 = l2 - pos2; if (pos1 + n1 == l1) { size_type j = pos1 + n2 + 1; p = (char *) (p == &null ? malloc(j) : realloc(p, j)); memcpy(p + pos1, s.p + pos2, n2); p[pos1 + n2] = '\0'; } else { size_type j = l1 - n1 + n2 + 1; p = (char *) (p == &null ? malloc(j) : realloc(p, j)); memmove(p + pos1 + n2, p + pos1 + n1, l1 - pos1 - n1 + 1); memcpy(p + pos1, s.p + pos2, n2); } return *this; } /* ------------------------------------------------------------------------- */ string & string::replace(size_type pos, size_type n1, const char *x, size_type n2) { if (p == &null) { // ASSERT that "pos == 0". if (n2) { p = (char *) malloc(n2 + 1); memcpy(p, x, n2); p[n2] = '\0'; } } else { size_type len1 = strlen(p); if (n1 == npos || pos + n1 >= len1) { p = (char *) realloc(p, pos + n2 + 1); memcpy(p + pos, x, n2); p[pos + n2] = '\0'; } else { if (n2 > n1) { p = (char *) realloc(p, len1 - n1 + n2 + 1); memmove(p + pos + n2, p + pos + n1, len1 - pos - n1 + 1); } else if (n2 < n1) { memmove(p + pos + n2, p + pos + n1, len1 - pos - n1 + 1); p = (char *) realloc(p, len1 - n1 + n2 + 1); } memcpy(p + pos, x, n2); } } return *this; } /* ------------------------------------------------------------------------- */ string & string::replace(size_type pos, size_type n, const char *x) { if (p == &null) { // ASSERT that "pos == 0". 
if (*x) p = strdup(x); } else { size_type len1 = strlen(p); size_type len2 = strlen(x); if (n == npos || pos + n >= len1) { p = (char *) realloc(p, pos + len2 + 1); strcpy(p + pos, x); } else { if (len2 > n) { p = (char *) realloc(p, len1 - n + len2 + 1); memmove(p + pos + len2, p + pos + n, len1 - pos - n + 1); } else if (len2 < n) { memmove(p + pos + len2, p + pos + n, len1 - pos - n + 1); p = (char *) realloc(p, len1 - n + len2 + 1); } memcpy(p + pos, x, len2); } } return *this; } /* ------------------------------------------------------------------------- */ string & string::replace(size_type pos, size_type n1, size_type n2, char c) { if (p == &null) { // ASSERT that "pos == 0". if (n2 > 0) { p = (char *) malloc(n2 + 1); memset(p, c, n2); p[n2] = '\0'; } } else { size_type len1 = strlen(p); if (n1 == npos || pos + n1 >= len1) { p = (char *) realloc(p, pos + n2 + 1); memset(p + pos, c, n2); p[pos + n2] = '\0'; } else { if (n2 > n1) { p = (char *) realloc(p, len1 - n1 + n2 + 1); memmove(p + pos + n2, p + pos + n1, len1 - pos - n1 + 1); } else if (n2 < n1) { memmove(p + pos + n2, p + pos + n1, len1 - pos - n1 + 1); p = (char *) realloc(p, len1 - n1 + n2 + 1); } memset(p + pos, c, n2); } } return *this; } /* ------------------------------------------------------------------------- */ string::size_type string::length() const { return strlen(p); } string::size_type string::size() const { return strlen(p); } /* ------------------------------------------------------------------------- */ ostream & operator<<(ostream &os, const string &s) { return os << s.p; } /* ------------------------------------------------------------------------- */ /* * Note: "istream::getline(p, n, delim)" reads the delimiter, but does not * put it into "p", so we must compare "gcount()" against "strlen(p)". * * "getline(is, str, delim)" reads the delimiter, but it is not entered into * "str". 
*/ /*friend of string*/ istream & getline(istream &is, string &s_return, char delim /*= '\n'*/ ) { char buffer[1024]; if (is.getline(buffer, sizeof(buffer), delim)) { s_return = buffer; while (is.gcount() == strlen(buffer)) { // Delimiter not read yet if (!is.getline(buffer, sizeof(buffer), delim)) break; s_return += buffer; } } return is; } /* ------------------------------------------------------------------------- */ bool operator==(const string &x, const string &y) { return strcmp(x.c_str(), y.c_str()) == 0; } bool operator==(const char *x, const string &y) { return strcmp(x, y.c_str()) == 0; } bool operator==(const string &x, const char *y) { return strcmp(x.c_str(), y) == 0; } /* ------------------------------------------------------------------------- */ bool operator<(const string &x, const string &y) { return strcmp(x.c_str(), y.c_str()) < 0; } bool operator<(const char *x, const string &y) { return strcmp(x, y.c_str()) < 0; } bool operator<(const string &x, const char *y) { return strcmp(x.c_str(), y) < 0; } /* ------------------------------------------------------------------------- */ bool operator!=(const string &x, const string &y) { return strcmp(x.c_str(), y.c_str()) != 0; } bool operator!=(const char *x, const string &y) { return strcmp(x, y.c_str()) != 0; } bool operator!=(const string &x, const char *y) { return strcmp(x.c_str(), y) != 0; } /* ------------------------------------------------------------------------- */ string operator+(const string &s1, const string &s2) { string::size_type l1 = strlen(s1.p), l2 = strlen(s2.p); string res(l1 + l2); memcpy(res.p, s1.p, l1); memcpy(res.p + l1, s2.p, l2); res.p[l1 + l2] = '\0'; return res; } string operator+(const char *p1, const string &s2) { string::size_type l1 = strlen(p1), l2 = strlen(s2.p); string res(l1 + l2); memcpy(res.p, p1, l1); memcpy(res.p + l1, s2.p, l2); res.p[l1 + l2] = '\0'; return res; } string operator+(char c, const string &s) { string::size_type l = strlen(s.p); string res(1 + 
l); res.p[0] = c; memcpy(res.p + 1, s.p, l); res.p[1 + l] = '\0'; return res; } string operator+(const string &s1, const char *p2) { string::size_type l1 = strlen(s1.p), l2 = strlen(p2); string res(l1 + l2); memcpy(res.p, s1.p, l1); memcpy(res.p + l1, p2, l2); res.p[l1 + l2] = '\0'; return res; } string operator+(const string &s, char c) { string::size_type l = strlen(s.p); string res(l + 1); memcpy(res.p, s.p, l); res.p[l] = c; res.p[l + 1] = '\0'; return res; } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/vector_base.C0100644000000000000000000002537607005304430016201 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ident "$Id: vector_base.C,v 1.1 1999/10/26 10:57:28 arno Exp $" #include #include #include "vector_base.h" /* ------------------------------------------------------------------------- */ /* * The minimum number of elements in a vector; in other words: If the vector * is not empty, then it reserves space for at least that many elements. Must * be a power of two. * As the vector grows beyond its limit, its size is doubled. 
*/ #define MIN_SIZE 4 /* ------------------------------------------------------------------------- */ vector_base::vector_base( size_t element_size, size_type n, value_default_constructor c ) { init(element_size, n); for (char *q = p; n; --n, q += element_size) c(q); } vector_base::vector_base( size_t element_size, size_type n, value_copy_constructor c, void *value ) { init(element_size, n); for (char *q = p; n; --n, q += element_size) c( /*that*/ q, value); } vector_base::vector_base( size_t element_size, value_copy_constructor c, char *from, char *to ) { size_type n = (to - from) / element_size; init(element_size, n); for (char *q = p; n; --n, q += element_size, from += element_size) { c( /*that*/ q, from); } } vector_base::~vector_base() { free(p); } void vector_base::assign(size_t element_size, char *from, char *to) { clear(element_size); size_type n = (to - from) / element_size; init(element_size, n); for (char *q = p; n; --n, q += element_size, from += element_size) { construct_value(q, from); } } void vector_base::assign(size_t element_size, size_type n) { clear(element_size); init(element_size, n); for (char *q = p; n; --n, q += element_size) construct_value(q); } void vector_base::assign(size_t element_size, size_type n, void *v) { clear(element_size); init(element_size, n); for (char *q = p; n; --n, q += element_size) construct_value(q, v); } void vector_base::resize(size_t element_size, size_type n) { if (n > size1) { reserve(element_size, n); char *from = p + element_size * size1; char *to = p + element_size * n; for (char *q = from; q < to; q += element_size) construct_value(q); } else { char *from = p + element_size * n; char *to = p + element_size * size1; for (char *q = from; q < to; q += element_size) destruct_value(q); } size1 = n; } void vector_base::resize(size_t element_size, size_type n, void *v) { if (n > size1) { reserve(element_size, n); char *from = p + element_size * size1; char *to = p + element_size * n; for (char *q = from; q < to; q 
+= element_size) construct_value(q, v); } else { char *from = p + element_size * n; char *to = p + element_size * size1; for (char *q = from; q < to; q += element_size) destruct_value(q); } size1 = n; } /* * If "iter_in_out != 0", assume that "*iter_in_out" is an iterator in this * vector, and correct it while new elements are "reserve()"ed. */ void vector_base::reserve( size_t element_size, size_type n, char **iter_in_out /*= 0*/ ) { if (n <= size2) return; size_type m; for (m = MIN_SIZE; m < n; m <<= 1); char *p2 = (char *) malloc(element_size * m); if (!p2) abort(); char *q1 = p, *q2 = p2; for (int i = 0; i < size1; ++i, q1 += element_size, q2 += element_size) { construct_value(q2, q1); destruct_value(q1); } if (iter_in_out) *iter_in_out += p2 - p; free(p); p = p2; size2 = m; } void vector_base::push_back(size_t element_size, void *v) { reserve(element_size, size1 + 1); construct_value(p + element_size * size1, v); ++size1; } void vector_base::pop_back(size_t element_size) { if (size1 == 0) abort(); --size1; destruct_value(p + element_size * size1); } void * vector_base::insert(size_t element_size, char *pos) { if (size1 == size2) reserve(element_size, size1 + 1, &pos); char *end = p + element_size * size1; if (pos == end) { construct_value(pos); } else { construct_value(end, end - element_size); for (char *q = end - element_size; q != pos; q -= element_size) { assign_value(q, q - element_size); } assign_value(pos); } ++size1; return pos; } void * vector_base::insert(size_t element_size, char *pos, void *v) { if (size1 == size2) reserve(element_size, size1 + 1, &pos); char *end = p + element_size * size1; if (pos == end) { construct_value(pos, v); } else { construct_value(end, end - element_size); for (char *q = end - element_size; q != pos; q -= element_size) { assign_value(q, q - element_size); } assign_value(pos, v); } ++size1; return pos; } void vector_base::insert(size_t element_size, char *pos, size_type n, void *v) { if (size1 + n > size2) 
reserve(element_size, size1 + n, &pos); char *end = p + element_size * size1; char *q = end - element_size; char *r = q + element_size * n; while (q >= pos && r >= end) { construct_value(r, q); r -= element_size; q -= element_size; } while (q >= pos) { assign_value(r, q); r -= element_size; q -= element_size; } while (r >= end) { construct_value(r, v); r -= element_size; } while (r >= pos) { assign_value(r, v); r -= element_size; } size1 += n; } void vector_base::insert(size_t element_size, char *pos, char *from, char *to) { size_type n = (to - from) / element_size; if (size1 + n > size2) reserve(element_size, size1 + n, &pos); char *end = p + element_size * size1; char *q = end - element_size; char *r = q + element_size * n; while (q >= pos && r >= end) { construct_value(r, q); r -= element_size; q -= element_size; } while (q >= pos) { assign_value(r, q); r -= element_size; q -= element_size; } while (r >= end) { to -= element_size; construct_value(r, to); r -= element_size; } while (r >= pos) { to -= element_size; assign_value(r, to); r -= element_size; } size1 += n; } void * vector_base::erase(size_t element_size, char *pos) { char *pos2 = pos; char *end = p + element_size * --size1; while (pos2 < end) { assign_value(pos2, pos2 + element_size); pos2 += element_size; } destruct_value(pos2); return pos; } void * vector_base::erase(size_t element_size, char *from, char *to) { char *res = from; char *end = p + element_size * size1; size1 -= (to - from) / element_size; while (to < end) { assign_value(from, to); from += element_size; to += element_size; } while (from < end) { destruct_value(from); from += element_size; } return res; } void vector_base::swap(vector_base &x) { size_type size1_tmp = size1; size_type size2_tmp = size2; char *p_tmp = p; size1 = x.size1; size2 = x.size2; p = x.p; x.size1 = size1_tmp; x.size2 = size2_tmp; x.p = p_tmp; } void vector_base::clear(size_t element_size) { char *q = p; for (size_type i = 0; i < size1; q += element_size, ++i) { 
destruct_value(q); } size1 = 0; } void vector_base::clear(size_t element_size, value_destructor d) { char *q = p; for (size_type i = 0; i < size1; q += element_size, ++i) d(q); size1 = 0; } /* ------------------------------------------------------------------------- */ void vector_base::init(size_t element_size, size_type n) { size1 = n; for (size2 = MIN_SIZE; size2 < n; size2 <<= 1); p = (char *) malloc(element_size * size2); if (!p) abort(); } // Need to use a function pointer because we don't "operator==(T, T)" to be // a member function. bool vector_base::equals( size_t element_size, const vector_base &y, value_comparator value_equals ) const { if (size1 != y.size1) return false; char *xp = p, *xend = p + size1 * element_size; char *yp = y.p, *yend = y.p + y.size1 * element_size; for (;;) { if (xp >= xend) return yp >= yend; if (yp >= yend) return false; if (!value_equals(xp, yp)) return false; xp += element_size; yp += element_size; } } // Need to use a function pointer because we don't "operator<(T, T)" to be // a member function. 
bool vector_base::less_than( size_t element_size, const vector_base &y, value_comparator value_less_than ) const { char *xp = p, *xend = p + size1 * element_size; char *yp = y.p, *yend = y.p + y.size1 * element_size; for (;;) { if (xp >= xend) return yp < yend; if (yp >= yend) return true; if (value_less_than(xp, yp)) return true; xp += element_size; yp += element_size; } } void vector_base::print(size_t element_size, ostream &os, void *closure) const { os << "[ "; char *q = p; for (size_type i = 0; i < size1; ++i, q += element_size) { os << "[" << i << "]="; print_value(os, q, closure); os << " "; } os << "]"; } /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/vector_base.h0100644000000000000000000001230007005304432016227 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __vector_base_h_INCLUDED__ /* { */ #define __vector_base_h_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: vector_base.h,v 1.1 1999/10/26 10:57:30 arno Exp $" #include #ifdef BOOL_DEFINITION BOOL_DEFINITION #undef BOOL_DEFINITION #endif class ostream; /* ------------------------------------------------------------------------- */ class vector_base { public: typedef size_t size_type; protected: typedef void (*value_default_constructor)(void *that); typedef void (*value_copy_constructor)(void *that, void *v); typedef void (*value_destructor)(void *that); protected: // Construct/Copy/Destroy vector_base() : p(0), size1(0), size2(0) {} vector_base(size_t element_size, size_type n, value_default_constructor c); vector_base( size_t element_size, size_type n, value_copy_constructor c, void *value ); vector_base( size_t element_size, value_copy_constructor c, char *from, char *to ); ~vector_base(); void assign(size_t element_size, char *from, char *to); void assign(size_t element_size, size_type n); void assign(size_t element_size, size_type n, void *v); // Capacity public: size_type size() const { 
return size1; } size_type max_size() const { return (size_type) -1; } protected: void resize(size_t element_size, size_type n); void resize(size_t element_size, size_type n, void *v); public: size_type capacity() const { return size2; } bool empty() const { return size1 == 0; } protected: void reserve(size_t element_size, size_type n, char **iter_in_out = 0); // Modifiers void push_back(size_t element_size, void *v); void pop_back(size_t element_size); void *insert(size_t element_size, char *pos); void *insert(size_t element_size, char *pos, void *v); void insert(size_t element_size, char *pos, size_type n, void *v); void insert(size_t element_size, char *pos, char *from, char *to); void *erase(size_t element_size, char *pos); void *erase(size_t element_size, char *from, char *to); void swap(vector_base &x); void clear(size_t element_size); void clear(size_t element_size, value_destructor d); protected: virtual void construct_value(void *that) = 0; virtual void construct_value(void *that, void *v) = 0; virtual void destruct_value(void *that) = 0; virtual void assign_value(void *to) = 0; virtual void assign_value(void *to, void *from) = 0; virtual void print_value(ostream &os, void *v, void *closure) const = 0; void init(size_t element_size, size_type n); typedef bool (*value_comparator)(void *, void *); bool equals( size_t element_size, const vector_base &y, value_comparator value_equals ) const; bool less_than( size_t element_size, const vector_base &y, value_comparator value_less_than ) const; void print(size_t element_size, ostream &os, void *closure) const; char *p; size_type size1; size_type size2; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/0040755000000000000000000000000007161241364015223 5ustar rootroothtml2text-1.3.2a/libstd/include/auto_ptr.h0100644000000000000000000000647007357145061017241 
0ustar rootroot /***************************************************************************/ /* * Portions Copyright (c) 1999 GMRS Software GmbH * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de * All rights reserved. * * Author: Arno Unkrig * * All advertising materials mentioning features or use of this software * must display the following acknowledgement: * "This product includes software developed by GMRS Software GmbH." * The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. */ /* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License in the file COPYING for more details. */ /***************************************************************************/ /* * Changes to version 1.2.2 were made by Martin Bayer * Dates and reasons of modifications: * Thu Oct 4 22:44:17 CEST 2001: ported to g++ 3.0 */ /***************************************************************************/ #ifndef __auto_ptr_h_INCLUDED__ /* { */ #define __auto_ptr_h_INCLUDED__ /* ------------------------------------------------------------------------- */ /* * Yet another implementation of the "auto_ptr" template... I am not sure * if the standard does specify "auto_ptr", and how, but this implementation * uses a scheme *without* an "owns" flag: When the ownership is taken away * from the "auto_ptr", its pointer *is set to 0*! 
This may seem odd, but * in practice, it saves you from problems because such an "auto_ptr" can * never be dangling, only "0", which is checked in "operator*()" and * "operator->()". I never found this scheme limiting. */ /* ------------------------------------------------------------------------- */ #include template class auto_ptr { public: // Constructor/copy/destroy explicit auto_ptr(T *x = 0) : p(x) {} auto_ptr(const auto_ptr &x) : p(x.p) { ((auto_ptr *) &x)->p = 0; } void operator=(const auto_ptr &x) { delete p; p = x.p; ((auto_ptr *) &x)->p = 0; } // Would be a nice extension, but is not portable; use "reset()" instead: //void operator=(T *x); { delete p; p = x; } ~auto_ptr() { delete p; } // Members T &operator*() const { if (!p) abort(); return *(T *) p; } T *operator->() const { if (!p) abort(); return (T *) p; } T *get() const { return (T *) p; } T *release() { T *tmp = p; p = 0; return tmp; } void reset(T *x = 0) { delete p; p = x; } // These would make a nice extension, but are not provided by many other // implementations. //operator const void *() const { return p; } //int operator!() const { return p == 0; } private: T *p; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/list0100644000000000000000000002524607005304435016123 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __list_INCLUDED__ /* { */ #define __list_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: list,v 1.7 1999/10/26 10:57:33 arno Exp $" #include // For "size_t". #ifdef BOOL_DEFINITION BOOL_DEFINITION #undef BOOL_DEFINITION #endif /* ------------------------------------------------------------------------- */ /* * This is a simplified, but otherwise correct implementation of the "list" * class as specified by the ANSI C++ library. * * Missing features: * (1) "list::Allocator" is missing ("list" uses "operator new()" instead). 
* (2) Several "unimportant" methods are not implemented (but they are * "declared" in comments). */ /* ------------------------------------------------------------------------- */ template class list { private: struct Node { T value; Node *prev; Node *next; Node(Node *p, Node *n) : prev(p), next(n) {} Node(const T &v, Node *p, Node *n) : value(v), prev(p), next(n) {} } root; public: // Types class iterator; class const_iterator; class reverse_iterator; class const_reverse_iterator; typedef T &reference; typedef const T &const_reference; typedef size_t size_type; //typedef difference_type; typedef T value_type; //typedef allocator_type; // Construct/Copy/Destroy list() : root(&root, &root) {} //list(size_type); //list(size_type, const T &); //list(const_iterator, const_iterator); list(const list &x) : root(&root, &root) { insert(begin(), x.begin(), x.end()); } ~list() { Node *n, *nn; for (n = root.next; n != &root; n = nn) { nn = n->next; delete n; } } const list &operator=(const list &x) { clear(); insert(begin(), x.begin(), x.end()); return *this; } //void assign(...); //allocator_type get_allocator() const; // Iterators iterator begin() { return iterator(root.next); } const_iterator begin() const { return const_iterator(root.next); } iterator end() { return iterator(&root); } const_iterator end() const { return const_iterator(&root); } reverse_iterator rbegin() { return reverse_iterator(&root); } const_reverse_iterator rbegin() const { return const_reverse_iterator(&root); } reverse_iterator rend() { return reverse_iterator(root.next); } const_reverse_iterator rend() const { return const_reverse_iterator(root.next); } // Capacity bool empty() const { return root.next == &root; } size_type size() const { size_type res = 0; for (const Node *n = root.next; n != &root; n = n->next) res++; return res; } //size_type max_size() const; //void resize(...); // Element access reference front() { return root.next->value; } const_reference front() const { return 
root.next->value; } reference back() { return root.prev->value; } const_reference back() const { return root.prev->value; } // Modifiers void push_front(const T &x) { Node *n = new Node(x, &root, root.next); root.next->prev = n; root.next = n; } void pop_front() { Node *f = root.next; if (f != &root) { f->next->prev = &root; root.next = f->next; delete f; } } void push_back(const T &x) { Node *n = new Node(x, root.prev, &root); root.prev->next = n; root.prev = n; } void pop_back() { Node *f = root.prev; if (f != &root) { f->prev->next = &root; root.prev = f->prev; delete f; } } iterator insert(iterator pos) { Node *n = new Node(pos.node->prev, pos.node); pos.node->prev->next = n; pos.node->prev = n; return iterator(n); } iterator insert(iterator pos, const T &value) { Node *n = new Node(value, pos.node->prev, pos.node); pos.node->prev->next = n; pos.node->prev = n; return iterator(n); } //void insert(iterator, size_type, const T&); void insert(iterator pos, const_iterator from, const_iterator to) { while (from != to) { insert(pos, *from); ++from; } } //iterator erase(iterator); //iterator erase(iterator, iterator); //void swap(list &); void clear() { Node *n, *nn; for (n = root.next; n != &root; n = nn) { nn = n->next; delete n; } root.prev = root.next = &root; } // Special mutative operations on list void splice(iterator pos, list &x) { Node *n, *nn; for (n = x.root.next; n != &x.root; n = nn) { nn = n->next; pos.node->prev->next = n; n->prev = pos.node->prev; n->next = pos.node; pos.node->prev = n; } x.root.prev = x.root.next = &x.root; } void splice(iterator pos1, list & /*x*/ , iterator pos2) { pos1.node->prev->next = pos2.node; pos2.node->prev->next = pos2.node->next; pos2.node->next->prev = pos2.node->prev; pos2.node->prev = pos1.node->prev; pos2.node->next = pos1.node; pos1.node->prev = pos2.node; } void splice(iterator pos1, list & /*x*/ , iterator pos2, iterator pos3) { pos1.node->prev->next = pos2.node; pos2.node->prev->next = pos3.node; 
pos3.node->prev->next = pos1.node; Node *tmp = pos2.node->prev; pos2.node->prev = pos1.node->prev; pos1.node->prev = pos3.node->prev; pos3.node->prev = tmp; } //void remove(const T &); //void remove_is(Predicate); //void unique(); //void unique(BinaryPredicate); //void merge(list &); //void merge(list &, Compare); //void sort(); //void sort(Compare); //void reverse(); class iterator { public: iterator() : node((Node *) 0xa3a3a3a3) {} iterator operator++() { return node = node->next; } // Prefix ++ iterator operator--() { return node = node->prev; } // Prefix -- iterator operator++(int) // Postfix ++ { iterator i(node); node = node->next; return i; } iterator operator--(int) // Postfix -- { iterator i(node); node = node->prev; return i; } T &operator*() const { return node->value; } bool operator==(const iterator x) { return node == x.node; } bool operator!=(const iterator x) { return node != x.node; } protected: iterator(Node *n) : node(n) {} Node *node; friend list; }; class const_iterator { public: const_iterator() : node((Node *) 0xa3a3a3a3) {} // Can convert "iterator" to "const_iterator", but not the other way round. 
const_iterator(iterator x) : node(x.node) {} const_iterator operator++() { return node = node->next; } // Prefix ++ const_iterator operator--() { return node = node->prev; } // Prefix -- const_iterator operator++(int) // Postfix ++ { const_iterator i(node); node = node->next; return i; } const_iterator operator--(int) // Postfix -- { const_iterator i(node); node = node->prev; return i; } const T &operator*() const { return node->value; } bool operator==(const const_iterator x) { return node == x.node; } bool operator!=(const const_iterator x) { return node != x.node; } protected: const_iterator(const Node *n) : node(n) {} const Node *node; friend list; }; class reverse_iterator : public iterator { public: reverse_iterator() {} reverse_iterator(const iterator &x) : iterator(x) {} iterator operator++() { return iterator::operator--(); } // Prefix ++ iterator operator--() { return iterator::operator++(); } // Prefix -- iterator operator++(int) { return iterator::operator--(0); } // Postfix ++ iterator operator--(int) { return iterator::operator++(0); } // Postfix -- T &operator*() const { return node->prev->value; } protected: reverse_iterator(Node *n) : iterator(n) {} private: friend list; }; class const_reverse_iterator : public const_iterator { public: const_reverse_iterator() {} const_reverse_iterator(const const_iterator &x) : const_iterator(x) {} const_iterator operator++() // Prefix ++ { return const_iterator::operator--(); } const_iterator operator--() // Prefix -- { return const_iterator::operator++(); } const_iterator operator++(int) // Postfix ++ { return const_iterator::operator--(0); } const_iterator operator--(int) // Postfix -- { return const_iterator::operator++(0); } const T &operator*() const { return node->prev->value; } protected: const_reverse_iterator(const Node *n) : const_iterator(n) {} private: friend list; }; }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* 
------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/map0100644000000000000000000004347507012076004015725 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __map_INCLUDED__ /* { */ #define __map_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: map,v 1.2 1999/11/09 19:54:44 arno Exp $" #include "../rb_tree.h" /* ------------------------------------------------------------------------- */ template class map__node; template class map__iterator; template class map__const_iterator; template class map; template class multimap; template struct map__node : public rb_tree::node_type { map__node(const pair &v) : value(v) { } //~map__node() { //} pair value; }; template class map__iterator { public: map__iterator() { } map__iterator(const map__iterator &x) : node(x.node) { } pair &operator*() const { return ((map__node *) node)->value; } pair *operator->() const { return &((map__node *) node)->value; } bool operator==(const map__iterator &x) const { return node == x.node; } bool operator!=(const map__iterator &x) const { return node != x.node; } const map__iterator &operator++() { // pre-increment node = rb_tree::successor(node); return *this; } private: map__iterator(rb_tree::node_type *n) : node(n) { } rb_tree::node_type *node; friend map; friend multimap; friend map__const_iterator; }; template class map__const_iterator { public: map__const_iterator() { } map__const_iterator(const map__const_iterator &x) : node(x.node) { } map__const_iterator(const map__iterator &x) : node(x.node) 
{ } const pair &operator*() const { return ((const map__node *) node)->value; } const pair *operator->() const { return &((const map__node *) node)->value; } bool operator==(const map__const_iterator &x) const { return node == x.node; } bool operator!=(const map__const_iterator &x) const { return node != x.node; } const map__const_iterator &operator++() { // pre-incr node = rb_tree::successor(node); return *this; } private: map__const_iterator(const rb_tree::node_type *n) : node(n) { } const rb_tree::node_type *node; friend map; friend multimap; }; /* ------------------------------------------------------------------------- */ template class map : public rb_tree { // Public types public: typedef pair value_type; typedef map__iterator iterator; typedef map__const_iterator const_iterator; // Private types private: typedef map__node node_type; // Construct/Copy/Destroy public: explicit map() : rb_tree() { } map(const_iterator i1, const_iterator i2) : rb_tree(i1.node, i2.node, copy_node2) { } map(const map &x) : rb_tree(x) { } const map &operator=( const map &x ) { rb_tree::operator=(x); return *this; } ~map() { clear(delete_node2); } // Iterators iterator begin() { return iterator(rb_tree::begin()); } const_iterator begin() const { return const_iterator(rb_tree::begin()); } iterator end() { return iterator(rb_tree::end()); } const_iterator end() const { return const_iterator(rb_tree::end()); } //reverse_iterator rbegin(); //const_reverse_iterator rbegin() const; //reverse_iterator rend(); //const_reverse_iterator rend() const; // Capacity //bool empty() const; // Inherited from "rb_tree". //size_type size() const; // Inherited from "rb_tree". //size_type max_size() const; // Inherited from "rb_tree". 
// Element access mapped_type &operator[](const key_type &x) { rb_tree::node_type *n = rb_tree::find_any((value_pointer) &x); if (n == rb_tree::end()) { n = rb_tree::insert(new node_type(value_type(x, mapped_type()))); } return ((node_type *) n)->value.second; } // Modifiers // Must not use "iterator", else CFRONT fails. pair, bool> insert(const value_type &x) { if (rb_tree::find_any((value_pointer) &x.first) != rb_tree::end()) { return pair(end(), false); } return pair( iterator(rb_tree::insert(new node_type(x))), true ); } iterator insert(iterator, const value_type &); void insert(const_iterator from, const_iterator to) { rb_tree::insert(from.node, to.node); } size_type erase(const key_type &x) { return rb_tree::erase_one((value_pointer) &x); } iterator erase(iterator i) { return iterator(rb_tree::erase(i.node)); } iterator erase(iterator i1, iterator i2) { return iterator(rb_tree::erase(i1.node, i2.node)); } void swap(map &x) { rb_tree::swap((rb_tree &) x); } //void clear(); // Inherited from "rb_tree". 
// Map operations iterator find(const key_type &x) { return iterator(rb_tree::find_any((value_pointer) &x)); } const_iterator find(const key_type &x) const { return const_iterator(rb_tree::find_any((value_pointer) &x)); } size_type count(const key_type &x) const { return find_any((value_pointer) &x) != rb_tree::end(); } iterator lower_bound(const key_type &x) { return iterator(rb_tree::lower_bound((value_pointer) &x)); } const_iterator lower_bound(const key_type &x) const { return const_iterator(rb_tree::lower_bound((value_pointer) &x)); } iterator upper_bound(const key_type &x) { return iterator(rb_tree::upper_bound((value_pointer) &x)); } const_iterator upper_bound(const key_type &x) const { return const_iterator(rb_tree::upper_bound((value_pointer) &x)); } // Must not use "pair", else CFRONT fails pair< map__iterator, map__iterator > equal_range(const key_type &x) { return pair(lower_bound(x), upper_bound(x)); } // Must not use "pair", else CFRONT fails pair< map__const_iterator, map__const_iterator > equal_range(const key_type &x) const { return pair( lower_bound(x), upper_bound(x) ); } bool operator==(const map &x) const { return rb_tree::operator==((const rb_tree &) x); } bool operator<(const map &x) const { return rb_tree::operator<((const rb_tree &) x); } // Implementation of "rb_tree"'s virtual methods. 
private: /*virtual*/ bool node_less_than( const rb_tree::node_type *x, const rb_tree::node_type *y ) const { return ( ((const node_type *) x)->value.first < ((const node_type *) y)->value.first ); } /*virtual*/ bool node_less_than( value_pointer x, const rb_tree::node_type *y ) const { return *(const key_type *) x < ((const node_type *) y)->value.first; } /*virtual*/ bool node_less_than( const rb_tree::node_type *x, value_pointer y ) const { return ((const node_type *) x)->value.first < *(const key_type *) y; } /*virtual*/ rb_tree::node_type *copy_node(const rb_tree::node_type *n) const { return new node_type(((const node_type *) n)->value); } /*virtual*/ void delete_node(rb_tree::node_type *n) const { delete (node_type *) n; } typedef void (*key_mapped_printer)( ostream &, const key_type &, const mapped_type & ); /*virtual*/ void print_node_value( const rb_tree::node_type &n, ostream &os, void *closure ) const { (*(key_mapped_printer *) closure)( os, ((const node_type &) n).value.first, ((const node_type &) n).value.second ); } void print(ostream &os, key_mapped_printer np) const { rb_tree::print(os, (void *) &np); } friend ostream &operator<<( ostream &, const map & ); // Needed by "map(iter, iter)". static rb_tree::node_type *copy_node2(const rb_tree::node_type *n) { return new node_type(((const node_type *) n)->value); } // Needed by "~map()". 
static void delete_node2(rb_tree::node_type *n) { delete (node_type *) n; } friend map__iterator; friend map__const_iterator; }; /* ------------------------------------------------------------------------- */ template class multimap : public rb_tree { // Public types public: typedef pair value_type; typedef map__iterator iterator; typedef map__const_iterator const_iterator; // Private types private: typedef map__node node_type; // Construct/Copy/Destroy public: explicit multimap() : rb_tree() { } multimap(const_iterator i1, const_iterator i2) : rb_tree(i1.node, i2.node, copy_node2) { } multimap(const multimap &x) : rb_tree(x) { } const multimap &operator=( const multimap &x ) { rb_tree::operator=(x); return *this; } ~multimap() { clear(delete_node2); } // Iterators iterator begin() { return iterator((node_type *) rb_tree::begin()); } const_iterator begin() const { return const_iterator((const node_type *) rb_tree::begin()); } iterator end() { return iterator((node_type *) rb_tree::end()); } const_iterator end() const { return const_iterator((const node_type *) rb_tree::end()); } //reverse_iterator rbegin(); //const_reverse_iterator rbegin() const; //reverse_iterator rend(); //const_reverse_iterator rend() const; // Capacity //bool empty() const; // Inherited from "rb_tree". //size_type size() const; // Inherited from "rb_tree". //size_type max_size() const; // Inherited from "rb_tree". 
// Modifiers iterator insert(const value_type &x) { return iterator((node_type *) rb_tree::insert(new node_type(x))); } //iterator insert(iterator, const value_type &); void insert(const_iterator from, const_iterator to) { rb_tree::insert(from.node, to.node); } size_type erase(const key_type &x) { return rb_tree::erase_all((value_pointer) &x); } iterator erase(iterator i) { return iterator(rb_tree::erase(i.node)); } iterator erase(iterator i1, iterator i2) { return iterator(rb_tree::erase(i1.node, i2.node)); } void swap(multimap &x) { rb_tree::swap((rb_tree &) x); } //void clear(); // Inherited from "rb_tree". // Multimap operations iterator find(const key_type &x) { return iterator(rb_tree::find_first((value_pointer) &x)); } const_iterator find(const key_type &x) const { return const_iterator(rb_tree::find_first((value_pointer) &x)); } size_type count(const key_type &x) const { return rb_tree::count((value_pointer) &x); } iterator lower_bound(const key_type &x) { return iterator(rb_tree::lower_bound((value_pointer) &x)); } const_iterator lower_bound(const key_type &x) const { return const_iterator(rb_tree::lower_bound((value_pointer) &x)); } iterator upper_bound(const key_type &x) { return iterator(rb_tree::upper_bound((value_pointer) &x)); } const_iterator upper_bound(const key_type &x) const { return const_iterator(rb_tree::upper_bound((value_pointer) &x)); } // Must not use "pair", else CFRONT fails // G++ 2.7.2.1 cannot compile this ("field "first" has incomplete type")!? 
//pair< // map__iterator, // map__iterator //> equal_range(const key_type &x) { // return pair(lower_bound(x), upper_bound(x)); //} // Must not use "pair", else CFRONT fails pair< map__const_iterator, map__const_iterator > equal_range(const key_type &x) const { return pair(lower_bound(x), upper_bound(x)); } bool operator==(const multimap &x) const { return rb_tree::operator==((const rb_tree &) x); } bool operator<(const multimap &x) const { return rb_tree::operator<((const rb_tree &) x); } // Implementation of "rb_tree"'s virtual methods. private: /*virtual*/ bool node_less_than( const rb_tree::node_type *x, const rb_tree::node_type *y ) const { return ( ((const node_type *) x)->value.first < ((const node_type *) y)->value.first ); } /*virtual*/ bool node_less_than( value_pointer x, const rb_tree::node_type *y ) const { return *(const key_type *) x < ((const node_type *) y)->value.first; } /*virtual*/ bool node_less_than( const rb_tree::node_type *x, value_pointer y ) const { return ((const node_type *) x)->value.first < *(const key_type *) y; } /*virtual*/ rb_tree::node_type *copy_node(const rb_tree::node_type *n) const { return new node_type(((const node_type *) n)->value); } /*virtual*/ void delete_node(rb_tree::node_type *n) const { delete (node_type *) n; } typedef void (*key_mapped_printer)( ostream &, const key_type &, const mapped_type & ); /*virtual*/ void print_node_value( const rb_tree::node_type &n, ostream &os, void *closure ) const { (*(key_mapped_printer *) closure)( os, ((const node_type &) n).value.first, ((const node_type &) n).value.second ); } void print(ostream &os, key_mapped_printer np) const { rb_tree::print(os, (void *) &np); } friend ostream &operator<<( ostream &, const multimap & ); // Needed by "multimap(iter, iter)". static rb_tree::node_type *copy_node2(const rb_tree::node_type *n) { return new node_type(((const node_type *) n)->value); } // Needed by "~multimap()". 
static void delete_node2(rb_tree::node_type *n) { delete (node_type *) n; } friend map__iterator; friend map__const_iterator; }; /* ------------------------------------------------------------------------- */ /* * MUST DEFINE THESE HERE AT THE END OF THIS FILE; FOR ELSE STUPID CFRONT * INSTANTIATES IT OUT-OF-LINE!? */ /* Default printer for a single map entry: writes the text "(key => mapped)" to os using operator<< on both parts. NOTE(review): the parameter list after "template" (and the argument lists on map / multimap / map__print_key_mapped below) appear to have been lost in extraction - confirm against upstream. */ template inline void map__print_key_mapped( ostream &os, const key_type &key, const mapped_type &mapped ) { os << "(" << key << " => " << mapped << ")"; } /* Stream insertion for map: hands the default entry printer to x.print() and returns os so calls can be chained. */ template inline ostream & operator<<(ostream &os, const map &x) { x.print(os, map__print_key_mapped); return os; } /* Stream insertion for multimap: identical delegation to x.print() with the default entry printer; returns os. */ template inline ostream & operator<<(ostream &os, const multimap &x) { x.print(os, map__print_key_mapped); return os; } /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/memory0100644000000000000000000000455406670774177016504 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4.
The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __memory_INCLUDED__ /* { */ #define __memory_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: memory,v 1.2 1999/03/08 16:19:11 arno Exp $" #include "auto_ptr.h" /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/new0100644000000000000000000000476007005304440015733 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* ------------------------------------------------------------------------- */ #ifndef __new_INCLUDED__ /* { */ #define __new_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: new,v 1.1 1999/10/26 10:57:36 arno Exp $" #include /* ------------------------------------------------------------------------- */ /* Placement form of operator new: allocates nothing and simply returns the caller-supplied address p; the size argument is ignored. NOTE(review): the "#include" directive above has no header name left - presumably stripped in extraction (something must declare size_t); confirm against upstream. */ inline void * operator new(size_t, void *p) { return p; } /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/set0100644000000000000000000003734207005304441015740 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission.
* * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __set_INCLUDED__ /* { */ #define __set_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: set,v 1.1 1999/10/26 10:57:37 arno Exp $" #include "../rb_tree.h" /* ------------------------------------------------------------------------- */ template class set__node; template class set__iterator; template class set__const_iterator; template class set; template class multiset; /* Tree node for one stored element: extends rb_tree::node_type with a copy of the value passed to the constructor. NOTE(review): throughout this header the parameter list after "template" and the argument lists on names such as set__node, set__iterator and pair appear to have been stripped in extraction - confirm against upstream before compiling. */ template struct set__node : public rb_tree::node_type { set__node(const value_type &v) : value(v) { } //~set__node() { //} value_type value; }; /* Mutable iterator: wraps an rb_tree node pointer. operator* and operator-> expose the node's value, == and != compare node pointers, and pre-increment moves to rb_tree::successor(node); no post-increment or decrement is defined. The default constructor does not initialise node. Construction from a raw node pointer is private and reserved for the friends listed at the end of the class. */ template class set__iterator { public: set__iterator() { } set__iterator(const set__iterator &x) : node(x.node) { } value_type &operator*() const { return ((set__node *) node)->value; } value_type *operator->() const { return &((set__node *) node)->value; } bool operator==(const set__iterator &x) const { return node == x.node; } bool operator!=(const set__iterator &x) const { return node != x.node; } const set__iterator &operator++() { // pre-increment node = rb_tree::successor(node); return *this; } private: set__iterator(rb_tree::node_type *n) : node(n) { } rb_tree::node_type *node;
friend set; friend multiset; friend set__const_iterator; }; /* Read-only counterpart of set__iterator: same operations but yields const references/pointers, and can additionally be constructed from a set__iterator. */ template class set__const_iterator { public: set__const_iterator() { } set__const_iterator(const set__const_iterator &x) : node(x.node) { } set__const_iterator(const set__iterator &x) : node(x.node) { } const value_type &operator*() const { return ((const set__node *) node)->value; } const value_type *operator->() const { return &((const set__node *) node)->value; } bool operator==(const set__const_iterator &x) const { return node == x.node; } bool operator!=(const set__const_iterator &x) const { return node != x.node; } const set__const_iterator &operator++() { // pre-increment node = rb_tree::successor(node); return *this; } private: set__const_iterator(const rb_tree::node_type *n) : node(n) { } const rb_tree::node_type *node; friend set; friend multiset; }; /* ------------------------------------------------------------------------- */ /* Unique-element container layered on rb_tree; elements are ordered with operator< on value_type (see the node_less_than overloads). insert(x) first probes with find_any() and, when x is already present, returns the pair (end(), false) without inserting; otherwise it inserts a new node and returns (iterator, true). count() yields the result of comparing find_any() against end(). The destructor releases nodes through clear(delete_node2). insert(iterator, const value_type &) is declared but no definition is visible in this header. NOTE(review): std::set::insert returns an iterator to the already-present element rather than end() - confirm no caller depends on the difference. */ template class set : public rb_tree { // Public types public: typedef set__iterator iterator; typedef set__const_iterator const_iterator; // Private types private: typedef set__node node_type; // Construct/Copy/Destroy public: explicit set() : rb_tree() { } set(const_iterator i1, const_iterator i2) : rb_tree(i1.node, i2.node, copy_node2) { } set(const set &x) : rb_tree(x) { } const set &operator=(const set &x) { rb_tree::operator=(x); return *this; } ~set() { clear(delete_node2); } // Iterators iterator begin() { return iterator(rb_tree::begin()); } const_iterator begin() const { return const_iterator(rb_tree::begin()); } iterator end() { return iterator(rb_tree::end()); } const_iterator end() const { return const_iterator(rb_tree::end()); } //reverse_iterator rbegin(); //const_reverse_iterator rbegin() const; //reverse_iterator rend(); //const_reverse_iterator rend() const; // Capacity //bool empty() const; // Inherited from "rb_tree". //size_type size() const; // Inherited from "rb_tree". //size_type max_size() const; // Inherited from "rb_tree".
// Modifiers // Must not use "iterator", else CFRONT fails. pair, bool> insert(const value_type &x) { if (rb_tree::find_any((value_pointer) &x) != rb_tree::end()) { return pair(end(), false); } return pair( iterator(rb_tree::insert(new node_type(x))), true ); } iterator insert(iterator, const value_type &); void insert(const_iterator from, const_iterator to) { rb_tree::insert(from.node, to.node); } size_type erase(const value_type &x) { return rb_tree::erase_one((value_pointer) &x); } iterator erase(iterator i) { return iterator(rb_tree::erase(i.node)); } iterator erase(iterator i1, iterator i2) { return iterator(rb_tree::erase(i1.node, i2.node)); } void swap(set &x) { rb_tree::swap((rb_tree &) x); } //void clear(); // Inherited from "rb_tree". // Set operations iterator find(const value_type &x) { return iterator(rb_tree::find_any((value_pointer) &x)); } const_iterator find(const value_type &x) const { return const_iterator(rb_tree::find_any((value_pointer) &x)); } size_type count(const value_type &x) const { return find_any((value_pointer) &x) != rb_tree::end(); } iterator lower_bound(const value_type &x) { return iterator(rb_tree::lower_bound((value_pointer) &x)); } const_iterator lower_bound(const value_type &x) const { return const_iterator(rb_tree::lower_bound((value_pointer) &x)); } iterator upper_bound(const value_type &x) { return iterator(rb_tree::upper_bound((value_pointer) &x)); } const_iterator upper_bound(const value_type &x) const { return const_iterator(rb_tree::upper_bound((value_pointer) &x)); } // Must not use "pair", else CFRONT fails pair< set__iterator, set__iterator > equal_range(const value_type &x) { return pair(lower_bound(x), upper_bound(x)); } // Must not use "pair", else CFRONT fails pair< set__const_iterator, set__const_iterator > equal_range(const value_type &x) const { return pair( lower_bound(x), upper_bound(x) ); } bool operator==(const set &x) const { return rb_tree::operator==((const rb_tree &) x); } bool operator<(const set &x)
const { return rb_tree::operator<((const rb_tree &) x); } // Implementation of "rb_tree"'s virtual methods. private: /*virtual*/ bool node_less_than( const rb_tree::node_type *x, const rb_tree::node_type *y ) const { return ((const node_type *) x)->value < ((const node_type *) y)->value; } /*virtual*/ bool node_less_than( value_pointer x, const rb_tree::node_type *y ) const { return *(const value_type *) x < ((const node_type *) y)->value; } /*virtual*/ bool node_less_than( const rb_tree::node_type *x, value_pointer y ) const { return ((const node_type *) x)->value < *(const value_type *) y; } /*virtual*/ rb_tree::node_type *copy_node(const rb_tree::node_type *n) const { return new node_type(((const node_type *) n)->value); } /*virtual*/ void delete_node(rb_tree::node_type *n) const { delete (node_type *) n; } typedef void (*value_printer)(ostream &, const value_type &); /*virtual*/ void print_node_value( const rb_tree::node_type &n, ostream &os, void *closure ) const { (*(value_printer *) closure)(os, ((const node_type &) n).value); } void print(ostream &os, value_printer np) const { rb_tree::print(os, (void *) &np); } friend ostream &operator<<(ostream &, const set &); // Needed by "set(iter, iter)". static rb_tree::node_type *copy_node2(const rb_tree::node_type *n) { return new node_type(((const node_type *) n)->value); } // Needed by "~set()".
static void delete_node2(rb_tree::node_type *n) { delete (node_type *) n; } friend set__iterator; friend set__const_iterator; }; /* ------------------------------------------------------------------------- */ /* Duplicate-permitting variant of set: insert(x) always adds a new node and returns its iterator, erase(x) removes every match through erase_all(), find() uses find_first() and count() delegates to rb_tree::count(). Ordering again comes from operator< on value_type. NOTE(review): unlike set, no print(ostream &, value_printer) member is visible in this class although the operator<< for multiset at the end of this header calls x.print(os, set__print_value), and the friend declaration further down names set rather than multiset - confirm against upstream. */ template class multiset : public rb_tree { // Public types public: typedef set__iterator iterator; typedef set__const_iterator const_iterator; // Private types private: typedef set__node node_type; // Construct/Copy/Destroy public: explicit multiset() : rb_tree() { } multiset(const_iterator i1, const_iterator i2) : rb_tree(i1.node, i2.node, copy_node2) { } multiset(const multiset &x) : rb_tree(x) { } const multiset &operator=(const multiset &x) { rb_tree::operator=(x); return *this; } ~multiset() { clear(delete_node2); } // Iterators iterator begin() { return iterator((node_type *) rb_tree::begin()); } const_iterator begin() const { return const_iterator((const node_type *) rb_tree::begin()); } iterator end() { return iterator((node_type *) rb_tree::end()); } const_iterator end() const { return const_iterator((const node_type *) rb_tree::end()); } //reverse_iterator rbegin(); //const_reverse_iterator rbegin() const; //reverse_iterator rend(); //const_reverse_iterator rend() const; // Capacity //bool empty() const; // Inherited from "rb_tree". //size_type size() const; // Inherited from "rb_tree". //size_type max_size() const; // Inherited from "rb_tree".
// Modifiers iterator insert(const value_type &x) { return iterator((node_type *) rb_tree::insert(new node_type(x))); } //iterator insert(iterator, const value_type &); void insert(const_iterator from, const_iterator to) { rb_tree::insert(from.node, to.node); } size_type erase(const value_type &x) { return rb_tree::erase_all((value_pointer) &x); } iterator erase(iterator i) { return iterator(rb_tree::erase(i.node)); } iterator erase(iterator i1, iterator i2) { return iterator(rb_tree::erase(i1.node, i2.node)); } void swap(multiset &x) { rb_tree::swap((rb_tree &) x); } //void clear(); // Inherited from "rb_tree". // Multiset operations iterator find(const value_type &x) { return iterator(rb_tree::find_first((value_pointer) &x)); } const_iterator find(const value_type &x) const { return const_iterator(rb_tree::find_first((value_pointer) &x)); } size_type count(const value_type &x) const { return rb_tree::count((value_pointer) &x); } iterator lower_bound(const value_type &x) { return iterator(rb_tree::lower_bound((value_pointer) &x)); } const_iterator lower_bound(const value_type &x) const { return const_iterator(rb_tree::lower_bound((value_pointer) &x)); } iterator upper_bound(const value_type &x) { return iterator(rb_tree::upper_bound((value_pointer) &x)); } const_iterator upper_bound(const value_type &x) const { return const_iterator(rb_tree::upper_bound((value_pointer) &x)); } // Must not use "pair", else CFRONT fails pair< set__iterator, set__iterator > equal_range(const value_type &x) { return pair(lower_bound(x), upper_bound(x)); } // Must not use "pair", else CFRONT fails pair< set__const_iterator, set__const_iterator > equal_range(const value_type &x) const { return pair(lower_bound(x), upper_bound(x)); } bool operator==(const multiset &x) const { return rb_tree::operator==((const rb_tree &) x); } bool operator<(const multiset &x) const { return rb_tree::operator<((const rb_tree &) x); } // Implementation of "rb_tree"'s virtual methods.
private: /*virtual*/ bool node_less_than( const rb_tree::node_type *x, const rb_tree::node_type *y ) const { return ((const node_type *) x)->value < ((const node_type *) y)->value; } /*virtual*/ bool node_less_than( value_pointer x, const rb_tree::node_type *y ) const { return *(const value_type *) x < ((const node_type *) y)->value; } /*virtual*/ bool node_less_than( const rb_tree::node_type *x, value_pointer y ) const { return ((const node_type *) x)->value < *(const value_type *) y; } /*virtual*/ rb_tree::node_type *copy_node(const rb_tree::node_type *n) const { return new node_type(((const node_type *) n)->value); } /*virtual*/ void delete_node(rb_tree::node_type *n) const { delete (node_type *) n; } typedef void (*value_printer)(ostream &, const value_type &); /*virtual*/ void print_node_value( const rb_tree::node_type &n, ostream &os, void *closure ) const { (*(value_printer *) closure)(os, ((const node_type &) n).value); } friend ostream &operator<<(ostream &, const set &); // Needed by "multiset(iter, iter)". static rb_tree::node_type *copy_node2(const rb_tree::node_type *n) { return new node_type(((const node_type *) n)->value); } // Needed by "~multiset()".
static void delete_node2(rb_tree::node_type *n) { delete (node_type *) n; } friend set__iterator; friend set__const_iterator; }; /* ------------------------------------------------------------------------- */ /* Default element printer: writes value to os with operator<<. */ template inline void set__print_value(ostream &os, const value_type &value) { os << value; } /* Stream insertion for set: passes the default element printer to x.print() and returns os. */ template inline ostream & operator<<(ostream &os, const set &x) { x.print(os, set__print_value); return os; } /* Stream insertion for multiset: same delegation to x.print(); returns os. */ template inline ostream & operator<<(ostream &os, const multiset &x) { x.print(os, set__print_value); return os; } /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/string0100644000000000000000000002271407005566334016463 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission.
* * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __string_INCLUDED__ /* { */ #define __string_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: string,v 1.11 1999/10/27 12:15:24 arno Exp $" #include // For "size_t". #ifdef BOOL_DEFINITION BOOL_DEFINITION #undef BOOL_DEFINITION #endif /* ------------------------------------------------------------------------- */ class istream; class ostream; /* ------------------------------------------------------------------------- */ /* * This is a simplified, but otherwise correct implementation of the "string" * class as specified by the ANSI C++ library. * * Missing features: * (1) "string" is not derived from "basic_string". * (2) "string::traits" is missing. * (3) "string::Allocator" is missing ("string" uses "malloc()" instead). * (4) Several "unimportant" methods are not implemented (but they are * "declared" in comments). 
*/ /* ------------------------------------------------------------------------- */ /* Hard-coded stand-in for the character-traits template argument: char_type is char and eos() returns the terminator '\0'. */ struct char_traits { typedef char char_type; static char eos() { return '\0'; } }; /* ------------------------------------------------------------------------- */ /* Non-template string class storing its text behind the single pointer p. Only the trivial members are defined inline in this header: the default constructor points p at the static member null; begin() and rend() return p; empty() tests whether *p is '\0'; operator[] and at() index p directly with no range check; c_str() and data() return p. Every other member is declared here only. iterator and reverse_iterator are both typedefs for plain char pointers. NOTE(review): with reverse_iterator being a raw pointer, it is unclear from this header how rbegin()/rend() are meant to be traversed - check the out-of-line definitions. */ class string { public: // "traits" and "Allocator" should be template parameters, but we hard-code // them. typedef char_traits traits; //typedef Allocator; // Types typedef traits traits_type; typedef traits::char_type value_type; //typedef allocator_type; typedef size_t size_type; //typedef difference_type; typedef char &reference; typedef const char &const_reference; //typedef pointer; //typedef const_pointer; typedef char *iterator; typedef const char *const_iterator; typedef char *reverse_iterator; typedef const char *const_reverse_iterator; static const size_type npos; // Constructors/Destructors string() : p(&null) {} string(const string &); //string(const string &, size_type pos, size_type n = npos); string(const char *, size_type n); string(const char *); string(size_type, char); //string(const_iterator, const_iterator); ~string(); // Assignment operators const string &operator=(const string &); const string &operator=(const char *); const string &operator=(char); // Iterators iterator begin() { return p; } const_iterator begin() const { return p; } iterator end(); const_iterator end() const; reverse_iterator rbegin(); const_reverse_iterator rbegin() const; reverse_iterator rend() { return p; } const_reverse_iterator rend() const { return p; } // Capacity size_type size() const; size_type length() const; //size_type max_size() const; void resize(size_type n, char c = '\0'); //size_type capacity() const; //void reserve(size_type); bool empty() const { return *p == '\0'; } // Element access char operator[](size_type pos) const { return p[pos]; } reference operator[](size_type pos) { return p[pos]; } const_reference at(size_type pos) const { return p[pos]; } reference at(size_type pos) { return p[pos]; } // Modifiers
//const string &operator+=(const string &); const string &operator+=(const char *); const string &operator+=(char); //string &append(const string &); //string &append(const string &, size_type, size_type); //string &append(const char *, size_type); //string &append(const char *); string &append(size_type, char); //string &append(const_iterator, const_iterator); //string &assign(const string &s); string &assign(const string &s, size_type pos, size_type n); //string &assign(const char *s, size_type n); //string &assign(const char *s); //string &assign(size_type n, char c); //string &assign(string::const_iterator from, string::const_iterator to); //string &insert(size_type, const string &); //string &insert(size_type, const string &, size_type, size_type); //string &insert(size_type, const char *, size_type); //string &insert(size_type, const char *); string &insert(size_type pos, size_type n, char c); //iterator insert(iterator, char = '\0'); //void insert(iterator, size_type, char); //void insert(iterator, const_iterator, const_iterator); string &erase(size_type pos = 0, size_type n = npos); //iterator erase(iterator); //iterator erase(iterator, iterator); //string &replace(size_type pos, size_type n1, const string &s); string &replace( size_type pos, size_type n1, const string &s, size_type pos2, size_type n2 ); string &replace( size_type pos, size_type n1, const char *s, size_type n2 ); string &replace(size_type pos, size_type n1, const char *); string &replace(size_type pos, size_type n1, size_type n2, char); //string &replace(...); //size_type copy(char *, size_type, size_type = 0); //void swap(string &); // String operations const char *c_str() const { return p; } const char *data() const { return p; } //const allocator_type &get_allocator() const; //size_type find(...) const; //size_type rfind(...) const; //size_type find_first_of(...) const; //size_type find_last_of(...) const; //size_type find_first_not_of(...) const; //size_type find_last_not_of(...)
const; //string substr(size_type pos = 0, size_type n = npos) const; //int compare(...) const; private: friend string operator+(const string &, const string &); friend string operator+(const char *, const string &); friend string operator+(char, const string &); friend string operator+(const string &, const char * ); friend string operator+(const string &, char ); friend bool operator==(const string &, const string &); friend bool operator==(const char *, const string &); friend bool operator==(const string &, const char * ); friend bool operator<(const string &, const string &); friend bool operator<(const char *, const string &); friend bool operator<(const string &, const char * ); friend bool operator!=(const string &, const string &); friend bool operator!=(const char *, const string &); friend bool operator!=(const string &, const char * ); friend bool operator>(const string &, const string &); friend bool operator>(const char *, const string &); friend bool operator>(const string &, const char * ); friend bool operator<=(const string &, const string &); friend bool operator<=(const char *, const string &); friend bool operator<=(const string &, const char * ); friend bool operator>=(const string &, const string &); friend bool operator>=(const char *, const string &); friend bool operator>=(const string &, const char * ); //friend istream &operator>>(istream &, string &); friend ostream &operator<<(ostream &, const string &); friend istream &getline(istream &, string &, char delim = '\n'); string(size_type); // Create an uninitialized string / internal use only. char *p; // Points to a null-terminated string. static char null; // For a fast default constructor.
}; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/utility0100644000000000000000000000513306670774205016661 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /* ------------------------------------------------------------------------- */ #ifndef __utility_INCLUDED__ /* { */ #define __utility_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: utility,v 1.2 1999/03/08 16:19:17 arno Exp $" /* ------------------------------------------------------------------------- */ /* Two-member value holder with public fields first (T1) and second (T2). The no-argument constructor names neither member in an initializer list; the one-argument constructor copies x into first only and is not marked explicit; the two-argument constructor copies x into first and y into second. NOTE(review): the parameter list after "template" appears to have been stripped in extraction (T1 and T2 are otherwise undeclared) - confirm against upstream. */ template struct pair { T1 first; T2 second; pair() {} pair(const T1 &x) : first(x) {} pair(const T1 &x, const T2 &y) : first(x), second(y) {} }; /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/libstd/include/vector0100644000000000000000000002313207012076046016444 0ustar rootroot /* ------------------------------------------------------------------------- */ /* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* ------------------------------------------------------------------------- */ #ifndef __vector_INCLUDED__ /* { */ #define __vector_INCLUDED__ /* ------------------------------------------------------------------------- */ #ident "$Id: vector,v 1.2 1999/11/09 19:55:18 arno Exp $" #include #include "../vector_base.h" /* ------------------------------------------------------------------------- */ template class vector : public vector_base { // Public types public: //typedef value_type; //typedef allocator_type; typedef value_type &reference; typedef const value_type &const_reference; typedef value_type *iterator; typedef const value_type *const_iterator; //typedef size_type; // Inherited from "vector_base" //typedef difference_type; //typedef reverse_iterator; //typedef const_reverse_iterator; private: typedef void (*value_printer)(ostream &, const value_type &); // Construct/Copy/Destroy public: explicit vector() : vector_base() {} explicit vector(size_type n) : vector_base(sizeof(value_type), n, construct_value2) {} vector(size_type n, const value_type &value) : vector_base(sizeof(value_type), n, construct_value2, (void *) &value) {} vector(const vector &x) : vector_base( sizeof(value_type), construct_value2, (char *) x.begin(), (char *) x.end() ) {} vector(const_iterator from, const_iterator to) : vector_base( sizeof(value_type), construct_value2, (char *) from, (char *) to ) {} ~vector() { vector_base::clear(sizeof(value_type), destroy_value2); } const vector &operator=(const vector &x) { assign(x.begin(), x.end()); return *this; } void assign(const_iterator from, const_iterator to) { vector_base::assign(sizeof(value_type), (char *) from, (char *) to); } void assign(size_type n) { vector_base::assign(sizeof(value_type), n); } void assign(size_type n, const value_type &x) { vector_base::assign(sizeof(value_type), n, (void *) &x); } //allocator_type get_allocator() const; iterator begin() { return (iterator) p; } const_iterator begin() const { return 
(const_iterator) p; } iterator end() { return (iterator) p + size1; } const_iterator end() const { return (const_iterator) p + size1; } //reverse_iterator rbegin(); //const_reverse_iterator rbegin() const; //reverse_iterator rend(); //const_reverse_iterator rend() const; // Capacity //size_type size() const; // Inherited from "vector_base". //size_type max_size() const; // Inherited from "vector_base". void resize(size_type n) { vector_base::resize(sizeof(value_type), n); } void resize(size_type n, const value_type &v) { vector_base::resize(sizeof(value_type), n, (void *) &v); } //size_type capacity() const; // Inherited from "vector_base". //bool empty() const; // Inherited from "vector_base". void reserve(size_type n) { vector_base::reserve(sizeof(value_type), n); } // Element access reference operator[](size_type i) { return ((iterator) p)[i]; } const_reference operator[](size_type i) const { return ((const_iterator) p)[i]; } reference at(size_type i) { return ((iterator) p)[i]; } const_reference at(size_type i) const { return ((const_iterator) p)[i]; } reference front() { return *(iterator) p; } const_reference front() const { return *(const_iterator) p; } reference back() { return *((iterator) p + size1 - 1); } const_reference back() const { return *((const_iterator) p + size1 - 1); } // Modifiers void push_back(const value_type &v) { vector_base::push_back(sizeof(value_type), (void *) &v); } void pop_back() { vector_base::pop_back(sizeof(value_type)); } iterator insert(iterator pos) { return (iterator) vector_base::insert(sizeof(value_type), (char *) pos); } iterator insert(iterator pos, const value_type &v) { return (iterator) vector_base::insert( sizeof(value_type), (char *) pos, (void *) &v ); } void insert(iterator pos, size_type n, const value_type &v) { vector_base::insert(sizeof(value_type), (char *) pos, n, (void *) &v); } void insert(iterator pos, const_iterator from, const_iterator to) { vector_base::insert( sizeof(value_type), (char *) pos, (char 
*) from, (char *) to ); } iterator erase(iterator pos) { return (iterator) vector_base::erase(sizeof(value_type), (char *) pos); } iterator erase(iterator from, iterator to) { return (iterator) vector_base::erase( sizeof(value_type), (char *) from, (char *) to ); } void swap(vector &x) { vector_base::swap(x); } void clear() { vector_base::clear(sizeof(value_type)); } private: /* * Implement "vector_base"'s pure virtual methods. */ /*virtual*/ void construct_value(void *that) { new(that) value_type; } /*virtual*/ void construct_value(void *that, void *v) { new(that) value_type(*(const value_type *) v); } typedef value_type xxxx; /*virtual*/ void destruct_value(void *that) { ((value_type *) that)->~xxxx(); } /*virtual*/ void assign_value(void *to) { *(value_type *) to = value_type(); } /*virtual*/ void assign_value(void *to, void *from) { *(value_type *) to = *(value_type *) from; } /*virtual*/ void print_value(ostream &os, void *v, void *closure) const { (*(value_printer *) closure)(os, *(const value_type *) v); } /* * Define some static helpers that are needed during construction and * destruction. 
*/ static void construct_value2(void *that) { new(that) value_type; } static void construct_value2(void *that, void *v) { new(that) value_type(*(const value_type *) v); } static void destroy_value2(void *that) { ((value_type *) that)->~xxxx(); } friend bool operator==( const vector &x, const vector &y ); friend bool operator<( const vector &x, const vector &y ); void print(ostream &os, value_printer vp) const { vector_base::print(sizeof(value_type), os, (void *) &vp); } friend ostream &operator<<(ostream &, const vector &); }; /* ------------------------------------------------------------------------- */ template inline bool vector__value_equals(const value_type *x, const value_type *y) { return *x == *y; } template inline bool operator==(const vector &x, const vector &y) { if (x.size() != x.size()) return false; return x.equals(sizeof(value_type), y, ( (bool (*)(void *, void *)) (bool (*)(const value_type *, const value_type *)) vector__value_equals )); } /* ------------------------------------------------------------------------- */ template inline bool vector__value_less_than(const value_type *x, const value_type *y) { return *x < *y; } template inline bool operator<(const vector &x, const vector &y) { return x.less_than(sizeof(value_type), y, ( (bool (*)(void *, void *)) (bool (*)(const value_type *, const value_type *)) vector__value_less_than )); } /* ------------------------------------------------------------------------- */ template inline void vector__print_value(ostream &os, const value_type &value) { os << value; } template inline ostream & operator<<(ostream &os, const vector &x) { x.print(os, vector__print_value); return os; } /* ------------------------------------------------------------------------- */ #endif /* } */ /* ------------------------------------------------------------------------- */ html2text-1.3.2a/TODO0100644000000000000000000000513510000517350012773 0ustar rootroot## TODO - What has to be done ? 
Mon Jan 12 14:19:08 CET 2004 ## =========================================================================== Sorry, due to a lack of time that I may spend on things like this, I am not feeling able to work on the program by myself yet. However, if you have an applicable patch, please let me know, so I will be glad to insert it in this package. I will do so with any considerable bug report as well. # ---------------------------------------------------------------------------- Some major development guidelines for future releases: + Become independent from bison++ + Improve portability to other platforms (think poll()) # ---------------------------------------------------------------------------- [Debian Bug report logs #98325] To: 98325@bugs.debian.org Subject: use of std::auto_ptr From: Philip Martin Date: 16 Jul 2001 01:01:49 +0100 Message-ID: <877kx9lp1u.fsf@debian2.lan> Hi The std::auto_ptr class template does not meet the requirements for objects that can be stored in standard library containers. In particular 23.1 [lib.container.requirements] para 3 requires objects in containers to be CopyConstructible and Assignable, std::auto_ptr is not. See http://gcc.gnu.org/onlinedocs/libstdc++/20_util/howto.html#2 and http://cpptips.hyperformix.com/cpptips/autoptr_contain Standard library algorithms and container methods are allowed to copy container elements, and then treat the copies as identical. This won't work if the elements are auto_ptrs. *Whether* any particular algorithm/method makes copies in this way is a property of the library implementation and is beyond the scope of the standard. html2text's configure test for list<auto_ptr<T>> is always going to fail with g++ 3.0. By using a local version of auto_ptr to get round this, html2text is relying on a particular implementation of the standard library. 
Looking at the auto_ptr implementation in html2text-1.2.4/libstd/include/auto_ptr.h the dodgy bits of code are the copy constructor and the assignment operator (no surprise there) which both take a const reference argument and then cast away const to modify the argument. Lying to the compiler in this way may work, but the standard does not require it to work. Enough bad news. If it were my job to fix this code I would consider replacing the auto_ptr with a different smart pointer, probably the shared_ptr in the boost collection. See http://www.boost.org/libs/smart_ptr/shared_ptr.htm Philip # ============================================================================ Martin Bayer html2text-1.3.2a/CREDITS0100644000000000000000000000167107760113032013334 0ustar rootroot## CREDITS - Thanks to... Sun Nov 23 12:12:18 CET 2003 ## =========================================================================== # # Since september 2000, these people have contributed to the development of # this program: Johannes Geiger + SCRIPT/STYLE error patch + new IMG handling Randolph Chung + ported to g++-3.0 Arno Unkrig + almost all bugfixes in 1.3.1. Thanks! Bela Lubkin + Plain-ASCII output patch Kirby Zhou + patch for SCRIPT/STYLE elements within table cells Nicolas Boullis + ported to g++-3.3 (this change is not backward-compatible) Alexander Solovey + bugfix for urlistream.h + better rendering of XHTML ## --------------------------------------------------------------------------- Martin Bayer html2text-1.3.2a/html2text.1.gz0100644000000000000000000000503310001241001014720 0ustar rootroot‹¦A@html2text.1•XmoÛºþÎ_Aû¶Ü¤-ú²‹á¦¹é‡æ¶‹n˜ïZ¢m.’¨‘T|=\ì·ï9‡’%;I×MIäáy}Îs˜,r¾1^f6­ ]Œ-åVy™+da3³2:“ËH°öF¹`JùIí´“?Kúýó¿u¦B²ªGKírS&™þ /† oØúWUʳ7ø÷ñìýÇó7òòj.Ï_½zCkxÝ¥-èh/U™I_¯×Ú“xvZnužÚB'qõüZnB‘Ÿý{g,f1zu¶AÜìZÞ^Ü\‰nÅb™ReªL¡ÉõüæËb,~èkjËGí‚v¼uö÷Û»ûÙd&’O½3FW"Yº£×Øè¡ã“/âô¸Õe¥œ×âø˜ntú ~“Í×L/ëõbäSU–8þø=oí½véÊäZ$Y©°éÞû°Ãëz€*•†x^åt;qÚ­Üš ! 
þ±oé­CU‡Ÿ±ÿRڥO‰“©4%­¯].2I’ø=øËÕìr:¹ŸOîn}â´Ê<{Ÿg^®œ-dØè#‰~(VÖ +´J7Ò®hQÁJ%}€°‚ÞV¹B&Æ@n”ƒñÉPPm ÚÓ>é´¯ó ±×|R.kÌ•'Ö±P>¿ç9fû$á x¶ ¡Èl-mÅõ‚©½ÎN‘•÷â—½Ia£'­¯tÚÖ ”þ:ý"O«OÓÏ—òìÝë÷‹Õô4.^êµA¢o ‚2Ø„P}°Ðýô‰-¹ÞU”¬0wîTéW(À{gƒMm.öÂ?¼yKÂ:Ò÷Žˆbdsº™•¥ °Õ• ¹”ÃÆhÃNæ6U¹¤>‘y.-´qq?©jÊG•›Œ]1YAÔq@Å¡G šÌé»uø\ºp~ìƒÆòp¾Ì”ßHå›ÌéÎÆs­çHƒR‡ÚqØ3Ž$‘¶‡Öe†ü¡E+9[_'ç„Hº: ~(—ÈÔ,ÂC«aJ¾CIº°ÏÒ¬Ö8T¹)L„Sß|ŽÉÚ¤_LòD^Û-â솼 rvíÞ*]T”QVàݣɴ\[Kè¸6†šÒx“ÎuL?ÃÊ!–~É¡…Œ*M¾‰nÀ©ÖF³h›Êá4OÞõušjïWuNO1â´‘´¡Ö!&´Ã£*±–”õ;dP ò~1ejÓih¢h÷ CñiGPEŠtoKµÌwÎ|Ã!<4¡E®‡C8Aø)ƒÍóXvq-ºÕšÜJµc`=å–àÔB§—œ×G~êé—#Íþt}ws5Nö‹\*N÷3f|‹ l? §–Œ1¨>ÞÞ„m©ùÀáÿÔg*Ç:¤ãž:2‰Å×ÚÄiÚÈ‹PS°ýœY8ƒÎ´H>Gù… ‰g}vʹYöB¤2Ó+` ¼G²TB‹ö„(‹â’iŸ:³ÔT{s¦²¯ûÉÛSÕÀ™J­)è&wÜHÐŽç÷ýôiמ}ä&²§7“Ù|ÿþípÑ–H¬¹DÎ"}XOŒÆ0öÚ|1»œLDïP×øvYØTrë»…ãÊň×özOÜTy:â/‚è Ø#¶ éÕ0¥zk^¤óN-rµ.-¶¤²ª]eaçGòôaÐêHwdœ ø6KÀKU¼5^£rËh?è&p¢P3Tùtk€„NW– ®Œ¢%òꆬQ–Åç¡Ün úµ ]n!Ü:è/ú%òÖ¤¹¢Ø#yìM²Rúu‡iƒ6M°UåS…hÉÂøÑR"m`—ý”’©8c ’==\{.5¥†ß¢ËY/:‡Ìì‹Ç-«õã¼}ÐÞHÛ˜Uà„¨ÑB›—ªªrC/QÆ »Ž@„`_ÅxKäåÅS…[Šù™fåúwÂí¨zÔ„Ô;Tì)îFÅÄÿ£ê{‡Pp´<¸ç” ½-ÌE_,üXr(YúE¢ ü—^1ëñú_µÿÇHcP…±Ó£9éÑR¥¼Yöe<`¹´y¶Âû½¤¸u¿êÙm l­{ðZl 8;"U¦bh!扄. 
‡<9;ò2ÄÇ'øÑƒAäÊ%ò3b!qÞ¤‘U %^óÀC¶ Q{X¤Ì„=à´±VÐPŠÆŠsyqSa6³7„º4ðšIFô` «ö‹½C[ƒ›^€)$7~ÓñVnËOú_Û "¢Â æßñè{ßÐSâ"Ñ@3Uð±†$îü>O¤c%ö´=*Ó$6±7·­ù9B@¥Ü QG9Xä´â ñ?͇¿þ 3xŽÃ§1ãY¯î­ƒ²ôŽ[ kêC‚×zÏ:ûM.4't[Á†O;r"ú]/šÂHýï%±‡´ ª‰­­ëƒÑ„„  ­úݘ¨3c¦o­nç[Š^dßæ¸T´s÷öÑI—G=ƒ˜‹Ç>Úúi(Öº¤æÕë'ÃØ¿ ]Ãp&6 P ‡\¸aÂdê!vm`:4бð|"7‡™ôöû^TÜ­rgkÒ¡›g;uþØX>4ã×™3UÞk•…ÆÙsl{á‘»Tš·O@{ÒÞ9 yù÷àeüþ©§°Á©ÓºŒr(Pï>ôX§OÅ (#†w@Ê´+Ø\r¿j3ÀÂ%¥|ÿJô®ºòŸµG) Ðäá5&,wVT‡FáeÞ ' | 8#Ĭ)3+PCâ{EXE‡P$Û3TªÛÄô?ª‰òçÉ—«†&OžÌb¶C‚´¡IiGsÒʬëHÄúpôìœsØ’©ù »‡û!"Ñ÷ŽŒP\ÞÝ~¾›ÞLn•ó»9lÐŽÙq²Ÿ¸§š-E÷|½ixt›ÇW$ãñv»M¶¯ëÖãùt<½º‘Ưϩ‹Æ ¹A{B‚_¾9!™×óùýiÔpz5›O'—Íàq0<Ç)¼áØ ÝÐG Úî®M[¾ù½›šxÍÎmYëÛëˆ º©$¤ŽAY¤Bº@Џ²6(E—*ëÈY¹Â‰§3Y¶Ä\»z ’ËGÊ×¯ÎÆøïÝéÓ«ºí¥+³@\Å¶×¢Ý È¾ùèFÍ(BÁéÝÜ'€ŸkRíoü|ÖmÆÕ]£Pó¤9»2Ž„¾)ýO¼(Ìz/%–ÝPƒtñu~}7}Ù„º"+Zd:KÎa¦ò ‡éøkù¨“?)<ü\ó_WÐüz3É™]…-5¯_‹åõë ÒM®­1{ï²v\¾¦Fº3#Ê„(TÎ0ƒÍì¶Ì-š ]5è#'æ_ŸSŠ&ß1ß8žŒû«Æÿ‰ÆÁÚÜwEÏÕ‹ÝWñZûêJ^|™Ý=×â9gŸŠÿ©ë=¡html2text-1.3.2a/html2textrc.5.gz0100644000000000000000000000464107311076162015305 0ustar rootroot‹r|$;html2textrc.5ÕYmoÛFþÎ_1Ç|ˆ”J´åÄÁÁ°Ó[#]dÉ©ŠºVÔJZ˜/ wiEÕ©¿½3»¤DÉìÕ½ô€KW|™×gžeœ¼¥0‹ý4ä‘bJĬ™„€Ia<sÁg0ÝXÊÞ±D‰Zlø§ôû_øŒ)gžÖ§< DäÌø­FEwø÷Ï4‚FçWç¿jœC»ëzpq~Þ !-ØŽC²-E3ébÁ%¹÷ ‡5ü8䎑öz°Tap¡ø•øp©=ÔÏß?Ô¨ÐíÁ°y×µŠ2u˜ÇIÈz¾€U¯8Á%ÌEÀéÕAc¥QÕ:܇£{·ïZN>òõ“å´àÆú7ý\Y?Óã± åZ¸ÓuÛãþ½× -o‰ÏZã#+—UK›šñ¹ˆÐ.ÉÄóßq+•:ß Nu¹—*µ©ák†Jžx’ˆ®BiR²HXøZÂ4ªc¥ &Ьн5Юq ¾8›‘¸ýÊ&Iûo6T„Ãíì\$°(ŽêrÅ|þ’%ÌWhJPв5ŒVU-Æ4`Ñ£v[fj˜ ÂG¨©‹£`CA­—BñSŲZ#DYZ~ÆL³_DžXD1>Äü`V7jI y u0çÙô[aŠ]† úœÆYÝ™Sæ?Jä÷従ìvÕƳ&ÖbE>‹ÈÒ·Òhp½JòϱcÄ #Ž4ºÄ†Ø¦„VS€h¡061+§@„w¼àšv´¿x%먤ýœ»ÀÀÚ¢f&`fÓDC%* ÉÖ7·9¶&¦îX9°Õ*Øhø·Ó)eŠñq*×LøLa’Œ6ÂÍò‰¼åÎÂ!?¶ÍhíÚ–T膤Øèi³mé‹1´Úàh(áÓΨ=¹ë=gû¤“èl§ ê9ªÞ‘)*ñ6às‰X,Õn7pn¹ $,ã5¶m´9bF*ž@æH4Ae*1)ZmM_æë,³Îƒ4ŒNVfxÕÖ5µ˜{íHȬvÝ|zp¬Ê/á;Õ<ÐÖ¨ó㟒…fk&;¹v˜Æ³WK<{CóÓ„Êb¦Ý`ïÑ´2»& ¢”Þj ôý“HTÊ‚l¡$óÈÚ‚›8Í{oS¬Ü¾ª TËBߎ0@§?\0Î`çœ$bÊÕšó(OÈ[`ÝUµ½6ê Âꓘ¥DÂÔrØ®¡¬°°oÆx¾sL¥$:ø¾à n€äãŒóUÁHæ"-ÀÝ”ú”E½Á±$pôYÇi0Ó3§ìG¸22I8+l®FKõ®Y'Oõ¸ª¡ˆ½IDx š¢dê/MÉiQfmR'[•yô‰x“€†7ºqý‡ƒL8knHP}QNiŒ f[qöдQ]r˜öœH}AåáÀÚX4…MÝGÜS˜„´®zÃÚ¯6+7G±æ-¡—ä+tæ#n& g–ñ3R¢®1 ò™Ö½;ò4¹Zõ±f lMž”Å dçdú‰†‚I§ï¶¡Ý·]p˜4ÇÝntïfŒ«½×#Ña6Ì*¥i‡6aø¦çÝ ´¸¥޳1*" 
åyÂñ/«@øÂèÃí‘/ý9¥'fÎŽ>µ&ƒA׃š¾×ŽšKãgv“ùžÝL\ot×ÐwÙÍe¦ðò-r;B"¤~DÈETälòƒBÝ‹{M^Y+á$JH„’ºBErFÞD&ÃNw<è»ûùÃõÆý]¯7M>ôʶé­çAZýàÞ5p'-ü»‡Î÷Ch:8 5ïîác«ƒC×= ŠœxAP¦dúˆÕítúºzILÙ™êCš;yPpn&·ù‡Y³hÄɨîwýeykA÷Ž| ?”–ºpJÅà¿Òš‰ùØÌQ ±™ç_`³`ÍÙêQ >a›<â,]¸ÛçøÑ@kßM}¡ÁÁu3›k²Sø’éÇvoÜýÞ¶ö pU¹+‡õæj½Œ_KË,9ØÄ‘ÏöûÏΨxY…*úóŒ}Ý„%nÐ7ö«wö­]}P` †ç¿°óú`÷ï>8 _šªðxŠG(ʧ­?ð–¿föEVÌV-¼FUûÉOxw1âŽÂŽS¥“ÙxG‰áªdÜ#ŸÐ g›Í@fó×ÁOðóé&P8ªTå7UâhT²wæõ©a“O=§ÎŽp{Мx½Ñ˜>¬iŒ†˜sú:M_Б„W‰鋿,^GAŒÇå ö÷Tr~ÿ)ÏòJi}ù?J¡°3[K¥VWgg˜ädÅüHêìW£àLÅq ÏP¾«½¶Ý.n¯wd—|Pü îÿ html2text-1.3.2a/CHANGES0100644000000000000000000005212210001246537013303 0ustar rootroot## CHANGES - What has changed ? Wed Jan 14 14:47:26 CET 2004 ## =========================================================================== ############################# # # #Changes in Version 1.3.2(a)# # # # (released 2004-01-15) # ############################# Bugfixes and new features **************************************** Version 1.3.2A only: ported to gcc 3.3+ ------------------------------ Ported to g++ v. 3.3. Since this change is not compatible to g++ versions prior to 3.0, it is included in version 1.3.2A only. THIS IS THE ONLY DIFFERENCE BETWEEN VERSIONS 1.2.3 AND 1.2.3A. (Area.C, Area.h, HTMLControl.C, HTMLControl.h, Properties.C, Properties.h, format.C, format.h, html.C, html.h, html2text.C, urlistream.h) ------------------------------ Added command line option '-ascii' for straight ascii output (instead of ISO-8859-1, which is the default). (html2text.C, sgml.C) ------------------------------ Implemented rendering of most SGML entities introduced in HTML-4. As a limitation, those entities not present in HTML-3.2/ISO-8859-1 will be recognized only if represented as "named entities" and not with thier numeric values, e.g. "™" will be rendered, "™" won't. (html2text.C, sgml.C) ------------------------------ Element closing as done in XHTML (e.g. "
") is now tolerated. (HTMLControl.C) ------------------------------ The program now ignores the content of