, Wed, 20 Aug 1997 01:18:21 +0200
unhtml-2.3.9/debian/rules 0000755 0002322 0002322 00000001157 10274156524 015536 0 ustar pbuilder pbuilder #!/usr/bin/make -f
# Name of the binary package; selects the staging directory debian/$(package).
package := unhtml

# Compiler handed down to the upstream Makefile by the build target.
CC := gcc

# Always build with debug info and warnings; add -O2 unless the builder put
# "noopt" into DEB_BUILD_OPTIONS.
ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS)))
CFLAGS := -g -Wall
else
CFLAGS := -g -Wall -O2
endif
# Compile the upstream sources with the packaging's CC/CFLAGS, then leave a
# stamp file named after this target so the compile is not repeated.
build:
	$(MAKE) CC="$(CC)" CFLAGS="$(CFLAGS)"
	touch $@
# Return the tree to its pristine state: debhelper leftovers, the "build"
# stamp, and whatever the upstream Makefile produced.
clean:
	dh_clean
	$(RM) build
	# Leading '-': a failing upstream clean must not abort the packaging clean.
	-$(MAKE) clean
# This rules file stages no architecture-independent files; the target only
# makes sure the build has been done.
binary-indep: build

# Stage the compiled program, its documentation and man page under
# debian/$(package) and turn that tree into the .deb.
binary-arch: build
	# Refuse to run from the wrong directory or without (fake)root, instead of
	# silently producing a package with wrong file ownership.
	dh_testdir
	dh_testroot
	# Clear out any staging data left by a previous run.
	dh_clean
	dh_installdirs usr/bin
	# $(CURDIR) is make's absolute working directory; quoted so that a path
	# containing spaces is still passed as one argument.
	install unhtml "$(CURDIR)/debian/$(package)/usr/bin"
	dh_installdocs Readme.html Readme.txt
	dh_installman unhtml.1
	dh_installchangelogs
	dh_strip
	dh_compress
	dh_fixperms
	dh_shlibdeps
	dh_gencontrol
	dh_md5sums
	dh_builddeb
# Aggregate target: run the arch-independent step, then the arch-dependent one.
binary: binary-indep binary-arch

# "build" is intentionally absent from this list: it is a real stamp file
# written by the build target.
.PHONY: clean binary-indep binary-arch binary
unhtml-2.3.9/Readme.html 0000600 0002322 0002322 00000003264 07243730632 015310 0 ustar pbuilder pbuilder
Unhtml - Document Parser
Kevin Swan, 013639s@dragon.acadiau.ca
Version 2.3
Last revised: February 3, 1998
DESCRIPTION
unhtml is a program I developed in April of 1996 to remove the
HTML formatting from documents and print the result to the standard output
stream. It originally treated any occurrence of a greater-than or less-than
sign as part of an HTML tag and removed it. This version is more intelligent
in the sense that it also recognizes <SCRIPT> blocks.
Please report any bugs you find or comments you have to
013639s@dragon.acadiau.ca
DISTRIBUTION
This software is freely distributable under the GNU Public License,
and may be altered in any way. The only condition for redistribution is
that this README file be included with the revised/redistributed software,
with the only modifications being additions describing what's been changed.
The software's original author
Kevin Swan 013639s@dragon.acadiau.ca is
agreeing to negotiate different licensing terms with interested parties for
commercial reuse of the source code.
INSTALLATION
You can compile and install unhtml by doing the following as root:
make && make install
Kevin Swan
unhtml-2.3.9/Makefile 0000600 0002322 0002322 00000001271 10043051657 014654 0 ustar pbuilder pbuilder #
# Makefile for unhtml
#
# If you are parsing an HTML file that has a large Javascript program
# in it, you may need to set MAX_TAG_SIZE to something higher and
# recompile
#
# Toolchain and flags. Any of these can be overridden on the command line,
# e.g.  make CC=clang CFLAGS="-O2 -Wall"
CC = gcc
# MAKE is deliberately not assigned here: make sets it to the program that is
# actually running, so $(MAKE) in a recipe re-invokes that same make.
RM = rm -f
CFLAGS = -Wall
# Alternative flags with DEBUG defined:
# CFLAGS = -Wall -DDEBUG
# Default goal: build the unhtml executable.
all: unhtml

# "all" names a command, not a file.
.PHONY: all

# Object files that make up the program; listed once so the prerequisite list
# and the link command cannot drift apart.
OBJS = unhtml.o ops.o esc.o

# Link step. LDFLAGS/LDLIBS are empty unless the caller supplies them, so the
# default command is unchanged; they let a packager pass linker options.
unhtml: $(OBJS)
	$(CC) $(LDFLAGS) -o $@ $(OBJS) $(LDLIBS)

# Compile steps, each listing the headers its source depends on.
# CPPFLAGS is empty by default and is honoured when the caller sets it.
unhtml.o: unhtml.c ops.h esc.h
	$(CC) $(CPPFLAGS) $(CFLAGS) -c unhtml.c

ops.o: ops.c ops.h
	$(CC) $(CPPFLAGS) $(CFLAGS) -c ops.c

esc.o: esc.c esc.h
	$(CC) $(CPPFLAGS) $(CFLAGS) -c esc.c
# Build everything, then run the makefile in tests/.
# Declared phony so a stray file named "check" cannot make this a no-op.
.PHONY: check
check: all
	$(MAKE) -C tests
# Remove core dumps, object files and the executable.
# Declared phony so a stray file named "clean" cannot make this a no-op.
.PHONY: clean
clean:
	$(RM) core *.o unhtml
# Installation layout. With the defaults the files land exactly where they
# always did (/usr/local/bin and /usr/local/man/man1). PREFIX relocates the
# install; DESTDIR (empty unless given) prepends a staging root for packagers.
PREFIX = /usr/local
BINDIR = $(DESTDIR)$(PREFIX)/bin
MAN1DIR = $(DESTDIR)$(PREFIX)/man/man1

# Install the program (mode 755) and its man page (mode 644).
.PHONY: install
install: unhtml
	# Create the target directories first: copying into a missing man1
	# directory would otherwise create a plain file called "man1".
	mkdir -p $(BINDIR) $(MAN1DIR)
	cp unhtml $(BINDIR)/unhtml
	chmod 755 $(BINDIR)/unhtml
	cp unhtml.1 $(MAN1DIR)/unhtml.1
	chmod 644 $(MAN1DIR)/unhtml.1
unhtml-2.3.9/Readme.txt 0000600 0002322 0002322 00000002306 07243730632 015157 0 ustar pbuilder pbuilder
========================
Unhtml - Document Parser
========================
Kevin Swan, 013639s@dragon.acadiau.ca
Version 2.3
Last revised: February 21, 1999
DESCRIPTION
unhtml is a program I developed in April of 1996 to remove the
HTML formatting from documents and print the result to the standard output
stream. It originally treated any occurrence of a greater-than or less-than
sign as part of an HTML tag and removed it. This version is more intelligent
in the sense that it also recognizes <SCRIPT> blocks.
Please report any bugs you find or comments you have to
013639s@dragon.acadiau.ca
DISTRIBUTION
This software is freely distributable under the GNU Public License,
and may be altered in any way. The only condition for redistribution is
that this README file be included with the revised/redistributed software,
with the only modifications being additions describing what's been changed.
The software's original author Kevin Swan <013639s@dragon.acadiau.ca> is
agreeing to negotiate different licensing terms with interested parties for
commercial reuse of the source code.
INSTALLATION
You can compile and install unhtml by doing the following as root:
make && make install
Kevin Swan
unhtml-2.3.9/cnv.awk 0000644 0002322 0002322 00000000141 07243730632 014516 0 ustar pbuilder pbuilder { printf("%-10s, %3d,\n","\""$1";\"",$2) }
# After the last input record, print the closing row: an empty quoted
# sequence with code 0 and no trailing comma.
END {
    printf("%-10s, %3d\n", "\"\"", 0)
}
unhtml-2.3.9/esc.c 0000644 0002322 0002322 00000011745 07243730632 014156 0 ustar pbuilder pbuilder #define DEBUG 0 /* 1:dummy main 2:numerical ascii codes off */
#include "esc.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* Filter states kept in `mode`. */
#define THRU 0 /* THRU mode simply prints chars */
#define HOLD 1 /* HOLD mode stocks chars in bff[] */
/* Capacity of bff[]; m_putchar() flushes once this many non-';' chars are held. */
#define MXBF 7 /* length of longest escape sequence excluding '&' */
/* Selects which rows of ktbl[] are compiled (128 or 256) and is the upper
   bound m_putchar() applies to decoded character codes. */
#define CHARSET 256 /* set to 256 if ISO-8859-1 (European) */
/* Helpers used by m_putchar(); scmp() is defined outside the visible part of this file. */
static int scmp(char *, char *);
static int flush(int);
/* Current filter state: THRU or HOLD. */
static int mode=THRU;
/* Characters collected since the last '&'; not NUL-terminated. */
static char bff[MXBF];
/* Number of characters currently held in bff[]. */
static int index=0;
#if DEBUG == 1 /* dummy main */
/*
 * Stand-alone test driver, compiled only when DEBUG is 1: feeds standard
 * input through m_putchar() one character at a time, then passes EOF so any
 * held characters are flushed.
 */
int main(void){
    /* int, not char: getchar() returns an int so that EOF stays distinct
       from every valid byte value (a char would confuse 0xFF with EOF, or
       never equal EOF at all where char is unsigned). */
    int ch;
    while( (ch=getchar()) != EOF)
        m_putchar(ch);
    m_putchar(EOF);
    return 0;
}
#endif
struct table {
char * seq;
int n;
} ktbl[] = {
{"gt;" , 62},
{"lt;" , 60},
{"amp;" , 38},
{"quot;" , 34},
#if CHARSET == 128
{"nbsp;" , 32},
{"shy;" , 45},
#elif CHARSET == 256
{"nbsp;" , 160},
{"iexcl;" , 161},
{"cent;" , 162},
{"pound;" , 163},
{"curren;" , 164},
{"yen;" , 165},
{"brvbar;" , 166},
{"sect;" , 167},
{"uml;" , 168},
{"copy;" , 169},
{"ordf;" , 170},
{"laquo;" , 171},
{"not;" , 172},
{"shy;" , 173},
{"reg;" , 174},
{"macr;" , 175},
{"deg;" , 176},
{"plusmn;" , 177},
{"sup2;" , 178},
{"sup3;" , 179},
{"acute;" , 180},
{"micro;" , 181},
{"para;" , 182},
{"middot;" , 183},
{"cedil;" , 184},
{"sup1;" , 185},
{"ordm;" , 186},
{"raquo;" , 187},
{"frac14;" , 188},
{"frac12;" , 189},
{"frac34;" , 190},
{"iquest;" , 191},
{"Agrave;" , 192},
{"Aacute;" , 193},
{"Acirc;" , 194},
{"Atilde;" , 195},
{"Auml;" , 196},
{"Aring;" , 197},
{"AElig;" , 198},
{"Ccedil;" , 199},
{"Egrave;" , 200},
{"Eacute;" , 201},
{"Ecirc;" , 202},
{"Euml;" , 203},
{"Igrave;" , 204},
{"Iacute;" , 205},
{"Icirc;" , 206},
{"Iuml;" , 207},
{"ETH;" , 208},
{"Ntilde;" , 209},
{"Ograve;" , 210},
{"Oacute;" , 211},
{"Ocirc;" , 212},
{"Otilde;" , 213},
{"Ouml;" , 214},
{"times;" , 215},
{"Oslash;" , 216},
{"Ugrave;" , 217},
{"Uacute;" , 218},
{"Ucirc;" , 219},
{"Uuml;" , 220},
{"Yacute;" , 221},
{"THORN;" , 222},
{"szlig;" , 223},
{"agrave;" , 224},
{"aacute;" , 225},
{"acirc;" , 226},
{"atilde;" , 227},
{"auml;" , 228},
{"aring;" , 229},
{"aelig;" , 230},
{"ccedil;" , 231},
{"egrave;" , 232},
{"eacute;" , 233},
{"ecirc;" , 234},
{"euml;" , 235},
{"igrave;" , 236},
{"iacute;" , 237},
{"icirc;" , 238},
{"iuml;" , 239},
{"eth;" , 240},
{"ntilde;" , 241},
{"ograve;" , 242},
{"oacute;" , 243},
{"ocirc;" , 244},
{"otilde;" , 245},
{"ouml;" , 246},
{"divide;" , 247},
{"oslash;" , 248},
{"ugrave;" , 249},
{"uacute;" , 250},
{"ucirc;" , 251},
{"uuml;" , 252},
{"yacute;" , 253},
{"thorn;" , 254},
{"yuml;" , 255},
#endif
{"" , 0}
};
/*
 * m_putchar - putchar() replacement that decodes HTML character references.
 *
 * Characters are fed in one at a time.  In THRU mode they go straight to
 * putchar(); an '&' switches to HOLD mode, where characters are collected in
 * bff[] until a ';' arrives.  The collected text is then looked up in ktbl[]
 * and, when DEBUG is 0 or 2, also tried as a numeric reference ("#65;",
 * "#x41;").  A recognised reference prints the single decoded character;
 * anything else is handed to flush(), which writes the held text back out
 * starting with the '&'.
 *
 * chr: the next input character, or EOF to signal end of input.
 *
 * Returns:
 *   THRU mode: '&' for an ampersand (nothing is printed yet), EOF for EOF,
 *              otherwise the result of putchar(chr).
 *   HOLD mode: the result of flush() when the held text is given up (second
 *              '&', EOF, unknown or out-of-range reference, buffer full),
 *              the result of putchar() for a decoded reference, or chr
 *              itself while characters are still being collected.
 */
int
m_putchar(int chr){
    struct table *ptr;

    if ( mode == THRU ) switch (chr){
        case '&': mode=HOLD; return '&';
        case EOF: return EOF;
        default : return putchar(chr); /* most chars pass through here */
    }
    /* mode == HOLD */
    else switch (chr) {
        case '&': return flush(HOLD);   /* a new reference starts; stay in HOLD */
        case EOF: return flush(THRU);
        case ';': bff[index++]=';';     /* delimiter */
            /* Named entity lookup; the scan stops on the first row scmp()
               accepts. */
            for(ptr=ktbl; !scmp(ptr->seq,bff); ptr++)
                ;
            chr= ptr->n;                /* chr == 0 if no match */
#if DEBUG == 0 || DEBUG == 2
            if(chr==0 && bff[0]=='#')   /* numerical character code */
            /* without the following strong tests seqs like "K;" would fall through */
            {
                char *end;
                unsigned long val;

                /* The <ctype.h> classifiers require an unsigned char value
                   (or EOF); bff[] may hold bytes >= 0x80, which are negative
                   where plain char is signed, hence the casts. */
                if( (bff[1]=='X' || bff[1]=='x')
                    && isxdigit((unsigned char)bff[2])
                    && isxdigit((unsigned char)bff[3]) )
                {
                    /* hexadecimal: /[Xx][0-9A-Fa-f][0-9A-Fa-f]+;/ */
                    /* The ';' just stored ends the scan at the latest, so
                       strtoul() never reads past the held characters. */
                    val=strtoul( &bff[2], &end, 16 );
                    /* Accept only when the digits run right up to the ';';
                       otherwise "#x41g;" would print 'A' and drop the 'g'. */
                    if( *end==';' )
                        chr=(int)val;
                }
                else if( (bff[2]==';'
                          && isdigit((unsigned char)bff[1]) )
                         /* decimal: /[0-9];/ */
                      || (bff[3]==';'
                          && isdigit((unsigned char)bff[1])
                          && isdigit((unsigned char)bff[2]) )
                         /* decimal: /[0-9][0-9];/ */
                      || (bff[4]==';'
                          && (bff[1]=='1' || bff[1]=='2')
                          && isdigit((unsigned char)bff[2])
                          && isdigit((unsigned char)bff[3]) )
                         /* decimal: /[12][0-9][0-9];/ */
                       )
                    chr=(int)strtoul( &bff[1], NULL, 10);
            }
#endif
            /* Give up on: no match (0), negative values, the control codes
               1-8, 11-12, 14-31 and 127-159, and anything at or above
               CHARSET.  Tab (9), LF (10) and CR (13) are allowed through. */
            if( ( chr<= 8 )
                || ( 11<=chr && chr<= 12 )
                || ( 14<=chr && chr<= 31 )
                || ( 127<=chr && chr<=159 )
                || ( CHARSET<=chr )
              ) return flush(THRU);     /* no match or undefined */
            else { index=0; mode=THRU; return putchar(chr); }
                                        /* print converted character */
        default : bff[index++]=chr;
            /* Buffer full without a ';': this cannot be a known reference. */
            return(index==MXBF)?flush(THRU):chr;
    }
}
static int
flush(int md){ /* sends chars in bff to output stream */
int r; int idx;
register int i;
idx=index; index=0; mode=md;
if((r=putchar('&'))==EOF) return EOF;
for(i=0;i