dadadodo-1.04/Makefile0100644000033400007640000000351107621426005013047 0ustar jwzjwz# Makefile for DadaDodo, Copyright (c) 1997 Jamie Zawinski. SHELL = /bin/sh CC = gcc -Wall -Wstrict-prototypes -Wnested-externs -Wno-format #CC = cc -fullwarn -use_readonly_const -rdata_shared -g3 CFLAGS = -g LDFLAGS = DEFS = -DGETTIMEOFDAY_TWO_ARGS -DHAVE_UNISTD_H LIBS = DEPEND = makedepend DEPEND_FLAGS = DEPEND_DEFINES = srcdir = . INCLUDES = -I$(srcdir) SRCS = dadadodo.c parse.c generate.c files.c hash.c yarandom.c OBJS = dadadodo.o parse.o generate.o files.o hash.o yarandom.o EXE = dadadodo HDRS = dadadodo.h parse.h parsei.h files.h generate.h hash.h \ yarandom.h version.h MEN = EXTRAS = Makefile README dodotodo TARFILES = $(EXTRAS) $(SRCS) $(HDRS) $(MEN) TAR = gtar COMPRESS = gzip --verbose --best COMPRESS_EXT = gz all: $(EXE) clean: -rm -f *.o a.out core $(EXE) distclean: clean -rm -f *~ "#"* depend: $(DEPEND) -s '# DO NOT DELETE: updated by make depend' \ $(DEPEND_FLAGS) -- $(INCLUDES) $(DEFS) $(DEPEND_DEFINES) $(CFLAGS) \ -- $(SRCS) TAGS: tags tags: find $(srcdir) -name '*.[chly]' -print | xargs etags -a .c.o: $(CC) -c $(INCLUDES) $(DEFS) $(CFLAGS) $< $(EXE): $(OBJS) $(CC) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) dadadodo.o: dadadodo.h hash.h parse.h generate.h version.h generate.o: dadadodo.h hash.h parse.h generate.h parse.o: dadadodo.h hash.h parse.h hash.o: hash.h # This really makes me sick... tar: @NAME=`sed -n \ 's/.* \([0-9]\.[0-9][0-9]*\).*/dadadodo-\1/p' version.h` ; \ rm -f $$NAME ; ln -s . $$NAME ; \ echo creating tar file $${NAME}.tar.$(COMPRESS_EXT)... ; \ $(TAR) -vchf - `echo $(TARFILES) \ | sed "s|^|$$NAME/|g; s| | $$NAME/|g" ` \ | $(COMPRESS) > $${NAME}.tar.$(COMPRESS_EXT) ; \ rm $$NAME ; \ echo "" ; \ ls -lgF $${NAME}.tar.$(COMPRESS_EXT) ; \ echo "" ; dadadodo-1.04/README0100644000033400007640000000130307621430456012271 0ustar jwzjwzDadaDodo, Copyright (c) 1997, 1998, 2003 Jamie Zawinski Run "dadadodo -help" for options. 
See "http://www.jwz.org/dadadodo/" for enlightenment. Changelog: 1.00 11-Jul-1997 Initial release. 1.01 14-Jul-1997 Better mailbox parsing. Better output paragraph wrapping. Binary .dodo file portability fixes. 1.02 04-Dec-1997 Better HTML output. Better handling of quoted-unreadable input. 1.03 21-Feb-1998 Strip signature files, VCards, and citation headings from messages. Binary .dodo file portability fixes. 1.04: 09-Feb-2003 Fixed some crashes by using "unsigned char" everywhere. dadadodo-1.04/dodotodo0100644000033400007640000000726506473626224013170 0ustar jwzjwz= Do something about words without vowels? (what about abbreviations?) = Do pairs of words, not just singles. = Hash by soundex -- probably won't work out very well. = Randomly bounce to a similar sounding word -- sometimes, or all the time, or weighted by frequencies of the respective words. = Make it possible to load in a saved file, then suck more data into it (make files.c be able to create either `words' or `pwords'.) = Make it able to descend directory trees. = Make it able to talk HTTP? NNTP? = Do interesting typography: enumerated lists, subtitles, (Do it with HTML, too.) = Insert random whitespace to make it look like poetry. = Count syllables to make haiku. Loop regenerating sentences until we get ones that have word and sentence breaks in the right places. Counting syllables is hard -- have to snarf a hyphenation algorithm from somewhere. Here are some ideas I plan to someday implement for v2.0, unless someone beats me to it: * pwords don't point to strings, because they aren't just single words any more. the key is the string, and the pword object itself contains only statistics about the string under which it was indexed. * pword->succ always points to pwords that are single words. but the pwords table contains both kinds. * to record text: for each sentence there is an N-word buffer, initially empty for each word for each cdr of the buffer (0-N, 1-N, 2-N, ... 
N) look up the cdr in the pwords table index this word under the pword we just found add this word to end buffer, dropping the old word 0 off the end * to generate text: while generating sentences pick a random sentence-start word there is an N-word buffer, initially empty (which is <= value of N) while not done pick a random number R, from 0 to min(N, size-of-buffer) look up a pword in the table, using the last R words as the key (while there is no match, --R and try again) emit that word push that word onto the end of the buffer The idea here is that, given N=3, and the input text All work and no play makes Jack a dull boy when scanning that text we would record these associations: "all" --> "work" "all work" --> "and" "work" --> "and" "all work and" --> "no" "work and" --> "no" "and" --> "no" "work and no" --> "play" "and no" --> "play" "no" --> "play" "and no play" --> "makes" "no play" --> "makes" "play" --> "makes" "no play makes" --> "jack" "play makes" --> "jack" "makes" --> "jack" "play makes jack" --> "a" "makes jack" --> "a" "jack" --> "a" "makes jack a" --> "dull" "jack a" --> "dull" "a" --> "dull" "jack a dull" --> "boy" "a dull" --> "boy" "dull" --> "boy" When generating: * suppose we already have picked the sequence "all work and no". * pick a random number from 1-N. * if 1: look up "no" * if 2: look up "and no" * if 3: look up "work and no" * pick a random successor to the pword associated with the string we looked up ("play".) * now we've got "all work and no play". * repeat. If "work and no" wasn't found in the table (meaning we generated it, but it never occurred in nature) then we would decrease N and look up "and no". If that didn't match either, we'd look up "no". That way, word sequences which had actually occurred together are more likely to be chosen than ones that didn't. The N used for generating can't usefully be larger than the N used for recording. 
The N used for recording is an important tunable paramter, that is probably language-centric; I'd guess that 3 is good for English. The larger N, the more likely one is to regenerate the exact input text. dadadodo-1.04/dadadodo.c0100644000033400007640000006244107621430717013326 0ustar jwzjwz/* DadaDodo, Copyright (c) 1997, 1998 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #include "version.h" #include #include #include #include #include #include #include #include "hash.h" #include "dadadodo.h" #include "parse.h" #include "files.h" #include "generate.h" #include "yarandom.h" typedef struct { const char *string; unsigned char latin1_char; unsigned char length; } entity; static entity entities[] = { {"lt", '<', 2}, {"LT", '<', 2}, {"gt", '>', 2}, {"GT", '>', 2}, {"amp", '&', 3}, {"AMP", '&', 3}, {"quot", '\"', 4}, {"QUOT", '\"', 4}, {"nbsp", ' ', 4}, /* \240 */ {"reg", '\256', 3}, {"REG", '\256', 3}, {"copy", '\251', 4}, {"COPY", '\251', 4}, {"iexcl", '\241', 5}, {"cent", '\242', 4}, {"pound", '\243', 5}, {"curren", '\244', 6}, {"yen", '\245', 3}, {"brvbar", '\246', 6}, {"sect", '\247', 4}, {"uml", '\250', 3}, {"ordf", '\252', 4}, {"laquo", '\253', 5}, {"not", '\254', 3}, {"shy", '\255', 3}, {"macr", '\257', 4}, {"deg", '\260', 3}, {"plusmn", '\261', 6}, {"sup2", '\262', 4}, {"sup3", '\263', 4}, {"acute", '\264', 5}, {"micro", '\265', 5}, {"para", '\266', 4}, {"middot", '\267', 6}, {"cedil", '\270', 5}, {"sup1", '\271', 4}, {"ordm", '\272', 4}, {"raquo", '\273', 5}, {"frac14", '\274', 6}, {"frac12", '\275', 6}, {"frac34", '\276', 6}, {"iquest", 
'\277', 6}, {"Agrave", '\300', 6}, {"Aacute", '\301', 6}, {"Acirc", '\302', 5}, {"Atilde", '\303', 6}, {"Auml", '\304', 4}, {"Aring", '\305', 5}, {"AElig", '\306', 5}, {"Ccedil", '\307', 6}, {"Egrave", '\310', 6}, {"Eacute", '\311', 6}, {"Ecirc", '\312', 5}, {"Euml", '\313', 4}, {"Igrave", '\314', 6}, {"Iacute", '\315', 6}, {"Icirc", '\316', 5}, {"Iuml", '\317', 4}, {"ETH", '\320', 3}, {"Ntilde", '\321', 6}, {"Ograve", '\322', 6}, {"Oacute", '\323', 6}, {"Ocirc", '\324', 5}, {"Otilde", '\325', 6}, {"Ouml", '\326', 4}, {"times", '\327', 5}, {"Oslash", '\330', 6}, {"Ugrave", '\331', 6}, {"Uacute", '\332', 6}, {"Ucirc", '\333', 5}, {"Uuml", '\334', 4}, {"Yacute", '\335', 6}, {"THORN", '\336', 5}, {"szlig", '\337', 5}, {"agrave", '\340', 6}, {"aacute", '\341', 6}, {"acirc", '\342', 5}, {"atilde", '\343', 6}, {"auml", '\344', 4}, {"aring", '\345', 5}, {"aelig", '\346', 5}, {"ccedil", '\347', 6}, {"egrave", '\350', 6}, {"eacute", '\351', 6}, {"ecirc", '\352', 5}, {"euml", '\353', 4}, {"igrave", '\354', 6}, {"iacute", '\355', 6}, {"icirc", '\356', 5}, {"iuml", '\357', 4}, {"eth", '\360', 3}, {"ntilde", '\361', 6}, {"ograve", '\362', 6}, {"oacute", '\363', 6}, {"ocirc", '\364', 5}, {"otilde", '\365', 6}, {"ouml", '\366', 4}, {"divide", '\367', 6}, {"oslash", '\370', 6}, {"ugrave", '\371', 6}, {"uacute", '\372', 6}, {"ucirc", '\373', 5}, {"uuml", '\374', 4}, {"yacute", '\375', 6}, {"thorn", '\376', 5}, {"yuml", '\377', 4}, }; static char get_entity (const unsigned char *string, int length) { int i; unsigned char c = *string; for (i = 0; i < (sizeof(entities)/sizeof(*entities)); i++) if (length == entities[i].length && c == entities[i].string[0] && !strncmp(string, entities[i].string, length)) return entities[i].latin1_char; return 0; } #undef DEBUG_CONT #undef DEBUG_CITE #undef DEBUG_WROTE #undef DEBUG_HEAD #undef DEBUG_UU #undef DEBUG_QP #undef DEBUG_HTML #undef DEBUG_SIG static int scan (FILE *file, hash_table *table, char *first_line) { pword *prev = 0; int line_tick = 
0; int line_count = 0; unsigned char buf [10240]; unsigned char *s; int L; int mailbox_p = 0; int in_headers = 0; int in_binhex_p = 0; int contains_msg = 0; int inside_html_tag = 0; int in_comment_p = 0; int in_sig_p = 0; unsigned char *qp_wrap_hack = 0; unsigned char *qp_free_wrap_hack = 0; if (first_line) s = first_line; else { s = fgets (buf, sizeof(buf)-1, file); if (!s) return -1; } mailbox_p = (*s == 'F' && !strncmp(s, "From ", 5)); do { if (qp_free_wrap_hack) { free (qp_free_wrap_hack); qp_free_wrap_hack = 0; } if (qp_wrap_hack) { qp_free_wrap_hack = (unsigned char *) malloc (strlen(s)+strlen(qp_wrap_hack)+1); if (!qp_free_wrap_hack) return -1; strcpy(qp_free_wrap_hack, qp_wrap_hack); strcat(qp_free_wrap_hack, s); free(qp_wrap_hack); qp_wrap_hack = 0; s = qp_free_wrap_hack; } L = strlen(s); if (L > 0 && (s[L-1] == '\r' || s[L-1] == '\n')) L--; if (L > 0 && (s[L-1] == '\r' || s[L-1] == '\n')) L--; line_count++; if (++line_tick == 200) { fprintf (stderr, "."); if ((line_count % (65 * line_tick)) == 0) fprintf (stderr, " %d lines\n", line_count); line_tick = 0; } /* If inside a binhex section, keep discarding it so long as the lines are the proper length (binhex isn't as easily detectible as base64 and uuencode...) */ if (in_binhex_p) { if (L != 0 && L != 64) in_binhex_p = 0; continue; /* swallow one last line */ } /* If an HTML tag spanned multiple lines, keep discarding it. 
*/ if (inside_html_tag) { #ifdef DEBUG_CONT printf (" CONT: %s", s); #endif while (*s && *s != '>') s++, L--; if (*s) { s++; L--; inside_html_tag = 0; } } else if (in_comment_p) { #ifdef DEBUG_CONT printf (" COMMENT CONT: %s", s); #endif while (*s && (s[0] != '-' || s[1] != '-' || s[2] != '>')) s++, L--; if (*s) { s += 3; L -= 3; in_comment_p = 0; } } else if (in_sig_p) { if (in_sig_p > 20) in_sig_p = 0; else if (*s == 'F' && !strncmp (s, "From ", 5)) in_sig_p = 0; else { #ifdef DEBUG_SIG printf (" SIG %d: %s", in_sig_p, s); #endif in_sig_p++; continue; } } if (mailbox_p) { /* Strip off all lines from "-- \n" to the end of the message. */ if (s[0] == '-' && s[1] == '-' && s[2] == ' ' && (s[3] == '\n' || s[3] == '\r' || s[3] == 0)) { #ifdef DEBUG_SIG printf ("\n SIG: %s", s); #endif in_sig_p = 1; continue; } /* Strip off those FUCKING VCARDS too. */ if ((*s == 'b' || *s == 'B') && !strncasecmp(s, "begin:", 6)) { unsigned char *s2 = s+6; while (isspace(*s2)) s2++; if (!strncasecmp(s2, "vcard", 5)) { #ifdef DEBUG_SIG printf ("\n VCARD: %s", s); #endif in_sig_p = 1; continue; } } /* Strip off anything that looks like a citation prefix. */ { int got_some = 0; unsigned char *s2 = s; while (isspace(*s2)) s2++; while (*s2 == '>' || *s2 == ']' || *s2 == '}' || *s2 == '|' || *s2 == ':') { got_some = 1; s2++; while (isspace(*s2)) s2++; } #ifdef DEBUG_CITE if (got_some) printf("CITE: %s", s); #endif if (got_some) { s = s2; L = strlen(s); } } /* Strip off anything that looks like a citation heading. */ { const unsigned char *w1 = "wrote:"; const unsigned char *w2 = "writes:"; if (L > 10 && (strstr(s+L-10, w1) || strstr(s+L-10, w2))) { int got_it = 0; /* Ends with "wrote:". Nuke it if: o the line contains "<", or o the line begins with "In " or "On "; or o the line contains "@"; or o the line contains less than 4 spaces. 
*/ unsigned char *s2 = s; while (*s2 == ' ') s2++; if (!strncmp(s2, "In ", 3) || !strncmp(s2, "On ", 3)) got_it = 1; else if (strchr(s2, '@')) got_it = 1; else { int i = 0; while (*s2) if (*s2++ == ' ') i++; if (i < 4) got_it = 1; } if (got_it) { #ifdef DEBUG_WROTE printf("WROTE: %s", s); #endif L = 0; *s = 0; } else { #ifdef DEBUG_WROTE printf("NOT WROTE: %s", s); #endif } } } /* Envelope lines are definitely the start of headers. */ if (*s == 'F' && !strncmp (s, "From ", 5)) in_headers = 2; /* Guess that lines beginning with dashes, or lines beginning with common header fields are probably headers. */ else if (!in_headers && ((*s == '-' && s[1] == '-') || ((*s == 'F'||*s == 'f') && !strncmp(s,"From: ",6)) || ((*s == 'D'||*s == 'd') && !strncmp(s,"Date: ",6)) || ((*s == 'P'||*s == 'p') && !strncmp(s,"Path: ",6)) || ((*s == 'S'||*s == 's') && !strncmp(s,"Subject: ",9)) || ((*s == 'R'||*s == 'r') && !strncmp(s,"Received: ",10)) || ((*s == 'M'||*s == 'm') && !strncmp(s,"Message-ID: ",12))|| ((*s == 'R'||*s == 'r') && !strncmp(s,"Return-Path: ",13)) )) in_headers = 1; else if (in_headers) { /* Blank lines always mean end of headers. Unless these headers describe a forwarded message, in which case we should swallow one blank line. */ if (*s == '\r' || *s == '\n') { if (contains_msg) contains_msg = 0; else in_headers = 0; } /* If we're not totally sure we're in headers, then be heuristic about end-of-headers. */ else if (in_headers == 1) { /* Lines beginning with whitespace don't mean end of headers. */ if (*s == ' ' || *s == '\t') ; else { const unsigned char *s2; /* Lines that look like they begin with a header field (match "^[^ \t\n]+:") don't mean end of headers. */ for (s2 = s; *s2 && *s2 != ':' && !isspace(*s2); s2++) ; /* But all others do. 
*/ if (*s2 != ':') in_headers = 0; } if (in_headers && !contains_msg && (*s == 'C' || *s == 'c') && (!strncasecmp (s, "Content-Type: message/rfc822", 28) || !strncasecmp (s, "Content-Type: message/news", 26))) contains_msg = 1; } } #ifdef DEBUG_HEAD if (in_headers) { if (L > 72) strcpy(s+69, "...\n"); printf ("HEAD: %s", s); } #endif /* 0 */ if (in_headers) { prev = 0; continue; } /* If the line is exacty 61 characters long and begins with M, it might be uuencoded data. Go look at each character and see if it fits the profile. */ if (L == 61 && *s == 'M') { int uue_p = 1; const unsigned char *s2; for (s2 = s; *s2 && *s2 != '\n' && *s2 != '\r'; s2++) if (*s2 < ' ' || *s2 > '`') { uue_p = 0; break; } #ifdef DEBUG_UU if (uue_p) printf("UUE: %s", s); #endif if (uue_p) continue; } /* If the line is more than 60 characters, or ends with "=", then it might be base64 data. Go look at each character and see if it fits the profile. */ if (L >= 60 || (L > 3 && s[L-1] == '=')) { int b64_p = 1; const unsigned char *s2; for (s2 = s; *s2 && *s2 != '\n' && *s2 != '\r'; s2++) if (! ((*s2 >= 'A' && *s2 <= 'Z') || (*s2 >= 'a' && *s2 <= 'z') || (*s2 >= '0' && *s2 <= '9') || *s2 == '+' || *s2 == '/' || *s2 == '=')) { b64_p = 0; break; } #ifdef DEBUG_UU if (b64_p) printf("B64: %s", s); #endif if (b64_p) continue; } /* If the line begins with the magic BinHex string, then go into "binhex-skipping-mode." (Handled at the start of the loop.) */ if (s[0] == '(' && s[1] == 'T' && !strncmp(s, "(This file must be converted with BinHex 4.0)", 45)) { in_binhex_p = 1; continue; } /* Failing that, any line that is longer than 60 characters but doesn't contain any spaces is fucked up in some way, so give up on it. */ if (L > 60 && !strchr (s, ' ')) { #ifdef DEBUG_UU printf("CRAP: %s", s); #endif continue; } } /* Decode anything that looks a little bit like quoted-unreadable. 
*/ { #ifdef DEBUG_QP int got_any = 0; unsigned char *o = strdup(s); #endif unsigned char *s2 = s; while ((s2 = strchr(s2, '='))) { if (!isxdigit(s2[1]) || !isxdigit(s2[2])) s2++; else if (*s2) { unsigned char *s3 = s2 + 1; const unsigned char *s4 = s2 + 3; s2[0] = ((((s2[1] >= '0' && s2[1] <= '9') ? s2[1] - '0' : ((s2[1] >= 'A' && s2[1] <= 'F') ? s2[1] - ('A' - 10) : s2[1] - ('a' - 10))) << 4) | (((s2[2] >= '0' && s2[2] <= '9') ? s2[2] - '0' : ((s2[2] >= 'A' && s2[2] <= 'F') ? s2[2] - ('A' - 10) : s2[2] - ('a' - 10))) << 4)); L -= 2; while (*s4) *s3++ = *s4++; *s3 = 0; #ifdef DEBUG_QP got_any = 1; #endif } } #ifdef DEBUG_QP if (got_any) { printf ("LINE1: %s", o); printf ("LINE2: %s", s); } free(o); #endif } /* If the line ends with "=", then this might also be quoted-unreadable. If the character before the = was alphanumeric, then a word was split. Truncate the line before that word, and remember the word for next time around. */ if (L > 1 && s[L-1] == '=' && isalnum(s[L-2])) { unsigned char *s3 = s+L-2; s[L-1] = 0; while (isalnum(*s3) && s3 > s) s3--; *s3 = 0; qp_wrap_hack = strdup(s3+1); L = strlen(s); } /* Strip out anything that looks like an HTML tag. */ { #ifdef DEBUG_HTML int got_any = 0; unsigned char *o = strdup(s); #endif const unsigned char *last = s; unsigned char *s2; while ((s2 = strchr(last, '<'))) { const unsigned char *s3 = s2+1; int close_p = 0; last = s3; /* Multiple <<< in a row disqualifies it from being a tag. */ if (*last == '<') { while (*last == '<') last++; continue; } /* To qualify as a tag, it must match "0 and <20 characters long. 
*/ if (s3 > s2+1+close_p && s3 < s2+20 && (*s3 == 0 || *s3 == '>' || isspace(*s3))) { while (*s3 && *s3 != '>') s3++; if (!*s3) { last = s3; inside_html_tag = 1; s2[0] = 0; s2[1] = 0; #ifdef DEBUG_HTML got_any = 1; #endif #ifdef DEBUG_HTML printf(" HTML: %s", s2); #endif } else { unsigned char *out = s2; #ifdef DEBUG_HTML unsigned char b[255]; strncpy(b, s2, s3+1-s2); b[s3+1-s2]=0; printf ("HTML: %s\n", b); #endif #ifdef DEBUG_HTML got_any = 1; #endif s3++; *out++ = ' '; while (*s3) *out++ = *s3++; *out = 0; last = s2; } } else if (s3[0] == '!' && s3[1] == '-' && s3[2] == '-') { unsigned char *out = s2; in_comment_p = 1; s2[0] = 0; s2[1] = 0; #ifdef DEBUG_HTML got_any = 1; #endif s3 += 3; while (*s3 && (s3[0] != '-' || s3[1] != '-' || s3[2] != '>')) s3++; if (*s3) { in_comment_p = 0; s3 += 3; } *out++ = ' '; while (*s3) *out++ = *s3++; *out = 0; last = s2; } } #ifdef DEBUG_HTML if (got_any) { printf ("LINE1: %s", o); printf ("LINE2: %s", s); } free(o); #endif } /* Remap anything that looks like an HTML character entity. 
*/ { const unsigned char *last = s; unsigned char *s2; while ((s2 = strchr(last, '&'))) { unsigned char *s3 = s2+1; last = s3; while (*s3 && *s3 != ';' && !isspace(*s3) && s3 < last+10) s3++; if (*s3 == ';' || isspace(*s3)) { unsigned char e = get_entity(last, s3-last); if (!e) continue; *s2++ = e; if (*s3) s3++; while (*s3) *s2++ = *s3++; *s2 = 0; } } } prev = scan_line (s, table, prev); } while ((s = fgets(buf, sizeof(buf)-1, file))); if (qp_free_wrap_hack) free (qp_free_wrap_hack); fprintf (stderr, " %d lines\n", line_count); return 0; } static void usage (const char *av0) { char *s = strdup(version+4); char *s2 = strchr(s, '('); *s2 = '<'; s2 = strchr(s, ')'); *s2 = '>'; fprintf (stderr, "%s\n", s); free (s); fprintf (stderr, "\nusage: %s [ options ] [ input-files ]\n", av0); fprintf (stderr, "\n\ This program analyses text files and generates markov chains of word\n\ frequencies; it can then generate random sentences based on that data.\n\ Options include:\n\ \n\ -h or -help this message\n\ -o or -output file to save compiled data in (- for stdout)\n\ -l or -load file of compiled data to load (- for stdin)\n\ -c or -count how many sentences to generate (0 = inf)\n\ -p or -pause delay between paragraphs\n\ -html output HTML instead of plain-text.\n\ \n\ Remaining arguments are input files; these should be text files, but may\n\ be mail folders or HTML. 
(MIME messages are also handled sensibly.)\n\ \n\ When no output file is specified, sentences will be generated from the input\n\ data directly; however, loading a saved file is far faster than re-parsing\n\ the text files each time.\n\n"); } extern int *starters; extern int total_starters; extern word *all_words; extern unsigned char **all_strings; int main (int argc, char **argv) { int status; int i; int n_input = 0; int count = -1; int pause = 1; int html_p = 0; const char ** input = (const char **) malloc (argc * sizeof(*input)); char *output = 0; char *load = 0; char *stat_words = 0; FILE *tmp_file = 0; for (i = 1; i < argc; i++) { const char *sw = argv[i]; if (sw[0] == '-' && sw[1] == '-') sw++; if (!strcmp(sw, "-h") || !strcmp(argv[i], "-help")) { usage(argv[0]); exit(0); } else if (!strcmp(sw, "-o") || !strcmp(sw, "-output")) { output = argv[++i]; } else if (!strcmp(sw, "-l") || !strcmp(sw, "-load")) { load = argv[++i]; } else if (!strcmp(sw, "-c") || !strcmp(sw, "-count")) { if (i+1 >= argc) { usage(argv[0]); exit(1); } count = atoi(argv[++i]); } else if (!strcmp(sw, "-p") || !strcmp(sw, "-pause")) { if (i+1 >= argc) { usage(argv[0]); exit(1); } pause = atoi(argv[++i]); } else if (!strcmp(sw, "-html")) { html_p = 1; } else if (!strcmp(sw, "-stats")) { if (argc > i+1 && argv[i+1][0] != '-') stat_words = argv[++i]; else stat_words = strdup(""); } else if (sw[0] == '-' && sw[1]) { usage(argv[0]); exit(1); } else { input[n_input++] = sw; } } if (n_input == 0 && !output && !load) { usage(argv[0]); exit(1); } if (load && n_input) { fprintf(stderr, "%s: can't load and parse files at the same time.\n", argv[0]); usage(argv[0]); exit(1); } if (!output && !stat_words && count == -1) count = 0; if (n_input) { FILE *out; hash_table *table = make_hash_table (20000, (long (*) (const void *)) string_case_hash, (int (*) (const void *, const void *)) strcasecmp); for (i = 0; i < n_input; i++) { unsigned char buf[1024]; unsigned char *s; FILE *f; if (!strcmp(input[i], "-")) 
f = stdin; else { f = fopen (input[i], "r"); if (!f) { sprintf(buf, "%s: opening input file %s", argv[0], input[i]); perror(buf); exit(1); } } fprintf (stderr, "%s: reading %s...\n", argv[0], (f == stdin ? "stdin" : input[i])); s = fgets (buf, sizeof(buf)-1, f); if (!s) { status = 0; /* empty file */ } else { if (!!strcmp(s, DADADODO_MAGIC)) status = scan (f, table, buf); else { if (load) { fprintf(stderr, "%s: can't load two saved files at once (%s and %s)\n", argv[0], load, input[i]); exit(-1); } else { fprintf (stderr, "%s: saved files must be loaded with -load: %s\n", argv[0], input[i]); exit(-1); } } } if (f != stdin) fclose (f); if (status < 0) { fprintf (stderr, "%s: out of memory\n", argv[0]); exit(1); } } free_hash_table (table); free (input); input = 0; if (!output) { char *tmp = getenv("TMPDIR"); char *b; if (!tmp) tmp = strdup("/tmp"); b = (char *) malloc(strlen(tmp) + 40); strcpy(b, tmp); if (b[strlen(b)-1] != '/') strcat(b, "/"); sprintf(b+strlen(b), "dadadodo.%d", getpid()); tmp_file = fopen(b, "wb+"); if (!tmp_file) { char buf[255]; sprintf(buf, "%s: opening %s", argv[0], b); perror(buf); exit (-1); } unlink (b); free (b); out = tmp_file; } else if (!strcmp(output, "-")) out = stdout; else { out = fopen(output, "wb"); if (!out) { char buf[255]; sprintf(buf, "%s: opening output file %s", argv[0], output); perror(buf); exit(1); } } status = write_dadadodo_file (out, output); if (out == tmp_file) fflush (out); else if (out != stdout) fclose (out); if (status < 0) { char buf[255]; sprintf(buf, "%s: writing output file %s", argv[0], output); perror(buf); exit(1); } } if (count >= 0 || stat_words) { FILE *f; if (load) { f = fopen(load, "rb"); if (!f) { fprintf (stderr, "%s: can't open input file %s\n", argv[0], load); exit (-1); } } else if (output) { f = fopen(output, "rb"); if (!f) { fprintf (stderr, "%s: can't open output file %s\n", argv[0], load); exit (-1); } } else { f = tmp_file; fseek (f, 0, 0); } status = read_dadadodo_file (f); if (status < 0) 
{ perror("reading file"); exit (-1); } fclose(f); } if (stat_words) { if (!*stat_words) stats (stdout); else if (!strcmp(stat_words, "starters")) { int done_once = 0; int *s = starters; int i = 0; printf("\nStarters:"); while (i < total_starters) { i += all_words[*s].start; string_stats (stdout, all_strings[all_words[*s].string], !done_once); done_once = 1; s++; } } else { unsigned char *s = strtok (stat_words, ",; "); int done_once = 0; do { string_stats (stdout, s, !done_once); done_once = 1; } while ((s = strtok (0, ",; "))); } } if (count >= 0) { int column = 0; int words = 0; int n = 0; int indent = 0; int fill_column = 72; int sidebar_p = 0; int sidebar_words = 0; int number_p = 0; FILE *out = stdout; ya_rand_init(0); while (count > 0 ? n < count : 1) { /* Break paragraph. */ if (words == 0 || words > 90 || sidebar_words < 0 || (column > 0 && (RAND(4)) == 0)) { int old_indent = indent; if (RAND(3) == 0) indent = RAND(4) * 4; fill_column = 72; if (indent && (RAND(2) == 0)) fill_column -= indent; if (sidebar_p) { fputs ("\n", out); sidebar_p = 0; } if (indent == 0) number_p = 0; if (html_p && indent != old_indent) { int i; if (indent > old_indent) { for (i = old_indent; i < indent; i += 4) { switch (RAND(4)) { case 0: case 1: fputs ("
", out); break; case 2: fprintf (out, "
    ", RAND(100)); break; default: fputs ("
      ", out); break; } } number_p = (RAND(5) == 0); } else for (i = indent; i < old_indent; i += 4) fputs ("
    ", out); fputs("\n", out); } if (words > 0) { if (html_p) { if (number_p && RAND(3) != 0) fputs ("

  1. ", out); else if (RAND(10) == 0) fputs ("

    ", out); else fputs ("

    ", out); } fputs ("\n\n", out); fflush (out); column = 0; words = 0; if (pause) sleep (pause); } /* after there are more words out of the sidebar than in, close then reopen any to avoid a floating table causing the whitespace on the right to never ever be reclaimed... */ if (sidebar_words < 0 || sidebar_words == 1) { int i; sidebar_words = 0; for (i = 0; i < indent; i += 4) fputs("", out); for (i = 0; i < indent; i += 4) fputs("

      ", out); } if (html_p && !sidebar_p && sidebar_words <= 0 && RAND(100) == 0) { int width = 30 + RAND(30); fprintf (out, "
      \n", width); sidebar_p = 1; sidebar_words = 100; } } { int sw = random_sentence (out, &column, indent, fill_column, html_p); fflush (out); words += sw; n++; if (sidebar_p) sidebar_words += sw; else if (sidebar_words > 1) { sidebar_words -= sw; if (sidebar_words == 0) sidebar_words = 1; } } } if (sidebar_p) fputs ("
      ", out); if (html_p) fputs ("

      \n", out); else fputs ("\n", out); } exit (0); } dadadodo-1.04/parse.c0100644000033400007640000001415707621426032012675 0ustar jwzjwz/* parse.c --- generating a markov chain. * DadaDodo, Copyright (c) 1997, 2003 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #include #include #include #include #include #include "dadadodo.h" #include "parse.h" #include "parsei.h" int total_words = 0; int total_links = 0; pword_pool *wpool = 0; string_pool *spool = 0; static const unsigned char * new_string (const unsigned char *s) { int L = strlen(s); if (!spool || spool->fp+L+2 >= STRING_POOL_COUNT) { string_pool *p = (string_pool *) malloc (sizeof(*p)); if (!p) return 0; p->fp = 0; p->next = spool; spool = p; } memcpy (spool->chars + spool->fp, s, L+1); s = spool->chars + spool->fp; spool->fp += L+1; return s; } static pword * new_pword (const unsigned char *string) { pword *pw; string = new_string (string); if (!string) return 0; if (!wpool || wpool->fp+1 >= PWORD_POOL_COUNT) { pword_pool *p = (pword_pool *) calloc (1, sizeof(*p)); if (!p) return 0; p->fp = 0; p->next = wpool; wpool = p; } pw = &wpool->pwords[wpool->fp++]; pw->id = total_words++; pw->string = string; return pw; } static int increment (pword *pword, pword_link **listP, int *lengthP, int *sizeP) { int size = *sizeP; int length = *lengthP; pword_link *list = *listP; int bot = 0; int top = length; /* Binary search for word in the pword_link list. Assume it's sorted by &pword_link->word. 
*/ while (top > bot) { int mid = ((top - bot) >> 1) + bot; if (pword == list[mid].word) { list[mid].count++; return 0; } else if (pword < list[mid].word) top = mid; else if (bot == mid) break; else bot = mid; } /* We didn't find it, but top == bot == the place it ought to go. Expand the array, move the others down, and insert it. This is a lot of memmoves, but... */ if (size == 0) { size = 10; list = (pword_link *) calloc (size, sizeof(*list)); if (!list) return 0; *listP = list; *sizeP = size; } else if (length+1 >= size) { size = (((*sizeP + 10) * 13) / 10); list = (pword_link *) realloc (list, size * sizeof(*list)); if (!list) return 0; memset (list+length, 0, (size-length) * sizeof(*list)); *listP = list; *sizeP = size; } if (top != length) /* Note: regions overlap, so memmove() better work as advertised. */ memmove (list+top+1, list+top, (length - top) * sizeof(*list)); total_links++; (*lengthP)++; list[top].word = pword; list[top].count = 1; return 0; } static int follow (pword *prev, pword *next) { int s = increment (next, &prev->succ, &prev->succ_length, &prev->succ_size); if (s < 0) return s; return increment (prev, &next->pred, &next->pred_length, &next->pred_size); } static pword * intern (const unsigned char *string, hash_table *table) { pword *pw = (pword *) gethash (table, (void *) string, 0); if (!pw) { int status; pw = new_pword (string); if (! pw) return 0; status = puthash (table, (void *) pw->string, (void *) pw); if (status < 0) { /* free_pword (pw); */ return 0; } } return pw; } /* Interns and indexes the word (a null-terminated string.) 
*/ static pword * push (const unsigned char *string, char punc, hash_table *table, pword *prev) { pword *pword = intern (string, table); int terminal = 0; if (!pword) return 0; pword->count++; if (!prev) pword->start++; else if (isupper(*string)) pword->cap++; switch (punc) { case ',': pword->comma++; break; case '.': pword->period++; terminal=1; break; case ':': pword->period++; terminal=1; break; case ';': pword->period++; break; case '?': pword->quem++; terminal=1; break; case '!': pword->bang++; terminal=1; break; case '(': pword->comma++; break; case ')': pword->comma++; break; } if (prev) follow (prev, pword); if (terminal) return 0; return pword; } /* Map over the line, and call push() with each word. `line' is modified/destroyed. */ pword * scan_line (unsigned char *line, hash_table *table, pword *prev) { unsigned char *s = line; while (*s) { int had_dot = 0; int had_digit = 0; unsigned char *start, *end; /* Skip forward to first alphanumeric or slash. */ while (*s && !(isalnum(*s) || *s == '/' || *s == '\\')) s++; start = s; /* Ignore pathnames and URLs. They don't contain nice word selections. */ if (*start == '/' || *start == '\\' || (*start == 'h' && !strncmp(start, "http://", 7)) || (*start == 'm' && !strncmp(start, "mailto:", 7)) || (*start == 'f' && !strncmp(start, "ftp://", 6)) || (*start == 'g' && !strncmp(start, "file://", 7))) { while (*s && !isspace(*s)) s++; start = s; if (*s) break; } AGAIN: /* Skip forward to next non-alphanumeric-non-apostrophe. */ while (*s && (isalnum(*s) || *s == '\'')) { if (isdigit(*s)) had_digit = 1; s++; } end = s; if ((*s == '.' || *s == '@') && isalnum(s[1])) { s++; /* treat "xxx@foo.com" as one pword */ had_dot = 1; goto AGAIN; } if (had_dot) /* turns out that we get a lot of lists of newsgroup names, so let's just reject all dotted words. */ continue; if (had_digit) /* Let's just ignore words that contain any digits. 
*/ continue; while (s > start && ispunct(s[-1])) s--; if (s > start + 30) /* Ignore words that are more than 30 long. */ continue; if (s != start) { unsigned char punc = *s; *s = 0; prev = push (start, punc, table, prev); *s = punc; } s = end; } return prev; } dadadodo-1.04/generate.c0100644000033400007640000001666407621426063013366 0ustar jwzjwz/* generate.c --- generating random sentences from a markov chain. * DadaDodo, Copyright (c) 1997, 1998, 2003 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #include #include #include #include #include "hash.h" #include "dadadodo.h" #include "generate.h" #include "yarandom.h" extern int total_words; word *all_words = 0; unsigned char **all_strings = 0; int total_starters = 0; int *starters = 0; static word * random_first_word (void) { word *w = 0; int *s = starters; int i = RAND(total_starters); do { w = &all_words[*s]; i -= w->start; s++; } while (i > 0); return w; } static word * random_linked (word_link *links, int link_length) { int i; int count = 0; /* Could cache this number by making `word' bigger, but it doesn't seem to be a performance problem. */ for (i = 0; i < link_length; i++) count += links[i].count; count = RAND(count); for (i = 0; i < link_length; i++) { count -= links[i].count; if (count <= 0) return &all_words [links[i].word]; } abort(); } static word * random_next_word (word *w) { if (w->pred_length && !(RAND(10))) /* One time in ten, pick a random sibling instead of a random child. 
*/ { word *w2 = random_next_word (random_linked (w->pred, w->pred_length)); if (w != w2) /* but don't stutter. */ return w2; } if (w->succ_length) return random_linked (w->succ, w->succ_length); else if (random() % 4) return &all_words[RAND(total_words)]; /* triple word score */ else return 0; } int random_sentence (FILE *stream, int *column, int indent, int fill_column, int html_p) { int count = 0; unsigned char buf [10240]; word *word = random_first_word (); int first_p = 1; int paren_open_p = 0; int open_paren_p = 0; unsigned char indent_str[255]; for (count = 0; count < indent; count++) indent_str[count] = ' '; indent_str[count] = 0; count = 0; while (*column < indent) { fputs(" ", stream); (*column)++; } while (word) { int open_paren_next_p = 0; int close_paren_p = 0; int punctuate_chance = RAND(word->count); int always_cap_p = (word->count - word->start) <= word->cap; int cap_p = (first_p || always_cap_p); const unsigned char *c1 = all_strings[word->string]; unsigned char *c2 = buf; int new_col; const unsigned char *punc = 0; int premature_end_slop = 0; if (count > 400) /* Stuck in a long-running loop. */ { fprintf(stream, "\nYow!\n%s", indent_str); *column = indent; return count; } count++; /* Every ten words, increase the chance of the sentence ending by 16% (yet, only end at places where periods were possible.) */ if ((count % 10) == 0) { int delta = (word->count / 6); premature_end_slop += (delta == 0 ? 
1 : delta); } if (punctuate_chance < word->comma) { if (paren_open_p) { punc = 0; close_paren_p = 0; } else if (RAND(20)) /* once in 20, use parens instead of commas */ punc = ","; else open_paren_next_p = 1; } else { punctuate_chance -= word->comma; if (word->period && punctuate_chance < word->period + premature_end_slop) { #if 0 if (punctuate_chance >= word->period) fprintf(stderr, "premature: %d %d %d %s\n", premature_end_slop, word->count, word->period, all_strings[word->string]); #endif if ((RAND(5)) == 0) punc = ":"; else if ((RAND(4)) == 0) punc = ";"; else { close_paren_p = 1; punc = ". "; } } else { punctuate_chance -= word->period; if (punctuate_chance < word->quem) punc = "? "; else { punctuate_chance -= word->quem; if (punctuate_chance < word->bang) punc = "! "; else punc = 0; } if (punc) close_paren_p = 1; } } new_col = *column + strlen (c1) + (punc ? strlen(punc) : 0); if (open_paren_p || close_paren_p) new_col++; #if 0 printf("\n%s\t\tpun=%3d cou=%3d sta=%3d cap=%3d com=%3d per=%3d que=%3d\n", all_strings[word->string], punctuate_chance, word->count, word->start, word->cap, word->comma, word->period, word->quem); #endif /* 0 */ first_p = 0; if (*column <= indent) *column = new_col; else if (new_col < fill_column) { fputs (" ", stream); *column = new_col+1; } else { fputs ("\n", stream); fputs (indent_str, stream); *column = indent + strlen (c1)+1; } if (open_paren_p) { *c2++ = '('; open_paren_p = 0; open_paren_next_p = 0; paren_open_p = 1; } /* If the word was always capitalized, then use the capitalization (of all letters) exactly as seen. Else, if we're at the beginning of a sentence, upcase the first character and downcase the rest. Else, capitalize the word as probability suggests. */ if (cap_p || (word->count > word->start && RAND(word->count - word->start) < word->cap)) *c2++ = toupper (*c1++); if (always_cap_p) strcpy (c2, c1); else { while (*c1) *c2++ = tolower (*c1++); *c2 = 0; } fputs (buf, stream); if (punc && (*punc == '.' || *punc == '!' 
|| *punc == '?')) word = 0; else { word = random_next_word (word); if (!word) /* no next; maybe the word never had a next. */ { /* punctuate it anyway. */ switch (RAND(5)) { case 0: punc = "! "; break; case 1: punc = "? "; break; default: punc = ". "; break; } (*column) += 2; close_paren_p = 1; } } if (paren_open_p && close_paren_p) { fputs(")", stream); paren_open_p = 0; close_paren_p = 0; open_paren_next_p = 0; } if (punc) fputs (punc, stream); if (open_paren_next_p) open_paren_p = 1; } if (paren_open_p) abort(); return count; } void word_stats (FILE *stream, word *word) { int i; fprintf (stream, "%-16s %5d %5d %5d %5d %5d %5d %5d\n", all_strings[word->string], word->count, word->start, word->cap, word->comma, word->period, word->quem, word->bang); if (word->succ_length) fprintf (stream, " -->\n"); for (i = 0; i < word->succ_length; i++) fprintf (stream, " %5d %s\n", word->succ[i].count, all_strings [all_words[word->succ[i].word].string]); if (word->pred_length) fprintf (stream, " <--\n"); for (i = 0; i < word->pred_length; i++) fprintf (stream, " %5d %s\n", word->pred[i].count, all_strings [all_words[word->pred[i].word].string]); } #define STAT_HEAD \ "\nWORD COUNT START CAP COMMA END QUEM BANG\n\n" void stats (FILE *stream) { int i; fputs (STAT_HEAD, stream); for (i = 0; i < total_words; i++) word_stats (stream, &all_words[i]); fputs ("\n", stream); } void string_stats (FILE *stream, const unsigned char *s, int print_header) { int i; if (print_header) fputs (STAT_HEAD, stream); for (i = 0; i < total_words; i++) if (!strcasecmp (s, all_strings [all_words[i].string])) word_stats (stream, &all_words[i]); } dadadodo-1.04/files.c0100644000033400007640000003445507621426072012674 0ustar jwzjwz/* files.c --- input and output. 
* DadaDodo, Copyright (c) 1997, 1998, 2003 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #include #include #include #include #include #include #include /* for htonl */ #include "dadadodo.h" #include "files.h" #include "parse.h" #include "parsei.h" #include "generate.h" /* Tick this when the file format changes. No relation to version.h. */ #define FILE_VERSION 1 /* Output. */ static unsigned char write_buf[10240]; static int write_fp = 0; static int write_flush (FILE *out) { int status; if (!write_fp) return 0; status = fwrite (write_buf, 1, write_fp, out); write_fp = 0; return status; } static int write_int32 (FILE *out, int x) { u_long lx = (u_long) x; /* no htonl, since we're writing big-endian */ write_buf[write_fp++] = (lx >> 24); write_buf[write_fp++] = (lx >> 16) & 0xFF; write_buf[write_fp++] = (lx >> 8) & 0xFF; write_buf[write_fp++] = lx & 0xFF; if (write_fp >= (sizeof(write_buf)-4)) return write_flush(out); return 0; } static int write_int16 (FILE *out, int x) { u_short sx = (u_short) x; /* no htonl, since we're writing big-endian */ write_buf[write_fp++] = (sx >> 8) & 0xFF; write_buf[write_fp++] = sx & 0xFF; if (write_fp >= (sizeof(write_buf)-4)) return write_flush(out); return 0; } static int write_int8 (FILE *out, int x) { u_short sx = (u_short) x; /* no htonl, since we're writing big-endian */ write_buf[write_fp++] = ((sx & 0xFF) ? 
(sx & 0xFF) : (sx >> 8)); if (write_fp >= (sizeof(write_buf)-4)) return write_flush(out); return 0; } int write_dadadodo_file (FILE *out, const char *output_name) { int status; int i; int (*write_fn) (FILE *out, int x) = write_int32; if (output_name) fprintf (stderr, "writing %s (%d words, %d pairs)\n", output_name, total_words, total_links/2); fputs (DADADODO_MAGIC, out); /* Write file version. */ status = write_int32 (out, FILE_VERSION); if (status < 0) return status; /* Write `total_words' */ status = write_int32 (out, total_words); if (status < 0) return status; /* Write `total_links' */ status = write_int32 (out, total_links); if (status < 0) return status; /* Write out the strings. */ { string_pool *p, **pools; int npools = 0; int string_bytes = 0; for (p = spool; p; p = p->next) { npools++; if (p == spool) string_bytes += p->fp; else string_bytes += STRING_POOL_COUNT; } pools = (string_pool **) malloc(npools * sizeof(*pools)); if (!pools) return -1; i = npools-1; for (p = spool; p; p = p->next, i--) pools[i] = p; /* Write total string bytes */ status = write_int32 (out, string_bytes); if (status < 0) return status; /* Important to flush now, since we call fwrite... */ status = write_flush (out); if (status < 0) return status; /* Write out the string data. I wanted to just write out the strings, one after another, and read them into one contiguous segment -- but that doesn't work, because if you malloc something larger than a page, the read() syscall can't write into it -- on Irix at least, you die with a "memory segment error". So, we write them out in ~500k chunks, padding with nulls between chunks so that strings aren't split across them. (The last chunk may be less than 500k and isn't padded.) 
*/ for (i = 0; i < npools; i++) { p = pools[i]; if (i == npools-1) status = fwrite (p->chars, 1, p->fp, out); else { if (p->fp < STRING_POOL_COUNT) memset (p->chars + p->fp, 0, STRING_POOL_COUNT - p->fp); status = fwrite (p->chars, 1, STRING_POOL_COUNT, out); } free (p); if (status < 0) return status; } free (pools); } /* Write out the `word' structures. */ { pword_pool *p, **pools; int npools = 0; for (p = wpool; p; p = p->next) npools++; pools = (pword_pool **) malloc(npools * sizeof(*pools)); if (!pools) return -1; for (p = wpool, i = 0; p; p = p->next, i++) pools[npools-i-1] = p; /* Decide on a word-length. */ if (total_words > 0xFFFF) write_fn = write_int32; else if (total_words > 0xFF) write_fn = write_int16; else write_fn = write_int8; for (i = 0; i < npools; i++) { int j; p = pools[i]; for (j = 0; j < p->fp; j++) { int k; pword *w = &p->pwords[j]; # define WRITE(INT) \ status = write_fn(out,INT); \ if (status < 0) return status WRITE(w->count); WRITE(w->start); WRITE(w->cap); WRITE(w->comma); WRITE(w->period); WRITE(w->quem); WRITE(w->bang); WRITE(w->succ_length); WRITE(w->pred_length); for (k = 0; k < w->succ_length; k++) { WRITE(w->succ[k].count); WRITE(w->succ[k].word->id); } for (k = 0; k < w->pred_length; k++) { WRITE(w->pred[k].count); WRITE(w->pred[k].word->id); } # undef WRITE if (w->succ) free (w->succ); if (w->pred) free (w->pred); } } /* Compute and write `total_starters' */ { int nstarters = 0; total_starters = 0; for (i = 0; i < npools; i++) { int j; p = pools[i]; for (j = 0; j < p->fp; j++) { pword *w = &p->pwords[j]; if (w->start) { total_starters += w->start; nstarters++; } } } status = write_int32 (out, total_starters); if (status < 0) return status; status = write_int32 (out, nstarters); if (status < 0) return status; /* Write out `starters' array. 
*/ for (i = 0; i < npools; i++) { int j; p = pools[i]; for (j = 0; j < p->fp; j++) { pword *w = &p->pwords[j]; if (w->start) write_fn(out, w->id); } } } for (i = 0; i < npools; i++) free (pools[i]); free (pools); } status = write_flush (out); if (status < 0) return status; status = fflush (out); return status; } /* Input. */ int read_dadadodo_file (FILE *in) { int i = 0; unsigned char *s = 0; unsigned char **as = 0, **as2 = 0; word *aw = 0; word_link *al = 0, *al_l = 0; long tw = 0, tl = 0, sb = 0; unsigned char **spools = 0; int nspools = 0; int nstarters = 0; int ts = 0; int *st = 0; int fv = 0; void *buf = 0; int buf_size = 0; int buf_elt_size = 0; u_short scratch_ushort = 0; s = fgets (write_buf, 100, in); if (!s || strncmp(s, DADADODO_MAGIC, strlen(DADADODO_MAGIC))) { fprintf (stderr, "not a DadaDodo Data file\n"); goto FAIL; } /* Write file version. */ if (4 != fread ((void *) &fv, 1, 4, in)) goto FAIL; fv = (long) ntohl (fv); if (fv != FILE_VERSION) { fprintf (stderr, "incompatible dadadodo file version: %d instead of %d\n", fv, FILE_VERSION); goto FAIL; } if (4 != fread ((void *) &tw, 1, 4, in)) goto FAIL; tw = (long) ntohl (tw); if (!tw) goto FAIL; if (4 != fread ((void *) &tl, 1, 4, in)) goto FAIL; tl = (long) ntohl (tl); if (!tl) goto FAIL; if (4 != fread ((void *) &sb, 1, 4, in)) goto FAIL; sb = (long) ntohl (sb); if (!sb) goto FAIL; nspools = (sb + STRING_POOL_COUNT - 1) / STRING_POOL_COUNT; spools = (unsigned char **) malloc (nspools * sizeof(*spools)); if (!spools) goto FAIL; memset (spools, 0, nspools * sizeof(*spools)); for (i = 0; i < nspools; i++) { if (i == nspools-1) spools[i] = (unsigned char *) malloc (sb % STRING_POOL_COUNT); else spools[i] = (unsigned char *) malloc (STRING_POOL_COUNT); if (!spools[i]) goto FAIL; } as = (unsigned char **) malloc (tw * sizeof(*as)); if (!as) goto FAIL; aw = (word *) malloc (tw * sizeof(*aw)); if (!aw) goto FAIL; al = (word_link *) malloc (tl * sizeof(*al)); if (!al) goto FAIL; /* Read in the string data */ 
as2 = as; for (i = 0; i < nspools; i++) { unsigned char *s, *s2; int bytes_to_read = (i == nspools-1 ? (sb % STRING_POOL_COUNT) : STRING_POOL_COUNT); int this_time = fread (spools[i], 1, bytes_to_read, in); if (this_time != bytes_to_read) { perror("read error"); goto FAIL; } /* Fill in all_strings. */ s = spools[i]; s2 = s + bytes_to_read; while (s < s2) { *as2++ = s; s += strlen(s) + 1; while (s < s2 && !*s) s++; } } buf_size = 50; buf = (void *) malloc (buf_size * sizeof(u_long)); if (!buf) goto FAIL; /* Decide on a word-length. */ if (tw > 0xFFFF) buf_elt_size = 4; else if (tw > 0xFF) buf_elt_size = 2; else buf_elt_size = 1; /* Read in the word data */ al_l = al; for (i = 0; i < tw; i++) { word *w = &aw[i]; int j = fread (buf, buf_elt_size, 9, in); if (j != 9) { perror("short read"); goto FAIL; } w->string = i; switch (buf_elt_size) { case 4: w->count = ntohl(((u_long *) buf) [0]); w->start = ntohl(((u_long *) buf) [1]); w->cap = ntohl(((u_long *) buf) [2]); w->comma = ntohl(((u_long *) buf) [3]); w->period = ntohl(((u_long *) buf) [4]); w->quem = ntohl(((u_long *) buf) [5]); w->bang = ntohl(((u_long *) buf) [6]); w->succ_length = ntohl(((u_long *) buf) [7]); w->pred_length = ntohl(((u_long *) buf) [8]); break; case 2: w->count = ntohs(((u_short *) buf) [0]); w->start = ntohs(((u_short *) buf) [1]); w->cap = ntohs(((u_short *) buf) [2]); w->comma = ntohs(((u_short *) buf) [3]); w->period = ntohs(((u_short *) buf) [4]); w->quem = ntohs(((u_short *) buf) [5]); w->bang = ntohs(((u_short *) buf) [6]); w->succ_length = ntohs(((u_short *) buf) [7]); w->pred_length = ntohs(((u_short *) buf) [8]); break; default: # define NTOHC(C) ((scratch_ushort = ntohs((C))), \ (scratch_ushort & 0xFF \ ? 
scratch_ushort & 0xFF \ : scratch_ushort >> 8)) w->count = NTOHC(((unsigned char *) buf) [0]); w->start = NTOHC(((unsigned char *) buf) [1]); w->cap = NTOHC(((unsigned char *) buf) [2]); w->comma = NTOHC(((unsigned char *) buf) [3]); w->period = NTOHC(((unsigned char *) buf) [4]); w->quem = NTOHC(((unsigned char *) buf) [5]); w->bang = NTOHC(((unsigned char *) buf) [6]); w->succ_length = NTOHC(((unsigned char *) buf) [7]); w->pred_length = NTOHC(((unsigned char *) buf) [8]); break; } /* Make sure buf is big enough */ { int L = (w->succ_length > w->pred_length ? w->succ_length : w->pred_length); if (buf_size < buf_elt_size * L * 2) { int s = buf_elt_size * L * 2 * 2; void *b2 = (void *) realloc (buf, s); if (!b2) goto FAIL; buf_size = s; buf = b2; } } if (!w->succ_length) w->succ = 0; else { j = fread (buf, buf_elt_size, w->succ_length * 2, in); if (j != (w->succ_length * 2)) { perror("short read"); goto FAIL; } w->succ = al_l; for (j = 0; j < w->succ_length; j++) { switch (buf_elt_size) { case 4: al_l->count = ntohl(((u_long *) buf) [j * 2]); al_l->word = ntohl(((u_long *) buf) [j * 2 + 1]); break; case 2: al_l->count = ntohs(((u_short *) buf) [j * 2]); al_l->word = ntohs(((u_short *) buf) [j * 2 + 1]); break; default: al_l->count = NTOHC(((unsigned char *) buf) [j * 2]); al_l->word = NTOHC(((unsigned char *) buf) [j * 2 + 1]); } al_l++; } } if (!w->pred_length) w->pred = 0; else { j = fread (buf, buf_elt_size, w->pred_length * 2, in); if (j != (w->pred_length * 2)) { perror("short read"); goto FAIL; } w->pred = al_l; for (j = 0; j < w->pred_length; j++) { switch (buf_elt_size) { case 4: al_l->count = ntohl(((u_long *) buf) [j * 2]); al_l->word = ntohl(((u_long *) buf) [j * 2 + 1]); break; case 2: al_l->count = ntohs(((u_short *) buf) [j * 2]); al_l->word = ntohs(((u_short *) buf) [j * 2 + 1]); break; default: al_l->count = NTOHC(((unsigned char *) buf) [j * 2]); al_l->word = NTOHC(((unsigned char *) buf) [j * 2 + 1]); } al_l++; } } } if (buf) free (buf); buf = 0; 
buf_size = 0; if (4 != fread ((void *) &ts, 1, 4, in)) goto FAIL; ts = (long) ntohl (ts); if (4 != fread ((void *) &nstarters, 1, 4, in)) goto FAIL; nstarters = (long) ntohl (nstarters); st = (int *) malloc ((nstarters + 1) * sizeof(*st)); if (!st) goto FAIL; switch (buf_elt_size) { case 4: buf = st; break; case 2: buf_size = (nstarters + 1) * buf_elt_size; buf = (void *) malloc (buf_size); if (!buf) goto FAIL; break; default: buf_elt_size = sizeof(char); buf_size = (nstarters + 1) * buf_elt_size; buf = (void *) malloc (buf_size); if (!buf) goto FAIL; break; } { int bytes_to_read = nstarters * buf_elt_size; char *b2 = (unsigned char *) buf; do { int bytes_read = fread (b2, 1, bytes_to_read, in); if (bytes_read <= 0) { if (buf == st) buf = 0; goto FAIL; } bytes_to_read -= bytes_read; b2 += bytes_read; } while (bytes_to_read > 0); } if (buf == st) { for (i = 0; i < nstarters; i++) st[i] = ntohl (((u_long *)buf) [i]); buf = 0; } else if (buf_elt_size == sizeof(u_short)) for (i = 0; i < nstarters; i++) st[i] = ntohs (((u_short *)buf) [i]); else for (i = 0; i < nstarters; i++) st[i] = NTOHC (((unsigned char *)buf) [i]); # undef NTOHC if (buf) free (buf); total_words = tw; all_words = aw; all_strings = as; total_starters = ts; starters = st; return 0; FAIL: if (buf) free (buf); if (as) free (as); if (aw) free (aw); if (al) free (al); if (st) free (st); if (spools) { for (i = 0; i < nspools; i++) if (spools[i]) free(spools[i]); free (spools); } return -1; } dadadodo-1.04/hash.c0100644000033400007640000001434307621426044012506 0ustar jwzjwz/* hash.c --- simple hash tables. * DadaDodo, Copyright (c) 1997 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. 
No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #include #include #include #include #include "hash.h" typedef struct bucket bucket; struct hash_table { unsigned long size; unsigned long count; long (*hash) (const void *); int (*compare) (const void *, const void *); bucket *buckets; }; struct bucket { const void *key; void *value; }; static const unsigned long primes[] = { /* 3, 7, 11, 13, 29, 37, 47, 59, 71, 89, */ 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919, 1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591, 17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437, 187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263, 1674319, 2009191, 2411033, 2893249 }; static unsigned long toprime (unsigned long size) { unsigned int i; static unsigned int s = (sizeof (primes) / sizeof (*primes)) - 1; for (i = 0; i < s; i++) if (size <= primes[i]) return primes[i]; return primes[s-1]; } hash_table * make_hash_table (long size, long (*hash) (const void *), int (*compare) (const void *, const void *)) { hash_table *t = (hash_table *) malloc(sizeof(*t)); if (!t) return 0; size = toprime ((13 * size) / 10); t->hash = hash; t->compare = compare; t->size = size; t->count = 0; t->buckets = (bucket *) calloc(t->size, sizeof(*t->buckets)); if (!t->buckets) { free(t); return 0; } return t; } void clrhash (hash_table *table) { table->count = 0; memset (table->buckets, 0, table->size * sizeof(*table->buckets)); } void free_hash_table (hash_table *table) { free (table->buckets); free (table); } static int growhash (hash_table *table, unsigned long new_size) { unsigned long i; unsigned long old_size = table->size; bucket *old_buckets = table->buckets; bucket *new_buckets; new_size = toprime (new_size); new_buckets = (bucket *) calloc(new_size, 
sizeof(*table->buckets)); if (!new_buckets) return -1; table->size = new_size; table->buckets = new_buckets; table->count = 0; for (i = 0; i < old_size; i++) if (old_buckets[i].key) puthash (table, old_buckets[i].key, old_buckets[i].value); free (old_buckets); return 0; } int puthash (hash_table *table, const void *key, void *value) { unsigned long hash = table->hash (key); unsigned long size = table->size; unsigned long n; bucket *b; if (size < (1 + ((13 * table->count) / 10))) { int status; /* printf("\nrehashing (%d for %d)\n", table->size, table->count);*/ status = growhash (table, size + 1); if (status < 0) return status; size = table->size; } n = hash % size; b = &table->buckets[n]; if (b->key && key != b->key && table->compare (key, b->key) != 0) { unsigned long h2 = size - 2; unsigned long i = (hash % h2) + 1; do { n += i; if (n >= size) n -= size; b = &table->buckets [n]; } while (b->key && key != b->key && table->compare (key, b->key) != 0); } if (!b->key) table->count++; b->key = key; b->value = value; return 0; } void * gethash (hash_table *table, const void *key, void *def) { unsigned long hash = table->hash (key); unsigned long size = table->size; unsigned long n = hash % size; bucket *b = &table->buckets[n]; if (b->key && key != b->key && table->compare (key, b->key) != 0) { unsigned long h2 = size - 2; unsigned long i = (hash % h2) + 1; do { n += i; if (n >= size) n -= size; b = &table->buckets [n]; } while (b->key && key != b->key && table->compare (key, b->key) != 0); } return b->value; } int maphash (hash_table *table, int (*mapper) (const void *key, const void *value, void *arg), void *arg) { unsigned long i; int status; if (table) for (i = 0; i < table->size; i++) if (table->buckets[i].key) { status = mapper (table->buckets[i].key, table->buckets[i].value,arg); if (status != 0) return status; } return 0; } unsigned long string_hash (const unsigned char *x) { unsigned long g, h = 0; if (!x) return 0; while (*x != 0) { h = (h << 4) + *x++; if ((g 
= h & 0xf0000000) != 0) h = (h ^ (g >> 24)) ^ g; } return h; } unsigned long string_case_hash (const unsigned char *x) { unsigned long g, h = 0; if (!x) return 0; while (*x != 0) { h = (h << 4) + toupper(*x++); if ((g = h & 0xf0000000) != 0) h = (h ^ (g >> 24)) ^ g; } return h; } /* english-centric phoenetic hashing */ #ifdef SOUNDEX static const unsigned char soundex[] = { 7,1,2,3,7,1,2,7,7,2,2,4,5,5,7,1,2,6,2,3,7,1,7,2,7,2,0 }; unsigned long soundex_hash (const unsigned char *string) { unsigned long result = 0; const unsigned char *s; unsigned char last_letter = 0; unsigned char last_class = 0; unsigned char this_letter = 0; unsigned char this_class = 0; for (s = string; *s; s++) { this_letter = toupper(*s); if (this_letter < 'A' || this_letter > 'Z') continue; this_letter -= 'A'; this_class = soundex[(int) this_letter]; if (this_class != 7) result = (result << 3) | this_class; else { /* could be H or W separating like letters */ if (this_letter == 'H' || this_letter == 'W') { unsigned char next_letter = toupper(s[1]); if (next_letter >= 'A' && next_letter <= 'Z' && soundex[(int) (next_letter -= 'A')] == last_class) { s++; this_letter = next_letter; this_class = soundex[(int) next_letter]; if (!*s) break; } } } last_letter = this_letter; last_class = this_class; } return result; } #endif dadadodo-1.04/yarandom.c0100644000033400007640000001026607621414622013374 0ustar jwzjwz/* yarandom.c -- Yet Another Random Number Generator. * Copyright (c) 1997, 1998, 2003 by Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. 
*/ /* The unportable mess that is rand(), random(), drand48() and friends led me to ask Phil Karlton what the Right Thing to Do was. He responded with this. It is non-cryptographically secure, reasonably random (more so than anything that is in any C library), and very fast. I don't understand how it works at all, but he says "look at Knuth, Vol. 2 (original edition), page 26, Algorithm A. In this case n=55, k=20 and m=2^32." So there you have it. --------------------------- Note: xlockmore 4.03a10 uses this very simple RNG: if ((seed = seed % 44488 * 48271 - seed / 44488 * 3399) < 0) seed += 2147483647; return seed-1; of which it says ``Dr. Park's algorithm published in the Oct. '88 ACM "Random Number Generators: Good Ones Are Hard To Find" His version available at ftp://cs.wm.edu/pub/rngs.tar Present form by many authors.'' Karlton says: ``the usual problem with that kind of RNG turns out to be unexepected short cycles for some word lengths.'' Karlton's RNG is faster, since it does three adds and two stores, while the xlockmore RNG does two multiplies, two divides, three adds, and one store. Compiler optimizations make a big difference here: gcc -O: difference is 1.2x. gcc -O2: difference is 1.4x. gcc -O3: difference is 1.5x. SGI cc -O: difference is 2.4x. SGI cc -O2: difference is 2.4x. SGI cc -O3: difference is 5.1x. Irix 6.2; Indy r5k; SGI cc version 6; gcc version 2.7.2.1. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #ifdef HAVE_UNISTD_H # include /* for getpid() */ #endif #include /* for gettimeofday() */ #include "yarandom.h" # undef ya_rand_init /* The following 'random' numbers are taken from CRC, 18th Edition, page 622. Each array element was taken from the corresponding line in the table, except that a[0] was from line 100. 8s and 9s in the table were simply skipped. The high order digit was taken mod 4. 
*/ #define VectorSize 55 static unsigned int a[VectorSize] = { 035340171546, 010401501101, 022364657325, 024130436022, 002167303062, /* 5 */ 037570375137, 037210607110, 016272055420, 023011770546, 017143426366, /* 10 */ 014753657433, 021657231332, 023553406142, 004236526362, 010365611275, /* 14 */ 007117336710, 011051276551, 002362132524, 001011540233, 012162531646, /* 20 */ 007056762337, 006631245521, 014164542224, 032633236305, 023342700176, /* 25 */ 002433062234, 015257225043, 026762051606, 000742573230, 005366042132, /* 30 */ 012126416411, 000520471171, 000725646277, 020116577576, 025765742604, /* 35 */ 007633473735, 015674255275, 017555634041, 006503154145, 021576344247, /* 40 */ 014577627653, 002707523333, 034146376720, 030060227734, 013765414060, /* 45 */ 036072251540, 007255221037, 024364674123, 006200353166, 010126373326, /* 50 */ 015664104320, 016401041535, 016215305520, 033115351014, 017411670323 /* 55 */ }; static int i1, i2; unsigned int ya_random (void) { register int ret = a[i1] + a[i2]; a[i1] = ret; if (++i1 >= VectorSize) i1 = 0; if (++i2 >= VectorSize) i2 = 0; return ret; } void ya_rand_init(unsigned int seed) { int i; if (seed == 0) { struct timeval tp; #ifdef GETTIMEOFDAY_TWO_ARGS struct timezone tzp; gettimeofday(&tp, &tzp); #else gettimeofday(&tp); #endif /* ignore overflow */ seed = (999*tp.tv_sec) + (1001*tp.tv_usec) + (1003 * getpid()); } a[0] += seed; for (i = 1; i < VectorSize; i++) { seed = a[i-1]*1001 + seed*999; a[i] += seed; } i1 = a[0] % VectorSize; i2 = (i1 + 024) % VectorSize; } dadadodo-1.04/dadadodo.h0100644000033400007640000000176506561267621013341 0ustar jwzjwz/* DadaDodo, Copyright (c) 1997 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. 
No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ /*#if sizeof(int) >= 4*/ typedef int int32; typedef unsigned int uint32; /*#else typedef long int32; typedef unsigned long uint32; #endif*/ typedef struct word word; typedef struct word_link word_link; struct word { int string; int count; int start; int cap; int comma; int period; int quem; int bang; int succ_length, pred_length; word_link *succ, *pred; }; struct word_link { int count; int word; }; #define RAND(N) ((random() & 0x7FFFFFFF) % (N)) dadadodo-1.04/parse.h0100644000033400007640000000156007621414325012676 0ustar jwzjwz/* parse.h --- generating a markov chain. * DadaDodo, Copyright (c) 1997, 2003 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #ifndef __DADADODO_PARSE_H__ #define __DADADODO_PARSE_H__ #include "hash.h" typedef struct pword pword; extern pword *scan_line (unsigned char *line, hash_table *table, pword *prev); extern int total_words; extern int total_links; extern int total_starters; #endif /* __DADADODO_PARSE_H__ */ dadadodo-1.04/parsei.h0100644000033400007640000000334707621414355013057 0ustar jwzjwz/* parsei.h --- generating a markov chain. 
* DadaDodo, Copyright (c) 1997, 2003 Jamie Zawinski
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation.  No representations are made about the suitability of this
 * software for any purpose.  It is provided "as is" without express or
 * implied warranty.
 */

#ifndef __DADADODO_PARSEI_H__
#define __DADADODO_PARSEI_H__

typedef struct pword_link pword_link;

/* A larger version of `struct word' that contains data needed at
   parse-time but not at generate-time.
 */
struct pword {
  int id;			/* written to the data file wherever another
				   word refers to this one */
  const unsigned char *string;	/* the word itself; also its hash key */
  int count;			/* total occurrences */
  int start;			/* occurrences with no preceding word */
  int cap;			/* capitalized, non-starting occurrences */
  int comma;			/* occurrences followed by , ( or ) */
  int period;			/* occurrences followed by . : or ; */
  int quem;			/* occurrences followed by ? */
  int bang;			/* occurrences followed by ! */
  pword_link *succ, *pred;	/* words seen after / before this one */
  int succ_length, pred_length;	/* entries in use in succ / pred */
  int succ_size, pred_size;	/* entries allocated for succ / pred */
};

struct pword_link {
  int count;			/* times this pairing was seen */
  pword *word;
};


/* allocation pools. */

typedef struct pword_pool pword_pool;
typedef struct string_pool string_pool;

/* Each pool is sized so that the array plus a few words of bookkeeping come
   to about 500k. */
#define PWORD_POOL_SIZE  (500*1024)
#define STRING_POOL_SIZE (500*1024)
#define PWORD_POOL_COUNT ((PWORD_POOL_SIZE - (sizeof(void *)*4))/sizeof(pword))
#define STRING_POOL_COUNT (STRING_POOL_SIZE - (sizeof (void *) * 4))

struct pword_pool {
  pword pwords [PWORD_POOL_COUNT];
  int fp;			/* number of entries of pwords[] in use */
  pword_pool *next;
};

struct string_pool {
  unsigned char chars[STRING_POOL_COUNT];
  int fp;			/* number of bytes of chars[] in use */
  string_pool *next;
};

/* Heads of the pool lists.  files.c writes the pools in the reverse of list
   order, and treats the head string pool as the only partially filled one. */
extern pword_pool *wpool;
extern string_pool *spool;

extern word *all_words;
extern unsigned char **all_strings;
extern int *starters;

#endif /* __DADADODO_PARSEI_H__ */
dadadodo-1.04/files.h0100644000033400007640000000145206561267615012700 0ustar jwzjwz/* files.h --- input and output
 * DadaDodo, Copyright (c) 1997 Jamie Zawinski
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in
all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #ifndef __DADADODO_FILES_H__ #define __DADADODO_FILES_H__ #define DADADODO_MAGIC "#!DadaDodo\n" extern int write_dadadodo_file (FILE *out, const char *output_name); extern int read_dadadodo_file (FILE *in); #endif /* __DADADODO_FILES_H__ */ dadadodo-1.04/generate.h0100644000033400007640000000201007621414774013355 0ustar jwzjwz/* generate.h --- generating random sentences from a markov chain. * DadaDodo, Copyright (c) 1997, 2003 Jamie Zawinski * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. No representations are made about the suitability of this * software for any purpose. It is provided "as is" without express or * implied warranty. */ #ifndef __DADADODO_GENERATE_H__ #define __DADADODO_GENERATE_H__ #include "parse.h" extern int random_sentence (FILE *stream, int *column, int indent, int fill_column, int html_p); extern void word_stats (FILE *stream, word *word); extern void string_stats (FILE *stream, const unsigned char *word, int print_header); extern void stats (FILE *stream); #endif /* __DADADODO_GENERATE_H__ */ dadadodo-1.04/hash.h0100644000033400007640000000241107621414312012477 0ustar jwzjwz/* hash.c --- simple hash tables. 
* DadaDodo, Copyright (c) 1997, 2003 Jamie Zawinski
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation. No representations are made about the suitability of this
 * software for any purpose. It is provided "as is" without express or
 * implied warranty.
 */

#ifndef __DADADODO_HASH_H__
#define __DADADODO_HASH_H__

/* Opaque to includers: only the type name is declared in this header. */
typedef struct hash_table hash_table;

/* Creates a table from a size and two caller-supplied callbacks, one that
   hashes a key and one that compares two keys.
   NOTE(review): the `hash' callback type is long (*)(const void *), while
   string_hash and string_case_hash below are declared as
   unsigned long (const unsigned char *), so passing either of them here
   needs a cast.
   NOTE(review): the unit of `size' and the expected return convention of
   `compare' are not visible from this header -- confirm. */
hash_table *make_hash_table (long size,
                             long (*hash) (const void *),
                             int (*compare) (const void *, const void *));
void free_hash_table (hash_table *table);

/* Store and look up a value by key.
   NOTE(review): presumably gethash returns `default_value' when the key is
   absent, and puthash's int result reports success or failure -- confirm. */
int puthash (hash_table *table, const void *key, void *value);
void *gethash (hash_table *table, const void *key, void *default_value);
void clrhash (hash_table *table);

/* Takes a table, a callback receiving (key, value, arg), and the `arg' to
   pass to it.
   NOTE(review): presumably the callback is invoked once per entry; how its
   int result affects iteration is not visible here -- confirm. */
int maphash (hash_table *table,
             int (*mapper) (const void *key, const void *value, void *arg),
             void *arg);

/* Hash functions over unsigned char strings.
   NOTE(review): `case' presumably means case-insensitive -- confirm. */
unsigned long string_hash (const unsigned char *string);
unsigned long string_case_hash (const unsigned char *string);

#endif /* __DADADODO_HASH_H__ */
dadadodo-1.04/yarandom.h0100644000033400007640000000376507621415024013404 0ustar jwzjwz/* xscreensaver, Copyright (c) 1997, 1998, 2003 by Jamie Zawinski
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation. No representations are made about the suitability of this
 * software for any purpose. It is provided "as is" without express or
 * implied warranty.
*/

#ifndef __YARANDOM_H__
#define __YARANDOM_H__

/* Remove any existing macro definitions of these names so the definitions
   further down are the ones in effect. */
#undef random
#undef rand
#undef drand48
#undef srandom
#undef srand
#undef srand48
#undef frand
#undef RAND_MAX

#ifdef VMS
# include "vms-gtod.h"
#endif

extern unsigned int ya_random (void);
extern void ya_rand_init (unsigned int);

/* After this point a call written as random() expands to ya_random(), and
   RAND_MAX is 0xFFFFFFFF. */
#define random() ya_random()
#define RAND_MAX 0xFFFFFFFF

/*#define srandom(i) ya_rand_init(0)*/

/* Define these away to keep people from using the wrong APIs in xscreensaver.
 */
#define rand          __ERROR_use_random_not_rand_in_xscreensaver__
#define drand48       __ERROR_use_frand_not_drand48_in_xscreensaver__
#define srandom       __ERROR_do_not_call_srandom_in_xscreensaver__
#define srand         __ERROR_do_not_call_srand_in_xscreensaver__
#define srand48       __ERROR_do_not_call_srand48_in_xscreensaver__
/*#define ya_rand_init __ERROR_do_not_call_ya_rand_init_in_xscreensaver__*/

/* frand(f): a double computed as random() / (((unsigned int)~0) / f), with
   the sign flipped if the result is negative.  `f' is expanded once.
   In the GCC variant the temporary is a block-local named `tmp', so the
   argument expression must not itself refer to a variable called `tmp'.
   In the other variant the temporary is the file-scope static
   `_frand_tmp_', so each file that includes this header gets its own
   copy. */
#if defined (__GNUC__) && (__GNUC__ >= 2)
 /* Implement frand using GCC's statement-expression extension. */
# define frand(f)                                                       \
  ({ double tmp = (((double) random()) /                                \
                   (((double) ((unsigned int)~0)) / ((double) (f))));   \
     tmp < 0 ? (-tmp) : tmp; })

#else /* not GCC2 - implement frand using a global variable.*/

static double _frand_tmp_;
# define frand(f)                                                       \
  (_frand_tmp_ = (((double) random()) /                                 \
                  (((double) ((unsigned int)~0)) / ((double) (f)))),    \
   _frand_tmp_ < 0 ? (-_frand_tmp_) : _frand_tmp_)

#endif /* not GCC2 */

#endif /* __YARANDOM_H__ */
dadadodo-1.04/version.h0100644000033400007640000000015607621420462013250 0ustar jwzjwzstatic const char version[] = "@(#)\ DadaDodo 1.04, Copyright (c) 1997-2003 by Jamie Zawinski (jwz@jwz.org)";
/* NOTE(review): in this copy the backslash inside the literal above is
   followed by a space, which makes it an escape sequence rather than a line
   continuation.  Presumably the upstream file has a backslash-newline
   there -- confirm before compiling. */