cwebx-3.04.orig/ 40755 1750 1750 0 6653566416 11246 5ustar jdgjdgcwebx-3.04.orig/Makefile100644 1750 1750 5301 6470016007 12761 0ustar jdgjdg# This file is part of CWEB, version x3.0. # # Read the README file, then edit this file to reflect local conditions # # The C compiler should be ANSI compatible CC = gcc # We let CWEAVE report syntax errors by setting +d. # The flag +m makes the TeX output slightly more compact. # We use the +e flag since our printer is two-sided. CWFLAGS = +mde CTFLAGS = # We keep debugging info around to enable the `+d' option of cweave CFLAGS = -DDEBUG -DSTAT -g # RM and CP are used below in case rm and cp are aliased RM= /bin/rm CP= /bin/cp RENAME= /bin/mv # Set CCHANGES to common-foo.ch if you need changes to common.w in another # file than common.ch CCHANGES= # Set TCHANGES to ctangle-foo.ch if you need changes to ctangle.w in another # file than ctangle.ch TCHANGES= # Set WCHANGES to cweave-foo.ch if you need changes to cweave.w in another # file than cweave.ch WCHANGES= ########## You shouldn't have to change anything after this point ####### CWEAVE = ./cweave $(CWFLAGS) CTANGLE = ./ctangle $(CTFLAGS) .SUFFIXES: .tex .dvi .w .w.tex: $(CWEAVE) $* .tex.dvi: tex $*.tex .w.c: $(CTANGLE) $* .w.dvi: make $*.tex make $*.dvi .w.o: make $*.c make $*.o all: cweb doc listings cweb: ctangle cweave manual doc: manual.dvi listings: common.dvi ctangle.dvi cweave.dvi cautiously: make common.c ctangle.c cweave.c $(RENAME) ctangle SAVEctangle # save version in case things mess up make ctangle $(RENAME) common.c SAVEcommon.c ./ctangle common $(CCHANGES) diff common.c SAVEcommon.c $(RENAME) SAVEcommon.c common.c # restore date $(RENAME) ctangle.c SAVEctangle.c ./ctangle ctangle $(TCHANGES) diff ctangle.c SAVEctangle.c $(RENAME) SAVEctangle.c ctangle.c # restore date $(RENAME) cweave.c SAVEcweave.c ./ctangle cweave $(WCHANGES) diff cweave.c SAVEcweave.c $(RENAME) SAVEcweave.c cweave.c # restore date $(RM) SAVEctangle # succeeded, use new binary from now 
on SAVEctangle.c: $(CP) ctangle.c SAVEctangle.c SAVEcommon.c: $(CP) common.c SAVEcommon.c common.c: common.w $(CCHANGES) common.inc $(CTANGLE) common $(CCHANGES) common.h: common.w $(CCHANGES) $(CTANGLE) common $(CCHANGES) common.tex: common.w common.inc $(CWEAVE) common $(CCHANGES) ctangle: ctangle.o common.o $(CC) $(CFLAGS) -o ctangle ctangle.o common.o ctangle.c: ctangle.w $(TCHANGES) common.inc common.h $(CTANGLE) ctangle $(TCHANGES) ctangle.tex: ctangle.w $(TCHANGES) common.inc intro.inc $(CWEAVE) ctangle $(TCHANGES) cweave: cweave.o common.o $(CC) $(CFLAGS) -o cweave cweave.o common.o cweave.c: cweave.w $(WCHANGES) common.inc common.h parser.w rules.w $(CTANGLE) cweave $(WCHANGES) cweave.tex: cweave.w $(WCHANGES) parser.w rules.w common.inc intro.inc $(CWEAVE) cweave $(WCHANGES) manual.dvi: compare.tex cwebx-3.04.orig/README100644 1750 1750 13442 6470064572 12240 0ustar jdgjdgThis file is part of CWEB version x3.04, and explains briefly what CWEB is, and how to set it up, and what the various files are for. Here is a listing of the other files constituting the release with a brief indication of their purpose. common.w % CWEB source for routines common to CTANGLE and CWEAVE ctangle.w % CWEB source for CTANGLE cweave.w % CWEB source for bulk of CWEAVE parser.w % CWEB source for parser of CWEAVE rules.w % CWEB source for grammar rules of CWEAVE common.inc % CWEB source included by common.w, ctangle.w, and cweave.w intro.inc % CWEB source included by ctangle.w and cweave.w common.c % C file produced (by CTANGLE) from common.w; for bootstrap common.h % C file produced (by CTANGLE) from common.w; for bootstrap ctangle.c % C file produced (by CTANGLE) from ctangle.w; for bootstrap cwebxmac.tex % TeX macro file needed to process files produced by CWEAVE cwebcmac.tex % TeX macro file needed in addition for compatibility mode cwebhmac.tex % optional TeX macro file creating hyperlinks for xhdvi (e.g.) 
manual.tex % master file for the CWEB manual compare.tex % file \input by manual.tex (present to facilitate bootstrap) Makefile % control file for UNIX make facility to build CWEB man1 % directory with UNIX manpage man1/cweb.1 % UNIX manpage for CTANGLE and CWEAVE examples % directory with sample CWEB programs examples/compare.w % Compare files; source for compare.tex above examples/treeprint.w % Convert sorted file listings to a tree diagram examples/wc.w % Count characters, words and lines in text files examples/wmerge.w % Perform preprocessing of change files as filter examples/Makefile cweb.el % EMACS file that might facilitate editing CWEB files The CWEB system allows you to write C programs and their documentation at the same time, in a way that optimises human readability. This version fully supports ANSI/ISO C, and indeed requires such a compiler for its own compilation. Like other WEB systems, CWEB allows you to decompose your programs into small, logically related portions, to add any desired amount of documentation to each of them, and to present them in any order that help understanding. As TeX is used to obtain the printed source document, a high typographic quality can be obtained while allowing for formatting of complicated formulae and tables; formatting of C program fragments is simple however, since it is done fully automatically. When used properly, CWEB can be used to produce documents that combine the qualities of a scientific publication about algorithms and of a well documented source listing for an implementation of those algorithms. The CWEB system consists of two programs, CTANGLE and CWEAVE, that are to be used in combination with a C compiler and TeX; both transform text files, while they share the same input language. 
CWEB source files, that typically have suffix `.w', can be converted by CTANGLE to C files (suffix `.c') that can be compiled, and alternatively can be converted by CWEAVE to TeX files (suffix `.tex') from which the typeset source listing can be produced by TeX. More details can be found in the manual, which you obtain by running TeX on the file manual.tex. Here the main interest is that since CWEB is written using CWEB you need a bootstrap procedure to get things going. For this purpose the derived files common.c, common.h and ctangle.c are supplied. If you have the UNIX make facility, all you have to do is check that these files are newer than the sources common.w and ctangle.w (they should be when the archive is unpacked, but you can use "touch" to make them newer in case this should be necessary), and that the Makefile macro CC refers to an ANSI compiler (it is set to "cc"; you may need to select "gcc" instead or to supply additional flags in CFLAGS) and then invoke "make all". Otherwise here is the required sequence of commands. $ cc -c ctangle.c # compile main part of CTANGLE $ cc -c common.c # compile common code $ cc -o ctangle ctangle.o common.o # link CTANGLE together $ ./ctangle cweave # create cweave.c from cweave.w $ cc -DDEBUG -DSTAT -g -c cweave.c # compile main part of CWEAVE $ cc -DDEBUG -DSTAT -g -o cweave cweave.o common.o # link CWEAVE together $ tex manual.tex # typeset the manual $ # the rest is only needed to obtain source listings for the CWEB system $ ./cweave common # produce common.tex $ tex common # typeset listing of common code $ ./cweave ctangle # produce ctangle.tex $ tex ctangle # typeset listing of CTANGLE $ ./cweave cweave # produce cweave.tex $ tex cweave # typeset listing of CWEAVE By the time you have done all this successfully, you have already tested CTANGLE and CWEAVE on a substantial amount of input, and you may be confident that the system is working properly. 
Should you on the other hand encounter problems during bootstrapping, then things are of course less pleasant, especially because you cannot print the source listings yet. Few problems are expected though, since there are no known system dependencies, and identical source code compiles on several dissimilar systems, one of which is quite non-UNIX. It is expected though that the compiler can handle large arrays (certainly larger than 64 Kb) and programs that used command line arguments; if your compiler does not, you will either have to replace it by a better one or do some tailoring of the programs. If you encounter any other difficulties, please report them to me at . If you do need to make changes to common.c, common.h, or ctangle.c, don't forget to write them down, since they will have to be applied at the proper places in common.w, ctangle.w, or common.inc in order to persist after bootstrapping. Such patches are best incorporated into a change file (e.g., ctangle.ch) as explained in the manual. 
Marc van Leeuwen Universit\'e de Poitiers http://wallis.univ-poitiers.fr/~maavl/ cwebx-3.04.orig/common.c100644 1750 1750 55713 6217736466 13032 0ustar jdgjdg#include #include #include #include #include #include #include "common.h" #define max_bytes 50000L #define max_modules 1000 #define max_idents 5000 #define max_sections 4000 #define hash_size 353 #define buf_size 100 #define longest_name 1000 #define long_buf_size (buf_size+longest_name) #define local static #define array_size(a)((int)(sizeof(a)/sizeof(a[0]))) #define false (boolean)0 #define true (boolean)1 #define ctangle 0 #define cweave 1 #define and_and 04 #define lt_lt 020 #define gt_gt 021 #define plus_plus 013 #define minus_minus 01 #define minus_gt 031 #define not_eq 032 #define lt_eq 034 #define gt_eq 035 #define eq_eq 036 #define or_or 037 #define find_char()(loc<=limit||get_line()) #define id_index(p)((sixteen_bits)((p)-id_table)) #define id_at(i)(&id_table[i]) #define mod_index(p)((sixteen_bits)((p)-mod_table)) #define mod_at(i)(&mod_table[i]) #define name_begin(p)((p)->byte_start) #define length(p)((int)(strlen(name_begin(p)))) #define name_end(p)(name_begin(p)+length(p)) #define complete_name(p)((p)->byte_start[-1]=='\0') #define print_mod(p) \ printf(": <%s%s>",name_begin(p),complete_name(p)?"":"...") #define spotless 0 #define harmless_message 1 #define error_message 2 #define fatal_message 3 #define mark_harmless() \ if(history==spotless)history=harmless_message;else #define mark_error()(history=error_message) #define overflow(t)fatal("\n! Sorry, %s capacity exceeded",t) #define confusion(s)fatal("\n! 
This can't happen: %s",s) #define show_banner flags['b'] #define show_happiness flags['h'] #define show_progress flags['p'] #define show_stats flags['s'] #define C_plus_plus flags['+'] #define compatibility_mode flags['c'] #define update_terminal()fflush(stdout) #define new_line()putchar('\n') #define term_write(string,leng)printf("%.*s",(int)(leng),string) #define buffer_end (&buffer[buf_size-2]) #define max_include_depth 10 #define max_include_paths 8 #define max_path_length 80 #define lines_match() \ (change_limit-change_buffer==limit-buffer \ &&strncmp(buffer,change_buffer,limit-buffer)==0) #define byte_mem_end (&byte_mem[max_bytes]) #define id_table_end (&id_table[max_idents]) #define mod_table_end (&mod_table[max_modules]) #define copy_char(c)if(id_loc0);}/*:60*//*66:*/ #line 1000 "common.w" *byte_ptr++='\0';/*:66*//*83:*/ #line 1307 "common.w" mod_text[0]=' ';/*:83*//*102:*/ #line 1649 "common.w" show_banner=show_happiness=show_progress=true;/*:102*/ #line 119 "common.w" scan_args(argc,argv); }/*:19*//*22:*/ #line 162 "common.w" local boolean input_ln(FILE*f) {register int c; register char*k=limit=buffer; while((c=getc(f))!='\n'&&c!=EOF) if(k<=buffer_end){*k++=c;if(!isspace(c))limit=k;} if(k>buffer_end) {loc= &buffer[0]; err_print("! Input line too long"); if(limit>buffer_end)limit=buffer_end; } if(buffer[0]=='@'&&limit> &buffer[1]&&strchr("IXYZ",buffer[1])!=NULL) buffer[1]=tolower(buffer[1]); return c!=EOF||limit>buffer; }/*:22*//*25:*/ #line 261 "common.w" boolean push_input_file(boolean header,boolean suspend) {boolean success=false; char delim=' '; while(loc',++loc; if(loc>=limit)err_print("! Include file name not given"); else if(++include_depth>=max_include_depth) {--include_depth; err_print("! 
Too many nested includes"); } else {/*26:*/ #line 298 "common.w" {char*k=cur_file_name; while(loc &buffer[1]&&buffer[0]=='@'&&buffer[1]=='i') {loc= &buffer[2];print_where=true;push_input_file(false,false);} else return true; else if(include_depth==0) {input_has_ended=true;web_file_open=false;return false;} else {fclose(cur_file);print_where=true; if(include_depth-- ==saved_include_depth) {changing=saved_changing;change_limit=saved_change_limit; saved_include_depth=0;including_header_file=false; if(changing)return false; } } while(true); }/*:31*//*33:*/ #line 464 "common.w" local void prime_the_change_buffer(void) {change_limit=change_buffer;/*34:*/ #line 477 "common.w" do {if(++change_line,!input_ln(change_file))return; if(limit> &buffer[1]&&buffer[0]=='@') if(buffer[1]=='x')break; else if(buffer[1]=='y'||buffer[1]=='z') {loc= &buffer[2]; err_print("! Where is the matching @x?"); } else/*35:*/ #line 497 "common.w" {if(buffer[1]=='i'&&!compatibility_mode) {loc= &buffer[2];err_print("! No includes allowed in change file");} }/*:35*/ #line 486 "common.w" }while(true);/*:34*//*36:*/ #line 506 "common.w" do if(++change_line,!input_ln(change_file)) {loc= &buffer[0];err_print("! Change file ended after @x");return;} while(limit==buffer);/*:36*//*37:*/ #line 512 "common.w" {int n=(int)(limit-buffer);change_limit=change_buffer+n; strncpy(change_buffer,buffer,n); }/*:37*/ #line 470 "common.w" }/*:33*//*38:*/ #line 529 "common.w" local void check_change(void) {int n=0; if(!lines_match())return; print_where=true; do {changing=true;/*39:*/ #line 560 "common.w" {if(++change_line,!input_ln(change_file)) {loc= &buffer[0];err_print("! Change file ended before @y"); change_limit=change_buffer;changing=false;return; } if(limit> &buffer[1]&&buffer[0]=='@') if(buffer[1]=='y')break; else if(buffer[1]=='x'||buffer[1]=='z') {loc= &buffer[2];err_print("! Where is the matching @y?");} else/*35:*/ #line 497 "common.w" {if(buffer[1]=='i'&&!compatibility_mode) {loc= &buffer[2];err_print("! 
No includes allowed in change file");} }/*:35*//*37:*/ #line 512 "common.w" {int n=(int)(limit-buffer);change_limit=change_buffer+n; strncpy(change_buffer,buffer,n); }/*:37*/ #line 572 "common.w" }/*:39*/ #line 538 "common.w" changing=false; if(!get_web_line()) {loc= &buffer[0]; err_print("! CWEB file ended during a change");return; } if(!lines_match())++n; }while(true); if(n>0) {loc= &buffer[2]; print("\n! Hmm... %d of the preceding lines failed to match",n); err_print(""); } }/*:38*//*41:*/ #line 585 "common.w" void reset_input(void) {boolean use_change_file=change_file_name[0]!='\0';/*42:*/ #line 601 "common.w" {if((web_file=fopen(web_file_name,"r"))!=NULL) strcpy(file[0].name,web_file_name); else if((web_file=fopen(alt_web_file_name,"r"))!=NULL) strcpy(file[0].name,alt_web_file_name); else fatal("! Cannot open \"%s\" as input file",web_file_name); web_file_open=true; if(use_change_file) if((change_file=fopen(change_file_name,"r"))!=NULL) strcpy(change.name,change_file_name); else if(!change_file_explicit) use_change_file=false; else fatal("! Cannot open \"%s\" as change file",change_file_name); }/*:42*/ #line 588 "common.w" cur_line=0;change_line=0;include_depth=0; if(use_change_file){changing=true;prime_the_change_buffer();} else change_limit=change_buffer; limit=buffer;loc= &buffer[1]; changing=false;input_has_ended=false; }/*:41*//*45:*/ #line 660 "common.w" boolean get_line(void) { restart: if(changing)mark_section_as_changed(section_count); else/*47:*/ #line 715 "common.w" {if(get_web_line() &&change_limit>change_buffer &&limit-buffer==change_limit-change_buffer &&buffer[0]==change_buffer[0] )check_change(); }/*:47*/ #line 665 "common.w" if(changing) {/*48:*/ #line 728 "common.w" {if(++change_line,!input_ln(change_file)) {err_print("! 
Change file ended without @z"); buffer[0]='@';buffer[1]='z';limit= &buffer[2]; } if(limit> &buffer[1]&&buffer[0]=='@') if(buffer[1]=='z') {prime_the_change_buffer();changing=false;print_where=true;} else if(buffer[1]=='x'||buffer[1]=='y') {loc= &buffer[2];err_print("! Where is the matching @z?");} else/*35:*/ #line 497 "common.w" {if(buffer[1]=='i'&&!compatibility_mode) {loc= &buffer[2];err_print("! No includes allowed in change file");} }/*:35*/ #line 739 "common.w" }/*:48*/ #line 667 "common.w" if(!changing) {mark_section_as_changed(section_count);goto restart;} } loc= &buffer[0];*limit=' '; if(compatibility_mode&&buffer[0]=='@'&&buffer[1]=='i') {loc+=2;print_where=true;push_input_file(false,changing); goto restart; } if(limit-buffer>5 &&strncmp(buffer,"#line",5)==0&&isspace((eight_bits)buffer[5]))/*46:*/ #line 686 "common.w" {sixteen_bits line=0; print_where=true; loc= &buffer[6];while(locline=line-1; strncpy(cur_f->name,loc,i);cur_f->name[i]='\0'; goto restart; } } } err_print("! Improper #line directive");goto restart; }/*:46*/ #line 679 "common.w" return!input_has_ended; }/*:45*//*50:*/ #line 755 "common.w" void check_complete(void) {if(change_limit!=change_buffer) {int l=(int)(change_limit-change_buffer); strncpy(buffer,change_buffer,l);limit= &buffer[l]; changing=true;loc=buffer;web_file_open=true; err_print("! 
Change file entry did not match"); } }/*:50*//*54:*/ #line 823 "common.w" char*store_string(char*s,int l) {char*dest=byte_ptr; if(byte_mem_end-byte_ptr<=l)overflow("byte memory"); byte_ptr+=l;*byte_ptr++='\0';return strncpy(dest,s,l); }/*:54*//*61:*/ #line 914 "common.w" id_pointer id_lookup(char*first,char*last,int ilk) {int l,h; if(last==NULL)last=first+(l=(int)strlen(first)); else l=(int)(last-first);/*62:*/ #line 932 "common.w" {char*p=first; h= *p;while(++phash_link; if(p==NULL)/*64:*/ #line 955 "common.w" {p=id_ptr; if(id_ptr++ >=id_table_end)overflow("identifier"); name_begin(p)=store_string(first,l); if(program==cweave)init_id_name(p,ilk); p->hash_link=hash[h];hash[h]=p; }/*:64*/ #line 947 "common.w" return p; }/*:63*/ #line 922 "common.w" }/*:61*//*70:*/ #line 1036 "common.w" local enum mod_comparison mod_name_cmp (char*p,int l1,char*q,int l2) {int l=l1=0)if(*p++ != *q++)return* --p< * --q?less:greater; return l1l2?extension:equal; }/*:70*//*71:*/ #line 1051 "common.w" local mod_pointer make_mod_node(char*name) {mod_pointer node=mod_ptr; if(mod_ptr++ >=mod_table_end)overflow("module name"); name_begin(node)=name; node->llink=NULL;node->rlink=NULL; init_module_name(node); return node; }/*:71*//*72:*/ #line 1067 "common.w" local mod_pointer mod_name_lookup(char*name,int l) {mod_pointer p; mod_pointer*loc= &root; while((p= *loc)!=NULL) {int l0=p->key_length;char*key=name_begin(p); switch(mod_name_cmp(name,l,key,l0)) {case less:loc= &p->llink;break; case greater:loc= &p->rlink;break; case equal:case extension:/*74:*/ #line 1109 "common.w" {enum mod_comparison cmp= mod_name_cmp(name+l0,l-l0,key+l0,(int)strlen(key+l0)); switch(cmp) {case less:case greater: err_print("! Incompatible module name"); print("\nName inconsistently extends <%.*s...>.\n",l0,key); return NULL; case extension:case equal: if(complete_name(p)) if(cmp==equal)return p; else {err_print("! 
Incompatible module name"); print("\nPrefix exists: <%s>.\n",key);return NULL; } name_begin(p)=store_string(name,l);/*77:*/ #line 1200 "common.w" free(key-1);/*:77*/ #line 1128 "common.w" return p; } }/*:74*/ #line 1081 "common.w" case prefix: err_print("! Incompatible module name"); print("\nName is a prefix of <%s%s>.\n" ,key,complete_name(p)?"":"..."); return NULL; } }/*73:*/ #line 1098 "common.w" {(p=make_mod_node(store_string(name,l)))->key_length=l; return*loc=p; }/*:73*/ #line 1090 "common.w" }/*:72*//*75:*/ #line 1152 "common.w" local mod_pointer prefix_lookup(char*name,int l) {mod_pointer p=root,*loc= &root; mod_pointer match=NULL; mod_pointer saved=NULL; while(p!=NULL) {int l0=p->key_length;char*key=name_begin(p); switch(mod_name_cmp(name,l,key,l0)) {case less:p= *(loc= &p->llink);break; case greater:p= *(loc= &p->rlink);break; case equal:return p; case extension:/*78:*/ #line 1209 "common.w" {enum mod_comparison cmp= mod_name_cmp(name+l0,l-l0,key+l0,(int)strlen(key+l0)); switch(cmp) {case less:case greater: err_print("! Incompatible module name"); print("\nName inconsistently extends <%.*s...>.\n",l0,key); return NULL; case prefix:case equal:return p; case extension: if(complete_name(p)) {err_print("! Incompatible module name"); print("\nPrefix exists: <%s>.\n",key);return NULL; }/*79:*/ #line 1236 "common.w" {/*77:*/ #line 1200 "common.w" free(key-1);/*:77*/ #line 1237 "common.w" if((key=(char*)malloc(l+2))==NULL)fatal("Out of dynamic memory!"); *key++='\1'; strncpy(key,name,l);key[l]='\0'; name_begin(p)=key; }/*:79*/ #line 1224 "common.w" return p; } }/*:78*/ #line 1167 "common.w" case prefix: if(match!=NULL) {err_print("! 
Ambiguous prefix");return NULL;} match=p;saved=p->rlink;p=p->llink; } if(p==NULL&&match!=NULL) p=saved,saved=NULL; } if(match==NULL)/*76:*/ #line 1190 "common.w" {char*key=(char*)malloc(l+2); if(key==NULL)fatal("Out of dynamic memory!"); *key++='\1'; strncpy(key,name,l);key[l]='\0'; (p=make_mod_node(key))->key_length=l; return*loc=p; }/*:76*/ #line 1179 "common.w" match->key_length=l; return match; }/*:75*//*82:*/ #line 1291 "common.w" mod_pointer get_module_name(void) {/*84:*/ #line 1315 "common.w" {eight_bits c;char*k=mod_text; do {if(!find_char()) {err_print("! Input ended in module name");break;} c= *loc++;/*85:*/ #line 1342 "common.w" if(c=='@') {if((c= *loc++)=='>')break; if(isspace(c)||c=='*'||c=='~') {err_print("! Module name didn't end");loc-=2;break;} if(k=mod_text_end-1) {print("\n! Module name too long: "); term_write(id_first,25);err_print(".."); } id_loc= *k==' '&&k>mod_text?k:k+1; }/*:84*/ #line 1294 "common.w" {int l=(int)(id_loc-id_first); return l>=3&&strncmp(id_loc-3,"...",3)==0 ?prefix_lookup(id_first,l-3):mod_name_lookup(id_first,l); } }/*:82*//*86:*/ #line 1360 "common.w" boolean get_control_text(void) {char c,*k=id_first= &mod_text[1]; do if((*k++= *loc++)=='@') if((c= *loc++)!='@') {if(c!='>') err_print("! Control codes are forbidden in control text"); return(id_loc=k-1)==id_first; } while(loc<=limit); err_print("! Control text didn't end"); return(id_loc=k)==id_first; }/*:86*//*87:*/ #line 1386 "common.w" void get_string(void) {char c,delim=loc[-1]; id_loc=id_first= &mod_text[1];copy_char(delim); if(delim=='L') *id_loc++=delim= *loc++; else if(delim=='<')delim='>'; do {if(loc>=limit) {err_print("! String didn't end");loc=limit;break;} copy_char(c= *loc++); if(c=='\\') if(loc=mod_text_end) {print("\n! 
String too long: "); term_write(mod_text+1,25);err_print(".."); } }/*:87*//*90:*/ #line 1449 "common.w" void err_print(char*s) {print(*s=='!'?"\n%s.":"%s.",s); if(web_file_open)/*91:*/ #line 1462 "common.w" {char*k,*l=(locbuffer) {for(k=buffer;k0)print("\n(%s)\n",mess[history]); }/*:93*/ #line 1489 "common.w" exit(history>harmless_message); }/*:92*//*94:*/ #line 1510 "common.w" void fatal(char*s,...) {va_list p;va_start(p,s); vprintf(s,p);va_end(p);err_print(""); history=fatal_message;wrap_up(); }/*:94*//*98:*/ #line 1563 "common.w" local void scan_args(int argc,char* *argv) {char*dot_pos; int files_found=0,paths_found=at_h_path[0].name==NULL?0:1; while(--argc>0) if(((* ++argv)[0]=='+'||(*argv)[0]=='-')&&(*argv)[1]!='\0')/*103:*/ #line 1656 "common.w" {boolean flag_change=(* *argv=='+'); char*p= &(*argv)[1];unsigned char c; while((c= *p++)!='\0') if((c=tolower(c))!='i')flags[c]=flag_change; else/*104:*/ #line 1668 "common.w" {size_t l=strlen(p); if(l==0)err_print("! Empty include path"); else if(l>max_path_length)err_print("! Include path too long"); else if(paths_found>=max_include_paths) err_print("! Too many include paths"); else {at_h_path[paths_found].length=(int)l; at_h_path[paths_found++].name=strcpy(byte_ptr,p); byte_ptr+=l+1; } break; }/*:104*/ #line 1661 "common.w" }/*:103*/ #line 1569 "common.w" else {if(strlen(*argv)+5>max_file_name_length) fatal("! 
Filename too long:\n%s",*argv); dot_pos=strrchr(*argv,'.'); switch(++files_found) {case 1:/*99:*/ #line 1597 "common.w" #ifndef CPPEXT #define CPPEXT "C" #endif {if(dot_pos==NULL)sprintf(web_file_name,"%s.w",*argv); else {sprintf(web_file_name,"%s",*argv); *dot_pos='\0'; } sprintf(alt_web_file_name,"%s.web",*argv); sprintf(change_file_name,"%s.ch",*argv); if(program==ctangle) sprintf(C_file_name,"%s.%s",*argv,C_plus_plus?CPPEXT:"c"); else {sprintf(tex_file_name,"%s.tex",*argv); sprintf(idx_file_name,"%s.idx",*argv); sprintf(scn_file_name,"%s.scn",*argv); } }/*:99*/ #line 1577 "common.w" break;case 2:/*100:*/ #line 1624 "common.w" if((*argv)[0]=='-')change_file_name[0]='\0'; else if((*argv)[0]!='+') {change_file_explicit=true; sprintf(change_file_name,dot_pos==NULL?"%s.ch":"%s",*argv); }/*:100*/ #line 1578 "common.w" break;case 3:/*101:*/ #line 1634 "common.w" if(program==ctangle) if(dot_pos!=NULL)sprintf(C_file_name,"%s",*argv); else sprintf(C_file_name,"%s.%s",*argv,C_plus_plus?CPPEXT:"c"); else {if(dot_pos!=NULL) {sprintf(tex_file_name,"%s",*argv);*dot_pos='\0';} else sprintf(tex_file_name,"%s.tex",*argv); sprintf(idx_file_name,"%s.idx",*argv); sprintf(scn_file_name,"%s.scn",*argv); }/*:101*/ #line 1579 "common.w" break;default:/*105:*/ #line 1688 "common.w" fatal("! Usage:\n" "c%se [(+|-)options] cwebfile[.w] [(changefile[.ch]|+|-) [outputfile[.%s]]]" ,program==ctangle?"tangl":"weav" ,program==ctangle?"c":"tex");/*:105*/ #line 1581 "common.w" } } if(files_found==0)/*105:*/ #line 1688 "common.w" fatal("! 
Usage:\n" "c%se [(+|-)options] cwebfile[.w] [(changefile[.ch]|+|-) [outputfile[.%s]]]" ,program==ctangle?"tangl":"weav" ,program==ctangle?"c":"tex");/*:105*/ #line 1584 "common.w" if(paths_found>3]|=1<<((n)&7)) #define section_changed(n) ((changed_section[(n)>>3]&(1<<((n)&7)))!=0)/*:43*//*49:*/ #line 745 "common.w" extern void check_complete(void);/*:49*//*52:*/ #line 796 "common.w" extern char byte_mem[],*byte_ptr; extern id_info id_table[],*id_ptr; extern mod_info mod_table[],*mod_ptr;/*:52*//*58:*/ #line 886 "common.w" extern id_pointer hash[]; #define hash_end (&hash[hash_size]) id_pointer id_lookup(char*,char*,int);/*:58*//*67:*/ #line 1005 "common.w" extern mod_pointer root;/*:67*//*80:*/ #line 1266 "common.w" extern char mod_text[],*id_first,*id_loc; #define mod_text_end (&mod_text[longest_name+1]) mod_pointer get_module_name(void); boolean get_control_text(void); void get_string(void);/*:80*//*88:*/ #line 1433 "common.w" extern history; extern void err_print(char*),wrap_up(void),print_stats(void), fatal(char*,...);/*:88*//*95:*/ #line 1531 "common.w" extern boolean flags[]; extern char C_file_name[],idx_file_name[],scn_file_name[];/*:95*//*106:*/ #line 1697 "common.w" extern FILE*C_file,*tex_file; void open_output_file(void);/*:106*//*109:*/ #line 1723 "common.w" void print(char*,...),print_progress(char*),print_section_progress(void);/*:109*//*:14*/ cwebx-3.04.orig/common.inc100644 1750 1750 23174 5675246354 13354 0ustar jdgjdg@ This is is the beginning of material included (by means of `\.{@@i}') from the file \.{common.inc}, so that it appears in an identical way in the programs for \.{CTANGLE}, \.{CWEAVE}, and in the code shared between them (from the file \.{common.w}). There will therefore not be any code producing material, just like in a header file. 
However, the major part of the declarative information that is shared between all the compilation units is contained in an actual header file \.{common.h}, which is written as an auxiliary output file when \.{CTANGLE} processes \.{common.w}; in particular that file contains all the typedefs, |extern| variable declarations and function prototype declarations relating to code shared by \.{CTANGLE} and \.{CWEAVE}. Therefore this file is almost exclusively devoted to some macro definitions of general utility and a few function prototype declarations for functions that are {\it used\/} by the shared code but defined separately in \.{CTANGLE} and \.{CWEAVE}. @h @h @h @h @h @h "common.h" @ We start with some dimensioning parameters that are used both in \.{CTANGLE} and \.{CWEAVE}. Some of them have been decreased with respect to their earlier values which were sufficient in the original \.{WEB} to handle \TeX. Nevertheless they should be sufficient for most applications of \.{CWEB}, since in \Cee\ there is no need to generate large programs entirely as a single file (and \TeX\ is not written in \.{CWEB}!). The smaller values allow \.{CWEB} to run on rather moderate size computers. @d max_bytes 50000L /* the number of bytes in identifier and module names */ @d max_modules 1000 /* maximal number of module names */ @d max_idents 5000 /* maximal number of identifiers */ @d max_sections 4000 /* greater than the total number of sections, and less than $10240$ */ @d hash_size 353 /* hash modulus, preferably odd */ @d buf_size 100 /* maximum length of input line, plus two */ @d longest_name 1000 /* module names and strings shouldn't be longer than this */ @d long_buf_size (buf_size+longest_name) /* for \.{CWEAVE} */ @ Here are some macros of general utility. We use |local| in place of |static| for functions and variables when we want to stress the property of file-scope rather than static initialisation and permanence of value. 
The macro |array_size| can be used to compute the number of elements in a statically initialised array. For truth values we use the type |boolean| and the values |true| and |false|. We also make the general convention that whenever we mention a pointer to the ``end'' of a subsequence of a linear array, we mean the address of first entry beyond the subsequence itself. This is in keeping with general practice in~\Cee, and the language definition in fact guarantees the existence of the address even if the subsequence should contain the last entry of the array; this convention avoids repeated use of turgid phrases such as ``pointing one place beyond the last entry of~\dots''. @d local static @f local static @d array_size(a) ((int)(sizeof(a)/sizeof(a[0]))) @d false (boolean) 0 @d true (boolean) 1 @d ctangle 0 @d cweave 1 @ Although \.{CWEB} uses the conventions of \Cee\ programs found in the standard \.{} header file, it does assume that the character set is the \caps{ASCII} code. This dependency is mild however, and limited to the assumption that certain character codes below |040| are not occupied by ordinary characters. To be able to use such vacant spots in a character code independent way is possible (as is done in \TeX) by mapping all characters on input to their \caps{ASCII} positions (thereby ensuring that certain positions remain unused) and unmapping them on output; such an approach was deemed too tedious for the \.{CWEB} system however. Rather, the few places that have to be modified for non-\caps{ASCII} codes can be located by looking up the entry named ``\caps{ASCII} code dependencies'' in the index of each of the programs. @ A few character pairs are encoded internally as single characters, using the definitions below. These definitions are consistent with an extension of \caps{ASCII} code originally developed at \caps{MIT} and explained in Appendix~C of {\sl The \TeX book\/}. 
Thus, users who have such a character set can type things like `\.{\char'32}' and `\.{\char'4}' instead of `\.{!=}' and `\.{\&\&}'; however, their files will not be too portable until more people adopt the extended code. To be precise, when moved to other installations their files can still be processed by \.{CTANGLE} and \.{CWEAVE}, producing the required output, but users will have problems reading the source files. Therefore it is advised to use the two-character form in the \.{CWEB} files, which will be converted to single characters on input. @^ASCII code dependencies@> @^system dependencies@> @d and_and 04 /* `\.{\&\&}'; this corresponds to \caps{MIT}'s \.{\char'4} */ @d lt_lt 020 /* `\.{<<}'; this corresponds to \caps{MIT}'s \.{\char'20} */ @d gt_gt 021 /* `\.{>>}'; this corresponds to \caps{MIT}'s \.{\char'21} */ @d plus_plus 013 /* `\.{++}'; this corresponds to \caps{MIT}'s \.{\char'13} */ @d minus_minus 01 /* `\.{--}'; this corresponds to \caps{MIT}'s \.{\char'1} */ @d minus_gt 031 /* `\.{->}'; this corresponds to \caps{MIT}'s \.{\char'31} */ @d not_eq 032 /* `\.{!=}'; this corresponds to \caps{MIT}'s \.{\char'32} */ @d lt_eq 034 /* `\.{<=}'; this corresponds to \caps{MIT}'s \.{\char'34} */ @d gt_eq 035 /* `\.{>=}'; this corresponds to \caps{MIT}'s \.{\char'35} */ @d eq_eq 036 /* `\.{==}'; this corresponds to \caps{MIT}'s \.{\char'36} */ @d or_or 037 /* `\.{\v\v}'; this corresponds to \caps{MIT}'s \.{\char'37} */ @ Invoking the macro |find_char| will fetch a new line if the current one has been completely read, and it will either return |false| to indicate that there is no input left, or otherwise ensure that it is safe to inspect |*loc| (although it might be the line-ending space). @d find_char() (loc<=limit || get_line()) @ The following declarations give the interface to the searching algorithms. They are defined separately in \.{CTANGLE} and \.{CWEAVE} and used by the common code, rather than the other way around. 
The small number of such functions does not seem to justify creating a separate header file for them, and moreover, it would be difficult to decide whether such a header file should be produced by the source file for \.{CTANGLE} or by the source file for \.{CWEAVE}. @< Function prototypes used but not defined in the shared code @>= boolean names_match (id_pointer,char*,int,int); void init_id_name (id_pointer,int); void init_module_name (mod_pointer); @ Here are some macros associated with the storage of names. The first four of them convert pointers to structures representing identifiers or module names to smaller |sixteen_bits| indices into the arrays they are stored in, and back again. The macros |name_begin|, |name_end| and |length| provide the basic attributes of the actual string referred to by a name pointer. The macro |complete_name| tells whether the complete name for a module has been encountered so far, and |print_mod| prints a module name on the user's terminal for error reporting, providing a colon, angle brackets and an ellipsis if necessary. @d id_index(p) ((sixteen_bits)((p)-id_table)) @d id_at(i) (&id_table[i]) @d mod_index(p) ((sixteen_bits)((p)-mod_table)) @d mod_at(i) (&mod_table[i]) @d name_begin(p) ((p)->byte_start) @d length(p) ((int)(strlen(name_begin(p)))) @d name_end(p) (name_begin(p)+length(p)) @d complete_name(p) ((p)->byte_start[-1]=='\0') @d print_mod(p) printf(": <%s%s>",name_begin(p), complete_name(p) ? "" : "..." ) @ Three levels of error severity are distinguished: informative, serious and fatal. An overflow stop occurs if \.{CWEB}'s tables aren't large enough. Sometimes the program will detect a violation of one of its supposed invariants (at least this could happen if the program still contains some errors), and \.{CWEB} then prints an error message that is really for the \.{CWEB} maintenance person, not the user. In such cases the program says |confusion("indication of where we are")|. 
@d spotless 0 /* |history| value for normal jobs */ @d harmless_message 1 /* |history| value when non-serious info was printed */ @d error_message 2 /* |history| value when an error was noted */ @d fatal_message 3 /* |history| value when we had to stop prematurely */ @d mark_harmless() @+ if (history==spotless) history=harmless_message; @+ else @; @d mark_error() (history=error_message) @d overflow(t) fatal("\n! Sorry, %s capacity exceeded",t) @.Sorry,... capacity exceeded@> @d confusion(s) fatal("\n! This can't happen: %s",s) @.This can't happen@> @ Command line flag settings are stored in an array |flags|. For some of them we use symbolic names. @d show_banner flags['b'] /* should the banner line be printed? */ @d show_happiness flags['h'] /* should lack of errors be announced? */ @d show_progress flags['p'] /* should progress reports be printed? */ @d show_stats flags['s'] /* should statistics be printed at end of run? */ @d C_plus_plus flags['+'] /* is the language `\Cpp' rather than `\Cee'? */ @d compatibility_mode flags['c'] /* emulate \LKC.? */ @ Here are some macros for terminal output. The macro |update_terminal| is invoked when we want to make sure that everything we have output to the terminal so far has actually left the computer's internal buffers and been sent. Note that the cast on the |leng| parameter of |term_write| is necessary because a pointer difference need not be of type |int|. @d update_terminal() fflush(stdout) /* empty the terminal output buffer */ @d new_line() putchar('\n') @d term_write(string,leng) printf("%.*s",(int)(leng),string) /* write on the standard output */ cwebx-3.04.orig/common.w100644 1750 1750 225205 6204063072 13047 0ustar jdgjdg% This file is part of CWEBx. % This program by Marc van Leeuwen based on earlier versions by % D. E. Knuth., Silvio Levy and Frank Jensen. % It is distributed WITHOUT ANY WARRANTY, express or implied. 
% CWEB (Revision: 2.0) % Don Knuth, July 1990 % Version 3.x, Marc van Leeuwen, December 1993 % CWEBx 2+1.0, Marc van Leeuwen, August 1994 % CWEBx 3.0, Marc van Leeuwen, January 1995 % CWEBx 3.02, Marc van Leeuwen, April 1996 % Copyright (C) 1987,1990 Silvio Levy and Donald E. Knuth % Copyright 1994 Marc A. A. van Leeuwen % Permission is granted to make and distribute verbatim copies of this % document provided that the copyright notice and this permission notice % are preserved on all copies. % Permission is granted to copy and distribute modified versions of this % document under the conditions for verbatim copying, provided that the % entire resulting derived work is distributed under the terms of a % permission notice identical to this one. \def\pb{$\.|\ldots\.|$} % C brackets (|...|) \def\LKC.{Levy/Knuth \.{CWEB}} \def\:#1{`\.{@@#1}'} \def\title{Common code for CTANGLE and CWEAVE (Version x3.0)} \def\topofcontents {\topglue 0pt plus .5 fill \centerline{\titlefont Common code for {\ttitlefont CTANGLE} and {\ttitlefont CWEAVE}} \vskip 15pt \centerline{(\.{CWEB} version x3.0)} } \def\botofcontents {\vfill\noindent Copyright \copyright\ 1987,\thinspace1990 Silvio Levy and Donald E. Knuth \par\noindent Copyright 1994 Marc A. A. van Leeuwen \bigskip\noindent Permission is granted to make and distribute verbatim copies of this document provided that the copyright notice and this permission notice are preserved on all copies. \smallskip\noindent Permission is granted to copy and distribute modified versions of this document under the conditions for verbatim copying, provided that the entire resulting derived work is distributed under the terms of a permission notice identical to this one. } @* Introduction. This file contains code common to both \.{CTANGLE} and \.{CWEAVE}, that roughly concerns the following problems: input routines, name table handling, error handling and handling of the command line. @h @i common.inc @* Generalities.
That completes the contents of \.{common.inc}. In the texts below we will sometimes use \.{CWEB} to refer to either of the two component programs, if no confusion can arise. Here is the overall appearance of this file, except for the function definitions for (local and public) functions, which will follow in the remaining unnamed sections. @c @< Function prototypes used but not defined in the shared code @>@; @< Definitions of variables common to \.{CTANGLE} and \.{CWEAVE} @>@; @< Prototypes of local functions @>@; @ For all functions and variables defined here that are accessible to \.{CTANGLE} and \.{CWEAVE}, prototype respectively |extern| declarations are placed on the file \.{common.h} that is included by all three source files. Typedef declarations that are publicly visible also appear in this file. @( common.h @>= @< Public typedef declarations @>@; @< Declarations of public variables and function prototypes @>@; @ Since a number of large arrays are used to store various kinds of data, we wish to have some control over the number of bytes occupied by the basic items. In such cases we use one of the following types rather than |int|. @< Public typedef declarations @>= typedef char boolean; typedef unsigned char eight_bits; typedef unsigned short sixteen_bits; @ In certain cases \.{CTANGLE} and \.{CWEAVE} should do almost, but not quite, the same thing. In these cases we've written common code for both, differentiating between the two by means of the global variable |program|. Furthermore, |CTANGLE| operates in two phases (input and output), and similarly |CWEAVE| operates in three phases (cross-reference collection, translation of the source, and output of the index); the global variable |phase| tells which phase we are in. @< Declarations... @>= extern int program, phase; @~@< Definitions... @>= int program, phase; @ There's an initialisation function that gets both \.{CTANGLE} and \.{CWEAVE} off to a good start. 
@< Declarations...@>= void common_init (int argc,char** argv); @~We will fill in the details of this function later. @c void common_init (int argc,char** argv) { @< Initialise variables @> @< Set the default options common to \.{CTANGLE} and \.{CWEAVE} @> scan_args(argc,argv); } @* Input routines. The lowest level of input to the \.{CWEB} programs is performed by |input_ln|, which must be told which file to read from. The return value of |input_ln| is |true| if the read is successful and |false| if not (i.e., if file has ended). The conventions of \TeX\ are followed; i.e., the characters of the next line of the file are copied into the |buffer| array, and the global variable |limit| will point to the first unoccupied position; trailing white space is ignored. @< Declarations... @>= extern char buffer[], *loc, *limit; @~The value of |limit| must be less than or equal to |buffer_end|, so that |*buffer_end| is never filled by |input_ln|. The characters |*limit| and |limit[1]| are reserved for placing a sentinel at the end of the line. For the convenience of |CWEAVE|, the buffer is extended by |longest_name| characters, so that there is enough space to place any module name after the input. @d buffer_end (&buffer[buf_size-2]) /* practical end of |buffer| */ @< Definitions... @>= char buffer[long_buf_size]; /* where each line of input goes */ char *loc=buffer; /* points to the next character to be read from the buffer */ char *limit=buffer; /* points to the end of the input line */ @ If a non-empty line follows the last newline in a file, we return it with a success status when |EOF| is read; in this case the next call will read |EOF| once again, and that time return failure. We are careful in case |isspace| is a macro using its argument more than once. As a service to the include and change file handling functions, |get_line| replaces any initial \:I, \:X, \:Y, or \:Z by its lower-case counterpart. 
The value of |loc| is usually irrelevant during the lower level input functions, and will later be set properly by |get_line|; however, when error messages are given, the value of |loc| will determine the way that the current line of input is displayed, so we give it an appropriate value in those cases. @c local boolean input_ln (FILE *f) /* copies a line into |buffer| or returns |false| */ { register int c; /* the character read */ register char* k=limit=buffer; /* where next character goes */ while ((c=getc(f))!='\n' && c!=EOF) if (k<=buffer_end) {@; *k++=c; @+ if (!isspace(c)) limit=k; } if (k>buffer_end) { loc=&buffer[0]; /* now |err_print| will display unbroken input line */ err_print ("! Input line too long"); @.Input line too long@> if (limit>buffer_end) limit=buffer_end; /* truncate line */ } if (buffer[0]=='@@' && limit>&buffer[1] && strchr("IXYZ",buffer[1])!=NULL) buffer[1]=tolower(buffer[1]); return c!=EOF || limit>buffer; /* whether anything new has been found */ } @ Now comes the problem of deciding which file to read from next. Recall that the actual text that \.{CWEB} should process comes from two streams: a |web_file|, which can contain possibly nested include commands \:i, and a |change_file|. For each file we store its name and line number for error reporting and for the production of \&{\#line} directives by |CTANGLE|. Information for the |web_file| together with the currently open include files is kept on a stack |file| with stack pointer |include_depth|, while the change file has its own record. The boolean |changing| tells whether or not we're reading from the |change_file|. Whenever we switch from the |cur_file| to the |change_file| or vice versa, or if the |cur_file| has changed, we tell |CTANGLE| to print this information by means of a \&{\#line} directive in the \Cee\ file, by raising the |print_where| flag. 
This flag is handled like an interrupt request, i.e., it remains raised until it is serviced at an appropriate time (when a complete token has been scanned), whereupon it is cleared by the service routine. In Phase~I of \.{CWEAVE} header files following \:h are swiftly scanned for typedef declarations, which creates one or more extra levels of input, for which we use the same stack as for~\:i. Certain functions operate differently at such times, so we must be able to distinguish these inclusions; this is achieved by maintaining a boolean variable |including_header_file|. @< Declarations...@>= #define max_file_name_length 60 extern struct f { FILE *file; char name[max_file_name_length]; sixteen_bits line; } file[], change; extern int include_depth; extern boolean input_has_ended, changing, web_file_open, print_where , including_header_file; @) boolean push_input_file(boolean,boolean); /* start a new level of input */ boolean get_line (void); /* get the next line of merged input */ #define cur_file file[include_depth].file /* current file */ #define cur_file_name file[include_depth].name /* current file name */ #define cur_line file[include_depth].line /* number of current line in current file */ #define web_file file[0].file #define change_file change.file #define change_line change.line @~We also keep an array |at_h_path| of alternative search paths for locating \:h files, and a path |at_i_path| for locating \:i files. 
@d max_include_depth 10 /* maximum nesting depth of source files, not counting the change file */ @d max_include_paths 8 /* maximum number of additional search paths for \:h files */ @d max_path_length 80 /* maximal length of a search path */ @= struct f file[max_include_depth]; /* stack of non-change files */ struct f change; /* change file */ local char web_file_name[max_file_name_length] , change_file_name[max_file_name_length] , alt_web_file_name[max_file_name_length]; int include_depth; /* current level of nesting */ boolean input_has_ended; /* whether there is no more input */ boolean changing; /* whether the current line is from |change_file| */ boolean web_file_open=false; /* whether the web file is being read */ boolean print_where=false; /* should |CTANGLE| print line and file info? */ local struct { char* name; int length; } at_h_path[max_include_paths],at_i_path; /* alternative search paths for \:h and \:i */ boolean including_header_file=false; /* are we processing \:h? */ @ Before we consider merging the main input with the change file, we first handle pushing and popping files in the main stream. The function |push_input_file| opens a new level of input, e.g., when a \:i line is found. It is used both from within the common code, namely to open files included by~\:i, and by a direct call from |CWEAVE|, to open files included by~\:h (and nested include files) during its first phase. When it is called, the file name is supposed to start at the first non-blank character from |loc|; it is delimited by either blank space, double quotes or angle brackets. Once the file name is located and the file is opened, any further input from the current line will be discarded. This function has two boolean parameters, the first telling whether the file to be included is a header file (rather than a \:i file), the second telling whether changes should be suspended during the file inclusion; the boolean result tells whether a file was actually opened. 
@c boolean push_input_file(boolean header,boolean suspend) { boolean success=false; /* whether a file has been opened */ char delim=' '; /* the character being used to delimit the file name */ while (loc=limit) err_print("! Include file name not given"); @.Include file name not given@> else if (++include_depth>=max_include_depth) @/{@; --include_depth; err_print("! Too many nested includes"); @.Too many nested includes@> } else { @< Read file name into |cur_file_name| @> if (delim!='>' && (cur_file=fopen(cur_file_name,"r"))!=NULL) success=true; else @< Try to open file in alternative directory, and set |success| @> if (success) {@; cur_line=0; print_where=true; @< If necessary deactivate the change file @> } else { --include_depth; if (delim!='>') /* don't complain about system header files */ err_print("! Cannot open include file"); @.Cannot open include file@> } } loc=&limit[1]; /* force |input_ln| before next character is read */ return success; } @ When the include file name is delimited by spaces, any white-space character will end the name; in any case we accept the end of the line as end of the file name. @< Read file name into |cur_file_name| @>= { char* k=cur_file_name; while (loc else *k++=*loc++; *k='\0'; } @ At initialisation time, paths may have been stored in |at_i_path| and |at_h_path|, that will be prefixed to given file name in an attempt to find include files. For a file opened in this manner |cur_file_name| will not contain the full path name but just the final component; this affects error messages and \&{\#line} directives produced while reading the file. The only problem that this might cause is that a debugger could be unable to locate a source file for code included from a \:i file; however, it is not likely that such a file outside the current directory will contain any code producing material, and good debuggers have their own means to specify a search path for source files. @< Try to open file in alternative directory...
@>= { char name_buf[max_path_length+max_file_name_length]; int i; if (header) for (i=0; i= { char* cwebinputs=getenv("CWEBINPUTS"); at_h_path[0].name=at_i_path.name=NULL; /* defaults */ #ifdef CWEBHEADERS at_h_path[0].name=CWEBHEADERS; at_h_path[0].length=(int)strlen(CWEBHEADERS); #endif if (cwebinputs!=NULL) { at_i_path.length=(int)strlen(cwebinputs); at_i_path.name=strcpy(byte_ptr,cwebinputs); byte_ptr+=at_i_path.length+1; } else { #ifdef CWEBINPUTS at_i_path.name=CWEBINPUTS; at_i_path.length=(int)strlen(CWEBINPUTS); #endif } } @ In some cases we wish to suspend changes during the inclusion of a file, and any nested inclusions. This happens when during the first pass of \.{CWEAVE} a header file specified after \:h is being read in, or when in compatibility mode a \:i inclusion is issued under control of the change file. In such cases we suspend the change file by setting |changing=false| and |change_limit=change_buffer| as if the change file had ended, after having saved the old values of |changing| and |change_limit|. The change file will be reactivated when the included file ends, which requires saving |include_depth| as well. @= local boolean saved_changing; /* were we changing before it was suspended? */ local char* saved_change_limit; /* end of suspended change line */ local int saved_include_depth=0; /* depth after opening \:h file */ @~Although this could be easily changed, the current code relies on the fact that suspension cannot be nested (as would be the case if \:h were activated in a file included under control of the change file), since compatibility mode does not support \:h file inclusion. 
@< If necessary deactivate the change file @>= if (suspend) { saved_changing=changing; changing=false; @/saved_change_limit=change_limit; change_limit=change_buffer; @/saved_include_depth=include_depth; } @ The function |get_web_line| fetches the next line from the main input stream, taking care of the interpretation on \:i and of restoring input to the parent file when an include file has ended. Like |input_ln| it returns a boolean value telling whether a line could be found. When this is not the case, it means that either the main input stream has dried up, or we have come to the end of a header file included by an \:h code issued from the change file, in which case |changing==true| afterwards, and we should not read from the main input stream after all. In compatibility mode |get_web_line| operates differently, since in \LKC. include files are expanded after matching against the change file rather than before, although an expanded include file will be reconsidered for matching against (the same line of) the change file, unless it was included under control of the change file itself. This means that in compatibility mode no include file should be opened while preparing the main input stream for a match. On the other hand, if an include file ends, there is no other sensible action but to close the file and read on at the previous level. 
@c local boolean get_web_line(void) { do if (++cur_line,input_ln(cur_file)) /* then a line has been found */ if (!compatibility_mode && limit>&buffer[1] && buffer[0]=='@@' && buffer[1]=='i') @/{@;loc=&buffer[2]; print_where=true; push_input_file(false,false); } /* expand \:i */ else return true; /* return the line without further action */ else if (include_depth==0) /* then end of input has been reached */ @/{@; input_has_ended=true; web_file_open=false; return false; } else { fclose(cur_file); print_where=true; if (include_depth--==saved_include_depth) /* then restore |changing| */ { changing=saved_changing; change_limit=saved_change_limit; saved_include_depth=0; including_header_file=false; if (changing) return false; /* fall back into change file */ } } while (true); } @ Now we come to merging the main input stream with the change file. When |changing| is false, the first non-empty line of |change_file| after the next \:x to be matched is kept in |change_buffer|, for purposes of comparison with the next line of |cur_file|. The test |lines_match| is used for equality between the two lines; it will never return |true| when |change_limit==change_buffer| because it will not be invoked when |limit==buffer|. @d lines_match() (change_limit-change_buffer==limit-buffer && strncmp(buffer, change_buffer, limit-buffer)==0) @= local char change_buffer[buf_size]; /* next line of |change_file| */ local char *change_limit; /* points to the effective end of |change_buffer| */ @ The function |prime_the_change_buffer| sets |change_buffer| in preparation for the next matching operation. After the change file has been completely input, we set |change_limit=change_buffer|, so that no further matches will be made; since blank lines in the change file are not used for matching, we have |(change_limit==change_buffer && !changing)| if and only if the change file is exhausted (or suspended). 
This function is called only when |changing| is true; hence error messages will be reported correctly. @c local void prime_the_change_buffer (void) { change_limit=change_buffer; /* this value is used if the change file ends */ @ @ @ } @ While looking for a line that begins with \:x in the change file, we allow lines that begin with `\.{@@}', as long as they don't begin with \:y or \:z (which would probably indicate that the change file is fouled up). @= do { if (++change_line,!input_ln(change_file)) return; if (limit>&buffer[1] && buffer[0]=='@@') if (buffer[1]=='x') break; else if (buffer[1]=='y' || buffer[1]=='z') { loc=&buffer[2]; /* point out error after \:y or \:z */ err_print ("! Where is the matching @@x?"); @.Where is the match...@> } else @< Check for erroneous \:i @> } while (true); @ When not in compatibility mode, \:i lines are expanded by |get_web_line|, so it makes no sense to place such a code between \:x and~\:y or between \:y and~\:z; to allow them outside of the changes they would only cause confusion by suggesting the inclusion of a subsidiary change file. Therefore we normally do not allow \:i at the beginning of any line of the change file; in compatibility mode however, \:i can be used in both sides of a change, and outside of the changes the code is allowed but ignored. @< Check for erron... @>= { if (buffer[1]=='i' && !compatibility_mode) @/{@; loc=&buffer[2]; err_print ("! No includes allowed in change file"); } } @.No includes allowed...@> @ After a \:x has been found, we ignore the rest of the line, as well as any blank lines that follow it; since |input_ln| removes trailing blanks, we can simply test for empty lines. @< Skip to the next non-blank line... @>= do if (++change_line,!input_ln(change_file)) @/{@; loc=&buffer[0]; err_print("! 
Change file ended after @@x"); return; } while (limit==buffer); @.Change file ended...@> @ @= {@; int n=(int)(limit-buffer); change_limit=change_buffer+n; strncpy(change_buffer,buffer,n); } @ The function |check_change| is used to see if the next change entry should go into effect; it is called only when |changing| is false. The idea is to test whether or not the current contents of |buffer| matches the current contents of |change_buffer|. If not, there's nothing more to do, but if so, a change is called for. When this happens, all of the text down to the \:y is supposed to match, and an error message is issued if any discrepancy is found; after finding \:y we have |changing==true|, so that subsequent lines will be read from the change file. Since |check_change| is called only when |change_limit>change_buffer|, i.e., when the change file is active, we don't have to consider the case here that |get_web_line| returns |false| after reactivating the suspended change file. @c local void check_change (void) /* switches to |change_file| if the buffers match */ { int n=0; /* the number of discrepancies found */ if (!lines_match()) return; print_where=true; /* indicate interrupted line sequencing */ do { changing=true; @< Read a line from the change file into the change buffer; if \:y is found, |break|; if the change file ends, |return| @> changing=false; if (!get_web_line()) @/{@; loc=&buffer[0]; err_print("! CWEB file ended during a change"); return; @.CWEB file ended...@> } if (!lines_match()) ++n; } while (true); if (n>0) { loc=&buffer[2]; print("\n! Hmm... %d of the preceding lines failed to match",n); @.Hmm... $n$ of the preceding...@> err_print(""); } } @ Since we read a line from the change file before reading the line from the main input stream that should match it, we can use |input_ln| to read the line into |buffer| first, and move it to |change_buffer| afterwards. When expecting \:y, we signal and ignore any \:x or \:z. 
@< Read a line from the change file into the change buffer... @>= { if (++change_line,!input_ln(change_file)) { loc=&buffer[0]; err_print("! Change file ended before @@y"); @.Change file ended...@> change_limit=change_buffer; changing=false; return; } if (limit>&buffer[1] && buffer[0]=='@@') if (buffer[1]=='y') break; else if (buffer[1]=='x' || buffer[1]=='z') @/{@; loc=&buffer[2]; err_print("! Where is the matching @@y?"); } @.Where is the match...@> else @< Check for erron... @> @< Move |buffer| and |limit|... @> } @ The function |reset_input|, which gets \.{CWEB} ready to read the \.{CWEB} source file(s), is used at the beginning of Phase~I of |CTANGLE|, and of Phases I~and~II of |CWEAVE|. @< Declarations... @>= void reset_input (void); @~Although |reset_input| will not read anything from |web_file| after opening it, it will move up to the first change line in |change_file|. @c void reset_input (void) /* initialise to read the web file and change file */ { boolean use_change_file= change_file_name[0]!='\0'; @ cur_line=0; change_line=0; include_depth=0; if (use_change_file) {@; changing=true; prime_the_change_buffer(); } /* prepare change file */ else change_limit=change_buffer; /* emulate a change file that has ended */ limit=buffer; loc=&buffer[1]; /* now |find_char()| will read a line */ changing=false; input_has_ended=false; } @ The following code opens the input files. We complain about a missing change file only if it was explicitly mentioned as a command line argument. @= { if ((web_file=fopen(web_file_name,"r"))!=NULL) strcpy(file[0].name,web_file_name); else if ((web_file=fopen(alt_web_file_name,"r"))!=NULL) strcpy(file[0].name,alt_web_file_name); else fatal("!
Cannot open \"%s\" as input file", web_file_name); @.Cannot open input file@> web_file_open=true; if (use_change_file) if ((change_file=fopen(change_file_name,"r"))!=NULL) strcpy(change.name,change_file_name); else if (!change_file_explicit) use_change_file=false; /* forget about the change file */ else fatal("! Cannot open \"%s\" as change file", change_file_name); @.Cannot open change file@> } @ Here are some more variables relevant to the reading of input files. Every time an input line is read coming from the change file, and on returning to reading from |cur_file| (possibly after one or more lines of the main input stream have been removed by the change file), we mark the current section as having changed by setting a bit in the bitmap |changed_section|. @< Declarations... @>= extern sixteen_bits section_count; extern eight_bits changed_section[]; #define mark_section_as_changed(n) (changed_section[(n)>>3]|=1<<((n)&7)) #define section_changed(n) ((changed_section[(n)>>3]&(1<<((n)&7)))!=0) @~ @= sixteen_bits section_count; /* the current section number */ eight_bits changed_section[(max_sections+7)/8]; /* is the section changed? */ @ The function |get_line| puts the next line of merged input into the buffer and updates the variables appropriately. A space is placed at the right end of the line (i.e., at |*limit|), serving many purposes, like ensuring that a final `\.@@' on a line will be interpreted as \:\ , and at other times allowing us to test for interesting characters without first testing for line end. The function returns |!input_has_ended| because we often want to check the value of that variable after calling the function. Usually |get_line| is called after the space at |*limit| has been processed; therefore a call often takes the form of the macro |find_char|, which calls |get_line| if necessary, and returns whether it has succeeded in making |loc| point to a valid character (possibly a line-ending space). 
The logic of marking sections as changed, which is implemented below, is a bit subtle. The status of |changing| is noted for every line read, but since the test is made at the start of |get_line|, it actually happens just before the next line is read in. This means that if the first replacement line of a change involves the start of a new section, then the new section is marked as changed rather than the one before it, which is the right choice, assuming that sections are started at the beginning of a line. It is possible that |changing| is switched on and then right back off again (if the line after \:y starts with \:z); in that case we mark the current section as changed and restart |get_line|, since we still have found no actual line. @c boolean get_line (void) /* inputs the next line */ { restart: if (changing) mark_section_as_changed(section_count); else @ if (changing) { @ if (!changing) {@; mark_section_as_changed(section_count); goto restart; } } loc=&buffer[0]; *limit= ' '; /* place sentinel space */ if (compatibility_mode && buffer[0]=='@@' && buffer[1]=='i') @/{@; loc+=2; print_where=true; push_input_file(false,changing); goto restart; } if (limit-buffer>5 && strncmp(buffer,"#line",5)==0 && isspace((eight_bits)buffer[5])) @< Set file name and line number according to \&{\#line} directive and |goto restart| @> return !input_has_ended; } @ A \&{\#line} directive should have the form `\.{\#line 85 "common.w"}'. The line number and file name simply override |cur_line| and |cur_file_name|. @< Set file name and line number... @>= { sixteen_bits line=0; print_where=true; /* output a \&{\#line} directive soon */ loc=&buffer[6]; @+ while (locline=line-1; /* directive applies to next line, not this one */ strncpy(cur_f->name,loc,i); cur_f->name[i]='\0'; goto restart; } } } err_print("! 
Improper #line directive"); goto restart; @.Improper \#line directive@> } @ After checking that a line could be obtained from the main input stream, some quick tests are made that will avoid calling |check_change| in most cases. The switch to |changing| mentioned in the module name is usually brought about by |check_change|, but may also happen when the suspension of the change file during inclusion of a header file is ended by |get_web_line|. In either case further input lines should be taken from the change file. @< Read from |cur_file|... @>= { if (get_web_line() && change_limit>change_buffer && limit-buffer==change_limit-change_buffer && buffer[0]==change_buffer[0] ) check_change(); } @ Here we get a line of input from the change file, unless it starts with~\:z. The statements `|loc=buffer; *limit=' ';|' were performed in |get_web_line| for lines from the main input stream, but must be issued explicitly for lines from the change file. @< Read from |change_file|... @>= { if (++change_line,!input_ln (change_file)) { err_print("! Change file ended without @@z"); @.Change file ended...@> @/buffer[0]='@@'; buffer[1]='z'; limit=&buffer[2]; } if (limit>&buffer[1] && buffer[0]=='@@') /* check if the change has ended */ if (buffer[1]=='z') @/{@; prime_the_change_buffer(); changing=false; print_where=true; } else if (buffer[1]=='x' || buffer[1]=='y') @/{@; loc=&buffer[2]; err_print("! Where is the matching @@z?"); } @.Where is the match...@> else @< Check for erron... @> } @ The function |check_complete| will be called at the end of \.{CTANGLE} and \.{CWEAVE} to check for an unfinished state of the change file. @< Declarations...@>= extern void check_complete (void); @~When |check_complete| is called we have |input_has_ended|, which implies |!changing|. The only thing to test for is that there is no change line still waiting for a match. 
In order to get a decent display of the non-matching line, we copy it from the |change_buffer| to the |buffer|, and set some other variable appropriately. There is no need to restore any variables, since |check_complete| is called at the very end of the run. @c void check_complete (void) /* checks that all changes were picked up */ { if (change_limit!=change_buffer) { int l=(int)(change_limit-change_buffer); strncpy(buffer,change_buffer,l); limit=&buffer[l]; changing=true; loc=buffer; web_file_open=true; /* prepare unmatched line for display */ err_print("! Change file entry did not match"); @.Change file entry did not match@> } } @* Storage of names and strings. Both \.{CTANGLE} and \.{CWEAVE} store the strings representing identifiers, module names and (in case of \.{CWEAVE}) index entries in a large array of characters, called |byte_mem|. These strings are not accessed directly, but via structures that collect further information about these objects. These structures come in two kinds, depending on whether they correspond to identifiers (or index entries) or to module names; these structures are called |id_info| and |mod_info| respectively. @< Public typedef...@>= typedef struct id_info { char *byte_start; /* beginning of the name in |byte_mem| */ @@; } id_info, *id_pointer; @) typedef struct mod_info { char *byte_start; /* beginning of the name in |byte_mem| */ @@; } mod_info, *mod_pointer; @ All |id_info| and |mod_info| structures are stored in one of two arrays, called |id_table| and |mod_table| respectively, and hence all |id_pointer| and |mod_pointer| values point into these arrays. Therefore we can freely convert between such a pointer and an index into the appropriate array; the macros |id_index|, |id_at|, |mod_index| and |mod_at| defined above perform these conversions. @< Declarations... 
@>= extern char byte_mem[], *byte_ptr; extern id_info id_table[], *id_ptr; extern mod_info mod_table[], *mod_ptr; @~The first unused position in |byte_mem| is kept in |byte_ptr|, and the first unused positions in |id_table| and |mod_table| are similarly kept in |id_ptr| and |mod_ptr|, respectively. We want to keep |byte_ptr<=byte_mem_end|, |id_ptr<=id_table_end| and |mod_ptr<=mod_table_end|. @d byte_mem_end (&byte_mem[max_bytes]) /* end of |byte_mem| */ @d id_table_end (&id_table[max_idents]) /* end of |id_table| */ @d mod_table_end (&mod_table[max_modules]) /* end of |mod_table| */ @= char byte_mem[max_bytes]; /* characters of names */ char *byte_ptr=&byte_mem[0]; /* first unused position in |byte_mem| */ id_info id_table[max_idents]; /* information about identifiers */ id_pointer id_ptr=&id_table[0]; /* first unused position in |id_table| */ mod_info mod_table[max_modules]; /* information about module names */ mod_pointer mod_ptr=&mod_table[0]; /* first unused position in |mod_table| */ @ Here is a simple function that copies a (not necessarily null-terminated) string~|s| of length~|l| into |byte_mem| and returns a pointer to the copied string. @c char* store_string(char* s, int l) { char* dest=byte_ptr; if (byte_mem_end-byte_ptr<=l) overflow ("byte memory"); byte_ptr+=l; *byte_ptr++='\0'; return strncpy(dest,s,l); } @ A component is present in both |id_info| and |mod_info| structures, whose function is different for \.{CTANGLE} and \.{CWEAVE}. In |CTANGLE|, it is used only in |mod_info| structures, and is a pointer to a replacement text for the module name. In |CWEAVE| it is a pointer to a list of cross-references for the identifier or module name. The precise nature of these pointers is of no interest to the common code, and at this point we do not have the types available to express this field as a |union|. 
However, since in both cases it will be a pointer to a structure, we can use a little trick by declaring it as a pointer to a |struct variant| where |variant| is not further specified here. Then in \.{CTANGLE} and \.{CWEAVE} we define |variant| as a macro for the appropriate |struct| specifier, while in the common code we leave it undefined (the \:d defining |variant| is given very early, so that it will precede the `\.{@@h "common.h"}' that will read in the definition of |struct id_info| and |struct mod_info|). It is not quite proper that in the common code these structures contain a pointer to a named but undefined structure, while in the other compilation unit of \.{CTANGLE} and \.{CWEAVE} respectively, they contain a pointer to a known structure with a {\it different\/} specifier (due to the macro definition); in \Cee\ structures with different specifiers can never be the same type. However, since the linker does not type-check, and the compiled code is not likely to depend on the name of a structure specifier, this should cause no problems, except possibly some mild confusion to a debugger. An alternative solution of defining a structure of the same name to denote entirely different types in \.{CTANGLE} and \.{CWEAVE} would be clearer to the computer, but more confusing to humans. Of course we could also have used a |(void*)| field instead of a pointer to a structure, but this would require a lot of explicit or implicit casts, allowing possible type errors to go undetected by the compiler. @< More fields of |struct id_info| @>= struct variant* equiv_or_xref; /* extra information about this identifier */ @~ @< More fields of |struct mod_info| @>= struct variant* equiv_or_xref; /* extra information about this module */ @ The |id_info| structures are linked together into (hopefully) short lists sharing the same hash key by means of the |hash_link| field. 
In |CWEAVE|, an additional field is used to store information about the syntactic r\^ole of the identifier, its so-called |ilk|. @< More fields of |struct id_info| @>= struct id_info *hash_link; /* links identifiers with same hash code */ int ilk; /* syntactic information, used in |CWEAVE| only */ @ The pointers to the heads of the hash lists are stored in the array~|hash|. Identifiers can be found by computing a hash code~|h| and then looking at the identifiers represented by the |id_pointer|s |hash[h]|, |hash[h]->hash_link|, |hash[h]->hash_link->hash_link|,~\dots, until either finding the desired name or encountering the null pointer. Thus the hash table consists of entries of type |id_pointer|; it is maintained by the function |id_lookup|, which finds a given identifier and returns the appropriate |id_pointer|. The basic matching is done by the function |names_match|, which is slightly different in \.{CTANGLE} and \.{CWEAVE}. If there is no match for the identifier, it is inserted into the table. @< Declarations... @>= extern id_pointer hash[]; #define hash_end (&hash[hash_size]) /* end of |hash| */ id_pointer id_lookup(char*,char*,int); @~The |hash| table has size~|hash_size|. @= id_pointer hash[hash_size]; /* heads of hash lists */ @ Initially all the hash lists are empty. Although strictly speaking the array |hash| should be initialised to null pointers by the compiler, we don't count too much on this in case such pointers are not represented by null bit patterns. @= {@; int i=hash_size; do hash[--i]=NULL; while(i>0); } @ Here is the main function for finding identifiers (and index entries). The parameters |first| and |last| point to the beginning and end of the string. The parameter |ilk| is used by |CWEAVE| only in the course of the matching process; within |id_lookup| it is just passed on to the functions |names_match| and~|init_id_name|, which are defined in \.{CTANGLE} and \.{CWEAVE} separately. 
We facilitate the initialisation of the reserved words in |CWEAVE| by allowing the use of null-terminated strings rather than using a pointer to the end of the string: in this case |first| should point to such a string, and |last==NULL|. @c id_pointer id_lookup (char* first,char* last,int ilk) /* look up an identifier */ { int l,h; /* length and hash code of the given identifier */ if (last==NULL) last=first+(l=(int)strlen(first)); /* null-terminated string */ else l=(int)(last-first); /* compute the length */ @ @ } @ A simple hash code is used: If the sequence of character codes is $c_1c_2\ldots c_n$, its hash value will be $$ (2^{n-1}c_1+2^{n-2}c_2+\cdots+c_n)\bmod\hbox{|hash_size|}. $$ The purist might worry about the empty string, which, although of course impossible as identifier, might be flagged as an index entry; such an index entry is however explicitly ruled out in |CWEAVE|. @= { char* p=first; h=*p; while (++p= { id_pointer p=hash[h]; /* the head of the hash list */ while (p!=NULL && !names_match(p,first,l,ilk)) p=p->hash_link; if (p==NULL) /* we haven't seen this identifier before */ @< Make |p| point to a fresh |id_node| that refers to a string copied from |first|, and prepend the node to hash list |h| @> return p; } @ The information associated with a new identifier must be initialised in |CWEAVE| in a way that does not apply to~\.{CTANGLE}; hence the function |init_id_name|. @< Make |p| point to a fresh |id_node|... @>= { p=id_ptr; /* this is where the new name entry will be created */ if (id_ptr++>=id_table_end) overflow ("identifier"); name_begin(p)=store_string(first,l); if (program==cweave) init_id_name(p,ilk); p->hash_link=hash[h]; hash[h]=p; /* insert |p| at beginning of hash list */ } @ The names of modules are stored in |byte_mem| together with the identifier names, but a hash table is not used for them because \.{CWEB} needs to be able to recognise a module name when given a prefix of that name. 
To this end the |mod_info| structures are linked into a binary search tree. The only unconventional thing about this search tree is that the search key is not necessarily equal to the full string stored but that a prefix is decisive for matching purposes: once that prefix matches, not matching the remainder of the string is considered to be an error. The reason for this is that the module name may have been specified first by a short prefix and is later extended; we need to be aware of the ambiguity when a second incompatible extension of the original prefix is attempted. Therefore a field |key_length| is included in |struct mod_info| that indicates the length of the prefix that should determine a unique match; its value is the length of the shortest partial specification of this module name that has been encountered so far. @< More fields of |struct mod_info| @>= struct mod_info *llink,*rlink; /* left and right links in binary search tree */ int key_length; /* number of characters decisive for match */ @ There is one more attribute that must be recorded for a module name, namely whether the string accessed by the |byte_start| field is the full module name, or just the longest prefix specified so far (i.e., all occurrences so far ended with `\.{...}'). Rather than extending |struct mod_info| with another (boolean) field or encoding this information in one of the other fields we use a small trick to record this information. All full module names will be stored in |byte_mem|, and since that is filled with null-terminated strings, we will have for any |mod_pointer p@;| representing a complete module name that |p->byte_start[-1]=='\0'|. The macro |complete_name| defined above tests this condition, and we shall store incomplete module names in such a way that the condition fails for them. 
This method costs just one byte of temporary storage for each module name that is incompletely specified on its first occurrence, except that a one-time sacrifice of another byte is needed to guarantee that even the first name is preceded by a null byte. @< Initialise... @>= *byte_ptr++='\0'; /* prefix a null byte to the first string */ @~Like most trees, the search tree for module names starts at its |root|. @< Declarations... @>= extern mod_pointer root; @~The search tree starts out empty. @< Definitions... @>= mod_pointer root=NULL; /* the root of the binary search tree for module names */ @ The set of all possible strings can be conceptually arranged in an infinite tree with the empty string at its root, where the direct descendents of a string are those strings obtained by appending a single letter to it, listed in alphabetical order. The position of one string relative to another in this tree is one of five possibilities: to the left of, to the right of, ancestor of, descendent of, or equal to. The type |enum mod_comparison| encodes these five possibilities. @< Prototypes of local functions @>= enum mod_comparison @/ { less, /* the first name is lexicographically less than, but no prefix of the second */ equal, /* the first name is equal to the second */ greater, /* the first name is lexicographically greater than, but no extension of the second */ prefix, /* the first name is a proper prefix of the second */ extension /* the first name is a proper extension of the second */ }; @~The function |mod_name_cmp| will determine which one of the relations described above holds for a given pair of strings, each represented by a pointer to its beginning and its length. @c local enum mod_comparison mod_name_cmp (char* p, int l1, char* q, int l2) { int l= l1=0) if (*p++!=*q++) return *--p<*--q ? less : greater; return l1l2 ? 
extension : equal; } @ When a new module name is found and approved, we first store the string representing it in an appropriate place, and then call |new_mod_node| to install a node for the name in the |mod_table| array and prepare it for inclusion in the search tree. The information associated with the new node must be initialised in a slightly different way in |CWEAVE| than in |CTANGLE|; hence the call of |init_module_name|. @c local mod_pointer make_mod_node (char* name) { mod_pointer node=mod_ptr; /* allocate new node */ if (mod_ptr++>=mod_table_end) overflow ("module name"); name_begin(node)=name; node->llink=NULL; node->rlink=NULL; init_module_name(node); /* initialise new node */ return node; } @ The function |mod_name_lookup| is used to look up a complete module name in the search tree, inserting it if necessary, and returns a pointer to where it was found. Its parameters give the beginning and length of the module name. The name might be illegal, for instance if it is a prefix of an existing name, in which case |NULL| is returned rather than a pointer into |mod_table|, after printing an error message. @c local mod_pointer mod_name_lookup (char* name, int l) /* finds complete module name */ { mod_pointer p; /* current node of the search tree */ mod_pointer* loc=&root; /* |p| will come from this location */ while ((p=*loc)!=NULL) { int l0=p->key_length; char* key=name_begin(p); switch (mod_name_cmp(name,l,key,l0)) { case less: loc=&p->llink; break; case greater: loc=&p->rlink; break; case equal: case extension: @< Check that |name| matches the remainder of |key|; if so, complete |p| if necessary and |return p|, if |name| is a prefix of~|key| fall through, and otherwise report an error and |return NULL| @> case prefix: err_print("! Incompatible module name"); @.Incompatible module name@> print("\nName is a prefix of <%s%s>.\n" @.Name is a prefix of <...>@> ,key, complete_name(p) ? 
"" : "..."); return NULL; /* dummy module name */ } } @< Copy |name| into |byte_mem|, install a new node in the tree at |*loc|, and |return| it @> } @ When the name is not found in the tree, a new module has been specified whose name is complete from the start, and can therefore be installed in |byte_mem|. Its |key_length| field is set to the length~|l| of the name as specified here, but it might be decreased later. @< Copy |name| into |byte_mem|... @>= { (p=make_mod_node(store_string(name,l)))->key_length=l; /* prepare new node */ return *loc=p; /* install new node into tree */ } @ When the module name |name| being looked up matches the string |key| in a node~|p| of the search tree for the first |p->key_length| characters, then this is guaranteed to be the only such match in the tree. It is required that the rest of |key| matches the remainder of~|name| as well, and if |complete_name(p)| holds then the match must exhaust~|name|. @< Check that |name| matches the remainder of |key|... @>= { enum mod_comparison cmp= mod_name_cmp(name+l0,l-l0,key+l0,(int)strlen(key+l0)); switch(cmp) { case less: case greater: err_print("! Incompatible module name"); @.Incompatible module name@> print("\nName inconsistently extends <%.*s...>.\n",l0,key); @.Name inconsistently extends <...>@> return NULL; case extension: case equal: if (complete_name(p)) if (cmp==equal) return p; else { err_print("! Incompatible module name"); @.Incompatible module name@> print("\nPrefix exists: <%s>.\n",key); return NULL; @.Prefix exists@> } name_begin(p)=store_string(name,l); /* install |name| in place of |key| */ @< Deallocate memory for |key| @> return p; } } @ The function |prefix_lookup| is similar to |mod_name_lookup|, but it is called when the specification of a module name ends with `\.{...}'. Here, unlike in |mod_name_lookup|, there is the possibility that there is more than one match. 
The function decides whether there are $0$,~$1$, or~$>1$ matches present in the tree, but in the final case it need not find all matches. There is a match of |name| with node~|p| if the first |p->key_length| characters at |name_begin(p)| are equal, an extension or a prefix of~|name| (in the last case we should not have |completed(p)|). It is clear that if there is a match with two distinct nodes, then there is also a match with their closest common ancestor in the search tree, and since we do not allow inclusions among the significant parts of the keys in the tree, this situation can only occur if |name| is a proper prefix for all its matches. Hence, once a first match is found, any further match must be among its descendents in the tree. The value of |loc| is maintained as in |mod_name_lookup| to allow insertion of a new node if no match is found; however, once we have |match!=NULL|, the value of~|loc| becomes irrelevant. @c local mod_pointer prefix_lookup (char* name,int l) /* finds module name given a prefix */ { mod_pointer p=root,* loc=&root; /* current node and where it comes from */ mod_pointer match=NULL; /* the first matching node, if any */ mod_pointer saved=NULL; /* another subtree that might have matches */ while (p!=NULL) { int l0=p->key_length; char* key=name_begin(p); switch (mod_name_cmp(name,l,key,l0)) { case less: p=*(loc=&p->llink); break; case greater: p=*(loc=&p->rlink); break; case equal: return p; /* a match, and no other matches are possible */ case extension: @< Check that |name| matches the remainder of~|key|; if so, extend its string if necessary and |return p|, otherwise report an error and |return NULL| @> case prefix: if (match!=NULL) {@; err_print("! 
Ambiguous prefix"); return NULL; } @.Ambiguous prefix@> match=p; saved=p->rlink; p=p->llink; /* |loc| is irrelevant now */ } if (p==NULL && match!=NULL) p=saved, saved=NULL; /* search other subtree */ } if (match==NULL) @< Copy the incomplete |name| in a temporary place, install a new node in the tree at |*loc|, and |return| it @> match->key_length=l; /* |name| is a shorter prefix than used before */ return match; } @ Incomplete nodes are not stored in |byte_mem|, but temporarily set aside in dynamic memory, which will be freed when the name is completed. A non-zero byte is installed at the beginning to make the |complete_name| macro function properly. Like for complete names, the initial |key_length| is the full length of the string specified. @< Copy the incomplete |name|... @>= { char* key=(char*)malloc(l+2); if (key==NULL) fatal("Out of dynamic memory!"); *key++='\1'; /* ensure that |complete_name(p)| is false afterwards */ strncpy(key,name,l); key[l]='\0'; /* store the incomplete name */ (p=make_mod_node(key))->key_length=l; /* prepare new node */ return *loc=p; /* install new node into tree */ } @~When freeing the dynamic memory, we mustn't forget to subtract~1. @< Deallocate memory for |key| @>= @+ free(key-1); @ When we come here, we have found that |name| extends the first |p->key_length| characters of the name stored in~|p|, and therefore cannot similarly match any other nodes. @< Check that |name| matches the remainder of~|key|; if so, extend its string if necessary and |return p|, otherwise report an error and |return NULL| @>= { enum mod_comparison cmp= mod_name_cmp(name+l0,l-l0,key+l0,(int)strlen(key+l0)); switch(cmp) { case less: case greater: err_print("! Incompatible module name"); @.Incompatible module name@> print("\nName inconsistently extends <%.*s...>.\n",l0,key); @.Name inconsistently extends <...>@> return NULL; case prefix: case equal: return p; case extension: if (complete_name(p)) { err_print("! 
Incompatible module name"); @.Incompatible module name@> print("\nPrefix exists: <%s>.\n",key); return NULL; @.Prefix exists@> } @< Replace name stored in |p| by the larger prefix |name| @> return p; } } @ We come here in the rather unusual case that a second larger prefix of the same module name is given before the full name is specified. We discard the old memory for |key| and replace it by a fresh copy of |name|, which is easier and probably more efficient than to try to reuse the old |key| by using |realloc|. As for the initial dynamic allocation, we must not forget the byte at~|key[-1]| here. @< Replace name stored in |p| by the larger prefix |name| @>= { @< Deallocate memory for |key| @> if ((key=(char*)malloc(l+2))==NULL) fatal("Out of dynamic memory!"); *key++='\1'; /* ensure that |complete_name(p)| is false afterwards */ strncpy(key,name,l); key[l]='\0'; /* store the incomplete name */ name_begin(p)=key; /* install new name in node |p| */ } @* Lexical routines. Much of the code of \.{CWEB} deals with lexical matters such as recognising tokens, control texts, etc. This inevitably involves some complications since the relevant set of lexical rules varies from place to place in the source file, being sometimes that of \TeX, sometimes of \Cee, and sometimes rules defined by \.{CWEB} itself. Furthermore the source file is read in on three different occasions: once by \.{CTANGLE} and twice by \.{CWEAVE}; the level of detail in which the source is inspected differs between these passes. We can alleviate the task of lexical recognition by collecting in the common code a few functions that will scan specific lexical items; we do this for various classes of ``large'' lexical items: module names, control texts and strings. In all cases we copy the characters of the lexical item to a separate place, making minor modifications as necessary (for instance replacing \:@@ by~`\.@@'). 
To this end there is a buffer |mod_text| used to copy the characters into; its name is derived from its most important purpose of storing module names, for which a separate buffer is definitely needed since these names can be spread over several lines of input. There are also two pointers |id_first| and~|id_loc| that are used to point to the beginning and end of lexical entities. @< Declarations... @>= extern char mod_text[], *id_first, *id_loc; #define mod_text_end (&mod_text[longest_name+1]) /* end of |mod_text| */ mod_pointer get_module_name (void); boolean get_control_text(void); void get_string(void); @~The character |mod_text[0]| will not be used to store any actual characters, whence we make |mod_text| one slot longer than |longest_name|. @= char mod_text[longest_name+1]; /* name being sought for */ char *id_first; /* where the current identifier begins in the buffer */ char *id_loc; /* just after the current identifier in the buffer */ @ The function for scanning and looking up module names is |get_module_name|; it will read the module name from the input, and decide how to search depending on the presence or absence of an ellipsis. This function is called when \:< has just been scanned; it will scan the module name and look it up by |mod_name_lookup| or |prefix_lookup| as appropriate; it returns a |mod_pointer| to the name found, or |NULL| if the name was found to be erroneous. Consequent to the limit, \.{CWEB} will treat `\.{@@<...@@>}' as a valid module name if and only if if there exactly one module name in the entire program. @c mod_pointer get_module_name (void) { @< Put module name into |mod_text|, set |id_first| and |id_loc| appropriately @> { int l=(int)(id_loc-id_first); return l>=3 && strncmp(id_loc-3,"...",3)==0 @/ ? prefix_lookup(id_first,l-3) : mod_name_lookup(id_first,l); } } @ Module names are placed into the |mod_text| array with consecutive spaces, tabs, and carriage-returns replaced by single spaces. 
There will be no spaces at the beginning or the end. We set |mod_text[0]=' '| to facilitate this, and put |id_first=&mod_text[1]| for the calls to the lexical routines that use |mod_text|. @= mod_text[0]=' '; @~A module name of exactly |longest_name| characters will cause an error message, even though all its characters are stored; this is because we don't take the effort to distinguish a `full' state of the buffer from an `overflowed' one. @< Put module name... @>= { eight_bits c; char* k=mod_text; /* points to last recorded character */ do { if (!find_char()) {@; err_print("! Input ended in module name"); break; } @.Input ended in module name@> c=*loc++; @ if (isspace(c)) c=' '; /* convert tabs, newlines etc. */ if (k=mod_text_end-1) @/{@; print("\n! Module name too long: "); @.Module name too long@> term_write(id_first,25); err_print(".."); } id_loc= *k==' ' && k>mod_text ? k : k+1; /* point after last non-space character */ } @ Except for the beginning of new sections we are not fussy about unexpected control sequences within module names, since we might be within `\pb'; any problems will be eventually be detected during the output of module names by |CWEAVE|. So we are only looking for \:> (incidentally this means that control texts like those introduced by \:t are forbidden in module names). @< Handle |c=='@@'|... @>= if (c=='@@') { if ((c=*loc++)=='>') break; if (isspace(c) || c=='*' || c=='~') @/{@; err_print("! Module name didn't end"); loc-=2; break; } @.Module name didn't end@> if (k into the |mod_text| array, and sets |id_first| and |id_loc| appropriately. The function returns a boolean value telling whether the control text was in fact empty, since that case sometimes needs special treatment. Always using |get_control_text| guarantees uniformity in the recognition of various types of control texts (not counting module names). 
@c boolean get_control_text(void) { char c,* k=id_first=&mod_text[1]; /* points after last recorded character */ do if ((*k++=*loc++)=='@@') if ((c=*loc++)!='@@') { if (c!='>') err_print("! Control codes are forbidden in control text"); @.Control codes are forbidden...@> return (id_loc=k-1)==id_first; } while(loc<=limit); err_print("! Control text didn't end"); @.Control text didn't end@> return (id_loc=k)==id_first; } @ And here is a similar function |get_string| that is used to scan strings and character constants. These can contain newlines or instances of their own delimiters if they are protected by a backslash. We follow this convention, but do not allow the string to be longer than |longest_name|. The macro |copy_char| copies a character to |mod_text| without risking overflow; the |else| branch of its definition serves only to ensure that any side effect of its argument is always performed. @d copy_char(c) @+ if (id_loc=limit) {@; err_print("! String didn't end"); loc=limit; break; } @.String didn't end@> copy_char(c=*loc++); if (c=='\\') if (loc break; } else if (!including_header_file && c=='@@') if (*loc=='@@') ++loc; /* undouble \:@@ */ else err_print("! Double @@ required in strings"); @.Double @@ required...@> } while (c!=delim); if (id_loc>=mod_text_end) @/{@; print("\n! String too long: "); @.String too long@> term_write(mod_text+1,25); err_print(".."); } } @* Reporting errors to the user. A global variable called |history| will contain one of four values at the end of every run: |spotless| means that no unusual messages were printed; |harmless_message| means that a message of possible interest was printed but no serious errors were detected; |error_message| means that at least one error was found; |fatal_message| means that the program terminated abnormally. The value of |history| does not influence the behaviour of the program; it is simply computed for the convenience of systems that might want to use such information. @< Declarations... 
@>= extern history; /* indicates how bad this run was */ extern void err_print (char*), wrap_up (void), print_stats (void), fatal (char*,...); @~Despite the name |history|, each run starts in an unblemished state. @< Definitions... @>= int history=spotless; /* indicates how bad this run was */ @ The call |err_print("! Error message")| will report an error to the user, printing the error message at the beginning of a new line and then giving an indication of where the error was spotted in the source file. Note that the string passed to |err_print| does not end with a period, since one will be automatically supplied, as will an initial newline if the string begins with~|"!"|. @c void err_print (char *s) /* prints `\..' and location of error message */ { print(*s=='!' ? "\n%s." : "%s.",s); if (web_file_open) @ update_terminal(); mark_error(); } @ The error locations can be indicated by using the global variables |loc|, |cur_line|, |cur_file_name| and |changing|, which tell respectively the first unlooked-at position in |buffer|, the current line number, the current file, and whether the current line is from |change_file| or |cur_file|. @< Print error location based on input buffer @>= { char *k, *l=(locbuffer) { for (k=buffer; k @c void wrap_up (void) { #ifdef STAT if (show_stats) print_stats(); /* print statistics about memory usage */ #endif @< Print the job |history| @> exit(history>harmless_message); } @ A spotless status is reported if |show_happiness| is true, any other exit status is always reported. @< Print the job |history| @>= { static char* mess[]= { "No errors were found.", "Did you see the warning message above?", "Pardon me, but I think I spotted something wrong", "That was a fatal error, my friend." 
}; if (show_happiness || history>0) print("\n(%s)\n",mess[history]); } @ When an error or overflow condition is detected for which no recovery is possible, we call |fatal|, which aborts the program after pointing out the source of trouble and wrapping up as graciously as possible. @c void fatal(char* s,...) { va_list p; va_start(p,s); vprintf(s,p); va_end(p); err_print(""); /* print reason and location of fatal stop */ history=fatal_message; wrap_up(); } @* Command line arguments. The user calls \.{CWEAVE} and \.{CTANGLE} with one or more arguments on the command line. These are either file names or sets of flags to be turned on (beginning with |"+"|) or off (beginning with |"-"|); in case the special flag |'i'| occurs, the remainder of that argument is interpreted as a string rather than as a set of flags. The following globals are for communicating the user's desires to the rest of the program. The various file name variables contain strings with the names of those files. Most of the flags are undefined but available for future extensions. @< Declarations... @>= extern boolean flags[]; extern char C_file_name[],idx_file_name[],scn_file_name[]; @~@< Definitions... @>= boolean flags[UCHAR_MAX+1]; /* an option for each character code */ char C_file_name[max_file_name_length]; /* name of |C_file| */ local char tex_file_name[max_file_name_length]; /* name of |tex_file| */ char idx_file_name[max_file_name_length]; /* name of index file */ char scn_file_name[max_file_name_length]; /* name of module names file */ local boolean change_file_explicit=false; /* was a change file argument specified? */ @ We now must look at the command line arguments and set the file names accordingly. @< Prototypes of local functions @>= local void scan_args (int argc,char** argv); @~At least one file name must be present: the \.{CWEB} file. It may have an extension, or it may omit the extension to get |".w"| or |".web"| added. 
The \TeX\ output file name is formed by replacing the \.{CWEB} file name extension by |".tex"|, and the \Cee\ file name by replacing the extension by~|".c"|. If a second file name is given, it is the change file, again either with an extension or without one to get |".ch"|. An omitted change file argument means that the change file name is that of the \.{CWEB} file with the extension replaced by |".ch"| if that file exists, or none at all otherwise. If present, a third file name replaces the default output file name, possibly including the extension. @c local void scan_args (int argc,char** argv) { char *dot_pos; /* position of rightmost |'.'| in the argument */ int files_found=0, paths_found= at_h_path[0].name==NULL ? 0 : 1; while (--argc>0) /* first ``argument'' (program name) is irrelevant */ if (((*++argv)[0]=='+' || (*argv)[0]=='-') && (*argv)[1]!='\0') @ else { if (strlen(*argv)+5>max_file_name_length) /* we need room to add things like `\.{.web}' */ fatal("! Filename too long:\n%s", *argv); dot_pos=strrchr(*argv,'.'); switch (++files_found) { case 1: @< Make |web_file_name|, and defaults for other file names, from~|*argv| @> @+ break; case 2: @< Make |change_file_name| from |*argv| @> @+ break; case 3: @< Make output file names from |*argv| @> @+ break; default: @< Print usage error message and quit @> } } if (files_found==0) @< Print usage error message and quit @> if (paths_found= #ifndef CPPEXT #define CPPEXT "C" /* extension for \Cpp\ file names; should not exceed 3 characters */ #endif { if (dot_pos==NULL) sprintf(web_file_name,"%s.w",*argv); else { sprintf(web_file_name,"%s",*argv); /* use file name and extension */ *dot_pos='\0'; /* truncate the name before the dot */ } sprintf(alt_web_file_name,"%s.web",*argv); sprintf(change_file_name,"%s.ch",*argv); if (program==ctangle) sprintf(C_file_name,"%s.%s",*argv, C_plus_plus ? 
CPPEXT : "c"); else { sprintf(tex_file_name,"%s.tex",*argv); sprintf(idx_file_name,"%s.idx",*argv); sprintf(scn_file_name,"%s.scn",*argv); } } @ If in place of the change file name a `\.-' is specified, we clear the |change_file_name|, and if a `\.+' is specified we retain the default (but |files_found| is increased so that the next file name will be the output file); otherwise, |change_file_explicit| is raised and the default change file name is replaced by the given one. @< Make |change_file_name|... @>= if ((*argv)[0]=='-') change_file_name[0]='\0'; else if ((*argv)[0]!='+') @/{@; change_file_explicit=true; sprintf(change_file_name,dot_pos==NULL ? "%s.ch" : "%s", *argv); } @ If an output file name is given when calling \.{CWEAVE}, its base name is also used for |idx_file_name| and |scn_file_name|. @< Make output file names... @>= if (program==ctangle) if (dot_pos!=NULL) sprintf(C_file_name, "%s", *argv); else sprintf(C_file_name,"%s.%s", *argv, C_plus_plus ? CPPEXT : "c"); else { if (dot_pos!=NULL) {@; sprintf(tex_file_name, "%s", *argv); *dot_pos='\0'; } else sprintf(tex_file_name,"%s.tex", *argv); sprintf(idx_file_name,"%s.idx",*argv); sprintf(scn_file_name,"%s.scn",*argv); } @ The |flags| are turned off initially; any flags that are on by default are set before calling |common_init|. @< Set the default options common to \.{CTANGLE} and \.{CWEAVE} @>= show_banner=show_happiness=show_progress=true; @~Flags are made case-insensitive, since some operating systems do not allow passing of both lower and upper case arguments. The |'i'| flag gets a special treatment. @< Handle flag... 
@>= { boolean flag_change=(**argv == '+'); char* p=&(*argv)[1]; unsigned char c; while ((c=*p++)!='\0') if ((c=tolower(c))!='i') flags[c]=flag_change; else @< Store string |p| as new include path and |break| @> } @ We copy include paths into |byte_mem|; since we are at the very beginning of the run, there is no chance that we overflow |byte_mem| already, so that we need not include a fourth error message below. @< Store string |p| as new include path and |break| @>= { size_t l=strlen(p); if (l==0) err_print("! Empty include path"); @.Empty include path@> else if (l>max_path_length) err_print("! Include path too long"); @.Include path too long@> else if (paths_found>=max_include_paths) err_print("! Too many include paths"); @.Too many include paths@> else { at_h_path[paths_found].length=(int)l; at_h_path[paths_found++].name=strcpy(byte_ptr,p); byte_ptr+=l+1; } break; } @ The usage message gives only a sketch of the proper way to call the \.{CWEB}. programs; more details can be found in the manual. @< Print usage error message and quit @>= fatal("! Usage:\n" @+ "c%se [(+|-)options] cwebfile[.w] [(changefile[.ch]|+|-) [outputfile[.%s]]]" , program==ctangle ? "tangl" : "weav" , program==ctangle ? "c" : "tex"); @* Output. The only thing done in the common code for the output files is declaring the variables through which they are accessed, and opening them. @< Declarations... @>= extern FILE *C_file, *tex_file; void open_output_file(void); @~Having separate variables for the output files of \.{CTANGLE} and \.{CWEAVE} only serves to give them more meaningful names. @= FILE *C_file; /* where output of \.{CTANGLE} goes */ FILE *tex_file; /* where output of \.{CWEAVE} goes */ @~@c void open_output_file(void) { char* name; FILE** file; if (program==ctangle) {@; name=C_file_name; file=&C_file; } else {@; name=tex_file_name; file=&tex_file; } if ((*file=fopen(name,"w"))==NULL) fatal("! 
Cannot open \"%s\" as output file",name); @.Cannot open output file@> } @ All regular terminal output passes through the function |print|, which takes the place of |printf|; it suppresses initial newlines if the line is already empty. The function |print_progress| and |print_section_progress| are used to display progress reports on the screen. @< Declarations...@>= void print(char*,...), print_progress(char*), print_section_progress(void); @~A variable |term_line_empty| keeps track of whether we are at the beginning of an output line, so that we can force certain terminal output to start on a new line without producing empty lines. @= local boolean term_line_empty=true; /* has anything been written to the current line? */ @~Any initial or final newline produced by~|print| is caused by the format string, not by any of the arguments following it; this makes it easy to maintain |term_line_empty|. If a function produces terminal output otherwise than through one of the three functions below, it should take care of |term_line_empty| itself; for instance |err_print| produces the error context using |printf| and |putchar|, knowing that |term_line_empty==false| at that time, which is also the correct value afterwards. @c void print(char* s,...) { va_list p; va_start(p,s); if (term_line_empty && *s=='\n') ++s; /* avoid printing empty line */ vprintf(s,p); va_end(p); /* print formatted value */ term_line_empty= s[strlen(s)-1]=='\n'; update_terminal(); } void print_progress (char* s) @+{@; if (show_progress) print(s); } void print_section_progress (void) @+{@; if (show_progress) print("*%u",section_count); } @* Index. % CWEAVE will add the index to this section. cwebx-3.04.orig/compare.tex100644 1750 1750 14524 5666373556 13545 0ustar jdgjdg\input cwebxmac \N0 1. Comparing text files. This is an entirely trivial program, that tests whether two text files are equal, and if not so, points out the first point of difference. 
\Y\B\h$\.{}$\par \B\h$\.{}$\par \Y\B$\&{typedef}~\&{char}~\&{bool};$\par \fi \M2. The outline of the program is simple. We read characters from both input files into $ c_1$ and~$ c_2$ until the comparison is complete. Line and column counts are maintained in $\\{line}$ and~$\\{col}$. \Y\B\D$\\{left\_margin}$\5 $\T{1}$\C{ leftmost column number; change to 0 if you prefer }\par \Y\B$\X5:Functions\X$\7 \&{int} $\\{main}$\5 $(\1\1\1\&{int}~ n,\31~\&{char}~\m*\m*\\{arg}\2\2\2)$\6 $\a\{\1\&{FILE}~\m* f_1,\31~\m* f_2;$\C{ the two input files }\6 $\&{int}~ c_1,\31~ c_2,\31~\\{col}\K\\{left\_margin};$\6 $\&{long}~\\{line}\K\T{1};$\7 $\X6:Open the files $ f_1$ and~$ f_2$, taking their names from the command line or from the terminal; in case of an error for which no recovery is possible, call $\\{exit}(\T{1})$\X$\6 $\X3:Search for first difference, leaving $ c_1\I c_2$ if and only if a difference was found\X$\6 $\X4:Report the outcome of the comparison\X$\6 $\&{return}~\T{0};$\C{ successful completion }\2\6 $\}$\par \fi \M3. The heart of the program is this simple loop. When we reach the end of one of the files, the files match if and only if the other file has also reached its end. For this reason the test $ c_1\E c_2$, which requires characters to be read from both files, must precede the test for file end; when only one file ends, it is the former test which breaks the loop. \Y\B\4$\X3:Search for first difference, leaving $ c_1\I c_2$ if and only if a difference was found\X\EQ{}$\6 \&{while}~$(( c_1\K\\{getc}( f_1))\E( c_2\K\\{getc}( f_2))\W c_1\I\.{EOF})\1$\6 \&{if}~$( c_1\E\.{'\\n'})\1$\5 $\{\1$\5 $\PP\\{line};$\5 $\\{col}\K\\{left\_margin};\2$\5 $\}$\5 \2\&{else}\1\5 $\PP\\{col};\2$\2\par \U 2.\fi \M4. When the first difference occurs at the end of one of the files, or at the end of a line, we give a message indicating this fact. 
\Y\B\4$\X4:Report the outcome of the comparison\X\EQ{}$\6 \&{if}~$( c_1\E c_2)\1$\5 $\\{printf}(\.{"Files\ match.\\n"});\2$\6 \&{else}\6 $\a\{\1\\{printf}(\.{"Files\ differ.\\n"});$\6 \&{if}~$( c_1\E\.{EOF}\V c_2\E\.{EOF})$\1\6 $\{\1$\5 $\\{the\_file}( c_1\E\.{EOF});$\5 $\\{printf}(\.{"is\ contained\ in\ the\)\ other\ as\ initial\ seg\)ment.\\n"});% \2$\5 $\}\2$\6 \&{else}~\&{if}~$( c_1\E\.{'\\n'}\V c_2\E\.{'\\n'})$\1\6 $\{\1$\5 $\\{the\_file}( c_1\E\.{'\\n'});$\5 $\\{printf}(\.{"has\ a\ shorter\ line\ \)number\ \%ld\ than\ the\ o\)ther.% \\n"},\31\\{line});\2$\5 $\}\2$\6 \&{else}\1\5 $\\{printf}(\.{"First\ difference\ at\)\ line\ \%ld,\ column\ \%d.\)\\n"},\31% \\{line},\31\\{col});\2$\2\6 $\}$\par \U 2.\fi \M5. The function $\\{the\_file}$ starts a sentence about the first or second file, depending on its boolean argument. \Y\B\4$\X5:Functions\X\EQ{}$\6 \&{void} $\\{the\_file}$\5 $(\1\1\1\&{bool}~\\{is\_first}\2\2\2)$\5 $\{\1$\5 $\\{printf}(\.{"The\ \%s\ file\ "},\31\\{is\_first}\?\.{"first"}:\.{"second"});% \2$\5 $\}$\par \A 7. \U 2.\fi \M6. There can be be zero, one or two command line arguments. If there are none, the user is prompted to supply them, and if there are two these are taken as the file names, prompting the user only in case a file could not be opened. In case just one argument is present, the first file is assumed to be the standard input, which does not have to be opened; in this case however we will not read a file name from terminal in case the second file cannot be opened. 
\Y\B\D$\\{read\_mode}$\5 $\.{"r"}$\par \Y\B\4$\X6:Open the files $ f_1$ and~$ f_2$, taking their names from the command line or from the terminal; in case of an error for which no recovery is possible, call $\\{exit}(\T{1})$\X\EQ{}$\6 $\MM n;$\5 $\PP\\{arg};$\C{ ignore ``argument'' 0, which is the program name }\6 \&{if}~$( n\E\T{0})$\1\6 $\{\1$\5 $\\{open\_file}(\m\AND f_1,\31\.{"First\ file\ to\ compa\)re"},\31\NULL);$\5 $\\{open\_file}(\m\AND f_2,\31\.{"Second\ file\ to\ comp\)are"},\31\NULL);\2$\5 $\}\2$\6 \&{else}~\&{if}~$( n\E\T{1})$\6 $\a\{\1 f_1\K\\{stdin};$\6 \&{if}~$(( f_2\K\\{fopen}(\m*\\{arg},\31\\{read\_mode}))\E\NULL)\1$\5 $\{\1$\5 $\\{printf}(\.{"Could\ not\ open\ file\)\ \%s.\\n"},\31\m*\\{arg});$\5 $\\{exit}(\T{1});\2$\5 $\}\2$\2\6 $\}$\6 \&{else}~\&{if}~$( n\E\T{2})$\6 $\a\{\1\\{open\_file}(\m\AND f_1,\31\.{"Give\ another\ first\ \)file"},\31\m*% \\{arg}\PP);$\5 $\\{open\_file}(\m\AND f_2,\31\.{"Give\ another\ second\)\ file"},\31\m*% \\{arg});\2$\6 $\}$\6 \&{else}\1\5 $\{\1$\5 $\\{printf}(\.{"No\ more\ than\ two\ co\)mmand\ line\ arguments\ \)are\ allowed.\\n"});$\5 $\\{exit}(\T{1});\2$\5 $\}\2$\par \U 2.\fi \M7. The function $\\{open\_file}$ will try to open the file $\\{name}$ for reading, and if this fails it will prompt for another file name until it has success. If called with $\\{name}\E\NULL$, the function starts with prompting right away. \Y\B\4$\X5:Functions\X\PE{}$\6 \&{void} $\\{open\_file}$\5 $(\1\1\1\&{FILE}~\m*\m* f,\31~\&{char}~\m*\\{prompt},\31~\&{char}~\m*\\{name}\2% \2\2)$\6 $\a\{\1\&{char}~\\{buf}[\T{80}];$\7 \&{if}~$(\\{name}\E\NULL\V(\m* f\K\\{fopen}(\\{name},\31\\{read\_mode}))\E% \NULL)\1$\6 \&{do}\1\5 $\{\1$\5 $\\{printf}(\.{"\%s:\ "},\31\\{prompt});$\5 $\\{fflush}(\\{stdout});$\5 $\\{scanf}(\.{"\%79s"},\31\\{buf});\2$\5 $\}$\2\6 \&{while}~$((\m* f\K\\{fopen}(\\{buf},\31\\{read\_mode}))\E\NULL);$\2\2\6 $\}$\par \fi \N0 8. Index. \fi \inx \@m\\{arg}, \[2], 6. \@h\&{bool}, \[1], 5. \@m\\{buf}, \[7]. \@m\\{col}, \[2], 3, 4. 
\@m c_1, \[2], 3, 4. \@m c_2, \[2], 3, 4. \@m\.{EOF}, 3, 4. \@m\\{exit}, 6. \@m f, \[7]. \@m\\{fflush}, 7. \@m\\{fopen}, 6, 7. \@m f_1, \[2], 3, 6. \@m f_2, \[2], 3, 6. \@m\\{getc}, 3. \@m\\{is\_first}, \[5]. \@m\\{left\_margin}, \[2], 3. \@m\\{line}, \[2], 3, 4. \@m\\{main}, \[2]. \@m n, \[2]. \@m\\{name}, \[7]. \@m\\{open\_file}, 6, \[7]. \@m\\{printf}, 4, 5, 6, 7. \@m\\{prompt}, \[7]. \@m\\{read\_mode}, \[6], 7. \@m\\{scanf}, 7. \@m\\{stdin}, 6. \@m\\{stdout}, 7. \@m\\{the\_file}, 4, \[5]. \fin \@$\X5, 7:Functions\X$ \U 2. \@$\X6:Open the files $ f_1$ and~$ f_2$, taking their names from the command line or from the terminal; in case of an error for which no recovery is possible, call $\\{exit}(\T{1})$\X$ \U 2. \@$\X4:Report the outcome of the comparison\X$ \U 2. \@$\X3:Search for first difference, leaving $ c_1\I c_2$ if and only if a difference was found\X$ \U 2. \con cwebx-3.04.orig/ctangle.c100644 1750 1750 62677 6470042127 13147 0ustar jdgjdg#define version_string "3.04" #define banner "This is CTANGLE (Version x" version_string ")\n" #define max_toks 150000L #define max_texts 2500 #define max_files 50 #define stack_size_max 50 #define max_indent 1000 #define variant text #define line_output flags['l'] #include #include #include #include #include #include "common.h" #define max_bytes 50000L #define max_modules 1000 #define max_idents 5000 #define max_sections 4000 #define hash_size 353 #define buf_size 100 #define longest_name 1000 #define long_buf_size (buf_size+longest_name) #define local static #define array_size(a)((int)(sizeof(a)/sizeof(a[0]))) #define false (boolean)0 #define true (boolean)1 #define ctangle 0 #define cweave 1 #define and_and 04 #define lt_lt 020 #define gt_gt 021 #define plus_plus 013 #define minus_minus 01 #define minus_gt 031 #define not_eq 032 #define lt_eq 034 #define gt_eq 035 #define eq_eq 036 #define or_or 037 #define find_char()(loc<=limit||get_line()) #define id_index(p)((sixteen_bits)((p)-id_table)) #define 
id_at(i)(&id_table[i]) #define mod_index(p)((sixteen_bits)((p)-mod_table)) #define mod_at(i)(&mod_table[i]) #define name_begin(p)((p)->byte_start) #define length(p)((int)(strlen(name_begin(p)))) #define name_end(p)(name_begin(p)+length(p)) #define complete_name(p)((p)->byte_start[-1]=='\0') #define print_mod(p) \ printf(": <%s%s>",name_begin(p),complete_name(p)?"":"...") #define spotless 0 #define harmless_message 1 #define error_message 2 #define fatal_message 3 #define mark_harmless() \ if(history==spotless)history=harmless_message;else #define mark_error()(history=error_message) #define overflow(t)fatal("\n! Sorry, %s capacity exceeded",t) #define confusion(s)fatal("\n! This can't happen: %s",s) #define show_banner flags['b'] #define show_happiness flags['h'] #define show_progress flags['p'] #define show_stats flags['s'] #define C_plus_plus flags['+'] #define compatibility_mode flags['c'] #define update_terminal()fflush(stdout) #define new_line()putchar('\n') #define term_write(string,leng)printf("%.*s",(int)(leng),string) #define tok_begin(p)(p)->tok_start #define tok_end(p)((p)+1)->tok_start #define text_table_end (&text_table[max_texts]) #define tok_mem_end (&tok_mem[max_toks]) #define store_byte(c) \ if(tok_ptr==tok_mem_end)overflow("token");else*tok_ptr++=c #define macro_flag (text_table_end-1) #define header_flag text_table_end #define next_sec(m)((m)->text_link) #define equiv equiv_or_xref #define verb_quote 0x2 #define join 0x3 #define cur_repl cur_state.repl_field #define cur_byte cur_state.byte_field #define cur_end cur_state.end_field #define cur_sec cur_state.sec_nr_field #define cur_ind cur_state.indent_field #define cur_state stack[0] #define stack_end (&stack[stack_size_max]) #define stack_empty()(stack_ptr== &stack[0]) #define C_printf(format,x)fprintf(C_file,format,x) #define C_putc(c)putc(c,C_file) #define put_indent() \ (indent_buffer[ind_i=cur_ind]='\0',C_printf("%s",indent_buffer)) #define append_white(c) \ 
if(ind_i>=max_indent)overflow("indent buffer"); \ else indent_buffer[ind_i++]=isspace(c)?c:' ' #define trans_limit 9 #define trans_of(c)c_trans[(unsigned char)(c)-0x80] #define translation_exists(c)(trans_of(c)[0]!='\0') #define comp_op(op) \ (C_printf(out_state==operator&&line_output?" %s":"%s",op) \ ,out_state=operator) #define code_of(c)ccode[(unsigned char)(c)] #define compress(char2,code) \ if(*loc==char2)return++loc,code #define preproc_directive 0 #define section_body 1 #define report(k,c,m) \ printf("%lu %ss (out of %lu)\n",(unsigned long)(c),k,(unsigned long)(m)) /*3:*//*10:*/ #line 122 "common.inc" boolean names_match(id_pointer,char*,int,int); void init_id_name(id_pointer,int); void init_module_name(mod_pointer);/*:10*//*15:*/ #line 116 "ctangle.w" typedef struct text {eight_bits*tok_start; struct text*text_link; }text,*text_pointer;/*:15*//*30:*/ #line 442 "ctangle.w" typedef struct {text_pointer repl_field; eight_bits*byte_field; eight_bits*end_field; sixteen_bits sec_nr_field; sixteen_bits indent_field; }output_state,*stack_pointer;/*:30*//*36:*/ #line 561 "ctangle.w" enum{identifier=0x80,section_start,section_end,line_mark};/*:36*//*44:*/ #line 747 "ctangle.w" enum{no_space,num_or_id,operator,literal};/*:44*//*54:*/ #line 936 "ctangle.w" enum {ignore=0x80, id_code,constant, verbatim, at_sign_image, join_code, ord, control_text, include_preproc, char_trans, format, definition, header, begin_C, module_name, new_section };/*:54*//*24:*/ #line 301 "ctangle.w" void phase_two(void); void output(text_pointer); void output_preproc_directives(void); void C_newline(void); void out_char(eight_bits);/*:24*//*80:*/ #line 1604 "ctangle.w" boolean mark_line(void);/*:80*//*89:*/ #line 1770 "ctangle.w" void phase_one(void);/*:89*//*16:*/ #line 136 "ctangle.w" text text_table[max_texts]; text_pointer text_ptr= &text_table[0]; eight_bits tok_mem[max_toks]; eight_bits*tok_ptr= &tok_mem[0];/*:16*//*20:*/ #line 197 "ctangle.w" text_pointer text_root=NULL; 
text_pointer*last_unnamed= &text_root;/*:20*//*23:*/ #line 291 "ctangle.w" boolean atp_seen=false;/*:23*//*26:*/ #line 343 "ctangle.w" mod_pointer output_file[max_files]; int output_file_count=0;/*:26*//*31:*/ #line 466 "ctangle.w" output_state stack[stack_size_max]; stack_pointer stack_ptr;/*:31*//*32:*/ #line 486 "ctangle.w" char indent_buffer[max_indent]; sixteen_bits ind_i;/*:32*//*35:*/ #line 543 "ctangle.w" int cur_val;/*:35*//*43:*/ #line 723 "ctangle.w" eight_bits out_state; boolean protect;/*:43*//*47:*/ #line 819 "ctangle.w" char c_trans[UCHAR_MAX+1-0x80][trans_limit+1];/*:47*//*55:*/ #line 962 "ctangle.w" eight_bits ccode[UCHAR_MAX+1];/*:55*//*60:*/ #line 1100 "ctangle.w" id_pointer cur_id; mod_pointer cur_mod;/*:60*//*70:*/ #line 1342 "ctangle.w" text_pointer cur_text; eight_bits next_control;/*:70*/ #line 79 "ctangle.w" int main(int argc,char* *argv) {program=ctangle; line_output=true; common_init(argc,argv);/*17:*/ #line 144 "ctangle.w" tok_begin(text_ptr)=tok_ptr;/*:17*//*48:*/ #line 827 "ctangle.w" if(compatibility_mode) {unsigned char c=UCHAR_MAX; do sprintf(trans_of(c),"X%X",c);while(--c>=0x80); }/*:48*//*56:*/ #line 970 "ctangle.w" {unsigned char c=0; do ccode[c]=isspace(c)?new_section:ignore;while(c++ !=UCHAR_MAX); ccode['v']=ccode['V']='|'; ccode['=']=verbatim; ccode['@']=at_sign_image; ccode['&']=join_code; ccode['\'']=ord; ccode['^']=ccode['?']=ccode['.']=ccode[':']=ccode['#']= ccode['t']=ccode['T']=ccode['q']=ccode['Q']=control_text; ccode['p']=ccode['P']=include_preproc; ccode['l']=ccode['L']=char_trans; ccode['f']=ccode['F']=ccode['s']=ccode['S']=format; ccode['d']=ccode['D']=definition; ccode['h']=ccode['H']=header; ccode['c']=ccode['C']=begin_C; ccode['<']=ccode['(']=module_name; ccode['~']=ccode['*']=new_section; if(compatibility_mode)/*57:*/ #line 1000 "ctangle.w" {ccode['h']=ccode['H']=include_preproc; ccode['p']=ccode['P']=begin_C; ccode['#']=ignore; }/*:57*/ #line 989 "ctangle.w" }/*:56*/ #line 86 "ctangle.w" 
if(show_banner)print(banner); phase_one(); phase_two(); wrap_up(); return 0; }/*:3*//*18:*/ #line 153 "ctangle.w" void store_two_bytes(sixteen_bits x) {if(tok_ptr+2>tok_mem_end)overflow("token"); *tok_ptr++=x>>8; *tok_ptr++=x&0xFF; }/*:18*//*21:*/ #line 211 "ctangle.w" boolean names_match(id_pointer x,char*q,int l,int dummy) {char*p=name_begin(x);while(--l>=0)if(*p++ != *q++)return false; return*p=='\0'; } void init_module_name(mod_pointer node) {node->equiv=NULL;} void init_id_name(id_pointer dummy,int ilk){}/*:21*//*25:*/ #line 314 "ctangle.w" void phase_two(void) {phase=2; if(text_root==NULL&&output_file_count==0) {print("\n! No program text was specified.");mark_harmless();} else {if(show_progress) {print("\nWriting the output file%s" ,(text_root!=NULL)+output_file_count>1?"s":""); if(text_root!=NULL)printf(" (%s):",C_file_name); update_terminal(); } if(text_root==NULL)C_file=NULL; else {open_output_file();cur_line=1; if(!atp_seen)output_preproc_directives(); output(text_root); }/*27:*/ #line 355 "ctangle.w" {int i; char output_file_name[longest_name+1]; for(i=0;iequiv==NULL) {print("\n! Module not present"); print_mod(output_module);err_print(""); } else if((C_file=fopen(output_file_name,"w"))==NULL) {print("\n! 
Cannot open \"%s\" as output file",output_file_name); err_print(""); } else {if(show_progress)print("\n(%s):",output_file_name); cur_line=1; output(output_module->equiv); } } }/*:27*/ #line 333 "ctangle.w" print_progress("\nDone.\n"); } }/*:25*//*33:*/ #line 499 "ctangle.w" void push_level(mod_pointer p) {if(stack_ptr==stack_end)overflow("output stack"); *stack_ptr++=cur_state; cur_repl=p->equiv; cur_byte=tok_begin(cur_repl);cur_end=tok_end(cur_repl); cur_ind=ind_i; }/*:33*//*34:*/ #line 516 "ctangle.w" void continue_or_pop_level(void) {if(cur_repl->text_link!=NULL) {cur_repl=next_sec(cur_repl); cur_byte=tok_begin(cur_repl);cur_end=tok_end(cur_repl); } else if(--stack_ptr> &stack[0])cur_state= *stack_ptr; ind_i=cur_ind; }/*:34*//*37:*/ #line 576 "ctangle.w" void output(text_pointer repl) {stack_ptr= &stack[1]; cur_repl=repl;cur_byte=tok_begin(cur_repl);cur_end=tok_end(cur_repl); cur_ind=ind_i=0; do if(cur_byte==cur_end) {cur_val=cur_sec; continue_or_pop_level(); out_char(section_end); } else/*38:*/ #line 601 "ctangle.w" {int a= *cur_byte++; if(a<0x80)out_char(a); else if(a>=0xF8) if(a<0xFA)out_char(a==0xF8? *cur_byte++:line_mark); else{C_newline();output_preproc_directives();} else {cur_val=(((a-=0x80)%0x28)<<8)+ *cur_byte++; switch(a/0x28) {case 0:out_char(identifier);break; case 1:/*39:*/ #line 623 "ctangle.w" {mod_pointer mod_name=mod_at(cur_val); if(mod_name->equiv!=NULL)push_level(mod_name); else {print("\n! 
Module not present");print_mod(mod_name);err_print(""); } }/*:39*/ #line 610 "ctangle.w" break; case 2:cur_sec=cur_val;out_char(section_start); } } }/*:38*/ #line 589 "ctangle.w" while(!stack_empty()); C_newline(); }/*:37*//*40:*/ #line 644 "ctangle.w" void output_preproc_directives(void) {text_pointer repl,l; eight_bits*p,*end; protect=true; for(repl= &text_table[0];repltext_link)==macro_flag||l==header_flag) {p=tok_begin(repl);end=tok_end(repl); C_printf("#%se ",l==macro_flag?"defin":"includ"); out_state=no_space; while(p=0xF8) if(a==0xF8)out_char(*p++); else confusion("`@p' within macro"); else {cur_val=(((a-=0x80)%0x28)<<8)+ *p++; if(a<0x28)out_char(identifier); else confusion("module within macro"); } }/*:41*/ #line 656 "ctangle.w" C_newline(); } protect=false; }/*:40*//*42:*/ #line 692 "ctangle.w" void C_newline(void) {C_putc('\n'); if(!line_output)put_indent(); if(cur_line%100==0&&show_progress) {if(cur_line%500!=0)print("."); else print(cur_line%2500==0?"%u\n":"%u",cur_line); update_terminal(); } ++cur_line; }/*:42*//*45:*/ #line 764 "ctangle.w" void out_char(eight_bits c) {if(out_state==literal) if(c==verb_quote)out_state=num_or_id; else C_putc(c); else if(isalnum(c)&&c<0x80||c=='_') {if(out_state==num_or_id&&line_output)C_putc(' '); C_putc(c);out_state=num_or_id; if(!line_output)append_white(c); } else switch(c) {case verb_quote: if(out_state==num_or_id&&line_output)C_putc(' '); out_state=literal;break; case join:out_state=no_space;break; case '\n':/*46:*/ #line 803 "ctangle.w" {if(protect){C_putc(' ');C_putc('\\');} C_newline(); if(out_state!=literal)out_state=no_space; }/*:46*/ #line 779 "ctangle.w" break; case identifier:/*49:*/ #line 838 "ctangle.w" {char*p=name_begin(id_at(cur_val));int l=0; if(out_state==num_or_id&&line_output)C_putc(' '); do if((unsigned char)(*p)<0x80){C_putc(*p);++l;} else {char*q=trans_of(*p);do{C_putc(*q);++l;}while(* ++q!='\0');} while(* ++p!='\0'); out_state=num_or_id; if(!line_output)do append_white(' ');while(--l>0); 
}/*:49*/ #line 780 "ctangle.w" break; case section_start: if(line_output)C_printf("/*%d:*/",cur_val);else C_newline(); out_state=no_space;break; case section_end: if(line_output)C_printf("/*:%d*/",cur_val);else C_newline(); out_state=no_space;break; case line_mark:/*50:*/ #line 854 "ctangle.w" {sixteen_bits a; a=(*cur_byte++)<<8;a+= *cur_byte++; C_newline();C_printf("#line %u \"",a); a=(*cur_byte++)<<8;a+= *cur_byte++; C_printf("%s\"",name_begin(id_at(a))); C_newline();out_state=no_space; }/*:50*/ #line 787 "ctangle.w" break;/*51:*/ #line 877 "ctangle.w" case '+':case '-':case '*':case '/':case '%':case '?': case '<':case '>':case '&':case '|': if(out_state==operator&&line_output)C_putc(' '); case '=':C_putc(c);out_state=operator;break;/*:51*//*52:*/ #line 892 "ctangle.w" case plus_plus:comp_op("++");break; case minus_minus:comp_op("--");break; case minus_gt:comp_op("->");break; case gt_gt:comp_op(">>");break; case eq_eq:comp_op("==");break; case lt_lt:comp_op("<<");break; case gt_eq:comp_op(">=");break; case lt_eq:comp_op("<=");break; case not_eq:comp_op("!=");break; case and_and:comp_op("&&");break; case or_or:comp_op("||");break;/*:52*/ #line 790 "ctangle.w" default:C_putc(c);out_state=no_space; if(!line_output)append_white(c); } }/*:45*//*58:*/ #line 1012 "ctangle.w" eight_bits skip_ahead(void) {eight_bits c; while(find_char()) {limit[1]='@'; while(*loc++ !='@'){} if(loc<=limit&&(c=code_of(*loc++))!=ignore)return c; } return new_section; }/*:58*//*59:*/ #line 1051 "ctangle.w" boolean skip_comment(boolean one_liner) {char c; do {if(loc>=limit) if(one_liner)return false; else if(get_line())return true; else {err_print("! Input ended in mid-comment");return false;} if((c= *loc++)=='/'&& *loc=='*') err_print("! `/*' inside comment, did you forget `*/' before? "); if(c=='@') {eight_bits cc=code_of(*loc++); if(cc==new_section) {err_print("! 
Section ended in mid-comment");loc-=2;return false;} if(cc==module_name){/*68:*/ #line 1291 "ctangle.w" {boolean file_module=loc[-1]=='('; cur_mod=get_module_name(); if(file_module&&cur_mod!=NULL)/*29:*/ #line 398 "ctangle.w" {int i=0; while(i=limit) {if(preprocessing&&limit>buffer&&limit[-1]!='\\') preprocessing=false; return get_line()?'\n':new_section; } if(comment_continues ||(c= *loc++)=='/'&&(*loc=='*'||C_plus_plus&& *loc=='/'))/*63:*/ #line 1184 "ctangle.w" {boolean one_liner=false; if(!comment_continues) {if(!line_output){store_byte('/');store_byte(*loc);} one_liner= *loc++ =='/'; } else if(preprocessing) {print("\nWarning: Multi-line comment in preprocessor line"); preprocessing=false;mark_harmless(); } if(comment_continues=skip_comment(one_liner)) return '\n'; else goto restart; }/*:63*/ #line 1162 "ctangle.w" if(isspace(c)) if(line_output) if(preprocessing)return ' ';else goto restart; else return c; if(c=='#'&&loc==buffer+1)preprocessing=true; }/*:62*/ #line 1123 "ctangle.w" if(c=='L'&&(*loc=='\''|| *loc=='\"')) {get_string();return constant;} if(c<0x80?isalpha(c)||c=='_':translation_exists(c)) {/*65:*/ #line 1233 "ctangle.w" {id_first= --loc; do c= * ++loc; while(c<0x80?isalnum(c)||c=='_':translation_exists(c)); cur_id= loc==id_first+1&&(eight_bits)(*id_first)<0x80 ?NULL:id_lookup(id_first,loc,0); }/*:65*/ #line 1126 "ctangle.w" return id_code;} if(c>=0x80){err_print("! 
Illegal 8-bit character");goto restart;} if(isdigit(c)||c=='.'&&isdigit((eight_bits)*loc)) {/*66:*/ #line 1250 "ctangle.w" {if(*(id_first=loc-1)=='0'&&tolower((eight_bits)*loc)=='x') do c= * ++loc;while(isxdigit(c)); else {while(isdigit(c))c= *loc++; if(c=='.')do c= *loc++;while(isdigit(c)); if(tolower(c)=='e') {if((c= *loc)=='+'||c=='-')c= * ++loc; while(isdigit(c))c= * ++loc; } else--loc; } while(isalpha(c))c= * ++loc; id_loc=loc; }/*:66*/ #line 1130 "ctangle.w" return constant;} switch(c) {case '\'':case '"':get_string();return constant; case '@':/*67:*/ #line 1275 "ctangle.w" {eight_bits cc=code_of(*loc++); switch(cc) {case ignore:goto restart; case control_text:get_control_text();goto restart; case verbatim:if(get_control_text())goto restart;else break; case ord:/*69:*/ #line 1302 "ctangle.w" id_first=loc; while(*loc!='\'') {if(*loc++ =='\\')loc++; if(loc>=limit){err_print("! ASCII constant didn't end");break;} } id_loc=loc++;/*:69*/ #line 1280 "ctangle.w" break; case module_name:/*68:*/ #line 1291 "ctangle.w" {boolean file_module=loc[-1]=='('; cur_mod=get_module_name(); if(file_module&&cur_mod!=NULL)/*29:*/ #line 398 "ctangle.w" {int i=0; while(i',minus_gt);break; case '=':compress('=',eq_eq);break; case '>':compress('=',gt_eq);compress('>',gt_gt);break; case '<':compress('=',lt_eq);compress('<',lt_lt);break; case '&':compress('&',and_and);break; case '|':compress('|',or_or);break; case '!':compress('=',not_eq);break;/*:64*/ #line 1137 "ctangle.w" } return c; }/*:61*//*71:*/ #line 1367 "ctangle.w" void scan_repl(eight_bits context) {eight_bits a; eight_bits*keep=tok_ptr; int brace_level=0,par_level=0; if(context==section_body)/*81:*/ #line 1612 "ctangle.w" {if(mark_line()){a=new_section;goto done;}}/*:81*/ #line 1373 "ctangle.w" do/*72:*/ #line 1388 "ctangle.w" {switch(a=get_next()) {/*75:*/ #line 1459 "ctangle.w" case id_code:/*76:*/ #line 1496 "ctangle.w" if(cur_id==NULL)store_byte(*id_first); else store_two_bytes(0x8000+id_index(cur_id));/*:76*/ #line 1460 
"ctangle.w" keep=tok_ptr;continue; case module_name:if(context==preproc_directive)goto done; if(cur_mod!=NULL) {sixteen_bits n=mod_index(cur_mod);/*77:*/ #line 1508 "ctangle.w" {char*p=loc; while(*p==' '&&p>6)); store_byte('0'+((verb_quote>>3)&7));store_byte('0'+(verb_quote&7)); } else {if((eight_bits)(*id_first)>=0x80)store_byte(0xF8); store_byte(*id_first++); } while(id_firstid_first) err_print("! ASCII constant should be single character"); else{err_print("! Empty ASCII constant");c=0;} store_byte(verb_quote); if(c>=100)store_byte('0'+c/100);if(c>=10)store_byte('0'+(c/10)%10); store_byte('0'+c%10); store_byte(verb_quote); }/*:79*/ #line 1476 "ctangle.w" keep=tok_ptr;continue; case include_preproc: if(context==preproc_directive) err_print("! `@p' is forbidden in preprocessor directive"); else {if(line_output)tok_ptr=keep; store_byte(0xFA);atp_seen=true;keep=tok_ptr;/*81:*/ #line 1612 "ctangle.w" {if(mark_line()){a=new_section;goto done;}}/*:81*/ #line 1485 "ctangle.w" } continue; case char_trans: err_print("! `@l' is only allowed in limbo"); continue;/*:75*/ #line 1394 "ctangle.w" case format:case definition:case header:case begin_C: if(context==preproc_directive)goto done; err_print ("! `@f', `@d', `@h', and `@c' are ignored in section body"); continue; case new_section:goto done;/*73:*/ #line 1420 "ctangle.w" case '(':++par_level;break; case ')': if(par_level<=0) {err_print("! Unmatched closing parenthesis");continue;} --par_level;break; case '{':++brace_level;break; case '}': if(brace_level<=0) {err_print("! 
Unmatched closing brace");continue;} --brace_level;break;/*:73*/ #line 1402 "ctangle.w" case '\n':store_byte('\n'); if(context==section_body&&print_where) {tok_ptr=keep;/*81:*/ #line 1612 "ctangle.w" {if(mark_line()){a=new_section;goto done;}}/*:81*/ #line 1406 "ctangle.w" } continue; case join_code:a=join; } store_byte(a);keep=tok_ptr; }/*:72*/ #line 1375 "ctangle.w" while(true); done:tok_ptr=keep; next_control=a; if(par_level>0||brace_level>0)/*74:*/ #line 1439 "ctangle.w" {char*p,*s;int l; if(par_level>0)l=par_level,s="parenthes",p=l>1?"es":"is"; else l=brace_level,s="brace",p=l>1?"s":""; print("\n! There %s %d unclosed %s%s" ,par_level+brace_level>1?"are":"is",l,s,p); if(par_level>0&&brace_level>0) print(" and %d unclosed brace%s" ,brace_level,brace_level>1?"s":""); print(" in the previous "); err_print(context==preproc_directive?"macro":"section"); while(--par_level>=0)store_byte(')'); while(--brace_level>=0)store_byte('}'); }/*:74*//*19:*/ #line 166 "ctangle.w" {cur_text=text_ptr++; if(text_ptr>=text_table_end)overflow("text"); tok_begin(text_ptr)=tok_ptr; }/*:19*/ #line 1381 "ctangle.w" }/*:71*//*82:*/ #line 1627 "ctangle.w" boolean mark_line(void) {while(loc>=limit) if(!get_line())return true; print_where=false; if(line_output) {store_byte(0xF9); id_first=changing?change.name:cur_file_name; store_two_bytes(changing?change_line:cur_line); store_two_bytes(id_index(id_lookup(id_first,NULL,0))); } return false; }/*:82*//*83:*/ #line 1648 "ctangle.w" void scan_section(void) {++section_count; if(loc[-1]=='*')print_section_progress();/*84:*/ #line 1668 "ctangle.w" {next_control=ignore; do {if(next_controltext_link=macro_flag; } else {scan_repl(preproc_directive);cur_text->text_link=header_flag;} if(next_control==module_name)/*86:*/ #line 1718 "ctangle.w" {eight_bits t=get_next(); if(t=='+')t=get_next(); if(t!='='&&t!=eq_eq) {next_control=ignore; if(t!='|'&&!compatibility_mode) err_print("! 
`=' sign missing, module name ignored"); } }/*:86*/ #line 1684 "ctangle.w" }while(next_controltext_link;} else {text_pointer*q= &p->equiv; while(*q!=NULL)q= &(*q)->text_link; *q=cur_text; } cur_text->text_link=NULL; }/*:88*/ #line 1744 "ctangle.w" }/*:87*/ #line 1653 "ctangle.w" }/*:83*//*90:*/ #line 1777 "ctangle.w" void phase_one(void) {phase=1;section_count=0;reset_input(); while((next_control=skip_ahead())!=new_section) if(next_control==char_trans)/*91:*/ #line 1791 "ctangle.w" {int c; while(loc0)*p='\0';/*92:*/ #line 1824 "ctangle.w" if(i==0)err_print("! Translation string absent after `@l'"); else if(i>trans_limit)err_print("! Translation string too long"); else if(!isspace((eight_bits)*loc)) err_print("! Translation string not terminated by space");/*:92*/ #line 1810 "ctangle.w" } }/*:91*/ #line 1781 "ctangle.w" while(!input_has_ended)scan_section(); check_complete(); }/*:90*//*93:*/ #line 1838 "ctangle.w" #ifdef STAT void print_stats(void) {print("\nMemory usage statistics:\n"); report("identifier",id_index(id_ptr),max_idents); report("module name",mod_index(mod_ptr),max_modules); report("byte",byte_ptr-byte_mem,max_bytes); report("replacement text",text_ptr-text_table,max_texts); report("token",tok_ptr-tok_mem,max_toks); } #endif/*:93*/ cwebx-3.04.orig/ctangle.w100644 1750 1750 240672 6470041763 13211 0ustar jdgjdg% This file is part of CWEBx. % This program by Marc van Leeuwen based on earlier versions by % D. E. Knuth., Silvio Levy and Frank Jensen. % It is distributed WITHOUT ANY WARRANTY, express or implied. % CWEB (Revision: 2.0) % Don Knuth, July 1990 % Version 3.x, Marc van Leeuwen, December 1993 % CWEBx 2+1.0, Marc van Leeuwen, August 1994 % CWEBx 3.0, Marc van Leeuwen, Januari 1995 % CWEBx 3.02, Marc van Leeuwen, April 1996 % CWEBx 3.02a, Marc van Leeuwen, September 1996 % CWEBx 3.03, Marc van Leeuwen, January 1998 (ctangle unchanged) % Copyright (C) 1987,1990 Silvio Levy and Donald E. Knuth % Copyright 1994 Marc A. A. 
van Leeuwen % Permission is granted to make and distribute verbatim copies of this % document provided that the copyright notice and this permission notice % are preserved on all copies. % Permission is granted to copy and distribute modified versions of this % document under the conditions for verbatim copying, provided that the % entire resulting derived work is distributed under the terms of a % permission notice identical to this one. \def\me.{CTANGLE} \def\myroots{\.{TANGLE}} % Here is TeX material that gets inserted after \input cwebxmac \def\ASCII.{\caps{ASCII}} @i intro.inc % Here is some text that matches the start of CWEAVE @d banner "This is CTANGLE (Version x"@+version_string@+")\n" @ The following parameters are specific to \.{CTANGLE}; those which are common to \.{CTANGLE} and \.{CWEAVE} are defined in the file \.{common.inc} and appear below. Some of these values have been decreased with respect to their earlier values which were sufficient in the original \.{WEB} to handle \TeX; a motivation is given at the common declarations. The macro |variant| is used in the include file \.{common.h}, and should therefore be defined early; it refers to the |struct text| that will be declared later. @d max_toks 150000L /* number of bytes in compressed \Cee~code */ @d max_texts 2500 /* number of replacement texts, must be less than 10240 */ @d max_files 50 /* number of auxiliary output files */ @d stack_size_max 50 /* module nesting level during output */ @d max_indent 1000 /* size of buffer for save indentation characters */ @) @d variant @;text @ The program is built from two compilation units, one with source file \.{common.w}, which contains a collection of routines and data shared between \.{CTANGLE} and \.{CWEAVE}, and a second with source file \.{ctangle.w} containing all code specific to \.{\me.}, and whose typeset version you are now reading. All compilation units of the \.{CWEB} system incorporate the file \.{common.inc} containing common declarations. 
\.{\me.} has a fairly straightforward outline. It operates in two phases: first it reads the source file, saving the \Cee~code in compressed form; then outputs the code, after shuffling it around. It can optionally be compiled with the preprocessor symbol |STAT| defined, in which case it will keep track of how much of \.{\me.}'s resources were actually used. One command line argument is specifically designated to modify the behaviour of \.{\me.}. Normally \.{\me.} will produce machine-oriented output that supplies \&{\#line} directives (to allow compilers and debuggers to locate the original source lines of statements), while preserving line breaks but otherwise ignoring the lay-out and comments in the source text. When the flag `\.{-l}' is supplied on the command line however, the output cannot be traced back easily to the source lines, but will be easier for humans to read: no \&{\#line} directives are produced, but original lay-out and comments are preserved as well as possible. The flag controlling this behaviour is called |line_output|; in the standard mode of operation we have |line_output==true|. @d line_output flags['l'] @c @< Function prototypes used but not defined in the shared code @>@; @< Typedef and enumeration declarations @>@; @< Prototypes @>@; @< Global variables @>@; int main (int argc, char** argv) { program=ctangle; line_output=true; common_init(argc,argv); @ if (show_banner) print(banner); /* print a ``banner line'' */ phase_one(); /* read all the user's text and compress it into |tok_mem| */ phase_two(); /* output the contents of the compressed tables */ wrap_up(); /* and exit gracefully */ return 0; } @i common.inc @* Data structures exclusive to {\tt \me.}. The basic mission of \.{\me.} is a relatively simple one, namely to separate the \Cee~code from the commentary, to forget about the latter, and to output the former almost verbatim, but plugging in the program text corresponding to a module each time a reference is made. 
To this end the bodies of all the sections are collected during Phase~I, and the proper links are laid, after which the code is linearised for output in a straightforward manner during Phase~II. The \Cee~code itself will be stored as a sequence of bytes (more precisely, of |eight_bits|) in a single large array |tok_mem|, much in the same way as the characters for identifiers and module names are stored in |byte_mem|; the encoding used will be described below. The individual sections are represented by structures of type |text|, which contain pointers to the |tok_mem| array as well as links by which they may be connected into lists; the same structures are also used to represent the replacement texts of macros. All |text| structures reside in an array |text_table|, which is comparable to |id_table| and |mod_table|. @= typedef struct text { eight_bits* tok_start; /* pointer into |tok_mem| */ struct text* text_link; /* relates replacement texts */ } text,* text_pointer; @ Since all allocation is during Phase~I and all deallocation during Phase~II, there is no reason for reclaiming memory when it is no longer needed. Hence allocation is strictly linear, and a single pointer into |tok_mem| for each |text| suffices, since a texts ends where the next one starts. The first position of |tok_mem| that is unoccupied by a replacement text is called |tok_ptr|, and the first unused location of |text_table| is called |text_ptr|. Whenever we are not in the process of adding tokens to |tok_mem|, we have the identity |tok_begin(text_ptr)==tok_ptr|. @d tok_begin(p) (p)->tok_start @d tok_end(p) ((p)+1)->tok_start @d text_table_end (&text_table[max_texts]) @d tok_mem_end (&tok_mem[max_toks]) @= text text_table[max_texts]; text_pointer text_ptr=&text_table[0]; /* first unused position in |text_table| */ eight_bits tok_mem[max_toks]; eight_bits *tok_ptr=&tok_mem[0]; /* first unused position in |tok_mem| */ @ Invariants must be initialised. 
@= tok_begin(text_ptr)=tok_ptr; @ The following macro and function are used to enter one- and two-byte tokens into |tok_mem| when a replacement text is being generated. @d store_byte(c) @+ if (tok_ptr==tok_mem_end) overflow("token"); @+ else *tok_ptr++=c@; @c void store_two_bytes (sixteen_bits x) { if (tok_ptr+2>tok_mem_end) overflow("token"); *tok_ptr++ = x >> 8; /* store high byte */ *tok_ptr++ = x & 0xFF; /* store low byte */ } @ When by successive calls of |store_byte| and |store_two_bytes| a complete replacement text has been stored, the following code makes it into an official text object by advancing |text_ptr| and storing the starting location of the next text to be generated into the |text_table| array. @< Wrap up the accumulated bytes into a completed text, pointed to by |cur_text| @>= { cur_text=text_ptr++; /* consolidate the replacement text */ if (text_ptr>=text_table_end) overflow("text"); tok_begin(text_ptr)=tok_ptr; /* mark end of replacement text */ } @ The |text_link| field links a section to a possible continuation of it (the next section with the same name, or for unnamed sections the next unnamed section). The set of all sections linked together in such a way will be termed a `module'. Apart from the value |NULL| used to indicate the end of such a list, we need two other distinguished values: |macro_flag|, which indicates that the text is a macro replacement text rather than a section body, and |header_flag| similarly indicating that the text is the (quoted) file name of a header file that is included. By a fortunate coincidence, there are two unused pointer values near the end of |text_table|: since the macro |tok_end| should work for all pointers to a replacement text, the last entry of |text_table| is never used to store a replacement text; its address can be used as one reserved pointer value, and the definition of~\Cee\ generously offers us the address of the non-existent entry following it as another valid pointer value. 
The pointer to the first section of the unnamed module appears in |text_root|, and the address of the final link of this module is recorded in |last_unnamed|. The named modules are accessed by the extra pointer in the |mod_info| structure for a module name, which is a |text_pointer| pointing to the body of the first section of that module; in~\.{\me.} this field will be called |equiv|. @d macro_flag (text_table_end-1) /* address of unused entry */ @d header_flag text_table_end /* address just beyond |text_table| */ @d next_sec(m) ((m)->text_link) /* next section of the same module, if any */ @d equiv equiv_or_xref /* info corresponding to names */ @= text_pointer text_root=NULL; text_pointer* last_unnamed=&text_root; /* where to link new unnamed sections */ @ Here are the functions specific to \.{\me.}, which are used by the common lookup routines. The function |names_match| decides whether a name of length~|l| starting at position |q| equals the identifier represented by~|x|. The parameter |dummy| is present only for compatibility with \.{CWEAVE}, where it contains the |ilk| code. The common lookup routines refer to separate routines |init_module_name| and |init_id_name| when the data structure grows. Actually |init_id_name| is called only when |program==cweave|, but we need to declare a dummy version so that the linker won't complain about its absence. @c boolean names_match (id_pointer x,char* q,int l,int dummy) {@; char* p=name_begin(x); while (--l>=0) if (*p++!=*q++) return false; return *p=='\0'; } void init_module_name(mod_pointer node) @+{@; node->equiv=NULL; } void init_id_name (id_pointer dummy,int ilk) @+ {} @* Tokens. Clearly, \.{\me.} must hold all the code for a complete program file at the end of Phase~I (not counting the possible multiple use of some modules, which will probably not make a great difference), and so it is worth while to use a compact representation for the internal storage of the \Cee~code. 
Using full-fledged data compression techniques would probably be overdoing things a bit, but a few simple methods will make a great difference. There are likely to be many identical occurrences of identifiers and keywords of the language, so it is efficient to collect them into a table and replace them in the internal format by references to the table; module names have to be entered in a table anyway for the purpose of associating occurrences of the same name with each other. Furthermore white space and comments can be removed upon storing the texts; any necessary white space can be reinserted during the output process (but note that newlines must be represented internally, in order to be able to match output lines with input lines). @ These compressed texts representing \Cee~code, while physically represented as a sequence of eight-bit bytes in |tok_mem|, logically consist of streams of `tokens', some of which occupy two or more consecutive byte positions, while others take just one byte. If the first byte is |a| and, in case |a>=0x80|, the next byte is~|b|, then the interpretation of the token is as follows. \Y \item{$\bullet$} |0<=a<0x80|: the token represents the character~|a|; \item{$\bullet$} |0x80<=a<0xA8|: the two-byte token represents the identifier with name |id_at((a-0x80)*@t$2^8$@>+b)|; \item{$\bullet$} |0xA8<=a<0xD0|: the two-byte token represents the module with name |mod_at((a-0xA8)*@t$2^8$@>+b)|; \item{$\bullet$} |0xD0<=a<0xF8|: this two-byte token marks the beginning of (a part of) the repacement text for the current module, defined in section number |(a-0xD0)*@t$2^8$@>+b|; \item{$\bullet$} |a==0xF8|: the two-byte token represents the (8-bit) character~|b|. \item{$\bullet$} |a==0xF9|: the token consists of 5 bytes, say |a|,~|b|, |c|, |d| and~|e|, and represents a \&{\#line} directive for line number |b*@t$2^8$@>+c| in the file |id_at(d*@t$2^8$@>+e)|. 
\item{$\bullet$} |a==0xFA|: the token consists of a single byte, and represents the location, indicated by the \:p control codes in the source file, where the preprocessor directives produced by \:d and \:h will be included in the output; this location will be marked by the presence of the virtual module called `|@p|' in the typeset output. \Y Any \Cee~token that consists of a single 7-bit character is represented by that character itself; in particular, a single-character identifier like `|x|' will be a one-byte token, while all longer identifiers will occupy two bytes. Some of the 7-bit codes will not be present however, since they are control codes and they do not represent a symbol useful for \Cee\ in the \caps{MIT} extension of \ASCII., so we can use them for special purposes. The following symbolic names are used: \yskip \hang |join| denotes the concatenation of adjacent items with no space or line breaks allowed between them (the \:\& operation of \.{CWEB}). \hang |verb_quote| denotes the beginning or end of a sequence of bytes that is to be copied verbatim to the output, i.e., no spaces should be inserted. Such sequences are used to transmit numerical constants and strings (in which case the ordinary string quotes appear within the |verb_quote| bytes), and to implement the \:= operation. @^ASCII code dependencies@> @d verb_quote 0x2 /* takes the place of extended \ASCII. \.{\char2} */ @d join 0x3 /* takes the place of extended \ASCII. \.{\char3} */ @< Global var... @>= boolean atp_seen=false; /* whether \:p occurs anywhere */ @* Phase II processing. We will start to explain the structure of the tokens lists built up in memory by considering how they are output during Phase~II; this is the simpler part of the program. We will proceed in top-down fashion for this part. 
@= void phase_two (void); /* output the contents of the compressed tables */ void output (text_pointer); /* recursively write out modules */ void output_preproc_directives(void); /* write out all \:d and \:h stuff */ void C_newline (void); /* send a newline to the \Cee~file */ void out_char (eight_bits); /* write a single token */ @ Here is the general control routine. After Phase~I, the quantity |output_file_count| will be equal to the number of extra output files to be written. There should at least be either an unnamed section or one that produces an additional output file, for otherwise there will be no output at all; if the is no unnamed section, \:d~and~\:h commands will only have effect if \:p occurs somewhere. @c void phase_two (void) { phase=2; if (text_root==NULL && output_file_count==0) @/{@; print("\n! No program text was specified."); mark_harmless(); } @.No program text...@> else { if (show_progress) { print("\nWriting the output file%s" @.Writing the output...@> ,(text_root!=NULL)+output_file_count>1 ? "s" : ""); if (text_root!=NULL) printf(" (%s):",C_file_name); update_terminal(); } if (text_root==NULL) C_file=NULL; else { open_output_file(); cur_line=1; if (!atp_seen) output_preproc_directives(); output(text_root); } @ print_progress("\nDone.\n"); } } @ \.{\me.} can write output on multiple files. If a module name is introduced in at least one place by \:( instead of \:<, it is treated as the name of a file. All these special module names are saved in the array |output_files|. @= mod_pointer output_file[max_files]; int output_file_count=0; @ To write the named output files, we proceed as for the unnamed module, the only subtlety is that we have to open each one after closing the previous one. However, if there is no main file, there is no file to close the first time, and this situation can be recognised because we have set |C_file=NULL| in that case. 
The length of a module name can be at most |longest_name| (which is rather more generous than allowed elsewhere for file names), so we need no test for overflowing |output_file_name|. @= { int i; char output_file_name[longest_name+1]; /* name of the file */ for (i=0; i if (C_file!=NULL) fclose(C_file); if (output_module->equiv==NULL) @/{@; print("\n! Module not present"); @.Module not present@> print_mod(output_module); err_print(""); } else if ((C_file=fopen(output_file_name,"w"))==NULL) @/{@; print("\n! Cannot open \"%s\" as output file",output_file_name); @.Cannot open output file@> err_print(""); } else { if (show_progress) print("\n(%s):",output_file_name); cur_line=1; output(output_module->equiv); } } } @ If the module name defining the output file contains any occurrences of `\.{@@@@}', they are undoubled in forming the actual file name; apart from this should be no control codes in the module name. @< Copy name of |output_module| into |output_file_name| @>= { char* p=output_file_name, *q=name_begin(output_module); while (*q!='\0') if ((*p++=*q++)=='@@') if (*q++!='@@') { print("\n! Illegal control code in file name"); @.Illegal control code in file name@> print_mod(output_module); err_print(""); } *p='\0'; } @ Here is how a file name gets added to the list of output files. @= { int i=0; while(i= typedef struct { text_pointer repl_field; /* replacement text of active section */ eight_bits *byte_field; /* present location within replacement text */ eight_bits *end_field; /* ending location of replacement text */ sixteen_bits sec_nr_field; /* section number */ sixteen_bits indent_field; /* amount of indentation */ } output_state, * stack_pointer; @ The global variable |stack_ptr| tells how many levels of output are currently in progress; the stack grows upwards with |stack_ptr| pointing to the first vacant location above the top of the stack. 
The entry |stack[0]| at the bottom of the stack is not used; when |stack_ptr| points to it then |cur_state| itself has become invalid and the output process is completed; this condition can be tested as |stack_empty()|. This somewhat strange use of the stack is forced upon us because the \Cee\ language does not guarantee that a pointer value at offset~$-1$ of an allocated array exists. Since the location |stack[0]| is vacant we might as well use it to store |cur_state| in. @d cur_state stack[0] @d stack_end (&stack[stack_size_max]) @d stack_empty() (stack_ptr==&stack[0]) @= output_state stack[stack_size_max]; /* info for non-current levels */ stack_pointer stack_ptr; /* points above top of output state stack */ @ When |line_output==false| we keep track of the current indentation level in an array of white space characters |indent_buffer|. Every time a newline is output, a sequence of |cur_ind| characters from |indent_buffer| are also output by invoking |put_indent|. When relevant non-newline characters are output, a white space counterpart of the character is appended to |indent_buffer| by invoking the macro |append_white|, whose argument should not involve any side effects. The current position of writing the characters is maintained in |ind_i|, and reset to |cur_ind| on output of a newline. @d C_printf(format,x) fprintf(C_file,format,x) @d C_putc(c) putc(c,C_file) @d put_indent() (indent_buffer[ind_i=cur_ind]='\0',C_printf("%s",indent_buffer)) @d append_white(c) if (ind_i>=max_indent) overflow("indent buffer"); else indent_buffer[ind_i++]= isspace(c) ? c : ' ' @; @< Global var... @>= char indent_buffer[max_indent]; /* white space characters to produce indentation */ sixteen_bits ind_i; /* number of those characters needed to reach current position */ @ When the replacement text for name |p| is to be inserted into the output, the following function is called to save the old level of output and get the new one going. 
The value of |cur_sec| is not set since its value is recorded in the first token to be read after this function is called. It is by calling |push_level| that the current value of |ind_i| becomes significant, by being copied to |cur_ind|. @c void push_level (mod_pointer p) /* suspends the current level */ { if (stack_ptr==stack_end) overflow("output stack"); *stack_ptr++=cur_state; @/cur_repl=p->equiv; cur_byte=tok_begin(cur_repl); cur_end=tok_end(cur_repl); @/cur_ind=ind_i; } @ When we come to the end of a replacement text, the function |continue_or_pop_level| does what its name suggests: it either moves to the continuation of this replacement text (i.e., to the next section of the same module) or returns the state to the most recently stacked level when a module is completed. In the latter case all the fields of |cur_state| are restored; in the former case, like for |push_level|, the value of |cur_sec| is not yet set, but will be adjusted immediately afterwards. @c void continue_or_pop_level (void) { if (cur_repl->text_link != NULL) /* then link to a continuation, staying on the same level */ {@; cur_repl=next_sec(cur_repl); cur_byte=tok_begin(cur_repl); cur_end=tok_end(cur_repl); } else if (--stack_ptr > &stack[0]) cur_state=*stack_ptr; /* otherwise |stack_empty()| holds */ ind_i=cur_ind; /* reset indentation to base of current level */ } @ The function |output| handles output of tokens by sending them to a lower level function |out_char|, until the condition |stack_empty()| holds. The task of |output| is to isolate and decode tokens, to handle stacking and unstacking as necessary, and to issue special calls to |out_char| to mark the beginning and end of replacements texts and to produce appropriate \&{\#line} directives. As its name suggests, |out_char| usually handles the output of single characters, but some special codes will make it perform a few other operations. 
Some of these, like |verb_quote| and compressed operator codes occur, as bytes with values below |0x80| inside the replacements texts, and these get no special treatment by |output|. A few other ones are generated explicitly by |output|, and these have values from |0x80| upwards, defined in the enumeration below. Information about the precise value of two-byte tokens is passed to |out_char| via the global variable |cur_val|. @= int cur_val; /* additional information corresponding to output token */ @~The code |identifier| is used for identifiers of length two or more, in which case |cur_val| indicates the identifier name. The codes |section_start| or |section_end| are sent at the beginning and end of parts of the replacement text defined in a one section, in which case |cur_val| is the section number; this number will be recorded in a comment to aid human readers of the \Cee~file produced. When a \&{\#line} directive is to be output, |line_mark| is sent to |out_char|, which will fetch the following 4~bytes (specifying the line number and file name) from the replacement text itself; this is the only occasion where |output| does not read a complete token before calling |out_char|. Any 8-bit characters that were escaped by a byte `|0xF8|' are sent directly to |out_char|; they do not interfere with the codes below since the input routine guarantees that they only occur between |verb_quote| tokens, at which times |out_char| does not honour any special codes. @< Typedef and enumeration... @>= enum {identifier=0x80, section_start, section_end, line_mark }; @ Due to the output stack operations, the function |output| can be written iteratively rather than recursively. A user who is not afraid of recursion may however easily rewrite this code into a recursive form, and dispose of the output stack and its routines altogether; the recursion depth will be equal to the level of nesting of modules, which is not likely to cause any problems in practice. 
We have retained the iterative version out of reverence for the author of the original |WEB| system; it has the additional advantage that, should there be a cycle in the directed graph of module references, a reasonable overflow message will be produced by |push_level|, whereas otherwise system stack overflow would occur, and it would depend on the \Cee~runtime support whether it is properly detected. @c void output (text_pointer repl) /* sends tokens to |out_char| */ { stack_ptr=&stack[1]; @/cur_repl=repl; cur_byte=tok_begin(cur_repl); cur_end=tok_end(cur_repl); cur_ind=ind_i=0; do if (cur_byte==cur_end) { cur_val=cur_sec; continue_or_pop_level(); out_char(section_end); /* output ending section number comment */ } else @< Output the token starting at |*cur_byte|, advancing |cur_byte| correspondingly @> while(!stack_empty()); C_newline(); } @ Most tokens lead to calling |out_char| with an appropriate value, after possibly setting |cur_val| and |cur_sec|. Exceptions are module names, which will push their replacement text on the stack but not produce any direct output, and tokens |0xFA| which will produce the collected preprocessor directives by calling |output_preproc_directives|, after ensuring that this will start on a fresh line of output. @< Output the token starting at |*cur_byte|... @>= { int a = *cur_byte++; if (a<0x80) out_char(a); /* single byte token */ else if (a>=0xF8) if (a<0xFA) out_char(a==0xF8 ? 
*cur_byte++ : line_mark); else {@; C_newline(); output_preproc_directives(); } /* |a==0xFA| */ else { cur_val=(((a-=0x80)%0x28)<<8)+*cur_byte++; switch (a/0x28) { case 0: out_char(identifier); break; case 1: @ @+ break; case 2: cur_sec=cur_val; out_char(section_start); /* set the correct section number and output comment */ } } } @ If any defining occurrence of a module name was encountered during the Phase~I, its replacement text will have been be linked into to |equiv| field of that name; otherwise that field will contain a null pointer and we must report an error. @= { mod_pointer mod_name=mod_at(cur_val); if (mod_name->equiv!=NULL) push_level(mod_name); else {@; print("\n! Module not present"); print_mod(mod_name); err_print(""); @.Module not present@> } } @ Output of macro definitions and inclusions of header files is sufficiently different from ordinary output that we use a special function |output_preproc_directives| for it. We bypass |output| and call |out_char| directly, since in these cases there can be no nesting of replacement texts. We go through the list of all replacement texts and copy the ones that refer to macros or header files, preceded respectively by \&{\#define} and \&{\#include}. During the output of a single directive any line breaks present in the source file will be protected by backslashes. We use local variables |p| and |end| instead of |cur_byte| and |cur_end|, since the current level of the stack is already in use if this function is called from |output|. @c void output_preproc_directives (void) { text_pointer repl,l; eight_bits* p, *end; protect=true; /* newlines should be preceded by |'\\'| */ for (repl=&text_table[0]; repltext_link)==macro_flag || l==header_flag) { p=tok_begin(repl); end=tok_end(repl); C_printf ("#%se ",l==macro_flag ? 
"defin" : "includ"); out_state=no_space; while (p C_newline(); /* this newline is not escaped */ } protect=false; } @ The situation here is simpler than in |output|, since module names and \:p control codes are forbidden in the replacement texts handled here, nor will codes for \.{\#line} directives or section number indications have been included in them during input. Should any such token nevertheless turn up, then one of the two calls of |confusion| below is invoked; the diagnostic is not quite exhaustive as to the possible cause in either case, but we do not wish to elaborate on code that will never be executed. @< Output preprocessor token... @>= { int a=*p++; if (a<0x80) out_char(a); /* single byte token */ else if (a>=0xF8) if (a==0xF8) out_char(*p++); @+ else confusion("`@@p' within macro"); @.`@@p' within macro@> else { cur_val=(((a-=0x80)%0x28)<<8)+*p++; if (a<0x28) out_char(identifier); @+ else confusion("module within macro"); @.module within macro@> } } @* Writing characters. The |output| routine above handles the global structure of output generation; we now present the routines that transform the lexical items produced by |output| into characters. First we give a function that is called whenever we want to finish off a line sent to the \Cee~file. It keeps |cur_line| equal to the number of the next line to be output, and displays a progress report every 100 lines. @c void C_newline (void) /* writes one line to output file */ { C_putc('\n'); if (!line_output) put_indent(); if (cur_line%100==0 && show_progress) { if (cur_line%500!=0) print("."); /* progress report */ else print(cur_line%2500==0 ? "%u\n" : "%u",cur_line); update_terminal(); } ++cur_line; } @ The function |out_char| must make sure that the output has the proper ``surface structure''. 
When |line_output==false| everything should essentially be written as it is stored, but when |line_output==true| only the essential tokens have been stored, and white space has to be inserted where appropriate. Spaces should not occur at certain places (e.g., not in the middle of a string or a constant or an identifier, not at a \:\& position where quantities are being joined together), while in other places they are required (e.g., between identifiers, which for \.{\me.} includes reserved words, and between certain operators that might otherwise be considered as a single token). Such surface structure can very nicely be obtained by attaching a small finite state machine to the output generator, recording information about the most recent token (as a matter of fact, even \TeX's fabulous formatting of math formulae is largely based on such a simple device). In our present case, the state of the output process is recorded in the global variable |out_state|. Furthermore there is a variable |protect| that is explicitly managed by the function calling |out_char| (notably by |output_preproc_directives|), being set to |true| whenever newlines are to be escaped by a backslash. @= eight_bits out_state; /* current status of partial output */ boolean protect; /* should newline characters be quoted? */ @ The output process can be in one of following states: \yskip\hang |no_space| means that no space will precede the following item. This state is set by \:\&, and after the output of anything that could not possibly combine into a larger token, such as punctuation. \yskip\hang |num_or_id| means that the last item in the buffer is a number or identifier, hence a blank space or line break must be inserted if the next item is also a number or identifier. \yskip\hang |operator| means that the last item in the buffer is an operator, hence a blank space or line break must be inserted if the next item is also an operator. 
\yskip\hang |literal| means we're copying only character tokens, and that they are to be output exactly as stored. This is the case during strings, verbatim constructions and numerical constants; the |verb_quote| character serves as a quoting mechanism which switches this state on~and~off. @< Typedef and enum... @>= enum { no_space, num_or_id, operator, literal}; @ The function |out_char| is a many-way switch on the kind of character or token transmitted to it. The |verb_quote| mechanism is used amongst others to encapsulate numeric constants, so we make the whole construction behave as an identifier with respect to spacing (the user can still defeat any spaces around it by means of \:\&). The code |line_mark| will only be stored (and output) if |line_output==true|. We do not anticipate the need of indentation across literals, operators and such, and assume that module names are preceded on the same line only by white space, identifiers (keywords) and things like braces; apart from identifiers all of these come through the |default| case below. Therefore we only include an invocation of |append_white| in that case and for identifiers; to handle all possible cases the assiduous user could add such invocations to all other calls of |C_putc| and |C_printf|. 
@c void out_char (eight_bits c) { if (out_state==literal) if (c==verb_quote) out_state=num_or_id; /* close literal mode */ else C_putc(c); /* write literal character, possibly 8-bit */ else if (isalnum(c)&&c<0x80 || c=='_') /* single character identifier or number */ { if (out_state==num_or_id && line_output) C_putc(' '); C_putc(c); out_state=num_or_id; if (!line_output) append_white(c); } else switch (c) { case verb_quote: if (out_state==num_or_id && line_output) C_putc(' '); out_state=literal; break; case join: out_state=no_space; break; case '\n': @< Output a newline @> @+ break; case identifier: @< Output an identifier @> @+ break; case section_start: if (line_output) C_printf("/*%d:*/",cur_val); @+ else C_newline(); out_state=no_space; break; case section_end: if (line_output) C_printf("/*:%d*/",cur_val); @+ else C_newline(); out_state=no_space; break; case line_mark: @< Output a \.{\#line} directive @> @+ break; @\@ @\@ default: C_putc(c); out_state=no_space; if (!line_output) append_white(c); } } @ Newlines are escaped whenever |protect| holds; a space is prepended so that the newline is effectively replaced by a space. This also means that the user who cannot break the habit of escaping newlines in multi-line preprocessor directives, even though this is unnecessary after \:d and \:h, is not punished for this by the creation of the sequence `\.{\\\\}' at the end of the lines, but rather gets `\.{\\\ \\}', which is harmless. @< Output a newline @>= { if (protect) {@; C_putc(' '); C_putc('\\'); } C_newline(); if (out_state!=literal) out_state=no_space; } @ Apart from alphanumeric characters and underscores, we allow identifiers to contain characters in the range |0x80<=c<=UCHAR_MAX|, if a translation into a string of legal characters is known to \.{\me.}; such translations can be specified by \:l directives in limbo. These translations are stored in an array |c_trans|. 
@d trans_limit 9 /* maximal length of a translation string */ @d trans_of(c) c_trans[(unsigned char)(c)-0x80] @d translation_exists(c) (trans_of(c)[0]!='\0') /* non-empty translation */ @< Global var... @>= char c_trans[UCHAR_MAX+1-0x80][trans_limit+1]; @~In compatibility mode, default translations exist of the form `\.X\\{NN}', where \\{NN} is the (two digit) upper case hexadecimal representation of the character in question. Otherwise the static initialisation will have made all translation strings empty. @< Set init... @>= if (compatibility_mode) @/{@; unsigned char c=UCHAR_MAX; do sprintf(trans_of(c),"X%X",c); while (--c>=0x80); } @ In case of identifiers the name indexed by |cur_val| is written out. The translation of characters in the range from |0x80| upwards is performed here, rather than on input of such characters, since this is slightly more convenient. The check that |translation_exists| was done on input, however. @< Output an identifier @>= { char* p=name_begin(id_at(cur_val)); int l=0; if (out_state==num_or_id && line_output) C_putc(' '); do if ((unsigned char)(*p)<0x80) {@; C_putc(*p);++l; } else {@; char* q=trans_of(*p); do {@; C_putc(*q); ++l; } while (*++q!='\0'); } while (*++p!='\0'); out_state=num_or_id; if (!line_output) do append_white(' '); while (--l>0); }
The language definition is not explicit about when such a space is mandatory, but examples are the somewhat unusual constructions | x /@, *p |, | a++ + b != a + ++b| and |sum = a - -b|; certain backward \Cee~compilers even think |x=-x| is ambiguous without a space before the minus sign. Rather than trying to detect precisely the problematic cases we take the conservative approach of always putting in a space, unless the second operator is~`\.='; this exception is made because we store operators like `\.{+=}' as two separate tokens, but they form a single token according to the \caps{ANSI/ISO} standard. Note that other multi-character operators like `\.{\&\&}' stay intact because they will be compressed on input, and therefore do not involve the present section. @= case '+': case '-': case '*': case '/': case '%': case'?': case '<': case '>': case '&': case '|': if (out_state==operator && line_output) C_putc(' '); /* fall through */ case '=': C_putc(c); out_state=operator; break; @ Compilers don't mind doing repetitive work, so we use a macro to produce several almost identical cases. Had the cases been much longer or more numerous however, then combining the cases and using an array of strings would have been preferable. @d comp_op(op) (C_printf(out_state==operator && line_output ? " %s" : "%s",op) ,out_state=operator) @= case plus_plus: comp_op("++"); break; case minus_minus: comp_op("--"); break; case minus_gt: comp_op("->"); break; case gt_gt: comp_op(">>"); break; case eq_eq: comp_op("=="); break; case lt_lt: comp_op("<<"); break; case gt_eq: comp_op(">="); break; case lt_eq: comp_op("<="); break; case not_eq: comp_op("!="); break; case and_and: comp_op("&&"); break; case or_or: comp_op("||"); break; @* Introduction to the phase I. We have now seen that \.{\me.} will be able to output the full \Cee\ program, if we can only get that program into the byte memory in the proper format. 
The input process is something like the output process in reverse, since we compress the text as we read it in and we expand it as we write it out (but of course, as any programmer knows, input requires a bit more work than the corresponding output). To preserve the symmetry we shall proceed in a bottom-up fashion in presenting this part of the program. The basic character input routines are defined in the code shared with \.{CWEAVE}. At the next higher level there are three main input routines. The most interesting is the one that gets the next token of a \Cee\ text; the other two are used to scan rapidly past \TeX\ text in the \.{CWEB} source code. One of the latter routines will jump to the next token that starts with `\.{@@}', and the other skips to the end of a \Cee~comment. @ Control codes in \.{CWEB} begin with `\.{@@}', and the next character identifies the code. Some of these are of interest only to \.{CWEAVE}, so \.{\me.} ignores them; the others are converted by \.{\me.} into internal code numbers. The code numbers have been chosen such that they are distinct from ordinary characters (so that a function can return either a character or a control code), and their ordering is such as to simplify the program logic; larger numbers are given to the control codes that denote more significant milestones. This order is only relevant starting from |format| however. The code \:> is treated as ignored because it should not occur in places where we are not searching for it. The following enumeration lists the non-character values that may be returned from~|get_next|; therefore the two values |id_code| and |constant| are included although they do not belong to any control code. 
@= enum @/ { ignore=0x80, /* control code of no interest to \.{\me.}, or \:> */ id_code, constant, /* token codes but not control codes */ verbatim, /* control code for \:= */ at_sign_image, /* control code for \:@@ */ join_code, /* control code for \:\& */ ord, /* control code for \:' */ control_text, /* control code for \:t, \:\^, etc. */ include_preproc, /* control code for \:p */ char_trans, /* control code for \:l */ format, /* control code for \:f */ definition, /* control code for \:d */ header, /* control code for \:h */ begin_C, /* control code for \:c */ module_name, /* control code for \:< or \:( */ new_section /* control code for \:\ , \:\~, or \:* */ }; @ The conversion from the character following `\.@@' to the corresponding numeric code is performed by means of the table~|ccode|. Since we don't know whether characters are signed or not, we always access |ccode| via the macro |code_of|. @d code_of(c) ccode[(unsigned char)(c)] @= eight_bits ccode[UCHAR_MAX+1]; /* meaning of a char following `\.@@' */ @~Here we initialise |ccode|. The code for~\:v is the only case where a value stored in the |ccode| table is an ordinary character; this is done so that it may be used in the \Cee~part of a section to stand for~`|@v|', even though there is no need for using it there. @= { unsigned char c=0; do ccode[c] = isspace (c) ?
new_section : ignore; while(c++!=UCHAR_MAX); @/ccode['v']=ccode['V']='|'; @/ccode['=']=verbatim; @/ccode['@@']=at_sign_image; @/ccode['&']=join_code; @/ccode['\'']=ord; @/ccode['^']=ccode['?']=ccode['.']=ccode[':']=ccode['#']= ccode['t']=ccode['T']=ccode['q']=ccode['Q']=control_text; @/ccode['p']=ccode['P']=include_preproc; @/ccode['l']=ccode['L']=char_trans; @/ccode['f']=ccode['F']=ccode['s']=ccode['S']=format; @/ccode['d']=ccode['D']=definition; @/ccode['h']=ccode['H']=header; @/ccode['c']=ccode['C']=begin_C; @/ccode['<']=ccode['(']=module_name; @/ccode['~']=ccode['*']=new_section; if (compatibility_mode) @< Reset some control codes to match Levy/Knuth \.{CWEB} @> } @ In \.{CWEBx} there are a few control codes that also exist in \LKC. but have a different meaning. In compatibility mode we reassign the meaning of these codes to that of \LKC., making their usual function inaccessible, since it is not intended that hybrid programs should be written using the codes of \LKC. together with features particular to \.{CWEBx}. For \.{\me.} the change of meaning of \:: (which is used instead of \:? in \LKC.) has no effect since it introduces a control text anyway. @< Reset some control codes... @>= { ccode['h']=ccode['H']=include_preproc; /* \:h means \:p */ ccode['p']=ccode['P']=begin_C; /* \:p means \:c */ ccode['#']=ignore; /* \:\# means \:) */ } @ The function |skip_ahead| reads through the input at fairly high speed until finding the next non-ignorable control code, which it returns. It is used to skip over \TeX~text, and text in limbo. It uses the fact that |get_line| places a~|' '| at~|*limit|, so that placing the sentinel cannot inadvertently create a token \:@@. 
@c eight_bits skip_ahead (void) /* skip to next control code */ { eight_bits c; /* control code found */ while (find_char()) { limit[1]='@@'; /* place a sentinel */ while (*loc++!='@@') {} if (loc<=limit && (c=code_of(*loc++))!=ignore) return c; } return new_section; } @ The function |skip_comment| reads through the input until finding the end-comment token `\.{*/}' or a newline, or in case of a \Cpp\ one-line comment (starting with `$/\!/$') just until finding a newline. The |one_liner| parameter tells whether the latter applies, and a boolean result is returned telling whether a newline in the middle of a comment was scanned (which can only happen if |one_liner==false|). Returning such a newline is necessary so that each newline in the \Cee\ part of a section may be copied to the output; otherwise the \&{\#line} commands inserted into the \Cee\ file by the output routines become useless. When |line_output==false|, all characters encountered are also stored, so that they can be output along with the \Cee~code. The function gives an error message if it encounters a sequence `\.{/*}', since this almost certainly means the user has forgotten to close the comment. In this way we avoid the most pernicious consequence of \Cee's rule that nested comments are forbidden, namely that such an omission often leads to a syntactically correct, but unintended program. This means that one cannot write either `\.{/*}' or `\.{*/}' in a comment; this difficulty can usually be circumvented by inserting something like `\.{\{\}}' between the two characters. As a safety measure, |skip_comment| comes to an end if it runs into the next section, and prints an error message; it is slightly non-trivial that this test does not interfere with searching for the end of the comment. If a module name occurs in a comment (presumably in `\pb'), it is scanned by the normal routines that will be given later: this could be the only place where the name is spelled out in full, and/or using \:(.
@c boolean skip_comment (boolean one_liner) /* skips over comments */ { char c; /* current character */ do { if (loc>=limit) if (one_liner) return false; else if (get_line ()) return true; else {@; err_print("! Input ended in mid-comment"); return false; } @.Input ended in mid-comment@> if ((c=*loc++)=='/' && *loc=='*') err_print("! `/*' inside comment, did you forget `*/' before? "); @.`/*' inside comment...@> if (c=='@@') { eight_bits cc=code_of(*loc++); if (cc==new_section) /* watch out for \:\ , \:\~, and \:* */ @/{@; err_print("! Section ended in mid-comment"); loc-=2; return false; } @.Section ended in mid-comment@> if (cc==module_name) @/{@; @< Scan the module name... @> continue; } } if (!line_output) {@; store_byte(c); if (c=='@@' && (c=loc[-1])!='@@') store_byte(c); } } while (c!='*' || *loc!='/' || one_liner); ++loc; @+ if (!line_output) store_byte('/'); return false; } @* Getting the next token. We now come to the function |get_next|, which does more extensive scanning. It will move the input pointer over a single token of input, and if that token consists of a single character that is not an identifier or a number, it will return that character; certain two-character operators are also returned as a single character representing the operator. In the case of longer tokens, it will return one of the codes of the enumeration given earlier, after possibly setting some global variables in such a way that the complete token can be retrieved. For numeric constants |id_first| and |id_loc| will be made to point to the beginning and end of the token within~|buffer|. For string constants, which need not reside in a single line of input, the characters are copied into the separate buffer |mod_text| and this is where |id_first| and |id_loc| will then point. Identifiers and module names are even looked up, and |cur_id| or |cur_mod| is made to point to the name found. 
In the case of single-character identifiers or bad module names, which are not entered into the name tables, |cur_id| respectively |cur_mod| is set to~|NULL|; in the former case |id_first| is made to point to the character that forms the identifier. @= id_pointer cur_id; /* identifier just scanned */ mod_pointer cur_mod; /* module name just scanned */ @ As one might expect, |get_next| consists mostly of a big switch that branches to the various special cases that can arise. The static flag |preprocessing| is raised while we are scanning an explicit preprocessor directive (i.e., not one introduced by \:d or \:h). The static flag |comment_continues| is raised when |get_next| must return a newline while scanning a comment, so that it will remember what it was doing the next time it is called. Although we allow 8-bit character input, any characters |c>=0x80| should be contained in a comment or a character or string constant, or in case |translation_exists(c)| an identifier, since they cannot otherwise be part of a valid \Cee~symbol. @c eight_bits get_next (void) /* produces the next input token */ { static boolean preprocessing=false; /* did this line start with `\.\#'? */ static boolean comment_continues=false; /* were we scanning a comment? */ eight_bits c; /* the current character */ restart: @< Handle cases involving newlines and comments; if a token is found |return| it, or |goto restart| if it is ignored; otherwise read the next character into |c| @> if (c=='L' && (*loc=='\'' || *loc=='\"')) {@; get_string(); return constant; } if (c<0x80 ? isalpha(c) || c=='_' : translation_exists(c)) @/{@; @< Get an identifier @> return id_code; } if (c>=0x80) {@; err_print("! Illegal 8-bit character"); goto restart; } @.Illegal 8-bit character@> if (isdigit(c) || c=='.' 
&& isdigit((eight_bits)*loc)) @/{@; @< Get a numeric constant @> return constant; } switch(c) { case '\'': case '"': get_string(); return constant; case '@@': @< Get a control code and possibly a complete module name, and either |return| it, or |goto restart| if control code is |ignore| @> @\@< In applicable cases |return| compressed two-symbol operator @> } return c; } @ The flag |preprocessing| is raised when the first character of a line is~`\.\#', and is lowered at a non-escaped newline; while it is raised spaces are not ignored. The test whether a newline is escaped is not entirely sound, since a final `\.{\\}' might be part of a \:\\ control code or of a \Cpp\ one-line comment (after `$/\!/$'); a completely correct check would be more difficult, and leaving |preprocessing| raised a bit too long does no real harm. Contrary to |preprocessing|, the flag |comment_continues| will always be given a new value on the first call to |get_next| after it is set, and therefore can remain set only if |skip_comment| finds another newline. @< Handle cases involving newlines... @>= { if (loc>=limit) { if (preprocessing && limit>buffer && limit[-1]!='\\') preprocessing=false; return get_line() ? '\n' : new_section; } if (comment_continues ||(c=*loc++)=='/' && (*loc=='*' || C_plus_plus && *loc=='/')) @< Scan to the end of the comment or of the current line, setting |comment_continues| to indicate which case applies; in the former case |goto restart|, in the latter |return '\n'| @> if (isspace(c)) if (line_output) if (preprocessing) return ' '; @+ else goto restart; /* ignore other spaces */ else return c; /* when |line_output==false| preserve white space faithfully */ if (c=='#' && loc==buffer+1) preprocessing=true; } @ When we come to this code |loc| either points to the beginning of a multi-line comment or to the second character of `\.{//}' or `\.{/*}'; in the latter case we must advance |loc| before looking for `\.{*/}', lest `\.{/*/}' would be scanned as a complete comment.
Another subtlety arises when both |preprocessing| and |comment_continues| are raised, which means that a preprocessor line has ended inside a comment; although in \Cee\ this means that the preprocessor line continues, \.{\me.} will terminate it because the comment is removed and a non-escaped newline remains. The warning message below can be ignored by the user unless something follows the comment, but the proper way to resolve this is to start the comment on the line after the preprocessor line, which avoids all confusion. @< Scan to the end of the comment... @>= { boolean one_liner=false; if (!comment_continues) { if (!line_output) {@; store_byte('/'); store_byte(*loc); } one_liner=*loc++=='/'; /*/ record kind of comment, and advance to comment proper {/}*/ } else if (preprocessing) { print("\nWarning: Multi-line comment in preprocessor line"); @.Multi-line comment...@> preprocessing=false; mark_harmless(); } if (comment_continues=skip_comment(one_liner)) /* scan to end of comment or newline */ return '\n'; /* comment contains a newline; |get_line| has been called */ else goto restart; /* comment complete, get next token */ } @ The following code assigns values to the combinations `\.{++}', `\.{--}', `\.{->}', `\.{>=}', `\.{<=}', `\.{==}', `\.{<<}', `\.{>>}', `\.{!=}', `\.{\v\v}' and `\.{\&\&}'. The compound assignment operators (e.g., `\.{+=}') are separate tokens, according to {\sl The C Reference Manual\/} (so there may be white space and even comments in between), but according to the \caps{ANSI/ISO} standard they are single tokens. We have not allocated single-byte codes for them, so we scan them as two separate symbols; these will be adjacent on output. Therefore, as far as \.{\me.} is concerned, one may write a separation between the two characters even if the \Cee-compiler does not accept this; however, this practice should still be avoided because it will confuse the parser of |CWEAVE|, leading to ill-formatted output. 
@d compress(char2,code) @+ if (*loc==char2) return ++loc,code @; @< In applicable cases... @>= case '+': compress('+',plus_plus); break; case '-': compress('-',minus_minus); compress('>',minus_gt); break; case '=': compress('=',eq_eq); break; case '>': compress('=',gt_eq); compress('>',gt_gt); break; case '<': compress('=',lt_eq); compress('<',lt_lt); break; case '&': compress('&',and_and); break; case '|': compress('|',or_or); break; case '!': compress('=',not_eq); break; @ Identifiers are looked up unless they consist of a single character |c<0x80|, in which case that character can be found as |*id_first|; there is no need to set |id_loc| in either case. In case any 8-bit characters appear in identifiers, they are only stored in the name table, and cannot interfere with any of the special codes we use. @< Get an identifier @>= { id_first=--loc; /* mark beginning of identifier */ do c=*++loc; while (c<0x80 ? isalnum(c) || c=='_' : translation_exists(c)); cur_id= loc==id_first+1 && (eight_bits)(*id_first)<0x80 ? NULL : id_lookup(id_first,loc,0); } @ Scanning numeric constants is straightforward. The only subtle point is that we must not call functions like |isdigit| with an argument of type |char|, like |*loc|, since that may fail for 8-bits characters if |char| is a signed type; therefore we copy characters into |c|, which has type |unsigned char|, before applying such functions. We don't mind \.{8}'s and \.{9}'s appearing in octal constants (although the \Cee~compiler will), so there is no need here to distinguish between octal and decimal constants. 
@< Get a numeric constant @>= { if (*(id_first=loc-1)=='0' && tolower((eight_bits)*loc)=='x') /* hex constant */ do c=*++loc; while (isxdigit(c)); else /* octal, decimal or float constant */ { while (isdigit(c)) c=*loc++; if (c=='.') @+ do c=*loc++; while (isdigit(c)); if (tolower(c)=='e') /* floating point constant with exponent */ { if ((c=*loc)=='+' || c=='-') c=*++loc; while (isdigit(c)) c=*++loc; } else --loc; /* back up to first character after constant */ } while (isalpha(c)) c=*++loc; /* incorporate any `\.{U}', `\.{L}', or `\.{F}' suffixes */ id_loc=loc; } @ After an `\.@@' sign has been scanned, the next character tells us whether there is more work to do. Although control codes like \:\^ and \:t are ignored by \.{\me.}, their control text has to be skipped, which is performed by |get_control_text|. Verbatim constructions also use |get_control_text|, but then return the |verbatim| control code, unless their control text was empty, in which case they are ignored. @< Get a control code... @>= { eight_bits cc=code_of(*loc++); switch(cc) { case ignore: goto restart; case control_text: get_control_text(); goto restart; case verbatim: if (get_control_text()) goto restart; @+ else break; case ord: @ @+ break; case module_name: @ } return cc; } @ Before we scan a module name, we record whether it started with \:(, so that we may record it afterwards as an auxiliary file name if necessary. @= { boolean file_module= loc[-1]=='('; /* does this module define an output file? */ cur_mod=get_module_name(); if (file_module && cur_mod!=NULL) @ } @ There is no reason why we should allow a newline within an \ASCII. constant, even if it is escaped. @= id_first=loc; /* first character after opening quote */ while (*loc!='\'') { if (*loc++=='\\') loc++; /* accept any character following backslash */ if (loc>=limit) {@; err_print("! 
ASCII constant didn't end"); break; } @.ASCII constant didn't end@> } id_loc=loc++; /* move past closing quote */ @* Scanning a piece of \Cee\ text. Having acquired the skills of skipping over text and isolating tokens that really matter, we are now ready to tackle complete pieces of \Cee~text. The rules for generating the replacement texts for preprocessor directives following \:d and \:h and for bodies of sections are almost identical; the only differences are that \yskip \item{a)} Module names are not possible in preprocessor directives; indeed, the appearance of a module name terminates such directives and starts the \Cee~part of the section. \item{b)} The codes \:d, \:h, \:f, and \:c are not allowed in a \Cee~part, while they terminate preprocessor directives. \item{c)} The code \:p is not allowed inside preprocessor directives. \yskip \noindent Therefore there is a single function |scan_repl| with a parameter |context| indicating which of the two kinds of replacement texts is being scanned; its value is either |preproc_directive| or |section_body|. After |scan_repl| has acted, |cur_text| will point to the replacement text just generated, and |next_control| will contain the control code that terminated the activity. @d preproc_directive 0 @d section_body 1 @= text_pointer cur_text; /* replacement text formed by |scan_repl| */ eight_bits next_control; /* control code which has already been scanned */ @ We will try to reduce storage requirements by discarding any trailing newlines both after preprocessor directives and section bodies. This does not affect the line sequencing indicated by \&{\#line} directives in either case, since in the former case such directives are not issued in the first place, and in the latter case a new \&{\#line} directive will be issued before any further code is added. 
Note that we cannot reliably recognise newlines in a right-to-left motion when the end of a replacement text is reached, since there is a mixture of one-byte, two-byte and five-byte tokens. Therefore, instead of tracing backwards, we mark the position of the last non-discardable token every time one is appended, and back up to the last position recorded when reaching the end of a replacement text. Given this mechanism, it is easy to discard other things than newlines, such as \&{\#line} directives that apply to no lines at all, and to back up at other occasions where the line sequencing is interrupted as well, like before applied module names. This is also a good place to add a simple but useful check, namely that in each section and macro replacement text the parentheses and braces should be matched (failure in this respect can lead to syntax errors that are very hard to locate in the \.{CWEB} source, since the real problem may be buried inside a module name or macro). @c void scan_repl (eight_bits context) /* creates a replacement text */ { eight_bits a; /* the current token */ eight_bits* keep=tok_ptr; /* non-discardable stuff up to this point */ int brace_level=0, par_level=0; if (context==section_body) @< Insert the line number into |tok_mem| @> do @< Scan and store a token, setting |keep=tok_ptr| afterwards unless it is discardable; when the replacement text has ended |goto done| @> while (true); done: tok_ptr=keep; /* backup over trailing newlines and \&{\#line} marks */ next_control=a; /* this control code must be reconsidered */ if (par_level>0 || brace_level>0) @< Report unmatched opening symbols @> @< Wrap up the accumulated bytes into a completed text, pointed to by |cur_text| @> } @ When the function |scan_repl| is called we have either just scanned \:h or the identifier following \:d or a defining occurrence of a module name (with the following `\.='); in all cases we start by calling |get_next|. @< Scan and store a token... 
@>= { switch (a=get_next()) { @\@< Cases where |a| is a special token (|id_code|, |module_name|, etc.): either store the corresponding bytes and |continue|, or |goto done| if |a| signals the end of this replacement text@> case format: case definition: case header: case begin_C: if (context==preproc_directive) goto done; err_print ("! `@@f', `@@d', `@@h', and `@@c' are ignored in section body"); @.`@@f', `@@d', ... are ignored...@> continue; case new_section: goto done; @\@< Cases that keep track of |par_level| and |brace_level| @> case '\n': store_byte('\n'); if (context==section_body && print_where) /* input file was switched */ { tok_ptr=keep; /* back up over discardable items */ @< Insert the line number into |tok_mem| @> } continue; case join_code: a=join; } store_byte(a); keep=tok_ptr; /* mark as non-discardable */ } @ We test matching parentheses and braces separately because it is easier, and in the unlikely case that a mismatch should go undetected the compiler will surely report an error that is easy to locate. All parentheses and braces that are returned from |get_next| are real ones, i.e., not part of a string or comment. @< Cases that keep track of |par_level| and |brace_level| @>= case '(': ++par_level; break; case ')': if (par_level<=0) {@; err_print("! Unmatched closing parenthesis"); continue; } @.Unmatched closing...@> --par_level; break; case '{': ++brace_level; break; case '}': if (brace_level<=0) {@; err_print("! Unmatched closing brace"); continue; } --brace_level; break; @ The most difficult part of matching parentheses and braces is reporting any errors at the end of the macro or section in proper English. We supply the missing closing symbols so that there is a slightly larger chance that the produced \Cee~code will compile. Note that there is no need to update |keep| here, since no backing up will follow. @< Report unmatched opening symbols @>= { char *p, *s; int l; if (par_level>0) l=par_level, s="parenthes", p= l>1 ? 
"es" : "is"; else l=brace_level, s="brace", p=l>1 ? "s" : ""; print("\n! There %s %d unclosed %s%s" ,par_level+brace_level>1 ? "are" : "is", l, s, p ); @.There are unclosed...@> if (par_level>0 && brace_level>0) print(" and %d unclosed brace%s" , brace_level, brace_level>1 ? "s" : ""); print(" in the previous "); err_print(context==preproc_directive ? "macro" : "section"); while (--par_level>=0) store_byte(')'); while (--brace_level>=0) store_byte('}'); } @ When a module reference is inserted the line sequencing is interrupted, so we must restore it afterwards; on the other hand newlines before the insertion can be discarded. @< Cases where |a| is...@>= case id_code: @< Store the identifier just scanned @> @+ keep=tok_ptr; continue; case module_name: if (context==preproc_directive) goto done; if (cur_mod!=NULL) /* don't record bad module name */ { sixteen_bits n = mod_index(cur_mod); /* index of module name */ @< If this looks like a defining occurrence, report a runaway section @> if (line_output) tok_ptr=keep; /* back up */ store_two_bytes(0xA800+n); keep=tok_ptr; /* store reference to module name */ @< Insert the line number into |tok_mem| @> /* to get in phase after module insertion */ } continue; case constant: case verbatim: @/@< Copy a constant or verbatim construction @> @+ keep=tok_ptr; continue; case ord: @ @+ keep=tok_ptr; continue; case include_preproc: if (context==preproc_directive) err_print("! `@@p' is forbidden in preprocessor directive"); @.`@@p' is forbidden...@> else { if (line_output) tok_ptr=keep; store_byte(0xFA); atp_seen=true; keep=tok_ptr; @< Insert the line number... @> } continue; case char_trans: err_print("! `@@l' is only allowed in limbo"); @.`@@l' is only allowed in limbo@> continue; @ Identifiers of length~$1$ have no name pointer, but |id_first| still points at the relevant character. @< Store the identifier... 
@>= if (cur_id==NULL) store_byte(*id_first); /* single-character identifier */ else store_two_bytes(0x8000+id_index(cur_id)); @ Here we look to see whether a module name is followed by~`\.{=}' or~`\.{+=}' (whether another `\.{=}' follows is irrelevant at this point), in which case we assume that introduces the \Cee-part of a new section, of which we somehow missed the opening \:\ . Note that this need not limit the use of module names standing for single expressions, even if followed by the `\.{=}' or the `\.{+=}' operator, since such names should be followed by `\.{@@;@@;}' for proper treatment by \.{CWEAVE}. @< If this looks like a defining occurrence, report... @>= { char *p=loc; while (*p==' ' && p } @ Constants of length one can only be single digit numbers, so they can be stored in a single (unquoted) byte like single letter identifiers. Inside strings (and verbatim constructions) {\it any\/} character may in principle show up, including the character |verb_quote| used to delimit the verbatim storage of the string. Even if this is hardly a sign of good programming, the effect on output would be so disastrous that some counter measure is called for: we replace |verb_quote| by its three-digit octal escape code, which the user should have written to begin with. Because a \:i line or a change file match can occur inside a string, we must test |print_where| at the end of a string. @< Copy a constant... @>= if (id_loc==id_first+1) store_byte(*id_first); /* single digit number */ else { store_byte(verb_quote); /* opening */ do if (*id_first==verb_quote) { ++id_first; store_byte('\\'); store_byte('0'+(verb_quote>>6)); store_byte('0'+((verb_quote>>3)&7)); store_byte('0'+(verb_quote&7)); } else { if ((eight_bits)(*id_first)>=0x80) store_byte(0xF8); /* quote 8-bit characters */ store_byte(*id_first++); } while (id_first } @ This section should be rewritten on machines that don't use \ASCII. code internally. 
It is probably best to use a table indexed by characters and containing \ASCII. codes for this purpose (like the |xord| array in \TeX). Incidentally, it would be tempting to write statements like `|case'\\': c=@'\\';|' here, which would in principle be correct in any other place, but which would cause circularity in a bootstrapped system like this. @^ASCII code dependencies@> @< Translate an \ASCII. constant @>= { int c=*id_first++; if (c=='@@' && *id_first++ !='@@') @/{@; err_print("! Double `@@' should be used in ASCII constant"); @.Double `@@' should be used...@> --id_first; } if (c=='\\') { c=*id_first++; switch (c) { case 't':c='\011';break; case 'n':c='\012';break; case 'b':c='\010';break; case 'f':c='\014';break; case 'v':c='\013';break; case 'r':c='\015';break; case '0':c='\0';break; case '\\':c='\134';break; case '\'':c='\047'; break; case '\"':c='\042'; break; default: err_print("! Unrecognised escape sequence"); @.Unrecognised escape sequence@> } } else { /* at this point |c| should be converted to its \ASCII. code number */ @+} if (id_first!=id_loc) if (id_loc>id_first) err_print("! ASCII constant should be single character"); @.ASCII constant should be...@> else {@; err_print("! Empty ASCII constant"); c=0; } @.Empty ASCII constant@> store_byte(verb_quote); if (c>=100) store_byte('0'+c/100); @+ if (c>=10) store_byte('0'+(c/10)%10); store_byte('0'+c%10); @/ store_byte(verb_quote); } @ Line number marks, which will lead to \&{\#line} directives on output, are inserted in several places, so we shall call a function to do the necessary work. For reasons explained below this function scans ahead a bit through the input, and in doing so it may hit the end of the input; when this happens it must be reported back to |scan_repl|. Therefore |mark_line| returns a boolean value which tells whether |input_has_ended|. 
@< Prototypes @>= boolean mark_line(void); @~When |mark_line| returns |true|, we should terminate |scan_repl| with |next_control==new_section|; this is achieved by jumping to |done|, where some exitialisations are done, and where |next_control| is assigned from the variable~|a|. @< Insert the line... @>= {@; if (mark_line()) {@; a=new_section; goto done; } } @~If |line_output==true|, the following items are stored for recording the line number: first a byte equal to |0xF9|, then the numeric line number, and finally the |id_index| of the file name, which is looked up as if it were an identifier (except that we use the feature of |id_lookup| to accept null-terminated strings as first argument if the second argument is~|NULL|). Note that |keep| is not increased; if nothing essential follows this line number indication then it need not be recorded. We improve compactness of storage and output a bit by discarding any newlines that are immediately ahead; the inserted line number will automatically be increased properly. While reading we might find there is no input left, in which case no line number is recorded and |true| is returned. @c boolean mark_line(void) { while (loc>=limit) @+ if (!get_line()) return true; /* skip over newlines */ print_where=false; /* request is being serviced */ if (line_output) { store_byte(0xF9);@/ id_first=changing ? change.name : cur_file_name; store_two_bytes(changing ? change_line : cur_line); store_two_bytes(id_index(id_lookup(id_first,NULL,0))); } return false; } @* Scanning a section. The function |scan_section| starts when \:\ , \:\~, or \:* has been sensed in the input, and it proceeds until the end of that section. It uses |section_count| to keep track of the current section number; hopefully \.{CWEAVE} and \.{\me.} will both assign the same numbers to sections. 
@c void scan_section (void) { ++section_count; if (loc[-1]=='*') print_section_progress(); @ @ } @ For \.{\me.} there is no real difference between the \TeX\ and definition parts of a section; after all, the latter can contain \&{format} commands that are ignored just like \TeX~text is. So we set up a loop that stops when |next_control>=begin_C|, and handles the cases |definition| and |header|, while ignoring anything else. The price we pay for our haste in skipping the \TeX~part of a section (and \:f commands) is that we may have to back up after finding \:< in order to scan the whole module name and set |cur_mod|, which requires calling~|get_next|. In case \:< follows a |definition| or |header| code, it is scanned by |scan_repl|, and no backing up is required; therefore the code for backing up and rescanning cannot be moved outside the loop. @= { next_control=ignore; /* clear |new_section| */ do { if (next_control scan_repl(preproc_directive); cur_text->text_link=macro_flag; /* characterise as macro */ } else /* |next_control==header| */ {@; scan_repl(preproc_directive); cur_text->text_link=header_flag; } if (next_control==module_name) @< If this is not a defining occurrence, set |next_control=ignore| @> } while (next_control= { do next_control=get_next(); /* allow white space before identifier */ while (line_output ? next_control=='\n' : next_control<0x80 && isspace(next_control)); if (next_control!=id_code) @/{@; err_print("! Macro definition ignored, must start with identifier"); @.Macro definition ignored...@> continue; } @< Store the id... @> if (isspace((eight_bits)*loc)) store_byte(' '); /* separate the replacement text from a parameterless macro */ } @ Due to the fact that module names are allowed enclosed in `\pb', it is not certain that the occurrence of a module name signals the start of the \Cee~part of the section. 
The decisive factor will be what follows the module name: if this is `\.=' or `\.{==}', possibly preceded by `\.+', then this is really the name of the module defined in this section. If not, then we require that the next token is a `\.\v' (the one closing `\pb'), unless we are in compatibility mode, in which case anything goes. We use the fact that scanning the `\.+' and `\.=' symbols will not destroy the value in |cur_mod|. @< If this is not a defining occurrence, set |next_control=ignore| @>= { eight_bits t=get_next(); if (t=='+') t=get_next(); /* skip an optional `\.+' */ if (t!='=' && t!=eq_eq) /* then apparently a cited occurrence, ignore it */ { next_control=ignore; if (t!='|' && !compatibility_mode) err_print("! `=' sign missing, module name ignored"); @.`=' sign missing, module name ignored@> } } @ If we come to this module with |next_control==module_name|, then the code of the previous section has just removed the `\.=' or `\.{==}' and optional `\.+' following the module name. @= { mod_pointer p; /* module name for the current section */ switch (next_control) { default: return; /* no \Cee\ part present */ case begin_C: p=NULL; break; /* section contributing to unnamed module */ case module_name: p=cur_mod; /* section contributing to named module */ } store_two_bytes(0xD000+section_count); /* record section number */ scan_repl(section_body); /* now |cur_text| points to the replacement text */ @< Link the section body at |cur_text| to the module named |p|, or to the unnamed module if |p==NULL| @> } @ Sections whose body starts with a bad name (e.g., an ambiguous prefix) will be treated as if they were unnamed, so the user had better not ignore the error message. Unnamed sections get a slightly special treatment, which makes accumulating many of them a bit more efficient than building up a named module that is defined in many sections. 
@= { if (p == NULL) /* unnamed section, or bad module name */ {@; *last_unnamed = cur_text; last_unnamed = &cur_text->text_link; } else { text_pointer* q=&p->equiv; /* text for the current module */ while(*q!=NULL) q=&(*q)->text_link; /* find end of list */ *q=cur_text; /* add section to module */ } cur_text->text_link=NULL; /* end list, also marking replacement text as a non-macro */ } @* Phase I processing. Finally we can wrap everything up and define the global structure of~Phase~I. @= void phase_one (void); /* read all the user's text and compress it into |tok_mem| */ @~The only thing left to do is picking up character translations in the limbo part. @c void phase_one (void) { phase=1; section_count=0; reset_input(); while ((next_control=skip_ahead())!=new_section) if (next_control==char_trans) @< Store a character translation @> while (!input_has_ended) scan_section(); /* load all sections */ check_complete(); /* verify that change file hasn't got out of sync */ } @ The code \:l should be followed (after optional space) by a two-digit hex number~|c| with |c>=0x80|, white space, and a translation string of at most |trans_limit| characters, either alphanumeric or underscores, terminated by another space. @< Store a character translation @>= { int c; while (loc else if (sscanf(loc,"%x",&c),c<0x80) err_print("! You cannot translate characters < 0x80"); @.You cannot translate...@> else { char* p=trans_of(c); int i=0; loc+=3; @+ while (find_char() && isspace((eight_bits)*loc)) ++loc; /* skip space */ if (!input_has_ended) while (isalnum((eight_bits)*loc) || *loc=='_') if (++i<=trans_limit) *p++=*loc++; @+ else break; if (i>0) *p='\0'; /* terminate translation unless none was given */ @< Report any problems with the translation string @> } } @ The main contribution of the \:l feature to \.{CWEB} is new syntax, leading to a variety of new error messages. 
It is possible to give an empty translation for a character by terminating the translation string by a non-space character that is not allowed in identifiers; this is forbidden however, since an empty translation for~|c| would just make |translation_exists(c)| fail, making~|c| useless anyway. Even if the translation string is not empty, terminating it by a non-space character is considered an error, since this would make the \TeX\ macro used for formatting the corresponding line of the printed document fail. @< Report any problems... @>= if (i==0) err_print("! Translation string absent after `@@l'"); @.Translation string...@> else if (i>trans_limit) err_print("! Translation string too long"); else if (!isspace((eight_bits)*loc)) err_print("! Translation string not terminated by space"); @ At the end of the run, if |STAT| was defined and the `\.{+s}' flag present, we report how much of all the arrays was actually needed. @d report(k,c,m) printf("%lu %ss (out of %lu)\n",(unsigned long)(c),k,(unsigned long)(m)) @c #ifdef STAT void print_stats (void) { print("\nMemory usage statistics:\n"); @/report("identifier", id_index(id_ptr), max_idents); @/report("module name", mod_index(mod_ptr), max_modules); @/report("byte", byte_ptr-byte_mem, max_bytes); @/report("replacement text", text_ptr-text_table, max_texts); @/report("token", tok_ptr-tok_mem, max_toks); } #endif @* Index. Here is a cross-reference table for the \.{\me.} processor. All sections in which an identifier is used are listed with that identifier, except that reserved words are indexed only when they appear in format definitions, and the appearances of identifiers in module names are not indexed. Underlined entries correspond to where the identifier was declared. Error messages and a few other things like ``\ASCII. code dependencies'' are indexed here too. cwebx-3.04.orig/cweave.w100644 1750 1750 500633 6470042042 13031 0ustar jdgjdg% This file is part of CWEBx. 
% This program by Marc van Leeuwen based on earlier versions by % D. E. Knuth, Silvio Levy and Frank Jensen. % It is distributed WITHOUT ANY WARRANTY, express or implied. % CWEB (Revision: 2.0) % Don Knuth, July 1990 % Version 3.x, Marc van Leeuwen, December 1993 % CWEBx 2+1.0, Marc van Leeuwen, August 1994 % CWEBx 3.0, Marc van Leeuwen, January 1995 % CWEBx 3.02, Marc van Leeuwen, April 1996 % CWEBx 3.03, Marc van Leeuwen, January 1998 % Copyright (C) 1987,1990 Silvio Levy and Donald E. Knuth % Copyright 1994 Marc A. A. van Leeuwen % Permission is granted to make and distribute verbatim copies of this % document provided that the copyright notice and this permission notice % are preserved on all copies. % Permission is granted to copy and distribute modified versions of this % document under the conditions for verbatim copying, provided that the % entire resulting derived work is distributed under the terms of a % permission notice identical to this one. \def\me.{CWEAVE} \def\myroots{\.{WEAVE}} % Here is TeX material that gets inserted after \input cwebxmac \def\xr.{cross-reference} \def\Cpp{\Cee\PP} \def\TeXxstring{\TeX\_string} \def\TeXxlike{\TeX\_like} \def\skipxTeX{skip\_\TeX} \def\copyxTeX{copy\_\TeX} @i intro.inc % Here is some text that matches the start of CWEAVE @d banner "This is CWEAVE (Version x"@+version_string@+")\n" @ The following parameters are specific to \.{\me.}; those which are common to \.{CTANGLE} and \.{CWEAVE} are defined in the file \.{common.inc} and appear below. Some of these values have been decreased with respect to their earlier values which were sufficient in the original |WEB| to handle \TeX; a motivation is given at the common declarations. 
@d max_refs 10000 /* number of \xr.s; must be less than 65536 */ @d max_toks 10000 /* number of symbols in \Cee\ texts being parsed; must be less than |65536| */ @d max_texts 2500 /* number of phrases in \Cee\ texts being parsed; must be less than |10240| */ @d max_scraps 4000 /* number of tokens in \Cee\ texts being parsed */ @d max_no_of_nodes 215 /* number of nodes in search trie for grammar rules, must be at most |256| */ @d line_length 80 /* maximal line length for \TeX\ output; should be less than |256| */ @d stack_size 400 /* number of simultaneous output levels */ @d sort_stack_size 500 /* number of identifier lists during sorting */ @ The program is built from two compilation units, one with source file \.{common.w}, which contains a collection of routines and data shared between \.{CTANGLE} and \.{CWEAVE}, and a second with master source file \.{cweave.w} containing all code specific to \.{\me.}, and whose typeset version you are now reading. All compilation units of the \.{CWEB} system incorporate the file \.{common.inc} containing common declarations. \.{\me.} has the following outline. It operates in three phases: first it reads the source file once for collecting \xr. data, then it reads the source file for a second time, meanwhile producing the bulk of the \TeX\ output file, and finally it outputs the information in its tables from which \TeX\ will produce the list of module names, index and table of contents. Syntax errors would often be reported identically in the first two phases, and problems in printing module names similarly in the second and third phase; to avoid this we check after each phase if any serious errors were found, and if so, we stop prematurely. \.{\me.} can optionally be compiled with preprocessor symbols |DEBUG| and/or |STAT| defined; the former is useful for the user who requires detailed information about the parsing process, the latter if one wishes to keep track of how much of the static resources were actually used. 
@c @< Function prototypes used but not defined in the shared code @>@; @< Typedef and enumeration declarations @>@; @< Prototypes @>@; @< Global variables @>@; int main (int argc,char** argv) { program=cweave; make_xrefs=true; common_init(argc,argv); @< Set initial values @> if (show_banner) print(banner); /* print a ``banner line'' */ @< Store all the reserved words @> phase_one (); /* read all the user's text and store the \xr.s */ if (history>harmless_message) wrap_up(); /* stop in case of trouble */ open_output_file(); phase_two (); /* read all the text again and translate it to \TeX\ form */ if (history>harmless_message) wrap_up(); /* stop in case of trouble */ phase_three (); /* output the \xr. index */ wrap_up (); /* and exit gracefully */ return 0; /* for completeness---not reached */ } @ The macro |variant| needs to be defined before we include the file \.{common.h}, in order to affect the definition of |id_info| and |mod_info| (see \.{common.w} for an explanation); it refers to the |struct xref_info| that will be declared later. @d variant @;xref_info @< Prototypes @>= void phase_one (void); /* read all the user's text and store the \xr.s */ void phase_two (void); /* read all the text again and translate it to \TeX\ form */ void phase_three (void); /* output the \xr. index */ @i common.inc @* Data structures exclusive to {\tt \me.}. @:title@> Although the eventual typeset output produced after running \.{\me.} is meant to closely resemble the \.{CWEB} source from which it was created (or maybe it is the other way around), the task of \.{\me.} is more complicated than that of \.{CTANGLE}, because it involves a much more detailed ``understanding'' of the program fragments present in the source. Therefore the bulk of the program text is concerned with the parsing process during Phase~II. Detailed information about these matters shall be given at more appropriate places below; here we only discuss how individual tokens are tagged on input, and how \xr. 
information is stored. The |ilk| field of an |id_info| structure is used to distinguish between various types of identifiers, as follows: \yskip\hang |normal| identifiers are part of the \Cee\ program and will usually appear in italic type. \yskip\hang |roman| identifiers are index entries that appear after \:\^ in the \.{CWEB} file. \yskip\hang |wildcard| identifiers are index entries that appear after \:? in the \.{CWEB} file. \yskip\hang |typewriter| identifiers are index entries that appear after \:. in the \.{CWEB} file. @:section number example@> \yskip\hang |reference| identifiers are used for explicit \xr.s by the user within pieces of \TeX~text, allowing us for instance to remark that this is section@#section number example@> out of a total of@#index@> in this file, and that the sections having titles are number@#title@>. These \xr.s marks are set by~\:: and referred to by~\:\#. \yskip\hang |type_defined| identifiers have occurred in a |typedef| declaration. They will parse as types like |int|, but \xr.s to them will be collected. \yskip\hang |TeX_like| identifiers like |TeX| are typeset as user-definable control sequences. \yskip\hang |NULL_like| identifiers like `|NULL|' (\.{NULL}) are also typeset as (\TeX) control sequences, but in addition are treated like reserved words for indexing purposes, i.e., only underlined references are generated. \yskip\hang |case_like|, \dots, |while_like| identifiers are \Cee\ reserved words that will be typeset in boldface. The further distinction between these |ilk| values determines their distinguished behaviour in parsing (and consequently in the formatted output), since the values are also used as category codes when these reserved words are converted into scraps. Their values, which all exceed |NULL_like|, shall be defined together with the other category codes. 
Two special cases of these are given here, whose distinction is only needed in the lexical analysis, and which will get a category different from their |ilk|: |const_like| and |typedef_like| will become |int_like| in parsing (and the same holds for |type_defined| identifiers). @f TeX_like TeX @d reserved(a) (a->ilk>=type_defined) /* whether identifier automatically gets this |ilk| */ @d unindexed(a) (a->ilk>=NULL_like) /* whether cross-referencing is suppressed */ @< Typedef and enum... @>= enum @/ { normal, /* |ilk| of ordinary identifiers */ roman, /* |ilk| of roman type index entries */ wildcard, /* |ilk| of user-formatted index entries */ typewriter, /* |ilk| of typewriter type entries */ reference, /* |ilk| of identifiers used for explicit \xr.s */ type_defined, /* |ilk| of identifiers that are defined by |typedef| */ TeX_like, /* |ilk| of identifiers with user-given control sequences */ NULL_like, /* |ilk| of \.{NULL} */ const_like, typedef_like /* special reserved words */ }; @ Besides the memory used for storing names, another large memory area is used in \.{\me.} for keeping the \xr. data. All uses of the name~|p| are recorded in a linked list beginning at |p->xref|, which is an |xref_pointer| pointing into the |xmem| array (here we use the pointer field for additional information in |id_info| and |mod_info| structures, which was called |equiv| in~\.{CTANGLE}). The elements of |xmem| are structures consisting of an integer, |num|, and the index |next| of another element of |xmem|. If |x==p->xref|, the value of |x->num| is a section number where |p| occurs, plus a multiple of |cite_flag| that indicates the nature of the occurrence, or it is |file_flag| marking the fact that |p| is a module name which is the name of an auxiliary output file (for \.{CTANGLE}). The next such \xr. for~|p|, if any, is |xmem[x->next]|. Since the entries of |xmem| are small but numerous, and the \xr. 
lists are not traversed very frequently, a significant amount of space is saved by storing a |sixteen_bits| index as a link to the next node rather than an |xref_pointer|, and at very little cost in performance. The main price paid is in terms of elegance, as somewhat different methods of traversal of the lists are appropriate depending on whether the traversal may lead to the insertion of a new node into the list or not. If no such insertion is needed, then we can work with |xref_pointer| values as if we were dealing with an ordinary linked list, provided only that the pointer to a successor node is obtained by invoking a macro |next_xref| instead of selecting the link field. If however we may need to insert a new node into the list, then the most convenient method is to use a pointer to a link field, which then must have type |(sixteen_bits*)|; for that case the macros |xnum| and |xlink|, which select the |num| and |next| fields respectively of a node specified by its index, and which yield expressions that one can assign to or take the address of, are more useful than |next_xref|. Since the beginning of the list is indicated by an |xref_pointer| rather than by its index, we need to convert that pointer into an index (for which the macro |xref_index| is supplied) when using the second method, and store this index in a local variable, not forgetting to update the original |xref_pointer| in case an insertion occurs at the front of the list. During collection of \xr.s, the lists are basically in decreasing order by section number, as a result of the fact that new nodes are added at the beginning of the list (for module names the situation is actually a bit more complicated, as described below), so that in that phase the |next| field really refers to the previous \xr. for the same name; at the end of Phase~I, however, the lists are reversed. The global variable |xref_switch| is set either to |def_flag| or to zero, depending on whether the next \xr. 
to an identifier is to be underlined or not in the index. This switch is set to |def_flag| when \:! or \:d or \:f is scanned, and it is cleared to zero when the next identifier or index entry \xr. has been made. Similarly, the global variable |mod_xref_switch| is either |def_flag|, |cite_flag|, or zero, depending on whether a module name is being defined, cited, or used. During Phase~II a number of \xr.s for identifiers will be changed into underlined ones, when the identifier is found to occur in a declaration or function definition, since these occurrences cannot be reliably recognised in Phase~I. @d xref equiv_or_xref @d next_xref(x) (&xmem[(x)->next]) @d xnum(i) (xmem[i].num) @d xlink(i) (xmem[i].next) @d xref_index(p) ((sixteen_bits)((p)-xmem)) @d cite_flag 0x4000 /* a |sixteen_bits| power of 2 that is at least |max_sections+2| */ @d def_flag 0x8000 /* twice that */ @d num_mask (cite_flag-1) /* a bit-mask for working modulo |cite_flag| */ @= typedef struct xref_info { sixteen_bits num; /* section number plus a multiple of |cite_flag| */ sixteen_bits next; /* index of the next \xr. in the list */ } xref_info, *xref_pointer; @ We use an allocation pointer~|xref_ptr| that indicates how much of |xmem| is already in use. Unlike in the other cases of sequential allocation, where the allocation pointer points to the first free position, |xref_ptr| points to the last occupied position in |xmem|; the first node |xmem[0]| is already in use when \.{\me.} starts. All allocation of |xref_info| nodes is performed by calling |make_xref(n,i)|, where |n| is the |num| field of the new node, and |i| is the index of its successor in the list; after |make_xref| has been invoked, |xref_ptr| points to the new node. @d make_xref(n,i) /* create \xr. node with |num==n| and successor |xmem[i]| */ if (++xref_ptr >= &xmem[max_refs]) overflow ("cross-reference"); @.cross-reference capacity exceeded@> else xref_ptr->num=n, xref_ptr->next=i @; @= xref_info xmem[max_refs]; /* contains \xr. 
information */ xref_pointer xref_ptr = &xmem[0]; /* the last used position in |xmem| */ sixteen_bits xref_switch = 0, mod_xref_switch = 0; /* either zero or |def_flag| */ @ A first node is initially allocated, with its |num==0|; the node is used to initialise \xr. lists (see |init_id_name| and |init_module_name| below), and serves as a sentinel at the end of those lists. @= xnum(0)=0; /* sentinel node terminating \xr. lists */ @ A new \xr. for an identifier~|p| is formed by calling |new_id_xref(p)| with |section_count| and |xref_switch| set to the appropriate value. Multiple references to the same section are merged (with underlining activated if any reference requires so) while non-underlined references to one-letter identifiers, reserved words or identifiers with |ilk==NULL_like| (like `\.{NULL}') are ignored. Explicit \xr.s set by~\:: are never underlined, so |xref_switch| is ignored in this case. If the user has set the |no_xref| flag (the \.{-x} option of the command line), |new_id_xref| is active only for explicit \xr.s; when are reading a header file included by~\:h, it is inactive altogether (but since \:: is disabled in such header files, this case is treated together with the |no_xref|~case). @d no_xref (!flags['x']) @d make_xrefs flags['x'] /* should cross references be output? */ @c void new_id_xref (id_pointer p) { sixteen_bits f=xref_switch; xref_switch=0; if (p->ilk==reference) f=0; else if (f==0 && (unindexed(p) || length(p)==1) || no_xref || including_header_file) return; if ((p->xref->num&num_mask)==section_count) p->xref->num|=f; else {@; make_xref(section_count|f,xref_index(p->xref)); p->xref=xref_ptr; } } @ The \xr. lists for module names are slightly different. Suppose that a module name is defined in sections $d_1$,~\dots,~$d_k$, cited in sections $c_1$,~\dots,~$c_l$, and used in sections $u_1$,~\dots,~$u_m$, where the sequences of $d$'s, $c$'s and $u$'s each are in increasing order. 
Then its list will contain |@t$d_k$@>+def_flag|, \dots, |d1+def_flag|, |@t$c_l$@>+cite_flag|, \dots, |c1+cite_flag|, $u_m$, \dots, $u_1$,~$0$, in this order; the final~$0$ is the sentinel node, which allows the loops below to be a little faster. If the module name specifies an output file (with the \:( feature) then a node containing the special value |file_flag| is prepended to this list. The special ordering described here only serves for efficiency of insertion, and after Phase~II the order will be adjusted to the more natural sequence |d1+def_flag|, \dots, |@t$d_k$@>+def_flag|, |c1+cite_flag|, \dots, |@t$c_l$@>+cite_flag|, $u_1$, \dots,~$u_m$,~0. There can be multiple applied or cited occurrences of some module name within one section, but only one defining occurrence. Therefore, in the former cases we perform a test to avoid duplicate references. @d file_flag (cite_flag-1) /* a distinguished value, not divisible by |cite_flag| */ @c void new_mod_xref (mod_pointer p) { sixteen_bits head, *q, m=section_count+mod_xref_switch; if (p->xref->num==file_flag) q=&p->xref->next; /* skip |file_flag| */ else head=xref_index(p->xref),q=&head; if (mod_xref_switch!=def_flag) { while (xnum(*q)>m) q=&xlink(*q); /* skip the $d_i$'s and possibly $c_i$'s */ if (xnum(*q)==m) return; /* don't duplicate */ } make_xref(m,*q); mod_xref_switch=0; if (q==&head) p->xref=xref_ptr; @+ else *q=xref_index(xref_ptr); } @ When a module name starts with \:(, we will call |set_file_flag|. @c void set_file_flag (mod_pointer p) {@; if (p->xref->num!=file_flag) {@; make_xref(file_flag,xref_index(p->xref)); p->xref=xref_ptr; } } @ A third large area of memory is used for sixteen-bit `tokens', which appear in short lists similar to the strings of characters in |byte_mem|. Token lists are used to contain the result of \Cee\ code translated into \TeX\ form; further details about them will be explained later. 
Sequences of tokens which have been delimited as such are called texts, and can be accessed via elements of the array |text_mem|. Only a pointer to the first token of the sequence is stored, so in order to be able to compute the extent of a token list one needs a pointer into |text_mem|, which is called a |text_pointer|. @= typedef sixteen_bits token, * token_pointer, ** text_pointer; @~The first position of |tok_mem| that is unoccupied by replacement text is called |tok_ptr|, and the first unused location of |text_mem| is called |text_ptr|. Since we already know that the next text to be stored will start at |tok_ptr|, we make sure that |*text_ptr==tok_ptr| whenever we are not in the process of appending tokens. In this way we can also always find the end of any stored text as the beginning of the next one, which is computed by the macro |text_end|. @d tok_mem_end (&tok_mem[max_toks]) /* end of |tok_mem| */ @d text_mem_end (&text_mem[max_texts]) /* end of |text_mem| */ @d text_index(p) ((sixteen_bits)((p)-text_mem)) @d text_at(i) (&text_mem[i]) @d text_begin(p) (*(p)) @d text_end(p) (*(p+1)) @= token tok_mem[max_toks]; /* tokens */ token_pointer text_mem[max_texts]; /* directory into |tok_mem| */ token_pointer tok_ptr = tok_mem; /* first unused position in |tok_mem| */ text_pointer text_ptr = text_mem; /* first unused position in |text_mem| */ #ifdef STAT token_pointer max_tok_ptr = tok_mem; /* largest value of |tok_ptr| */ text_pointer max_text_ptr = text_mem; /* largest value of |text_ptr| */ #endif @ We initialise our invariant. @= *text_ptr=tok_ptr; @ Here are the three functions needed to complete |id_lookup|. For a name~|x| stored in the table to match a string of length~|l| starting at~|q| and of requested ilk~|ilk|, it is required that the names match exactly, and either |x->ilk==ilk|, or |ilk==normal| and |x->ilk| specifies some reserved word (where the ilks |TeX_like| and |NULL_like| are considered as reserved). 
This rule means that if we look up an ``identifier'' whose name matches a reserved word stored in the table, the reserved word rather than an identifier is returned, so that recognition of reserved words is automatic when the tables are properly initialised. The other two functions install the sentinel node |xmem[0]| at the end of each \xr. list as it is created. @c boolean names_match (id_pointer x, char* q, int l, int ilk) { char* p=name_begin(x); if ((x->ilk==ilk || ilk==normal && reserved(x))) @/{@; while (--l>=0) if (*p++!=*q++) return false; return *p=='\0'; } else return false; } void init_id_name (id_pointer p, int t) {@; p->ilk = t; p->xref = &xmem[0]; } void init_module_name (mod_pointer p) {@; p->xref=&xmem[0]; } @* Skipping and copying \TeX\ material. @:title@> The source file consists roughly speaking of two sorts of input, namely \TeX\ material and pieces of \Cee~text. The limbo part is entirely the realm of \TeX, and the sections are divided (possibly quite unevenly) into a \TeX~part, and the remainder which is~\Cee, although the distinction is not quite as pure here due to `\pb' interjections and comments. The meddling of \.{\me.} (and indeed of all of \.{CWEB}) in the \TeX\ parts is extremely superficial: it is restricted to skipping or copying it, only replacing \:@@ by~`\.{@@}', and the main interest in these parts of the source is to find out where they end. By contrast the \Cee\ fragments have to be broken up into meaningful parts by \.{\me.} in order to perform the required operations of collecting \xr.s and formatting for pretty-printing output. Before we study that more complicated process of lexical scanning, let us study the easier question of what happens with the \TeX~parts. The three tasks of passing over limbo material, over the \TeX~part of a section, and over a comment embedded in the \Cee~part are sufficiently different that they merit separate routines. 
Moreover, the entire source is scanned twice by \.{\me.}, with different purposes, and so the scanning routines have two flavours. We could choose either to use distinct scanning routines on both passes, or to have a single all-purpose scanning routine. The first option is more attractive if the functions are simple and small, and can perform tasks specific to their pass on-the-fly, while the second option is to be favoured if the scanning task becomes complicated, to avoid near-duplication of substantial pieces of code. We have chosen the first option for the routines that pass over ordinary \TeX\ material, both in limbo and in sections, and the second option for the function passing over comments; for the lexical scanning of \Cee~text we shall also write a single function for both phases. Where distinct routines are used we must of course make sure that they are sufficiently similar that they will make the same decisions about where these parts of the source text end. @ Although the functions to be described here have a quite trivial task, they will eventually run into a control code that terminates their action, and so we have to be aware of tokens that do not belong in the \TeX~part. The relevant tokens form only a small subset of the tokens that can be recognised by the function |get_next| that scans \Cee~text, so we defer an enumeration of possible tokens to a later point. At this point it is sufficient to know there is an array |ccode| which translates characters that may follow `\.{@@}' in a control code into a numeric value greater than any (unsigned) character, which values have symbolic names and are arranged in order of increasing significance. In particular any value greater than or equal to |format| terminates the \TeX~part of a section, and the largest value of all is |new_section|. By using the same encoding, the \TeX~text scanning functions can return a value directly usable for continuing the scan into \Cee~territory. 
Tokens consisting of a single character are represented as that character itself by |get_next|, and at this point the relevant case is the character~`\.\v' which interrupts \TeX~text. Like in |CTANGLE|, we always access |ccode| by means of the macro |code_of|. @d code_of(c) ccode[(unsigned char)(c)] @= int ccode[UCHAR_MAX + 1]; /* meaning of a character following `\.{@@}' */ @ This section performs the simplest of all scanning operations, namely to skip through portions of the input that are not in any sections, i.e., that precede the first section, on the first pass. It uses the fact that |get_line| places a~|' '| at~|*limit|, so that placing the sentinel cannot inadvertently create a token \.{@@@@}. Although a few control codes like \:q are allowed in limbo, they can be ignored here, since they do not affect the recognition of the end of the limbo part. An exception is the format code \:s, which must be obeyed in this first phase; the code doing this will be given together with the processing of other format codes. After the code below is executed, the value of |input_has_ended| will tell whether or not a section has actually been found. @< Skip the limbo part @>= while (find_char()) { limit[1]='@@'; /* place a sentinel */ while (*loc++!='@@') {} if (loc<=limit) { int c=code_of(*loc++); if (c==new_section) break; if (c==format) @< Process a format code in limbo @> } } @ In Phase~II, the corresponding task is slightly less trivial, as the limbo material must also be copied to the output; it is performed by the |copy_limbo| function. Output is generated by calling |out| for ordinary characters, and |finish_line| when a completed line is to be sent out. No spaces or tab marks are copied by |copy_limbo| into the beginning of a line (indicated by the macro |output_line_empty|), nor by the function |copy_TeX| below. As a consequence a line with only spaces and removed items (like \:q, \:s, or for |copy_TeX| \xr. 
entries for the index) will produce a completely empty line of output; such lines will only be actually written out by |finish_line| if they come from a completely blank input line. Any pair \:@@ is replaced by `\.{@@}', and apart from this only the control codes \:q, \:s, and \:l are allowed to be present before the first section is encountered. Note that we have chosen to detect violations of this rule in Phase~II; on other occasions however, errors were checked in Phase~I, and we can then assume that errors are absent in Phase~II, as we would not even get there otherwise. @c void copy_limbo (void) /* copy \TeX\ code until the next section begins */ { while (loc<=limit || (finish_line(),get_line())) { eight_bits c; limit[1]='@@'; /* place a sentinel */ while ((c=*loc++)!='@@') if (!(output_line_empty() && isspace(c))) out(c); if (loc<=limit) switch(code_of(*loc++)) { case new_section: return; case ignored_text: get_control_text(); break; case format: get_next(); get_next(); break; /* skip two identifiers */ case char_trans: out_str("\\ATL "); break; @.\\ATL@> default: err_print("! Double @@ required in limbo part"); @.Double @@ required...@> /* fall through */ case at_sign_image: out('@@'); } } } @ The function |skip_TeX| is used on the first pass to skip through the \TeX\ code at the beginning of a section. It returns the next control code or `\.\v' found in the input. A |new_section| is assumed to exist at the very end of the file. Any comment character `\.{\%}' that is not escaped (with a backslash) will disable recognition of `\.\v' and cross-referencing control codes for the remainder of the line, so that no spurious \xr.s will be created to commented-out text. Recognition of control codes that terminate the \TeX~part of the section will still be enabled however, to maintain synchronisation with the activities of |CTANGLE|. 
@f skip_TeX TeX @c int skip_TeX (void) /* skip past pure \TeX\ code */ { char c; while (find_char()) { limit[1]='@@'; while ((c=*loc++)!='@@' && c!='%') if (c=='|') return c; else if (c=='\\' && *loc!='@@') ++loc; /* ignore `\.{\\\%}' and `\.{\\\v}' */ if (loc<=limit) if (c=='@@') return code_of(*loc++); else /* ignore remainder of line unless a major control code occurs */ do @+ if ((c=*loc++)=='@@' && code_of(*loc++)>=format) return code_of(loc[-1]); while (loc=format) return finish_line(),code_of(loc[-1]); while(loc= int scan_comment(int* bal, boolean one_liner); /* skip or copy \TeX\ text in comments */ @~The function |scan_comment| counts the braces it encounters to see if they are balanced; the parameter |bal| points to an integer variable keeping track of the brace level (it can be positive initially if |scan_comment| is called after `\pb' occurring within braces in a comment). This feature is mainly a remnant from \.{CTANGLE}'s Pascal origins, since there comments are closed by `\.\}', and counting is necessary to establish which braces are for \TeX\ and which are not. Although there is no such need in \Cee, the feature is retained. The parameter |one_liner| tells whether we are dealing with a \Cpp\ one-line comment. @c int scan_comment (int* bal, boolean one_liner) { char c; boolean forced_out=false; /* prematurely terminated? */ while (one_liner ? loc if (input_has_ended) forced_out=true,err_print("! Input ended in mid-comment"); @.Input ended in mid-comment@> else if (!one_liner) loc+=2; /* move past `\.{*{}/}' */ done: if (*bal>0) err_print("! Too few closing braces in comment"); @.Too few closing braces...@> return forced_out ? new_section : end_comment; } @ Like elsewhere, `\.@@' should be doubled in comments; |scan_comment| replaces them by a single `\.@@'. 
In Phase~II, instead of copying the \TeX\ material into the output buffer like |copy_TeX|, |scan_comment| copies it into the token memory, since comments will be output together with the rest of the formatted \Cee~code. To this end it calls the macro |app_char_tok(c)| rather than |out(c)|. When including header files comments should be skipped with no processing at all, so this module is almost completely disabled in that case. @< Handle next character... @>= if (including_header_file) ++loc; /* don't process characters here */ else { switch(c=*loc++) { case '|': return '|'; /* beginning of `\pb' inside comment */ case '@@': if (*loc++!='@@') if (code_of(loc[-1])!=new_section) err_print("! Double @@ required in comment"); @.Double @@ required...@> else {@; err_print("! Section ended in mid-comment"); @.Section ended in mid-comment@> forced_out=true; goto done; } break; case '\\': if (*loc!='@@') {@; if (phase==2) app_char_tok(c); c=*loc++; } break; case '{': ++*bal; break; case '}': @+if (*bal>0) --*bal; @+else err_print("! Extra } in comment"); @.Extra \} in comment@> @+break; case '/': @+ if (*loc=='*') err_print("! Nested comment"); @.Nested comment@> } if (phase==2) app_char_tok(c); } @* Getting the next token. @:title@> We now come to the most important lexical scanning function, |get_next|, which locates and classifies the next token of \Cee~text. Before we give the function itself, we shall first specify which kind of values it can return. The result value is an integer, which can either be at most |UCHAR_MAX|, in which case the character code represents that character, or possibly a compressed multi-character symbol as defined by the macros in~\.{common.inc}, or it is one of the values greater than |UCHAR_MAX| defined in the enumeration below, which indicates a particular token or class of tokens or a control code. 
(Actually, |underline| and |trace0|,~\dots,~|trace3| are never returned by |get_next| because they are treated within the scanner; they are included however because they are distinguished control codes.) The ordering of this enumeration is designed to simplify \.{\me.}'s logic; for example, from |format| on, larger numbers are given to the control codes that denote more significant milestones, and the code of |new_section| is the largest of all. Also, the sequence |identifier|,~\dots,~|xref_mark| runs parallel to the first five |ilk|~codes. For efficiency all codes that produce a fixed scrap are placed at the beginning; these are all codes preceding |ignore|. The code \:> is treated as ignored because it should not occur in places where we are not searching for it. @f TeX_string TeX @= enum @/ { at_sign_image = UCHAR_MAX+1, /* quoted `\.{@@}' */ or, /* \:v */ mul_assign, div_assign, mod_assign, plus_assign, minus_assign, left_assign, right_assign, and_assign, xor_assign, or_assign, sh_sh, ellipsis, colon_colon, @/ start_preproc, end_preproc, /* begin and end of a preprocessor directive */ join, /* \:\& */ thin_space, /* \:, */ math_break, /* \:\v */ line_break, /* \:/ */ big_line_break, /* \:) */ no_line_break, /* \:+ */ backup_line, /* \:\\ */ pseudo_semi, /* \:; */ force_expr_open, force_expr_close, /* \:[, \ \:] */ include_preproc, /* \:p */ @) ignore, /* control code of no interest to \.{\me.} */ constant, string, /* the next five codes should remain in this order */ identifier, /* any (possibly reserved) word found in \Cee\ text */ xref_roman, xref_wildcard, xref_typewriter, xref_mark, /* \:\^, \ \:?, \ \:., \ \:: */ refer, /* \:\# */ TeX_string, /* \:t */ verbatim, /* \:= */ ignored_text, /* \:q */ char_trans, /* \:l */ ASCII_code, /* \:' */ begin_comment, end_comment, underline, /* \:! 
*/ #ifdef DEBUG trace0, trace1, trace2, trace3, /* \:0, \dots, \:3 */ #endif format, /* \:f */ definition, /* \:d */ header, /* \:h */ begin_C, /* \:c */ module_name, /* \:< and \:( */ new_section /* \:\ , \:\~ and \:* */ }; @ Here we initialise the |ccode| table in accordance with the comments given in the enumeration above. @= { unsigned char c=0; do ccode[c] = isspace(c) ? new_section : ignore; while(c++!=UCHAR_MAX); ccode['@@'] = at_sign_image; @/ccode['v'] = ccode['V'] = or; @/ccode['!'] = underline; /* set definition flag */ @/ccode['^'] = xref_roman; /* index entry to be typeset normally */ @/ccode['?'] = xref_wildcard; /* index entry to be in user format */ @/ccode['.'] = xref_typewriter; /* index entry to be in typewriter type */ @/ccode[':'] = xref_mark; ccode['#']=refer; /* explicit \xr.s */ @/ccode['t'] = ccode['T'] = TeX_string; /* \TeX\ box within \Cee\ text */ @/ccode['='] = verbatim; @/ccode['q'] = ccode['Q'] = ignored_text; @/ccode['l'] = ccode['L'] = char_trans; @/ccode['\''] = ASCII_code; @/ccode['&'] = join; /* concatenate two tokens */ @/ccode[','] = thin_space; @/ccode['|'] = math_break; @/ccode['/'] = line_break; @/ccode[')'] = big_line_break; @/ccode['\\']= backup_line; @/ccode['+'] = no_line_break; @/ccode[';'] = pseudo_semi; @/ccode['['] = force_expr_open; ccode[']'] = force_expr_close; ccode['p'] = ccode['P'] = include_preproc; #ifdef DEBUG ccode['0'] = trace0; ccode['1'] = trace1; ccode['2'] = trace2; ccode['3'] = trace3; #endif @/ccode['f'] = ccode['F'] = ccode['s'] = ccode['S'] = format; @/ccode['d'] = ccode['D'] = definition; @/ccode['h'] = ccode['H'] = header; @/ccode['c'] = ccode['C'] = begin_C; /* \Cee\ text in unnamed module */ @/ccode['<'] = ccode['('] = module_name; /* beginning of a module name */ @/ccode['~'] = ccode['*'] = new_section; /* beginning of a new section */ if (compatibility_mode) @< Reset some control codes to match \LKC. 
@> } @ In \.{CWEBx} there are a few control codes that also exist in Levy/Knuth \.{CWEB} but have a different meaning. In compatibility mode we reassign the meaning of these codes to that of \LKC., making their usual function inaccessible, since it is not intended that hybrid programs should be written using the codes of \LKC. together with features particular to \.{CWEBx}. @^Levy/Knuth \.{CWEB}@> @< Reset some control codes... @>= { ccode['h']=ccode['H']=include_preproc; /* \:h means \:p */ ccode['p']=ccode['P']=begin_C; /* \:p means \:c */ ccode['#']=big_line_break; /* \:\# means \:) */ ccode[':']=xref_wildcard; /* \:: means \:? */ } @ We come now to the definition of |get_next| itself. When returning certain values it will have performed some additional actions, as follows. \yskip\hang |constant|, |string|, |TeX_string|, |verbatim|: The token is copied into |mod_text|, with slight modifications; the global variables |id_first| and |id_loc| are set to the beginning and ending-plus-one locations in |mod_text|. \yskip\hang |identifier|, |xref_roman|, |xref_wildcard|, |xref_typewriter|, |xref_mark|, |refer|, |module_name|: The global variable |cur_id| or |cur_mod| will point to the identifier, control text or module name that has just been scanned (for the |xref_roman|, \dots, |xref_mark| this is only true if |phase==1|). \yskip\hang |underline|: this value is not even returned. If |get_next| sees \:!, it sets |xref_switch| to |def_flag| and goes on to the next token. \yskip Preprocessing directives complicate scanning in two ways: first, their lexical structure is different from that of ordinary text, and second, a preprocessor directive can occur at any place in a \Cee~text, so syntactically it should be treated like a comment. The first issue is resolved by maintaining a static variable |preprocessing| which is~$0$ in ordinary \Cee~text, $2$ in \&{\#include} directives, and~$1$ in other preprocessor directives. 
The second issue must be dealt with during parsing, and in order to be able to do so, |get_next| emits special tokens |start_preproc| and |end_preproc| when it has sensed the boundaries of a preprocessor directive. @= id_pointer cur_id; /* identifier or index entry just scanned */ mod_pointer cur_mod; /* module name just scanned */ int preprocessing=0; @ As one might expect, |get_next| consists mostly of a big switch that branches to the various cases that can arise. Any character |c>=0x80| that is not contained in a string or comment is assumed to belong to an identifier. @< Prototypes @>= int get_next (void); @~@c int get_next (void) /* produces the next input token */ { eight_bits c; /* the current character */ restart: if (!find_char()) {@; preprocessing=0; return new_section; } @< If a preprocessor line has ended, handle it and |return end_preproc| @> if ((c=*loc++)=='@@') @< Get control code and possibly module name, and either |return| it, or |goto restart| if ignored or handled within |get_next| @> if (isspace(c)) if (preprocessing>0) return ' '; /* keep spaces in preprocessor lines */ else goto restart; /* ignore other white space */ if (c=='L' && (*loc=='\'' || *loc=='"')) {@; get_string(); return string; } if (isalpha(c) || c=='_' || c>=0x80) {@; @< Get an identifier @> return identifier; } if (isdigit(c) || c=='.' && isdigit((eight_bits)*loc)) @/{@; @< Get a numeric constant @> return constant; } if (c=='\'' || c=='"' || (c=='<' && preprocessing==2)) {@; get_string(); return string; } if (c=='#' && loc==&buffer[1]) @/{@; @< Handle start of preprocessor directive; maybe |goto restart| @> return start_preproc; } if (c=='\\' && preprocessing>0 && loc==limit) { ++loc; /* move past |limit|, so |get_line| will be called */ goto restart; } @< Compress multi-character tokens @> return c; } @ When a `\.\#' is seen as the first character of a line, |get_next| returns a special code |start_preproc| and sets |preprocessing| to a non-zero value. 
Because of the freakish use of `\.<' and `\.>' to delimit a file name in lines that start with `\.{\#include}', those lines get an extra-special treatment, and |preprocessing| is set to~$2$ rather than to~$1$. If however we encounter a `\.{\#include}' directive when we are already busy reading a header file due to \:h in Phase~I, then we actually execute the directive, and start reading the nested header file; this action will be transparent to the stream of tokens produced, so we |goto restart| to fetch the first token from the newly opened file. @< Handle start of preprocessor directive; maybe |goto restart| @>= { while (loc<limit && isspace((eight_bits)*loc)) ++loc; if (limit-loc>=7 && strncmp(loc,"include",7)==0) /* `\.{\#include}' line */ if (including_header_file) @/{@; loc+=7; push_input_file(true,false); goto restart; @+} /* start nested header file */ else preprocessing=2; else preprocessing=1; } @ When we get to the end of a preprocessor line, we lower the flag and send a code |end_preproc| (unless the last character was a `\.\\', but that case has already been taken out and never comes here). @< If a preprocessor line has ended, handle it and |return end_preproc| @>= if (preprocessing>0 && loc==limit) {@; preprocessing=0; return end_preproc; } @ The following code assigns values to the compound operators `\.{++}', `\.{--}', `\.{->}', `\.{>=}', `\.{<=}', `\.{==}', `\.{<<}', `\.{>>}', `\.{!=}', `\.{\v\v}', and `\.{\&\&}', to the special symbols `\.{/*}', `\.{*/}', `\.{//}', `\.{...}', `\.{::}', and `\.{\#\#}', and moreover, if not in compatibility mode, to the assignment operators `\.{*=}', `\.{/=}', `\.{+=}', `\.{-=}', `\.{>>=}', `\.{<<=}', `\.{\&=}', `\.{\^=}', `\.{\v=}'. Although the comment ending token `\.{*/}' should never occur when we are scanning \Cee~text, we must recognise it in order to detect unclosed `\pb' constructions within comments (fortunately no legal combination of operators causes an adjacent sequence `\.{*/}'; only `\.*' immediately followed by a comment could cause it, and the user should simply not do this). The reason that in compatibility mode we are forced to follow suit with \LKC.
@^Levy/Knuth \.{CWEB}@> in not combining assignment operators is a truly stupid one: @:truly stupid@> the Stanford GraphBase @^Stanford GraphBase@> contains fragments like `\.{\$\v n1\v=n\_1\$}' which will cause trouble if `\.{\v=}' is parsed as a single symbol (of course the fragments should have been written as `\.{\v n1\v\$\{\}=n\_1\$}', but we have set the goal to handle the GraphBase as it is, and moreover its files cannot be changed). For splitting up the assignment operators here, we shall have to pay the price of including syntax rules (in compatibility mode) for recombining them. The macros defined below are for strictly local use, so we don't mind that they could mess things up if used in the |if|-part of an |if|-|else| statement. Also, there is no need to test whether |*loc| or |loc[1]| lie beyond |limit|, since if they do, they are not evaluated anyway because we know |*limit==' '|. Note that the three-symbol assignment operators `\.{>>=}' and~`\.{<<=}' must be tested before their non-assignment counterparts `\.{>>}' and~`\.{<<}'. 
@d compress2(char2,code) if (*loc==char2) return ++loc, code @; @d compress3(char2,char3,code) if (*loc==char2 && loc[1]==char3) return loc+=2, code @; @d comp_ass_op2(code) if (*loc=='=' && !compatibility_mode) return ++loc, code @; @d comp_ass_op3(char2,code) if (*loc==char2 && loc[1]=='=' && !compatibility_mode) return loc+=2,code @; @< Compress multi-character tokens @>= switch (c) { case '/': compress2('*',begin_comment); @+ if (C_plus_plus) compress2('/',begin_comment); comp_ass_op2(div_assign); break; case '*': compress2('/',end_comment); comp_ass_op2(mul_assign); break; case '%': comp_ass_op2(mod_assign); break; case '+': compress2('+',plus_plus); comp_ass_op2(plus_assign); break; case '-': compress2('-',minus_minus); compress2 ('>', minus_gt); comp_ass_op2(minus_assign); break; case '=': compress2('=',eq_eq); break; case '>': compress2('=',gt_eq); comp_ass_op3('>',right_assign); compress2 ('>',gt_gt); break; case '<': compress2('=', lt_eq); comp_ass_op3('<',left_assign); compress2 ('<', lt_lt); break; case '&': compress2('&',and_and); comp_ass_op2(and_assign); break; case '^': comp_ass_op2(xor_assign); break; case '|': compress2('|',or_or); comp_ass_op2(or_assign); break; case '!': compress2('=',not_eq); break; case '.': compress3('.','.', ellipsis); break; case '#': compress2 ('#', sh_sh); break; case ':': @+ if (C_plus_plus) compress2 (':',colon_colon); } @ The code below is almost identical to the corresponding module in |CTANGLE|; the difference is that we accept characters |c>=0x80| without testing whether a translation is defined for them (since we have not recorded such information) and that we always look up identifiers, even if they have length~$1$.
@< Get an identifier @>= { id_first=--loc; /* mark beginning of identifier */ do c=*++loc; while (isalnum(c) || c=='_' || c>=0x80); cur_id= id_lookup(id_first,loc,normal); } @ In \Cee~text, numeric constants are specified in the ordinary \Cee~manner: octals start with `\.0', hexadecimals with `\.{0x}', and anything starting with a non-zero digit is a decimal constant; however for octal and hexadecimal constants \.{\me.} will produce output using italics or typewriter font, respectively, and introduced by a raised circle or hash mark. Forgivably in contradiction with the definition of~\Cee, we treat the ubiquitous constant~`0' as decimal rather than as octal. When the kind of constant we are dealing with has been recognised, we represent this information internally by special marker characters, which replace the marks used in the \Cee~source (like `\.{0x}' for hexadecimal or `\.E' for the exponent of a floating point constant). These markers are characters like `\.\^' that will get a backslash prepended by the output routine for constants, so that formatting of the constants can be controlled by defining the corresponding \TeX\ control words (like `\.{\\\^}' in the case mentioned) appropriately. 
@d shift_and_store(ch) (*id_loc++=ch,c=*++loc) @< Get a numeric constant @>= { id_first=id_loc=&mod_text[1]; if (c=='0' && (isdigit(c=*loc) || tolower(c)=='x')) /* octal or hex */ { if (isdigit(c)) /* octal constant with at least two digits */ { *id_loc++ = '~'; /* store `\.\~' in place of leading `\.0' */ do shift_and_store(c); while (isdigit(c)); /* copy second and following digits */ } else /* hex constant */ { shift_and_store('^'); /* replace `\.{0x}' by `\.\^' */ while (isxdigit(c)) shift_and_store(c); } } else /* decimal constant */ { c=*--loc; /* recover first digit or decimal point */ while (isdigit(c)) shift_and_store(c); if (c=='.') @+ do shift_and_store(c); while (isdigit(c)); if (tolower(c)== 'e') /* floating point constant with exponent */ { shift_and_store('_'); /* replace `\.e' by `\.\_' */ if (c=='+' || c=='-') {@; *id_loc++ = c; c=*++loc; } while (isdigit(c)) shift_and_store(c); /* exponent */ } } if (isalpha(c)) /* `\.{U}', `\.{L}', and/or `\.{F}' suffix */ @/{@; *id_loc++ = '$'; do shift_and_store(c); while (isalpha(c)); } } @ After an `\.{@@}' sign has been scanned, the next character tells us whether there is more work to do. This code uses the fact that our internal code numbers |xref_roman|, |xref_wildcard|, |xref_typewriter|, and |xref_mark| are consecutive in the stated order, as are the |ilk| codes |roman|, |wildcard|, |typewriter|, and |reference|. We silently eliminate the possibility of indexing the empty string, since it would cause anomalous situations in hashing and sorting of the index, and it would look rather silly anyway. @< Get control code and possibly module name, and either |return| it, or |goto restart| if ignored or handled within |get_next| @>= if (including_header_file) goto restart; /* ignore `\.@@' in header files */ else { int cc=code_of(*loc++); switch (cc) { case ignore: goto restart; case underline: xref_switch=def_flag; goto restart; #ifdef DEBUG case trace0: case trace1: case trace2: case trace3: @+ if (phase==2) tracing=cc; @+ goto restart; #endif case char_trans: err_print("! 
`@@l' only allowed in limbo"); goto restart; @.`@@l' only allowed in limbo@> case ASCII_code: @< Scan an \caps{ASCII} constant @> @+ return string; case module_name: @< Scan the module name and make |cur_mod| point to it @> @+ break; case ignored_text: get_control_text(); goto restart; case verbatim: case TeX_string: get_control_text(); break; case xref_roman: case xref_wildcard: case xref_typewriter: case xref_mark: case refer: if (get_control_text()) goto restart; /* don't index empty strings */ if (cc==refer) cur_id=id_lookup(id_first,id_loc,reference); else if (phase==1) cur_id=id_lookup(id_first,id_loc,cc-xref_roman+roman); } return cc; } @ There is no reason why we should allow a newline within an \caps{ASCII} constant, even if it is escaped. @< Scan an \caps{ASCII} constant @>= { id_first=&mod_text[1]; strncpy(id_first,"@@'",2); id_loc=&id_first[2]; while ((*id_loc++=c=*loc++)!='\'') { if (c=='\\') *id_loc++=*loc++; /* copy any character following backslash */ else if (c=='@@' && *loc++!='@@') {@; err_print("! Double @@ required in strings"); --loc; } @.Double @@ required...@> if (loc>=limit) {@; err_print("! ASCII constant didn't end"); break; } @.ASCII constant didn't end@> } } @ Here |get_module_name| does nearly all the work; we only need to recognise when \:( is used rather than~\:<, and if so insert |file_flag| in the appropriate \xr. list. @< Scan the module name... @>= { boolean file_module=loc[-1]=='('; cur_mod=get_module_name(); if (file_module && phase==1 && cur_mod!=NULL) set_file_flag(cur_mod); } @* Phase I processing. @:title@> We now have accumulated enough functions to make it possible to carry out \.{\me.}'s first pass over the source file. If everything works right, both Phase~I and Phase~II of \.{\me.} will assign the same numbers to sections, and these numbers will agree with what \.{CTANGLE} does. We keep track of the current section number in |section_count|, which is the total number of sections that have started. 
Sections which have been altered by a change file entry have their |changed_section| flag turned on during the first phase. Meanwhile we also keep track using |change_exists| of whether any change was made at all, which will tell us whether the index has changed. The global variable |next_control| often contains the most recent output of |get_next|; in interesting cases, this will be the control code that ended a section or part of a section. @d shift() (next_control=get_next()) @= boolean change_exists=false; /* has any section changed? */ int next_control; /* control code waiting to be acted upon */ @ The overall processing strategy in Phase~I has the following straightforward outline. @c void phase_one (void) /* read all the user's text and store the \xr.s */ { phase=1; reset_input(); section_count=0; @< Skip the limbo part @> while (!input_has_ended) @< Store cross-reference data for the current section @> if (change_exists) mark_section_as_changed(section_count); /* the index changes if anything does */ @< Print error messages about unused or undefined module names @> @< Reverse the \xr. lists for identifiers @> } @ The outline for each section is equally straightforward. @< Store cross-reference data... @>= { if (++section_count==max_sections) overflow("section number"); @.section number capacity exceeded@> if (loc[-1]=='*') print_section_progress (); @< Store cross-references in the \TeX~part of a section @> @< Store cross-references in the definition part of a section @> @< Store cross-references in the \Cee~part of a section @> if (section_changed(section_count)) change_exists=true; } @ We interrupt our refinement of |phase_one| temporarily for some auxiliary functions that are used in its various parts. 
@< Prototypes @>= void C_xref (boolean); /* make \xr.s within in straight \Cee~text */ void outer_xref (void); /* make \xr.s in \Cee~text with comments */ void mod_check (mod_pointer); /* check \xr.s for module names */ @ The function |C_xref| stores references to identifiers in \Cee~text, either for an entire fragment enclosed in `\pb', or for a portion of a macro or section body delimited by comments or module names. The boolean parameter |inner| tells whether the former is the case; if so |C_xref| should stop with |next_control=='|'|, and otherwise it should stop with either |next_control==begin_comment| or |next_control>=format|. In fact, setting |inner| will make |C_xref| stop at |'|'| but proceed past module names, while comment delimiters and major control codes will make |C_xref| stop regardless of |inner|. If |next_control>=format| when |C_xref| is called, nothing will happen, but it is safe to call the function when |next_control=='|'| or |next_control==end_comment|, which will be stepped over rather than considered as termination condition. Thus we can avoid saying |shift()| immediately before calling |C_xref| on several occasions. After a `\.\#' that starts a preprocessor directive, an identifier must follow, which must not be \xr.d; this is achieved by performing an extra |shift()|. If no identifier follows, we report an error, but do not perform the extra |shift()|. The code below uses the fact that our internal code numbers |identifier|, |xref_roman|, |xref_wildcard|, |xref_typewriter|, and |xref_mark| are consecutive. The other task of Phase~I is to collect all unusual |ilk| assignments, and the subtler part of this is the processing of |typedef| declarations. The details of this process will be explained later, but this is where it is hooked into the other actions, since all relevant tokens (which do not include module names or comments) pass here one by one. 
@c void C_xref (boolean inner) { while (next_control if (next_control>=identifier && next_control<=xref_mark) new_id_xref(cur_id); else if (next_control==module_name && cur_mod!=NULL) mod_xref_switch=cite_flag,new_mod_xref(cur_mod); if (next_control==start_preproc && shift()!=end_preproc &&next_control!=identifier) err_print("! Identifier should follow `#'"); @.Identifier should follow `\#'@> else shift(); if (next_control=='|' && inner || next_control==begin_comment || next_control==end_comment) return; } } @ The function |outer_xref| is like |C_xref|, but is used to scan an entire macro body, or a portion of a section body delimited by module names; it handles \Cee~text with embedded comments. It is called after \:d, a format definition, \:c, or a module name (either defining or applied) has been scanned, and after \:h has fired up its header file; in all cases |next_control| is already processed, and we start with |shift()|. (There is also one call that can occur during Phase~II, namely if illegal items following a \:s format definition were found; its purpose then is merely to ensure |next_control>=format| without producing any output.) While a comment is being scanned, tokens that pass |C_xref| should not be considered as part of a possible |typedef| that is in progress; this is achieved by invoking the macro |typedef_tracking| with the proper boolean value at the beginning and end of the comment. @c void outer_xref (void) /* extension of |C_xref| */ { shift(); /* move past previously processed token */ while (next_control= do switch (next_control=skip_TeX()) { case underline: xref_switch=def_flag; break; case '|': C_xref(true); break; case module_name: case refer: loc-=2; get_next(); break; case ignored_text: get_control_text(); break; case char_trans: err_print("! 
`@@l' only allowed in limbo"); break; @.`@@l' only allowed in limbo@> case xref_roman: case xref_wildcard: case xref_typewriter: case xref_mark: loc-=2; get_next(); new_id_xref(cur_id); } while (next_control=format|. @< Store cross-references in the def... @>= while (next_control outer_xref(); /* macro definition or comment after format definition */ } else @< Read a header file, scanning it for |typedef| declarations @> @ Before we handle format codes occurring in a section, let us consider their treatment in limbo. Here the code must be a non-printing \:s rather than \:f, since we are not prepared to emit formatted output in limbo. The syntax and semantics are simple: two identifiers must follow, and the |ilk| of the latter is is assigned to the former. @< Process a format code in limbo @>= if (tolower((eight_bits)loc[-1])=='f') err_print("! Double @@ required in limbo part"); @.Double @@ required...@> else { id_pointer lhs; if (shift()==identifier && (lhs=cur_id,shift()==identifier)) lhs->ilk=cur_id->ilk; else err_print("! Improper format definition"); @.Improper format definition@> } @ In a section \:s is processed in the same way; for \:f we additionally produce a defining \xr. for the left hand side. We do not call |shift| since this will be done by |outer_xref|, which is called after this code to process any comments that follow the format definition. @< Process a format code in a section @>= { boolean f= tolower((eight_bits)loc[-1])=='f'; id_pointer lhs; if (shift()==identifier && (lhs=cur_id,shift()==identifier)) { if (f) new_id_xref(lhs); @+ else xref_switch=0; lhs->ilk=cur_id->ilk; } else err_print("! Improper format definition"); @.Improper format definition@> } @ After \:h a file name follows, enclosed in white space, double quotes, or angle brackets. We open the header file by the same routine used to open \:i files, but here we do suspend reading from the change file. 
If all is well the whole file will contain pure \Cee~text without any control codes, so |outer_xref| will come to a halt shortly after returning from that file. @< Read a header file... @>= { if (push_input_file(true,true)) /* prepare for reading header file */ including_header_file=true; /* will be reset on closing the file */ typedef_tracking(true); /* this is what we are doing it for */ outer_xref(); /* |shift()| and collect typedefs until |next_control>=format| */ typedef_tracking(false); } @ Finally, when the \TeX\ and definition parts have been treated, we have |next_control>=begin_C|. The loop repeatedly marks a module name \xr. and then calls |outer_xref| to scan everything up to the next module name; if |next_control==module_name| initially, we raise |mod_xref_switch| which will cause that first module name to be marked as defining. @< Store cross-references in the \Cee... @>= { if (next_control @c void mod_check (mod_pointer p) /* print anomalies in subtree |p| */ { if (p != NULL) { mod_check (p->llink); /* traverse left subtree */ { boolean file_module = p->xref->num==file_flag; sixteen_bits head, *q, threshold; /* lower limit of |num| values of current interest */ if (file_module) q=&p->xref->next; @+ else head=xref_index(p->xref),q=&head; if (!complete_name(p)) @/{@; print("\n! Never completed"); print_mod(p); mark_harmless(); } @.Never completed: @> if (xnum(*q)<=(threshold=def_flag)) @/{@; print("\n! Never defined"); print_mod(p); mark_harmless(); } @.Never defined: @> else @< Reverse sublist after |*q| with entries |num>threshold|; make |q| point to final |next| field @> if (xnum(*q)>(threshold=cite_flag)) @/@< Reverse sublist... @> if (xnum(*q)==(threshold=0)) @/{@; if(!file_module) {@; print("\n! Never used"); print_mod(p); mark_harmless(); } @.Never used: @> } else @< Reverse sublist... 
@> if (!file_module) p->xref=&xmem[head]; /* set pointer to possibly modified value */ } mod_check (p->rlink); /* traverse right subtree */ } } @~@= mod_check(root); @ We now come to the reversal of the \xr. lists, which is necessary because by repeatedly prepending elements to these lists, or in the case of module names to one of three sublists, these (sub)lists have obtained reverse ordering. The method of traversal of the set of all identifiers is different from that for the set of all module names, whence these tasks are linked into the program at different points, but the reversal routines themselves are quite similar. As we already traverse the module names in |check_root|, the reversal code for module \xr.s was simply inserted at the proper place in that function; for traversal of the set of identifiers we use the |hash| table, by following all non-empty hash lists. @< Reverse the \xr. lists... @>= { id_pointer name; id_pointer *h; /* pointer into |hash| */ for (h=hash; hhash_link) /* traverse all hash lists */ @< Reverse the list |name->xref| @> } @ As Knuth keeps reminding us, list reversal can be thought of as a process of repeatedly popping values off one list~|x| and pushing them onto the reversed list~|y| (or you may read ``stack'' for ``list'' if you like). It can also be useful to remember that the basic action can be performed by a four-stroke engine, where the left hand side of each assignment equals the right hand side of the previous one. The basic cycle can actually take different forms, each using an auxiliary variable~|t|. One way is to use~|t| to hold the entry moved, repeating |{ t=x; x=t->next; t->next=y; y=t;}| until |x==NULL|; another way is to use~|t| to hold the remainder of the list to be reversed, repeating |{ t=x->next; x->next=y; y=x; x=t;}|, again until |x==NULL|. For reversing the \xr. lists of identifiers, we use the first form. 
@< Reverse the list |name->xref| @>= { sixteen_bits x=xref_index(name->xref),t,y=0; /* index of the sentinel node */ while (xnum(x)!=0) {@; t=x; x=xlink(t); xlink(t)=y; y=t; } name->xref=&xmem[y]; /* don't forget to link in the reversed list */ } @ The reversal of sublists of the \xr. list attached to module names is only slightly more complicated. At the three places where the code below is used, things have been set up so that |q| points to the location of the link pointing to the start of the sublist (since this link is going to be changed, we need a pointer to it) and the end of the list is implicitly indicated by |threshold|: the sublist ends before the first entry with |num<=threshold| (which always exists because of the sentinel with |num==0|). It has also been ensured that the code is only invoked when the indicated sublist is not empty, so that we can use a |do|-|while| loop; we have chosen the alternative order of assignments with respect to the previous section, mainly to demonstrate the possibility, although it also allows the code to be slightly shorter here. After the sublist has been reversed, some links must be redirected to install it in its proper place. The link |*q| must be pointed to the head of the reversed list, which is in |y|, while the link at the end of the sublist must be pointed to the unaffected remainder of the list (this remainder should ideally have been assigned to |y| initially, but it is only located once we have arrived at the first entry with |num<=threshold|). Fortunately the final node of the sublist is not only pointed to by its predecessor, but also by |*q| (before it is changed) since it used to be the first node of the sublist; therefore a small sequence of carefully ordered assignments will do the trick. It is instructive to check that if the sublist to be reversed has length~$1$, then all variables will eventually return to their original state. 
The initialisation of~|y| is only present to keep certain compilers from complaining that its value is used before it is first assigned to; the initial value is irrelevant since it will be overwritten. @< Reverse sublist... @>= { sixteen_bits x=*q,y=0,t; do {@; t=xlink(x); xlink(x)=y; y=x; } while (xnum(x=t)>threshold); xlink(t=*q)=x; *q=y; q=&xlink(t); } @* Outline of Phase II processing. @:title@> With the description of Phase~I still fresh in our memory, let us look at the general outline of Phase~II, which is analogous, although it is more complicated. The extra complication is due to the fact that much more has to be done during Phase~II, notably the \Cee~texts have to be parsed in order to determine their proper formatting, and the resulting token lists have to be written to a file in a form that \TeX\ will be able to process. Most of the actual work however is localised in a few powerful functions that will be defined in detail later on, so that the definition of |phase_two| can be given here without much problems. There are three stages in the processing of a piece of \Cee~text during Phase~II: first it is scanned lexically (using |get_next|), and the resulting tokens are collected in the form of `scraps' that form the input for the second stage, the parsing algorithm, which transforms the scraps into a recursively nested token list, which is converted in the third stage to textual output. For the small parts of \Cee~text enclosed in `\pb', a function~|do_C| is available which handles all three stages of processing: it is called when an opening `\.\v' is seen, and when it is completed one has arrived at the closing `\.\v', and the required output is written on the \TeX~file. 
For the larger parts of \Cee~text, the function |outer_read| will read in, and convert to scraps, chunks of \Cee~text delimited by control codes |c>=format| (so like |outer_xref| it handles comments, but it will not incorporate module names), and when enough of these have been accumulated, a call on |finish_C| will invoke the parsing algorithm and send the resulting tokens to the output file. A number of simple functions for producing output are also called explicitly at certain points. We have already seen |out| and~|finish_line| for character-based output; there is also |out_str| for writing a string, |out_sec_nr| for a section number, |list_refs| for generating the text for \:\#, and |footnote| for producing the module \xr. information at the end of sections. Invoking the macro |tex_new_line| immediately after |finish_line| was called will produce an empty line on the output. Furthermore certain small pieces of code which have been scanned directly rather than via |do_C| or |outer_read| (for instance after format or macro definitions) are converted into scraps: first a number of tokens are appended by means of |app| or~|app_str| and then the whole sequence is converted to a scrap by calling |pack_scrap|; |app| and~|pack_scrap| are macros. @< Prototypes @>= void do_C (void); /* handle \Cee~text enclosed in `\pb' */ void outer_read (void); /* transform input into scraps */ void finish_C (void); /* finishes a definition or a \Cee~part */ void finish_line(void); /* send out a line of output */ void out_str (char*); /* write multiple characters */ void out_sec_nr (int); /* output a section number */ xref_pointer list_refs (xref_pointer,sixteen_bits); /* output module \xr.s */ void footnote(xref_pointer*,sixteen_bits); /* same with heading text */ void app_str(char*); /* append a sequence of character tokens */ @ Like in |phase_one|, we loop over the sections after passing over the limbo part. 
@c void phase_two (void) /* read all the text again and translate it to \TeX\ form */ { phase=2; reset_input (); print_progress("\nWriting the output file..."); @.Writing the output file...@> section_count=0; copy_limbo(); finish_line(); tex_new_line(); /* insert a blank line, it looks nice */ while (!input_has_ended) @ } @ The output file will contain the control sequence `\.{\\Y}' before a non-empty definition portion of a section, and before a non-empty \Cee~portion (the \TeX~portion is always considered to be non-empty, since it contains at least the section number). This puts a little white space between adjacent portions when they are printed. @d emit_space() out_str ("\\Y"); @.\\Y@> @< Translate the current section @>= { section_count++; @< Output the code for the beginning of a new section @> @< Translate the \TeX~part of the current section @> if (next_control } if (next_control @< Show cross-references to this section @> } @< Output the code for the end of a section @> } @ Sections beginning with the \.{CWEB} control sequence \:{\ } start in the output with the \TeX\ control sequence `\.{\\M}', followed by the section number. Similarly, \:* sections lead to the control sequence `\.{\\N}', and \:\~ sections to `\.{\\n}'. If this is a changed section, we put `\.*' just before the section number. @< Output the code for the beginning... @>= { out('\\'); out(loc[-1]=='*' ? 'N' : loc[-1]=='~' ? 'n' : 'M' ); @.\\N@> @.\\n@> @.\\M@> if (loc[-1]=='*') {@; print_section_progress(); @< Handle title level @>@+ } out_sec_nr(section_count); out_str(". "); } @ Between \:* and the title that follows, a level can be specified in the form of another `\.*', or a decimal number; the absence of a number will be interpreted as level~0. The level will be written out after `\.{\\N}' as a first argument, delimited by a space, after which the second argument is the section number and the third specifies the title. 
@< Handle title level @>= { if (*loc=='*') ++loc,out_str("-1"); else if (!isdigit((eight_bits)*loc)) out('0'); else do out(*loc++); while (isdigit((eight_bits)*loc)); out(' '); /* terminate level by a space */ } @ In the \TeX~part of a section, we simply copy the source text, except that index entries are not copied and \Cee\ text within `\pb' is translated; during this translation we track typedef definitions so that any complete typedef declaration within `\pb' will be parsed correctly, as will be explained below. @< Translate the \TeX... @>= do switch (next_control=copy_TeX()) { case '|': typedef_master=0; do_C(); break; case at_sign_image: out('@@'); break; case thin_space: case math_break: case ASCII_code: case line_break: case big_line_break: case no_line_break: case join: case pseudo_semi: case force_expr_open: case force_expr_close: err_print("! You can't do that in TeX text"); @.You can't do that...@> break; #ifdef DEBUG case trace0: case trace1: case trace2: case trace3: tracing=next_control; break; #endif case module_name: loc-=2; get_next(); break; /* get module name */ case refer: loc-=2; get_next(); /* get name referred to */ if (cur_id->xref->num==0) err_print("! Undefined reference"); else list_refs(cur_id->xref,0); break; case TeX_string: err_print("! 
TeX string should be in C text only"); @.TeX string should be...@> /* fall through */ case xref_roman: case xref_wildcard: case xref_typewriter: case xref_mark: case ignored_text: get_control_text(); /* skip to \:> */ } while (next_control= { typedef_tracking(false); do { boolean suppressed=false; /* whether output suppressed by \:s */ if (next_control==format) @< Start a format definition @> else if (next_control==definition) @< Start a macro definition @> else @< Start a header file inclusion @> if (!suppressed) outer_read(), finish_C(); else if (next_control if (next_control==begin_comment) loc-=2; /* try to get back in phase */ outer_xref(); /* skip illegal stuff */ } } while (next_control= if (tolower((eight_bits)loc[-1])=='s') @/{@; suppressed=true; shift(); shift(); shift(); } /* skip format definition */ else { int saved_code=0,saved_mathness; app_str("\\F"); shift(); /* this will produce `\&{format}' */ @.\\F@> if (cur_id->ilk!=TeX_like && cur_id->ilk!=NULL_like) app(id_flag+id_index(cur_id)); else @< Expand identifier and set |saved_code| and |saved_mathness| @> app('~'); pack_scrap(insert,yes_math); shift(); app((cur_id->ilk==normal || cur_id->ilk==TeX_like || cur_id->ilk==NULL_like ? id_flag : res_flag )+id_index(cur_id)); @/check_scrap(); pack_scrap(insert,cur_id->ilk==TeX_like ? no_math : yes_math); shift(); if (saved_code!=0) { app_str("\\quad("); app(saved_code); app(')'); @/check_scrap(); pack_scrap(insert,saved_mathness); } } @ Since conversion of an identifier into a \TeX\ control sequence is performed by the output routine, we need to circumvent this to force italic type; this is done by expanding the name into characters directly rather than leaving this to the output routine. @< Expand identifier... @>= { char* p=name_begin(cur_id); saved_mathness=cur_id->ilk==TeX_like ? 
no_math : yes_math; saved_code=id_flag+id_index(cur_id);/* save to print afterwards */ app_str("\\\\{"); @.\\\\@> do {@; if (*p=='_') app('\\'); app_tok(*p); } while (*++p!='\0'); app('}'); check_toks(10); } @ Keeping in line with the conventions of the \Cee\ preprocessor (and otherwise contrary to the rules of \.{CWEB}) we distinguish here between the cases that a `\.(' immediately follows the identifier being defined, and the case that anything else (possibly a space) does. In the latter case, the replacement text starts immediately after the identifier, in the former case, it starts after we scan the matching `\.)', which must be simply the first `\.)' that follows. @= { if (shift()!=identifier) err_print("! Improper macro definition"); @.Improper macro definition@> else { app_str("\\D$"); /* this will produce \&{\#define} */ @.\\D@> app(id_flag+id_index(cur_id)); if (*loc=='(') { shift(); do { app_char_tok(next_control); if (shift()!=identifier) break; app(id_flag+id_index(cur_id)); } while(shift()==','); check_toks(2); if (next_control==')') {@; app(')'); shift(); } else err_print("! Improper macro definition"); } else shift(); app('$'); app(break_space); pack_scrap(insert,no_math); } } @ For scanning the token following \:h we temporarily set |preprocessing=2|, so that angle brackets will be recognised as string quotes. @= { app_str("\\h"); /* this will produce \&{\#include} */ @.\\h@> pack_scrap(insert,no_math); { int save=preprocessing; preprocessing=2; /* emulate `\.{\#include}' */ while (shift()==' ') {} /* skip spaces and read file name as string */ preprocessing=save; } } @ Finally, when the \TeX\ and definition parts have been treated, we have |next_control>=begin_C|. If the section defines a module name, we assign the name to the variable |this_module|, so that the proper \xr. information can be listed at the end of the section. 
Like in Phase~I it is necessary to pay special attention to |typedef| declarations, and this time tracking is enabled both within `\pb' in \TeX~text and in the \Cee~part of a section; since the former may involve incomplete pieces of syntax like a sole `|typedef|', we reset the master counter to its neutral state at the beginning of a \Cee~part. After the heading of the \Cee~text has been processed we alternatively read ordinary pieces of \Cee~text and module names until the module has ended; we start with calling |outer_read| before testing termination, in order to ensure that the overflow tests contained in |outer_read| will be executed even in case of a section with a \Cee~part consisting only of a heading. @= { typedef_master=0; if (next_control==begin_C) shift(); else { this_module=cur_mod; /* register the name for this module */ @< Check that `\.{=}' or `\.{==}' follows this module name, and emit the scraps to start the module definition @> } do { outer_read(); if (next_control==new_section) break; if (next_control==module_name) @< Append a module name scrap @> else err_print("! You can't do that in C text"); @.You can't do that...@> /* |format|, |definition| or |begin_C| */ shift(); } while (true); finish_C(); } @ Despite the name of this module, we allow `\.+' to precede the `\.{=}' or `\.{==}', just as |CTANGLE| does. Note however that, unlike in |CTANGLE|, the `\.+' will be scanned as part of an `\.{+=}' compound operator (whence in fact `\.{+= =}' is allowed here whereas `\.{+ ==}' is not; we hope nobody minds this). Note also that if for whatever reason the current section number should fail to appear in the \xr. list for the module name (e.g., if section numbers have inadvertently got out of synchronisation with respect to Phase~I), then listing the \xr. information at the end of the section is suppressed by setting |this_module=NULL|. @< Check that `\.{=}' ... 
@>= { if (shift()=='=' || next_control==eq_eq || next_control==plus_assign) @/{@; if (next_control!=plus_assign || shift()=='=') shift(); } /* accept `\.=', `\.{==}', `\.{+=}' or `\.{+==}' */ else err_print("! You need an = sign after the module name"); @.You need an = sign...@> if (this_module!=NULL) /* i.e., unless module name was bad */ { xref_pointer x=this_module->xref; if (x->num==file_flag) x=next_xref(x); app_str("\\4$"); /* module name will be flush left */ @.\\4@> app(mod_flag+mod_index(this_module)); if (x->num != section_count+def_flag) { app_str("\\PE"); /* module has also been defined before */ @.\\PE@> this_module = NULL; /* so we won't give \xr. info here */ } else app_str("\\EQ"); /* output a module definition sign */ @.\\EQ@> app_str("{}$"); app(force); pack_scrap(insert,no_math); /* this forces a line break unless \:+ follows */ } } @ Cross references relating to a named module are given after its first defining section ends (for further defining sections of this name we will have put |this_module=NULL|). @< Show cross-references... @>= { if (this_module != NULL) { xref_pointer foot_ref=this_module->xref; if (foot_ref->num==file_flag) foot_ref=next_xref(foot_ref); foot_ref=next_xref(foot_ref); /* don't \xr. to yourself */ footnote(&foot_ref,def_flag); /* display further defining sections; advance |foot_ref| */ footnote(&foot_ref,cite_flag); /* display any citations */ footnote(&foot_ref,0); /* display uses */ } } @ The `\.{\\fi}' closes a \TeX~conditional that was initiated by the macro that started off the section; this allows printing of only the changed sections in a simple way. @= {@; out_str ("\\fi"); finish_line (); tex_new_line(); } @.\\fi@> /* insert a blank line, it looks nice */ @* Auxiliary functions used in Phase~II. @:title@> We now define the functions that do the actual processing of \Cee~code during Phase~II, but without going into the details of parsing and output. 
We explain the functions |do_C|, |outer_read|, |finish_C|, |list_refs|, and |footnote| used above, and also two further auxiliaries |C_read| and~|C_translate|. @< Prototypes @>= text_pointer translate(void); /* build formatted text from collected scraps */ void make_output(text_pointer,mode); /* output text in |inner| or |outer| mode */ @ Before we discuss these functions, we must first discuss what scraps are. Scraps are the objects manipulated during parsing, and they have two main attributes: a syntactic category |cat|, that determines the way they will be treated by the parser, and a translation |trans|, which is a pointer into~|text_mem| denoting a (possibly recursively nested) sequence of tokens, that determines the representation of the scrap upon output. Since some parts of the output are to be processed in \TeX's math mode, and other parts in horizontal mode, an additional field |mathness| tells which mode is required at each end of the translation of the scrap. @= typedef struct { eight_bits cat; /* category code */ eight_bits mathness; /* whether in math mode at left and right boundary */ text_pointer trans; /* translation text */ } scrap, *scrap_pointer; @ When \Cee\ text is converted into scraps for parsing, the resulting scraps are placed in an array |scrap_info|, between the locations pointed to by |scrap_base| and~|scrap_ptr|. Actually, |scrap_info| is one field of a |union|, since the same memory is used for a different purpose during Phase~III. Basic scraps are created by invoking |app|, |app_tok| or |app_char_tok| a number of times creating the constituent tokens, and then consolidating the text by means of |freeze_text|; the resulting text is accessible as |text_ptr| before, and as |text_ptr-1| after the call of |freeze_text|. 
In the common case that the text forms the translation of a new scrap that is to be added to the scrap sequence, |pack_scrap| can be used in place of |freeze_text|; a category and `mathness' should be supplied in this case. The latter can take one of three values as explained later, and by multiplying it by~5 (binary~$0101$) it is duplicated into the two least significant pairs of bits, because for elementary scraps the value is the same at its left and right boundaries. Note that none of |app|, |freeze_text| and |pack_scrap| do bound checks, since it is assumed that these have been done beforehand; |app_tok| and |app_char_tok| however can be called at more uncertain times. @d scrap_info scrap_union.scrap_field @d scrap_info_end (&scrap_info[max_scraps]) /* end of |scrap_info| */ @) @d app(a) (*tok_ptr++ = a) @d app_tok(a) @+ if (tok_ptr>tok_mem_end-2) overflow("token"); @.token capacity exceeded@> @+ else app(a) @; @d app_char_tok(c) app_tok((unsigned char)(c)) @d freeze_text() (*++text_ptr = tok_ptr) @d pack_scrap(c,m) ( scrap_ptr->cat = c, scrap_ptr->trans = text_ptr, freeze_text(), (scrap_ptr++)->mathness = 5*(m) ) @= union { scrap scrap_field[max_scraps]; /* memory array for scraps */ @< Alternative use of |scrap_union| @>@; } scrap_union; scrap_pointer scrap_base=scrap_info; /* beginning of the current scrap sequence */ scrap_pointer scrap_ptr = scrap_info; /* points to end of the current scrap sequence */ #ifdef STAT scrap_pointer max_scr_ptr = scrap_info; /* largest value assumed by |scrap_ptr| */ #endif @ Token lists are stored in |tok_mem| and represent text to be output to the \TeX~file. Because during parsing token lists will often be formed by concatenation of existing ones, a representation is chosen where this can be done easily; in particular a token can be a reference to a token list stored elsewhere. Also identifiers, reserved words and module names are represented by a reference to the name table rather than by their constituent characters. 
All other items are stored as list of characters, which have been widened to fill a 16-bit |token|, and special layout codes that will be explained below. More precisely, a |token t@;| is interpreted as follows. \yskip \item{$\bullet$} |t<=UCHAR_MAX|: the character~|t|, which possibly is a compressed operator like `\.{\&\&}'; \item{$\bullet$} |UCHAR_MAX= enum @/ { cancel=UCHAR_MAX+1,/* the following 9 items should remain in this order */ indent, outdent, opt, flush_left, break_space, force, big_force, backup, big_backup, @/ relax, @/ space=opt, tilde=flush_left @/ }; @ The memory management for tokens follows a simple block regime. The source file is divided into blocks, and whenever a block is entered, the current states of the |text_mem| and~|tok_mem| are marked, after which they will gradually get filled up further; at the end of the block all memory used during the block is released by resetting the pointers into these arrays to the values they had on block entry. There are three kinds of blocks: the global block, which is filled during initialisation and is never released, the section blocks, which correspond to each individual section with a non-empty \Cee~part (or to a macro or format definition), and the inner blocks, which correspond to each `\pb' contained in the \TeX~part of a section or in a module name (but not in a comment). Because nesting is at most three blocks deep (for `\pb' inside module names) a two-element stack will suffice to hold the saved markers (nothing needs to be saved for the global block), and we can address these elements directly without a stack pointer since we know at which level we are. In fact the section blocks butt together, and nothing is added to the global block except at initialisation time, so that each section block starts in the same state. 
Therefore, if we call |enter_block(0)| after initialisation to record this state, then there is no need to call it any more, and it suffices to call |leave_block(0)| each time upon leaving a section. Scraps follow a different regime than texts and tokens, since they must be assembled into a single contiguous sequence before each translation, and can be discarded when the translation is over. In particular the scraps used to parse `\pb' within a comment can and must be released before reading in the \Cee~text following the comment, but the tokens and texts formed while translating the comment must remain until they are output. @d enter_block(i) save[i].txt=text_ptr, save[i].tok=tok_ptr; @d leave_block(i) text_ptr=save[i].txt, tok_ptr=save[i].tok; @< Global variables @>= struct {@; text_pointer txt; token_pointer tok; } save[2]; @ The conversion of input tokens as obtained by |get_next| into scraps that can be processed by the parser is mainly handled by the function |C_read|, which is analogous to the |C_xref| routine used during Phase~I. Like |C_xref|, the function |C_read| takes a boolean argument telling whether it is processing `\pb'; it starts with the current value of |next_control| and it uses the operation |shift| repeatedly to read \Cee~text until encountering a terminating token. Also like |C_xref|, the initial conditions |next_control=='|'| or |next_control==end_comment| will not lead to immediate termination; in these cases the first time through the |while| loop will have no effect. @c void C_read (boolean inner) /* creates scraps from \Cee\ tokens */ { while (next_control if (shift()=='|' && inner || next_control==begin_comment || next_control==end_comment) return; } } @ Many input tokens are completely determined by the value returned from |get_next|; we have arranged it that such a value is always less than |ignore|. 
For those tokens we will install a corresponding scrap at initialisation time in an array |token_trans|, so that when such a token comes along, we can simply copy the scrap from |token_trans| into scrap memory. @< Global variables @>= scrap token_trans[ignore]; @~Since the scraps in |token_trans| contain pointers into |tok_mem|, that array cannot be initialised statically; rather we do the initialisation dynamically based on information stored statically. For each token three kinds of information are supplied: the category of its translation, a string of characters giving the translation itself, and an indication of its |mathness| (whether the translation must or must not occur in math mode, or whether both are allowed). The actual initialisation values will be given later. @< Set initial values @>= { static struct {@; short tok; eight_bits cat, mathness; char* tr; } trans_ini [] = @/{ @< Initialiser for |trans_ini| @>@;@; };@/ int i,n=array_size(trans_ini); scrap o={ insert, 5*maybe_math, NULL }; /* completely inert scrap */ for (i=0; icat=trans_ini[i].cat; p->mathness=5*trans_ini[i].mathness; app_str(trans_ini[i].tr); p->trans=text_ptr; freeze_text(); } @< Install the translations of tokens involving line breaks @> o.trans=text_ptr; freeze_text(); /* empty translation */ for (i=0; i<=UCHAR_MAX; ++i) /* clear all remaining tokens */ if (token_trans[i].cat==0) token_trans[i]=o; enter_block(0); /* fix tokens; will be restored after each section */ } @ Having installed |token_trans|, the number of cases in the switch statement below is greatly reduced. @< Append the scr... 
@>= { check_scrap(); check_toks(6); /* `\.{\\hbox\{}' */ switch (next_control) { case string: case constant: case verbatim: @< Append a string or constant @> @+ goto done; case TeX_string: @< Append a \TeX\ string scrap @> @+ goto done; case identifier: @< Append an identifier scrap @> @+ goto done; case module_name: @< Append a module name scrap @> @+ goto done; case start_preproc: @< Append a scrap starting a preprocessing directive @> @+ goto done; case refer: err_print("! You can't use `@@#' in C text"); /*fall through */ case ignore: case begin_comment: case end_comment: case xref_roman: case xref_wildcard: case xref_typewriter: case xref_mark: goto done; @\@< Cases necessary for proper parsing of typedefs @> case '|': @+ if (inner) goto done; /* skip initial `\.\v' of `\pb' */ } *scrap_ptr=token_trans[next_control]; /* fixed scrap for this input token */ @< Possibly scoop up some dangling output tokens in compatibility mode @> ++scrap_ptr; /* incorporate the scrap */ done: {} } @ When we need to be sure there is enough space to store |n|~more tokens, we say |check_toks(n)|; when a scrap has to be appended we invoke |check_scrap|, and when a text is to be formed otherwise than by |pack_scrap|, we invoke |check_text|. The key points in the program where we make such checks is when reading in \Cee~text in the functions |C_read| and |outer_read|, and when translating the text in |reduce|. At these points we make sure that there is enough room to spare so that in cases where explicit tokens occasionally need to be appended (e.g., by |app_str|) no test is necessary. When we cannot be sure about this, for instance because an indefinite number of tokens is appended, we use |app_tok| or |app_char_tok| instead of |app|; after this it will in fact be safe to do one additional |app|. 
@d check_toks(n) @+ if (tok_ptr>tok_mem_end-n) overflow("token"); @.token capacity exceeded@> @+ else @; @d check_text() @+ if (text_ptr>=text_mem_end-1) overflow("text"); @.text capacity exceeded@> @+ else @; @d check_scrap() @+ if (scrap_ptr>=scrap_info_end) overflow("scrap"); @.scrap capacity exceeded@> @+ else check_text() @; @ As was just explained, we can use |app| rather than |app_char_tok| here. @c void app_str(char* s) @+{@; while(*s!='\0') app(*s++); } @ In long strings we insert a discretionary break every 20~characters, so that \TeX\ is less likely to run into problems, especially if the string occurs in the \TeX~part of a section; if we are directly after a backslash however, we postpone the break since otherwise the quote escaping the line break would appear to be escaped itself. Many of the special characters in a string must be prefixed by `\.\\' so that \TeX\ will print them properly. In the case of constants however, this `\.\\' converts marker characters that were inserted during scanning into control sequences used in formatting the constant. @^special string characters@> @< Append a string or... @>= { int count = -1; /* characters remaining before string break */ if (next_control==constant) app_str("\\T{"); @.\\T@> else if (next_control==string) {@; count=20; app_str("\\.{"); } @.\\.@> else app_str("\\vb{"); @.\\vb@> while (id_first if (strchr(" \\#%$^{}~&_",*id_first)!=NULL) app('\\'); @.\\\ @> @.\\\\@> @.\\\#@> @.\\\%@> @.\\\$@> @.\\\^@> @.\\\{@> @.\\\}@> @.\\\~@> @.\\\~@> @.\\\&@> @.\\\_@> app_char_tok(*id_first++); } app('}'); if (next_control==verbatim) pack_scrap(insert,maybe_math); else pack_scrap(expression,yes_math); } @ A \TeX~string is boxed and copied without further ado; undoubling of any \:@@ has already been done by |get_control_text|. In compatibility mode we do not however produce a scrap, but rather leave the output tokens produced to be picked up by the next scrap appended; this makes |dangling_tokens()| hold. 
In order to make sure that this next scrap exists, we will append a dummy scrap in |translate| if necessary. @:TeX string@> @d dangling_tokens() (compatibility_mode && tok_ptr>*text_ptr) @< Append a \TeX\ string scrap @>= { app_str("\\hbox{"); while (id_first since they do not call |freeze_text|, so we must take some action to avoid that \TeX~strings will appear too late in the output. When the code below is encountered, the fixed scrap has been copied to |*scrap_ptr|. Its category and mathness are correct, but we may need to prepend tokens to its translation; this is done by appending that translation to the dangling tokens, wrapping them up together, and replacing the original translation by a pointer to the combined result. @< Possibly scoop up... @>= {@; if (dangling_tokens()) {@; app_trans(scrap_ptr); scrap_ptr->trans=text_ptr; freeze_text(); } } @ It is during the conversion of identifiers to scraps that their |ilk| plays a crucial r\^ole. Ordinary identifiers, and those with |ilk| equal to |TeX_like| or |NULL_like|, will get the category |expression|, and are set in math mode except in the case of |TeX_like| identifiers. If the |ilk| specifies some reserved word on the other hand, that |ilk| becomes the category of the identifier, determining its behaviour during parsing. The translation of these reserved words is done using |res_flag| rather than |id_flag|, as a result of which they will be printed in boldface; they get mathness |maybe_math|, indicating that they can be set equally well inside and outside math mode. Identifiers whose |ilk| is |type_defined|, |const_like|, or |typedef_like| will become reserved words with category |int_like|. These special |ilk| values have served their purpose during the scanning of typedef declarations; this involves subtle manoeuvres that will be explained later. 
@< Append an identifier scrap @>= { id_pointer p=cur_id; int cat=p->ilk; @< Track identifiers relevant to typedef; maybe change |cat| from |int_like| to |expression| @> if (cat==normal || cat==TeX_like || cat==NULL_like) { app(id_flag+id_index(p)); pack_scrap(expression , cat==TeX_like && !compatibility_mode ? no_math : yes_math); } else { if (cat==type_defined || cat==const_like || cat==typedef_like) cat=int_like; app(res_flag+id_index(p)); pack_scrap(cat,maybe_math); } } @ For bad module names (e.g., an ambiguous prefix) an error has already been reported, and they are silently suppressed from the output. @< Append a module name scrap @>= {@; if (cur_mod!=NULL) app(mod_flag+mod_index(cur_mod)), pack_scrap(mod_scrap,yes_math); } @ We tested in Phase~I that `\.\#' is followed by an identifier or by a newline (which will cause |get_next| to return |end_preproc|). In the former case we incorporate the identifier into the scrap for the preprocessor directive; in the latter case the |lproc| scrap will just contain the `\.\#', but since we already scanned the following |end_preproc|, we append the corresponding |rproc| scrap as well. @< Append a scrap starting a preprocessing directive @>= { app(force); app(flush_left); app_str("\\&\\#"); if (shift()==identifier) {@; app(res_flag+id_index(cur_id)); pack_scrap(lproc,no_math); } else if (next_control==end_preproc) @/{@; pack_scrap(lproc,no_math); check_scrap(); *scrap_ptr++=token_trans[end_preproc]; } else confusion("no identifier after `#'"); @.no identifier after `\#'@> } @ When the `\.\v' that introduces \Cee\ text is sensed, a call on |C_translate| will return a pointer to the \TeX\ translation of that text. If scraps exist in |scrap_info|, they are unaffected by this translation process, which is useful since we convert comments to single scraps with help of |C_translate| while building the scrap sequence for the surrounding piece of \Cee~text. 
@c text_pointer C_translate(void) { text_pointer p; scrap_pointer save_base=scrap_base; scrap_base=scrap_ptr; C_read(true); /* get the scraps together */ if (next_control != '|') err_print("! Missing `|' after C text"); @.Missing `|'...@> p=translate(); /* make the translation */ #ifdef STAT if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; #endif scrap_ptr=scrap_base; scrap_base=save_base; return p; } @ The function |outer_read| is to |C_read| as |outer_xref| is to |C_xref|: it constructs a sequence of scraps for \Cee~text until |next_control>=format|, taking care of embedded comments. It is called between each occurrence of \:d, \:f, \:h, or a module name, which makes it a convenient place to test whether the memory arrays have enough spare room to cater for the stuff that could be needed for processing the next such token; the most demanding requirements are for a module name heading the \Cee~part of a section. These tests must be made outside the main loop of |outer_read|. We use that if |next_control==end_comment| when |C_read| is called, this value is effectively ignored. @c void outer_read (void) /* makes scraps from \Cee\ tokens and comments */ { while (next_control check_scrap(); check_toks(11); /* `\.{\$\\4$m$\\PE\{\}\$$f$}' */ } @ Since the call on |C_translate| used to process `\pb' inside comments will itself create tokens, the token sequence for the comment under construction must be wrapped up each time this happens, and a token referring to that initial segment contributed as first new item after the translation is made. Tests on the availability of two more tokens and a text must be made inside the loop that incorporates successive `\pb' fragments; the space for tokens outside these fragments is tested within |scan_comment|. @< Read a comment, and convert it into a scrap @>= { boolean one_liner=loc[-1]=='/'; int bal=0; /* brace level in comment */ typedef_tracking(false); check_scrap(); check_toks(4); app(cancel); app_str(one_liner ? 
"\\SHC{" : "\\C{"); @.\\C@> @.\\SHC@> while ((next_control=scan_comment(&bal,one_liner))=='|') { text_pointer p=text_ptr, q=(freeze_text(), C_translate()); check_toks(7); app_tok(text_flag+text_index(p)); if (compatibility_mode) app_str("\\PB{"); @.\\PB@> app(inner_text_flag+text_index(q)); if (compatibility_mode) app('}'); check_text(); } app_char_tok('}'); app(force); pack_scrap(insert, no_math); /* the full comment becomes a scrap */ typedef_tracking(true); } @ The function |do_C| does the scanning, translation, and output of \Cee~text within `\pb' brackets. It is called during the scanning of the \TeX~part of a section and during the output of module names. As we have seen, this function is not called when processing comments, where |C_translate| is used instead, because no direct output should be produced at such times. @c void do_C (void) /* read, translate, and output \Cee~text in `\pb' */ { enter_block(1); if (compatibility_mode) out_str("\\PB{"); @.\\PB@> make_output(C_translate(),inner); /* output the list */ if (compatibility_mode) out('}'); #ifdef STAT if (text_ptr>max_text_ptr) max_text_ptr = text_ptr; if (tok_ptr>max_tok_ptr) max_tok_ptr = tok_ptr; #endif leave_block(1); /* forget the tokens */ } @ The function |finish_C| outputs the translation of the current scraps, preceded by the control sequence `\.{\\B}' and followed by the control sequence `\.{\\par}'. It also restores the token and scrap memories to their state as immediately after initialisation. @c void finish_C (void) { out_str ("\\B"); @.\\B@> make_output(translate(),outer); out_str("\\par"); finish_line(); #ifdef STAT if (text_ptr>max_text_ptr) max_text_ptr=text_ptr; if (tok_ptr>max_tok_ptr) max_tok_ptr=tok_ptr; if (scrap_ptr>max_scr_ptr) max_scr_ptr=scrap_ptr; #endif leave_block(0); scrap_ptr=scrap_info; /* forget the tokens and the scraps */ } @ The function |footnote| gives \xr. 
information about further definitions of a module name (if |flag==def_flag|), about citations of a module name (if |flag==cite_flag|), or about the uses of a module name (if |flag==0|). It assumes that |*p| points to the first \xr. entry of interest, and it leaves |*p| pointing to the first element not printed (possibly the sentinel with |num==0|). Typical outputs are `\hbox{\.{\\Q 2001.}}', `\hbox{\.{\\Us 370\\ET1009.}}'\ETs `\hbox{\.{\\As 8, 27\\*, 51\\ETs64.}}'. @c void footnote (xref_pointer* p,sixteen_bits flag) { if ((*p)->num<=flag) return; /* no \xr.s of the requested kind: print nothing */ finish_line(); out('\\'); out(flag==0 ? 'U' : flag==cite_flag ? 'Q' : 'A'); /* select `\.{\\U}', `\.{\\Q}' or `\.{\\A}' according to |flag| */ @.\\A@> @.\\Q@> @.\\U@> *p=list_refs(*p,flag); /* print the numbers; |*p| advances past them */ out('.'); } @~The function |list_refs|, which does the main work for |footnote| and is also used to produce the text replacing \:\#, distinguishes three cases, according as the number of relevant \xr.s is one, two, or more than two. The function always produces at least one \xr.: it is never called with |x->num<=flag|. The value of |x| after traversing the references is returned, for the benefit of |footnote|. @c xref_pointer list_refs (xref_pointer x,sixteen_bits flag) { xref_pointer q=next_xref(x); /* second element in \xr. list */ if (q->num>flag) out('s'); /* use `\.{\\As}', `\.{\\Qs}' or `\.{\\Us}' */ @.\\As@> @.\\Qs@> @.\\Us@> out(' '); while (out_sec_nr(x->num&num_mask),x=next_xref(x),x->num>flag) /* print one section number, advance, and stop after the last relevant \xr. */ if (next_xref(x)->num>flag) out_str(", "); /* |x| is not the last */ else { out_str("\\ET"); /* next number printed will be the last */ if (x!=q) out('s'); /* `\.{\\ETs}' for the last of more than two */ } @.\\ET@> @.\\ETs@> return x; } @* The problem of typedef declarations. @:title@> We now consider how |typedef| declarations are processed. It is a slightly problematic matter, since it involves the syntactic structure of the program, but we are doing only lexical analysis during Phase~I.
Nevertheless, we don't want to delay recognition of |typedef| declarations to Phase~II, since this would restrict the user's freedom of ordering the sections so that all typedef declarations remain before any of their uses, and it would make it cumbersome to mention a typedef identifier in the commentary directly before its definition. Our approach will be to specify simple rules that pinpoint the identifiers which are subject to a |typedef| definition in any syntactically correct program; we don't care too much about strange behaviour in the presence of syntax errors, and also assume some basic decency on the part of the user, e.g., the identifier should occur in the same section as the |typedef| token. Obviously there is little to care about until a |typedef| token (recognisable by its |ilk|) comes along, but then it becomes tricky. Ordinarily the identifier being defined is the first non-reserved word that follows, but |struct| (and |union| and |enum|) tokens complicate the situation; also there may be more than one identifier subject to the |typedef|, separated by commas from each other. Because of |struct| we must keep track of the nesting of braces, and because the mentioned commas should be distinguished from those occurring in the parameter specifications of functions, we should also keep track of parenthesis nesting. A semicolon at the proper level of brace nesting signals the end of a |typedef| declaration. Fortunately |typedef| declarations cannot be nested inside each other, so we can use global variables to keep the proper counts. Three integer counters are used, two for the nesting levels of braces and parentheses, which are set to~0 whenever a |typedef| is scanned and properly maintained thereafter, and a master counter which determines if we are paying attention at all. @d typedef_tracking(b) (typedef_master += b ? 5 : -5) @< Global... 
@>= local int typedef_master=-5; /* tracking disabled outside \Cee~parts */ local int brace_level, par_level; @ The master counter is ordinarily equal to~0 when tracking is enabled, and negative if it is disabled. When it is~0 and a |typedef| is seen, it is raised to~2, after which an |int_like| or |type_defined| identifier will further raise it to~4, indicating that any |normal| identifier coming along at the same brace level will be made |type_defined|. When that happens the master counter drops to~1, indicating that it can still be rekindled by a comma at the proper parenthesis level. We must be prepared to see more than one typedef for the same identifier, as we may have scanned a header file before seeing the source that produced that file, so we also let the master counter drop to~1 if it was~4 and an identifier that is already |type_defined| is seen. If however following the |typedef|, when the master counter is~2, a |struct_like| identifier is seen, the master counter is raised only to~3, so that a following identifier will not be made |type_defined|, but rather pass the honour on by setting the master counter to~4; this also happens when instead of an identifier a left brace is seen. Finally, a semicolon at the right brace level will return a positive value of the master counter to~0. 
@< Keep track of tokens relevant to |typedef| declarations @>= { if (typedef_master==0 && next_control==identifier && cur_id->ilk==typedef_like) @/{@; typedef_master=2; brace_level=par_level=0; } else if (typedef_master>0) switch(next_control) { case identifier: if (brace_level==0) if (typedef_master==2) { if (cur_id->ilk==int_like || cur_id->ilk==type_defined) typedef_master=4; else if (cur_id->ilk==struct_like) typedef_master=3; } else if (typedef_master==4) { if(cur_id->ilk==normal||cur_id->ilk==type_defined) /* this is it */ cur_id->ilk=type_defined, typedef_master=1; } else if (typedef_master==3) typedef_master=4; break; case '{': @+ if (brace_level++==0 && typedef_master==3) typedef_master=4; @+ break; case '}': --brace_level; break; case ',': @+ if (typedef_master==1 && par_level==0) typedef_master=4; @+ break; case '(': ++par_level; break; case ')': --par_level; break; case ';': @+ if (brace_level==0) typedef_master=0; } if (C_plus_plus) @< Take action to mark identifiers following \&{class} as |type_defined| @> } @ In \Cpp, any identifier that follows \&{class} (or |struct| or |union|) is considered as a typedef identifier, i.e., every time some \&{class~x} is encountered, an implicit `|typedef| \&{class~x~x}' is assumed. @< Take action to mark identifiers following \&{class}... @>= { static boolean class_seen=false; if (class_seen) { if (next_control==identifier && cur_id->ilk==normal) cur_id->ilk=type_defined; class_seen=false; } else if (next_control==identifier && cur_id->ilk==struct_like) class_seen=true; } @ We now consider |typedef| declarations in Phase~II. In Phase~I we have set the |ilk| of the defined identifiers to~|type_defined|, which will make them behave as |int_like|; although this works fine everywhere else, it thwarts a correct parse of their |typedef| declaration itself during Phase~II.
In fact, according to the \caps{ANSI}/\caps{ISO}~\Cee\ syntax, the identifier being declared in a typedef declaration should not be considered to be a \\{typedef-name}, since that would make the declaration unsyntactical; this is justified by the fact that the typedef identifier only comes into scope after the declaration is completed. This is not a problem for single-pass compilers, but it is for us: we have recorded the typedef declaration on the first pass, and it will be active during the entire second pass, since we cannot delimit the declaration to its proper range (we are not processing the code in the same order that the \Cee~compiler will, and for the commentary parts of sections, the concept of range does not even apply). To attempt to write the grammar in such a way that it will accept typedef declarations in which the defined identifier is |int_like| would be very difficult, since without the help of some rather remote context, a declarator of this kind can not always be distinguished from an abstract declarator; compare the declaration `\hbox{|typedef char *(example[4]);|}', which declares |example| to specify the type ``array of 4 pointers to character'', with a valid declaration `|void f(example[3]);|' that might follow it, declaring |f| as a function that takes as an argument an array of 3 such |example| objects (in fact the argument will be passed as a pointer to |example|, of course). So, rather than solving the problem in a syntactic way, we stoop down to emulating a one-pass system by setting the |category| of the defining occurrence of an identifier in a typedef declaration explicitly to |expression|, despite the fact that its |ilk| is |type_defined|. The defining occurrence is located by the same lexical means used in Phase~I, in fact, by using the same intricate succession of states of |typedef_master| that was used there. A difference is that at the {\sl moment supr\`eme\/} the identifier found is now |type_defined| rather than |normal|.
The fact that this identifier is not necessarily the only or first |type_defined| identifier in the declaration, and that it has to be recognised in a left-to-right pass, may explain some of the details of our code, for instance why |const_like| had to be distinguished from both |int_like| and |type_defined|; one may compare `\hbox{|typedef unsigned long int * const example;|}' with `\hbox{|typedef const example * examp2;|}'. With respect to the code for Phase~I, there is also a slight difference in the way the code is hooked into the program, since the function |C_read| already contains a |switch| on the value of |next_control|, which has a special case for identifiers. So let us first consider the identifier cases (including reserved words). @f example scrap /* pretend |example| is |type_defined| */ @f examp2 example /* and |examp2| is another such identifier */ @< Track identifiers... @>= { if (typedef_master==0 && cat==typedef_like) typedef_master=2, brace_level=par_level=0; else if (typedef_master>0 && brace_level==0) if (typedef_master==2) { if (cat==int_like || cat==type_defined) typedef_master=4; else if (cat==struct_like) typedef_master=3; } else if (typedef_master==4 && cat==type_defined) /* this is it */ cat=expression, typedef_master=1; else if (typedef_master==3) typedef_master=4; } @ And here are the relevant cases of non-identifiers. @< Cases necessary for proper parsing of typedefs @>= case '{': @+ if (typedef_master>0 && brace_level++==0 && typedef_master==3) typedef_master=4; @+break; case '}':@+ if (typedef_master>0) --brace_level; @+break; case ',':@+ if (typedef_master==1 && par_level==0) typedef_master=4; @+break; case '(':@+ if (typedef_master>0) ++par_level; @+break; case ')':@+ if (typedef_master>0) --par_level; @+break; case ';':@+ if (typedef_master>0 && brace_level==0) typedef_master=0; @+break; @i parser.w @i rules.w @* Output of tokens.
@:title@> Now that we have treated the highest level of processing by \.{\me.}, we shall have to descend again to the level of character strings, which eventually have to be written to the output file. Our first concern is to linearise the multi-layered token lists into a sequence of output tokens. The output of special layout tokens is affected by whether or not they were generated from within `\pb', so there are two modes of output: during output of `\pb' we are in |inner| mode, and otherwise in |outer| mode. A switch from |outer| to |inner| mode can occur in the middle of a text, namely for `\pb' fragments inside comments; this is indicated by the fact that the reference to the translation of the fragment is tagged with |inner_text_flag|. Apart from the fact that the mode is set to |inner| during the output of such subtrees, no traces of the tree structure of the internal representation of texts are left after linearisation. Linearising texts is therefore a straightforward process, that is easy to implement using a stack. In the linearised stream of tokens certain sequences of tokens, particularly layout tokens, have to be considered together, e.g., |cancel| will remove any adjacent line-breaking tokens. No line-breaking tokens will occur at either end of the output of a complete text either; if a line break is required there, the caller of the output routines will supply it explicitly. For identifiers, reserved words and module names, the transformation of a single token into a sequence of characters is handled by separate functions to be discussed later. For module names the transformation may result in a recursive call of the output routines via the processing of `\pb' fragments by~|do_C|, but this recursion is never more than one level deep, since such fragments cannot contain module names.
@^recursion@> @< Prototypes @>= void out_identifier (id_pointer); void out_keyword (id_pointer); xref_pointer out_module_name(mod_pointer); @ The stack that is used to keep track of token lists at different levels of output is similar to the one used in |CTANGLE|. Entries have three parts: for the token list at each level |tok_field| and |end_field| record where we are respectively where we should stop, and |mode_field| records the output mode. The current values of these quantities are referred to quite frequently, so they are stored in a separate place, and are called |cur_tok|, |cur_end|, and |cur_mode|. @d cur_tok cur_state.tok_field /* location of next output token in |tok_mem| */ @d cur_end cur_state.end_field /* current ending location in |tok_mem| */ @d cur_mode cur_state.mode_field /* current mode of interpretation */ @= typedef enum { inner, outer } mode; typedef struct { token_pointer tok_field; /* present location within token list */ token_pointer end_field; /* ending location of token list */ mode mode_field; /* interpretation of control tokens */ } output_stack_element, *stack_pointer; @ The stack grows upwards with the global variable |stack_ptr| pointing to the first vacant location above the top of the stack. The entry |stack[0]| at the bottom of the stack is not used; when |stack_ptr| points to it then |cur_state| itself has become invalid and the output process is completed. Therefore we can use |stack[0]| to store |cur_state| in. @d cur_state stack[0] /* the currently active state variables */ @d stack_end (&stack[stack_size]) /* end of |stack| */ @= output_stack_element stack[stack_size]; /* info for non-current levels */ stack_pointer stack_ptr=&stack[0]; /* first unused location in the output state stack */ #ifdef STAT stack_pointer max_stack_ptr = stack; /* largest value assumed by |stack_ptr| */ #endif @ To insert token list |p| into the output, the function |push_level| is called; it saves the old level of output and gets a new one going. 
Conversely, the macro |pop_level| restores the conditions that were in force when the current level was begun. The value of |cur_mode| is not changed by |push_level|, but it might be changed explicitly directly after the call; if so this setting will remain in effect until the matching invocation of |pop_level|. At the beginning of |make_output|, a call to |push_level| is made to put the root text into the output stream. If this is not a recursive call to |make_output|, the old (undefined) value of |cur_state| will be ``saved'' into |stack[0]|, which is |cur_state| itself; although this is redundant, no harm is done. @d pop_level() cur_state = *--stack_ptr @c void push_level (text_pointer p) /* suspends the current level */ { if (stack_ptr==stack_end) overflow("stack"); @.stack capacity exceeded@> *stack_ptr++=cur_state; #ifdef STAT if (stack_ptr>max_stack_ptr) max_stack_ptr=stack_ptr; #endif cur_tok=text_begin(p); cur_end=text_end(p); } @ The function |make_output| traverses the nested token lists using the stack, and produces the corresponding output. Since it can be called recursively, the initial value of |stack_ptr| is saved in |stack_bot|; we return from |make_output| when |stack_ptr| drops back again to this level. When we encounter tokens marked with |id_flag|, |res_flag| and |mod_flag|, we respectively call |out_identifier|, |out_keyword| and |out_module_name|. As mentioned above, the last of these may result in a recursive call to |make_output| via |do_C|; because calls of |do_C| also involve lexical scanning, the values of |next_control|, |cur_id| are saved, and restored when |make_output| is complete.
@^recursion@> @c void make_output(text_pointer t,mode m) /* output a complete text */ { int save_next_control=next_control; id_pointer save_cur_id=cur_id; stack_pointer stack_bot=stack_ptr; token state=cancel; push_level(t); cur_mode=m; do if (cur_tok==cur_end) pop_level(); else { token a= *cur_tok % id_flag; switch (*cur_tok++/id_flag) { @\@< Cases 1, 2, 3: identifiers, reserved words and module names @> case 4: push_level(text_at(a)); break; case 5: push_level(text_at(a)); cur_mode=inner; break; case 0: @< Output the character or format control |a| @> } } while(stack_ptr>stack_bot); @< Complete any unfinished work at the end of the translation @> next_control=save_next_control; cur_id=save_cur_id; } @ To handle the proper interaction of adjacent output tokens, we keep track of an integer |state| variable to record that a possibly unfinished sequence is being processed. The state is mainly affected by the output of format control tokens like |break_space|, so for convenience we often set the state equal to the value of such a token. If |state==0| the most recent token was an ordinary one, and no special action is called for. If |state| has one of the values |space|, |tilde|, |break_space|, |force|, |big_force|, |backup|, or |big_backup|, it is the upper bound in the sense mentioned before of a sequence of such tokens that has recently passed (where |space|~and~|tilde| represent the tokens |' '|~and~|'~'|, respectively). We call such states `white-space states'; when an ordinary character comes along in a white-space state, the token recorded in |state| is first output. @< Output white space if specified by |state| @>= { if (state>=space) if (state @.\\6@> @.\\7@> else out(state-backup+'6'),out_str("\\4"); /* `\.{\\6\\4}' or `\.{\\7\\4}' */ @.\\6@> @.\\7@> @.\\4@> finish_line(); } state=0; } @ The following three cases now become easy. @< Cases 1, 2, 3... @>= case 1: @< Output white space... @>@+ out_identifier(id_at(a)); break; case 2: @< Output white space... 
@>@+ out_keyword(id_at(a)); break; case 3: @< Output white space... @>@+ out_module_name(mod_at(a)); break; @ Whether or not a forced break is required at the end of the translation is determined by the function calling |make_output|, not by the code being translated; therefore we do not invoke |@< Output white space... @>| at the end, and if a white-space state was set, it will sink silently into oblivion. An exception is made for |big_force| or |big_backup|, for which we output `\.{\\Y}' so that the extra vertical white space (which must have been inserted explicitly by a \:) control code) will not vanish, for instance when it occurs between macro definitions. @< Complete any unfinished work... @>= {@; if (cur_mode==outer && (state==big_force || state==big_backup)) out_str("\\Y"); } @ White-space format controls operate by modifying |state| when appropriate, as described above. The same holds basically for |' '| and |'~'|, but these characters can also occur in ordinary text inside comments, where they must not be contracted, so we take some care that in such a position (where |cur_mode==outer| holds), they are output as ordinary characters. In white-space states |opt| and the digit following it are ignored. Like the white-space format controls, the token |cancel| also sets |state| to its own value, but it ignores the value it previously had. In this state any spaces and format control tokens except |indent|, |outdent| and |flush_left| are ignored. We also have set |state==cancel| at the beginning of |make_output|, so that there will be no forced break or white space at the beginning of the translation. The tokens |indent| and |outdent| are output directly without altering |state|; effectively this means that they will be moved in front of any white-space format controls that preceded them. 
This is important because the format controls that are explicitly inserted by codes like \:/ will stick to the token to their left, so that any indentation changes generated by the syntax rules at the same point will necessarily come {\it after\/} them, while in fact these indentation changes must be allowed to affect the indentation at explicitly inserted line breaks. With the exceptions noted above, |opt| and |flush_left| are transmitted like normal tokens. When |cur_mode==inner| things are a bit different. The tokens |indent|, |outdent|, and |flush_left| are completely ignored, and all white-space tokens except |'~'| are treated like |' '|, i.e., they set |state=space|. Optional breaks are produced using `\.{\\0}' rather than `\.{\\3}' to avoid a ragged right margin within a paragraph. @< Output the character or format control |a| @>= { switch (a) { case relax: @< Output white space... @>@+ break; case cancel: state=cancel; break; case indent: case outdent: if (cur_mode==outer) {@; out('\\'); out(a-indent+'1'); } break; @.\\1@> @.\\2@> case opt: { int digit=*cur_tok++; if (state==0) {@; out('\\'); out(cur_mode==outer ? '3' : '0'); out(digit); } @.\\3@> @.\\0@> break; } case flush_left: if (cur_mode==outer) {@; @< Output white space... @> out_str("\\8"); } @.\\8@> break; case big_force: case backup: if (a+state==big_force+backup) a=big_backup; /* fall through */ case break_space: case force: case big_backup: if (cur_mode==inner) a=space; up_state: if (state!=cancel && state=break_space) break; /* else fall through */ default: @< Output white space... @>@+ out(a); } } @* Low-level output routines. @:title@> We now come to the functions that write individual tokens and characters. We start with a simple function to output a section number in decimal notation. The number to be converted by |out_sec_nr| is known to be less than |def_flag|, so it cannot have more than five decimal digits. If the section was changed, we output `\.{\\*}' just after the number. 
@c void out_sec_nr (int n) /* output a section number */ { char s[6]; sprintf(s,"%d",n); out_str(s); if (section_changed(n)) out_str ("\\*"); @.\\*@> } @ Since we want to typeset identifiers such as \.{catch22} as |catch22|, which is achieved by writing the string `\.{\$\\\\\{catch\}\_\{22\}\$}' to the \TeX~file, it is useful to have an auxiliary function |out_id_part| that will output a substring of an identifier (or reserved word), enclosing it in braces if it contains more than one character. The substring is specified by a pointer and a length count. A call to |out_id_part| for the full name referred to by an |id_pointer p@;| is achieved by invoking |out_id_full(p)|. @d out_id_full(p) out_id_part(name_begin(p),length(p)) @c void out_id_part (char* s, int l) { boolean b=l!=1; if (b) out ('{'); while (--l>=0) {@; if (*s=='_') out ('\\'); out (*s++); } if (b) out('}'); } @ For index entries we do not wish to give underscores a special treatment, so we define a function |out_index| to use in place of |out_id_full|. Index entries of length~0 are discarded on input, so they never come here; for index entries of length~1 we omit braces. In \LKC., there is no function like |out_index|, and the equivalent of |out_id_part| is used instead, and as a consequence, underscores are automatically escaped in index entries, but other special characters are not. While there is no particular rhyme or reason to such a behaviour, users have learned to adapt to it, and so, in an attempt at bug-to-bug compatibility, we transfer control to |out_id_part| in compatibility mode. @c void out_index (id_pointer p) { char* s=name_begin(p); boolean b= s[1]!='\0'; if (compatibility_mode) {@; out_id_full(p); return; } if (b) out ('{'); out_str(s); if (b) out('}'); } @ The function |out_keyword| simply prefixes `\.{\\\&}' to the output of |out_id_full|.
@c void out_keyword(id_pointer p) @+{@; out_str("\\&"); out_id_full(p); } @.\\\&@> @ The function |out_identifier| formats ordinary identifiers (those that were stored with |id_flag|). Yet there is some variation in the way these are formatted. Identifiers whose |ilk| is |TeX_like| or |NULL_like| are converted into control sequences. If only one alphabetic character is present, it is set in math (rather than text) italic, otherwise if all characters are either upper case letters or digits, the identifier is set in typewriter type, and in the remaining, most common, case, it is set in text italic. If math or text italic are used, a trailing sequence of digits in the name, if present, is set as a subscript to the rest of the identifier. @c void out_identifier (id_pointer p) { int k=length(p); eight_bits* ch=(eight_bits*)name_begin(p); @/enum { ord, indexed, caps, single, indexed_single } kind; if (p->ilk==TeX_like || p->ilk==NULL_like) @.\\NULL@> @.\\TeX@> @/{@; @< Output the identifier |p| in the form of a \TeX\ control sequence @> return; } if (!compatibility_mode) { do --k; while (isdigit((eight_bits)ch[k])); /* terminates because |!isdigit(ch[0])| */ ++k; /* point to end of identifier without its index (if any) */ } @< Determine the |kind| of |p|, and possibly output some leading characters @> if (kind==indexed || kind==indexed_single) { out_id_part(name_begin(p),k); /* main part */ out ('_'); out_id_part(name_begin(p)+k,length(p)-k); /* subscript */ } else out_id_full (p); } @ When an identifier is output in the form of a control sequence, underscores are replaced by `\.x' so that they will become part of the control sequence. If the |ilk| of the identifier was |TeX_like|, the control sequence will be processed in horizontal mode with italic type selected; if the |ilk| was |NULL_like|, it will be processed in math mode. 
@< Output the identifier |p| in the form of a \TeX\ control sequence @>= { if (p->ilk==TeX_like) out_str("\\\\{"); out('\\'); @+ do out(*ch=='_' ? 'x' : *ch); while (*++ch!='\0'); if (p->ilk==TeX_like) out('}'); } @ For identifiers that consist of a single character, not counting any trailing digits, that character is written in unadorned form, since it will be processed in math mode, so that the math italic font will be used. We do output a space before the character however, to eliminate the possibility that it is captured by a control word at the end of the previous output. @< Determine the |kind|... @>= if (k==1) {@; out(' '); kind= length(p)==1 ? single : indexed_single; } else { int i=k; @+ while (--i>=0) @+ if (!isupper(ch[i])&&!isdigit(ch[i])&&ch[i]!='_') break; kind= i<0 ? caps : k @.\\.@> } @ The last function for output of tokens, |out_module_name|, represents the most complicated aspect of such output, since up to here we have not paid any attention to `\pb' constructions in module names. When these are present they will need a complete treatment by the scanning, parsing and general output routines described above. Now |out_module_name| does not involve itself directly in such actions---they are contained in a call to |do_C|---but we do have to be aware that we are in the midst of scanning and output operations going on at a different level, and be careful not to disturb them. The text between `\pb' will be placed at the end of the active input buffer (this is why the buffer has size |long_buf_size| rather than |buf_size|) and the translation process uses the end of the active |tok_mem| area. On the other hand |out_module_name| is also used during Phase~III while writing the list of module names (which implies that scanning and parsing is not over at the end of Phase~II). We want |out_module_name| to behave slightly differently in Phase~III however, by printing a full list of section numbers in which the module is defined rather than just the first one. 
Also, immediately after calling |out_module_name| we will need a pointer to the sublist of \xr.s to sections in which the module is used, and since this sublist starts at the end of the list of defining sections, we might as well return it as result from |out_module_name|. @c xref_pointer out_module_name(mod_pointer name) { xref_pointer x=name->xref; boolean file_module= x->num==file_flag; if (file_module) x=next_xref(x); out_str ("\\X"); @.\\X@> if (x->num>=def_flag) { out_sec_nr(x->num-def_flag); /* output the defining section number */ if (phase==3) /* all of them in Phase III */ while (x=next_xref(x), x->num>=def_flag) out_str (", "), out_sec_nr(x->num-def_flag); } else out ('0'); /* section number `0' means `nowhere defined' */ out (':'); if (file_module) out_str("\\.{"); @.\\.@> @ if (file_module) out_str ("}"); out_str ("\\X"); return x; } @ Copying \TeX\ text from a module name to the output is fairly simple, but we shouldn't forget to escape special characters if this is actually a file name (which is typeset verbatim), even though they are probably quite rare. @< Output the text... @>= { char* k=name_begin(name),c; while ((c=*k++)!='\0') { if (file_module) @+{@; if (strchr(" \\#%$^{}~&_",c)!=NULL) out ('\\'); } if (c=='@@' && *k++!='@@') @< Report illegal control code in module name @> if (file_module || c!='|') out(c); else { char* save_loc=loc, *save_limit=limit; @< Copy the \Cee\ text into the |buffer| array @> do_C(); loc=save_loc; *(limit=save_limit)=' '; } } } @ We haven't checked for illegal control codes in module names yet, so we should report an error if we encounter one. @< Report illegal control... @>= {@; print("\n! Illegal control code in module name"); print_mod(name); @.Illegal control code...@> mark_error(); } @ Within `\pb' we should be aware of character and string constants, since any `\.\v' occurring there is not the closing one. 
The variable |delimiter| is zero outside such constants, otherwise it equals the delimiter that began the constant. We copy the opening and closing `\.\v' into the buffer, so that an error message that displays the whole buffer will look a little bit sensible. We also add a space at the end (just like |get_line| does) so that the closing `\.\v' cannot by accident be parsed as part of an operator such as `\.{\v=}'. Putting |next_control='|'| completes the proper initial conditions for calling |do_C|. We need not test for overflow of |buffer| here, since we start filling it at position |limit| which is at most |&buffer[buf_size-2]|, and add at most |longest_name| characters from the module name, so that if worst comes to worst, the final space stored at the end is at position |&buffer[long_buf_size-2]| leaving one more place after |limit|, which is occasionally needed by |get_next|. @< Copy the \Cee\ text into... @>= { char delimiter='\0'; /* |'"'| or |'\''|, or |'\0'| outside of strings */ next_control=*limit++='|'; loc=limit; do if ((c=*k++)=='\0') @< Report a runaway \Cee~text @> else { *limit++=c; @/@< In special cases copy the character after |c|, or change |delimiter| @> } while (c!='|' || delimiter!='\0'); *limit=' '; } @ Here cases like `\.{1+@@\v2}', `\.{@@'\v'}' and `\.{'\\''}' within `\pb' in module names are handled properly, as well as the more ordinary cases of character constants and strings. In case of a control code, the character following the `\.@@' cannot be the null character that terminates the module name, because it is impossible to enter a module name that ends with a single `\.@@'. If the user is being really devious by saying `\.{@@< Sudden \v"death\\ @@>}', then the final `\.\\' is silently removed to avoid disaster to \.{\me.}. @< In special cases... 
@>= { if (c=='@@') /* control code; now |*k!='\0'| */ { if ((*limit++=*k++)=='\'' && delimiter=='\0') /* copy code, test for \:' */ delimiter='\''; /* which behaves like `\.'' */ } else if (c=='\\' && delimiter!='\0') { char d=*limit++=*k++; /* escaped character, possibly |delimiter| */ if (d=='\0') --k,limit-=2; /* remove backslash, error is issued anyway */ } else if (c=='\'' || c=='"') if (delimiter=='\0') delimiter=c; else if (delimiter==c) delimiter='\0'; } @ If the module name ends before the \Cee~text does, we add an |'|'|, and if a string or character constant was left unclosed we prepend the closing delimiter to it: we must prevent that |do_C| will ever call |get_line|, even in erroneous situations. In these cases there is a slight chance that we might overflow |buffer|, in which case we quit since the user is just trying to break \.{\me.} anyway. @< Report a runaway \Cee~text @>= { print("\n! C text in module name didn't end"); print_mod(name); @.C text...didn't end@> mark_error(); if (delimiter!='\0') *limit++=delimiter; *limit++='|'; if (limit>&buffer[long_buf_size-2]) fatal("fix that first, you sneaky devil"); @.fix that first...@> break; } @ Finally we come to the point where the characters produced are sent off to the \TeX~file. The \TeX\ output is supposed to appear in lines at most |line_length| characters long, so we place it into an output buffer |out_buf|. The first character of the output buffer is used as a sentinel, and is not actually written to the output file, so we usually refer to the output buffer via |out_line|. The pointer |out_ptr| indicates the next position to be used in the output buffer. During the output process, |out_line_nr| will hold the current line number of the line about to be output; it is only used for (rather unlikely) diagnostic messages. 
@d out_line (&out_buf[1]) /* start of actual output line */ @d out_buf_end (&out_line[line_length]) /* end of |out_buf| */ @= char out_buf[line_length+1]; /* assembled characters */ char* out_ptr; /* first unused position in |out_buf| */ int out_line_nr=1; /* number of next line to be output */ @ The auxiliary function |flush_buffer| empties the buffer up to a given breakpoint~|b|, and moves any remaining characters to the beginning of the buffer, so that they will appear on the next line. If the |percent| parameter is |true| a |'%'| is appended to the line that is being output; in this case the breakpoint |b| should be strictly less than |out_buf_end|. If the |percent| parameter is |false|, trailing blanks are suppressed. The characters emptied from the buffer form a new line of output. @d tex_putc(c) putc (c, tex_file) @d tex_new_line() (putc('\n', tex_file),++out_line_nr) @d tex_printf(format) fprintf(tex_file, format) @c void flush_buffer(char* b, boolean percent) /* output from |out_line| to |b|, where |b<=out_ptr| */ { int j=(int)(b-out_line); /* number of characters to be output */ if (!percent) @+ while (j>0 && out_line[j-1]==' ') --j; /* remove trailing blanks */ fprintf(tex_file, "%.*s",j,out_line); if (percent) tex_putc('%'); tex_new_line(); { char* p=out_line; while (b= { char* line1=" \\input cwebxmac"; out_ptr=&out_buf[0]; @+ do *out_ptr++=*line1++; while (*line1!='\0'); if (compatibility_mode) out_ptr[-4]='c'; /* change to \.{cwebcmac} */ } @ When we wish to append one character |c| to the output buffer, we write `|out(c)|'; this will cause the buffer to be broken at a sensible place and flushed, if it was already full. If we want to append more than one character at once, we say |out_str(s)|, where |s| is a string containing the characters. @d out(c) *(out_ptr>=out_buf_end ? 
(break_out(),out_ptr++) : out_ptr++)=c @c void out_str (char* s) @+{@; while (*s!='\0') out (*s++); } @ The function |break_out| will determine a good break point in the output buffer when it is about to overflow. @< Prototypes @>= void break_out (void); @~For speed we search from right to left for a proper break point, although in this direction we cannot know whether we are inside a control sequence. Nevertheless any blank space is a safe break point, as is the position just before a backslash that isn't preceded by another backslash. If the break is not at a space, a |'%'| is output at the break. @c void break_out (void) /* finds a way to break the output line */ { char* k=out_ptr,c; int count=0; /* number of backslashes seen */ do @+ if ((c=*--k)==' ') goto found; @+ while (c!='\\'); do ++count; while ((c=*--k)=='\\'); found: if (++k>out_line) flush_buffer(k,c!=' '); @+ else @< Break peculiar line @> } @ We get to this section only in the unusual case that the entire output line consists of a string of backslashes followed by a string of characters that contains no spaces or backslashes. Depending on the number |count| of backslashes we handle this case as well as possible. If the |count>=2| we split off a maximal even number of initial backslashes, and if |count==0| we break the line by putting a |'%'| at the position of the last character. In the latter case we do not place the |'%'| after the last character, since that would make the output line one character longer than we promised; similarly we are cautious in the former case not to place a |'%'| after a buffer completely filled with backslashes. If |count==1| the line is probably one enormous control word, and in this unlikely case we simply output the whole line as is stands, after issuing a warning message. @< Break peculiar line @>= if (count==0) flush_buffer(out_ptr-1,true); else if (count>=2) flush_buffer (&out_line[count&=~1]==out_buf_end ? out_buf_end-2 : &out_line[count],true); else { print("\n! 
Line had to be broken (output l.%d):\n",out_line_nr); @.Line had to be broken@> term_write(out_line,out_ptr-out_line); new_line(); mark_harmless(); flush_buffer(out_ptr,false); } @* Phase III processing. @:title@> After Phase~II is completed, \.{\me.}'s only remaining task is writing out the index, after sorting the identifiers and index entries, and a list of module names. If the user has set the |no_xref| flag (the \.{-x} option on the command line), we just finish off the page, omitting the index, module name list, and table of contents. @d triple_file_output flags['t'] @d even_out_pages flags['e'] @c void phase_three (void) /* output the \xr. index */ { finish_line(); if (no_xref) out_str("\\endcodemode\\vfill\\end"); else { phase=3; print_progress("\nWriting the index..."); @.Writing the index...@> typedef_tracking(false); /* during parse of `\pb' in module names */ if (change_exists) {@; @ finish_line(); } if (triple_file_output) @< Finish the \TeX~file and open the index file @> else {@; out_str("\\inx"); finish_line(); } @.\\inx@> /* index */ @ if (triple_file_output) @< Finish the index file and open the module listing file @> else {@; out_str("\\fin"), finish_line(); } @.\\fin@> /* end of index */ @ if (!triple_file_output) { out_str("\\con"); @.\\con@> /* table of contents */ if (even_out_pages) out_str("even"); @.\\coneven@> } } finish_line(); fclose(tex_file); print_progress("\nDone.\n"); check_complete(); /* was all of the change file used? */ } @ When output is to be distributed over three files, we can already finish off the main output file, since it ends in a standard way: with `\.{\\input}' commands for the two other files (that still have to be written) and either `\.{\\con}' or `\.{\\coneven}' to produce the table of contents. Since the three output files are written one after the other, we can reuse the file pointer |tex_file| for all of them. 
@< Finish the \TeX~file and open the index file @>= { out_str("\\inx \\input \\jobname.idx\n" @+ "\\fin \\input \\jobname.scn\n" @+ "\\con"); if (even_out_pages) out_str("even"); finish_line(); fclose(tex_file); if ((tex_file=fopen(idx_file_name,"w"))==NULL) fatal("! Cannot open \"%s\" as output file",idx_file_name); } @~The switch from the index file to the module listing file is similar, but simpler. @< Finish the index file and open the module listing file @>= { finish_line(); fclose(tex_file); if ((tex_file=fopen(scn_file_name,"w"))==NULL) fatal("! Cannot open \"%s\" as output file",scn_file_name); } @ Just before the index comes a list of all the changed sections, including the index section itself. The outer |while| loop will terminate without overstepping the bounds of |changed_section| in the inner loop since |section_changed(section_count)| holds. @= { int k=0; boolean first=true; out_str("\\ch "); @.\\ch@> /* changes */ while (k= typedef struct {@; id_pointer head; int depth; } sort_node, * sort_pointer; @~In fact |sort_info| shares its memory with the |scrap_info| array that serves no purpose at this moment (although it will be used again during the output of module names). @d sort_info scrap_union.id_list_field @d sort_info_end (&sort_info[sort_stack_size]) @< Alternative use... @>= sort_node id_list_field[sort_stack_size]; @ The variable |sort_ptr| points to the first unused slot above the top of the stack in~|sort_info|. @= sort_pointer sort_ptr=sort_info; #ifdef STAT sort_pointer max_sort_ptr=sort_info; /* largest value of |sort_ptr| */ #endif @ The desired alphabetic order is specified by the |collate| array; namely, |collate[0] < collate[1] < @t\dots@> < collate[end_collate-1]|. Upper case letters are treated like the corresponding lower case letters, since we want to have `|t=128|, which would cause the part of |collate| that is actually used to be shorter than that; therefore we compute the actual length in |end_collate|. 
@= eight_bits collate[UCHAR_MAX-25]; /* collation order */ int end_collate; /* length of the part of |collate| actually used */ @ We use the order |'\0' < ' ' < @tother characters@> < '_' < 'A'=='a' < @t\dots@> < 'Z'=='z' < '0' <@t\dots@> < '9'|. If there should be any characters~|c| for which |tolower(c)| does not occur in |collate| (this can only happen if |isalnum(c)| incorrectly holds), then entries containing the character~|c| will not appear in the index. By computing |end_collate| rather than using a constant |UCHAR_MAX-25|, we avoid however that the mere existence of such characters would disrupt the proper order of the index entries that do appear, due to spurious bytes |'\0'| at the end of~|collate|. @= { char *p="_abcdefghijklmnopqrstuvwxyz0123456789"; int c='\1', k=2; collate[0]='\0'; collate[1]=' '; do @+ if (!isalnum(c) && c!='_' && c!=' ') collate[k++]=c; @+ while (++c<=UCHAR_MAX); while ((c=*p++)!='\0') collate[k++]=c; end_collate=k; /* record the length */ } @ The lists of identifiers used for sorting the index cannot be linked together using |hash_link|, since we still need to be able to look up identifiers when writing the list of module names. Therefore we declare a separate array |index_link| that will provide the necessary links, and a macro |ilink| used to access it: |ilink(p)| points to the successor of~|p| (or is~|NULL|) for any |id_pointer p@;|. At each sorting step we partition a list of identifiers into at most |UCHAR_MAX-25| sublists, based on the first character position where the entries of the list are not known to be equal. After the partitioning step the sublist for character~|c| is pointed to by |bucket[tolower(c)]|. 
@d ilink(p) index_link[id_index(p)] @= id_pointer index_link[max_idents]; /* links identifiers during sorting */ id_pointer bucket[UCHAR_MAX+1]; @ The basic sorting step is to pop the list from the top of the stack, and either output it, if needs no further refinement, or to split it up into the buckets, after which the function |unbucket| collects the non-empty buckets and pushes them back on the stack. At the very first step however, the identifiers do not come from the stack but directly from the identifier table. In the remaining cases, the |depth| field of the element popped off the stack is used to determine the character position used for splitting, and is passed to |unbucket| so that it can set the proper depth when it pushes lists back on the stack. When |depth| has risen to~|255| (or more likely, has been set explicitly to that value by a previous |unbucket|), no attempt is made to split up a list, even if it is not reduced to a singleton. @d infinity 255 /* $\infty$ (approximately) */ @< Sort and output... @>= { @ /* the first time, entries do not come from |sort_info| */ unbucket(1); /* pick up first-order bucketed lists */ while (sort_ptr>sort_info) /* i.e., the stack is not empty */ { eight_bits depth=(--sort_ptr)->depth; id_pointer name=sort_ptr->head; if (ilink(name)==NULL || depth==infinity) /* singleton or set of look-alikes */ @< Output index entries for the list starting at |name| @> else {@; @< Split the list starting at |name| into further lists @> unbucket(depth+1); } } } @ To begin the sorting, we go through all the hash lists and put each entry having a non-empty \xr. list into the proper bucket. The buckets are emptied initially; they will also be emptied by each call to~|unbucket|. The entries of the |index_link| array are initialised when their corresponding identifiers are prepended to the list of some bucket. 
@= { id_pointer name; eight_bits c=UCHAR_MAX; id_pointer *h; /* pointer into |hash| */ do bucket[c]=NULL; while (c--!=0); for (h=hash; hhash_link) /* traverse all hash lists */ if (name->ilk!=reference && name->xref->num!=0) /* leave out unreferenced names */ {@; c=name_begin(name)[0]; c=tolower(c); ilink(name)=bucket[c]; bucket[c]=name; } } @ Here we split the list at the top of the stack into buckets. Entries of length |depth| will bet sent to |bucket['\0']| due to the null character stored at the end of identifier names; shorter entries do not come here since they will already have been output. Since we are changing the |ilink| of the front node of the list when putting it into its bucket, we must be a bit more cautious then usual in traversing the list. @< Split the list... @>= do { eight_bits c= tolower((eight_bits)name_begin(name)[depth]); id_pointer next_name=ilink(name); /* save link */ ilink(name)=bucket[c]; bucket[c]=name; name=next_name; /* put into bucket */ } while (name!=NULL); @ The function |unbucket| goes through the buckets in the reverse order of the collating sequence specified in the |collate| array, and adds non-empty lists to the stack. The parameter |d| to |unbucket| tells the current depth in the buckets; it will be recorded in all lists pushed on the stack during this call, except that the contents of the bucket containing the identifiers of length~|d-1| will be given depth |infinity| so that they will be output directly afterwards. Any two sequences that agree up to the case of their characters (with any characters beyond position~255 being ignored) are regarded as identical and will be output in a random order. It is only because of this case-merging that there could possibly be more than one identifier of length~|d-1|, which makes setting the depth to |infinity| necessary; in other cases the special attention is superfluous, since singleton lists will be output directly anyway. 
@< Prototypes @>= void unbucket (eight_bits); @~@c void unbucket (eight_bits d) /* empties buckets having depth |d| */ { int i=end_collate; /* index into |collate| */ while(--i>=0) @+ if (bucket[collate[i]]!=NULL) { if (sort_ptr>=sort_info_end) overflow("sorting"); @.sorting capacity exceeded@> sort_ptr->depth= i==0 ? infinity : d; /* |infinity| means there is nothing left to compare */ sort_ptr++->head=bucket[collate[i]]; bucket[collate[i]]=NULL; /* push and empty bucket */ #ifdef STAT if (sort_ptr>max_sort_ptr) max_sort_ptr=sort_ptr; #endif } } @ Each line in the index file starts with the macro `\.{\\@@}', which will be properly defined when it is encountered. @< Output index... @>= do { out_str("\\@@"); @.\\@@@> @ @ } while ((name=ilink(name))!= NULL); @ In this section the distinction between |xref_roman|, |xref_wildcard|, and |xref_typewriter| finally becomes effective. As first character (immediately after `\.{\\@@}') we output a character `\.h' or `\.m',indicating whether the index entry should be processed in horizontal or in math mode. @.\\NULL@> @.\\TeX@> @= switch (name->ilk) { case normal: case NULL_like: out('m'); out_identifier(name); break; case TeX_like: out('h'); out_identifier(name); break; case roman: out('h'); out_index(name); break; case wildcard: out_str("h\\9"); out_index(name); break; @.\\9@> case typewriter: out_str("h\\."); out_index(name); break; @.\\.@> default: out('h'); out_keyword(name); } @ Section numbers that are to be underlined are enclosed in `\.{\\[}\dots\.]'. The first `\.{,\ }' will be scooped up by the macro `\.{\\@@}'. @= { xref_pointer x=name->xref; do { sixteen_bits n=x->num; out_str(", "); if (n } while ((x=next_xref(x))->num!=0); out('.'); finish_line(); } @ The following recursive function traverses the tree of module names and prints them. We use the order already present in the tree, which means that we do not use the collation sequence that was employed for sorting the index. 
If the user starts all module names with capital letters however, the difference should hardly be noticeable. @^recursion@> @< Prototypes @>= void list_modules (mod_pointer); @~The macro `\.{\\@@}' is redefined to serve for lines in the list of module names as well. Since module names are to be processed in math mode, we enclose them in dollar signs. @c void list_modules (mod_pointer p) /* print all module names in subtree |p| */ { if (p != NULL) { list_modules(p->llink); @/ out_str("\\@@$"); @.\\@@@> leave_block(0); scrap_ptr=scrap_info; /* get ready for parsing */ {@; xref_pointer x=out_module_name(p); out('$'); footnote(&x,cite_flag); footnote(&x,0); }@/ finish_line(); @/ list_modules(p->rlink); } } @~Initially |list_modules| is called for the root of the tree, of course. @= list_modules(root); @ At the end of the run, if |STAT| was defined and the `\.{+s}' flag present, we report how much of all the arrays was actually needed. @d report(k,c,m) printf("\t%lu %ss (out of %lu)\n",(unsigned long)(c),k,(unsigned long)(m)) @c #ifdef STAT void print_stats() { print("\nMemory usage statistics:\n"); @.Memory usage statistics@> @/report("identifier", id_index(id_ptr), max_idents); @/report("module name", mod_index(mod_ptr), max_modules); @/report("byte", byte_ptr-byte_mem, max_bytes); @/report("cross-reference", xref_ptr-xmem, max_refs-1); @)printf("Parsing:\n"); @/report("scrap", max_scr_ptr-scrap_info, max_scraps); @/report("text", max_text_ptr-text_mem, max_texts); @/report("token", max_tok_ptr-tok_mem, max_toks); @/report("trie node", node_no, max_no_of_nodes); @/report("level", max_stack_ptr-stack, stack_size); @)printf("Sorting:\n"); @/report("level", max_sort_ptr-sort_info, sort_stack_size); } #endif @* Index. @:index@> @:title@> If you have read and understood the code for Phase~III above, you know what is in this index and how it got here. 
All sections in which an identifier is used are listed with that identifier, except that reserved words are indexed only when they appear in format definitions, and the appearances of identifiers in module names are not indexed. Underlined entries correspond to where the identifier was declared. Error messages, control sequences put into the output, and a few other things like ``recursion'' are indexed here too. cwebx-3.04.orig/cwebcmac.tex100644 1750 1750 5123 6417406364 13623 0ustar jdgjdg% Compatibility mode macros for CWEBx (in addition to cwebxmac.tex) % File: cwebcmac.tex, Author: Marc van Leeuwen, Date: November 1994 % This file gives definitions to macros defined in cwebmac.tex that are % undefined in cwebxmac.tex, in order to simulate the cwebmac macro % environment as much as possible in compatibility mode of CWEBx. % In addition \PB is defined so as to get decent behaviour of |...| when used % inside math mode (which one should't do, but some do it anyway). \ifx\documentstyle\undefined\else\endinput\fi % LaTeX will use other macros \input cwebxmac % that's the standard stuff \xdef\fmtversion{\fmtversion c} \let\mc=\ninerm % medium caps \let\sc=\eightrm % small caps (NOT a caps-and-small-caps font) %\font\tenss=cmss10 \let\cmntfont\tenss % alternative comment font \def\CEE/{\Cee} \def\UNIX/{\caps{UNIX}} \def\TEX/{\TeX} \def\CPLUSPLUS/{\Cpp} \def\9#1{} \let\cmntfont\tenrm \def\|#1{\leavevmode\hbox{$#1$}} % in case this is used directly \def\ATP{\X\kern-.5em:Preprocessor definitions\X} \let\ATH=\ATP % in case the user refers to \ATH directly \def\PB#1{\ifmmode\hbox{#1}\else#1\fi} % make |...| safe in math mode \def\DC{\kern.1em{::}\kern.1em} % symbol for :: \def\PA{\mathbin{.*}} % symbol for .* \def\MGA{\mathbin{\MG*}} % symbol for ->* % Revert to Levy/Knuth representations of operators \def\K{=\Penalty2} % assignment operator \def\E{\Penalty7\equiv} % equality test \newbox\MGbox % symbol for -> 
\setbox\MGbox=\hbox{\kern-2pt\lower3pt\hbox{\teni\char'176}\kern1pt} \def\MG{\copy\MGbox} \secpagedepth=3 % page breaks will occur for depths -1, 0, and 1 \let\SHC\C % "// short comments" treated like "/* ordinary comments */" \outer\def\N#1 #2. #3. % start of `@*' section {\global\gdepth=#1\global\advance\gdepth1 \global\gtitle={#3}\MN#2.% \ifon \ifnum#1<\secpagedepth \vfil\eject \else \vskip 0pt plus .5 \vsize \penalty-1000\vskip 0pt plus -.5 \vsize \vskip\intersecskip \fi \fi \message{*\secno} % progress report \edef\next{\write\cont % to contents file {\ZZ{#3}{\number\gdepth}{\secno}{\noexpand\number\pageno}}} \next \ifon\startsection{\bf#3.\quad}% } \def\contentsline#1#2#3#4% #1==title, #2=depth+1, #3=secno, #4=pageno {\ifnum#2=0 \smallbreak\fi \line{\consetup{#2}#1 \rm\leaders\hbox to .5em{.\hfil}\hfil \ #3\hbox to3em{\hss#4}% }% } \def\consetup#1% #1=depth+1 {\ifcase#1 \bf % @** \or % @* \or \hskip2em % @*1 \or \hskip4em % @*2 \or \hskip6em % @*3 \or \hskip8em % @*4 \or \hskip10em % @*5 \else \hskip12em % depth >=6 \fi } \def\,{\relax\ifmmode\mskip\thinmuskip\else\thinspace\fi} cwebx-3.04.orig/cwebxmac.tex100644 1750 1750 44434 6465330553 13677 0ustar jdgjdg% Standard macros for CWEBx listings (in addition to plain.tex) % File: cwebxmac.tex, Author: Marc van Leeuwen, Date: November 1994 \ifx\documentstyle\undefined\else\endinput\fi % LaTeX will use other macros \ifx \cwebxmacloaded\undefined \let\cwebxmacloaded=\relax \else \endinput \fi \xdef\fmtversion{\fmtversion+CWEBx3.0} \let\:=\. 
% preserve a way to get the dot accent \font\ninerm=cmr9 \font\eightrm=cmr8 \font\titlefont=cmr7 scaled\magstep4 % title on the contents page \font\ttitlefont=cmtt10 scaled\magstep2 % typewriter type in title \font\tentex=cmtex10 % TeX extended character set (used in strings) \fontdimen7\tentex=0pt % no double space after sentences \hyphenchar\tentex=-1 % no automatic hyphenation within strings \newdimen\indentunit \indentunit 1em \parskip 0pt % no stretch between paragraphs \parindent\indentunit % for paragraphs and for the first line of C text \chardef\v=`| % vertical (|) \def\caps#1{\hbox{\ninerm #1}} \def\Cee{\caps C} \def\Cpp{\Cee\PP} % \def\UNIX{\caps{UNIX}} etc. \let\Sec=\S % section mark \def\Secs{\Sec\Sec} % as in \Sec@#label@> \let\mainfont=\tenrm \def\Cident#1{{\it#1\/\kern.05em}} % italic type for identifiers \def\\#1{\leavevmode\hbox\Cident{#1}} % robust version \def\Cbold#1{{\bf \def\_{\kern.04em\vbox{\hrule width.3em height .6pt}\kern.08em}% #1\/\kern.05em}} % boldface type for reserved words \def\{\leavevmode\hbox\Cbold{#1}} % robust version \def\Cstring#1{\ifmmode {}$\typewriter{#1}${}\else\typewriter{#1}\fi} \def\typewriter#1{{\tentex % typewriter type for strings \let\\=\BS % backslash in a string \let\{=\LB % left brace in a string \let\}=\RB % right brace in a string \let\~=\TL % tilde in a string \let\ =\SP % space in a string \let\_=\UL % underline in a string \let\&=\AM % ampersand in a string \let\^=\CF % circumflex in a string #1\kern.05em}} \def\.#1{\leavevmode\hbox\typewriter{#1}} \def\){\discretionary{"}{"}{}} % discretionary string break \def\AT{@} % at sign for control text (no longer needed since CWEBx2+1.0) \def\a#1{\mathopen{\hbox to \indentunit{$#1$\hss}}} % '{' in tab space \def\m#1{\mathord{#1}} % braces required here if #1 generates a penalty \chardef\AM=`\& % ampersand character in a string \chardef\BS=`\\ % backslash in a string \chardef\LB=`\{ % left brace in a string \chardef\RB=`\} % right brace in a string 
\def\SP{{\tt\char`\ }} % (visible) space in a string \chardef\TL=`\~ % tilde in a string \chardef\UL=`\_ % underline character in a string \chardef\CF=`\^ % circumflex character in a string \newbox\PPbox % symbol for ++ \setbox\PPbox=\hbox{\kern.5pt\raise1pt\hbox{\sevenrm+\kern-1pt+}\kern.5pt} \def\PP{\copy\PPbox} \newbox\MMbox \setbox\MMbox=\hbox{\kern.5pt\raise1pt\hbox{\sevensy\char0 \kern-1pt\char0}\kern.5pt} \def\MM{\copy\MMbox} \newbox\SSbox % symbol for ## \setbox\SSbox=\hbox{\kern.5pt\raise1pt\hbox{\sevenrm\#\kern-1pt\#}\kern.5pt} \def\SS{\mathbin{\copy\SSbox}} \def\MG{{\rightarrow}} % symbol for `->' \let\GG=\gg \let\LL=\ll \let\NULL=\odot \mathchardef\AND="2026 % bitwise and; also \& (unary operator) \def\OR{\Penalty6\mid} % bitwise or \let\XOR=\oplus % bitwise exclusive or \def\CM{{\sim}} % bitwise complement \newbox\MODbox \setbox\MODbox=\hbox{\eightrm\%} \def\MOD{\mathbin{\copy\MODbox}} \def\CC{::} \newbox\bak \setbox\bak=\hbox to -\indentunit{} % backspace one em \newbox\bakk\setbox\bakk=\hbox to -2\indentunit{} % backspace two ems \newcount\ind % current indentation level + 2 (for hanging indentation) \newcount\pl %level of parentheses \mathchardef\lpar=\mathcode`( \mathchardef\rpar=\mathcode`) \mathchardef\lbrac=\mathcode`[ \mathchardef\rbrac=\mathcode`] \mathchardef\plus=\mathcode`+ \mathchardef\minus=\mathcode`- \mathchardef\mcolon=\mathcode`: {\catcode`(=\active \catcode`)=\active \catcode`[=\active \catcode`]=\active \gdef({\global\advance\pl 1\lpar} \gdef){\ifnum\pl>0\global\advance\pl-1\fi\rpar} \gdef[{\global\advance\pl 2\lbrac} \gdef]{\ifnum\pl>0\global\advance\pl-2\fi\rbrac} \catcode`+=\active \catcode`-=\active \catcode`*=\active \gdef+{\Penalty8\plus} \gdef-{\Penalty8\minus} \gdef*{\Penalty9\ast} \catcode`:=\active \gdef:{\Penalty3\mcolon} } \def\cweblbrace{\global\advance\pl 1\lbrace} \def\cwebrbrace{\ifnum\pl>0\global\advance\pl-1\fi\rbrace} % breaking within parentheses will be unattractive, % and within brackets even more so 
\def\0#1{\penalty\number\pl#10 } % optional break in inner mode \def\1{\global\advance\ind by1\global\hangindent\ind\indentunit} % indent one more notch \def\2{\global\advance\ind by-1} % indent one less notch \def\3#1{\hfil\0#1\hfilneg} % optional break in outer mode \def\4{\copy\bak\ignorespaces} % backspace one notch \def\5{\hfil\penalty-1\hfilneg\enspace\kern2\indentunit\copy\bakk \ignorespaces}% break space \def\6{\ifmmode\else\par % forced break \hangindent\ind\indentunit\noindent\kern\ind\indentunit \copy\bakk\ignorespaces\fi} \def\7{\Y\6} % forced break and a little extra space \def\8{\hskip-\ind\indentunit\hskip 2\indentunit} % cancel full indentation \let\yskip=\smallskip % amount of space between subsections, and at `@)' \newskip\intersecskip \intersecskip=12pt minus 3pt % space between sections \newif\ifcodemode \codemodefalse % whether we are in \Cee part of section \newif\ifon % whether a section will produce any printed output \let\maybe=\iftrue % \if-like macro; governs printing of unchanged sections \def\changesonly{\let\maybe=\iffalse} % the user might set this in limbo \def\onmaybe{\let\ifon=\maybe} % this is executed for each unchanged section \def\Penalty#1{\relax\ifcodemode\3#1\fi} % optional break if in code mode % Summary of use of \Penalty %\def*{\Penalty9\ast} %\def+{\Penalty8\plus} \gdef-{\Penalty8\minus} %\def\I{\Penalty7\neq} % inequality test %\def\S{\Penalty7=} % equality test %\def\OR{\Penalty6\mid} % bitwise OR %\def\W{\Penalty5\land} % logical AND %\def\V{\Penalty4\lor} % logical OR %\def\?{\Penalty3\mathrel?} % `?' operator %\def:{\Penalty3\mcolon} % colon in math mode matches `?' 
operator %\def\K{\Leftarrow\Penalty2} % assignment operator, % comma operator gets \Penalty1 by CWEAVE \def\note#1#2.% for cross-referencing notes, as at the end of a section {\Y\noindent{\hangindent2\indentunit\baselineskip10pt\eightrm#1~#2.\par}} \def\defin#1{\global\advance\ind by 2 \1\&{#1}\quad} % begin `#define' or `format' of `#include' \def\lapstar{\rlap{*}} \def\stsec % start up setion {\endcodemode\noindent{\let\*=\lapstar\bf\secstar.\quad}} \let\startsection=\stsec % this one is used; provides hook for extra actions \newcount\gdepth % depth of last starred section \newcount\secpagedepth \secpagedepth=2 % depth where no page break is forced \newtoks\gtitle % title of last starred section \def\A{\note{See also section}} % xref for doubly defined module name \def\As{\note{See also sections}} % xref for multiply defined module name \def\ATL{\par\noindent\bgroup\catcode`\_=12 \postATL} % print @l in limbo \def\postATL#1 #2 {\bf letter \\{\uppercase{\char"#1}} tangles as \tentex "#2"\egroup\par} \def\noATL#1 #2 {} \def\noatl{\let\ATL=\noATL} % suppress output from @l \def\ATP{\X\kern-.5em:Preprocessor directives\X} \def\B{\leavevmode % go into C mode \ifcodemode\else \begingroup\codemodetrue \rightskip=0pt plus 300pt \pretolerance 10000 \hyphenpenalty 9999 % so strings can be broken (with string break inserted) \exhyphenpenalty 10000 \binoppenalty 10000 \relpenalty 10000 \mathcode`(="8000 \mathcode`)="8000 \mathcode`[="8000 \mathcode`]="8000 \mathcode`+="8000 \mathcode`-="8000 \mathcode`*="8000 \mathcode`:="8000 \let\{=\cweblbrace \let\}=\cwebrbrace \let\.=\Cstring \let\\=\Cident \let\&=\Cbold \fi \global\ind=2 \global\pl=0 \1} \def\endcodemode {\ifcodemode \endgroup % get out of C mode \global\hangindent=0pt % counteract \global\hangindent in \1 \fi } \def\C#1% ordinary C comment; try to allow a break if little room is left {\hfil\penalty0\hfilneg\kern4\indentunit\copy\bakk$/\ast\,$#1$\,\ast/$} \def\SHC#1% C++ one-line comment; force onto the current line 
{\nobreak\kern2\indentunit\hbox{$/\!/\,$#1\unskip}}% \def\D{\8\defin{\#define}} % macro definition \def\E{\Penalty7=} % equality test \let\EQ=\equiv % after defining module name \def\PE{\mathrel\plus\EQ} % further defining section of a module \def\ET{ and~} % conjunction between two section numbers \def\ETs{, and~} % conjunction between the last two of several section numbers \def\F{\defin{format}} % format definition \let\G=\ge % greater than or equal sign % \H is long Hungarian umlaut accent \def\h{\8\defin{\#include}} % header file inclusion \def\I{\Penalty7\neq} % inequality test \def\J{\.{@\&}} % TANGLE's join operation \def\K{\Leftarrow\Penalty2} % assignment operator, % can be changed to `=' (if \E is redefined) or `\leftarrow', if desired \def\KK#1{\mathrel{#1}\K} % composite assignment operators % braces are required for same reason as for \m \def\MRL#1{\mathrel{#1}} % for first such operator in compatibility mode % \L is Polish letter suppressed-L \outer\def\M#1. % start of `@ ' section {\MN#1.\ifon \vskip 0pt plus .5 \vsize \penalty-1000\vskip 0pt plus -.5 \vsize \vskip\intersecskip\startsection } \outer\def\n#1. % start of `@~' section {\MN#1.\ifon\vskip\intersecskip\startsection } \outer\def\N#1 #2. #3. 
%start of `@*' section {\global\gdepth=#1\global\gtitle={#3}\MN#2.% \ifon \ifnum\gdepth<\secpagedepth \vfil\eject \else \vskip 0pt plus .5 \vsize \penalty-1000\vskip 0pt plus -.5 \vsize \vskip\intersecskip \fi \fi \message{*\secno}% progress report \edef\next{\write\cont % to contents file {\ZZ{#1}{#3}{\secno}{\noexpand\the\pageno}}} \next \ifon\startsection{\bf#3.\quad}% } \def\MN#1.% common code for \M, \N, \n {\par{\xdef\secstar{#1}\let\*=\empty\xdef\secno{#1}}% \ifx\secno\secstar \onmaybe % print unchanged section if \maybe=\iftrue \else\ontrue % changed sections are always printed \fi \mark{{{\tensy x}\secno}{\the\gdepth}{\the\gtitle}}% }% each \mark is {section reference}{depth}{title} % \O is Scandinavian letter O-with-slash % \P is paragraph sign \def\Q{\note{This code is cited in section}} % xref for citation of a module \def\Qs{\note{This code is cited in sections}} % xref for citations of a module \let\R=\lnot % logical not % \S is section sign \def\T#1{% {\let\~=\oct \let\^=\hex \let\_=\timestentothepower \let\$=\withsuffix #1}} \def\oct{{}^\circ\kern-.2em\it\aftergroup\afteroct} \def\afteroct{\kern.2em } \def\hex{{}^{\scriptscriptstyle\#}\tt} \def\timestentothepower{\cdot 10^{\aftergroup}} \def\withsuffix{_{\rm\,\aftergroup}} \def\U{\note{This code is used in section}} % xref for use of a module \def\Us{\note{This code is used in sections}} % xref for uses of a module \def\V{\Penalty4\lor} % logical or \def\W{\Penalty5\land} % logical and \def\X#1:#2\X{\langle\,${#2\eightrm\enspace#1}$\,\rangle} % module name \def\Y{\par\yskip} \let\Z=\le % less than or equal sign \let\ZZ=\relax % now you can \send the control sequence \ZZ \def\?{\Penalty3\mathrel?} % `?' 
operator \def\vb#1{\leavevmode\hbox{\kern2pt\vrule\vtop{\vbox{\hrule \hbox{\strut\kern2pt\.{#1}\kern2pt}} \hrule}\vrule\kern2pt}} % verbatim string \let\*=* % output routines \newif\iftitle % if true suppresses first running head \def\lheader{\mainfont\the\pageno\eightrm\qquad\grouptitle\hfill\title\qquad \mainfont\topsecno} % top line on left-hand pages \def\rheader{\mainfont\topsecno\eightrm\qquad\title\hfill\grouptitle \qquad\mainfont\the\pageno} % top line on right-hand pages \def\grouptitle{\let\i=I\let\j=J\uppercase\expandafter{\expandafter \takethree\topmark}} \def\topsecno{\expandafter\takeone\topmark} \def\takeone#1#2#3{#1} \def\taketwo#1#2#3{#2} \def\takethree#1#2#3{#3} \def\nullsec{{\eightrm\kern-2em }} % the \kern-2em cancels \qquad in headers \let\page=\pagebody \normalbottom % \def\page{\box255 }% faster, but loses plain TeX footnotes \def\normaloutput#1#2#3% #1=page, #2=running head if even, #3 idem if odd {\ifodd\pageno\hoffset=\pageshift\fi \shipout\vbox {\vbox to\fullpageheight {\iftitle\global\titlefalse % no running head, but reset for next pages \else\hbox to\pagewidth{\vbox to10pt{}\ifodd\pageno #3\else#2\fi}% \fi \vfill#1% parameter #1 is the page itself }} \global\advance\pageno by1 } \def\title{\uppercase\expandafter{\jobname}} % default title \def\topofcontents{\centerline{\titlefont\title}\vskip.7in \vfill} % this material will start the table of contents page \def\botofcontents{\vfill\vfill \centerline{\covernote}} % this material will end the table of contents page \def\covernote{} \def\contentspagenumber{0} % default page number for table of contents \newdimen\pagewidth \pagewidth=6.5in % the width of each page \newdimen\pageheight \pageheight=8.7in % the height of each page \newdimen\fullpageheight \fullpageheight=9in % page height including headlines \newdimen\pageshift \pageshift=0pt % shift righthand pages wrt lefthand ones \def\magnify#1{\mag=#1\pagewidth=6.5truein\pageheight=8.7truein \fullpageheight=9truein\setpage} 
\def\setpage{\hsize=\pagewidth \vsize=\pageheight} % use after changing page size \def\contentsfile{\jobname.toc } % file that gets table of contents info \def\readcontents{\input\contentsfile} \newwrite\cont \output= % temporary for first page, which is empty so as to define \topmark {\setbox0=\box255 % throw away empty page \openout\cont=\contentsfile % gets written when first real page is shipped \write\cont{\catcode `\noexpand\@=11 } % first line makes `@' letter \global\output={\normaloutput\page\lheader\rheader}% the real \output } \setpage \vbox to 2\vsize{} % dummy page, but the first \topmark won't be null \gtitle={\.{CWEB} output} % this running head is reset by starred sections \mark{\noexpand\nullsec0{\the\gtitle}} % page must be oversized so even a very early \inx won't get the chance % to reassign \output before this page is sent off. \newbox\sbox % one-page buffer for delayed output of page before index \newif\ifpagesaved % whether buffer has been filled \def\bufferedoutput {{\ifpagesaved\normaloutput{\box\sbox}\lheader\rheader\fi % empty buffer \global\setbox\sbox=\page \global\pagesavedtrue % fill buffer }} \def\ch#1.% {{\let\*=\relax \note{The following sections were changed by the change file:}{#1}.% }} \newbox\lbox % lefthand column in the index \def\inx {\par\vskip6pt plus 1fil \endcodemode % we are beginning the index \def\page{\box255 } \normalbottom \write\cont{\catcode `\noexpand\@=12\relax} % make `@' other char \closeout\cont % the contents information has been fully gathered \output=\bufferedoutput\pagesavedfalse\eject % eject pages, keeping last \setbox\sbox=\vbox{\unvbox\sbox} % allow its glue to reset \vsize=\pageheight \advance\vsize by -\ht\sbox % the remaining height \hsize=.5\pagewidth \advance\hsize by -10pt % column width for the index (20pt between cols) \parfillskip 0pt plus .6\hsize % try to avoid almost empty lines \def\lr{L} % this tells whether the left or right column is next \output= {\if L\lr\global\setbox\lbox=\page 
\gdef\lr{R} \else \normaloutput {\vbox to\pageheight {\box\sbox \vss \hbox to\pagewidth{\box\lbox \hfil \page}}% }% page argument to \normaloutput \lheader\rheader % other two arguments to \normaloutput \global\vsize\pageheight \gdef\lr{L} \global\pagesavedfalse \fi }% \message{Index:} \parskip 0pt plus .5pt \let\@=\inxentry \def\[##1]{$\underline{##1}$} % underlined index item \rm \rightskip0pt plus 2.5em \tolerance 10000 \let\*=\lapstar \hyphenpenalty 10000 \parindent0pt } \outer\def\inxentry#1#2, % index entry; #1 is `h' or `m' for horiz/math mode {\par\hangindent2em\noindent\if#1m$#2$\else#2\fi:\kern1em} \def\fin {\par\vfill\eject % this is done when we are ending the index \ifpagesaved\null\vfill\eject\fi % needed in case index is empty \if R\lr \null\vfill\eject\fi % finish the current page \parfillskip 0pt plus 1fil % restore normal paragraph end \setpage % restore page shape \output={\normaloutput\page\lheader\rheader}% restore output routine \message{Module names:} \def\grouptitle{NAMES OF THE MODULES} \let\topsecno=\nullsec \let\note=\finnote \def\Q{\note{Cited in section}}% crossref for mention of a section \def\Qs{\note{Cited in sections}}% crossref for mentions of a section \def\U{\note{Used in section}}% crossref for use of a module \def\Us{\note{Used in sections}}% crossref for uses of a module \def\@{\par\hangindent 2em}\let\*=* } \def\finnote#1#2.{\quad{\eightrm#1~#2.}} \def\con {\par\vfill\eject % finish the module names \rightskip 0pt \hyphenpenalty 50 \tolerance 200 \setpage \output={\normaloutput\page\lheader\rheader} \titletrue % prepare to output the table of contents \pageno=\contentspagenumber \def\grouptitle{TABLE OF CONTENTS} \message{Table of contents:} \topofcontents \line{\hfil Section\hbox to3em{\hss Page}} \let\ZZ=\contentsline \readcontents % read the contents info \botofcontents \eject\end % print the contents page(s) and terminate } \def\contentsline#1#2#3#4% #1=depth, #2=title, #3=secno, #4=pageno {\line{\ignorespaces#2 
\leaders\hbox to .5em{.\hfil}\hfil \ #3\hbox to3em{\hss#4}% }% } \def\coneven % force even number of pages before contents {\par\vfill\eject\ifodd\pageno\else\shipout\hbox{}\advancepageno\fi\con} \def\noinx{\def\inx{\endcodemode\end}} % no indexes or table of contents \def\nomods{\let\FIN=\fin \def\fin{\let\parfillskip=\end \FIN}} % no index of module names or table of contents \def\nocon{\let\con=\end} % no table of contents \def\today{\ifcase\month\or January\or February\or March\or April\or May\or June\or July\or August\or September\or October\or November\or December\fi \space\number\day, \number\year} \newcount\twodigits \def\hours{\twodigits=\time \divide\twodigits by 60 \printtwodigits \multiply\twodigits by-60 \advance\twodigits by\time \printtwodigits} \def\gobbleone1{} \def\printtwodigits{\advance\twodigits100 \expandafter\gobbleone\number\twodigits \advance\twodigits-100 } \def\now{{\eightrm\today\ at \hours}} \def\datethis % say `\datethis' in limbo, to get your listing timestamped {\def\startsection{\leftline\now\bigskip\let\startsection=\stsec\stsec}} \def\datecontentspage % timestamps the contents page {\def\topofcontents {\leftline\now\bigskip\centerline{\titlefont\title}\vfill}% } \def\indentation#1{\indentunit#1\relax \parindent\indentunit \setbox\bak\hbox to-\indentunit{}% backspace one unit \setbox\bakk\hbox to-2\indentunit{}% backspace two units } cwebx-3.04.orig/examples/ 40755 1750 1750 0 5702260232 13041 5ustar jdgjdgcwebx-3.04.orig/examples/Makefile100644 1750 1750 705 5702254135 14565 0ustar jdgjdgCFLAGS = CWEAVE = cweave +d CTANGLE = ctangle .SUFFIXES: .tex .dvi .w .w.tex: $(CWEAVE) $* .tex.dvi: tex $* .w.dvi: $(CWEAVE) $* tex $* .w.c: $(CTANGLE) $* .w.o: $(CTANGLE) $* $(CC) $(CFLAGS) -c $*.c .w.dvi: make $*.tex make $*.dvi .w.o: make $*.c make $*.o treeprint: treeprint.w make treeprint.c make treeprint wc: wc.w make wc.c make wc wmerge: wmerge.w make wmerge.o $(CC) $(CFLAGS) -o wmerge wmerge.o ../common.o 
cwebx-3.04.orig/examples/compare.w100644 1750 1750 7304 5666373555 15006 0ustar jdgjdg@* Comparing text files. This is an entirely trivial program, that tests whether two text files are equal, and if not so, points out the first point of difference. @h @h @c typedef char bool; @ The outline of the program is simple. We read characters from both input files into |c1| and~|c2| until the comparison is complete. Line and column counts are maintained in |line| and~|col|. @d left_margin 1 /* leftmost column number; change to 0 if you prefer */ @c @@; int main(int n, char** arg) { FILE *f1,*f2; /* the two input files */ int c1,c2,col=left_margin; long line=1; @< Open the files |f1| and~|f2|, taking their names from the command line or from the terminal; in case of an error for which no recovery is possible, call |exit(1)| @> @< Search for first difference, leaving |c1!=c2| if and only if a difference was found @> @< Report the outcome of the comparison @> return 0; /* successful completion */ } @ The heart of the program is this simple loop. When we reach the end of one of the files, the files match if and only if the other file has also reached its end. For this reason the test |c1==c2|, which requires characters to be read from both files, must precede the test for file end; when only one file ends, it is the former test which breaks the loop. @< Search for first difference... @>= while ((c1=getc(f1))==(c2=getc(f2)) && c1!=EOF) if (c1=='\n') {@; ++line; col=left_margin; } @+ else ++col; @ When the first difference occurs at the end of one of the files, or at the end of a line, we give a message indicating this fact. @< Report... 
@>= if (c1==c2) printf("Files match.\n"); else { printf("Files differ.\n"); if (c1==EOF || c2==EOF) @/{@; the_file(c1==EOF); printf("is contained in the other as initial segment.\n"); } else if (c1=='\n' || c2=='\n') @/{@; the_file(c1=='\n'); printf("has a shorter line number %ld than the other.\n",line); } else printf("First difference at line %ld, column %d.\n",line,col); } @ The function |the_file| starts a sentence about the first or second file, depending on its boolean argument. @= void the_file(bool is_first) @+{@; printf("The %s file ", is_first ? "first" : "second" ); } @ There can be zero, one or two command line arguments. If there are none, the user is prompted to supply them, and if there are two these are taken as the file names, prompting the user only in case a file could not be opened. In case just one argument is present, the first file is assumed to be the standard input, which does not have to be opened; in this case however we will not read a file name from terminal in case the second file cannot be opened. @d read_mode "r" @< Open... @>= --n; ++arg; /* ignore ``argument'' 0, which is the program name */ if (n==0) @/{@; open_file(&f1,"First file to compare", NULL); open_file(&f2,"Second file to compare", NULL); } else if (n==1) { f1=stdin; if ((f2=fopen(*arg,read_mode))==NULL) {@; printf("Could not open file %s.\n",*arg); exit(1); } } else if (n==2) { open_file(&f1,"Give another first file", *arg++); open_file(&f2,"Give another second file", *arg); } else {@; printf("No more than two command line arguments are allowed.\n"); exit(1); } @ The function |open_file| will try to open the file |name| for reading, and if this fails it will prompt for another file name until it has success. If called with |name==NULL|, the function starts with prompting right away. 
@= void open_file(FILE** f,char* prompt,char* name) { char buf[80]; if (name==NULL || (*f=fopen(name,read_mode))==NULL) do {@; printf("%s: ",prompt); fflush(stdout); scanf("%79s",buf); }@/ while ((*f=fopen(buf,read_mode))==NULL); } @* Index. cwebx-3.04.orig/examples/treeprint.w100644 1750 1750 14706 5702223112 15366 0ustar jdgjdg% Copyright 1987 Norman Ramsey -- Rutgers University % Adapted to CWEB version 3.0 by Marc van Leeuwen -- CWI Amsterdam @*Directory Trees. The object is to print out a directory hierarchy in some pleasant way. The program takes output from \.{find * -type d -print {\v} sort} @^system dependencies@> and produces a nicer-looking listing. For those of you who may not have \.{find} or \.{sort}, the output is a list of fully qualified directory names (parent and child separated by slashes |'/'|), and everything is already nicely sorted in lexicographic order. |treeprint| takes one option, |"-p"|, which tells it to use the printer's line-drawing set, rather than the terminal's. @h @c @< Global declarations @>@; main(int argc, char** argv) { @< Variable declaration for |main| @> @; @< Search for options and set special characters on |"-p"| @> @< Read output from find and enter into tree @> @< Write tree on standard output @> exit(0); } @ We make all the siblings of a directory a linked list off of its left child, and the offspring a linked list off the right side. Data are just directory names. @d sibling left @d child right @< Global decl... @>= typedef struct tnode { struct tnode *left, *right; char *data; } TNODE; @ @< Variable declaration for |main| @>= struct tnode *root; @*Input. Reading the tree is simple---we read one line at a time, and call on the recursive |add_tree| procedure. 
@c read_tree (FILE* fp,struct tnode** rootptr) { char buf[255], *p; while ((fgets(buf, 255, fp))!=NULL) { @< If |buf| contains a newline, make it end there @> add_tree(rootptr, buf); } } @ Depending what system you're on, you may or may not get a newline in |buf|. @< If |buf| contains a newline... @>= p=buf; while (*p!='\0'&&*p!='\n') p++; @^system dependencies@> *p='\0'; @ To add a string, we split off the first part of the name and insert it into the sibling list. We then do the rest of the string as a child of the new node. @c add_tree(struct tnode** rootptr, char* p) { char *s; int slashed; if (*p=='\0') return; @< Break up the string so |p| is the first word, |s| points at null-begun remainder, and |slashed| tells whether |*s=='/'| on entry @> if (*rootptr==NULL) { @< Allocate new node to hold string of size |strlen(p)| @> strcpy((*rootptr)->data,p); } if (strcmp((*rootptr)->data,p)==0) { if (slashed) ++s; add_tree(&((*rootptr)->child),s); } else { if (slashed) *s='/'; add_tree(&((*rootptr)->sibling),p); } } @ We perform some nonsense to cut off the string |p| so that |p| just holds the first word of a multiword name. |s| points at what was either the end of |p| or a slash delimiting names. In either case |*s| is made |'\0'|. Later depending on whether we want to pass the whole string or the last piece, we will restore the slash or advance |s| one character to the right. @< Break up... @>= for (s=p;*s!='\0'&&*s!='/';) s++; if (*s=='/') slashed=1, *s='\0'; else slashed=0; @ Node allocation is perfectly standard\dots @< Allocate new node... @>= *rootptr=(struct tnode *) malloc (sizeof(struct tnode)); (*rootptr)->left = (*rootptr)->right = NULL; (*rootptr)->data = malloc (strlen(p)+1); @ @< Global decl... @>= char *malloc(); @ In this simple implementation, we just read from standard input. @< Read... @>= read_tree(stdin,&root); @*Output. We begin by defining some lines, tees, and corners. The |s| stands for screen and the |p| for printer. 
You will have to change this for your line-drawing set. @^system dependencies@> @d svert '|' @d shoriz '-' @d scross '+' @d scorner '\\' /* lower left corner */ @d pvert '|' @d phoriz '-' @d pcross '+' @d pcorner '\\' /* lower left corner */ @ The default is to use the terminal's line drawing set. @< Global declarations @>= char vert=svert; char horiz=shoriz; char cross=scross; char corner=scorner; @ With option |"-p"| use the printer character set. @< Search for options... @>= while (--argc>0) { if (**++argv=='-') { switch (*++(*argv)) { case 'p': vert=pvert; horiz=phoriz; cross=pcross; corner=pcorner; break; default: fprintf(stderr,"treeprint: bad option -%c\n",**argv); break; } } } @ We play games with a character stack to figure out when to put in vertical bars. A vertical bar connects every sibling with its successor, but the last sibling in a list is followed by blanks, not by vertical bars. The state of bar-ness or space-ness for each preceding sibling is recorded in the |indent_string| variable, one character (bar or blank) per sibling. @< Global decl... @>= char indent_string[100]=""; @ Children get printed before siblings. We don't bother trying to bring children up to the same line as their parents, because the \caps{UNIX} filenames are so long. We define a predicate telling us when a sibling is the last in a series. @d is_last(S) (S->sibling==NULL) @c print_node(FILE* fp, char* indent_string, struct tnode* node) { char string[255]; int i; char *p, *is; if (node==NULL) { } else { *string='\0'; for (i=strlen(indent_string); i>0; i--) strcat(string,@, " | "); strcat(string,@t\ \ @> " +--"); @< Replace chars in |string| with chars from line-drawing set and from |indent_string| @> fprintf(fp,"%s%s\n",string,node->data); @) /* Add vertical bar or space for this sibling (claim |*is=='\0'|) */ *is++ = (is_last(node) ? 
' ' : vert); *is='\0'; /* terminate the extended indent string */ print_node(fp, indent_string, node->child); /* extended |indent_string| */ *--is='\0'; print_node(fp, indent_string, node->sibling); /* original |indent_string| */ } } @ For simplicity, we originally wrote connecting lines with |'|'|, |'+'|, and |'-'|. Now we replace those characters with appropriate characters from the line-drawing set. We take the early vertical bars and replace them with characters from |indent_string|, and we replace the other characters appropriately. We are sure to put a |corner|, not a |cross|, on the last sibling in a group. @< Replace chars... @>= is=indent_string; for (p=string; *p!='\0'; p++) switch(*p) { case '|': *p=*is++; break; case '+': *p=(is_last(node) ? corner : cross); break; case '-': *p=horiz; break; default: break; } @ For this simple implementation, we just write on standard output. @< Write... @>= print_node(stdout, indent_string, root); @*Index. cwebx-3.04.orig/examples/wc.w100644 1750 1750 17762 5702225321 13774 0ustar jdgjdg% wc: An example of CWEB by Silvio Levy and Donald E. Knuth % Adapted to CWEB version 3.0 by Marc van Leeuwen -- CWI Amsterdam \nocon % omit table of contents \datethis % print date on listing @* An example of \.{CWEB}. This example presents the ``word count'' program from \caps{UNIX}, rewritten in \.{CWEB} to demonstrate literate programming in \Cee. The level of detail is intentionally high, for didactic purposes; many of the things spelled out here don't need to be explained in other programs. The purpose of \.{wc} is to count lines, words, and/or characters in a list of files. The number of lines is the number of newline characters in the file. The number of characters is the file length in bytes. A ``word'' is a maximal sequence of consecutive characters other than white space, containing at least one visible character. @ Most \.{CWEB} programs share a common structure. 
It's probably a good idea to have one module that states this structure explicitly, even though the elements could all be introduced in sections contributing to of the unnamed module if they don't need to appear in any special order. @c @< Global variables @>@; @< Functions @>@; @< The main program @>@; @ We must include the standard I/O definitions, since we want to send formatted output to |stdout| and |stderr|. We also use the character classification macro |isgraph| to detect visible characters. @h @h @ The |status| variable tells the operating system if the run was successful or not, and |prog_name| is used in case there's an error message to be printed. @d OK 0 /* |status| code for successful run */ @d usage_error 1 /* |status| code for improper syntax */ @d cannot_open_file 2 /* |status| code for file access error */ @< Global variables @>= int status=OK; /* exit status of command, initially |OK| */ char *prog_name; /* who we are */ @ Now we come to the general layout of the |main| function. @< The main... @>= main (int argc,char** argv) { @< Variables local to |main| @>@; prog_name=*argv++; --argc; /* process program name */ @< Set up option selection @> @< Process all the files @> @< Print the grand totals if there were multiple files @> exit(status); } @ If the first argument begins with a `\.-' the user is choosing the desired counts and specifying the order in which they should be displayed. Each selection is given by the initial character (lines, words, or characters). For example, `\.{-cl}' would cause just the number of characters and the number of lines to be printed. We do not process this string now. It will be used to control the formatting at output time. @< Var... @>= int file_count; /* how many files there are */ char *which; /* which counts to print */ @ @< Set up o... 
@>= which="lwc"; /* if no option is given, print all three values */ if (argc>0 && (*argv)[0] == '-') {@; which=&(*argv++)[1]; --argc; } file_count=argc; @ Now we scan the remaining arguments and try to open a file, if possible. The file is processed and its statistics are given. We use a |do|~\dots~|while| loop because we should read from the standard input if no file name is given. @< Process... @>= do { @< If a file is given try to open |*argv|; |continue| if unsuccesful @> @< Initialize pointers and counters @> @< Scan file @> @< Write statistics for file @> @< Close file @> @< Update grand totals @> /* even if there is only one file */ } while (++argv,--argc>0); @ Here's the code to open the file. We use the low-level functions |open|, |read|, and |close| that operate work file descriptors rather than with |FILE|s. A special trick allows us to handle input from |stdin| when no name is given. Recall that the file descriptor to |stdin| is 0; that's what we initialize our file descriptor to. @< Variabl... @>= int fd=0; /* file descriptor, initialized to |stdin| */ @~@d READ_ONLY 0 /* read access code for system |open| routine */ @< If a fi... @>= if (file_count>0 && (fd=open(*argv,READ_ONLY))<0) { fprintf (stderr, "%s: cannot open file %s\n", prog_name, *argv); @.cannot open file@> status|=cannot_open_file; --file_count; continue; } @ @< Close file @>= close(fd); @ We will do some homemade buffering in order to speed things up: Characters will be read into the |buffer| array before we process them. To do this we set up appropriate pointers and counters. @d buf_size BUFSIZ /* \.{stdio.h}'s |BUFSIZ| is chosen for efficiency*/ @< Var... @>= char buffer[buf_size]; /* we read the input into this array */ register char *ptr; /* the first unprocessed character in |buffer| */ register char *buf_end; /* the first unused position in |buffer| */ register int c; /* current character, or number of characters just read */ int in_word; /* are we within a word? 
*/ long word_count, line_count, char_count; /* number of words, lines, and characters found in the file so far */ @ @< Init... @>= ptr=buf_end=buffer; line_count=word_count=char_count=0; in_word=0; @ The grand totals must be initialized to zero at the beginning of the program. If we made these variables local to |main|, we would have to do this initialization explicitly; however, \Cee's globals are automatically zeroed. (Or rather, ``statically zeroed.'') (Get it?) @^Joke@> @< Global var... @>= long tot_word_count, tot_line_count, tot_char_count; /* total number of words, lines and chars */ @ The present module, which does the counting that is \.{wc}'s {\it raison d'\^etre}, was actually one of the simplest to write. We look at each character and change state if it begins or ends a word. @< Scan... @>= while (1) { @< Fill |buffer| if it is empty; |break| at end of file @> c=*ptr++; if (isgraph(c)) /* visible character */ {@; if (!in_word) ++word_count, in_word=1; } else if (isspace(c)) { in_word=0; /* |c| white space */ if (c=='\n') ++line_count; } } @ Buffered I/O allows us to count the number of characters almost for free. @< Fill |buff... @>= if (ptr>=buf_end) { ptr=buffer; c=read(fd,ptr,buf_size); if (c<=0) break; char_count+=c; buf_end=buffer+c; } @ It's convenient to output the statistics by defining a new function |wc_print|; then the same function can be used for the totals. Additionally we must decide here if we know the name of the file we have processed or if it was just |stdin|. @< Write... @>= wc_print(which, char_count, word_count, line_count); if (file_count) printf (" %s\n", *argv); /* not |stdin| */ else printf ("\n"); /* |stdin| */ @ @< Upda... @>= tot_line_count+=line_count; tot_word_count+=word_count; tot_char_count+=char_count; @ We might as well improve a bit on \caps{UNIX}'s \.{wc} by counting the files too. @< Print the... 
@>= if (file_count>1) { wc_print(which, tot_char_count, tot_word_count, tot_line_count); printf(" total in %d files\n",file_count); } @ Here now is the function that prints the values according to the specified options. The calling routine is supposed to supply a newline. If an invalid option character is found we inform the user about proper usage of the command. Counts are printed in 10-digit fields so that they will line up in columns. @d print_count(n) printf("%10ld",n) @< Fun... @>= wc_print(char* which, long char_count, long word_count, long line_count) { while (*which) switch (*which++) { case 'l': print_count(line_count); break; case 'w': print_count(word_count); break; case 'c': print_count(char_count); break; default: if ((status & usage_error)==0) { fprintf (stderr, "\nUsage: %s [-lwc] [filename ...]\n", prog_name); @.Usage: ...@> status|=usage_error; } } } @ Incidentally, a test of this program against the system \.{wc} command on a SPARCstation showed that the ``official'' \.{wc} was slower. Furthermore, although that \.{wc} gave an appropriate error message for the options `\.{-abc}', it made no complaints about the options `\.{-labc}'! Perhaps the system routine would have been better if its programmer had been more literate? @* Index. Here is a list of the identifiers used, and where they appear. Underlined entries indicate the place of definition. Error messages are also shown. cwebx-3.04.orig/examples/wmerge.w100644 1750 1750 3347 5702226225 14627 0ustar jdgjdg% Adapted to CWEB version 3.0 by Marc van Leeuwen -- CWI Amsterdam \noinx @* Introduction. This file contains the program |wmerge|, which takes two or more files and merges them according to the conventions of \.{CWEB}. We use the routines of \.{CWEB} itself. The function |common_init| takes care of processing command line arguments. 
Since the result of the merge will be produced on the standard output, we prevent distraction as much as possible by clearing flags |'h'| and |'p'| (for |'b'| it is not required since no banner is produced anyway). @h @h "../common.h" /* the header file for \.{CWEB}'s \.{common.w} */ @c @< Prototype @>@; main (int argc,char** argv) { common_init(argc,argv); flags['h']=flags['p']=0; reset_input(); while (get_line()) put_line(); wrap_up(); } @ This file should be linked together with the object file produced from |"common.w"|, which is also used in both |CTANGLE| and |CWEAVE|. That file defines the functions |common_init|, |reset_input|, |get_line|, and |wrap_up|. There are however a number of functions that are required by that compilation unit although they are not actually used; we define them with trivial function bodies. Since the linker doesn't check types anyway we don't specify any here either. @c void print_stats() @+ {} void names_match () @+ {} void init_module_name() @+ {} void init_id_name () @+ {} @ All that remains is to define |put_line| which is trivial. The external variable |buffer| holds the characters read by |get_line|, up to |limit|, and |loc| points to the next character to be read, i.e., after calling |get_line| it points to |buffer[0]|. @< Prototype @>= void put_line(void); @~@c void put_line(void) { while (loc{\hbox{\rm\def\ { }$\langle\,$#1$\,\rangle$}} \def\sp.{structured programming} \def\lp.{literate programming} \def\Lp.{Literate programming} \def\LP.{Literate Programming} \def\WEB.{\caps{WEB}} \def\ASCII.{\caps{ASCII}} \def\CWEB.{\.{CWEB}} \def\CWEBx.{\.{CWEBx}} \def\LKC.{Levy/Knuth \CWEB.} \def\:#1{`\/\.{@#1}'} \def\atspace.{\:{ }} \outer\def\N#1 #2. #3.
%start of `@*' section {\MN#2.% \ifon \vskip 0pt plus .5 \vsize \penalty-1000\vskip 0pt plus -.5 \vsize \vskip\intersecskip \startsection{\bf#3.\quad}% } \def\MN#1.% common code for \M, \N, \n {\par\xdef\secstar{#1}\ontrue}% no \mark, no starred sections \def\inx {\par\vskip6pt plus 1fil \endcodemode % we are beginning the index \output=\bufferedoutput\pagesavedfalse\eject % eject pages, keeping last \setbox\sbox=\vbox{\unvbox\sbox} % allow its glue to reset \vsize=\pageheight \advance\vsize by -\ht\sbox % the remaining height \hsize=.5\pagewidth \advance\hsize by -10pt % column width for the index (20pt between cols) \parfillskip 0pt plus .6\hsize % try to avoid almost empty lines \def\lr{L} % this tells whether the left or right column is next \output= {\if L\lr\global\setbox\lbox=\page \gdef\lr{R} \else \normaloutput {\vbox to\pageheight {\box\sbox \vss \hbox to\pagewidth{\box\lbox \hfil \page}}% }% page argument to \normaloutput \lheader\rheader % other two arguments to \normaloutput \global\vsize\pageheight \gdef\lr{L} \global\pagesavedfalse \fi }% \parskip 0pt plus .5pt \outer\def\@##1##2, % index entry; ##1 is `h' or `m' for horiz/math mode {\par\hangindent2em\noindent\if##1m$##2$\else##2\fi:\kern1em} \def\[##1]{$\underline{##1}$} % underlined index item \rm \rightskip0pt plus 2.5em \tolerance 10000 \let\*=\lapstar \hyphenpenalty 10000 \parindent0pt } \def\fin{\let\@\relax\con}% make these innocuous before scanning argument: \def\con#1\con{% ignore stuff after index \par\vfill\eject % this is done when we are ending the index \ifpagesaved\null\vfill\eject\fi % needed in case index is empty \if R\lr \null\vfill\eject\fi % finish the current page \parfillskip 0pt plus 1fil % restore normal paragraph end \setpage % restore page shape \output={\normaloutput\page\lheader\rheader} % restore output routine } \centerline{\titlefont Literate Programming in C} \medskip \centerline{\twelverm The {\twelvett CWEB} System of Structured Software Documentation} \medskip 
\centerline{Manual for \CWEB.x3.0} \bigskip \centerline{Marc A. A. van Leeuwen} \bigskip\bigskip \beginsection Overview This document describes \CWEB.x3.0, a particular implementation of \CWEB., a system that supports the concept of ``\lp.'' for programs written in the language~\Cee\ (more particularly for this version, in \caps{ANSI/ISO} \Cee). As this manual aims to supply all information possibly relevant to a wide variety of users, it is necessarily rather extensive. However, \CWEB. is not a complicated system, and just a few simple commands suffice for practical programming purposes; these are discussed in the section~4 (the remainder serves mainly to allow fine-tuning of the presentation of the printed documents describing literate programs). As the somewhat contrived name of the system indicates, \CWEBx. is not the only version of \CWEB.; indeed it is based on an older version of \CWEB. by Silvio Levy, which is an adaptation to~\Cee\ of the \.{WEB} system (for Pascal) written by Donald~E. Knuth, the founder of \lp.. That \CWEB. system has independently evolved into a system that is currently distributed, under joint responsibility of Levy and Knuth, as \CWEB.~3.4. Those with experience using \LKC. will already be acquainted with most aspects of \CWEBx., and may turn to section~9 for a summary of the differences between the two systems; however, \CWEBx. also provides a compatibility mode (selectable by specifying `\.{+c}' on the command line) in which it should be able to process \Cee~programs written using \LKC. without any modification. The structure of this manual is as follows. In section~2 an exposition of the ideas underlying the concept of \lp. is given, and a description of how systems of the \.{WEB} family provide concrete tools to support this programming methodology. This section is directed particularly to those who are new to \lp.; it explains the purpose of \CWEB. and the logical connection between the various elements that \lp. 
adds with respect to traditional programming. As an illustration we then give a small example program using \CWEB. in section~3. The main commands of \CWEB., which define the structure of the source text and tell the \CWEB. tools what to do with the various pieces of that text, are discussed in section~4. In section~5 we discuss how the behaviour of \CWEB. tools can be further affected by means of command line options and file name arguments supplied when the programs are invoked. In section~6 we discuss some facilities for distributing the source text over several input files, including a ``change file'' that allows applying local patches without affecting the original source files; this is not directly related to \lp., but can be quite useful in larger projects. In section~7 the remaining minor \CWEB. commands are explained, and section~8 discusses some features of the \TeX\ format employed by \CWEB., which can be used to affect the appearance of \CWEB. documents. Section~9 is devoted to a comparison with \LKC.; finally section~10 summarises all \CWEB. codes recognised in the source text. \beginsection About literate programming \Lp. is a concept that was developed, implemented, and propagated by D.~E. Knuth in the early 1980's, as a natural sequel to the concept of ``\sp.'' that had caused a revolution in the world of software development about a decade earlier. (At this moment, another decade further, one may conclude that \lp. has not caused a similar revolution, since many programmers practicing illiterate programming do not feel at all as guilty about this as they would if they were to be found practicing unstructured programming.) So let us first consider the idea of \sp. more closely. \subsection Structured programming Without attempting a definition of the term, it seems fair to say that \sp. 
involves designing a program by hierarchical decomposition of the task at hand, and constructing a program that has a similar decomposition into parts, where each part ``solves'' the corresponding subtask. The subdivision of the program manifests itself in its division into subroutines (procedures, functions), and at the more detailed level in the syntactic composition of those routines (control structures, blocks); this explains why much emphasis is placed on the use of subroutines, and why languages with a linear program model and (conditional) jumps as their main control structure (like assembly language) do not form a natural vehicle for \sp.. The question as to which criteria should be used in subdividing problems into smaller ones is difficult to answer in general, but a good rule of thumb is that at the level of abstraction at which a task is defined it should be possible to give a reasonably simple informal description of its subtasks, in which a new level of detail about the method used to perform the task is given. (It would be nice if the subtasks also had simple formal specifications, but in general this is too much to ask, since informal descriptions do not only abstract from implementation details, they may also conceal numerous obvious specification details). It can be said that the idea of \sp., undone of any dogmatism that has been associated with it, has been rather universally accepted, and has proven to be an effective methodology in software development. Other methodologies have been put forward, such as data encapsulation, but this has been in addition to rather than as an alternative for \sp.. \Lp. however is related in a different way to \sp., as it concerns not the contents of the program itself, but rather the way it is presented. \Lp. 
presupposes \sp., but is independent of other programming paradigms; any program that has been designed in a structured way can in principle be rendered as a literate program, without requiring a change of the program text itself. \subsection Limitations of traditional \sp. Although the composition of a structured program should reflect the design decisions that led to its construction, the traditional way of presenting such programs lacks appropriate facilities for communicating this information effectively to readers of the program, seriously limiting the readability, especially to people other than the programmer of the code. Yet readability is of vital importance, because it is only by careful reading that we can verify that the design of a program is sound and well-implemented, and to understand where and how changes can be made when such a need arises. Of course the code may be documented by adding an arbitrary amount of comments, but there are various reasons why this has a limited effectiveness, so that in practice the level of documentation is nearly always (much) lower than would be desirable from the point of view of maintainability. The syntactic decomposition of a piece of a structured program into a hierarchy of control structures, compound statements, etc., is crucial to understanding the way it functions, yet the human eye is much less capable of performing this task than a parser is, even when proper indentation is applied. The difficulty rapidly increases with the size of the program fragment, and can become a serious factor when this size exceeds a dozen or two lines (depending on the complexity of the structure). And even when we can recognise the structure, the meaning of the individual parts cannot be immediately perceived, but must be derived from close inspection of the code, or from the comments. However, comments are localised in the code, and it is hard to attach a comment clearly to a construct of some considerable extent.
Also, adding too many comments may actually decrease the readability of the program, by making the structure recognition problem worse. Even indentation, useful as it may be, can be more of a nuisance than of any help when it becomes so deep that it forces code and comments to be squeezed together into (or fall off) the right margin. Finally the fact that program sources are usually plain text files, represented in a single rather crude font, does not improve human readability. The limitations of the character set hamper formulation of comments, where special symbols, formulae, tables or illustrations might convey the information much more effectively; in the program itself the eye has to do without visual clues marking the distinction between various types of program elements (identifiers, keywords, constants, literal strings, operators, comments, etc.). As a consequence of all this, few people will find much pleasure in reading source listings, even if the program is well designed and documented, and possibly even contains some interesting and subtle algorithms. Many of these problems would not be too grave if all subroutines were severely limited in size and complexity. However, although it might be possible to live up to such a restriction for certain kinds of programs (in particular if the task involves mostly simple actions of administrative nature rather than any really complicated algorithms), it would be a very impractical requirement in general, certainly for procedural programming languages. First of all, having to break up a subroutine using auxiliary subroutines solely because of the size of the code, violates the basic principle that such a decomposition should be the result of design decisions. 
Furthermore, there are several technical reasons why such a decomposition could be either impossible, or might involve a large amount of additional code that has little to do with the actual task being performed, and that might deteriorate performance unacceptably. Examples of such reasons are for instance the need to perform a large multi-way branching, to have local variables that are visible throughout the execution of a complex algorithm, or to have the possibility to jump out of nested structures on certain (error) conditions (while the language might not allow jumps out of a subroutine into the routine calling~it). Finally, introducing many small subroutines, for reasons that cannot be described easily outside the immediate context in which they will be used, creates a serious problem of giving them sensible names and remembering the tasks they perform. Concluding we may say that \sp. in its traditional form does not encourage or even allow the level of documentation that would be desirable for maintenance and intelligibility by people other than the author of the program. \subsection Requirements for \lp. The basic idea of \lp. is to take a fundamentally different starting point for the presentation of programs to human readers, without any direct effect on the program as seen by the computer. Rather than to present the program in the form in which it will be compiled (or executed), and to intercalate comments to help humans understand what is going on (and which the compiler will kindly ignore), the presentation focuses on explaining to humans the design and construction of the program, while pieces of actual program code are inserted to make the description precise and to tell the computer what it should do. The program description should describe parts of the algorithm as they occur in the design process, rather than in the completed program text. 
For reasons of maintainability it is essential however that the program description defines the actual program text; if this were defined in a separate source document, then inconsistencies would be almost impossible to prevent. If programs are written in a way that concentrates on explaining their design to human readers, then they can be considered as works of (technical) literature; it is for this reason that Knuth has named this style of software construction and description ``\lp.''. More background information about this concept and its history can be found in Knuth's book ``\LP.'', CSLI Lecture notes~\#27, Leland Stanford Junior University, 1992 (ISBN 0-937073-81-4). From the discussion above it will be clear that traditional programming languages are not directly suitable for \lp.. We shall now try to formulate requirements for a system that supports \lp.. Doing so we shall especially keep in mind programs that evolve after their original design, possibly altering certain parts of that design, and possibly being realised by different persons, since it is in such cases that the benefits of \lp. are particularly crucial. The documentation parts of the program description should allow for the same freedom of expression that one would have in an ordinary technical paper. This means that the document describing the program should consist of formatted text, rather than being a plain text file. This does not exclude the possibility that the source is written as a plain text file, but then it should undergo some form of processing to produce the actual program description. The document should moreover contain fragments of a program written in some traditional (structured) programming language, in such a way that they can be mechanically extracted and arranged into a complete program; in the formatted document on the other hand layout and choice of fonts for these program fragments should be so as to maximise readability.
Parts of the program that belong together logically should appear near to each other in the description, so that they are visible from the part of the documentation that discusses their function. This means that it should be possible to rearrange program text with respect to the order in which it will be presented to the computer, for otherwise the parts that deal with the actions at the outer level of a subroutine will be pushed apart by the pieces specifying the details of inner levels. The most obvious and natural way to do this is to suppress the program text for those inner levels, leaving an outline of the outer level, while the inner levels may be specified and documented elsewhere; this is a bit like introducing subroutines for the inner levels, but without the semantic implications that that would have. There should be no restrictions on the order in which the program fragments resulting from this decomposition are presented, so that this order can be chosen so as to obtain an optimal exposition; this may even involve bringing together fragments whose location in the actual program is quite unrelated, but which have some logical connection. Obviously there should be a clear indication of where pieces of program have been suppressed, and which other program fragments give the detailed specifications of those pieces. From the programming language point of view the most obvious method of identification would be to use identifiers, resulting in a simple system of parameterless macros, with as only unusual aspect that uses of the macro are allowed to precede the definition, and indeed do so more often than not. Actually, \lp. uses a method that differs from this only trivially from a formal standpoint, but has a great advantage in practical terms: identification is by means of a more or less elaborate phrase or sentence, marked in a special way to indicate that it is a reference to a program fragment. 
This description both stands for the fragment that is being specified elsewhere, and also serves as a comment describing the function of that fragment at a level of detail that is appropriate for understanding the part of the program containing it. In this way several purposes are served at once: a clear identification between use and definition is established, the code at the place of use is readable because irrelevant detail is suppressed, with a relevant description of what is being done replacing it, and at the place of definition a reminder is given of the task that the piece of code presented is to perform. The documenting power of such a simple device is remarkable. In some cases the result is so clear that there is hardly any need to supply further documentation; also it can sometimes be useful to use this method to replace small pieces of somewhat cryptic code by a description that is actually longer than the code itself. It is hard to give a sharp limit on the length of the description for a program fragment, but if substantially more than a sentence is needed, say a full paragraph, then the fragment probably does not represent a well chosen abstraction, which might be an indication that the design of the program has some room for improvement. On the other hand, it is good practice to explicitly mention any unusual control flow that might be caused by executing the abstracted fragment, like jumping to a label outside the fragment, since such information is vital for a proper understanding of the program at the place where the fragment is used. \subsection WEB systems for \lp. Until now we have discussed \lp. as a general concept, independent of any particular implementation; this was done to stress the generality of the idea. We shall now indicate how these ideas are realised by ``\WEB. systems'', a family of systems that were modelled after Knuth's original \.{WEB}, and of which \CWEB. is a member. 
In these systems the program source is written as a plain text file, and a pair of programs is provided, which transform this source into other text files suitable for processing by a compiler respectively by a typesetting program. Other kinds of \lp. tools are conceivable (e.g., ones that would provide the programmer with a direct graphical representation of the typeset document while editing, possibly with hypertext facilities), but this approach has the advantage of being fairly simple and portable across many platforms. The approach is not the simplest possible however, as a substantial part of the work done by the tools deals with transforming the program fragments from their plain text form into typeset text with proper fonts and layout (i.e., with pretty-printing); this part of the task also depends essentially on the programming language being used. By reverting to verbatim representation of program fragments one could make simple tools that support \lp. in a language independent way---and indeed such tools exist---but then a price is paid in terms of readability. We should also note that \WEB. systems support writing documents whose purpose is to simultaneously specify and document a program; if one is primarily writing a theoretic paper, in which only occasionally pieces of program text are mentioned, then one might prefer a slightly different kind of system that does not impose as much global structure on the document as \WEB. systems do. Not surprisingly, \WEB. systems satisfy all the requirements for \lp. formulated above, and they do so in a fairly straightforward manner. A typeset \WEB. document consists of a sequence of consecutively numbered {\sl sections}, whose size is typically less than half a page. Each section may contain a program fragment, called a {\sl module}, preceded by a commentary in ordinary text, although in some cases either one of these parts might be absent. 
(The programming language used for the program fragments depends on the particular \WEB. system used, as does the typesetting system that eventually produces the printed document; for the \CWEB. system described here these are respectively \Cee\ and \TeX.) In most cases a module is headed by a text in angle brackets called its {\sl module name}, which gives a description of the task it performs. This name is followed by `$\EQ$' and the program code that constitutes the module itself; this is called a defining occurrence of the module name. (We make a distinction between the words `section' and `module', using the former for a numbered portion of the \WEB. document, and the latter for a named portion of the program described by it.) A module name can also be used in the body of some other module, either before or after its definition in the document, to represent the corresponding piece of program text. \WEB. facilitates locating the definition of a module from the place where it is used by automatically incorporating the number of the defining section in the module name. The actual program text is then constructed by recursively replacing module names by the text specified in their definition (this should of course follow the grammatical structure of the program, lest the most basic principles of \lp. be violated). A program fragment occurring at the outermost level is distinguished by the fact that it is not headed by a module name. Most module names will have just one defining occurrence, and will also be referenced just once; in both cases there may however be exceptions, where a module name has more than one occurrence of the indicated kind. If a module is multiply referenced, this simply means that the corresponding part of the program text is repeated identically in more than one place. 
If a module name has more than one defining occurrence, then the text of the corresponding module is obtained by concatenating the program fragments of all its definitions in the order in which they occur in the document. In a similar fashion all fragments without module name are combined into an ``unnamed module''. These are the only occasions where the order in which the sections are given can have any effect on the final program; apart from this the literate programmer has complete freedom of ordering the sections in a way that facilitates understanding the program as much as possible. We need not discuss all aspects of \WEB. systems here, but a few points that contribute to \lp. by improving readability should be mentioned. The proper formatting of all program fragments is automatically taken care of by the \WEB. system, providing a uniform style of presentation. The system also provides a large amount of cross-reference information; this greatly facilitates reading the program and searching for specific pieces of code. Not only is the number of the (first) section defining a module incorporated in its name, but in that defining section indications are also given of the section(s) in which the module is used, and possibly of any further defining occurrences of the same module name. At the end of the document an alphabetically sorted index is added, listing for each identifier all the sections in which it is defined or used; the programmer may also add additional entries to the index by indicating in the program source that certain sections should be referenced for particular terms. A list of all module names is also given, which may help locating the part of the program dealing with some issue. So in many ways the \WEB. system tries to aid human understanding of the program, but of course the literacy of the programmer will always remain the decisive factor in this respect. 
\beginsection What a\/ {\tentt CWEB} program looks like Enough now of general considerations, let us turn our attention to the \CWEB. system this manual is really about. The best way to learn about it is probably to read a \CWEB. document. Therefore we include a small but complete \CWEB. program below; the program is about as small as possible without rendering a decomposition into modules pointless. The example is not intended as a showpiece of programming literacy, but it demonstrates various aspects of the system. The index at the end of the program is included, but not the list of the four module names in this program or the table of contents. One will notice that some symbols appear to be different from their official representation in \Cee, for instance the assignment operator~`\.=' appears as~`$\K$', the equality operator~`\.{==}' as~`$\E$', the logical ``and'' and ``or'' operators `\.{\&\&}'~and~`\.{||}' as `$\W$'~and~`$\V$' respectively, the variable~`\.{f1}' as~`$f_1$', and the null pointer~`\.{NULL}' as~`$\NULL$'; thus the possibilities of the typesetting system are used to improve the appearance of the program. \vfil\eject \begingroup \input compare \par\endgroup \subsection Some remarks about the example program Reading the program should not cause great problems to anyone familiar with the \Cee\ language, once one gets used to the representation of the symbols. We mention a number of points that will have become clear in the course of the example. The commentary text at the beginning of the sections is set in ordinary paragraphs, which contrasts sufficiently with the appearance of the program text that the dividing line between the two can be easily perceived, even though it is only marked by a bit of white space. 
In case the section defines (part of) a named module, the module name heading the program fragment is set flush left, and is followed by~`$\EQ$', or in case this is not the first defining occurrence of that name, by~`$\PE$' (therefore, the occurrence of the module name `$\X5:Functions\X$' in \Sec2 is not a defining one, whereas the occurrences of that name in \Sec5 and~\Sec7 are). The style in which the module names and comments contained in the program fragments are set is similar to that of ordinary text; indeed if they are too long to fit on the line, they will be broken across lines (with proper care taken to respect the indentation level). In \CWEB. embedded comments are always attached to the right of a program element (usually a statement or declaration); in the example we can see there is relatively little need for embedded comments, because of the other means provided for documentation. An embedded comment that is split across lines will not look very good, and should only occur in cases of emergency; in most cases it is better to use the commentary part at the beginning of the section for any elaborate explanation. On the other hand, long module names (occupying up to about four lines) are not uncommon when the task performed by the module calls for an extensive description. As one can see in the example, it is common to refer to small pieces of \Cee~code (in most cases just variables or simple expressions) from within the commentaries, module names and comments. The \CWEB. system makes it easy to include such pieces, by providing a variant of the formatting routines used for the actual program fragments (differing from them by the omission of any 2-dimensional layout features such as indentation). In many cases the pieces of \Cee~code are so simple that they could easily be typeset directly (using \TeX's math mode), producing the same formatted output without using the facilities of \CWEB.. But even then it is preferable to use \CWEB. 
instead, because it will then guarantee that all identifiers mentioned in such a way in the documentation part of a section or in a comment, will be included in the index at the end of the program. Although in many cases a reference would have been generated anyway by the program fragment in the same section (as happens in all cases for our example program), this mechanism ensures that even remarks about the use of variables and functions made in sections that contain no program fragment at all can be traced from the index. Incidentally, identifiers that are used only in a module name are not indexed, which is why there is no reference to \Sec2 in the index entry for~\\{exit}. When an index entry is recorded, whether from within a program fragment or a piece of \Cee~code embedded in text, the occurrence may be flagged as `defining', depending on the context; this happens for instance in the case of parameters in an \caps{ANSI/ISO} style function heading, of variable declarations and of labels. If at least one occurrence of an identifier in some section is a defining one, then the corresponding section number in the index entry for that identifier will be underlined. Single-letter identifiers, the special identifier \.{NULL} (appearing as `$\NULL$'), and keywords of the language are considered so ubiquitous that no index references for them are generated, except those that are underlined; e.g., in the example there is no reference to \Sec6 for the variable~$n$. For keywords this means that they will not appear in the index at all (unless the programmer explicitly marks certain occurrences as defining); note however that identifiers defined in a \&{typedef} declaration (like \&{bool} in the example) will be indexed, even though they are set in boldface just like keywords are. \subsection Further attributes of\/ {\tentt CWEB} programs An aspect of \CWEB. 
programs that does not stand out very clearly in our miniature example is that it allows sets of related sections to be grouped together into ``chapters''. Each chapter is identified by its title, which appears in boldface after the number of its first section; in our example sections 1~and~8 start new chapters. The division into chapters has a few more effects on the document, which were suppressed in our example, since they would interfere with the overall structure of this manual: each chapter starts on a fresh page, its title appears in the running head of all its pages, and all chapter titles are collected in the table of contents. (Style changes such as employed in this manual are easy to obtain, since the style is not determined by the \CWEB. system, but rather by a separate format consisting of \TeX~macros; a few small changes to standard format can change the overall appearance of the document, and it would be equally easy to change for instance the page size or the symbols used to represent operators.) There is one important point left to explain about the example, which is the special position of the lines starting with \&{\#define} and \&{\#include}. Although they look like ordinary preprocessor lines, which could have been included in the program fragments, they are in fact separate items that are given between the documentation part and the program part of a section (this can be seen best in~\Sec6), forming a third type of constituent of sections (although in most sections they will be absent). Their place in \CWEB. is less distinctive than that of their analogues in \WEB. systems for languages that have no preprocessor (like the original \.{WEB} for Pascal, which provides a separate macro facility itself): indeed the directives are just passed on to the \Cee\ preprocessor. 
Yet there is some advantage in specifying them as special items to \CWEB., and in most cases using these facilities is preferable to embedding the directives in the \Cee~program fragments. One reason is that one usually wants the effects of preprocessor directives to be visible throughout the \Cee~file that is generated, while this would not always be the case if they were specified inside the program fragments; for instance if the definition of \\{read\_mode} in \Sec6 had been included in the program fragment, it could not have been validly used in \Sec7, because that section will precede~\Sec6 in the \Cee~file produced. This difficulty could be overcome by collecting all macro definitions in a module that is used at the start of the program and defined in many sections throughout the \CWEB. document. In fact this is just about how \CWEB. treats the separately specified preprocessor directives: they are collected in order of appearance, and placed at the very beginning of the \Cee~file. (Some other place of insertion for the preprocessor directives can be specified by means of a pseudo-module named `$\ATP$', but this is quite rare.) Since a section can define only one module, the \CWEB. facility for preprocessor directives may help avoid having to split up sections merely because they contain such a directive. Furthermore, an important reason to specify \&{\#include} directives to \CWEB., is that this allows it to inspect those header files for any typedef declarations, so that programs can be formatted properly; without this programs using typedef identifiers defined in header files would seriously confuse the syntax analysis that \CWEB. performs, resulting in very poor quality formatting of program fragments. Preprocessor directives other than those mentioned above can only be incorporated in a program by including them in an ordinary program module, but there is relatively little need for such directives. 
In situations where one would use conditional compilation in ordinary~\Cee, one can usually use the ``change file'' mechanism provided by \CWEB. instead (this will be discussed below), especially if it involves system dependent modifications; this has the advantage that such modifications do not affect the main source files, and only those modifications that are actually applied will be visible in the \CWEB. document. In the rare cases that one does include a preprocessor directive in a program fragment, the fact that it is not being specified as a separate item to \CWEB. is usually easy to recognise in the \CWEB. document, because the module name being defined or some program text precedes it; however even if this should not be the case then such embedded directives can still be distinguished by a slight difference in horizontal and vertical spacing. \subsection Output to multiple files There is one important construction one may encounter in \CWEB. documents, that we have not mentioned yet. There may be module names that consist of a file name in typewriter type, like `$\X14:\.{common.h}\X$'; usually such module names are nowhere referenced, but only have one or more defining occurrences. \CWEB. documents containing such a module will produce a file of that name in addition to the \Cee~program that is normally produced. The module bearing the name of the file will form the root module of the \Cee~code written to that file, in the same way as the unnamed module forms the root module for the ordinary output. This feature is particularly useful for the production of header files that can be included by other compilation units (and even by the program produced as main output). It allows one for instance to state function prototype declarations that go to the header file and the matching function definitions in the \Cee~program in the immediate vicinity of one another within the \CWEB. document. 
The module with the file name can refer to submodules, and so on to any depth, just like the modules contributing to the main output. This possibility should be used with some restraint however, lest readers have difficulty finding out to which file the program fragment defined by some module will be sent. The preprocessor lines that are handled by \CWEB. will normally only become part of the main program output, not of any additional output files; this provides one valid reason for sometimes bypassing the facilities of \CWEB., and incorporating \&{\#define} and \&{\#include} directives directly into program modules. \beginsection How to create a\/ {\tentt CWEB} program In the previous section we have explained how one should read \CWEB. documents; in this section we shall discuss how they can be written. The ``\CWEB. document'' we have been discussing is the printed text that is eventually produced from the source file written by the programmer, but that file does not look quite like the printed version; on the other hand the difference in appearance is not so great that there is any difficulty finding the place in the source file corresponding to some part of the printed text. \subsection The general setup The programmer creates a plain text file using the format explained below, which contains both program fragments and commentary, and has file name extension `\.{.w}'; e.g., the file from which the example above was produced is \.{compare.w} (it is included in the \CWEBx. distribution). The \CWEB. system consists of two utility programs `\.{CTANGLE}' and `\.{CWEAVE}' that can be applied to this source file. In order to create an executable program, one issues the command `\hbox{\.{ctangle compare}}', which will read the file \.{compare.w} and write a file \.{compare.c} containing the corresponding \Cee~program. This file can then be processed in the ordinary way by any \Cee~compiler to produce an executable program. 
To produce a printed document on the other hand, one issues the command `\hbox{\.{cweave compare}}', which will again read the file \.{compare.w}, and this time write a file \.{compare.tex}. This file serves as input for the typesetting program~\TeX: by giving the command `\hbox{\.{tex compare}}' it will be processed, and the result is a file \.{compare.dvi}. This file can be either previewed or converted to hardcopy output by the system dependent programs for this purpose that accompany \TeX. Despite the somewhat elaborate processing trajectories, it will become apparent that the programmer has good control over the final result produced in both cases. A word of explanation about the names of \CWEB. and its constituent programs. The initial \.C's stand for the programming language, of course; the rest of the names are the same as those chosen by Knuth for the original \.{WEB}~system (which existed long before the World Wide Web). The \CWEB. language allows one to separately describe small parts of a \Cee~program and their interconnections, both formal (via module references) and informal (by some semantic relationship); with some fantasy this evokes the image of a web of connected pieces. These parts are linearised quite differently in their presentation for human readability than in the ``official'' form in which they are presented to the \Cee~compiler, and it is the program \.{CTANGLE} that does the somewhat complicated reordering to obtain the latter from the former. This process is traditionally called ``tangling'' the code, although one could also call it untangling if one prefers formal to human order. The \.{CWEAVE} program intertwines the \TeX\ and \Cee~parts of the source text and ``weaves'' them together like warp and weft, resulting in a beautifully formatted document. Despite these pretty metaphors, you will be forgiven if you sometimes get these names mixed up. This general organisation of \CWEB. has some immediate consequences. 
First of all, one needs to have an operational \TeX~system and (not surprisingly) a \Cee~compiler in order to use \CWEB.; the \CWEB. programs form only a comparatively small part of the utilities needed. Second, the \CWEB. language must be such that both valid \Cee~code and \TeX~input can be derived mechanically from it, which are rather different formats. Nevertheless the \CWEB. language is quite simple: this is because for almost all of the \CWEB. source text the required format is either that of \TeX\ or that of \Cee. The main function of the specific \CWEB. commands is to structure the source file and determine which parts of the input will be processed further in what way. Finally, a somewhat unfortunate consequence of \CWEB.'s setup is that errors may be detected by any one of \.{CTANGLE}, the \Cee~compiler, \.{CWEAVE} and \TeX. The knowledge about \Cee~and~\TeX\ built into the \CWEB. programs is far from sufficient to ensure that they will always produce error-free output code, although of course they do their best not to introduce any errors themselves. A bright point in the case of \Cee~errors, is that the \&{\#line} directives produced by \.{CTANGLE} enable the compiler to refer directly to lines in the \CWEB. source file in its error messages, rather than to the intermediate \Cee~file (but \TeX\ does not have a similar facility). It follows from these facts that the \CWEB. programmer must be acquainted both with \Cee\ and with \TeX; however, the depth of the knowledge required is not the same in both cases. Obviously, one cannot write a computer program without a good understanding of the programming language used, but a very superficial knowledge of \TeX\ will suffice: in most cases no \TeX pertise beyond the basic facts in chapters~2--6 of {\sl The \TeX book\/} is required (but please don't skip chapter~2, as only too many people have done). The reason for this is that one rarely needs to instruct \TeX\ to do sophisticated formatting. 
It is true that the proper typesetting of computer programs is a subtle matter, but it is precisely this part that is taken care of by \.{CWEAVE} (even for references to \Cee~constructs in the commentary), and the programmer can just concentrate on writing syntactically correct \Cee~code. On the other hand the full power of \TeX\ is available if one wishes to use it, for instance to illuminate the program with things like complicated tables, or math formulae of a different nature than those occurring in a computer program. Since the \CWEB. commands deal only with the structure of the source file, not with its contents, they can be very brief: they consist of `\.@' followed by one other character, and are commonly referred to as {\sl control codes}. For instance, \atspace. (i.e., `\.@' followed by white space) indicates the start of a new section, and \:c marks the start of the \Cee~part of a section that contributes to the unnamed module. Control codes may be placed at any position within the source lines, although it is customary to place the ones defining the coarse structure of the source file at the beginning of a line for better visibility. In some cases a control code marks the beginning of a piece of text that will be interpreted by \CWEB. in a special way, as for instance \:< which starts a module name; the end of these {\sl control texts\/} is always marked by the special code \:>. The character `\.@' was selected because it is quite uncommon both in \Cee\ and in \TeX\ source code, but in those cases where one does need to pass on the character itself (e.g., in \Cee~strings and comments) it should be written as \:@. We now discuss the various control codes, grouped by their function. Here we shall treat only the most important control codes, which are used regularly in ordinary programs. 
Treatment of a number of additional control codes, that either serve for fine tuning in special cases, or are intended to allow emergency fixes in unforeseen cases, is deferred to a later section, in order not to confuse novice \CWEB. users. For the codes that are discussed, we do however provide full details of their use; most of these can be skipped on first reading. A summary of all \CWEB. control codes can be found at the end of this manual. \subsection Sectioning codes: \:*, \atspace., \:\~ The most important control codes are those that specify the division of the \CWEB. program into sections. There are three codes that indicate the start of a new section, and are therefore called sectioning codes. Each of them has a slightly different effect, and each section must start with one of them (i.e., a section is never implicitly started). The three sectioning codes are \:*, \atspace., and \:\~, of which the second one is the most commonly used. No section numbers should be given in the source file: these will be automatically computed and inserted by \CWEB.. A tab or newline following `\.@' is considered equivalent to a space, and for any of these three control codes, (further) white space separating it from the \TeX~text that follows is ignored, as long as there is no completely blank line (which \TeX\ would interpret as the end of the paragraph that started with the section number). A section starting with \:* will start a new chapter of the \CWEB. document; it should be followed by the title of the chapter, which is terminated by the two-character sequence `\hbox{\.{. }}' (again the space might be any white space character). The title is not recognised by \CWEB. 
itself, but rather by \TeX, as a delimited macro argument \unskip\footnote{${}^\dagger$} {Therefore, if no correctly specified title follows~\:*, then \.{CWEAVE} will find nothing wrong, but \TeX\ will complain about a ``Runaway argument'' of a macro that the programmer did not explicitly write (namely `\.{\\N}'); this is one of the scarier error messages that novice users can come across, so please be warned. }. This means that if one wants to have an occurrence of the sequence `\hbox{\.{. }}' in the title itself, this can be achieved by enclosing the title (but not the `\hbox{\.{. }}' terminating it) in braces. If one wants to put other things than plain text in a chapter title, one should be aware that it is converted to upper case in the running heads of pages and also written to the table of contents file; only items that behave properly under these operations should be used in a chapter title. Apart from issuing a title that will appear in several places, a section starting a chapter will force a page break before it, and it will cause the section number to be printed on the terminal during the execution of \.{CTANGLE} and \.{CWEAVE}, as a progress report. As a feature for advanced users of \CWEB., some extra information may be supplied with the \:* control code: if it is immediately followed by `\.*' or by a decimal number, then this is not included in the chapter title, but rather interpreted as an indication of the ``level'' of the chapter. Here `\.{@**}' indicates the start of a grouping of sections even coarser than a chapter, and the grouping started by `\.{@*$n$}' becomes finer as $n$ increases, with `\.{@*0}' corresponding to unadorned \:*. 
The effect of this level depends on the definition of the \TeX\ macros that format the chapter title and the lines in the table of contents, `\.{\\N}' respectively `\.{\\contentsline}', to which the level is passed as first argument (for `\.{@**}' the level is~$-1$); it could affect for instance the font used for the chapter title or the amount of indentation of that title in the table of contents. In the default definitions of these macros the level is largely ignored, except that `\.{@*$n$}' will not force a page break for $n\geq s$, where $s$ is the value of the `\.{\\secpagedepth}' register, which is set initially to~2. In contrast to~\:*, a section starting with~\:\~ instead of~\atspace. will tie itself to the previous section, in the sense that a page break between these sections will be avoided. More precisely this is what happens: normally \.{CWEAVE} will instruct \TeX\ to break pages only between sections (except when one is too large to fit on a single page) and put as many sections on each page as possible subject to this restriction; however, a section starting with~\:\~ will be considered to be a continuation of the previous section for the purpose of page breaking. A situation where one would use \:\~ is the following: suppose we define a function, and also want to state its prototype, which will belong to a different module, since it has to appear earlier in the program or even on a separate (header) file. A natural place to give the prototype in the \CWEB. document is directly before the function, so that it can easily be seen that the prototype matches the actual definition. Now without special measures there is a substantial chance that a page break will occur between these two sections, since the short section with the prototype might fit on an already partially filled page, whereas the larger section with the definition might not. 
By starting the latter section with~\:\~, it can be achieved that in such cases the former section is moved together with the latter to the new page. Like any other section the very first section starts with a sectioning code (usually \:*), and any text that might precede it is not part of any section; this text is said to be ``in limbo''. This material is ignored by \.{CTANGLE}, and copied literally into the \TeX\ file by \.{CWEAVE} (except for the replacement of~\:@ by~`\.@'), following the first line which always reads `\.{\\input cwebxmac}' (in order to load the standard format). The purpose of the text in limbo is to allow issuing \TeX~commands that apply to the whole document (such as macro definitions, possibly modifications or additions to the standard format), or producing a title page or an introduction preceding the sections of the \CWEB. document. No control codes are allowed in the limbo text (well, almost; there are two exceptions, that will be mentioned below). The last section ends simply at the end of the \CWEB. source file; there is no way to add material after it (or elsewhere outside the sections). However, \.{CWEAVE} will append some material at the end itself (unless it is invoked with a `\.{-x}' flag): an index of identifier uses, a list of module names, and a table of contents. Because the index is seamlessly attached to the last section, it is customary to give that section the title ``Index'' and not to include any program fragment in it. \subsection Subsectioning codes: \:d, \:h, \:f, \:c, `\/\.{@< {\dots} @>=}' Each section, as delimited by the sectioning codes, contains a \TeX~part (although it may be empty), and in addition at most one \Cee~part, which always comes at the end of the section, and zero or more intermediate parts, of which there are three kinds: those that specify \&{\#define} and \&{\#include} directives, and format definitions (see below). 
Intermediate parts can be given in an arbitrary order, as long as they come after the \TeX~part and before the \Cee~part, if present. The beginning of any part other than the \TeX~part (which starts directly after the sectioning code) is marked by an appropriate control code, which is called a subsectioning code; these codes are optional in the sense that they need only be given if the corresponding part is present. The end of the \TeX~part is determined by the first subsectioning code, or in absence of any of them by the next sectioning code. The \Cee~part, if present, begins at the first occurrence of \:< or \:c; the former starts a defining occurrence of a module name, and the latter is used when the \Cee~part belongs to the unnamed module. The code \:c may also be written as \:C (in fact all alphabetic codes are equivalent to their upper case counterparts). Once the \Cee~part of a section is started, any further module names are interpreted as module references rather than as defining occurrences. A module name, whether defining or not, consists of \TeX~code between the \:< and the next occurrence of~\:>. As a measure against accidental misinterpretation of module names, due for instance to a forgotten \atspace. or \:c, the closing \:> of a defining occurrence must be followed (optionally with some white space in between, but no newline) by one of `\.=', `\.{==}', `\.{+=}' and `\.{+==}', while for a non-defining occurrence this must not be the case. The possibilities `\.{+=}' and `\.{+==}' are included for those who like their source code for continuations of modules to resemble the printed output, but the distinction is ignored by \CWEB.: it will simply print `$\EQ$' after the first defining occurrence of a module name and `$\PE$' after any further defining occurrences. The subsectioning codes that mark the beginning of intermediate parts are \:d, \:h, and \:f. 
Of these the first two specify preprocessor directives for respectively a macro definition and the inclusion of a header file, and the last specifies a so-called format definition. The codes \:d and \:h will be replaced by \&{\#define} respectively by \&{\#include} in both the program and the printed document. We already mentioned how the effect of using \:d or \:h differs from that of using \&{\#define} or \&{\#include} directly in the \Cee~part of the section: the directive will be moved to the beginning of the \Cee~file, and in case of \:h, the header file will be scanned for typedef definitions. Here we mention a few more points that are relevant when writing the source file. Macro definitions following \:d are not line-oriented like those in \Cee: everything up to the next subsectioning or sectioning code is considered to belong to the macro, and newlines need not be escaped, as \.{CTANGLE} will take care of escaping any newlines while writing to the \Cee~file. There are some mild restrictions on the replacement text of a \:d macro definition: parentheses and braces should be balanced (this is a deliberate requirement, made in order to allow detection of programming errors that would otherwise be very hard to track; the same requirement also holds for each complete \Cee~part of a section), and no module names should be referenced. It is not possible to use other preprocessor directives in macro definitions either, but that is because this is already impossible in~\Cee. After a \:h command, at least one newline should occur before the next sectioning or subsectioning code. Apart from this, \:d and \:h are followed by whatever would follow \&{\#define} respectively \&{\#include}, with the same deviant lexical rules as in~\Cee. So whether a macro introduced by \:d is defined with or without arguments depends on whether the first character after the identifier following \:d is a left parenthesis or not, where spaces {\it are\/} significant. 
The file name after~\:h may be enclosed either in double quotes or in angle brackets; the latter indicates that the header file is located in some system include file area. After the file name a comment may be placed. The header file specified after \:h itself should of course contain ordinary \Cee~code rather than \CWEB. input; after all, it will be read directly by the \Cee~compiler. As mentioned before, the file will be scanned by \.{CWEAVE} as well, searching for any typedef definitions; moreover, if it contains any lines starting with \&{\#include}, then those files will be scanned recursively as well. In the case of system header files (specified with angle brackets), \.{CWEAVE} will refrain from scanning the file unless the file is found on an explicitly specified search path (see below); in fact it is better not to scan any of the \caps{ANSI/ISO} standard header files, since \.{CWEAVE} already knows about all typedef definitions that can occur in such header files. It is not uncommon that a header file specified after~\:h (using quotes) is itself an auxiliary output file produced from a \CWEB. source file, possibly even from the very source file containing the \:h~command. There is no circularity or other problematic aspect of such a situation, but one should remember to run \.{CTANGLE} to produce the header file, before the run of \.{CWEAVE} that needs it. The way \.{CWEAVE} searches for the header file depends on how the name following \:h is specified: if it is enclosed in quotes then \.{CWEAVE} will look first in the current directory. There may have been specified one or more alternative places to look for header files, in the form of strings that can be prefixed to the file name (given on the command line or compiled into \.{CWEAVE}, or both). 
If so, these will be tried in order, regardless of the delimiters used for the file name, until a match is found; \.{CWEAVE} will only insist on actually finding a header file if the file name was enclosed in quotes. There is one aspect of scanning header files that might cause a problem in some cases: when scanning a header file, \.{CWEAVE} is unaware of other preprocessor directives that may disable certain nested \&{\#include} directives; \.{CWEAVE} will therefore obey such \&{\#include} directives unconditionally. Such a problem is not very likely, but it could be serious if the nested header file cannot be found (and is enclosed in quotes), or if there are circular references between header files. Various solutions could be found for such a problem, depending on the precise situation, varying from creating dummy files or avoiding conditional compilation by the use of change files to (as a last resort) avoiding the scan of the header file altogether, by using \&{\#include} in a program fragment rather than~\:h; in the latter case relevant information could be extracted from the header file manually, and converted into format definitions (\:f) described below. When preprocessor directives are incorporated in the \Cee~part of a section, the ordinary rules of \Cee\ apply: they should be spelled out in full, as `\.{\#define}' or `\.{\#include}', and occur at the beginning of a line; the directive ends at the next non-escaped newline. Although in \Cee\ it is permissible to extend a preprocessor directive into the following line by placing a multi-line comment that contains the newline, this should not be done in \CWEB., since the comment will be removed by \.{CTANGLE} but the newline will remain. If one needs a very long comment after a preprocessor directive, one should start it on the line following the directive; in the formatted document such a comment will be placed on the same line as the directive. The same holds for comments placed after a \:h command. 
Format definitions, indicated by the code \:f, are entirely specific to \CWEB., and have no effect on the \Cee~program that is defined. They are not needed very often, but when they are, a proper use of them is essential for obtaining acceptably formatted output. To understand why they are sometimes needed, one has to consider the way \.{CWEAVE} formats program fragments. The input is broken up into tokens (like identifiers, constants, operator symbols), and a syntactic category is attached to each; the resulting sequence of categories is then analysed according to a grammar, and formatted correspondingly. Certain identifier tokens are recognised as reserved words and get a corresponding syntactic category, others are recognised as typedef identifiers and get the same syntactic category as for instance \&{size\_t}, and the remaining ones are treated as ordinary identifiers. This scheme usually works fine, but occasionally there can be problems, caused by the fact that \.{CWEAVE} is not aware of all the information that is available to the compiler. The main reasons for this are macros (which may cause the code seen by the compiler to be quite different from that seen by \.{CWEAVE}), typedef declarations that are hidden from \.{CWEAVE}'s sight, and module names that stand for a construct of a different syntactic category than \\{statement} (which is what \.{CWEAVE} expects them to be by default). In all these cases \CWEB. provides mechanisms for the user to put \.{CWEAVE} on the right track, and format definitions are one such mechanism (others will be discussed below). Format definitions allow the programmer to explicitly state the syntactic category that \.{CWEAVE} should attach to a given identifier. They have the form `\.{@f x y}', which will become `\&{format}~$x$~$y$' in the typeset output; here $x$ and $y$ can be arbitrary identifiers or keywords. This definition has the effect of associating to~$x$ the same syntactic category that is associated to~$y$. 
Such a change of category is required when an identifier is defined as a macro to stand for a keyword: whenever you say `\.{@d ident keyword}', say `\.{@f ident keyword}' as well. For instance, the author of this manual thinks the keyword \&{static} is not very informative when applied to functions, and therefore often creates an alias for it by saying `\hbox{\&{\#define} \\{local} \&{static}}'; this directive is then followed by `\hbox{\&{format} \\{local} \&{static}}'. We see that the first identifier after \:d~or~\:f is always typeset in italics; this is so despite the fact that in the example, as a consequence of the format definition, this identifier will be typeset as \&{local} in all other places. Another reason to change a category could be that an identifier is in fact a typedef identifier, but \.{CWEAVE} cannot deduce this fact (presumably the declaration occurs in some header file that is not scanned by \.{CWEAVE}); in such cases one can use a standard defined type like \&{FILE} or \&{size\_t} as the second argument to \:f. Finally, it is possible that some \Cee~implementation uses additional, non-standard keywords (or macros that behave as a keyword); such an identifier should be formatted like a standard keyword that has a similar syntactic function as it (which hopefully exists). In fact the identifier \&{va\_dcl}, which is used in a convention for functions with variable argument lists that is not part of \caps{ANSI/ISO}~\Cee, is nevertheless built into \.{CWEAVE}, because there is no keyword that has the required syntax category (namely \\{declaration}), so that it would otherwise not be possible to introduce it; one can on the other hand easily undo the reservation by saying `\&{format}~\\{va\_dcl}~$x$'. Format definitions can also be used for a reason that does not have to do with syntax analysis. 
There are two classes of identifiers that are parsed like ordinary identifiers, but are nevertheless treated specially; these classes consist initially of the identifiers \\{TeX} respectively \\{NULL}. The main distinction of these classes is that their identifiers are typeset differently, namely as \TeX~macros; the mentioned identifiers will therefore be written to the \TeX~file as `\.{\\TeX}' respectively `\.{\\NULL}', which causes them to be typeset as `\TeX' respectively `$\NULL$'. This mechanism gives the user the ability to change the appearance of identifiers in any desired way, simply by defining the macro appropriately. The class of~\\{TeX} is intended for identifiers that are still alphabetic in appearance (possibly with letters being accented or shifted), while the class of~\\{NULL} is intended for identifiers that are represented by mathematical symbols. Hence the \TeX~macro will be processed in horizontal mode with italic font selected in the first case, and in math mode in the second case. Simply saying `\.{@f alpha NULL}' suffices to make \\{alpha} print as~$\alpha$; the format definition is typeset as `\hbox{\&{format} \\{alpha} $\NULL$\quad($\alpha$)}' to make the correspondence of the identifier and typeset symbol evident. Unlike \Cee~identifiers, \TeX~macros cannot contain underscores and digits. On writing of the macros to the \TeX~file, underscores are replaced by~`\.x', so that they will become part of the macro. Digits however are not changed, so identifiers containing digits should not be put into the class of~\\{TeX} or~\\{NULL} by a format definition, unless special care is taken: the macro will only consist of the part up to the first digit. No index entries for identifiers of the class of~\\{NULL} are recorded (the same holds for keywords); on the other hand index entries for typedef identifiers are recorded, despite the fact that they are formatted as keywords. 
\subsection Text within \Cee~program fragments: comments and module names Within the program part of a section, the input should basically follow the rules of the \Cee~syntax, but amidst the \Cee~tokens there may also occur module names and comments. In both cases the \Cee~code is temporarily interrupted by a piece of ordinary text that is processed directly by~\TeX, just like the \TeX~part of a section. In the case of module names this text is delimited by \:<~and~\:>, in the case of comments by `\.{/*}'~and~`\.{*/}'. So comments are actually valid \Cee~comments, but the converse is not true: the contents of a comment is processed by~\TeX, so not all \Cee~comments can be used without modification; a point to keep in mind if one is converting ordinary \Cee~code to \CWEB.. Like \Cee~comments, the comments of \CWEB. cannot contain the two-character sequence `\.{*/}' (regardless of the \TeX~context, because comments are recognised before \TeX\ even gets to see them). The sequence `\.{/*}' is forbidden as well, which allows \.{CTANGLE} to warn the programmer about unclosed comments, that might otherwise lead to particularly elusive errors. In the \TeX~texts of comments and module names no control codes are allowed (except in embedded pieces of \Cee~code, described below), but \:@ can be used to represent the character `\.@' (this is true in all contexts); a module name is terminated by the first occurrence of the code \:>. During the processing of these \TeX~texts, line ends are replaced by spaces, which implies that \TeX~comments (starting with `\.\%') cannot be used. (In the \TeX~part of a section on the other hand, such comments can safely be used: they are completely ignored by \.{CWEAVE}, and not even copied to the \TeX~file.) The text for module names serves a dual purpose: apart from determining the text representing the module in the printed output, it also serves to identify defining occurrences of a module name with references to it. 
For the latter purpose it is irrelevant how the contents of a module name will be further processed; there should basically be a character-by-character match. This rule is however alleviated in two ways to make matching easier. First, any amount of consecutive white space is replaced by a single space, and white space at either end of a module name is discarded. Second, an abbreviation mechanism for module names may be used. A module name may be specified by a prefix of the full name, followed by~`\.{...}'. A few conditions must be satisfied to allow this mechanism to work. All specifications of one same module name must be extensions of the one among them of minimal length, which must not be a prefix of any other (full) module name. All specifications of the name that do not end with~`\.{...}' must be equal; there must be at least one such specification, which defines the full module name of which all other specifications give a prefix. Loosely speaking, the minimal specification is used for identification purposes, and the maximal specification is used for typesetting all occurrences. With the help of these rules, and a text editor, there should be little reason to choose module names any shorter than what is needed to express the function of a module clearly. There is a limit on the length of a module name, but it is so generous that this could hardly be a problem: 1000~characters after replacement of consecutive white space characters by single spaces. The parser of \.{CWEAVE} normally assumes that references to modules stand for (compound) statements, which is likely to be a valid assumption in the majority of the cases (or at least one that does not upset parsing, for instance when the module is actually a statement sequence). Occasionally however, one of two other syntactic categories applies instead, namely \\{declaration} or \\{expression} (the remaining categories are extremely unlikely). 
When this is the case, the programmer should make it clear to \.{CWEAVE}, lest the parser might choke on the input and produce badly formatted output. This can be done by placing the control code \:; once (for a \\{declaration}) respectively twice (for an \\{expression}) directly after the module name (in the latter case this also conveniently provides a separation from any `\.=' or `\.{+=}' that might follow). At the end of the \CWEB. document, after the index, a list will be placed of all module names used. This list is sorted lexicographically, with sorting based on the source strings for the full module names, collated (unlike the identifier index) in the order of the internal (\ASCII.) character codes. For this reason it is a good convention to ensure that all module names are already distinguished by a prefix consisting of alphabetic characters and spaces only, of which the first word is capitalised; then the order of the list will be natural and independent of any internal details that the reader is not aware of. \subsection \Cee~code within text: `\pb' fragments In order to mention a piece of \Cee~code within \TeX~text, it can simply be enclosed in vertical bar characters (`\.|'); then \.{CWEAVE} will format it in a way similar to the \Cee~code of modules. This feature may be used in any kind of \TeX~text except in limbo, i.e., in the ordinary \TeX~part of a section, in comments and in module names. The piece of \Cee~code itself should not contain any comment. The ``lightweight'' construction with vertical bars resembles the math shift characters (`\.\$') for \TeX's math mode, and indeed in simple cases like `\.{|a[i+3]|}' the output would be identical if the `\.|' characters were replaced by `\.\$'. 
The two modes should not be confused however: the ``\Cee~mode'' is implemented by \.{CWEAVE}, which translates the \Cee~constructs before \TeX\ ever gets to see them; it often uses math mode itself, and as a consequence it should never be used when \TeX\ is already in math mode. The syntax used by \.{CWEAVE} is of a stricter kind than that of \TeX's math mode, but it can still be used for some expressions that are not quite proper~\Cee; in particular there is no objection to writing things like $\\{begin}\Z p<\\{end}$, which humans understand better than compilers. On the other hand an incomplete formula like `$\leq n$' (which can be used in sentences, with the missing operand expressed in words) is better written as `\.{\$\\leq n\$}' than as `\.{|<=n|}': the latter is not understood by \.{CWEAVE}'s parser, and therefore the `\.{<=}' and the `\.n' are translated separately with an ordinary space in between; the result looks reasonable, but \TeX\ may very well decide to break the line at the space. There is a lexical price to pay for using delimiters that are not control codes: it is impossible to use character `\.|' in any piece of \TeX~text where `\pb' constructions are allowed (even if one tries for instance to set up a verbatim context, because \.{CWEAVE} acts before \TeX\ does). This should not cause great problems however, since `\.|' is not a character in ordinary text fonts, and for `\.|'~and~`\.{\\|}' in math mode, plain \TeX\ already has the substitutes `\.{\\vert}' and~`\.{\\Vert}'; for exceptional text fonts (like typewriter type) that do have `\.|', the standard format for \CWEB. provides `\.{\\v}' as a substitute (by means of \.{\\chardef}) for~`\.|'. Inside `\pb' one has a similar problem of not being able to write the bitwise-or operator~`$\OR$' in the usual way. For this purpose \CWEB. provides the control code~\:v to represent that operator (which you may also use in an actual program fragment, although there is no need to do so there). 
Note that the composite operators `\.{|=}'~and~`\.{||}' can be used without problem; consequently no `\pb' should be immediately followed by~`\.=' or by another `\pb'. Although \Cee~comments are forbidden inside `\pb', it is possible to mention a module in \TeX~text by enclosing the module name in vertical bars; this \TeX~text can either be the \TeX~part of a section or a comment, but not another module name. Mentioning a module in this way does not imply any inclusion of the module body, so it is not considered to be a use of the module; in the cross-references it is referred to as a ``citation'' of the module. For the module name itself the same rules apply as for other occurrences of module names; in particular the abbreviation mechanism can be used, and \.{CWEAVE} will automatically insert the relevant section number in the module name. Citing a module may form an exception to the rule that an occurrence of a module name when the \Cee~part of a section has not already started must be a defining one. Since \.{CTANGLE} normally ignores the vertical bars of `\pb' constructions together with the surrounding \TeX~text, it needs a simple rule to decide whether a module is being cited or defined. It does this by inspecting the next token (where a newline counts as a token, but codes like \:; that are ignored by \.{CTANGLE} are skipped): if this is `\.=' (or `\.{+=}' etc.), then it assumes that the module is being defined, and if it is `\.|' that the module is being cited; in other cases it signals an error (this could for instance happen if a \:c code is missing). Therefore it is not really necessary that the module name is the only item in the `\pb' construction, as long as it is the final item; this extra freedom is not likely to be of much practical use, however. 
\subsection Modules producing additional output files: `\/\.{@( {\dots} @>}' As was mentioned before, there are special module names that will cause the program produced by that module to be written to a separate output file. Such a module name is specified by enclosing the file name in \:(~and~\:>; in fact it is sufficient to use \:( instead of~\:< in just one occurrence of the module name. The file name will be set in typewriter type by \.{CWEAVE}, so that the difference with an ordinary module name is easily perceived. Although hardly relevant for this case, the compression of white space and the abbreviation mechanism for module names also applies to these special module names. The file name can contain any special characters, including `\.|' and `\.@'; the latter must as always be doubled. \subsection Control codes that help parsing in special situations: \:;, \:[, \:] In the discussion of the \&{format} command we already mentioned the way \.{CWEAVE} parses and formats program fragments, and the fact that some programming constructions can confuse the parser, leading to badly formatted output. Like \:f, the control codes in this subsection provide ways to avoid such problems, but they do so on a local basis in the code itself, rather than by global definitions. They are mainly used in connection with macros with replacement texts and/or arguments that are not expressions. Since macro invocations look like identifiers or function calls, and macro arguments appear to be function arguments, a piece of code containing a macro invocation whose replacement text and arguments are not all expressions may seem syntactically incorrect when not expanded. 
An example of such a scenario is a macro whose replacement text is a compound statement; an invocation of such a macro needs no semicolon following it, and sometimes placing a semicolon would actually cause an error (e.g., if the invocation is used as the first branch of an \&{if}-\&{else} statement, since the semicolon would be taken to be an empty statement {\it after\/} the conditional statement, and the \&{else} would be unmatched). Since the parser of \.{CWEAVE} does not expand macros, it will fail to recognise a macro invocation without a following semicolon as a statement, and like many parsers it is not good at recovering from such a failure. Although no error message is usually issued, formatting can be severely disrupted; indeed, correct formatting will only be inserted locally for constructions that do not contain the ``error'', so one unrecognised construction can easily destroy the layout of the entire program fragment it occurs in. \.{CWEAVE} provides some simple mechanisms for guiding the parser through such unusual code, and by applying them in several ways nearly all problems that arise in practice can be solved. One of these is the control code~\:;, which produces no \Cee~code (nor any printed output), but which can be used in places where the \.{CWEAVE} parser would require a semicolon for a successful parse; another is the combination \:[,~\dots,~\:], used as a pair of parentheses, which will cause whatever is enclosed to get the syntactic category `expression', regardless of its actual category. The most obvious use of~\:;\ is in the case already mentioned of a macro invocation that expands to a (compound) statement: placing \:; after such a macro invocation will cause it to be recognised as a statement by \.{CWEAVE}, keeping its parser happy while not affecting the actual \Cee~program. There are other situations as well where one does not want to place a semicolon, yet wishes \.{CWEAVE} to act as if it were there. 
If a macro stands for a statement that happens to end in a semicolon, then it is a good idea to suppress the final semicolon in the definition: in that case all invocations can supply the semicolon, and one does not have to remember writing \:; instead of~`\.;' at the invocations of this macro. For instance, the macro replacement text could be `\&{do}~\<statement>~\&{while}~(\<condition>)', or `\&{if}~(\<condition>)~\<statement>~\&{else}~\<statement>', or even `\&{if}~(\<condition>)~\<statement>~\&{else}', where the final \&{else} was placed with the purpose of picking up the following semicolon as an empty statement; in all these cases the macro invocation together with the following semicolon is a complete statement that can be used without special precaution, even as the first branch of an \&{if}-\&{else} statement. However, in these cases the macro definition itself needs a bit of extra care: a~\:; should be placed at the end to represent the semicolon that will follow in invocations, so that \.{CWEAVE} can properly format the replacement text of the macro. Finally, there can be purely aesthetic reasons for wanting to suppress a semicolon at the end of a `\pb' construction, for instance when referring to a declaration as `$\&{char}~\m*p$', which strictly speaking requires a final semicolon to become a declaration; to let \.{CWEAVE} format this properly, one should write `\.{\v char *p @;\v}'. Constructions like `$\&{return}~\\{home}$' and `$\&{goto}~\\{sleep}$', which are fairly common to mention in module names, would also fall into this category, but in this particular case no \:; is necessary, since \.{CWEAVE} parses these as expressions, even though strictly speaking they are not. Since \:; is invisible in the output, yet can be sensed by the parser, it can conveniently be used to pass information to the parser, and there are a few instances of such use where it does not stand for a semicolon. We already mentioned placing one or two copies of \:; after a module name to indicate the syntactic category. 
Another use is to place it before a typedef identifier to cause it to be treated as an ordinary identifier; this is useful if the identifier is locally redeclared, or used as field selector in a \&{struct} or \&{union} specifier. When the identifier is used as a tag immediately after \&{struct} or \&{union}, or as a selector after `$.$'~or~`$\MG$', it is not necessary to place \:; before it. Unlike \:;, the control codes \:[~and~\:] themselves do not participate in parsing. The material between them is parsed normally, which may or may not succeed in recognising a single construct; then the pieces recognised are concatenated (without separation), and the result is given the category \\{expression} for the purpose of parsing further items outside. The most obvious use of this mechanism is to encapsulate any arguments in a macro invocation that are not expressions (e.g., some storage allocation macros have a type as argument), so that the invocation can be parsed as a function call. There need not be anything in between \:[~and~\:], so `\hbox{\.{@[ @]}}' can be used as an ``invisible expression'' in the same way as \:; can be used as an invisible semicolon. An example where this is useful, is a module standing for an initialiser list, that is moreover defined in multiple sections (see for instance the module `$\X157:Rules\X$' in the source document for \.{CWEAVE}): it is natural to end each program fragment defining a part of such a module with a comma, but this will not be parsed properly unless an expression follows, which can be achieved by adding `\hbox{\.{@[ @]}}'. 
Finally, if for some tricky piece of code none of the mentioned methods suffice to get it parsed properly by \.{CWEAVE}, one may use \:[~and~\:] (followed by \:; if necessary) to minimise the damage: by placing \:[~and~\:] around an appropriate part of the program containing the problem area, we can ignore the fact that the parser failed to recognise it, and force it to continue as if it had recognised an expression; thus we can contain the problem, and prevent the effects from spreading any further. \beginsection Invocation of {\tentex CTANGLE} and {\tentex CWEAVE} The simplest form of calling \.{CTANGLE} and \.{CWEAVE} is to supply one command line argument, which is the name of the \CWEB. source file without the `\.{.w}' suffix. It is possible however to modify the behaviour of the programs by selecting certain optional settings, and small patches to the master source file can be achieved by supplying a ``change file''. The general syntax for invoking \.{CTANGLE} is $$ \hbox { \.{ctangle}\quad [$(\.+\mid\.-)$\<options>]\quad \<\CWEB. file>[\.{.w}]\quad [$(\<change file>[\.{.ch}]\mid\.+\mid\.-)$ \quad [\<output file>[\.{.c}]]] } $$ where square brackets indicate optionality, vertical bars separate alternatives, and parentheses are used for grouping. Here \<options> is a string of one or more characters designating options, as described below; there may be more than one such string of options, and they may be given between or after the file names instead of before them, with no difference in meaning. For \.{CWEAVE} the situation is entirely similar, except that the default extension for the output file is `\.{.tex}' instead of `\.{.c}'. \subsection Command line options A command parameter that starts with `\.+' or `\.-' and has at least one more character, serves to control optional settings of the program being invoked. The characters after the initial character `\.+'~or~`\.-' denote individual options that are turned on respectively off; option characters are case-insensitive. 
The character `\.i' forms an exception, since it is used to supply a string argument rather than to set a switch; the string is the remainder of the option string (following the `\.i'), and `\.{+i}'~and~`\.{-i}' are equivalent. All option characters will be accepted, but only the ones listed below have any effect on the operation of the program. We list the switches in the direction that alters the default setting. $$\vcenter{\tabskip=1em \halign{\tentex#\hfil & #\hfil & #\hfil \cr \omit\it switch& \it program& \it effect \cr \noalign{\yskip} -b & both & do not write a banner line to the terminal \cr -p & both & do not show a progress report on the terminal \cr -h & both & omit confirmation of successful completion \cr \noalign{\yskip} -l &\.{CTANGLE} & omit \&{\#line} directives, make \Cee~file look nice \cr \noalign{\yskip} -x & \.{CWEAVE} & do not attach index and other information at the end of the document \cr +d & \.{CWEAVE} & report failure to completely parse pieces of \Cee~code \cr +t & \.{CWEAVE} & write three files, with separate ones for index and list of module names \cr +e & \.{CWEAVE} & even out number of pages before table of contents \cr \noalign{\yskip} +i & \.{CWEAVE} & add alternative search path for header files (takes argument) \cr \noalign{\yskip} +f & \.{CWEAVE} & force a line break after each statement \cr +a & \.{CWEAVE} & force all statements to be on a line by themselves \cr +u & \.{CWEAVE} & ``unaligned brace style'': do not align `$\{$' and `$\}$' vertically \cr +w & \.{CWEAVE} & ``wide brace style'': force line breaks before and after `$\{$' \cr +m & \.{CWEAVE} & ``merged declarations style'': do not force line breaks between local declarations \cr \noalign{\yskip} +c & both & run in compatibility mode with \LKC. 
\cr +s & both & show memory usage statistics at completion \cr ++ & both & handle \Cpp~language instead of \Cee \cr }} $$ The options `\.{+d}'~and~`\.{+s}' only operate if \.{CWEAVE} or \.{CTANGLE} was compiled with the preprocessor symbol \.{DEBUG} respectively \.{STAT} defined (with most \Cee~compilers this can be accomplished by including a command line parameter \.{-DDEBUG} respectively \.{-DSTAT} when compiling the \CWEB. system). The options `\.b', `\.h', and `\.p' can be used to control the amount of output that \CWEB. writes to the user terminal; the combination `\.{-bph}' will eliminate terminal output altogether when no errors are encountered. The option `\.{-l}' of \.{CTANGLE} is intended either for use with broken compilers or debuggers that cannot handle \&{\#line} directives properly, or for cases where the \Cee~file is of more importance than just as an intermediate file, for instance when the program is transferred to people who do not wish to practice \lp.. Apart from omitting \&{\#line} directives and comments that indicate the section number from which code originates, an attempt is made to make the \Cee~file more readable to humans: the spacing and (almost all) comments of the source file are preserved in the \Cee~output, and when modules are substituted into others, indentation levels are accumulated, so as to produce indentation that looks natural. Doubtlessly the result is not perfect (and lines may get quite long), but it is definitely more readable than the output normally produced. Since layout and comments of the source file need to be preserved by \.{CTANGLE}, this option consumes significantly more memory than its contrary. 
The option `\.{+d}' causes \.{CWEAVE} to issue a warning when it could not properly parse some piece of \Cee~code; this could happen either because a code fragment is incomplete in the sense that it does not represent a single complete syntactic entity (as in the `\.{|<=n|}' example above, or when a module body ends with a label without a following statement), or because the code is actually unsyntactic, or because \.{CWEAVE} has been fooled by an unusual construction. In all cases however the result can be (very) badly formatted output, and a correction should be made; users who care about the quality of the typeset output are advised to always set this option (or at least when the document is being finalised). Setting the `\.{+d}' switch is equivalent to placing a control code \:1 at the beginning of the first section; the nature of the warning messages and possible remedies will be discussed later in this manual. The two output files that the option `\.{+t}' will cause \.{CWEAVE} to create in addition to its main output file, are called \.{\<name>.idx} and \.{\<name>.scn}, where \<name> is the name of the main output file without its extension. These files will be read by `\.{\\input}' commands in the main output file, so that the typeset document will not be any different; on large projects however it can be helpful to have this information on separate files, for instance for making a global index. The option `\.{+e}' is intended for use with two-sided printers: it ensures that the table of contents comes out on a fresh sheet of paper, so that it can conveniently be moved to the front. The option `\.{+i}' (or equivalently `\.{-i}') can be used to specify a directory for \.{CWEAVE} to search for header files in \:h commands. Although directory structures are system-dependent, \CWEB. 
assumes that a file can be looked up in a specified directory by prefixing a string indicating that directory to the file name (this works for many systems); the desired prefix string should then be supplied as the remainder of the option string after the `\.i' character. E.g., on the \caps{UNIX} system the author uses, \.{CWEAVE} can be told about the location of the `Xlib' header files by supplying an argument `\.{+i/usr/local/X11R5/include/}' (one could replace `\.{+i}' by `\.{-I}' to make it look more like the similar option passed to the \Cee~compiler); the important thing to note is the final pathname separator `\./'. Up to~8 additional prefixes can be specified by giving several such arguments; they will be tried in order from left to right. It is also possible to fix one such prefix at compile time, by defining the preprocessor symbol `\.{CWEBHEADERS}' to be the desired prefix string when compiling the compilation unit \.{common.c} of \CWEB.; this will behave as if it were the first prefix specified by a `\.{+i}'~argument. The last five options mentioned will alter layout style of program fragments. The option `\.{+f}' will result in a more vertical style than the default, and `\.{+a}' will do so even more; the difference between them is that `\.{+f}' will not force a simple statement to start on a new line if it follows a label or the condition of an \&{if} or \&{while} statement, whereas `\.{+a}' will start a new line in such cases. The option `\.{+u}' selects a style in which corresponding opening and closing braces are unaligned because a line break is inserted after~`$\{$' instead of before~it. The option `\.{+w}' on the other hand selects a brace style that has more vertical symmetry than the default one, since opening braces will appear on a line by themselves, like closing braces; the price is that listings will consume more paper. The option `\.{+a}' overrides `\.{+f}', and similarly `\.{+w}' overrides `\.{+u}'. 
Finally, the option `\.{+m}' is for people (like the author) who are extremely keen on saving paper: it avoids forced line breaks between the declarations in a compound statement, just like they are not placed by default between the statements; the separation between declarations and statements within a compound statement is still indicated by a line break, that even has some extra vertical space, because this separation is significant in the \Cee~syntax (unlike the \Cpp~syntax). In compatibility mode, specified by `\.{+c}', both \.{CTANGLE} and \.{CWEAVE} modify their behaviour in such a way that they try to ensure that they can handle any file that can be correctly processed by \LKC., and that the output is an equivalent \Cee~program, respectively a valid \TeX~file (this is the hard part) that produces a comparable printed document. In the current version this claim can only be made for programs written in~\Cee; a wholehearted attempt to do the same for \Cpp~programs would cost a substantial amount of extra work. There are so many differences in the details of formatting between \CWEBx. and \LKC. that one cannot expect formatted output that is identical to what would be produced under \LKC., but to get the best approximation, one should in addition to~`\.{+c}' specify the options~`\.{+uft}'. The option `\.{+s}' is included because the \CWEB. utilities use statically allocated memory areas, which may therefore run out; using this option one can see how close one is to the limits of \CWEB.. 
The most important limited resources that it provides information about are: \ (a)~The name tables in which \.{CTANGLE} and \.{CWEAVE} store all distinct identifiers and index entries, respectively module names (the entries `identifiers', `module names', and `bytes'); \ (b)~\.{CTANGLE}'s main memory, in which the complete \Cee~program file processed during a single run has to be stored, albeit in a compactified form (`replacement texts' and~`tokens'); \ (c)~\.{CWEAVE}'s cross-reference memory, in which all the data for the index and list of module names are stored (`cross-references'); \ (d)~its parsing buffers, which must be able to hold any one program fragment or piece of \Cee~code (`scraps', `texts', and~`tokens'). There should be no immediate need to increase the size of these memory areas, since even for the main program of \.{CWEAVE}, the largest of \CWEB.'s own compilation units, the use of any of these resources is less than a third of the amount available. There is one resource of which a larger fraction is used, namely `trie nodes', but its usage depends only on the set of grammar rules used, which is independent of the particular \CWEB. source file. When for some source file \CWEB. is approaching its limits, one can of course try to recompile \CWEB. with larger arrays, but alternatively one may restructure the source file: when one of (a),~(b), or~(c) runs out, one might consider breaking up the file into several separately processed pieces; when (d) runs out, a remedy could be splitting up some huge module body into smaller ones, by introducing submodules or multiple definitions of the module. Switching to the \Cpp~language has only a minor influence on the operation of \CWEB.: one-line comments starting with `\.{//}' will be recognised, the main output file produced by \.{CTANGLE} will have default extension `\.{.C}' instead of~`\.{.c}', and \.{CWEAVE} will recognise a few more reserved words and use a slightly different syntax. 
Since there is no general agreement about the proper extension for \Cpp~files, an alternative default extension for \Cpp~mode (instead of \.{"C"}) may be built in by setting the preprocessor symbol \.{CPPEXT} to the desired string (that should not contain the leading period) when compiling \.{common.c}. Currently the \.{CWEAVE} grammar will handle only a basic subset of the \Cpp~language, which does not include templates or exception handling. \subsection File name arguments Any command line arguments that do not have the form of an option are taken to indicate file names; their number can vary from 1~to~3. The first one specifies the main source file, the second (if present) indicates the change file, and the third optionally defines a non-standard name for the main output file. The contents and function of the change file is discussed in the next section; here we just indicate how the actual file names used are derived from the given file name arguments. As far as \CWEB. is concerned a file name is composed of a base name and an extension. Loosely speaking, the extension of the main file defaults to~`\.{w}', that of the change file to~`\.{ch}', and that of the output files to `\.{c}' or `\.{tex}' for \.{CTANGLE} respectively \.{CWEAVE} (but see also the discussion of the `\.{++}' option above); the base names of the change file and the main output file default to that of the main file. If in place of a change file name an argument `\.-' is specified, no change file is used; also if only one file name argument was given, or if the change file name was specified as `\.+', then the default change file name is tried, but if no such file exists, processing proceeds without a change file. (Specifying the change file as `\.+' is only useful if a third file name argument is given.) Therefore, assuming regular naming conventions, there is no need to specify more than the main file name without extension, whether or not a change file is being used. 
The precise rules are as follows. On file systems where an extension is not a standard property of file names, like that of \caps{UNIX}, it is assumed that a period is a valid character in file names; a full file name is then formed by concatenation of the base name, a period and the extension (note that this implies that on such systems \CWEB. cannot access files whose name contains no period at all). Conversely, a string designating a full file name is broken up into a base name and an extension at the last occurrence of a period; if no period is present, then the string is taken to specify a base name only, and is said to have no extension. If the first file name argument has an extension, it specifies both base name and extension of the main source file, otherwise it specifies the base name, and the extension is taken to be~`\.w' (if no such file is found, the extension `\.{web}' is also tried, but this feature is obsolete). The base name of the main source file is also the default base name of the change file and the main output file; their default extensions are as described above. If a second and possibly third file name argument is present and is not `\.+'~or~`\.-', it overrides the base name, and also the extension if it has one, of the change file respectively of the main output file. No change file will be used either if the second file name argument is `\.-', or if no change file is found when the second file name argument is `\.+' or absent. \beginsection Subsidiary input files and change files As we have described it so far, the \CWEB. tools read a single source file, from which a main output file and possibly some auxiliary output files are produced. Since \Cee~programs can be built from several compilation units, it is not uncommon that several \CWEB. source files contribute independently to the same program, and there might be non-\CWEB. source files as well. However, even what is conceptually a single \CWEB. 
source, described by a single printed document, may in fact be composed from several input files. Two mechanisms are provided for combining information from several files, with different purposes. First, subsidiary files may be read in from the main source file in a way similar to the way \&{\#include} files are handled by a \Cee~compiler. In the case of \CWEB. however, the main purpose is usually not to share information among several sources, but merely to allow breaking up large source files into more easily manageable parts. Second there is the change file mechanism already mentioned above, which serves to install system dependent patches to a master source, allowing that master to remain free of system dependencies. When a line of the form `\.{@i}~\' appears in a \CWEB. source file, \CWEB. will read in the indicated file at that point, and continue reading at the next line when it reaches the end of the subsidiary file. The \ may either be delimited by white space, or be enclosed in double-quote characters (but not in angle brackets). Source files may be nested in this way up to 10~levels deep. Nothing in the printed \CWEB. document will indicate the switch from one source file to another, nor will there be any effect on the \Cee~file(s) written by \.{CTANGLE}, except that \&{\#line} directives will of course always point to the proper point of origin for each piece of code written to such files. Like for header files, there is a way to indicate that if a file included by \:i is not found in the current directory, an alternative place can be tried; unlike header files however there is relatively little need to use this facility, unless one has files that are useful to include identically in more than one project. At most one alternative place to search can be given, and it is specified by a prefix to be applied to the file name, in the same way as for header files. This prefix may either be compiled into the \CWEB. 
programs by setting the preprocessor symbol \.{CWEBINPUTS} equal to that string when compiling \.{common.c} (analogously to \.{CWEBHEADERS}), or it can be specified at run time by setting the environment variable \.{CWEBINPUTS}; when both methods are used, the latter takes precedence. The change file, if present, contains a sequence of ``changes'', each of which specifies the replacement of one or more lines from the main input stream by another set of lines. Each change has the form `\.{@x} \ \.{@y} \ \.{@z}', where each of the codes \:x, \:y, and~\:z occupies a line by itself. The \ is a non-empty set of lines that should match exactly with some sequence of lines in the main input stream (except for the fact that trailing white space on any line is ignored). Furthermore, different changes should affect non-overlapping sets of lines, and their order in the change file should be the same as that of the parts of the main input stream that they replace. For each change in succession, a sequence of lines matching \ is searched for, and replaced by the corresponding \; like for \:i file insertions, the resulting stream of lines will be processed in the usual way as if it constituted a single \CWEB. source file. The ``main input stream'' referred to here is the result of (recursively) inserting any auxiliary files indicated by \:i lines into the main \CWEB. source file. It therefore makes no sense to specify \:i in the \, nor is \:i allowed in the \: it should simply not occur anywhere in the change file. On the other hand it is legitimate for the \ to match a sequence of lines coming from more than one physical source file. The fact that input is temporarily switched to the change file is not entirely transparent to the \CWEB. document, as it was in the case of \:i files: \.{CWEAVE} will mark all sections that were modified under control of the change file, by attaching an asterisk to their section number, and to all references to that number. 
(If some changes should add or remove entire sections in the middle of the \CWEB. source, which is allowed although not encouraged, then the section numbering will be altered, but sections for which this is the only change will not be flagged with an asterisk.) If one is only interested in sections that are modified, then it is even possible to restrict printing to only those sections, by including the \TeX~command `\.{\\changesonly}' in the text in limbo, preferably by means of the change file. In order to facilitate efficient implementation of the change file mechanism, an additional constraint is placed on the changes: once an exact match of a line in the main input stream with the first line of a change is found, the remaining lines of the change (up to the \:y) should also match. Any empty lines immediately following \:x are not used for matching (and are in fact completely ignored) so the first matching line is never an empty one; it is preferable to choose changes such that their first line matches a unique line of the main input. It is a good idea to start changes in the \TeX~part of sections (after all, if the program changes, so should its explanation); in this case uniqueness of the match of the first change line can always be ensured (even when the \TeX~part is empty) by placing a \TeX~comment in the main input, that serves merely as a target for replacement by the change file. All text in the change file that is not part of a change is ignored, except that there should be no lines starting with \:i, \:y, or~\:z; this text can be used for instance to explain the purpose of the change to the person installing the program on a new system, rather than to the ordinary reader of the program. As we have said earlier, the change file mechanism provides an alternative to system dependent conditional compilation, and it is usually a much more elegant way to incorporate system dependencies. 
The main reason for this is that one does not have to anticipate all possible systems that a program could be ported to, nor is the main source polluted by such considerations: it suffices to provide a separate change file each time the program is moved to a system with different system dependent requirements. Users of a particular system need to know about the change file for that system only, and the responsibility for maintaining main source and the change file might lie with different persons; additional effort is only required when the main source changes in such a way that a change file fails to match. One should not get carried away by the benefits of change files though: they provide only a rather crude mechanism (due to the inflexible matching rules), and if there are many changes, they will become difficult to maintain when the master file evolves. Portability is still best obtained by limiting system dependent features as much as possible, and if inevitable, confining them to some well defined part of the program. If one should wish to create variants of a program that involve significant changes, then writing extensive change files is probably not the best way to go. This method could lead to a form of ``rigor mortis'' for the original version of the program, caused by fear that any alterations could upset one of the change files, even trivial changes that only involve the commentary, or even just the layout of the source file. A better approach would be to collect routines of general utility as much as possible into separate compilation units used by all variants, and to complement these with completely independent compilation units to define the specific behaviour of each of the variants. It is certainly pointless to use a change file for such things as bug fixes or further development of a program; the whole idea is that such modifications can be made in the master file while the change files for various systems need little or no adjustment. 
The codes \:i, \:x, \:y, and~\:z of this section have the appearance of control codes, but they are not really part of the \CWEB. language, and obey different rules than control codes. For instance, they are line oriented (and rightly so, since their goal is to select which lines will be actually processed by \CWEB.): they should appear at the beginning of a line, and any further text on the line (in case of \:i, after the file name) is ignored. Also they act quite independently of \CWEB.'s current mode of operation: rules such as the one forbidding control codes in limbo do not apply to these codes. \beginsection Control codes for advanced or emergency use In this section we discuss control codes that are not essential for everyday use of \CWEB., but are provided to enable either refinements in the presentation of the \CWEB. document, or special manoeuvres to deal with certain unusual situations or requirements. Most of them serve to allow the programmer some form of direct control over the contents of either the \CWEB. document, the \Cee~file, or the source file, bypassing the automatic processing by which these are normally related to each other; there are also a few that serve as debugging aid, eliciting explicit information from the \.{CWEAVE} parser about its actions. \subsection Control codes for cross-referencing: \:!, \:\^, \:., \:?, \::, \:\# Some control codes are provided that allow the programmer to influence indexing and to perform explicit cross-referencing. The codes in this subsection are the only ones that are allowed to occur in the \TeX~part of sections, outside `\pb'; with the exception of~\:\#, they can also be used in \Cee~text. Control codes such as these, that are intended only to affect the printed document, are ignored completely by \.{CTANGLE}. Incidentally, cross-referencing in \CWEB. 
always means referring to section numbers rather than to page numbers: \.{CWEAVE} cannot know about page numbers since these are determined only at the \TeX\ processing stage. It would be possible to have \TeX\ produce a table mapping section numbers to page numbers; in fact the table of contents provides a coarse approximation to such a map. Whenever \.{CWEAVE} can determine from the context that an occurrence of an identifier is a defining one, it will make the corresponding section reference in the index underlined. If some case is missed by \.{CWEAVE}'s normal rules, or if one wants to make a reference to a reserved word (which is only made if it is underlined), then one can place the code \:! in front of the identifier to create an underlined reference. Cases where this may be required include arguments of functions with an old-style (pre-\caps{ANSI}) heading for which no declaration is given before the function body (i.e., the default type \&{int} applies), and enumeration constants that appear out of context of the \&{enum} keyword (e.g., because the enumeration list is given as a separate module). In general, the occasions where one needs \:! are quite rare. A group of three codes serves to include additional entries in the index, amidst those generated automatically by \.{CWEAVE} for identifiers. It may be useful for instance to maintain references to concepts like `system dependencies', or to all error messages that can be generated. The three codes are \:\^, \:., and~\:?; they differ only in the way the index entry will be typeset. In each case the index entry is specified as a control text terminated by \:>; control code and control text will be removed by \.{CWEAVE}, but the control text will appear in the index, followed by the section number(s) where the control code occurred. 
For \:\^,~\:., and~\:?, the index entry will be set respectively in roman type, in typewriter type, and as argument to the control sequence `\.{\\9}' (which is undefined in the standard format, but which the programmer may define in limbo). The first possibility is most suited for general concepts, the second for strings that occur in the program, and the third for any further special purpose one may think of. These control codes can be put either in the \TeX~part of a section or within \Cee~code; the effect will be the same, but this allows the programmer to put the control code in such a place that it is most likely to remain in the right place in case the section should be reorganised and possibly subdivided. Like for references to identifiers, one can make an index reference underlined by prefixing the corresponding control code with~\:!. Unlike the control text forming a module name, the control texts discussed here (as well as those that have not been introduced yet) should be contained in a single line of input; also, no spaces are contracted or removed. The control texts are passed unchanged to \TeX\ (with only `\.{@@}' being undoubled as usual), so that they can use \TeX~commands for special effects. Inside `\.{@.\dots@>}' one can get the special characters occurring in `\.{\#\$\%\^\&\{\}\~\_\\}' by prepending a backslash, `\.{\\v}' gives a vertical bar~`\.\v', and `\.{\\ }' gives a visible space `\.\ '. The control texts are also used as a sort key to determine the place in the index where the entry appears. Different occurrences of these control codes are combined in the index only if there is an exact match of both control code and control text, and no merging takes place with identifiers whose name happens to be equal to the control text (however, their relative order in the index is unpredictable). In sorting, a collating sequence is used that differs from the standard \ASCII. 
order: alphanumeric characters appear at the end of the sequence, with upper and lower case being considered equivalent, and the space character appears at the beginning of the sequence. In case there are entries that cannot be correctly positioned by ordinary means, the following trick has been suggested by Knuth: define `\hbox{\.{\\def\\9\#1\{\}}}' and represent the tricky entries as `\.{@?\\}\{\<\TeX~code>@>}', where \ contains sufficiently many characters to uniquely determine the position of the entry in the index, and \<\TeX\ code> produces the index entry itself; this works because \.{CWEAVE} will write the index entry `\.{\\9\{\\}\{\<\TeX~code>\}}', which ``expands'' to `\.{\{\<\TeX~code>\}}'. Besides references from the index, \.{CWEAVE} provides cross-references, in the form of the section numbers that link the (first) defining occurrence of a module name with the places where it is used and cited. There is also a mechanism for the user to explicitly state similar cross-references in the \TeX~part of a section, so that it is possible to make a reference to another section (where some related matters are treated), that will remain correct if sections are renumbered. The mechanism is simple: in the section referred to, one places the control code~\::, followed by a control text serving as a label, and at the place of reference one uses \:\#, followed by the identical control text (both control texts are terminated by~\:>). The rules for placing \::\ are the same as for \:\^ and its relatives, except that \:!\ has no effect here; the control text will not appear in the index, and there is no conflict when the same string is used as an identifier or index entry. For~\:\# and its control text, \.{CWEAVE} basically substitutes the section number of the matching \::~code, but because there might be multiple occurrences of \:: with the same control text, the precise replacement rule is a bit more complicated. 
The replacing text is precisely what would follow ``See also section'' in a cross-reference for a module name: one or more section numbers in increasing order, separated by commas and ``and'' as appropriate, and preceded by a space and, in case there is more than one section number, by an `\.s' before that space. This is set up so that a reference of the form `\.{section@\#label@>}' will generate a proper reference, whether or not there are multiple definitions of the label. One can also use `\.{\\Sec@\#label@>}' since in the standard format `\.{\\Sec}' expands to `\Sec' and `\.{\\Secs}' to `\Secs' (in this case the space produced by~\:\# is ignored after the \TeX\ control sequence); by defining other \TeX~macros one could do anything one likes with the text provided by~\:\#. Although \:\# cannot be used directly in comments and module names, it is possible to capture its text in a macro definition (within a \TeX~part) and use that macro instead. \subsection Control codes for layout in programs: \:,, \:|, \:/, \:), \:\\, \:+, \:; As we mentioned before, \.{CWEAVE} formats the program fragments and pieces of \Cee~code by inserting formatting controls in the the output based on a syntactic analysis of the \Cee~tokens of the program fragments; in particular the layout of the code in the source file is completely ignored. Although this automatic formatting usually works well provided that \.{CWEAVE} succeeds in parsing the program fragment (possibly with help of some codes already discussed), there may still be occasions where one is not quite satisfied by the result. 
If one wishes certain constructions to be systematically treated in a different way, then a more pleasing style might be available by calling \.{CWEAVE} with certain options set; if not, then there is always the possibility of changing the grammar or layout rules of \.{CWEAVE} (that program was written in a way that tries to make this as easy as possible, but it still requires some careful study of the relevant chapters of the \.{CWEAVE} source document). However in some cases one simply wants to override the general rules in specific cases by adding or removing a few formatting controls. There are a number of control codes which can be used to do that. These codes are ignored by~\.{CTANGLE}; since most of them deal with line breaks, their importance for `\pb' fragments is minimal. The control code \:, will insert a thinspace (a small amount of horizontal white space) where it is placed. Within a statement \:| may be used to indicate a place where a line break may be optionally taken (with no associated penalty), when the statement is too long to fit on a single line. Note however that optional breaks are already allowed at most operator symbols, with a penalty that increases with the operator priority and the number of enclosing parentheses, so \CWEB. will almost always succeed in finding a very reasonable break point in long expressions. A line break can be forced by~\:/; this can be used for instance between statements (if line breaks are not already forced there), in order to group related statements on one line rather than simply as much as possible. The code \:) will also force a line break, and in addition create a bit of vertical white space to give an even more visible separation. (\.{CWEAVE} will never issue more than one line break on the same place, so there is no problem if a line break was already present on that spot.) The code \:\\ is another variation: it forces a line break and backs up the next line by one indentation unit. 
It is useful before a module name that represents one or more cases in a \&{switch} statement: this will make the name line up with the case labels. Finally, \:+ cancels any (forced) line break that might be inserted by \.{CWEAVE} at the point where it is placed, and replaces it by a space with optional line break (the kind of space that is usually inserted between statements). Its main use is to force small conditional or loop statements onto a single line when \.{CWEAVE} would otherwise use a multiple-line layout. Because the line can still be broken at the inserted space, such one-liners do not make it impossible to retypeset the program in a narrower column. A warning is in place however if, as a result of applying \:+, a substantial stretch of \Cee~code is void of forced breaks, and that code contains constructions that affect the indentation level. \TeX nically speaking, the indentation at optional breaks is governed by the hanging indentation parameter of \TeX, whose value is constant throughout a paragraph, which in this case is everything between two forced breaks; under the mentioned circumstances the amount of indentation at optional breaks can be unexpected and inappropriate. For convenience, an alternative method is provided to fit compound statements on a single line, and similarly for \&{struct} and \&{union} specifiers. Instead of writing \:+ on every place where \.{CWEAVE} would otherwise force a line break (which incidentally depends on the chosen layout style), it suffices to place \:; immediately after the opening brace. This will activate a different set of layout rules than is normally used, which will not insert forced breaks between the declarations and statements of the compound statement, respectively between the fields of the \&{struct} or \&{union} specifier. 
In the case of a compound statement, any forced breaks caused by conditional or loop statements appearing directly inside the compound statement are also avoided (but nested statements are not affected, so they should be handled separately if present, possibly using another~\:;). Compound statements starting with `\.{\{@;}' will be treated as if they were simple statements in further parsing, which may affect formatting; for instance, if the statement is the branch of a conditional it will be placed on the same line as the \&{if} or \&{else} controlling it. If this is too much of a good thing, a forced break may be explicitly inserted at the beginning and/or end of the compound statement; in fact the sequence `\.{@/\{@;}' is a fairly common one. There is another use of~\:+, which does not cause any breaks to be cancelled, but where on the contrary the purpose is to insert white space. It applies when a long string constant is needed, for which the string-break feature is used: a sequence of strings separated by white space only will be concatenated by the compiler into a single string. Although \.{CTANGLE} will correctly insert a space between any two consecutive strings, \.{CWEAVE} (guided by syntax rather than by lexical structure) will simply juxtapose them; by inserting \:+ between the strings, one guarantees that in the printed document there will either be a horizontal separation or (if the constituent strings themselves are already long) a line break. Incidentally, if the problem of breaking a string is in the source file rather than in the printed output, one can use the traditional solution of an escaped newline within the string; \.{CWEAVE} will treat this as if the parts of the string were on the same source line. If one should create a string in this way that does not fit on a single line of output, a break will be introduced automatically at some point, which will be typeset as if a string-break was used. 
In very long strings however it is better to write string-breaks explicitly; for strings broken only by escaped newlines, the same length limit holds as for module names (1000~characters). \subsection Codes for special items in\/ \Cee~code: \:p, \:v, \:t, \:\&, \:=, \:' Contrary to \TeX~text, pieces of \Cee~code are broken up into tokens by both \.{CTANGLE} and \.{CWEAVE}, stored internally and output at some later time after having undergone some processing. This makes it potentially difficult to put something into \Cee~code that \CWEB. is not prepared to handle. Since \Cee\ is a much more regular language than \TeX, occasions where one would need to do such a thing should be quite rare, yet some escape mechanisms have been provided, which we treat in this subsection. The code \:p can be used to explicitly specify the place where the preprocessor directives generated by \:d and \:h commands will be placed in the \Cee~file. Multiple use of \:p is allowed; as soon as it is used at least once, the default placement at the beginning of the \Cee~file is cancelled. This code provides the only way to write the directives generated by \:d and \:h to an auxiliary output file. In the formatted output this code is represented by the pseudo-module `$\ATP$', which (like preprocessor directives embedded in a program fragment) is set on a separate line and does not otherwise affect the formatting of the surrounding code. Two other codes are intended mainly for use within `\pb'. As mentioned earlier, \:v represents the bitwise-or operator. The code \:t is followed by a control text, which can be used to insert any \TeX\ symbols into a \Cee~expression; the result gets category \\{expression} but (if used in a program fragment) does not produce any actual \Cee~code. 
It is for instance possible to obtain `$\\{phi}<\hbox{$\pi$}/\T{2}$' by writing `\hbox{\.{| phi < @t\$\\pi\$@> / 2 |}}', or if one prefers, to get `$\\{phi}<\hbox{$\pi\over2$}$' by writing `\hbox{\.{| phi < @t\$\\pi\\over2\$@> |}}'. The control text is put into an \.{\\hbox} that will appear at the specified point in the formula. One might imagine using \:t as a means to sneak in \TeX\ commands that will modify the formatting produced by \.{CWEAVE}, but this is strongly discouraged unless one thoroughly understands that formatting and the way it is obtained. The codes \:\& and \:= are intended as a means to alter or bypass the processing of \Cee~tokens by \.{CTANGLE}; they should only be used in very exceptional situations. The code \:\& forces \.{CTANGLE} to output the symbols to the left and right of it directly adjacent to each other. Normally \.{CTANGLE} inserts space between two symbols if it thinks this is necessary for lexical reasons, regardless of whether a space was present in the input. Items with a lexical structure unknown to \.{CTANGLE} might confuse it, so that it would output a spurious space; this space could then be eliminated by~\:\&. For instance, an earlier version of \.{CTANGLE} would not recognise `\.{1000000UL}' as a constant, and consequently it output a space before the `\.U', so that the \Cee~compiler could not recognise it either; this problem could then be remedied by inserting~\:\&. No similar cases are known for the current version of \.{CTANGLE}. The code \:= can be used to place some text in the \Cee~file that \.{CTANGLE} will not produce by ordinary means: the control text following~\:=, up to the next \:> is copied verbatim to the \Cee~file (with `\.{@@}' undoubled as usual). If some special compiler activity, or some action by another tool, is triggered by the occurrence of some special form of comment in the \Cee~code, then such a comment can be placed using \:= (normally comments are removed by \.{CTANGLE}). 
Also, should \.{CTANGLE} unjustly decide that two symbols need no space in between them, then a space can be forced by writing `\hbox{\.{@= @>}}' \unskip\footnote{${}^\dagger$} {One case where this would be necessary is the famous example `$\T{\^123E}+\T{1}$': the \Cee~standard states that unless a space is put between the `\.E' and the `\.+', the preprocessor should treat this as a single number (a~kind of mixture of a hexadecimal and a floating point constant), which turns out not to be valid, causing an error. \.{CTANGLE} however never places a space between an identifier and an operator (even if one was present in the input), so the way to get this expression properly through the compiler is to write `\hbox{\.{0x123E @= @> + 1}}' (since this bug is now documented, it has become a feature). }. The control text will be set in typewriter type and framed in a box by \.{CWEAVE}, so that it stands out clearly; it is syntactically neutral (like a comment). Finally, the code \:' can be used to introduce a single-character constant, in the same way as the character `\.'' does in~\Cee. The difference between the two ways of specifying this value is that \.{CTANGLE} will replace \.{@'}$c$\.' by the (decimal) numeric \ASCII. value of the character~$c$, whereas \.'$c$\.' is passed on to the \Cee~compiler, which will evaluate it to the same value. The feature is therefore of little use in the current version of \CWEB., which assumes the \ASCII. character set, but is provided as an aid in writing programs that will be easier to port to non-\ASCII. versions of \CWEB.. In such systems \.{CTANGLE} should still use the \ASCII. code to compute \.{@'}$c$\.', while \.'$c$\.' represents the internal code for~$c$. The idea is that one can then (as is done in the program \TeX) map all characters on input to their \ASCII. equivalents, perform all internal manipulations independently of the externally used character set, and convert back to that code on output. 
\subsection Control codes behind the scenes: \:s, \:q, \:l The control codes of this subsection have in common that their use is never essential, but can be convenient in some situations, and is largely or completely invisible in the \CWEB. document. They are also the only control codes allowed in limbo. The code \:s has the same effect as \:f, but produces no output in the \CWEB. document. It can be used as a subsectioning code, just like \:f, but no comment should follow the two identifiers it applies to in this case (since there is nothing to attach the comments to); alternatively \:s can be used in limbo. In either case the format definition is noted but nothing is written to the \TeX~file. One might prefer to use \:s in situations where showing a \&{format} definition is considered to be more distracting than informative. Also, if a header file~$h$ is included by \&{\#include} rather than by \:h or is located in a place where \.{CWEAVE} cannot find it, and it contains typedef declarations, then~$h$ could be accompanied by a file containing a line `\hbox{\.{@s ident FILE}}' for each typedef identifier defined in~$h$, which can be read in by means of \:i by any \CWEB. file that includes~$h$. This method of passing information between files is more error-prone than having \.{CWEAVE} scan the header file however, so the latter method is to be preferred whenever possible. The code \:q is followed by a control text, and is completely ignored both by \.{CTANGLE} and \.{CWEAVE}; it can be used either in \TeX~text (even in limbo) or in \Cee~code. It can be used to make comments relevant only when the source file itself is being read, particularly within \Cee~code, where \TeX~comments cannot be used for this purpose. For instance, it can be used to put a descriptive or identifying comment at the beginning of a file included using \:i. 
This code can also be used to accommodate any other tools than \.{CTANGLE} and \.{CWEAVE} that might inspect the source file, e.g., if a text editor tries to match braces and the like, it is unlikely to correctly handle the complicated lexical structure of \CWEB. files in all cases, and an occasional brace contained in a \:q control text may help to keep it happy. Such occurrences of \:q are best removed however when source files are made public. The code \:l is used to allow certain 8-bit characters (i.e., characters with values in the range 128--255) to be used in identifiers. Doing so is only useful if measures are taken to ensure that \TeX\ can handle these characters properly. \TeX~version~3.0 and newer can handle 8-bit characters in the input, but the standard fonts do not have any characters in positions 128--255, so one has to either load other fonts that do have characters in those positions, or define such characters to be active characters that somehow produce an appropriate glyph in the current font. For identifiers the relevant font is text italic (selected by `\.{\\it}'), but if these characters are available for identifiers, one will probably also want to use them in \TeX~text (including module names and comments), so other fonts should be provided for as well. \.{CWEAVE} does not take any special measures for 8-bit characters, and just passes them on to \TeX\ (when they occur in \Cee~code outside comments and module names, they are assumed to be part of an identifier). However, since such characters cannot be used in actual \Cee~identifiers, \.{CTANGLE} must replace them by characters that are valid in \Cee~identifiers (letters, digits, and underscores). The code~\:l can be used to specify which translation \.{CTANGLE} is to use for a given 8-bit character. 
The code should only be used in limbo, and have the form `\.{@l}~\\{code}~\\{string}', where \\{code} specifies the character by a pair of hexadecimal digits in the range \.{80}--\.{FF} (without leading `\.{0x}'), and \\{string} is a string of up to~9 characters that are valid in \Cee~identifiers, terminated by a space. While copying limbo material, \.{CWEAVE} replaces \:l by `\.{\\ATL}'; its default definition will make `\.{@l}~fc~ue~' print a paragraph saying `{\bf letter \\{\"u} tangles as \tentex "ue"}', assuming that `\hbox{\.{\{\\it\\char "FC\}}}' indeed produces `\\{\"u}'; by stating `\.{\\noatl}' the definition can be changed so that nothing appears at all. \subsection Control codes for tracing {\tentex CWEAVE}: \:0, \:1, \:2, \:3 As will have become clear by now, the most sensitive part of \CWEB. is \.{CWEAVE}'s parsing mechanism, and occasionally something may go wrong with it, so that an awful result is produced. We have already discussed the means available for corrective action, but sometimes it can be a problem to find out just what is causing the trouble. Sometimes the reason is an actual syntax error, which is best located by applying \.{CTANGLE} and a \Cee~compiler to the \CWEB. source, but as noted before, \.{CWEAVE} may have a problem that a \Cee~compiler does not experience. For this reason, its parser can produce diagnostic messages on the terminal, showing details about its actions and any anomalies found. The amount of diagnostics produced is controlled by a level that may take values from~0 to~3, and can be selected by one of the control codes \:0, \:1, \:2, and~\:3. These codes can be placed in the \TeX~part of sections or within \Cee~code, and they determine the level until the next such code or the end of the file; the initial level is~0, or~1 if \.{CWEAVE} was called with the option~`\.{+d}'. Since a complete \Cee~fragment is read in before parsing starts, the level is constant throughout each fragment, and determined by the value at the end of the fragment. 
The diagnostic output uses abbreviations for syntactic categories, e.g., `\.{unop}' and `\.{binop}' stand for unary respectively binary operators, and `\.{op}' stands for operators like `$*$' that can be used either way; `\.{exp}' stands for an expression, `\.{decl}' for one or more declarations, `\.{stmt}' for one or more statements. Simple symbols like braces and commas stand for themselves, as do many keywords; `\.{for}' stands for `\&{for}' or~`\&{while}', and `\.{int}' for a type, storage class specifier or typedef identifier. A complete list of category abbreviations can be found in the source code for the function $\\{print\_cat}$ in~\.{CWEAVE}. (To fully understand the parser's diagnostic messages one has to be familiar with the parsing algorithm and the grammar rules, but for error detection a detailed understanding is usually not required.) At level~$0$ the parser will not produce any diagnostic output. At level~$1$ it will report any \Cee~fragment that could not be recognised as a single syntactic entity, which is a good indicator of grammatical problems and possibly of ugly output. It can be argued that level~1 is the natural level to use (which is why the `\.{+d}' option is provided), since not getting any diagnostics when there are syntax problems only gives a false impression that things are in order; after all, nobody would want to use a compiler that would spare the programmer its diagnostics for syntax errors, but instead would produce unreliable code. In a syntactically correct \CWEB. program it is almost always possible to apply \:f, \:;, \:[ and~\:] in such a way that no diagnostic output is produced at level~$1$; indeed this has been done for all sources of \CWEBx. itself. The diagnostic messages produced at level~1 print the successive categories of the sequence of recognised items, which could not be combined into any larger entity. 
Interpreting such a message takes a bit of practice, as one has to guess which part of the program fragment corresponds to each category printed; however, a look at the (badly) formatted output can often be helpful. The boundaries between the entities corresponding to the printed categories can often be recognised by the fact that some form of layout is obviously missing, and a space appears instead; for instance, if a closing brace of a compound statement is not preceded by a line break, then something inside that statement must have prevented it from being recognised by the parser, and its opening and closing brace will occur among the printed categories. At levels $2$~and~$3$ the parser will print the result of every single step it takes; this extremely verbose mode can be used to trace the exact steps by which the parser obtains its result. Detailed knowledge of the set of grammar rules is assumed, and these levels are mostly useful to those who wish to study or modify the rules. The set of rules can be found as a chapter of the \CWEB. source document for \.{CWEAVE}, or can be obtained separately by running `\hbox{\.{cweave -x rules}}' (ignoring the warning about an unused module) and `\hbox{\.{tex rules}}'. After each reduction step the number of the rule used is printed, followed by a list of categories after reduction, with the one that was formed by the reduction step enclosed in inverted angle brackets. Tracing at level~$3$ is even more esoteric than at level~$2$: all categories printed will have an additional character at both ends, indicating whether \TeX\ should be in math mode (`\.+') or in horizontal mode (`\.-') at that end of the item, or that it doesn't matter (`\.?'); this may help to explain the positioning of math shifts (`\.\$') in the \TeX\ output, which is controlled indirectly by the grammar rules. To reduce the amount of output, all categories that have not yet been considered by the parser are replaced by an ellipsis. 
The sequence of categories before the reduction can be found by looking up the reduction rule with the given number. Here is some sample output for a simple piece of \Cee~code at level~$2$. \vfil\penalty500\vfilneg\smallskip\vbox {\narrower\obeylines\catcode`\_=12 \tentex \let\\=\BS \obeyspaces\let =\ % Tracing after l.3: @2 |if (n>0) printf("n-1 = \%d.\\n",n-1);| 2: if ( >exp< ) ... 10: if >exp< exp ... 110: >if_head< exp ( ... 6: if_head exp ( >exp< op ... 3: if_head exp ( >exp< ) ... 10: if_head exp >exp< ;. 7: if_head >exp< ;. 80: if_head >stmt<. 117: >stmt<. } \smallskip We see that the first three steps reduce `\&{if}~$(n>\T{0})$' to an \.{if\_head}; then `$\.{"n-1\ =\ \%d.\\n"},n$' is combined to an expression, after which `${}-1$' is incorporated as well; then the statement calling \\{printf} is reduced in three steps, and finally it is combined with the `\.{if\_head}' to form another statement. Even this small example shows that \.{CWEAVE} parses the code in a different way than a \Cee~compiler would. This is partly due to its strict bottom-up strategy, which is largely unaware of context: parentheses (rule~10) and a comma (rule~6) are incorporated by the expression syntax, even when they actually figure in a conditional statement or function call. Furthermore, some distinctions are irrelevant for determining the proper layout: the ``comma operator'' is unjustly given precedence over the minus operator, but the printed output will be no different for it. \beginsection Some features of the standard format The \TeX~file produced by \.{CWEAVE} will begin with loading the standard format from the file \.{cwebxmac.tex}, whose definitions control the typesetting process: \.{CWEAVE} communicates with \TeX\ mostly by using macros defined there. 
Most of them have very short names, in order to limit the size of the \TeX~file; one should be aware that most of the single-letter control sequences and numerous two-letter ones are in use by \.{CWEB}, and are not available for other uses (when in doubt, consult \.{cwebxmac.tex}). All of the macros defined in plain \TeX\ for accenting letters have retained their meaning however, except `\.{\\.}' (for the dot accent), which is replaced by~`\.{\\:}'. Some of the macros of the standard format can be of interest to the literate programmer, either because they can be used directly in \TeX~text (indeed, some are not used by \.{CWEAVE}, and are only intended for this purpose), or because they can be redefined in limbo to alter the formatting of the program. In formatted \Cee~text, many operators are represented by macros that produce the appropriate symbols; by changing the definition of these macros, one can alter their appearance. Here is a table of the relevant cases. $$ \everycr{\noalign{\hrule}} \def\:#1 #2 {\vrule\vbox{\halign {\strut\ \hfil##\hfil\ \cr \.{#1}\cr \.{\\#2}\cr $\csname#2\endcsname$\cr} }} \hbox {\vrule\vbox {\hrule\hbox{ \strut\it operator }\hrule \hbox{ \strut\it macro }\hrule \hbox{ \strut\it symbol }\hrule }% \:= K \:== E \:!= I \:<= Z \:>= G \:\&\& W \:|| V \:! R \:\& AND \:| OR \:\^ XOR \:\~ CM \:<< LL \:>> GG \:++ PP \:-- MM \:\% MOD \:-> MG \:\#\# SS \vrule } $$ When such a macro is redefined, it is best to consult the original definition first, since it often issues a penalty, and it is best to retain this. Formatting of ordinary identifiers and keywords is performed by `\.{\\\\}' and `\.{\\\&}', which have one argument, that is typeset in italic respectively boldface type; similarly `\.{\\.}' is used for items in typewriter type, such as strings and all-caps identifiers. In the argument of `\.{\\.}' special characters can be used if escaped, as discussed for \:.. For `\AM' in ordinary text `\.{\\AM}' can be used (rather than `\.{\\\&}'). 
For names in all caps, like `\caps{ASCII}', or `\caps{UNIX}', the macro `\.{\\caps}' is provided, which makes them slightly less obtrusive by selecting a smaller font; for `\Cee' and `\Cpp' the macros `\.{\\Cee}' and `\.{\\Cpp}' are provided. Typesetting of comments, \Cpp\ one-line comments, and numeric constants is controlled by the macros `\.{\\C}', `\.{\\SHC}', and~`\.{\\T}', respectively; these can be redefined if a different style is desired. The dimensions of the pages can be controlled by setting the parameters `\.{\\pagewidth}', `\.{\\pageheight}' (the height of the text area), `\.{\\fullpageheight}' (the height including running head), and `\.{\\pageshift}' (extra displacement of odd numbered pages with respect to even numbered ones), and then invoking the macro `\.{\\setpage}'. A magnification can be applied to the entire document by saying `\.{\\magnify\{$n$\}}', where $n$ is the magnification in thousandths of the ordinary scale; this should precede any changes of the page dimensions, but if no changes are made, the page dimensions will be set to their standard values, unmagnified. The unit of indentation can be set by `\.{\\indentation\{\\}}'. The title of the program is taken from the macro `\.{\\title}', whose default value is the basename of the program source file, converted to upper case. It is used in running heads and in the table of contents. Another part of the running heads is set to the chapter title by sections starting with~\:*, but by defining the macro `\.{\\gtitle}' in limbo the corresponding text for the running heads on any pages before the first such section can be set (the default is `\.{CWEB} output'). By invoking `\.{\\titletrue}' the running head can be suppressed for one page; this is useful if the text in limbo produces a title page. 
The date of processing (by \TeX) can be included in the document before the first section by putting `\.{\\datethis}' in limbo; it can be placed on the table of contents by saying `\.{\\datecontentspage}'. At the end of the document one normally has an index, a list of module names and the table of contents, in that order, but \TeX\ can be made to stop short of any one of these by invoking respectively `\.{\\noinx}', `\.{\\nomods}', or `\.{\\nocon}'; as already mentioned, stating `\.{\\changesonly}' will limit the printed output to the sections affected by the change file. The appearance of the table of contents can be controlled by redefining `\.{\\topofcontents}' and `\.{\\botofcontents}': these macros determine the material that comes above the table and below it, including its title and the glue needed to fill up the page height. The page number of the table of contents is assigned from `\.{\\contentspagenumber}' (the default is~0), but it will not appear in print because the running head is suppressed on that page. It may be noted that \CWEB. documents contain some fixed phrases in the English language, such as the cross-references at the end of sections. These are not produced directly by \.{CWEAVE} however: one could adapt \CWEB. to a different language by redefining the macros `\.{\\A}', `\.{\\As}', `\.{\\Q}', `\.{\\Qs}', `\.{\\U}', `\.{\\Us}', `\.{\\ET}', `\.{\\ETs}', `\.{\\ch}', `\.{\\postATL}', `\.{\\ATP}', `\.{\\today}', `\.{\\now}', and parts of `\.{\\fin}' and~`\.{\\con}'. \beginsection Comparison with Levy/Knuth {\tentex CWEB} As was mentioned in the introduction, \CWEBx. is derived from an earlier \CWEB. system (itself derived from Knuth's \.{WEB}), that was written and distributed by Sylvio Levy and Donald~E. Knuth, and that \CWEB. system has independently evolved into a version currently distributed as \CWEB.~3.3. Both \CWEBx. and \LKC. 
have undergone changes with respect to their common ancestor, although the spirit of the system has not fundamentally changed in either case. Since we considered it undesirable to have a great divergence between systems that both intend to be ``a \caps{WEB} system for~\Cee'', we made a conscious effort to reduce the differences between \CWEBx. and \LKC. by including the extensions of the latter into \CWEBx. as well. There was one deliberate exception: we made no attempt to extend the grammar used by \.{CWEAVE} to handle the full \Cpp~language \unskip\footnote{${}^\dagger$} {\Cpp\ has a significantly more complicated syntax than that of~\Cee, which is already far from simple, and it has some forms of context dependence that make it doubtful whether \.{CWEAVE} could ever reliably handle \Cpp\ in full generality (and even then, \Cpp\ is a moving target). Most effort was spent on getting the grammar for \Cee\ correct; support for \Cpp\ was restricted to some extensions of~\Cee\ that could be incorporated easily. }. On the other hand, hoping to fully bridge the gap between \LKC. and \CWEBx. for \Cee~programs, a compatibility mode was added to \CWEBx. in which it tries to mimic the behaviour of \LKC. in all aspects that are relevant to the programmer (even in cases where that behaviour is undocumented), at the price of losing some possibilities that \CWEBx. normally has. The description of the differences between \LKC. and \CWEBx. can be divided into two parts: the differences between \LKC. and the compatibility mode of~\CWEBx., and the differences between \CWEBx. with and without compatibility mode. The former differences are minimal, but hard to enumerate precisely, as they are mainly a matter of difference in implementation. 
The latter differences are much more significant, but they can easily be listed, since precise details of those differences can be found by looking up all index references of the identifier \\{compatibility\_mode} in the sources for the programs of~\CWEBx., and (for the differences that only involve processing by \TeX) the contents of the file \.{cwebcmac.tex} that modifies the \.{cwebxmac} format to emulate the environment provided by the \.{cwebmac} format of \LKC.. As was stated, the differences between \LKC. and the compatibility mode of~\CWEBx. should not be relevant to the programmer, but if one uses \LKC. in a way that relies on knowledge of intimate details of its implementation (which are not described in the manual but can be learned from studying the sources), then it is certainly possible to find such differences; this applies particularly to using the \TeX~code produced by \.{CWEAVE} in unusual ways. Unfortunately there is no clear specification of which aspects of \CWEB. are well defined so that the user can safely rely on them, and which aspects are implementation details. We have taken a pragmatic attitude by reproducing all aspects that are described in the manual, and moreover many undocumented aspects, enough to process the sources of \LKC. itself and of the Stanford GraphBase without problems. To give an impression of the kind of differences that remain, we shall list some of the known ones. The output files written by \CWEBx. are not equal to those written by \LKC., so processing them otherwise than directly by a \Cee~compiler respectively by~\TeX\ may reveal some deviations. For instance, entries for the index and the list of module names are written using the control sequence `\.{\\I}' by \LKC., but since `\.{\\I}' is also used for representing the operator `$\I$', this causes problems for module names in which that operator is used; therefore, \CWEBx. uses `\.{\\@}' instead. In \CWEBx. 
unbalanced braces or parentheses in program fragments or macro replacement texts are reported and corrected by \.{CTANGLE}, as an aid in catching programming errors early; in \LKC. this is not done (but programs with such unbalanced symbols will still bring \.{CWEAVE} into serious problems). In compatibility mode the definition of `\.{\\PB}' ensures that `\pb' can always be used from within math mode; in \LKC. this is true only in simple cases. Comments in the \TeX~parts of sections (following a non-escaped `\.{\%}' character) are ignored and removed by \CWEBx., whereas in \LKC. they are processed normally and copied to the output, which may cause spurious index entries, and in exceptional cases may cause part of the comment to appear in print. The grammars used by \.{CWEAVE} in the two systems are quite unrelated; for \CWEBx., the only guideline in constructing the grammar has been the \caps{ANSI/ISO~C} syntax. When processed by \CWEBx. with the proper options selected, \CWEB. documents will look similar to the result produced by \LKC., but not identical. Unlike \LKC., \CWEBx. places optional breaks at operators, reflecting their priority and nesting inside parentheses. In \LKC., if \:{\ } is immediately followed by a subsectioning code, then the output from the subsectioning code (e.g., \.{\#define} for~\:d) will be placed on the same line as the section number, but if anything, even an extra space, comes in between, or if the `\.@' in the sectioning code was followed by a newline rather than by a space, then that output is moved to the beginning of a fresh line; in \CWEBx. output from a subsectioning code never appears on the same line as the section number. \CWEBx. has a number of control codes and a number of command line options that \LKC. does not have; moreover there are some control codes that \LKC. does have, but under different names (that are used for other purposes in~\CWEBx.). 
In compatibility mode such control codes have the same interpretation as in \LKC., but if there is no such interpretation while there is one in \CWEBx., the latter is taken. This means that in compatibility mode one can use the control codes \:v, \:\\, and \:\~, which are ignored in \LKC., while \:?~and~\:) can be used as aliases for \::~and~\:\#, respectively; furthermore the command line options controlled by the characters `\.l', `\.d', `\.t', `\.e', `\.a', `\.u', `\.w', `\.m', and~`\.+' are also extras with respect to \LKC.. Finally the control code \:; retains all its \CWEBx. uses in compatibility mode, except that of modifying the category of module names (which is different anyway), whereas in \LKC. it can only be used as an invisible semicolon. The most direct difference between \CWEBx. with and without compatibility mode is that in compatibility mode the control codes \:h, \::, \:\#, and~\:p are translated into respectively \:p, \:?, \:), and~\:c, which implies that the meanings of \:h, \::, and~\:\# as described in this manual are not available in compatibility mode. There is also an important syntactic adjustment: in compatibility mode module names are always treated as expressions (which means they must almost always be followed by \:; to make the combination behave as a statement, or by `\.;', which will however become an empty statement in the \Cee~program). Then there are a few points where compatibility mode lifts certain restrictions (thereby reducing the diagnostic capabilities). All 8-bit characters will be accepted by \.{CTANGLE}, whether or not an explicit translation was specified using \:l; the default translation used corresponds to `\hbox{\.{@l \Cident{NN} X}\\{NN}}', where \\{NN} is the 2-digit hexadecimal code, in upper case, for the character. Module names used within `\pb' do not have to be the final item. 
Rather than performing \:i inclusions before the change file is matched, the order is more or less reversed (but if some \:i~line is not replaced by the change file, then the included file will again be scanned for changes); consequently \:i is allowed (and meaningful) in the change file. The remaining alterations affected by compatibility mode are fairly minor. Trailing digits in identifiers will not be set as subscripts. The \TeX\ control sequence corresponding to identifiers that are given the category of \\{\TeX} by means of a format definition will be processed in math mode rather than horizontal mode. The code \:t together with the following control text will not be treated as an expression in parsing, but as an inert item that sticks to the token to the right of it (unlike comments that are attached to the token to their left); this allows `\.{@/@t\\4@>}' to be used in place of `\.{@\\}'. Compound assignment operators like `\.{+=}' are treated as two separate tokens; this implies among other things that the operator `\.{|=}' must be entered as `\.{@v=}' when used inside `\pb'. In index entries produced by \:\^, \:., or~\::, the underscore character will be automatically escaped by a backslash, unlike other special characters (this makes it harder to enter formulas with subscripts into the index). The output of `\pb' is made an argument to the control sequence `\.{\\PB}', whose default definition puts its argument into an `\.{\\hbox}'; this makes it safe to use `\pb' inside math mode when using compatibility mode. Finally there are a few other small changes to the format used: \Cpp\ one-line comments will be formatted as if they were ordinary \Cee~comments, and the formatting of lines in the table of contents will be affected in font and spacing by the ``depth'' specified for that chapter title. \beginsection Summary of\/ {\tentt CWEB} codes For reference, we give a table with all the codes used in \CWEB. with their main characteristics. 
The letters in the column {\it where\/} indicate in which parts of the source text may immediately precede the code: `L'~indicates text in limbo, `T'~the \TeX~part of a section, `M'~indicates an intermediate part of a section (from \:d, \:h, ~\:f, or~\:s), `C'~the \Cee~part of a section, and `c' pieces of \Cee~code within~`\pb' (the letter~`M' is only used when the code terminates an intermediate part; inside the parts after \:d, \:h, and~\:f, the letter `C' applies). The column {\it frequency\/} indicates how commonly the code is used, with $\\{regular} > \\{incidental} > \\{rare} > \\{emergency}$; for the codes with $\\{frequency}\leq\\{rare}$, no sensible use could be found within any source file for the \CWEB. system itself. The codes \:0--\:3 that are only of temporary use are omitted from the table. \vfil\eject $$\vrule\vcenter{\hrule\smallskip\tabskip 1em \let\par=\cr\obeylines \def\title #1 {\noalign{\smallskip\hrule\smallskip\hbox{\sl\enspace#1}}}% \halign{\strut\.{#\unskip}\hfil & #\unskip\hfil & #\unskip\hfil & % {\it#\unskip\/}\hfil & #\unskip\hfil \omit\strut\it code & \it meaning & \it where & \it frequency & \it remarks \title Sectioning codes @* & Start of chapter & LTMC & regular % & \.{@**}, \.{@*$n$}, or \.{@*} Title\.. 
@~ & Start of section & LTMC & regular @\~ & Start of section tied to previous one & LTMC & regular \title Subsectioning codes @c & Start of unnamed program fragment & TM & regular % & also \.{@C} @< & Start of module name & TMCc & regular % & \.{@<} Module name \.{@>} @d & `\.{\#define}'; start macro definition & TM & regular % & also \.{@D} @h & `\.{\#include}'; specify included header file & TM & regular % & also \.{@H} @f & Format definition; change syntactic category & TM & incidental % & also \.{@F} @( & Start module name defining output file & TMCc & incidental % & \.{@(} file name \.{@>} \title Parsing control codes @; & Invisible semicolon, or magic wand for syntax & Cc & incidental @[ & Start of item forced to expression & Cc & incidental @] & End of item forced to expression & Cc & incidental \title Cross-referencing codes @! & Make index reference underlined & TCc & rare @\^ & Index entry in roman type & TCc & regular % & \.{@\^}index entry\.{@>} @. & Index entry in typewriter type & TCc & regular % & \.{@.}index entry\.{@>} @? 
& Index entry formatted by `\.{\\9}' & TCc & rare % & \.{@?}index entry\.{@>} @: & Define label for explicit cross-reference & TCc & incidental % & \.{@:}label\.{@>} @\# & Explicit cross-reference to defined label & T & incidental % & \.{@\#}label\.{@>} \title Layout control codes @, & Thin space & Cc & rare @| & Optional line break & Cc & rare @/ & Forced line break & Cc & incidental @) & Forced line break with vertical white space & Cc & incidental @\\ & Forced line break, next line backed up & Cc & incidental @+ & Cancel any line break, replace by space & Cc & incidental \title \Cee\ control codes @p & Insert output from \:d and \:h & Cc & rare % & also \.{@P} @v & Bitwise or operator `$|$' & Cc & incidental % & also \.{@V} @t & \TeX~code within expression & Cc & incidental % & \.{@t}\TeX~code\.{@>}; also \.{@T} @\& & Glue together adjacent tokens & Cc & emergency @= & Insert verbatim \Cee~code & Cc & emergency % & \.{@=}verbatim \Cee~code\.{@>} @' & \ASCII. constant converted to number & Cc & rare & \.{@'}$c$\.' 
\title Silent control codes @s & Non-printing version of \:f & LTM & rare @q & Ignored control text & LTCc & rare % & \.{@q}any text\.{@>}; also \.{@Q} @l & Specify translation of 8-bit character & L & rare % & \.{@l} $xx$ string ; also \.{@L} %\title Debugging level switches %@0 & Debugging off & TCc & temporary %@1 & Report parsing failures & TCc & temporary %@2 & Trace every parsing step & TCc & temporary % % & grammar maintenance %@3 & Trace every parsing step, show mathnesses & TCc & temporary % % & implementors only \title Miscellaneous codes (these are not control codes) @@ & Representation of `\.@' & LTCc & incidental % & legal in control text too @i & Insert subsidiary source file & any & incidental % & also \.{@I} @x & Start of change; old lines follow & any & incidental % & also \.{@X} @y & Middle of change; replacement lines follow & any & incidental % & also \.{@Y} @z & End of change & any & incidental % & also \.{@Z} }\smallskip\hrule}\vrule $$ \closeout\cont % the contents information has been fully gathered \vfill\eject \pageno=\contentspagenumber \secno=0 \titletrue \message{Table of contents:} \topglue 0pt plus 1.618 fil \centerline{\ttitlefont CWEBx\titlefont\space Manual} \vfil \def\Z#1#2#3{\line{\kern.1\hsize\indent\llap{#1}\quad #2 \leaders\hbox to .5em{.\hss}\hfil \enspace#3\kern.1\hsize}} \def\z#1#2{\line{\kern.1667\hsize #1 \leaders\hbox to .5em{.\hss}\hfil \enspace#2\kern.1\hsize}} \readcontents \vskip 0pt plus 2.618 fil \eject \bye cwebx-3.04.orig/parser.w100644 1750 1750 166024 6470040300 13050 0ustar jdgjdg@* Introduction to the grammar. @:title@> The most intricate part of \.{CWEAVE} is its mechanism for converting \Cee-like code into \TeX\ code, and we shall consider this aspect of the program now. 
This parsing mechanism must be able to deal with fragmentary constructions whose overall ``part of speech'' is not known (e.g., those within~`\pb'), which implies that recognition must in principle be able to proceed without any context information. Therefore a ``bottom-up'' approach is used, that collects successive tokens, and only takes action once it has seen enough to recognise a complete match with a syntax rule. Bottom-up parsing is a very powerful technique (it can handle grammars that are not tractable by many other parsing methods); it is one of the many significant contributions to computer science that the designer of the original \.{WEB} system, D.~E. Knuth, has made. Even so, the technique used here is less powerful than that of traditional bottom-up parsers produced by parser generators, since those can use information derived from the full left-context (and a small amount of right-context) to help decide difficult situations. In practice such context is not often required to unambiguously parse program fragments, and most situations where there is a possibility of incorrect recognition can be avoided by careful specification of the syntax rules to be used; this does however make formulation of the grammar a slightly subtle matter. As we have already seen, the input is represented as a sequence of {\sl scraps}, each of which specifies a {\sl category\/} and a {\sl translation}. The category defines a syntactic class, and the translation is a token list that represents \TeX\ code; the latter is effectively just a single string of characters, although it is stored in a different form for efficiency reasons. Rules of syntax and semantics tell us how to combine adjacent scraps into larger ones, and if we are lucky an entire \Cee\ text that starts out as hundreds of small scraps will join together into one gigantic scrap whose translation is the desired \TeX\ code. 
If we are unlucky (i.e., if the input is not properly formed for recognition by our parser), we will be left with several scraps that don't combine; their translations will simply be output, one by one. The parsing process is governed by a set of reduction rules, each of which specifies how a sequence of consecutive scraps with certain given categories can be combined into a single new scrap, and it also tells what category the new scrap has and how its translation is obtained from those of the original scraps. The set of reduction rules is given by a table, which is actually an initialiser for a static array, starting in section@#rules@>. In many cases a rule is simply given by listing the category codes forming its pattern (its left hand side) and the category code of the resulting scrap (the right hand side). For example, one of the reduction rules could be represented as $$ \hbox{|expression| |binop| |expression|}\Rightarrow\hbox{|expression|}. $$ For such simple rules the translations of the original scraps are simply concatenated to form the translation of the resulting scrap. The line in the table corresponding to this rule actually looks like this: $$ \hbox{|{2,{{expression,binop,expression}},{expression, NULL}}|}. $$ The `2' is the number used to identify the rule, which is useful when tracing the activities of the parser, and the |NULL| signifies that the default translation applies; the braces reflect the structure used to represent syntax rules, which has some additional components that are not specified explicitly here, and that are therefore initialised to~0. For other rules specific additional items are to be inserted in between the translations of the scraps that match the pattern. For instance, here is another rule present in the table $$ \hbox{|{ 6,{{expression, comma, expression}},{expression,"__p1_"}}|}. 
$$ Here the translation is specified by the ``format string'' |"__p1_"|, where the underscores stand for the translations of the three participating scraps, the `\.{p1}' for an |opt| token with argument~`\.1' (eventually producing `\.{\\penalty10}') that comes after the translation of the second scrap, with category |comma|. That translation will almost always be |","|, since there are no rules with |comma| as their right hand side, but if a comment follows the comma, it will have been incorporated into its scrap while preparing for the reduction, and in this case the optional break will follow the comment (and have no effect, because there is already a forced break after comments). The interpretation of all characters that can appear in the format strings can be found in section@#format@>. To handle all cases that can arise in programs, in combination with a large number of formatting options that might be selected, our mechanism provides for rules of a somewhat more general form than those presented above, but the extensions are used only for a few rules. Consider the following rule: $$ \hbox{|{132,{{lpar,statement,statement},1}, {statement,"_B_"},forced_statements}|}. $$ Here, the `1' indicates that the first scrap from the left in the pattern, |lpar|, serves as context only, and does not take part in the actual reduction. The rule therefore reduces a sequence of two consecutive statements into a single one, inserting a |break_space| (from the `\.B' format character) between their translations, but only if the statements are preceded by a left parenthesis (category~|lpar|); this restriction means that the rule will only apply to the first two semicolon-separated expressions following `|for|~|(|'. 
The entry |forced_statements| means moreover that the rule will not be used at all unless a `\.{+f}'~or~`\.{+a}' command line option was specified, which forces line breaks between consecutive statements; this rule then avoids such line breaks between the controlling expressions of a |for|-loop, since it takes precedence (see below) over the rule that normally combines statements. @ The rules are applied from left to right, as follows. Suppose we are currently working on the sequence of scraps with categories $c_1\,c_2\ldots c_n$. We try first to find a rule whose pattern matches an initial substring $c_1\,c_2\ldots\,$, and if no such rule exists, we try to find a rule applicable to the next substring $c_2\,c_3\ldots\,$; if that fails, we try to match $c_3\,c_4\ldots\,$, etc. When several patterns match, starting at the same scrap, the longest one is selected (no two rules can have identical patterns). For instance, there is a rule that reduces `|struct_like| |expression| |lbrace|' to `|struct_head|', and another one that reduces `|struct_like| |expression|' to `|int_like|'; the latter is only applied when no |lbrace| scrap follows. This is a sensible rule, since the longer pattern is the more specific one, and without such a rule it could never match. One should be aware however that this only works because the scrap with category |lbrace| represents a single token that requires no reduction to create it, for otherwise the two-scrap reduction would be applied before the three-scrap reduction would have a chance to match. One might say that we use a left-to-right eager strategy for choosing reductions; this strategy is chosen on heuristic grounds, and there is no guarantee that it will find a successful sequence of reductions if one exists. 
In other words, if we interchange left and right hand sides of the rules and view them as {\sl production\/} rules of a context free grammar, then the language generated by this grammar can be much larger than the one recognised by our parser. For instance, the context free grammar would, from the rules given above, generate expressions with multiple operators where implicit parentheses are placed arbitrarily (and therefore it would be ambiguous); our parsing strategy however will always place implicit parentheses to the left. (We don't care if implicit parentheses are placed incorrectly, because it does not influence typesetting.) It is an interesting theoretic problem to find an algorithm that will transform a set of reduction rules into a context free grammar generating exactly the language recognised; that would help to verify that the rules will work as intended. We must be cautious in formulating the rules, not only to give rules that are sufficient to reduce all desired programs, but also not to specify rules that could in certain circumstances match with precedence over the ones we intended for the situation. Some of the formatting of the output is controlled by the \TeX~format in the file \.{cwebxmac.tex}, rather than directly by \.{CWEAVE}. For instance, the `\.{\\penalty10}' mentioned above is actually written as `\.{\\31}'; the macro `\.{\\3}' will in fact raise the penalty for any enclosing pair of braces or parentheses; thus breaks in argument lists or initialiser lists will be avoided in favour of ones outside those lists. There are other effects not visible in the grammar, like optional breaks automatically associated with certain operators. On the whole it is a delicate interplay between the set of reduction rules, the reduction mechanism, the macro definitions, and the (mathematical) typesetting rules of \TeX\ that determines how programs will be typeset. 
This division of labour is quite convenient (e.g., it would tremendously complicate the task of \.{CWEAVE} if it had to decide which optional breaks should actually be taken), but it has the unfortunate consequence that it is not easy to gain a comprehensive understanding of the translation process. Indeed there have been quite a few surprises during the construction of the system, and we cannot be sure that there are no more to come; yet we believe the situation is more transparent than in the original |WEAVE|, since we have avoided rules that are oriented towards the target language (\TeX) rather than towards the source language (\Cee). @ Here is a list of the category codes that scraps can have. The category codes that apply to reserved words (e.g., |while_like|, but also |declaration| for |va_dcl|) as well as |expression| (that is used for |type_defined| identifiers in their typedef declaration) are sufficiently high that they can be distinguished from |ilk| values that are not category codes, like |type_defined|, |TeX_like|, |NULL_like|, |const_like|, and~|typedef_like|. A number of categories can occur in scraps, but do not occur in any of the reduction rules, since they are handled by other means; they have values exceeding |max_rule_cat|. The macro |valid_cat| checks whether |c| is a category that might match in a rule; it uses its argument twice, so its argument should not cause a side effect. If this section is changed, section@#category output@> should be changed correspondingly. 
@d max_category end_expr /* largest scrap category */ @d max_rule_cat return_like /* largest category in rules */ @d valid_cat(c) ((c)>0 && (c)<=max_rule_cat) @= enum @/ @:categories@> { unop = 1, /* a unary operator like `|!|' */ binop, /* a binary operator like `|<|' */ unorbinop, /* an operator that can be either, like `|-|' */ select, /* structure selection: `|.|' or `|->|' */ question, /* a question mark operator */ lbrace, rbrace, lpar, rpar, lbrack, rbrack, /* `|{|', \ `|}|', `(', \ `)', `[', \ `]' */ comma, semi, colon, colcol, magic, /* `,', `;', `:', `$\CC$', \:; */ subscript, /* an array subscript, like `|[]|' or `|[i++]|' */ struct_head, /* the beginning of a struct specifier, like `|struct s{|' */ short_lbrace, short_struct_head, /* from `\.{\{@@;}', for one-liners */ compound_statement, /* a complete compound statement */ statement, /* a complete statement, possibly compound */ function, /* a complete function definition */ function_head, /* a function identifier followed by formal parameters */ parameters, /* parameters in function declaration, or casting operator like `|(int)|' */ label, /* a statement label */ if_head, /* `|if|' followed by a (parenthesised) expression */ if_else_head, /* \lq|if @t\dots@>@; else|', \lq|while(@t\dots@>)|' or \lq|switch(@t\dots@>)|' */ do_head, /* `|do @t\dots@>@; while|' */ mod_scrap, /* module name */ declarator, /* abstract declarator, like `|(*)(int,char*[])|' */ declaration, /* a complete declaration */ expression, /* an expression, possibly a single identifier */ while_like, /* `|for|', `|while|', `|switch|' */ do_like, /* `|do|' */ if_like, /* `|if|' */ else_like, /* `|else|' */ int_like, /* `|int|', `|char|', `|extern|', \dots */ case_like, /* `|case|', `|default|' */ sizeof_like, /* `|sizeof|' */ struct_like, /* `|struct|', `|union|', `|enum|' */ return_like, /* `|return|', `|break|', `|continue|', `|goto|' */ lproc, /* `\&\#' and following identifier starting preprocessor directive */ rproc, /* end of a 
preprocessor directive */ insert, /* comment or other syntactically inert item */ begin_expr, end_expr /* \:[ and \:] */ }; @ As we have already seen, tokens are converted to elementary scraps by the function |C_read|; these scraps form the `terminal symbols' of our grammar. The translation of tokens to scraps is largely governed by the static array |trans_ini|, whose initialisation values we shall now give. @d yes_math 1 /* should be in math mode */ @d no_math 2 /* should be in horizontal mode */ @d maybe_math 0 /* works in either horizontal or math mode */ @< Initialiser for |trans_ini| @>= { '!', unop, yes_math, "\\R" }, @/ @.\\R@> { '~', unop, yes_math, "\\CM" }, @/ @.\\CM@> { '/', binop, yes_math, "/" }, @/ { '<', binop, yes_math, "<" }, @/ { '>', binop, yes_math, ">" }, @/ { '.', select, yes_math, "." }, @/ { '=', binop, yes_math, "\\K" }, @/ @.\\K@> { '|', binop, yes_math, "\\OR" }, @.\\OR@> { or, binop, yes_math, "\\OR" }, @/ { '^', binop, yes_math, "\\XOR" }, @/ @.\\XOR@> { '%', binop, yes_math, "\\MOD" }, @/ @.\\MOD@> { '+', unorbinop, yes_math, "+" }, @/ { '-', unorbinop, yes_math, "-" }, @/ { '*', unorbinop, yes_math, "*" }, @/ { '&', unorbinop, yes_math, "\\AND" }, @/ @.\\AND@> { '?', question, yes_math, "\\?" 
}, @/ { '(', lpar, yes_math, "(" }, @/ { ')', rpar, yes_math, ")" }, @/ { '[', lbrack, maybe_math, "[" }, @/ { ']', rbrack, maybe_math, "]" }, @/ { '{', lbrace, yes_math, "\\{" }, @/ { '}', rbrace, yes_math, "\\}" }, @/ { ',', comma, yes_math, "," }, @/ { ';', semi, yes_math, ";" }, @/ { ':', colon, maybe_math, ":" }, @/ { '#', insert, maybe_math, "\\#" }, @/ @.\\\#@> /* this should occur only in macro definitions */ { at_sign_image, insert, maybe_math, "@@" }, /* this should not occur in legal \Cee~text */ @) { not_eq, binop, yes_math, "\\I" }, @/ @.\\I@> { lt_eq, binop, yes_math, "\\Z" }, @/ @.\\Z@> { gt_eq, binop, yes_math, "\\G" }, @/ @.\\G@> { eq_eq, binop, yes_math, "\\E" }, @/ @.\\E@> { and_and, binop, yes_math, "\\W" }, @/ @.\\W@> { or_or, binop, yes_math, "\\V" }, @/ @.\\V@> { plus_plus, unop, yes_math, "\\PP" }, @/ @.\\PP@> { minus_minus, unop, yes_math, "\\MM" }, @/ @.\\MM@> { minus_gt, select, yes_math, "\\MG" }, @/ @.\\MG@> { gt_gt, binop, yes_math, "\\GG" }, @/ @.\\GG@> { lt_lt, binop, yes_math, "\\LL" }, @/ @.\\LL@> { mul_assign, binop, yes_math, "\\KK*" }, @/ @.\\KK@> { div_assign, binop, yes_math, "\\KK/" }, @/ { mod_assign, binop, yes_math, "\\KK\\MOD" }, @/ @.\\MOD@> { plus_assign, binop, yes_math, "\\KK+" }, @/ { minus_assign, binop, yes_math, "\\KK-" }, @/ { left_assign, binop, yes_math, "\\KK\\LL" }, @/ { right_assign, binop, yes_math, "\\KK\\GG" }, @/ { and_assign, binop, yes_math, "\\KK\\AND" }, @/ @.\\AND@> { xor_assign, binop, yes_math, "\\KK\\XOR" }, @/ @.\\XOR@> { or_assign, binop, yes_math, "\\KK\\OR" }, @/ @.\\OR@> { thin_space, insert, yes_math, "\\," }, @/ @.\\,@> { pseudo_semi, magic, maybe_math, "" }, @/ { force_expr_open, begin_expr, maybe_math, "" }, @/ { force_expr_close, end_expr, maybe_math, "" }, @/ { join, insert, no_math, "\\J" }, @/ @.\\J@> { ellipsis, int_like, yes_math, "\\ldots" }, @/ { sh_sh, binop, yes_math, "\\SS" }, @/ @.\\SS@> { colon_colon, colcol, yes_math, "\\CC" } @ Certain tokens that lead to fixed scraps are 
not included in the |trans_ini| array because their translations involve non-character tokens. Since there are only a few of them the easiest solution is to install each one explicitly into the |token_trans| array. @d start_scrap(s,c,m) p=&token_trans[s],p->cat=c, p->mathness=5*(m) @d end_scrap p->trans=text_ptr, freeze_text(); @< Install the translations of tokens involving line breaks @>= { scrap* p; start_scrap(math_break,insert,maybe_math); /* \:\v */ app(opt), app('0'); end_scrap; @/start_scrap(line_break,insert,no_math); /* \:/ */ app(force); end_scrap; @/start_scrap(end_preproc,rproc,no_math); /* end of preprocessor directive */ app(force); end_scrap; @/start_scrap(' ',insert,no_math); /* space within preprocessor directive */ app(break_space); end_scrap; @/start_scrap(big_line_break,insert,no_math); /* \:) */ app(big_force); end_scrap; @/start_scrap(backup_line,insert,no_math); /* \:\\ */ app(backup); end_scrap; @/start_scrap(no_line_break,insert,no_math); /* \:+ */ app(cancel),app(relax),app(break_space),app(relax),app(cancel); end_scrap; @/start_scrap(include_preproc,insert,yes_math); /* \:p */ app(force),app_str("\\ATP"),app(force); end_scrap; @.\\ATP@> } @ When \.{CWEAVE} is compiled with the |DEBUG| switch, it can display its parsing steps. The order of strings in |cat_name| must match that in the |enum| declaration in section@#categories@>. 
@c @:category output@> #ifdef DEBUG void print_cat (int c) /* symbolic printout of a category */ { static char* cat_name[]= { "unop", "binop", "op", "select" , "?", "{", "}", "(", ")", "[", "]", ",", ";", ":", "::", "@@;" , "subscr", "struct_head", "short_{", "short_struct_head" , "cmp_stmt", "stmt" , "function", "function_head", "params", "label" , "if_head", "if_else_head", "do_head" , "mod_name", "declarator", "decl", "exp", "for", "do", "if", "else" , "int", "case", "sizeof", "struct", "return" , "#{", "#}", "insert", "@@[", "@@]" }; if (c<=max_category && c>0) printf(cat_name[c-1]); else printf ("IMPOSSIBLE"); } #endif /* |DEBUG| */ @ Another major class of terminal symbols is formed by the reserved words. If a name exists in the hash table with an |ilk| specifying a reserved word, then |id_lookup| will return the reserved word when called with that name, and |C_read| will use the |ilk| to set the category of the resulting scrap. So all that has to be done is to get all the reserved words into the hash table with the right ilks initially. The simplest way to do this is to call |id_lookup| for all reserved words with the proper |ilk| at the beginning of each run of \.{CWEAVE}. Fortunately there are not too many reserved words. This code below uses the fact that instead of using pointers to beginning and end of a string for |id_lookup|, one may also pass a single pointer to a null-terminated string provided the other pointer is null. 
@^reserved words@> @= { int i; static char* int_likes[]= { "auto","char","double","extern","float","int","long","register" , "short","signed","static","unsigned","void" }; static char* defined_types[] = { "FILE", "size_t", "ptrdiff_t", "wchar_t" , "jmp_buf", "sig_atomic_t", "fpos_t", "div_t", "ldiv_t" , "clock_t","time_t" , "va_list" }; static char* return_likes[]= {"break","continue","goto","return"}; int int_like_nr=array_size(int_likes), defined_type_nr=array_size(defined_types), return_like_nr=array_size(return_likes); for (i=0; i } @ The main difference between \Cee\ and \Cpp, as far as \.{CWEAVE} is concerned, is that the latter has a number of additional reserved words. Most of them are sufficiently like some \Cee-reserved word (or category) that we can simply make it behave like that \Cee~symbol, without changing the syntax. For `\&{new}' and `\&{operator}', some additional syntax rules will be needed however; nevertheless we do not need to extend the set of syntactic categories. For `\&{operator}' we abuse the category |case_like|, since its proper use is rather restricted (`|case|' is always followed by an expression, while `|default|', `\&{private}' and its relatives are always followed by a colon), so there will be no confusion with `\&{operator}', which is always followed by an operator symbol. @= { id_lookup("asm", NULL, int_like); id_lookup("class", NULL, struct_like); id_lookup("delete", NULL, sizeof_like); id_lookup("friend", NULL, int_like); id_lookup("inline", NULL, int_like); id_lookup("new", NULL, sizeof_like); id_lookup("operator", NULL, case_like); id_lookup("private", NULL, case_like); id_lookup("protected", NULL, case_like); id_lookup("public", NULL, case_like); id_lookup("this", NULL, expression); id_lookup("virtual", NULL, int_like); } @ There are a few more kinds of elementary scraps that the functions we have given before can produce, which we mention here for completeness. 
Ordinary identifiers get category |expression|, and their names will be expanded on output as argument to a control sequence that provides the proper formatting. For strings, constants, verbatim constructions, and \TeX~strings, the applicable control sequences and the constituent characters (escaped with backslashes where necessary) are written explicitly into token memory; their scraps also have category |expression|. Comments are converted to scraps of category |insert|, and their contents are also stored literally; in case of `\pb' fragments a reference to a text marked with |inner_text_flag| is stored, for the production of which the parsing mechanism has in fact already been invoked. By contrast module names are stored by reference to the name table, just like identifiers, and their scraps have category |mod_scrap|; in their case the parsing mechanism may be called during the {\sl output\/} process, if any `\pb' constructions occur. The final \TeX\ output produced for elementary scraps will often be marked with special control sequences. Ordinary multi-character identifiers are enclosed in `\.{\\\\\{}$\,\ldots\,$\.\}' (single character identifiers are merely preceded by a space; they will be set in math italic), identifiers whose |ilk| is |TeX_like| will become control sequences that are also enclosed in `\.{\\\\\{}$\,\ldots\,$\.\}' (to establish italic type), reserved words are enclosed in `\.{\\\&\{}$\,\ldots\,$\.\}', strings and all-caps identifiers in `\.{\\.\{}$\,\ldots\,$\.\}', constants in `\.{\\T\{}$\,\ldots\,$\.\}', and verbatim constructions in `\.{\\vb\{}$\,\ldots\,$\.\}'. Comments are enclosed in `\.{\\C\{}$\,\ldots\,$\.\}' and usually followed by `\.{\\6}' (a forced break), and module names take the form `\.{\\X$n$:}$\,\ldots\,$\.{\\X}' where |n| is the section number (since module names have |mathness==yes_math|, there is no danger that the final `\.{\\X}' will disable a following space when coming from a `\hbox{\.{\v@@< ... 
@@>\v}}' construction). @* The parsing mechanism. @:title@> Conceptually the working sequence of scraps is like a deck of cards, in which we repeatedly replace a sequence of consecutive cards by a single new card. Since such replacements never increase the number of cards, we can use sequential allocation for the current sequence of scraps, and our only difficulty will be how to conveniently fill the holes that might be left after each reduction step. Now reduction usually takes place near the beginning of the scrap sequence (assuming that the scrap sequence makes syntactical sense) because that is where we are looking first, and we want to avoid shifting down the whole remainder of the scrap sequence each time. Therefore the sequence of scraps, which initially occupies the positions from |scrap_base| to |scrap_ptr|, is allowed to have a hole in its middle, the low and high end of which are pointed to by variables |lo_ptr| and~|hi_ptr|. There is also a variable that points to the place where reductions are currently taking place, which is the parsing pointer~|pp|. It will always point into the area below the hole, and when it approaches the hole so closely that a potential reduction might involve scraps from above, the situation is remedied by sliding down scraps to the lower region, effectively raising the hole. Therefore the scraps in the higher region are those that have never been considered for a reduction yet. Eventually all scraps have been moved down (i.e., we have |hi_ptr==scrap_ptr|), and after that has happened a scrap with category~0 (which is not otherwise used) is copied down to signal the imminent end of the reduction process. When finally no more rules match the scraps in the lower region, the parsing stops. 
@< Global variables @>= scrap_pointer pp; /* current position for reducing scraps */ scrap_pointer lo_ptr; /* end of sequence of scraps that have been examined */ scrap_pointer hi_ptr; /* first scrap that has not been examined */ @ The |mathness| is an attribute of scraps that says whether their translation is to be processed in a math mode context or not. Since the translation can be concatenated from a large number of other scraps, there can be switches in and out of math inside the translation, and we need to specify the mathness at each of the boundaries. For some scraps it either makes no difference whether their translation is processed in math mode or not, or the required mathness is to be determined by the grammatical context rather than by the scrap itself. Such scraps have mathness |maybe_math| at both ends; otherwise a definite mathness is specified at either end. The least significant pair of bits of the |mathness| field of a scrap controls the right boundary, and the pair of bits to its left controls the left boundary. @d left_math(a) (a->mathness>>2) @d right_math(a) (a->mathness&0x3) @ If we combine two scraps neither of which has mathness |maybe_math| at its boundaries, then a `\.\$' is inserted in between if and only if the mathnesses at the common boundary do not agree; if a scrap with |maybe_math| joins one with a definite mathness, that mathness is propagated across the former scrap to its other boundary. In order to implement this, we maintain two mathness values while building up a text: |init_mathness| and |cur_mathness| which represent the values at the left and right boundaries of the part contributed so far; these are local variables of whatever function is concatenating translations, but they should be called by these names since they are addressed by the macros below. 
As a consequence of the left-to-right order of combining translations, a |maybe_math| scrap that is combined with scraps with definite mathnesses, will actually be set in the mode inherited from its left (unless it appears as the leftmost scrap in a reduction); this can be used to make certain symbols, such as colons, behave in two slightly different ways depending on their syntactic function. (This method is not infallible however, as a comment following the symbol will always force it to be processed in horizontal mode; this happens because |insert| scraps are tacked onto the scrap before them before any ordinary reduction can affect it.) @ Before scraps requiring some definite mathness are contributed, we invoke |set_mode(yes_math)| or |set_mode(no_math)| as appropriate; the first time this happens will determine the value of |init_mathness|. @d set_mode(x) if (cur_mathness==maybe_math) cur_mathness=init_mathness=x; else if (cur_mathness!=x) {@; app('$'); cur_mathness=x; } else @; /* expect semicolon */ @ The macro |app_trans| is invoked with a |scrap_pointer| as argument, and appends its translation as a single token; |add_trans| will in addition to this administrate |init_mathness| and |cur_mathness|, and interpolate any necessary math shifts. @d app_trans(a) app_tok(text_flag+text_index((a)->trans)) @d add_trans(a) { scrap_pointer scr=a; /* so that evaluating |a| may have side effects */ if (left_math(scr)!=maybe_math) { if (cur_mathness==maybe_math) init_mathness=left_math(scr); else if (cur_mathness!=left_math(scr)) app('$'); cur_mathness=right_math(scr); } app_trans(scr); } @ The function call |fuse(s,n)| will concatenate the translations of |n| scraps starting with |*s|, taking care of the mathnesses, and install the resulting text into |s->trans|. 
@c void fuse (scrap_pointer s, int n) { int cur_mathness=maybe_math, init_mathness=maybe_math; scrap_pointer p=s; check_toks(n); check_text(); do add_trans(p++)@; while (--n>0); /* gather all the translations */ s->trans=text_ptr; freeze_text(); s->mathness=(init_mathness<<2)+cur_mathness; } @ An |int_like| identifier following a |struct_like| token, a selection operator (`|.|' or `|->|'), or the special code \:;, is to be typeset as an ordinary identifier. The function |make_nonreserved| alters the flag of the token representing such an identifier occurrence. The scrap representing the |int_like| identifier should not be formed by any reduction, but come directly from |C_read|, so in principle we expect the translation of our scrap to be an unnested text consisting of a single token. However, a comment (or other |insert|) directly following the |int_like| identifier may complicate this picture slightly, because |insert| scraps are tacked onto the previous scrap before it gets the chance to take part in any reduction; this means our token may be buried inside one or more levels of text nesting, but still is the very first token of the translation. Any such levels of nesting are soaked off by the |while| loop below; after this process a single-token text should remain containing a reserved word, of which we replace |res_flag| by |id_flag| to make it print as an ordinary identifier. @c void make_nonreserved (scrap_pointer p) { text_pointer q=p->trans; token t; while (text_flag<=(t=text_begin(q)[0]) && t @c id_pointer first_ident(text_pointer p) { token_pointer q; token t; if (p>=text_ptr) confusion("first_ident"); @.first\_ident@> for (q=text_begin(p); q@;}|', after a |struct_like| token (declaring a structure-, union-, or enumeration tag) and for all identifiers in an enumeration list. This is accomplished by the invocation of |make_underlined| at appropriate times. 
@c void make_underlined (scrap_pointer p) /* underline entry for first identifier in |p->trans| */ { id_pointer name=first_ident(p->trans); /* name of first identifier */ if (name==NULL) return; /* this happens for unsyntactical things like `|int 3;|' */ { sixteen_bits head=xref_index(name->xref),* r=&head; int n; while ((n=xnum(*r)&num_mask)!=0 && nxref=xref_ptr; @+ else *r=xref_index(xref_ptr); } } } @ We will now consider the how the reduction rules themselves are represented and used. As we have seen, a rule must define a sequence of categories for its left hand side, and for its right hand side a category and a prescription for constructing its translation. In addition, some categories of the left hand side may be marked as context, so that they will not take part in the reduction, and there is a way to specify conditional loading of rules. A few more pieces of information are included for convenience and efficiency. Individual reduction rules are stored in a structure called |reduction|. It is organised in a way that allows for semi-static initialisation, i.e., the essential parts of information are stored near the beginning of the structure or of one of its sub-structures, so that they can be defined by an initialiser expression, while some further fields are computed from them and assigned at startup time. Within the fields that are statically initialised some fields that usually are~0 are put at the end, so that in the default case they can be omitted from the initialiser. The field |id| holds an identification number for the rule, which is used in debugging. Then follows the left hand side information, consisting of an array of at most |max_lhs_length| categories (which include those of a possible context; if less than the maximal number of categories are present they are padded with zeros), followed by integers |context| and |length|. 
The field |context| specifies which categories, if any, form the context: this can be a sequence of one or more categories at either end of the left hand side of the rule, but not at both ends. If |context==0| (as is the case if no explicit initialiser is specified), there are no context categories; when |context>0|, the first |context| categories form the left context, and when |context<0|, the last |abs(context)| categories form the right context. (In practice it is wise to use only token categories (ones that do not require reduction to be formed) for a right context, unless one can be quite sure that no unintended reduction will affect the categories taking part in the intended reduction before the right context has been reduced.) There must be at least one category that is not part of the context, lest a ``reduction'' would increase the number of scraps. The right hand side of a rule specifies a category and a string used as a format to build up the translation; for the common case that the translation is formed by concatenating the translations of all scraps of the left hand side (not including those of the context), a null pointer may be given instead of a format string. The following field |mask| can be used to specify selective loading of rules at startup time: any bit set in it will suppress loading under some condition dependent on the setting of option flags in the call of \.{CWEAVE}. The field |displacement| is computed at startup time to record the number of positions (usually negative) by which the parsing pointer~|pp| should be changed after application of the rule. @d max_lhs_length 4 @< Typedef and enumeration declarations @>= typedef struct { short id; /* for debugging */ struct {@; eight_bits category[max_lhs_length]; signed char context,length; } lhs; struct {@; eight_bits category; char *translation; } rhs; sixteen_bits mask; short displacement; } reduction; @ We shall organise the rules in a ``trie'' structure for fast matching. 
If |q| points to a trie node reached after matching some sequence of categories, and that sequence corresponds to the left hand side (including context) of some rule (which should be unique), then |q->rule| points to that rule, otherwise |q->rule==NULL|. If that sequence of categories is a proper prefix of the left hand side of a rule (which may happen whether or not |q->rule==NULL|), and $c$ is the next category in that left hand side (which implies |0next[c-1]| is the index of the trie node reached after a further match of~$c$. The entries of |q->next| that do not correspond to any such successor node are set to~0, which is unambiguous because the root of the trie does not figure as a successor of any node. We do not attempt a sparse representation (which would avoid storage of such 0's), but we do use a compact |eight_bits| representation for the entries of |q->next|; this saves a considerable amount of space, since there is a total of |max_rule_cat*max_no_of_nodes| such entries. @= typedef struct {@; reduction* rule; eight_bits next[max_rule_cat]; } trie_node; @ Trie nodes are allocated from an array |trie_node_mem|, with the root of the trie at |trie_node_mem[0]|. We introduce some macros that will help us find our way around the trie. The address the successor of the trie node pointed to by |q| for category |c| can be written as |successor(q,c)|. The absence of such a successor can be found by testing |no_successor(q,c)|. If |x| is the address of any node in the tree (except the root), then we can make that node the successor of |q| for category |c| by invoking |set_successor(q,c,x)|. 
@d trie_root (&trie_node_mem[0]) @d successor(q,c) (&trie_node_mem[(q)->next[(c)-1]]) /* category codes start at 1 */ @d no_successor(q,c) ((q)->next[(c)-1]==0) @d set_successor(q,c,x) ((q)->next[(c)-1]=(eight_bits)((x)-trie_node_mem)) @= trie_node trie_node_mem[max_no_of_nodes]; int node_no = 1; /* number of trie nodes allocated */ #ifdef DEBUG boolean install_failed=false; #endif @ Trie nodes are allocated in a straightforward sequential way. We don't trust that uninitialised statically allocated pointers will be |NULL| (although they should), especially not on machines where |NULL| is not represented as ``all bits cleared'', so we do a bit of extra work here. For the entries of the array |next| we expect no problems however (since |eight_bits| is an integral type), so we do not explicitly initialise them. @c trie_node *get_new_trie_node(void) { if (node_no>=max_no_of_nodes) overflow("trie node"); @.trie node capacity exceeded@> trie_node_mem[node_no].rule=NULL; return &trie_node_mem[node_no++]; } @ The function |install_rule| installs a reduction, and if \.{CWEAVE} was compiled with |DEBUG| set, it also performs some checks on the validity of the rule; if any check fails the variable |install_failed| is set to |true|. Since |print| has a variable number of arguments, the macro |rule_error| does not incorporate them but just prepends |print| to the argument list; the replacement text of |rule_error| can therefore not be parenthesised, and the macro should be used with some care. @d rule_error install_failed=true,print /* report a problematic syntax rule */ @ A global variable |rule_mask| is set at startup time according to the relevant option flags; any rules~|r| for which |rule_mask & r->mask!=0| are suppressed. For each set of mutually exclusive settings, a number of bits in |rule_mask| is reserved equal to the size of the set; the current setting will have its corresponding bit set while the others are cleared. 
Therefore the bits set in |r->mask| specify the option settings for which the rule is disabled, and the default state |r->mask==0| means that the rule always applies, regardless of any optional settings. @< Global... @>= sixteen_bits rule_mask; @ The function |install_rule| enters a rule into the trie structure, and if |DEBUG| is defined, performs some sanity checks on the rule. @< Prototypes @>= void install_rule (reduction* rule); @~The length of rules can be found because category~0 is not used for scraps. @c void install_rule(reduction *rule) { if ((rule_mask & rule->mask)==0) { eight_bits* p=rule->lhs.category, i=0; while (i<max_lhs_length && p[i]!=0) ++i; rule->lhs.length=i; #ifdef DEBUG @< Check left-hand side of |rule| @> @< Check right-hand side of |rule| @> #endif @< Install |rule| in the trie structure @> @< Compute |rule->displacement|, and modify |rule->lhs.length| and |rule->context| @> } } @ The left-hand side should not be only context, and all categories should be legal ones. @< Check left... @>= { if (rule->lhs.length<=abs(rule->lhs.context)) rule_error("\nNo scraps to replace in rule %d.\n", rule->id); @.No scraps to replace...@> for(i=0; i<rule->lhs.length; ++i) if (!valid_cat(p[i])) rule_error("\nUnknown category %d in LHS of rule %d.\n", p[i], rule->id); @.Unknown category...@> } @ The right-hand side should have a valid category, and unless its translation is |NULL| (or |""| which we treat as if it were |NULL|), it should contain as many times |'_'| as there are non-context categories in the left-hand side. @< Check right... 
@>= { int c=rule->rhs.category; char* s=rule->rhs.translation; if (!valid_cat(c)) rule_error("\nUnknown category %d in RHS of rule %d.\n", c, rule->id); @.Unknown category...@> if (s!=NULL) { if (*s=='\0') s=rule->rhs.translation=NULL; /* replace empty string */ else { i=0; do if (*s!='p') i+= *s++=='_'; /* count underscores */ else if (++s,isdigit((eight_bits)*s)) ++s; /* skip digit and advance */ else rule_error("\nDigit should follow 'p' in format of rule %d.\n" @.Digit should follow 'p'...@> , rule->id); while (*s!='\0'); if (i!=rule->lhs.length-abs(rule->lhs.context)) rule_error("\nCount of '_' not equal to length LHS in rule %d.\n" @.Count of '\_' ...@> , rule->id); } } } @ Since trie nodes are not represented sparsely, insertion is easy. @< Install |rule| in the trie structure @>= { trie_node* q=trie_root; for (i=0; i<rule->lhs.length; ++i) { if (no_successor(q,p[i])) set_successor(q,p[i],get_new_trie_node()); q=successor(q,p[i]); } #ifdef DEBUG if (q->rule!=NULL) rule_error("\nIdentical left-hand sides in rules %d and %d.\n" @.Identical left-hand sides...@> , q->rule->id, rule->id); #endif q->rule=rule; } @ We compute |displacement| conservatively, based on local considerations; alternatively we might also consider the whole set of rules to find larger (less negative) values that would make parsing go a bit faster. A rule can have a left hand side of length |max_lhs_length|. This means that it is safe to move |pp| so that it will afterwards be |max_lhs_length-1| positions to the left of the first scrap that may have got a new category. After a rule has been installed, there is no need to record the full length of the left hand side, including context, since this is implicit from the place in the trie where the pointer to this rule is located; rather we store the number of scraps that will be replaced. Similarly it is more useful to know the offset of the first scrap to be replaced (which is~0 in case of a right context) rather than the value of |context| as stored at initialisation. 
@< Compute |rule->displacement|... @>= { int k=rule->lhs.context,d; if (k<0) {@; rule->lhs.length+=k; k=0; } @+else rule->lhs.length-=k; d=1-max_lhs_length+k; /* this cannot be positive */ if (rule->lhs.category[k]==rule->rhs.category) /* no category change */ { ++d; #ifdef DEBUG if (rule->lhs.length==1) rule_error("\nNo categories change in rule %d.\n", rule->id); @.No categories change...@> #endif } rule->lhs.context=k; rule->displacement=d; /* if positive, an error was reported */ } @ The function |match| tests whether the category pointed to by~|p|, and its successors, match the categories in the trie structure, starting at the root of the trie, and up to a node that contains a rule. Multiple matches are possible, in which case the longest one takes precedence. We avoid using a side effect in the argument of |valid_cat|. @c reduction *match (scrap_pointer p) { trie_node* q=trie_root; reduction* rule=NULL; int c; while (c=p++->cat,valid_cat(c) && !no_successor(q,c)) if ((q=successor(q,c))->rule!=NULL) rule=q->rule; return rule; } @ When a matching rule has been found, the function |reduce| is called to perform the corresponding actions. At that point |pp| points to the first scrap involved in the match, and the argument |rule| to |reduce| points to the matching rule. If a rule has a left hand side of length~1 (not counting context) and also the default translation (plain concatenation), then all that is to be done is to change the category of a scrap, and part of the processing can be skipped. 
@c void reduce (reduction* rule) { int k=rule->lhs.context, l=rule->lhs.length; scrap_pointer s = pp+k, p=s;/* position of the new scrap */ char* f=rule->rhs.translation; /* format string for translation */ s->cat=rule->rhs.category; if (l>1 || f!=NULL) /* otherwise ready already */ { if (f==NULL) fuse(s,l), p+=l; /* default translation */ else @ if (l>1) @ } @ @displacement,scrap_base)| @> } @ When we have applied a reduction to the sequence of scraps, we usually remove scraps (we never create more scraps than we remove), thereby creating a small ``hole'' in the sequence. We fix that hole by sliding scraps downward across it, thereby moving the hole upwards, until it reaches the ``official'' hole at |lo_ptr|; then |lo_ptr| is adjusted so that the small hole is incorporated in the official hole. During the translation process the pointer~|p| was moved across all scraps that took part in the reduction, so the scraps to move are at positions~|i| with |p<=i= { scrap_pointer q=s+1; /* position after the newly formed scrap */ while (p= if (ppdisplacement) pp=scrap_base; @+ else pp+=rule->displacement; @ We need no extensive coding mechanism for describing translations, since they all follow a similar pattern. In all cases all the translations of the scraps in the left hand side (not including the context scraps) are used exactly once, in left to right order (violation of these principles would result in very strange effects indeed for the printed output). The only things that need to be added are formatting controls like |indent|, |force| or |break_space|, math shifts (but these are already taken care of by |add_trans|), and white space. We may also specify calls of |make_underlined| or |make_unreserved| for certain scraps in the left hand side. Although the items inserted are of a modest variety, one should realise that their presence is the only reason we need to parse at all; without them the translations could have been computed by purely lexical means. 
In format strings an underscore indicates the translation of the next scrap of the left hand side. Other characters each encode a formatting control; the character `\.p' encodes |opt| and is followed by a digit that becomes its argument. The characters `\.!' and `\.\$' respectively cause |make_underlined| and |make_nonreserved| to be called for the next scrap; `\.\ ' and~`\.\~' produce a space in the translation, where the latter is non-breakable and the former forces horizontal mode. To just force horizontal or math mode there are `\.h'~and~`\.m'; the latter avoids the possibility of a completely empty formula by adding a space inside math mode. The precise meaning of these and other formatting characters is easily read off from the code below. The `\.o' and `\.r' format characters (the latter is used only in compatibility mode) affect the math category used by \TeX\ for the next symbol (character or control sequence); the syntax rules using them do not put braces around that symbol, since these could also capture a following comment, causing a \TeX\ error. The number of free tokens we require to be available by calling |check_toks| is a conservative estimate, based on a hypothetic ``worst case'' reduction rule with a left hand side of length~4 and with 8 additional items in its translation (counting format codes with multi-character translations with multiplicity), whose mathnesses alternate, so that a maximal number of math shifts is required. @< Generate token list... 
@>= { int cur_mathness=maybe_math, init_mathness=maybe_math; check_toks(23); check_text(); do switch (*f++) @:format@> { case '+': app(indent); break; case '-': app(outdent); break; case 'p': app(opt); app(*f++); break; case 'f': set_mode(no_math); app(force); break; case 'F': set_mode(no_math); app(big_force); break; case 'b': set_mode(no_math); app(backup); break; case 'B': set_mode(no_math); app(break_space); break; case 't': set_mode(yes_math); app_str("\\a"); break; @.\\a@> /* next item in tab space */ case ',': set_mode(yes_math); app_str("\\,"); break; @.\\,@> /* thin space */ case 'h': set_mode(no_math); break; /* force horizontal mode */ case 'm': set_mode(yes_math); app(' '); break; /* force math mode, avoid `\.{\$\$}' */ case 'o': set_mode(yes_math); app_str("\\m"); break; @.\\m@> /* make ``mathord'' */ case 'r': set_mode(yes_math); app_str("\\MRL"); break; @.\\MRL@> /* make ``mathrel'' */ case '!': make_underlined(p); break; case '$': make_nonreserved(p); break; case ' ': set_mode(no_math); app(' '); break; case '~': app('~'); break; default: printf("%c: ",f[-1]); confusion("illegal character in format string"); case '_': add_trans(p++); } while (*f!='\0'); s->trans=text_ptr; freeze_text(); s->mathness=(init_mathness<<2)+cur_mathness; } @ We have now seen how a match is made, and what is done once a matching rule is found; we still have to consider how everything is set up properly, and how rules are repeatedly applied until no more reduction is possible, responding properly to successful and failing matches. All this is performed by the function |translate|; as we have seen it is called by |do_C| and |finish_C| after scraps have been stored from |scrap_base| to |scrap_ptr| of the |scrap_info| array, and it returns a pointer to the text representing the result of parsing all those scraps. 
We start with appending a dummy scrap if either no scraps are present at all, or some tokens remain that have not been packed into a scrap yet; the latter can only be due to `\.{@@t...@@>}' as a final item in compatibility mode. Then we set |lo_ptr| and |hi_ptr| appropriately, and begin to apply rules as long as possible. When this is done and more than one scrap remains, their translations are wrapped rather bluntly together to a single text. @c text_pointer translate (void) /* converts a sequence of scraps */ { pp=lo_ptr=hi_ptr=scrap_base; if (scrap_ptr==pp || dangling_tokens()) /* then append dummy scrap */ {@; check_scrap(); pack_scrap(insert,no_math); } @< If tracing, print an indication of where we are @> @< Reduce the scraps using the rules until no more rules apply @> @< Combine the irreducible scraps that remain, and |return| the resulting text @> } @ Before applying |match|, we must make sure it has good input (at least |max_lhs_length| scraps). If a match at |pp| exists, |reduce| will perform the required processing and updating of |pp| (in this case |pp| is certainly not increased), if not, we move to the right and try again. @< Reduce the scraps... @>= do { reduction *rule; @< Make sure the entries |pp| through |pp+max_lhs_length-1| of |scrap_info| are defined, or that |lo_ptr->cat==0| @> if ((rule=match(pp))!=NULL) reduce(rule); else {@; ++pp; @< Take special action if |pp->cat| is |end_expr| or |rproc| @> } } while (ppcat=0|, which will prevent any matches to scraps beyond those that are actually present. All scraps of category |insert| pass across the hole between |lo_ptr| and |hi_ptr|, and we take the opportunity to remove them, tacking their translation to the scrap below the hole. We even make sure that |hi_ptr->cat!=insert|, so that the scraps with which the |insert| scrap is combined will not have undergone any ordinary reduction yet. 
The only possible remaining |insert| scrap is one at the very start of the list; it will be handled at the end of all reductions. @< Make sure the entries... @>= { scrap_pointer lo_min = pp+max_lhs_length; while (lo_ptr<lo_min && lo_ptr->cat!=0) if (hi_ptr>=scrap_ptr) lo_ptr->cat=0; else { *lo_ptr++ = *hi_ptr++; while (hi_ptr<scrap_ptr && hi_ptr->cat==insert) {@; *lo_ptr = *hi_ptr++; fuse(lo_ptr-1,2); } } } @ The category pairs |lproc|--|rproc| and |begin_expr|--|end_expr| are special in that they don't occur in any rules, but rather serve only as markers. When the material in between will not reduce any further the whole construction will be wrapped up, their translations concatenated, and the result treated as an |insert| respectively an |expression|. We do not actually form an |insert| scrap if it is not at the start of the scrap sequence, but rather combine everything directly with the scrap immediately before the |lproc| scrap, leaving the category of that scrap as it is; this is necessary, because the code that combines ordinary |insert| scraps with their predecessor only looks at scraps when they cross the hole from |hi_ptr| to~|lo_ptr|. We use here that |rproc-lproc==end_expr-begin_expr==1|. The variable names |s| and |p| are chosen with the same meaning as in |reduce|, so that we could reuse a module. @< Take special action... 
@>= if (pp->cat==end_expr || pp->cat==rproc) { int start=pp->cat-1; /* the opening category matching |pp->cat| */ scrap_pointer s=pp, p=pp+1; while ((--s)->cat!=start && s>scrap_base) {} if (s->cat==start) /* if opening symbol is missing, take no action */ { if (start==begin_expr) s->cat=expression; else if (s==scrap_base) s->cat=insert; @+ else --s; /* position of new scrap */ fuse(s,(int)(p-s)); @< Fill vacant scrap positions @> /* using values of |p| and |s| */ pp= s-scrap_base= { scrap_pointer j; if (scrap_base->cat==insert && lo_ptr>scrap_base+1) { fuse(scrap_base,2); /* merge initial |insert| into the next scrap */ j=scrap_base; j->cat=j[1].cat; --lo_ptr; while (++j check_toks(1); for (j=scrap_base; j= #ifdef DEBUG int tracing; /* how much parsing details to show */ #endif @ When parsing fails to reduce everything to a single scrap, pleasing results will probably not be obtained; it is therefore advisable to run \.{CWEAVE} with |tracing==trace1| before a final version of a |CWEB| program is fixed. In order to allow this without changing the source file itself, we initialise |tracing| to |trace1| if the flag `\.{+d}' is supplied to \.{CWEAVE}. @< Set initial values @>= #ifdef DEBUG tracing = flags['d'] ? trace1 : trace0; #endif @ The following code is activated by the `\.{+d}' flag or the \:1 control code. @= #ifdef DEBUG { if (tracing==trace1 && lo_ptr>=scrap_base+2) { print("\nIrreducible scrap sequence at line %d in section %d:\n" @.Irreducible scrap sequence...@> ,cur_line, section_count); mark_harmless(); for (j=scrap_base; jcat), putchar(' '); print_cat(j->cat); new_line(); /* |term_line_empty| is still valid */ } } #endif @ When full tracing is enabled the following message indicates which piece of \Cee~text is being parsed (but for section bodies it will generally show the first line of the next section, since that has already been fetched). 
@= #ifdef DEBUG { if (tracing>=trace2) { print("\nTracing after l.%d:\n", cur_line); @.Tracing after...@> if (loc>buffer+50) /* shorten long lines to keep within margins */ {@; printf("..."); term_write (loc-50,50); } else term_write(buffer,loc-buffer); new_line(); /* |term_line_empty| is still valid */ } } #endif @ After each reduction, full tracing will print a line starting with the rule number, followed by a display of all the categories of scraps which have been considered until now, i.e., those at positions below |lo_ptr|. The scrap that was produced by this reduction, which is pointed to by~|s|, has its category highlighted by enclosing it in inverted angle brackets. If tracing is set to~3, extra-full tracing is active, and mathnesses at the boundaries of scraps are indicated. @d math_char(x) ((x)==yes_math ? '+' : (x)==no_math ? '-' : '?') @= #ifdef DEBUG { scrap_pointer k; /* pointer into |scrap_info| */ if (tracing>=trace2) { print("\n%3d:", rule->id); for (k=scrap_base; k'), print_cat(k->cat), putchar('<'); else print_cat(k->cat); if (tracing==trace3) putchar(math_char(right_math(k))); } print("%s\n", hi_ptr \def\:#1{`\.{@@#1}'}% for in case this file is processed in isolation We first arrange the proper setting of |rule_mask|, which will control the selection of rules actually used. Recall that any bits set in the mask of a rule prescribe its {\it suppression\/} when the same bit is set in |rule_mask|; therefore for instance the bit characterising \Cpp\ is called |no_plus_plus|, so that rules specifying it will not be loaded for \Cpp. In some cases two masks will be combined using the bitwise-or operator `|@v|', this means (somewhat counterintuitively) that the rule will only be selected if the conditions represented by the two masks are {\it both\/} satisfied. 
The use of the bitwise-and operator `|&|' is even more exceptional: it is only meaningful if its two operands both select one setting of the same three-way switch; the rule will then be selected if that switch is in either of the two indicated positions. The |merged_decls| flag is special in that setting `\.{+m}' only enables an extra rule, but does not disable any rules; therefore only one bit is used for this option, and raising this bit in |rule_mask| suppresses the rule marked with |merged_decls|. @d cwebx 0x0001 /* use normally */ @d compatibility 0x0002 /* use in compatibility mode */ @d only_plus_plus 0x0004 /* use in \Cpp\ */ @d no_plus_plus 0x0008 /* use in ordinary \Cee\ only */ @d unaligned_braces 0x0050 /* use if `\.{+u}' flag was set */ @d aligned_braces 0x0020 /* use unless `\.{+u}' flag was set */ @d wide_braces 0x0030 /* use if `\.{+w}' set */ @d standard_braces 0x0060 /* use unless `\.{+u}' or `\.{+w}' set */ @d merged_decls 0x0080 /* use if `\.{+m}' set */ @d forced_statements 0x0100 /* use if `\.{+a}' or `\.{+f}' set */ @d no_forced_statements 0x0600 /* use unless `\.{+a}' or `\.{+f}' set */ @d all_stats_forced 0x0300 /* use if `\.{+a}' set */ @d not_all_stats_forced 0x0400 /* use unless `\.{+a}' set */ @< Set initial values @>= rule_mask= (compatibility_mode ? 0x0001 : 0x0002) | (C_plus_plus ? 0x0008 : 0x0004) | (flags['w'] ? 0x0040 : flags['u'] ? 0x0020 : 0x0010) | (flags['m'] ? 0x0000 : 0x0080) | (flags['a'] ? 0x0400 : flags['f'] ? 0x0200 : 0x0100) ; { static reduction rule[] = { @< Rules @>@;@; }; @/int i=array_size(rule); @+ do install_rule(&rule[--i]); while (i>0); #ifdef DEBUG if (install_failed) fatal("inconsistent grammar",0); #endif } @ {\it Expressions}. @:rules@> These rules should be obvious. Rule~5 allows typedef identifiers to be used as field selectors in structures; rules 7~and~8 attach a parameter list in a function call. 
In rule~14 we prefix a potentially binary operator such as `|*|' that is used in a unary way by a `\.{\\mathord}' command to make sure that \TeX\ will not mistake it for a binary operator. In simple cases such as |*p| this is redundant, but if such operators are repeated more than one level deep, as in |**p|, \TeX\ would otherwise treat the first operator as the left operand of the second, and insert the wrong spacing. Moreover, typical \Cee~constructions as a cast |(void*) &x| or a declaration |char *p@;| would confuse \TeX\ even more. In rule~13 we need not insert `\.{\\mathord}', since operators of category |unop| are already treated as ordinary symbols by~\TeX. @< Rules @>= { 1, {{expression, unop}}, {expression, NULL}}, @/ { 2, {{expression, binop, expression}}, {expression, NULL}}, @/ { 3, {{expression, unorbinop, expression}}, {expression, NULL}}, @/ { 4, {{expression, select, expression}}, {expression, NULL}}, @/ { 5, {{expression, select, int_like}}, {expression, "__$_"}}, @/ { 6, {{expression, comma, expression}}, {expression, "__p1_"}}, @/ { 7, {{expression, expression}}, {expression, NULL}}, @/ { 8, {{expression, lpar, rpar}}, {expression, "__,_"}}, @/ { 9, {{expression, subscript}}, {expression, NULL}}, @/ {10, {{lpar, expression, rpar}}, {expression, NULL}}, @/ {11, {{lbrack, expression, rbrack}}, {subscript, NULL}}, @/ {12, {{lbrack, rbrack}}, {subscript, "_,_"}}, @/ {13, {{unop, expression}}, {expression, NULL}}, @/ {14, {{unorbinop, expression}}, {expression, "o__"}}, @[@] @~Here are some less common kinds of formulae. Processing the colon belonging to the question mark operator in math mode will give it the proper spacing, which is different from that of a colon following a label. Rule~21 processes casts, since the category |parameters|, which represents parenthesised lists specifying function argument types, encompasses the case of a single parenthesised type specification. 
The argument of |sizeof| may be a type specification rather than an expression; in \Cee\ (unlike \Cpp) it then must be parenthesised. %, but not in \Cpp\ (and |sizeof_like| might be `\&{new}'). @< Rules @>= {20, {{question, expression, colon}}, {binop, "__m_"}}, @/ {21, {{parameters, expression}}, {expression, "_,_"}}, @/ {22, {{sizeof_like, parameters}}, {expression, NULL}}, @/ {23, {{sizeof_like, expression}}, {expression, NULL}}, @/ {24, {{sizeof_like, int_like}}, {expression,"_~_"},only_plus_plus}, @[@] @ {\it Declarations}. In a declaration in \Cee, the identifier being declared is wrapped up in a declarator, which looks like an expression of a restricted kind: only prefix asterisk, postfix subscript and formal parameters, and parentheses are used. In a bottom-up parser of the kind we are using, it is natural, and hardly avoidable, that declarators are parsed as expressions. Therefore we start recognising a declaration when we see a type specifier followed by the first declarator; at that point we have a succession `|int_like| |expression| |semi|' or `|int_like| |expression| |comma|' (rules 31~and~33). It is also possible that there are no declarators at all, namely when a |struct|, |union|, or~|enum| specifier is introduced without declaring any variables; in that case we have `|int_like| |semi|' (rule~32). Because the type specifier might be composite, like |unsigned long int|, and there might moreover be storage class specifiers and type modifiers (like `|const|'), we first contract any sequence of |int_like| items to a single one (rule~30). In case the declarator was followed by a comma we reduce to |int_like|, so that the next declarator can be matched, otherwise we reduce to |declaration|. It is not quite true that declarators always look like expressions, since the type modifiers `|const|' and `|volatile|' may penetrate into declarators. When they do they will almost always be preceded by an asterisk, and rule~34 will treat such cases. 
The choice for |int_like| as the result category is not completely obvious, since it makes the modifier and the preceding asterisk part of the type specifier rather than of the declarator, which strictly speaking is not correct; the choice for |unop| or |unorbinop| might therefore seem a more logical one. One reason for not doing that is that a space would have to be inserted in the translation after the modifier scrap, which would not look right in abstract declarators for contrived cases like \hbox{|int f(char *const)@;|}; more importantly, if the modifier would become part of the declarator, it would be a (reserved) identifier that precedes the identifier actually being declared, and when the declarator then receives a call from |make_underlined| by rule 31~or~33, it would mislead |first_ident|. The current solution has a small flaw as well, since it cannot handle the situation where the modifier is separated from the type specifier by a parenthesis, as in $\&{void}~(\m*\&{const}~\m*f)~(\&{int})$; such cases are quite uncommon, are hard to handle by rules that will not spuriously match in other situations, and even then they would still cause problems with |make_underlined|, so we do not attempt to handle them. @< Rules @>= {30, {{int_like, int_like}}, {int_like, "_~_"}}, @/ {31, {{int_like, expression, semi}}, {declaration, "_~!__"}}, @/ {32, {{int_like, semi}}, {declaration, NULL}}, @/ {33, {{int_like, expression, comma}}, {int_like, "_~!__p1"}}, @/ {34, {{unorbinop, int_like}}, {int_like, "o__"}}, @[@] @ If a typedef identifier is simultaneously used as a field selector in a |struct| or |union| declaration, it must be made to parse as expression and be printed in italic type; this can be achieved by placing the magic wand \:; before the identifier, by rule~35. The reason that we place \:; at the beginning rather than at the end of the construction here, is to prevent the |int_like| identifier from combining with something before it first. 
Rule~35 only applies if the \:; does not match by any rule with what comes before it. Rule~36 handles the case that a function is declared with specified argument types, which is not handled by the expression syntax given until now. It also parses new-style (\caps{ANSI/ISO}) headings of function definitions; in that case, the resulting |function_head| will not be incorporated into a |declaration| (unless a comma or semicolon follows) but rather into a |function|. If the parameter specifications include identifiers (as in the case of function headings), the arguments look like declarations without the final semicolon; rule~37 (with aid of rule~33) constructs such parameter lists. Parameter specifications using abstract declarators (without identifiers) will be treated below. In |struct| declarations we may encounter bit-field specifications with or without an identifier; these are handled by rules 38~and~39 (the constant expression following the colon will later receive a spurious call from |make_underlined|, but in case of numeric constants this does no harm). @< Rules @>= {35, {{magic, int_like}}, {expression, "_$_"}}, @/ {36, {{expression, parameters}}, {function_head, "_B_"}}, @/ {37, {{lpar, int_like, expression, rpar}}, {parameters, "_+++_~!_---_"}},@/ {38, {{int_like, expression, colon}}, {int_like, "_~!_m_"}}, @/ {39, {{int_like, colon}}, {int_like, "_m_"}}, @[@] @ Abstract declarators are used after type specifiers in casts and for specifying the argument types in declarations of functions or function pointers. They are like ordinary declarators, except that the defined identifier has been ``abstracted''; an example is `|**(* )(int)|' in `|void g(char**(* )(int))@;|', which tells that |g| takes as argument a pointer to a function with |int| parameter yielding a pointer to pointer to |char|. 
A difficulty with abstract declarators is that they are built up around the vacuum left by abstracting the identifier, and since for more than one reason we cannot allow rules with empty left hand side, we have to find an alternative way to get them started. The natural solution to this problem is to look for sequences that can only occur if an identifier has been abstracted from between them, for instance `\.{*)}' (in categories: |unorbinop| |rpar|). The most compelling reason why in |C_read| we had to laboriously change the category of a |type_defined| identifier to |expression| instead of |int_like| inside its defining typedef declaration, is that it allows us to ensure that any remaining |int_like| scrap that is followed by a |subscript| is a sure sign of an abstract declarator. Here are the cases that start off abstract declarators (these are the first examples of rules that need context categories in their left hand side). As a visual hint to the reader we leave a little bit of white space on the spot where the identifier has vanished. Rules 40~and~41 handle declarators for pointer arguments, where the vanished identifier is preceded by an asterisk, which either stands at the end of the declarator, or is parenthesised (for function pointer arguments). In these rules there is no need to prefix the asterisk with `\.{\\mathord}', since the right context makes an interpretation as binary operator impossible. Rules 42~and~43 treat declarators for arrays, possibly of pointers; there are no corresponding rules with |parameters| instead of |subscript| since abstract declarators never specify functions themselves, only function pointers. In fact the ``function analogue'' of rule~43 would incorrectly match a cast following an operator like `|*|' or `|-|'. 
Rule~44 treats an abstract declarator consisting of subscripts only, which are redundantly parenthesised; here too the corresponding pattern with |parameters| is not only never needed, it would also spuriously trigger on parenthesised expressions that start with a cast. @< Rules @>= {40, {{unorbinop, rpar}, -1}, {declarator, "_,"}}, @/ {41, {{unorbinop, comma},-1}, {declarator, "_,"}}, @/ {42, {{int_like, subscript},1}, {declarator, ",_"}}, @/ {43, {{unorbinop, subscript},1}, {declarator, ",_"}}, @/ {44, {{lpar, subscript},1}, {declarator, ",_"}}, @[@] @~ Abstract declarators may grow just like ordinary declarators, to include prefixed asterisks, as well as postfixed subscripts and parameters, and grouping parentheses. @< Rules @>= {45, {{unorbinop, declarator}}, {declarator, "o__"}}, @/ {46, {{declarator, subscript}}, {declarator, NULL}}, @/ {47, {{declarator, parameters}}, {declarator, NULL}}, @/ {48, {{lpar, declarator, rpar}}, {declarator, NULL}}, @[@] @~ Here is how abstract declarators are assembled into |parameters|, keeping in mind that the ``abstract declarator'' might be completely empty (i.e., absent) as in `|void f(int);|' (rules 51~and~53). We put no space after the type specifier here, since it is followed either by an abstract declarator, a right parenthesis or comma, so certainly not by an identifier; therefore a space is neither necessary, nor would it improve readability. The \caps{ANSI/ISO} syntax allows empty parentheses as a parameter specification in abstract declarators, although this is an old-style form; rule~54 has been included to handle this case. Fortunately a parenthesised list of identifiers (which would parse as |expression|) is not allowed as parameter specification. 
@< Rules @>= {50, {{lpar, int_like, declarator, comma}}, {lpar, "____p5"}}, @/ {51, {{lpar, int_like, comma}}, {lpar, "___p5"}}, @/ {52, {{lpar, int_like, declarator, rpar}}, {parameters, NULL}}, @/ {53, {{lpar, int_like, rpar}}, {parameters, NULL}}, @/ {54, {{lpar, rpar}}, {parameters, "_,_"}}, @[@] @ {\it Structure, union, and enumeration specifiers}. It is permissible to use typedef identifiers as structure, union, or enumeration tags as well, so we include cases where an |int_like| follows a |struct_like| token. In \Cpp, we may also find things like `\&{private}:' in a class specifier; these are parsed just like `|default:|', i.e., as a |label| (rule~66). @< Rules @>= {60, {{struct_like, lbrace}}, {struct_head, "_ft_"},standard_braces}, @/ {60, {{struct_like, lbrace}}, {struct_head, "_~_"},unaligned_braces}, @/ {60, {{struct_like, lbrace}}, {struct_head, "_f_"},wide_braces}, @/ {61, {{struct_like, expression, lbrace}}, {struct_head, "_~!_ft_"},standard_braces}, @/ {61, {{struct_like, expression, lbrace}}, {struct_head, "_~!_~_"},unaligned_braces}, @/ {61, {{struct_like, expression, lbrace}}, {struct_head, "_~!_f_"},wide_braces}, @/ {62, {{struct_like, int_like, lbrace}}, {struct_head, "_~!$_ft_"},standard_braces|no_plus_plus}, @/ {62, {{struct_like, int_like, lbrace}}, {struct_head, "_~!_ft_"},standard_braces|only_plus_plus}, @/ {62, {{struct_like, int_like, lbrace}}, {struct_head, "_~!$_~_"},unaligned_braces|no_plus_plus}, @/ {62, {{struct_like, int_like, lbrace}}, {struct_head, "_~!_~_"},unaligned_braces|only_plus_plus}, @/ {62, {{struct_like, int_like, lbrace}}, {struct_head, "_~!$_f_"},wide_braces|no_plus_plus}, @/ {62, {{struct_like, int_like, lbrace}}, {struct_head, "_~!_f_"},wide_braces|only_plus_plus}, @/ {63, {{struct_like, expression}}, {int_like, "_~_"}}, @/ {64, {{struct_like, int_like}}, {int_like, "_~$_"},no_plus_plus}, @/ {64, {{struct_like, int_like}}, {int_like, "_~_"},only_plus_plus}, @/ {65, {{struct_head, declaration, rbrace}}, {int_like, 
"_+_-f_"},standard_braces}, @/ {65, {{struct_head, declaration, rbrace}}, {int_like, "_+f_-f_"},unaligned_braces & wide_braces}, @/ {66, {{label, declaration}}, {declaration, "b_f_"},only_plus_plus}, @[@] @ Rules 67--70 are for enumerations; they avoid forced line breaks and call |make_underlined| for all the enumeration constants. @< Rules @>= {67, {{struct_like, lbrace, expression},-1}, {struct_head, "_B_"}}, @/ {68, {{struct_like, expression, lbrace, expression},-1}, {struct_head, "_~_B_"}}, @/ {69, {{struct_head, expression, comma, expression},1}, {expression, "__B!_"}}, @/ {70, {{struct_head, expression, rbrace}}, {int_like, "_~+!_-B_"}}, @[@] @ The following rules are added to allow short structure and union specifiers to be kept on one line without having to repeatedly specify \:+. The idea is to place \:; after the left brace; this will cause the rules below to be invoked instead of those above, which avoids introducing forced line breaks. @< Rules @>= {71, {{struct_like, lbrace, magic}}, {short_struct_head, "_B__+"}}, @/ {72, {{struct_like, expression, lbrace, magic}}, {short_struct_head, "_~!_B__+"}}, @/ {73, {{struct_like, int_like, lbrace, magic}}, {short_struct_head, "_~!$_B__+"}, no_plus_plus}, @/ {73, {{struct_like, int_like, lbrace, magic}}, {short_struct_head, "_~!_B__+"}, only_plus_plus}, @/ {74, {{short_struct_head, declaration}}, {short_struct_head, "_B_"}}, @/ {75, {{short_struct_head, rbrace}}, {int_like, "_-B_"}}, @[@] @ {\it Statements}. Rule~80 gives the usual way statements are formed, while rule~81 handles the anomalous case of an empty statement. 
Its use can always be avoided by using an empty pair of braces instead, which much more visibly indicates the absence of a statement (e.g., an empty loop body); when the empty statement is used however, it will either be preceded by a space or start a new line (like any other statement), so there is always some distinction between a |while| loop with empty body and the |while| that ends a |do|~statement. A rule like this with left hand side of length~1 makes the corresponding category (viz.~|semi|) ``unstable'', and can only be useful for categories that usually are scooped up (mostly from the left) by a longer rule. Rules 82--84 make labels (ordinary, case and default), and rules 85~and~86 attach the labels to statements. Rule~87 makes \:; behave like an invisible semicolon when it does not match any of the rules designed for it, for instance if it follows an expression. @< Rules @>= {80, {{expression, semi}}, {statement, NULL}}, @/ {81, {{semi}}, {statement, NULL}}, @/ {82, {{expression, colon}}, {label, "!_h_"}}, @/ {83, {{case_like, expression, colon}}, {label, "_ _h_"}}, @/ {84, {{case_like, colon}}, {label, "_h_"}}, @/ {85, {{label, label}}, {label, "_B_"}}, @/ {86, {{label, statement}}, {statement, "b_B_"},not_all_stats_forced}, @/ {86, {{label, statement}}, {statement, "b_f_"},all_stats_forced}, @/ {87, {{magic}}, {semi, NULL}}, @[@] @ The following rules format compound statements and aggregate initialisers. Rules 90--94 combine declarations and statements within compound statements. A newline is forced between declarations by rule~90, unless the declarations are local (preceded by a left brace) and `\.{+m}' was specified (rule~91); this rule does not apply to structure specifiers, because the left brace will already have been captured in a |struct_head| before the rule can match. If `\.{+f}'~or~`\.{+a}' was specified, then a newline is forced between statements as well (rule~93). 
Between the declarations and statements some extra white space appears in ordinary \Cee\ (rule~92), but not in \Cpp, where declarations and statements may be arbitrarily mixed (rule~94). Rules 95--97 then build compound statements, where the last case is the unusual one where a compound statement ends with a declaration; empty compound statements are made into simple statements so that they will look better when used in a loop statement or after a label. If compound statements are not engulfed by a conditional or loop statement (see below) then they decay to ordinary statements by rule~98. Rules 99~and~100 reduce aggregate initialiser expressions, where the reduction of comma-separated lists of expressions is already handled by the expression syntax. @< Rules @>= {90, {{declaration, declaration}}, {declaration, "_f_"}}, @/ {91, {{lbrace, declaration, declaration},1}, {declaration, "_B_"},merged_decls}, @/ {92, {{declaration, statement}}, {statement, "_F_"},no_plus_plus}, @/ {92, {{declaration, statement}}, {statement, "_f_"},only_plus_plus}, @/ {93, {{statement, statement}}, {statement, "_f_"},forced_statements}, @/ {93, {{statement, statement}}, {statement, "_B_"},no_forced_statements},@/ {94, {{statement, declaration}}, {declaration, "_f_"},only_plus_plus}, @/ {95, {{lbrace, rbrace}}, {statement, "_,_"}}, @/ {96, {{lbrace, statement, rbrace}}, {compound_statement, "ft_+_-f_"},standard_braces}, @/ {96, {{lbrace, statement, rbrace}}, {compound_statement, "_+f_-f_"},unaligned_braces}, @/ {96, {{lbrace, statement, rbrace}}, {compound_statement, "f_+f_-f_"},wide_braces}, @/ {97, {{lbrace, declaration, rbrace}}, {compound_statement, "ft_+_-f_"},standard_braces}, @/ {97, {{lbrace, declaration, rbrace}}, {compound_statement, "_+f_-f_"},unaligned_braces}, @/ {97, {{lbrace, declaration, rbrace}}, {compound_statement, "f_+f_-f_"},wide_braces}, @/ {98, {{compound_statement}}, {statement, "f_f"}}, @/ {99, {{lbrace, expression, comma, rbrace}}, {expression, "_,__,_"}},@/ {100, 
{{lbrace, expression, rbrace}}, {expression, "_,_,_"}}, @[@] @ Like for structure and union specifiers, we allow compound statements to be kept on one line by inserting \:; after the left brace. Such statements will reduce to |statement| rather than to |compound_statement|, so that they will be treated as if they were simple statements. @< Rules @>= {101, {{lbrace, magic}}, {short_lbrace, "__+"}}, @/ {102, {{short_lbrace, declaration}}, {short_lbrace, "_B_"}}, @/ {103, {{short_lbrace, statement}}, {short_lbrace, "_B_"}}, @/ {104, {{short_lbrace, rbrace}}, {statement, "_-B_"}}, @[@] @ {\it Selection, iteration and jump statements}. There are three intermediate categories involved in the recognition of conditional statements. The category |if_like| stands for `|if|' or an initial segment of a repeated if-clause, up to and including `|else|~|if|'. An |if_head| is an |if_like| followed by its (parenthesised) condition (rules 110~and~111). If the statement following the condition is followed by `|else|~|if|', the whole construct reduces to |if_like| (so that the indentation will not increase after the second condition, rules 112~and~113), otherwise, if only `|else|' follows, reduction is to an |if_else_head| (rules 114~and~115), and finally, if no |else| follows at all, we reduce with only the if-branch to |statement| (rules 116~and~117). The reduction rules for |if_else_head| differ from those for |if_head| in that it will not combine with an |else|, even if it is present; the formatting is identical to that of an |else|-less |if_head| (rules 118~and~119). (It might be tempting to replace rules 116~and~117 by a reduction from |if_head| to |if_else_head| to be applied if no matching `|else|' is found, but that would require some subtle measures to prevent this decay at times when the right context is insufficiently reduced to decide whether an `|else|' is present or not.)
The formatting of the if and else branches depends on whether they are compound statements or some other kind of statement (possibly another conditional statement), and on the flags for statement forcing and brace alignment. @< Rules @>= {110, {{if_like, expression}}, {if_head, "f_~_"}}, @/ {111, {{lbrace,if_like,expression},1}, {if_head, "_~_"},standard_braces},@/ {112, {{if_head, compound_statement, else_like, if_like}}, {if_like, "__f_~_"},aligned_braces}, @/ {112, {{if_head, compound_statement, else_like, if_like}}, {if_like, "_~_~_~_"},unaligned_braces}, @/ {113, {{if_head, statement, else_like, if_like}}, {if_like, "_+B_-f_~_"},not_all_stats_forced}, @/ {113, {{if_head, statement, else_like, if_like}}, {if_like, "_+f_-f_~_"},all_stats_forced}, @/ {114, {{if_head, compound_statement, else_like}}, {if_else_head, "__f_"},aligned_braces}, @/ {114, {{if_head, compound_statement, else_like}}, {if_else_head, "_~_~_"},unaligned_braces}, @/ {115, {{if_head, statement, else_like}}, {if_else_head, "_+B_-f_"},not_all_stats_forced},@/ {115, {{if_head, statement, else_like}}, {if_else_head, "_+f_-f_"},all_stats_forced}, @/ {116, {{if_head, compound_statement}}, {statement, "__f"},aligned_braces}, @/ {116, {{if_head, compound_statement}}, {statement, "_~_f"},unaligned_braces}, @/ {117, {{if_head, statement}}, {statement, "_+B_-f"},not_all_stats_forced}, @/ {117, {{if_head, statement}}, {statement, "_+f_-f"},all_stats_forced}, @/ {118, {{if_else_head, compound_statement}}, {statement, "__f"},aligned_braces}, @/ {118, {{if_else_head, compound_statement}}, {statement, "_~_f"},unaligned_braces}, @/ {119, {{if_else_head, statement}}, {statement, "_+B_-f"},not_all_stats_forced}, @/ {119, {{if_else_head, statement}}, {statement, "_+f_-f"},all_stats_forced}, @[@] @ The following rules prevent forced line breaks from conditional statements that occur within a one-line compound statement. 
@< Rules @>= {120, {{short_lbrace, if_like, expression},1}, {if_head, "_~_"}}, @/ {121, {{short_lbrace, if_head, statement, else_like}}, {short_lbrace, "_B_B_B_"}}, @/ {122, {{short_lbrace, if_head, statement}}, {short_lbrace, "_B_B_"}}, @[@] @ Switch and loop statements make use of the syntax for conditionals by reducing to |if_else_head| which will take one further statement and indent it (rules 130~and~131). Recall that `|for|' and `|switch|' are both |while_like|; the parenthesised object following `|for|' looks like nothing we have seen before, however, so we need extra rules to come to terms with it (rules 132--134). Rule~132 is needed to avoid a line break when these are normally inserted between statements, and rule~134 is needed in case the third expression is empty. The |do|-|while| loops have to be treated separately. Because we want to distinguish the case of a |compound_statement| as loop body from other kinds of statements, we cannot wait until the |while| combines with the loop control condition to an |if_else_head|, since by then a |compound_statement| will have decayed to |statement|. Hence we pick up the unreduced `|while|' token and form a new category |do_head| (rules 135~and~136); in case of a compound statement the `|while|' will be on the same line as the closing brace. Rules 137~and~138 then combine this with the condition and the ridiculous mandatory semicolon at the end to form a |statement|. 
@< Rules @>= {130, {{while_like, expression}}, {if_else_head, "f_~_"}}, @/ {131, {{lbrace, while_like, expression},1}, {if_else_head, "_~_"},standard_braces}, @/ {132, {{lpar, statement, statement}, 1}, {statement, "_B_"}, forced_statements}, @/ {133, {{lpar, statement, expression, rpar}}, {expression, "__B__"}}, @/ {134, {{lpar, statement, rpar}}, {expression, NULL}}, @/ {135, {{do_like, compound_statement, while_like}}, {do_head, "__~_"},standard_braces}, @/ {135, {{do_like, compound_statement, while_like}}, {do_head, "_~_~_"},unaligned_braces}, @/ {135, {{do_like, compound_statement, while_like}}, {do_head, "__f_"},wide_braces}, @/ {136, {{do_like, statement, while_like}}, {do_head, "_+B_-B_"},not_all_stats_forced}, @/ {136, {{do_like, statement, while_like}}, {do_head, "_+f_-f_"},all_stats_forced}, @/ {137, {{do_head, expression, semi}}, {statement, "f_~__f"}}, @/ {138, {{lbrace, do_head, expression, semi},1}, {statement, "_~__f"}}, @[@] @ The following rules prevent forced line breaks from loop statements that occur within a one-line compound statement. Since no special layout is required between the heading of a |while| loop and its body, rule~139 incorporates the heading as if it were a separate statement. For a |do|-|while| loop we must take a bit more effort to get the spacing following the |while| correct. @< Rules @>= {139, {{short_lbrace, while_like, expression}}, {short_lbrace, "_B_~_"}}, @/ {140, {{short_lbrace, do_like, statement, while_like},1}, {do_head, "_B_B_"}}, @/ {141, {{short_lbrace, do_head, expression, semi}}, {short_lbrace, "_B_~__"}}, @[@] @ The tokens `|goto|', `|continue|', `|break|', and `|return|' are all |return_like|; although what may follow them is not the same in all cases, the following two rules cover all legal uses. 
Note that rule~146 does not wait for a semicolon to come along; this may lead to a premature match as in `|return a+b;|', but this does not affect formatting, while the rule allows saying things like `|return home|' in a module name (or elsewhere) without risking irreducible scraps. @< Rules @>= {145, {{return_like, semi}}, {statement, NULL}}, @/ {146, {{return_like, expression}}, {expression, "_~_"}}, @[@] @ {\it Function definitions and external declarations}. Apart from the initial specification of the result type (which is optional, defaulting to |int|), a new-style function heading will parse as an |function_head| (see the declaration syntax above), while an old-style function heading is an |expression| possibly followed by a |declaration| (specifying the function parameters). Rules 150--152 parse these two kinds of function headings together with the function body, yielding category |function|; rule~153 attaches the optional result type specifier. Although the \Cee~syntax requires that the function body is a compound statement, we allow it to be a |statement| (to which |compound_statement| will decay), for in case a very short function body is specified using `\.{\{@@;}'. At the outer level declarations and functions can be mixed; when they do a bit of white space surrounds the functions (rules 154--156). The combination of several declarations is already taken care of by the syntax for compound statements; no extra white space is involved there. Rules 157--159 take care of function declarations that are not definitions (i.e., there is no function body); if followed by a semicolon, a comma or a right parenthesis, the |function_head| decays to an |expression|, and the rest of the syntax will take care of recognising a |declaration| or |parameters|. Rules 153~and~157 will be replaced in~\Cpp, for reasons explained below (incidentally, this is the reason the category |function_head| was introduced; it used to be simply |expression|). 
@< Rules @>= {150, {{function_head, statement}}, {function, "!_f_"}}, @/ {151, {{expression, statement}}, {function, "!_f_"}}, @/ {152, {{expression, declaration, statement}}, {function, "!_++f_--f_"}}, @/ {153, {{int_like, function}}, {function, "_ _"}}, @/ {154, {{declaration, function}}, {function, "_F_"}}, @/ {155, {{function, declaration}}, {declaration, "_F_"}}, @/ {156, {{function, function}}, {function, "_F_"}}, @/ {157, {{function_head, semi},-1}, {expression, NULL},no_plus_plus}, @/ {158, {{function_head, comma},-1}, {expression, NULL}}, @/ {159, {{function_head, rpar},-1}, {expression, NULL}}, @[@] @ {\it Module names}. Although module names nearly always stand for statements, they can be made to stand for a declaration by appending \:;, or for an expression by appending `\.{@@;@@;}'. The latter possibility is most likely to be useful if the module stands for (part of) an initialiser list. A module name can also be made into an expression by enclosing it in \:[ and~\:], but in that case rule~160 will apply first, placing a forced break after the module name. Rules 161, 164,~and~165 prevent a module name from generating forced breaks if it occurs on a one-line compound statement or structure or union specifier, while rules 167~and~168 serve to prevent rules 163~and~164 from matching with priority over rule~166. The rules given here will be replaced by other ones in compatibility mode. 
@< Rules @>= {160, {{mod_scrap}}, {statement, "_f"},cwebx}, @/ {161, {{short_lbrace, mod_scrap},1}, {statement, NULL},cwebx}, @/ {162, {{mod_scrap, magic}}, {declaration, "f__f"},cwebx}, @/ {163, {{lbrace, mod_scrap, magic},1}, {declaration, "__f"},cwebx|standard_braces}, @/ {164, {{short_lbrace, mod_scrap, magic},1}, {declaration, NULL},cwebx}, @/ {165, {{short_struct_head, mod_scrap, magic},1}, {declaration,NULL},cwebx}, @/ {166, {{mod_scrap, magic, magic}}, {expression, NULL},cwebx}, @/ {167, {{lbrace, mod_scrap, magic, magic},1}, {expression, NULL},cwebx|standard_braces}, @/ {168, {{short_lbrace, mod_scrap, magic, magic},1}, {expression, NULL},cwebx}, @[@] @ {\it Additional rules for compatibility mode}. @^Levy/Knuth \.{CWEB}@> Although our grammar differs completely from the one used in \LKC., we use most of it also in compatibility mode (the exception is formed by the rules concerning module names). We do add a few rules in compatibility mode, mostly to deal with circumstances that are different for some reason or other. We start with module names, which behave in a completely different way. In compatibility mode, as in \LKC., a module name normally stands for an expression (rule~164) and in practice is almost always followed by a visible or invisible (|magic|) semicolon. Rules 160~and~161 treat these cases explicitly, in order to insert a forced break after the semicolon; rule~161 for the case of an invisible semicolon is needed because if we would wait for the |magic| semicolon to decay to an ordinary one, it might instead combine with an |int_like| token following it. Rules 162~and~163 are provided to allow the short form of compound statements even in compatibility mode (even though it is not present in \LKC.): they preempt rules 160~and~161, avoiding the forced break.
Since in compatibility mode one has no means of indicating that a module name stands for a set of declarations, we add rule~165 to allow them nevertheless to be used before a function definition. Rules 170~and~171 compensate for the fact that compound assignment operators like `|+=|' are scanned as two tokens in compatibility mode (see section@#truly stupid@> for an explanation why this is done). Rule~172 allows types to be used in the argument lists of macros, without enclosing them between \:[~and~\:], in compatibility mode; this is done frequently in the Stanford GraphBase. @^Stanford GraphBase@> It is sufficient to remove expressions from the beginning of the argument list, since types, and more generally types followed by declarators, are already removed by the standard rules for |parameters|. As a result the argument list will either reduce to an |expression| or to |parameters|, depending on whether the final item was an expression. In both cases it will combine with the macro name to an |expression|, although the spacing will be a bit too wide in the |parameters| case. But then, one ought to use \:[~and~\:] anyway, which avoids this problem. @< Rules @>= {160, {{mod_scrap, semi}}, {statement, "__f"},compatibility}, @/ {161, {{mod_scrap, magic}}, {statement, "__f"},compatibility}, @/ {162, {{short_lbrace, mod_scrap, semi},1}, {statement, NULL},compatibility}, @/ {163, {{short_lbrace, mod_scrap, magic},1}, {statement, NULL},compatibility}, @/ {164, {{mod_scrap}}, {expression, NULL},compatibility}, @/ {165, {{statement, function}}, {function, "_F_"},compatibility}, @/ @) {170, {{binop, binop}}, {binop,"r__"},compatibility}, @/ {171, {{unorbinop, binop}}, {binop,"r__"},compatibility}, @/ {172, {{lpar, expression, comma}}, {lpar, "___p1"}, compatibility}, @[@] @[@] @ {\it Additional rules for \Cpp}. Up to this point we have included some specific rules for \Cpp, in places where a slight deviation from the \Cee~syntax was required. 
There are however a large number of syntactic possibilities of \Cpp\ that are not even remotely similar to those of~\Cee, so it is most convenient to collect them in a separate section. The author of \.{CWEBx} wishes to make it clear that he is quite aware of the incompleteness of the set of rules specified below, and that he assumes no responsibility for correcting this. One reason for this is that he has no readable formal grammar of \Cpp, which possibly could be used for validation (nor does he use \Cpp\ himself), another is that the pieces of grammar that he has seen show so little coherence that he seriously doubts whether it is possible at all to parse \Cpp\ reliably with a grammar of the type implemented here. In fact, the rules here were merely added in an attempt to cope with problems reported by users. We start with rules for `\&{operator}', which are simple: it should combine with a following operator symbol of any type to form an expression (rules 180--182). Then rules 183--186 take care of the `::'~operator: either a class name or nothing is expected at the left, and either an ordinary or class identifier at the right; the resulting category is that of the right hand side. Type identifiers may appear as the left hand side of an assignment within a list of formal parameters, indicating a default argument; in this case the whole assignment should behave as a type identifier (rule~187). Next we give rules catering for constructor declarations in class definitions. First of all we must recognise the fact that the class name is being used as a function name here; the simplest solution is to recognise the combination of an |int_like| followed by a (possibly empty) parameter list (rules 190~and~191).
We cannot let a |function_head| (possibly created by the rules just mentioned) decay to an |expression| when followed by a semicolon, as we do for~\Cee, since declarations of constructor members of a class lack an initial type specification, so the |expression| would fail to become part of a |declaration|. Therefore, special measures are necessary: the simplest solution is to simply absorb (rule~192) any preceding type specifier into the |function_head| (thereby removing the distinction between its presence or absence), and construct the |declaration| explicitly from the |function_head| and the following semicolon (rule~193). @< Rules @>= {180, {{case_like, binop}}, {expression, "_o_"},only_plus_plus}, @/ {181, {{case_like, unorbinop}}, {expression, "_o_"},only_plus_plus}, @/ {182, {{case_like, unop}}, {expression, NULL},only_plus_plus}, @/ {183, {{int_like, colcol, expression}}, {expression, NULL},only_plus_plus}, @/ {184, {{colcol, expression}}, {expression, "o__"},only_plus_plus}, @/ {185, {{int_like, colcol, int_like}}, {int_like, NULL},only_plus_plus}, @/ {186, {{colcol, int_like}}, {int_like, "o__"},only_plus_plus}, @/ {187, {{int_like, binop, expression}}, {int_like, NULL},only_plus_plus}, @/ {190, {{int_like, parameters}}, {function_head, "_B_"},only_plus_plus}, @/ {191, {{int_like, lpar,rpar}}, {function_head, "_B_,_"},only_plus_plus},@/ {192, {{int_like, function_head}}, {function_head, "_ _"},only_plus_plus}, @/ {193, {{function_head, semi}}, {declaration, "!__"},only_plus_plus}, @[@] cwebx-3.04.orig/intro.inc100644 1750 1750 11014 6470041672 13173 0ustar jdgjdg\def\hang{\hangindent 3em\indent\ignorespaces} \def\pb{$\.|\ldots\.|$} % C brackets (|...|) \def\LKC.{Levy/Knuth \.{CWEB}} \def\:#1{`\.{@@#1}'} \def\title{\me.
(Version x3.04)} \def\topofcontents {\topglue 0pt plus .5 fill \centerline{\titlefont The {\ttitlefont \me.} program} \vskip 15pt \centerline{(\.{CWEB} version x3.04)} \vfill } \def\botofcontents {\vfill\noindent Copyright \copyright\ 1987,\thinspace1990 Silvio Levy and Donald E. Knuth \par\noindent Copyright 1994 Marc A. A. van Leeuwen \bigskip\noindent Permission is granted to make and distribute verbatim copies of this document provided that the copyright notice and this permission notice are preserved on all copies. \smallskip\noindent Permission is granted to copy and distribute modified versions of this document under the conditions for verbatim copying, provided that the entire resulting derived work is distributed under the terms of a permission notice identical to this one. } @* Introduction. @:title@> This is the main source text for the program \.{\me.}, one of the two text processing tools that constitute the \.{CWEB} system. This version of the system, identified as \.{CWEB}x3.0, was written by Marc van Leeuwen. The history of this system is somewhat complicated, and starts with the \.{WEB} system created by D.~E. Knuth (for the programming language Pascal); the program corresponding to the current program in that system was called~\myroots. That program was converted into \.{\me.}, written in and for the \Cee~language, by Silvio Levy, and further developed under joint responsibility with Knuth. Based on \.{CWEB}~2.1 by Levy and Knuth, a version adapted to \caps{ANSI~C} was made by Frank Jensen. That version has served as the starting point for the current author; an intermediate version between it and the current version was made public under the name \.{CWEB}~3.x. Helpful comments and suggestions by Gareth McCaughan, Werner Lemberg, and Andreas Scherer were greatly appreciated. 
The name \.{CWEB}~3.x was an unfortunate choice, since the Levy/Knuth version of \.{CWEB} had also been independently developed further, and it is currently being distributed as \.{CWEB}~3.4f. Both these branches have retained the basic functionality of the version of \.{CWEB} they were both derived from, making only minor (but different) additions and changes. Realising that this divergence is undesirable and confusing, the development of this branch from \.{CWEB}~3.x to the current version was mainly concerned with improving compatibility and peaceful coexistence with the other branch. First of all the name was changed by moving the character~`x' to the front of the version number, so that this branch of the system can be referred to as `\.{CWEBx}' without mentioning a complete version number; for clarity we shall refer to the other branch as `\LKC.'. @^Levy/Knuth \.{CWEB}@> Secondly, all the extensions of \LKC. were included, although in a few cases in a slightly different form to avoid name conflicts with features already present in \.{CWEB}~3.x. Thirdly, in order to accommodate existing programs written for \LKC., among which are those of Knuth's impressive collection called the Stanford GraphBase, a ``compatibility mode'' was added, selectable by a command line option (\.{+c}), in which the remaining, mostly trivial, differences in syntax and semantics are removed, and the system attempts to be an alternative implementation for exactly the same language as accepted by \LKC., at the price of losing a few possibilities particular to~\.{CWEBx}. The major version number `3' now corresponds to the same version number of \LKC., and since no further extension of the possibilities of that system is anticipated, we do not expect that compatibility will ever require any further increase of this major version number.
The Stanford GraphBase @^Stanford GraphBase@> states that it requires \.{CWEB} version 3.0 or greater; this version of \.{CWEBx} can fully process it in a satisfactory way, using compatibility mode. The ``banner line'' defined here should be changed whenever changes to \.{\me.} are made publicly available. Users who are discontent with some aspect of the program are encouraged to make the necessary modifications in their copy of the source files; if this involves an improvement of the implementation, they are kindly requested to inform the author responsible for the current version. If they choose to make the resulting program available under the name \.{\me.}, they should modify the non-numeric prefix of the version specification. @d version_string "3.04" cwebx-3.04.orig/sample.ps100644 1750 1750 116223 6456662562 13235 0ustar jdgjdg%!PS-Adobe-2.0 %%Creator: dvipsk 5.58a Copyright 1986, 1994 Radical Eye Software %%Pages: 2 %%PageOrder: Ascend %%BoundingBox: 0 0 596 842 %%DocumentPaperSizes: a4 %%EndComments %DVIPSCommandLine: dvips -f -p6 -l7 %DVIPSParameters: dpi=300, compressed, comments removed %DVIPSSource: TeX output 1998.01.13:1225 %%BeginProcSet: texc.pro /TeXDict 250 dict def TeXDict begin /N{def}def /B{bind def}N /S{exch}N /X{S N}B /TR{translate}N /isls false N /vsize 11 72 mul N /hsize 8.5 72 mul N /landplus90{false}def /@rigin{isls{[0 landplus90{1 -1}{-1 1} ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[matrix currentmatrix{dup dup round sub abs 0.00001 lt{round}if} forall round exch round exch]setmatrix}N /@landscape{/isls true N}B /@manualfeed{statusdict /manualfeed true put}B /@copies{/#copies X}B /FMat[1 0 0 -1 0 0]N /FBB[0 0 0 0]N /nn 0 N /IE 0 N /ctr 0 N /df-tail{ /nn 8 dict N nn begin /FontType 3 N /FontMatrix fntrx N /FontBBox FBB N string /base X array /BitMaps X 
/BuildChar{CharBuilder}N /Encoding IE N end dup{/foo setfont}2 array copy cvx N load 0 nn put /ctr 0 N[}B /df{ /sf 1 N /fntrx FMat N df-tail}B /dfs{div /sf X /fntrx[sf 0 0 sf neg 0 0] N df-tail}B /E{pop nn dup definefont setfont}B /ch-width{ch-data dup length 5 sub get}B /ch-height{ch-data dup length 4 sub get}B /ch-xoff{ 128 ch-data dup length 3 sub get sub}B /ch-yoff{ch-data dup length 2 sub get 127 sub}B /ch-dx{ch-data dup length 1 sub get}B /ch-image{ch-data dup type /stringtype ne{ctr get /ctr ctr 1 add N}if}B /id 0 N /rw 0 N /rc 0 N /gp 0 N /cp 0 N /G 0 N /sf 0 N /CharBuilder{save 3 1 roll S dup /base get 2 index get S /BitMaps get S get /ch-data X pop /ctr 0 N ch-dx 0 ch-xoff ch-yoff ch-height sub ch-xoff ch-width add ch-yoff setcachedevice ch-width ch-height true[1 0 0 -1 -.1 ch-xoff sub ch-yoff .1 sub]/id ch-image N /rw ch-width 7 add 8 idiv string N /rc 0 N /gp 0 N /cp 0 N{rc 0 ne{rc 1 sub /rc X rw}{G}ifelse}imagemask restore}B /G{{id gp get /gp gp 1 add N dup 18 mod S 18 idiv pl S get exec}loop}B /adv{cp add /cp X}B /chg{rw cp id gp 4 index getinterval putinterval dup gp add /gp X adv}B /nd{/cp 0 N rw exit}B /lsh{rw cp 2 copy get dup 0 eq{pop 1}{ dup 255 eq{pop 254}{dup dup add 255 and S 1 and or}ifelse}ifelse put 1 adv}B /rsh{rw cp 2 copy get dup 0 eq{pop 128}{dup 255 eq{pop 127}{dup 2 idiv S 128 and or}ifelse}ifelse put 1 adv}B /clr{rw cp 2 index string putinterval adv}B /set{rw cp fillstr 0 4 index getinterval putinterval adv}B /fillstr 18 string 0 1 17{2 copy 255 put pop}for N /pl[{adv 1 chg} {adv 1 chg nd}{1 add chg}{1 add chg nd}{adv lsh}{adv lsh nd}{adv rsh}{ adv rsh nd}{1 add adv}{/rc X nd}{1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]dup{bind pop}forall N /D{/cc X dup type /stringtype ne{] }if nn /base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{dup dup length 1 sub dup 2 index S get sf div put}if put /ctr ctr 1 add N}B /I{ cc 1 add D}B /bop{userdict /bop-hook known{bop-hook}if /SI save N @rigin 0 0 moveto /V matrix currentmatrix 
dup 1 get dup mul exch 0 get dup mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N /eop{SI restore userdict /eop-hook known{eop-hook}if showpage}N /@start{userdict /start-hook known{start-hook}if pop /VResolution X /Resolution X 1000 div /DVImag X /IE 256 array N 0 1 255{IE S 1 string dup 0 3 index put cvn put}for 65781.76 div /vsize X 65781.76 div /hsize X}N /p{show}N /RMat[1 0 0 -1 0 0]N /BDot 260 string N /rulex 0 N /ruley 0 N /v{/ruley X /rulex X V}B /V {}B /RV statusdict begin /product where{pop product dup length 7 ge{0 7 getinterval dup(Display)eq exch 0 4 getinterval(NeXT)eq or}{pop false} ifelse}{false}ifelse end{{gsave TR -.1 .1 TR 1 1 scale rulex ruley false RMat{BDot}imagemask grestore}}{{gsave TR -.1 .1 TR rulex ruley scale 1 1 false RMat{BDot}imagemask grestore}}ifelse B /QV{gsave newpath transform round exch round exch itransform moveto rulex 0 rlineto 0 ruley neg rlineto rulex neg 0 rlineto fill grestore}B /a{moveto}B /delta 0 N /tail {dup /delta X 0 rmoveto}B /M{S p delta add tail}B /b{S p tail}B /c{-4 M} B /d{-3 M}B /e{-2 M}B /f{-1 M}B /g{0 M}B /h{1 M}B /i{2 M}B /j{3 M}B /k{ 4 M}B /w{0 rmoveto}B /l{p -4 w}B /m{p -3 w}B /n{p -2 w}B /o{p -1 w}B /q{ p 1 w}B /r{p 2 w}B /s{p 3 w}B /t{p 4 w}B /x{0 S rmoveto}B /y{3 2 roll p a}B /bos{/SS save N}B /eos{SS restore}B end %%EndProcSet TeXDict begin 39158280 55380996 1000 300 300 () @start /Fa 1 1 df0 D E /Fb 21 121 df12 D<903901FC0FEE903907 0E307E010C136091380CE07C90391C00C01C140113381638A2EC0380A20003B612F03A00 70038070A2EC070016E013E0A391380E01C0A2EA01C0A2ED0388141CA201801490000314 01ED00E04A13001300A248133038C63060EAE63838CC3180D8781FC8FC2725819C25>15 D97 D<123F1207A2120EA45AA4EA39C0EA3E60EA3830A2EA7038A4EAE0 70A3136013E0EAC0C012C1EA6180EA6300123C0D1D7B9C13>IIIII<13F3EA018FEA030FEA0607EA0E0E120C121CA2EA381CA413381230A2EA1878 13F0EA0F701200A213E0A2EAC0C012E1EAC300127E101A7D9113>III108 D<393C1E078039266318C0394683A0E0384703C0008E13 80A2120EA2391C0701C0A3EC0380D8380E1388A2EC0708151039701C032039300C01C01D 
127C9122>IIII114 DI<13C01201A3EA03 80A4EAFFE0EA0700A3120EA45AA4EA3840A31380EA1900120E0B1A7D990E>II120 D E /Fc 3 51 df<1360AAB512F0A238006000AA1416 7E9119>43 D<120C121C12EC120CAFEAFFC00A137D9211>49 D<121FEA60C01360EAF070 13301260EA0070A2136013C012011380EA02005AEA08101210EA2020EA7FE012FF0C137E 9211>I E /Fd 5 111 df<126012F0A212701210A41220A212401280040C7C830C>59 D<130113031306A3130CA31318A31330A31360A213C0A3EA0180A3EA0300A31206A25AA3 5AA35AA35AA35AA210297E9E15>61 D99 D102 D110 D E /Fe 11 106 df<1203A4EAC30CEAE31CEA7338EA1FE0EA0780A2EA1FE0EA7338EAE3 1CEAC30CEA0300A40E127D9215>3 D12 D17 D<1306A25B131C13185B017FB512F090B612F8D80380C8 FC48C9FC123C12F0123C120E12036C7E6CB612F86D14F00130C8FC7F7FA27FA225187E95 2A>40 D<1460A214C0A2EB0180A3EB0300A21306A25BA25BA35BA25BA25BA2485AA248C7 FCA31206A25AA25AA25AA35AA25A124013287A9D00>54 D<13101338A2136CA313C6A2EA 0183A238030180A2380600C0A3481360A2481330A2481318A348130CA24813061402171A 7E981C>94 D<00C0130214060060130CA26C1318A36C1330A26C1360A26C13C0A3380301 80A238018300A2EA00C6A2136CA31338A21310171A7E981C>I<133C13E0EA01C0138012 03AD13005A121C12F0121C12077E1380AD120113C0EA00E0133C0E297D9E15>102 D<12F0121C12077E1380AD120113C0EA00E0133C13E0EA01C013801203AD13005A121C12 F00E297D9E15>I<134013C0EA0180A3EA0300A21206A35AA25AA35AA25AA35AA21260A3 7EA27EA37EA27EA37EA2EA0180A3EA00C013400A2A7D9E10>I<12C0A21260A37EA27EA3 7EA27EA37EA2EA0180A3EA00C0A2EA0180A3EA0300A21206A35AA25AA35AA25AA35AA20A 2A7E9E10>I E /Ff 5 88 df<38C00180AEB5FCA211107E8916>32 D66 D<3801F180EA07FBEA0FFFEA1F 0FEA3C07EA38031270A200F0C7FC5AA77E38700380A21238383C0700EA1F0FEA0FFE6C5A EA01F011197E9816>I<387FFFC0B5FC7EEA1C01A490C7FCA2131CA2EA1FFCA3EA1C1CA2 90C7FC14E0A5EA7FFFB5FC7E13197F9816>69 D<38FC07E0EAFE0FEAFC07387001C0A300 301380EA3803A313E3EA39F3A213B300191300A61313EA1B1BEA0F1EA2EA0E0E13197F98 16>87 D E /Fg 35 118 df<126012F0A2126004047D830A>46 D50 DI<1330 A2137013F012011370120212041208121812101220124012C0EAFFFEEA0070A5EA03FE0F 
157F9412>III<1240EA7FFE13FC13F8EAC008EA80101320EA00401380A2EA0100A25A 12021206A2120EA512040F167E9512>I<13101338A3135CA3138EA3EA0107A200031380 EA0203A23807FFC0EA0401A2380800E0A21218003813F038FE03FE17177F961A>65 D69 D71 D<38FF83FE381C0070AA381FFFF0381C0070AA38FF83FE17177F961A>II<38FF80FE381C0078146014401480EB0100130613085B1338 1378139CEA1D0E121EEA1C07EB0380EB01C0A2EB00E014701478147C38FF80FF18177F96 1B>75 DI<00FEEB 03F8001E14C000171305A338138009A23811C011A33810E021A2EB7041A3EB3881A2EB1D 01A2130EA2123839FE040FF81D177F9620>I<00FC13FE001E1338001F13101217EA1380 EA11C0A2EA10E013701338A2131C130E130F1307EB0390EB01D0A2EB00F0147014301238 00FE131017177F961A>I<13FCEA0303380E01C0381C00E0481370003013300070133800 60131800E0131CA700701338A200301330003813706C13E0380E01C038030300EA00FC16 177E961B>II82 DI<387FFFF8386038180040 1308A200801304A300001300AF3807FFC016177F9619>I<38FF80FE381C00381410B06C 132012066C13403801818038007E0017177F961A>I<3AFF07FC3F803A3C01E00E00D81C 001304A2EB0170000E5CA2EB023800075CA2EB041CD803845BA2EB880ED801C85BA2EBD8 0F3900F00780A3D96003C7FCA321177F9624>87 D97 D99 D<137E130EA8EA07CEEA1C3EEA300E1270126012E0A412601270EA301EEA182E3807 CFC012177F9614>II<12FC121CA8137CEA1D8EEA1E07121CAA38FF9FE0 1317809614>104 D<1218123CA212181200A5127C121CAC12FF081780960A>I<12FC121C B3A3EAFF80091780960A>108 D 110 DI115 D<1208A31218A21238EAFF80EA3800A71340A4EA1C80EA0F00 0A147F930E>II E /Fh 38 122 df12 D<903801C070A349485AA490380701C0A4 90380E0380B71280A27E26001C07C7FCEB380EA5495A007FB61280B7FCA22600E038C7FC 48485AA448485AA4380701C0A321257D9C28>35 D<127812FCA4127806067D850D>46 D<1360EA01E0120F12FF12F31203B3A2387FFF80A2111B7D9A18>49 DI< EA03F8EA1FFEEA3C1FEB0F80387C07C0127E127C123838000F80A2EB1E005BEA03F8EA00 1EEB0F80EB07C0A214E01230127812FCA214C038780F80EB1F00EA1FFEEA07F8131B7E9A 18>II<38180180381FFF005B5B5B13 C00018C7FCA4EA19F8EA1E0E38180F80EA1007000013C014E0A3127812F8A214C012F038 600F8038381F00EA1FFEEA07F0131B7E9A18>I<137EEA03FF38078180380F03C0EA1E07 
123CEB038048C7FCA212F813F8EAFB0E38FA0780EAFC0314C000F813E0A41278A214C012 3CEB0780381E0F00EA07FEEA03F8131B7E9A18>I<1260387FFFE0A214C01480A238E003 00EAC0065B5BC65AA25B13E0A212015B1203A41207A66C5A131C7D9B18>II<90381FE0209038FFF8E03803F80F3807C003380F800148C7FC123E1560127E12 7C00FC1400A8007C1460127E123E15C07E390F8001803907C003003803F80E3800FFFCEB 1FE01B1C7D9B22>67 D69 DI73 D76 D97 DIIII<137F3801E3803803 C7C0EA0787120FEB8380EB8000A5EAFFF8A2EA0F80AEEA7FF8A2121D809C0F>I<3803F8 F0380E0F38121E381C0730003C1380A4001C1300EA1E0FEA0E0EEA1BF80010C7FC1218A2 EA1FFF14C06C13E04813F0387801F838F00078A300701370007813F0381E03C03807FF00 151B7F9118>II<121E123FA4121EC7FCA6B4FCA2121FAEEAFFE0A20B1E7F9D0E>I108 D<39FF0FC07E903831E18F3A1F40F20780D980FC13C0 A2EB00F8AB3AFFE7FF3FF8A225127F9128>I<38FF0FC0EB31E0381F40F0EB80F8A21300 AB38FFE7FFA218127F911B>II<38FF3F80EBE1 E0381F80F0EB0078147C143C143EA6143C147C1478EB80F0EBC1E0EB3F0090C7FCA6EAFF E0A2171A7F911B>I 114 DI<1203A45AA25AA2EA3FFC12FFEA1F00A9 130CA4EA0F08EA0798EA03F00E1A7F9913>I<38FF07F8A2EA1F00AC1301120F380786FF EA01F818127F911B>I<38FFC1FCA2381F0060EB80E0000F13C013C03807C180A23803E3 00A2EA01F6A213FE6C5AA21378A2133016127F9119>I<39FF8FF8FEA2391F03E030A201 831370000FEBF0601386D807C613C0EBCEF8EBEC790003EB7D80EBF83D0001EB3F00A249 7E0000131EEBE00EA21F127F9122>I<38FFC7FCA2381F8180EA0F833807C700EA03EEEA 01FC5B1200137C13FEEA01DFEA039F38070F80380607C0380C03E038FF07FCA216127F91 19>I<38FFC1FCA2381F0060EB80E0000F13C013C03807C180A23803E300A2EA01F713F6 EA00FE5BA21378A21330A21370EA706012F85BEAF9800073C7FC123E161A7F9119>I E /Fi 42 121 df34 D<38380180EA7C03A238EE0700A2130EA35B127C5B1238C65AA35BA2485AA3485AEB8380 380707C0A2380E0EE0A3121CA2383807C0A23818038013207F9C16>37 D<12301278127C123C121CA41238127812F012E01240060D789816>39 D<1238127C127EA2123E120E121E121C127812F01260070B798416>44 D<127012F8A312700505788416>46 D<12E0B51280A338E00F00131EEA001C5B13781370 5BA2485AA3485AA448C7FCA7111A7E9916>55 D57 D<127012F8A312701200A8127012F8A31270051278 9116>I60 
D<12C012F012FC123EEA0F 806C7EEA01F06C7E133EEB1F801307131FEB3E0013F8485AEA07C0485A003EC7FC12FC12 F012C011157E9616>62 D66 D<3801F180EA07FBEA0FFFEA1F0FEA3C07EA38031270A200F0C7FC5AA77E38700380A212 38383C0700EA1F0FEA0FFE6C5AEA01F011197E9816>I<387FFFC0B5FC7EEA1C01A490C7 FCA2131CA2EA1FFCA3EA1C1CA290C7FC14E0A5EA7FFFB5FC7E13197F9816>69 DII<387E1FC038FF3FE0 387F1FC0381D07001387A313C7A2121CA213E7A31367A21377A21337A31317EA7F1FEAFF 9FEA7F0F13197F9816>78 DI83 D<387FFFE0B5FCA2EAE0E0A400001300AFEA07FC487E6C5A13197F98 16>I<38FC07E0EAFE0FEAFC07387001C0A300301380EA3803A313E3EA39F3A213B30019 1300A61313EA1B1BEA0F1EA2EA0E0E13197F9816>87 D<12C07EA21270A27EA27EA27EA2 7EA26C7EA26C7EA26C7EA21370A27FA27FA27FA27FA2EB0380A2130111207E9C16>92 D97 D<127E12FE127E120EA4133E13FF000F1380EB83C0EB 00E0120E1470A614E0EA0F01EB83C0EBFF80000E1300EA063C1419809816>II<133F5B7F1307A4EA03C7EA0FF748B4FCEA3C1F487EEA700712E0A6EA700FA2 EA3C1F381FFFE0380FE7F03807C7E014197F9816>I I<131FEB7F8013FFEA01E7EBC30013C0A2EA7FFFB5FCA2EA01C0ACEA3FFE487E6C5A1119 7F9816>I<3803E3C03807F7E0EA0FFF381C1CC038380E00A56C5AEA0FF8485AEA1BE000 38C7FC1218EA1FFC13FF481380387803C038E000E0A4387001C0EA7C07383FFF80380FFE 00EA03F8131C7F9116>I<127E12FE127E120EA4133C13FEEA0FFFEB87801303120EAA38 7FC7F038FFE7F8387FC7F01519809816>II108 D<38F9C38038FFEFC0EBFFE0EA3C78A2EA3870AA38FE7CF8A2EB3C781512809116>II< EA03E0EA0FF8487EEA3C1E487EEA700738E00380A5EAF00700701300EA780FEA3C1EEA1F FC6C5AEA03E011127E9116>II<38FF 0FC0EB3FE0137F3807F040EBC0005BA290C7FCA8EAFFFCA313127F9116>114 DI<12035AA4EA7FFFB5FCA20007C7FCA75B EB0380A3EB8700EA03FE6C5A6C5A11177F9616>I<387E1F80EAFE3FEA7E1FEA0E03AB13 0F380FFFF03807FBF83803E3F01512809116>I<387F1FC000FF13E0007F13C0381C0700 EA1E0FEA0E0EA36C5AA4EA03B8A3EA01F0A26C5A13127F9116>I<38FF1FE013BF131F38 380380A413E33819F300A213B3EA1DB7A4EA0F1EA313127F9116>I<387F1FC0133F131F 380F1C00EA073CEA03B813F012016C5A12017FEA03B8EA073C131CEA0E0E387F1FC038FF 3FE0387F1FC013127F9116>I E /Fj 59 123 df11 
D<137E3801C180EA0301380703C0120EEB018090C7FCA5B512C0EA0E01B0387F87F8151D 809C17>I34 D<13E0EA0310EA0608A2120EA45BA25B6C5AEC3FE09038800F80EC06000003 130412073809C00800115BEA30E03820F020EA607038E03840EB3C80131C90380F00207F 0070EB8040383009C0391830E180390FC03F001B1F7E9D20>38 D<1380EA010012021206 5AA25AA25AA35AA412E0AC1260A47EA37EA27EA27E12027EEA0080092A7C9E10>40 D<7E12407E12307EA27EA27EA37EA41380AC1300A41206A35AA25AA25A12205A5A092A7E 9E10>I<1306ADB612E0A2D80006C7FCAD1B1C7E9720>43 D<126012F0A212701210A412 20A212401280040C7C830C>I<126012F0A2126004047C830C>46 D48 D<5A1207123F12C71207B3A5EAFFF80D1C7C9B15 >I II<130C A2131C133CA2135C13DC139CEA011C120312021204120C1208121012301220124012C0B5 12C038001C00A73801FFC0121C7F9B15>II<13F0EA030CEA0404EA0C0EEA181E1230130CEA7000A21260EAE3E0EA E430EAE818EAF00C130EEAE0061307A51260A2EA7006EA300E130CEA1818EA0C30EA03E0 101D7E9B15>I<1240387FFF801400A2EA4002485AA25B485AA25B1360134013C0A21201 5BA21203A41207A66CC7FC111D7E9B15>II<126012F0A212601200AA126012F0A2 126004127C910C>58 D<126012F0A212601200AA126012F0A212701210A41220A2124012 80041A7C910C>I<007FB512C0B612E0C9FCA8B612E06C14C01B0C7E8F20>61 D63 D70 D73 D76 D79 D82 D<3807E080EA1C19EA30051303EA600112E01300A36C13007E127CEA7F C0EA3FF8EA1FFEEA07FFC61380130FEB07C0130313011280A300C01380A238E00300EAD0 02EACC0CEA83F8121E7E9C17>I<007FB512C038700F010060130000401440A200C01420 1280A300001400B1497E3803FFFC1B1C7F9B1E>I<3AFFE1FFC0FF3A1F003E003C001E01 3C13186C6D1310A32607801F1320A33A03C0278040A33A01E043C080A33A00F081E100A3 9038F900F3017913F2A2017E137E013E137CA2013C133C011C1338A20118131801081310 281D7F9B2B>87 D<12FEA212C0B3B312FEA207297C9E0C>91 DI<12FEA21206B3B312FEA2 0729809E0C>I97 D<12FC121CAA137CEA1D87381E0180 381C00C014E014601470A6146014E014C0381E018038190700EA10FC141D7F9C17>IIII<13F8EA01 8CEA071E1206EA0E0C1300A6EAFFE0EA0E00B0EA7FE00F1D809C0D>I I<12FC121CAA137C1387EA1D03001E1380121CAD38FF9FF0141D7F9C17>I<1218123CA2 1218C7FCA712FC121CB0EAFF80091D7F9C0C>I<13C0EA01E0A2EA00C01300A7EA07E012 
00B3A21260EAF0C012F1EA6180EA3E000B25839C0D>I<12FC121CAAEB0FE0EB0780EB06 005B13105B5B13E0121DEA1E70EA1C781338133C131C7F130F148038FF9FE0131D7F9C16 >I<12FC121CB3A9EAFF80091D7F9C0C>I<39FC7E07E0391C838838391D019018001EEBE0 1C001C13C0AD3AFF8FF8FF8021127F9124>IIII<3803E080EA0E19EA1805EA3807EA7003A212E0A61270A2EA38071218EA0E1BEA03 E3EA0003A7EB1FF0141A7F9116>III<1204A4120CA2121C123CEAFFE0 EA1C00A91310A5120CEA0E20EA03C00C1A7F9910>I<38FC1F80EA1C03AD1307120CEA0E 1B3803E3F014127F9117>I<38FF07E0383C0380381C0100A2EA0E02A2EA0F06EA0704A2 EA0388A213C8EA01D0A2EA00E0A3134013127F9116>I<39FF3FC7E0393C0703C0001CEB 01801500130B000E1382A21311000713C4A213203803A0E8A2EBC06800011370A2EB8030 000013201B127F911E>I<38FF0FE0381E0700EA1C06EA0E046C5AEA039013B0EA01E012 007F12011338EA021C1204EA0C0E487E003C138038FE1FF014127F9116>I<38FF07E038 3C0380381C0100A2EA0E02A2EA0F06EA0704A2EA0388A213C8EA01D0A2EA00E0A31340A2 5BA212F000F1C7FC12F312661238131A7F9116>I I E end %%EndProlog %%BeginSetup %%Feature: *Resolution 300dpi TeXDict begin %%PaperSize: a4 %%BeginPaperSize: a4 a4 %%EndPaperSize %%EndSetup %%Page: 6 1 6 0 bop 0 42 a Fj(6)70 b Fg(WHA)m(T)14 b(A)e Ff(CWEB)f Fg(PR)o(OGRAM)h(LOOKS)h(LIKE)893 b Fi(CWEBx)13 b Fg(MANUAL)0 132 y Fh(1.)48 b(Compar)q(in)o(g)12 b(t)o(ext)g(\014le)q(s.)47 b Fj(Thi)q(s)11 b(i)q(s)g(an)h(en)o(t)o(irely)f(tr)q(ivial)f(program,)g (t)n(h)o(a)o(t)i(t)o(e)q(st)o(s)i(wh)o(et)n(h)o(er)f(t)o(w)o(o)e(t)o (ext)h(\014le)q(s)h(are)f(equal,)0 181 y(an)o(d)i(if)f(not)g(so,)h(p)q (oin)o(t)o(s)g(ou)o(t)g(t)n(h)o(e)h(\014rst)g(p)q(oin)o(t)e(of)g (di\013erence.)0 244 y Fh(#includ)o(e)41 b Fi()0 294 y Fh(#includ)o(e)g Fi()42 357 y Fh(t)o(yp)q(e)q(d)o(ef)18 b(c)o(h)o(ar)d(b)q(o)q(o)o(l)r Fj(;)0 457 y Fh(2.)48 b Fj(Th)o(e)15 b(ou)o(t)n(lin)o(e)f(of)g(t)n(h)o(e)h(program)e(i)q(s)h (s)q(imple.)j(W)m(e)d(read)h(c)o(h)o(aract)o(ers)i(f)q(rom)12 b(b)q(ot)n(h)j(inpu)o(t)f(\014le)q(s)h(in)o(t)o(o)f Fd(c)1634 463 y Fc(1)1667 457 y Fj(an)o(d)h Fd(c)1766 463 y Fc(2)1799 457 y Fj(u)o(n)o(t)o(il)e(t)n(h)o(e)0 
506 y(compar)q(i)q(son)f(i)q(s)h (complet)o(e.)k(Lin)o(e)c(an)o(d)h(colu)o(mn)e(cou)o(n)o(t)o(s)k(are)e (m)o(ain)o(t)o(ain)o(e)q(d)d(in)j Fb(line)19 b Fj(an)o(d)13 b Fb(c)n(ol)6 b Fj(.)0 569 y Fh(#d)o(e\014n)o(e)42 b Fb(left)p 268 569 13 2 v 18 w(mar)n(gin)26 b Fj(1)83 b Fd(=)p Fe(\003)20 b Fj(left)o(most)13 b(colu)o(mn)f(n)n(u)o(m)n(b)q (er;)h(c)o(h)o(an)o(ge)i(t)o(o)f(0)g(if)f(y)o(ou)g(prefer)22 b Fe(\003)p Fd(=)42 632 y Fe(h)7 b Fj(F)m(u)o(nct)o(ions)18 b Fg(5)6 b Fe(i)42 694 y Fh(in)o(t)14 b Fb(main)26 b Fj(\()p Fh(in)o(t)14 b Fd(n;)20 b Fh(c)o(h)o(ar)c Fe(\003)o(\003)p Fb(ar)n(g)6 b Fj(\))42 744 y Fe(f)20 b Fh(FILE)c Fe(\003)p Fd(f)248 750 y Fc(1)267 744 y Fd(;)k Fe(\003)p Fd(f)340 750 y Fc(2)359 744 y Fj(;)83 b Fd(=)p Fe(\003)20 b Fj(t)n(h)o(e)15 b(t)o(w)o(o)e(inpu)o(t)h(\014le)q(s)21 b Fe(\003)p Fd(=)83 794 y Fh(in)o(t)14 b Fd(c)174 800 y Fc(1)193 794 y Fd(;)20 b(c)243 800 y Fc(2)262 794 y Fd(;)g Fb(c)n(ol)e Fe(\()11 b Fb(left)p 477 794 V 18 w(mar)n(gin)5 b Fj(;)83 844 y Fh(lon)o(g)16 b Fb(line)g Fe(\()11 b Fj(1;)83 907 y Fe(h)c Fj(Op)q(en)15 b(t)n(h)o(e)f(\014le)q(s)h Fd(f)391 913 y Fc(1)424 907 y Fj(an)o(d)f Fd(f)524 913 y Fc(2)543 907 y Fj(,)f(t)o(akin)o(g)h(t)n(h)o(e)q(ir)g(n)o(am)o(e)q(s)g(f)q(rom)e (t)n(h)o(e)i(comm)o(an)o(d)e(lin)o(e)h(or)h(f)q(rom)e(t)n(h)o(e)j(t)o (ermin)o(al;)d(in)h(cas)q(e)i(of)166 957 y(an)f(error)h(for)e(whic)o(h) h(no)f(reco)o(v)o(ery)j(i)q(s)d(p)q(oss)q(ible,)h(call)f Fb(exit)6 b Fj(\(1\))17 b Fg(6)7 b Fe(i)83 1007 y(h)g Fj(Searc)o(h)15 b(for)f(\014rst)h(di\013erence,)g(lea)o(vin)o(g)f Fd(c)745 1013 y Fc(1)775 1007 y Fe(6)p Fj(=)e Fd(c)837 1013 y Fc(2)869 1007 y Fj(if)h(an)o(d)h(only)f(if)g(a)g(di\013erence)j (w)o(as)e(fou)o(n)o(d)k Fg(3)6 b Fe(i)83 1057 y(h)h Fj(Rep)q(ort)14 b(t)n(h)o(e)h(ou)o(t)o(com)o(e)e(of)g(t)n(h)o(e)i(compar)q(i)q(son)g Fg(4)7 b Fe(i)83 1107 y Fh(ret)o(ur)q(n)13 b Fj(0;)83 b Fd(=)p Fe(\003)20 b Fj(su)o(cce)q(ssful)c(complet)o(ion)j Fe(\003)p Fd(=)42 1157 y Fe(g)0 1257 y Fh(3.)48 b Fj(Th)o(e)12 
b(h)o(eart)h(of)f(t)n(h)o(e)h(program)d(i)q(s)i(t)n(hi)q(s)g(s)q(imple) d(lo)q(o)o(p.)17 b(Wh)o(en)12 b(w)o(e)g(reac)o(h)i(t)n(h)o(e)f(en)o(d)f (of)g(on)o(e)g(of)f(t)n(h)o(e)i(\014le)q(s,)g(t)n(h)o(e)f(\014le)q(s)h (m)o(a)o(t)o(c)o(h)e(if)0 1307 y(an)o(d)i(only)g(if)g(t)n(h)o(e)h(ot)n (h)o(er)h(\014le)f(h)o(as)f(also)g(reac)o(h)o(e)q(d)j(it)o(s)e(en)o(d.) k(F)m(or)c(t)n(hi)q(s)f(reason)i(t)n(h)o(e)f(t)o(e)q(st)h Fd(c)1357 1313 y Fc(1)1387 1307 y Fj(=)d Fd(c)1449 1313 y Fc(2)1468 1307 y Fj(,)h(whic)o(h)g(require)q(s)i(c)o(h)o(aract)o(ers) 0 1357 y(t)o(o)h(b)q(e)g(read)g(f)q(rom)e(b)q(ot)n(h)i(\014le)q(s,)g(m) n(ust)f(prece)q(d)o(e)k(t)n(h)o(e)d(t)o(e)q(st)h(for)e(\014le)h(en)o (d;)g(wh)o(en)h(only)d(on)o(e)i(\014le)g(en)o(ds,)g(it)g(i)q(s)f(t)n(h) o(e)h(form)o(er)f(t)o(e)q(st)0 1406 y(whic)o(h)f(breaks)h(t)n(h)o(e)f (lo)q(o)o(p.)0 1469 y Fe(h)7 b Fj(Searc)o(h)15 b(for)f(\014rst)h (di\013erence,)g(lea)o(vin)o(g)f Fd(c)662 1475 y Fc(1)692 1469 y Fe(6)p Fj(=)e Fd(c)754 1475 y Fc(2)786 1469 y Fj(if)h(an)o(d)h(only)f(if)g(a)g(di\013erence)j(w)o(as)e(fou)o(n)o(d)k Fg(3)6 b Fe(i)12 b(\021)42 1519 y Fh(while)i Fj(\(\()p Fd(c)217 1525 y Fc(1)248 1519 y Fe(\()d Fb(getc)t Fj(\()p Fd(f)412 1525 y Fc(1)431 1519 y Fj(\)\))h(=)g(\()p Fd(c)553 1525 y Fc(2)583 1519 y Fe(\()f Fb(getc)5 b Fj(\()p Fd(f)748 1525 y Fc(2)767 1519 y Fj(\)\))k Fe(^)g Fd(c)863 1525 y Fc(1)893 1519 y Fe(6)p Fj(=)j Fi(EOF)r Fj(\))83 1569 y Fh(if)20 b Fj(\()p Fd(c)166 1575 y Fc(1)196 1569 y Fj(=)12 b Fi('\\n')r Fj(\))20 b Fe(f)410 1565 y Fc(+)l(+)458 1569 y Fb(line)5 b Fj(;)21 b Fb(c)n(ol)c Fe(\()12 b Fb(left)p 745 1569 V 18 w(mar)n(gin)5 b Fj(;)20 b Fe(g)h Fh(els)q(e)1064 1565 y Fc(+)l(+)1113 1569 y Fb(c)n(ol)6 b Fj(;)0 1623 y Fg(This)12 b(co)q(d)o(e)f(is)h(us)q(e)q(d)e(in)h(s)q(ect)o(ion)f(2.)0 1723 y Fh(4.)48 b Fj(Wh)o(en)14 b(t)n(h)o(e)h(\014rst)g(di\013erence)h (o)q(ccurs)f(a)o(t)f(t)n(h)o(e)h(en)o(d)f(of)f(on)o(e)h(of)g(t)n(h)o(e) h(\014le)q(s,)f(or)g(a)o(t)f(t)n(h)o(e)i(en)o(d)f(of)g(a)g(lin)o(e,)e 
(w)o(e)j(giv)o(e)e(a)h(m)o(e)q(ssage)0 1773 y(in)o(dica)o(t)o(in)o(g)f (t)n(hi)q(s)g(f)q(act.)0 1836 y Fe(h)7 b Fj(Rep)q(ort)14 b(t)n(h)o(e)h(ou)o(t)o(com)o(e)e(of)g(t)n(h)o(e)i(compar)q(i)q(son)g Fg(4)7 b Fe(i)12 b(\021)42 1886 y Fh(if)19 b Fj(\()p Fd(c)124 1892 y Fc(1)154 1886 y Fj(=)12 b Fd(c)216 1892 y Fc(2)235 1886 y Fj(\))21 b Fb(printf)10 b Fj(\()p Fi("Files)p Ff( )p Fi(match.\\n")r Fj(\);)42 1936 y Fh(els)q(e)42 1986 y Fe(f)20 b Fb(printf)10 b Fj(\()p Fi("Files)p Ff( )p Fi(differ.\\n")r Fj(\);)83 2036 y Fh(if)20 b Fj(\()p Fd(c)166 2042 y Fc(1)196 2036 y Fj(=)12 b Fi(EOF)e Fe(_)f Fd(c)371 2042 y Fc(2)401 2036 y Fj(=)j Fi(EOF)r Fj(\))125 2086 y Fe(f)20 b Fb(the)p 226 2086 V 19 w(\014le)5 b Fj(\()p Fd(c)331 2092 y Fc(1)361 2086 y Fj(=)12 b Fi(EOF)r Fj(\);)20 b Fb(printf)10 b Fj(\()p Fi("is)p Ff( )p Fi(contained)p Ff( )p Fi(in)p Ff( )p Fi(th)o(e)p Ff( )p Fi(oth)o(er)p Ff( )p Fi(a)o(s)p Ff( )p Fi(ini)o(tial)p Ff( )o Fi(segme)o(nt.\\n)o(")r Fj(\);)18 b Fe(g)83 2136 y Fh(els)q(e)d(if)20 b Fj(\()p Fd(c)259 2142 y Fc(1)289 2136 y Fj(=)12 b Fi('\\n')e Fe(_)f Fd(c)486 2142 y Fc(2)516 2136 y Fj(=)j Fi('\\n')r Fj(\))125 2186 y Fe(f)20 b Fb(the)p 226 2186 V 19 w(\014le)5 b Fj(\()p Fd(c)331 2192 y Fc(1)361 2186 y Fj(=)12 b Fi('\\n')r Fj(\);)20 b Fb(printf)10 b Fj(\()p Fi("has)p Ff( )p Fi(a)p Ff( )p Fi(shorter)p Ff( )p Fi(lin)o(e)p Ff( )p Fi(num)o(ber)p Ff( )o Fi(\045ld)p Ff( )p Fi(t)o(han)p Ff( )p Fi(t)o(he)p Ff( )p Fi(ot)o(her.\\)o(n")r Fd(;)d Fb(l)o(ine)s Fj(\);)20 b Fe(g)83 2236 y Fh(els)q(e)i Fb(printf)10 b Fj(\()p Fi("First)p Ff( )p Fi(difference)p Ff( )p Fi(a)o(t)p Ff( )p Fi(lin)o(e)p Ff( )p Fi(\045ld)o(,)p Ff( )p Fi(co)o(lumn)p Ff( )o Fi(\045d.\\n)o(")r Fd(;)d Fb(l)o(ine)s Fd(;)g Fb(c)n(ol)f Fj(\);)42 2286 y Fe(g)0 2341 y Fg(This)12 b(co)q(d)o(e)f(is)h(us)q(e)q(d)e(in)h(s)q(ect)o(ion)f(2.)0 2440 y Fh(5.)48 b Fj(Th)o(e)13 b(fu)o(nct)o(ion)g Fb(the)p 385 2440 V 15 w(\014le)18 b Fj(st)o(art)o(s)c(a)f(s)q(en)o(t)o(ence)h 
(a)o(b)q(ou)o(t)f(t)n(h)o(e)h(\014rst)g(or)f(s)q(econ)o(d)g(\014le,)g (d)o(ep)q(en)o(din)o(g)g(on)g(it)o(s)g(b)q(o)q(olean)f(argu)o(m)o(en)o (t.)0 2503 y Fe(h)7 b Fj(F)m(u)o(nct)o(ions)18 b Fg(5)7 b Fe(i)k(\021)42 2553 y Fh(v)o(oid)j Fb(the)p 205 2553 V 19 w(\014le)26 b Fj(\()p Fh(b)q(o)q(o)o(l)16 b Fb(is)p 455 2553 V 18 w(\014rst)6 b Fj(\))21 b Fe(f)f Fb(printf)10 b Fj(\()p Fi("The)p Ff( )p Fi(\045s)p Ff( )p Fi(file)p Ff( )p Fi(")r Fd(;)d Fb(is)p 1116 2553 V 16 w(\014rst)17 b Fj(?)11 b Fi("first")h Fj(:)f Fi("second")r Fj(\);)19 b Fe(g)0 2607 y Fg(See)11 b(also)f(s)q(ect)o(ion)g(7.)0 2662 y(This)i(co)q(d)o(e)f(is)h(us)q(e)q(d)e(in)h(s)q(ect)o(ion)f(2.)p eop %%Page: 7 2 7 1 bop 0 42 a Fi(CWEBx)13 b Fg(MANUAL)895 b(WHA)m(T)13 b(A)f Ff(CWEB)f Fg(PR)o(OGRAM)i(LOOKS)g(LIKE)70 b Fj(7)0 132 y Fh(6.)48 b Fj(Th)o(ere)16 b(can)f(b)q(e)g(b)q(e)h(zero,)f(on)o(e) h(or)f(t)o(w)o(o)f(comm)o(an)o(d)e(lin)o(e)j(argu)o(m)o(en)o(t)o(s.)21 b(If)15 b(t)n(h)o(ere)h(are)g(non)o(e,)f(t)n(h)o(e)h(us)q(er)g(i)q(s)e (prompt)o(e)q(d)h(t)o(o)0 181 y(sup)o(ply)h(t)n(h)o(em,)f(an)o(d)h(if)f (t)n(h)o(ere)j(are)f(t)o(w)o(o)f(t)n(h)o(e)q(s)q(e)h(are)g(t)o(ak)o(en) f(as)g(t)n(h)o(e)h(\014le)g(n)o(am)o(e)q(s,)e(prompt)o(in)o(g)g(t)n(h)o (e)i(us)q(er)g(only)e(in)h(cas)q(e)h(a)f(\014le)0 231 y(could)d(not)g(b)q(e)h(o)o(p)q(en)o(e)q(d.)k(In)13 b(cas)q(e)h(just)g (on)o(e)f(argu)o(m)o(en)o(t)f(i)q(s)h(pre)q(s)q(en)o(t,)h(t)n(h)o(e)g (\014rst)h(\014le)e(i)q(s)g(assu)o(m)o(e)q(d)g(t)o(o)h(b)q(e)f(t)n(h)o (e)h(st)o(an)o(d)o(ard)g(inpu)o(t,)0 281 y(whic)o(h)h(do)q(e)q(s)h(not) f(h)o(a)o(v)o(e)g(t)o(o)h(b)q(e)g(o)o(p)q(en)o(e)q(d;)f(in)g(t)n(hi)q (s)h(cas)q(e)g(h)o(o)o(w)o(ev)o(er)g(w)o(e)g(will)e(not)h(read)h(a)g (\014le)f(n)o(am)o(e)f(f)q(rom)f(t)o(ermin)o(al)h(in)h(cas)q(e)0 331 y(t)n(h)o(e)g(s)q(econ)o(d)f(\014le)g(cannot)g(b)q(e)g(o)o(p)q(en)o (e)q(d.)0 393 y Fh(#d)o(e\014n)o(e)42 b Fb(r)n(e)n(ad)p 287 393 13 2 v 19 w(mo)n(de)27 b Fi("r")0 455 y Fe(h)7 b Fj(Op)q(en)15 b(t)n(h)o(e)g(\014le)q(s)f Fd(f)308 
461 y Fc(1)341 455 y Fj(an)o(d)g Fd(f)441 461 y Fc(2)460 455 y Fj(,)g(t)o(akin)o(g)g(t)n(h)o(e)q(ir)g(n)o(am)o(e)q(s)g(f)q(rom)e (t)n(h)o(e)j(comm)o(an)o(d)c(lin)o(e)j(or)g(f)q(rom)e(t)n(h)o(e)j(t)o (ermin)o(al;)d(in)i(cas)q(e)h(of)e(an)125 505 y(error)i(for)e(whic)o(h) h(no)f(reco)o(v)o(ery)i(i)q(s)f(p)q(oss)q(ible,)f(call)h Fb(exit)5 b Fj(\(1\))18 b Fg(6)7 b Fe(i)k(\021)44 551 y Fa(\000)l(\000)93 555 y Fd(n)p Fj(;)153 551 y Fc(+)l(+)202 555 y Fb(ar)n(g)5 b Fj(;)83 b Fd(=)p Fe(\003)20 b Fj(ignore)14 b(\\argu)o(m)o(en)o(t")e(0,)h(whic)o(h)h(i)q(s)f(t)n(h)o(e)i(program)d (n)o(am)o(e)19 b Fe(\003)p Fd(=)42 605 y Fh(if)g Fj(\()p Fd(n)12 b Fj(=)g(0\))83 655 y Fe(f)21 b Fb(op)n(en)p 213 655 V 19 w(\014le)5 b Fj(\(&)p Fd(f)352 661 y Fc(1)371 655 y Fd(;)i Fi("First)p Ff( )p Fi(file)p Ff( )p Fi(to)p Ff( )p Fi(co)o(mpar)o(e")r Fd(;)g Fe(\014)p Fj(\);)17 b Fb(op)n(en)p 1080 655 V 20 w(\014le)5 b Fj(\(&)p Fd(f)1220 661 y Fc(2)1239 655 y Fd(;)i Fi("Second)p Ff( )p Fi(file)p Ff( )p Fi(to)p Ff( )o Fi(compa)o(re")r Fd(;)f Fe(\014)p Fj(\);)18 b Fe(g)42 704 y Fh(els)q(e)d(if)k Fj(\()p Fd(n)12 b Fj(=)g(1\))42 754 y Fe(f)20 b Fd(f)103 760 y Fc(1)133 754 y Fe(\()12 b Fb(stdin)5 b Fj(;)83 804 y Fh(if)20 b Fj(\(\()p Fd(f)184 810 y Fc(2)214 804 y Fe(\()11 b Fb(fop)n(en)6 b Fj(\()p Fe(\003)p Fb(ar)n(g)g Fd(;)h Fb(r)n(e)n(ad)p 568 804 V 18 w(mo)n(de)f Fj(\)\))12 b(=)g Fe(\014)p Fj(\))21 b Fe(f)f Fb(printf)11 b Fj(\()p Fi("Could)p Ff( )p Fi(not)p Ff( )p Fi(open)p Ff( )o Fi(file)p Ff( )o Fi(\045s.\\n)o(")r Fd(;)c Fe(\003)l Fb(ar)n(g)f Fj(\);)20 b Fb(exit)6 b Fj(\(1\);)21 b Fe(g)42 854 y(g)42 904 y Fh(els)q(e)15 b(if)k Fj(\()p Fd(n)12 b Fj(=)g(2\))42 953 y Fe(f)20 b Fb(op)n(en)p 171 953 V 19 w(\014le)6 b Fj(\(&)p Fd(f)311 959 y Fc(1)330 953 y Fd(;)h Fi("Give)p Ff( )p Fi(another)p Ff( )o Fi(first)o Ff( )p Fi(file)o(")r Fd(;)g Fe(\003)m Fb(ar)n(g)998 949 y Fc(+)l(+)1047 953 y Fj(\);)83 1003 y Fb(op)n(en)p 171 1003 V 19 w(\014le)f Fj(\(&)p Fd(f)311 1009 y Fc(2)330 1003 y Fd(;)h Fi("Give)p Ff( )p 
Fi(another)p Ff( )o Fi(secon)o(d)p Ff( )p Fi(fil)o(e")r Fd(;)g Fe(\003)l Fb(ar)n(g)f Fj(\);)42 1053 y Fe(g)42 1103 y Fh(els)q(e)21 b Fe(f)g Fb(printf)10 b Fj(\()p Fi("No)p Ff( )p Fi(more)p Ff( )p Fi(than)p Ff( )p Fi(two)p Ff( )p Fi(c)o(omman)o(d)p Ff( )p Fi(lin)o(e)p Ff( )p Fi(ar)o(gumen)o(ts)p Ff( )p Fi(ar)o(e)p Ff( )p Fi(all)o(owed.)o(\\n")r Fj(\);)17 b Fb(exit)6 b Fj(\(1\);)21 b Fe(g)0 1157 y Fg(This)12 b(co)q(d)o(e)f(is)h(us)q(e)q(d)e(in)h(s)q(ect)o(ion)f(2.)0 1256 y Fh(7.)48 b Fj(Th)o(e)18 b(fu)o(nct)o(ion)g Fb(op)n(en)p 423 1256 V 16 w(\014le)23 b Fj(will)17 b(try)h(t)o(o)g(o)o(p)q(en)g(t)n (h)o(e)g(\014le)g Fb(name)24 b Fj(for)18 b(readin)o(g,)h(an)o(d)e(if)g (t)n(hi)q(s)h(f)q(ails)f(it)g(will)g(prompt)f(for)0 1306 y(anot)n(h)o(er)h(\014le)g(n)o(am)o(e)f(u)o(n)o(t)o(il)g(it)g(h)o(as)h (su)o(cce)q(ss.)30 b(If)16 b(calle)q(d)h(wit)n(h)f Fb(name)22 b Fj(=)17 b Fe(\014)p Fj(,)g(t)n(h)o(e)h(fu)o(nct)o(ion)f(st)o(art)o(s) h(wit)n(h)e(prompt)o(in)o(g)g(r)q(igh)o(t)0 1356 y(aw)o(ay)m(.)0 1418 y Fe(h)7 b Fj(F)m(u)o(nct)o(ions)18 b Fg(5)7 b Fe(i)k Fj(+)p Fe(\021)42 1468 y Fh(v)o(oid)j Fb(op)n(en)p 233 1468 V 19 w(\014le)26 b Fj(\()p Fh(FILE)17 b Fe(\003\003)p Fd(f)r(;)j Fh(c)o(h)o(ar)c Fe(\003)o Fb(pr)n(ompt)6 b Fd(;)20 b Fh(c)o(h)o(ar)c Fe(\003)o Fb(name)6 b Fj(\))42 1518 y Fe(f)20 b Fh(c)o(h)o(ar)15 b Fb(buf)c Fj([80)o(];)83 1580 y Fh(if)20 b Fj(\()p Fb(name)d Fj(=)12 b Fe(\014)d(_)g Fj(\()p Fe(\003)p Fd(f)17 b Fe(\()11 b Fb(fop)n(en)5 b Fj(\()p Fb(name)h Fd(;)h Fb(r)n(e)n(ad)p 831 1580 V 19 w(mo)n(de)f Fj(\)\))12 b(=)f Fe(\014)p Fj(\))125 1630 y Fh(do)22 b Fe(f)f Fb(printf)10 b Fj(\()p Fi("\045s:)p Ff( )p Fi(")r Fd(;)d Fb(pr)n(ompt)t Fj(\);)21 b Fb(\017ush)5 b Fj(\()p Fb(stdout)i Fj(\);)20 b Fb(sc)n(anf)12 b Fj(\()p Fi("\04579s")r Fd(;)7 b Fb(buf)i Fj(\);)21 b Fe(g)125 1680 y Fh(while)14 b Fj(\(\()p Fe(\003)p Fd(f)j Fe(\()11 b Fb(fop)n(en)5 b Fj(\()p Fb(buf)12 b Fd(;)7 b Fb(r)n(e)n(ad)p 676 1680 V 18 w(mo)n(de)f Fj(\)\))12 b(=)g Fe(\014)p Fj(\);)42 1730 y Fe(g)0 
1829 y Fh(8.)48 b(In)o(d)o(ex.)0 1896 y Fb(ar)n(g)5 b Fj(:)42 b(2)p 115 1903 21 2 v -1 w(,)23 b(6.)0 1946 y Fh(b)q(o)q(o)o(l)r Fj(:)41 b(1)p 145 1953 V(,)22 b(5.)0 1996 y Fb(buf)11 b Fj(:)41 b(7)p 118 2003 V(.)0 2047 y Fb(c)n(ol)6 b Fj(:)41 b(2)p 108 2054 V(,)22 b(3,)g(4.)0 2097 y Fd(c)18 2103 y Fc(1)37 2097 y Fj(:)41 b(2)p 90 2104 V -1 w(,)23 b(3,)f(4.)0 2147 y Fd(c)18 2153 y Fc(2)37 2147 y Fj(:)41 b(2)p 90 2154 V -1 w(,)23 b(3,)f(4.)0 2197 y Fi(EOF)r Fj(:)41 b(3,)22 b(4.)0 2248 y Fb(exit)6 b Fj(:)41 b(6.)0 2298 y Fd(f)t Fj(:)h(7)p 78 2305 V(.)0 2348 y Fb(\017ush)6 b Fj(:)41 b(7.)0 2398 y Fb(fop)n(en)6 b Fj(:)41 b(6,)22 b(7.)0 2449 y Fd(f)20 2455 y Fc(1)39 2449 y Fj(:)41 b(2)p 92 2456 V(,)22 b(3,)g(6.)0 2499 y Fd(f)20 2505 y Fc(2)39 2499 y Fj(:)41 b(2)p 92 2506 V(,)22 b(3,)g(6.)0 2549 y Fb(getc)5 b Fj(:)41 b(3.)0 2599 y Fb(is)p 33 2599 13 2 v 15 w(\014rst)6 b Fj(:)41 b(5)p 176 2606 21 2 v -1 w(.)0 2650 y Fb(left)p 60 2650 13 2 v 14 w(mar)n(gin)6 b Fj(:)41 b(2)p 256 2657 21 2 v -1 w(,)21 b(3.)0 2700 y Fb(line)5 b Fj(:)41 b(2)p 124 2707 V(,)22 b(3,)g(4.)1017 1896 y Fb(main)5 b Fj(:)41 b(2)p 1166 1903 V(.)1017 1946 y Fd(n)p Fj(:)f(2)p 1094 1953 V(.)1017 1995 y Fb(name)5 b Fj(:)41 b(7)p 1172 2002 V(.)1017 2045 y Fb(op)n(en)p 1102 2045 13 2 v 16 w(\014le)5 b Fj(:)41 b(6,)21 b(7)p 1280 2052 21 2 v(.)1017 2095 y Fb(printf)10 b Fj(:)41 b(4,)20 b(5,)h(6,)g(7.)1017 2145 y Fb(pr)n(ompt)5 b Fj(:)41 b(7)p 1202 2152 V(.)1017 2195 y Fb(r)n(e)n(ad)p 1095 2195 13 2 v 15 w(mo)n(de)5 b Fj(:)41 b(6)p 1258 2202 21 2 v(,)21 b(7.)1017 2244 y Fb(sc)n(anf)11 b Fj(:)41 b(7.)1017 2294 y Fb(stdin)5 b Fj(:)41 b(6.)1017 2344 y Fb(stdout)6 b Fj(:)41 b(7.)1017 2394 y Fb(the)p 1074 2394 13 2 v 15 w(\014le)5 b Fj(:)41 b(4,)22 b(5)p 1252 2401 21 2 v(.)p eop %%Trailer end userdict /end-hook known{end-hook}if %%EOF cwebx-3.04.orig/cwebhmac.tex100644 1750 1750 11364 6463323626 13654 0ustar jdgjdg% This is file cwebhmac.tex, part of the CWEBx distribution. 
% This file can be loaded after cwebxmac.tex, in order to automatically % insert \specials for use by the xhdvi previewer, or any other HyperTeX % dvi-file processing tool. Current version is alpha release. % Author: Marc van Leeuwen % Last modified: 8 november 1996. % The following macros are redefined % \stsec % \note
. % \finnote
. % \X:\X \ifx\hyperloaded\undefined \let\hyperloaded\relax \else \endinput\fi % Low level interface to hyperlinks and targets % \hyperanchor % general mechanism to define as anchor with \long\def\hyperanchor#1#2{\special{html:}{#2}\special{html:}} % \hyperlink % makes an active link referring to \def\hyperlink#1{\hyperanchor{href="#1"}} % \hypertarget % defines a local target for hyperlinks, that can be referenced by \def\hypertarget#1{\hyperanchor{name="#1"}} % \hyperdef % defines a hypertarget classified by and sequence \def\hyperdef#1#2{\hypertarget{#1.#2}} % \hyperref % creates a hyperlink to local target created by \hyperdef % are optional assignments for expansion of and {\catcode`\#=12 \gdef\sharp{#}} % to access neutralised '#' character \def\hyperref#1#2#3% {{#1\xdef\next{{\sharp#2.#3}}}\expandafter\hyperlink\next} % High level interface attached to CWEAVE-produced macros (or to those % accessed by them via the definition of cwebxmac.tex) % Target attached to beginning of each section. =sec, =\secno. \def\stsec {\endcodemode\noindent \hyperdef{sec}\secno{\let\*=\lapstar\bf\secstar.}\quad } \let\startsection=\stsec % this one is used; provides hook for extra actions % List of links attached to end of some sections. \def\note#1#2.% {\Y\noindent{\hangindent2\indentunit\baselineskip10pt\eightrm #1~\hypersecrefs#2, .\par}} % Same, but for references from the list of module names. \def\finnote#1#2.{\quad{\eightrm#1~\hypersecrefs#2, .}} % the following two are auxiliary macros that implement reference lists. % \hypersecrefs, , ..., , . % or \hypersecrefs, . % make list of hyperlinks to each of then mentioned sections \def\hypersecrefs#1, #2.% {\ifx|#2|\def\next{\finalsecrefs#1\ET\ET.#1\ETs\ETs}% final `.' follows below \else\hyperref{\let\*=\empty}{sec}{#1}{#1}, \let\next=\hypersecrefs \fi\next#2.% } % \finalsecref\ET\ET.\ETs\ETs. % or \finalsecref\ET\ET.\ETs\ETs. 
\def\finalsecrefs#1\ET#2\ET#3.#4\ETs#5\ETs#6.% {\ifx|#2#5|\hyperref{\let\*=\empty}{sec}{#1}{#1}% just 1 reference \else % \ET or \ETs \edef\next{\ifx|#5|#1\else#4\fi}% \toks0={\let\*=\empty}% \edef\next{{\the\toks0}{sec}{\next}{\next}}% \expandafter\hyperref\next% link to sec. with text #3#6% one is empty, the other is \ET or \ETs, as appropriate \hyperref{\let\*=\empty}{sec}{#2#5}{#2#5}% similar trick, link to sec. \fi } % we must change \ATP slightly, so that \X can recognise it and avoid a link \def\ATP{\X\ATP\kern-.5em:Preprocessor directives\X} \def\X#1:#2\X % module name {\langle\,${#2\eightrm\enspace\ifx\ATP#1\else\hypersecrefs#1, .\fi}$\,\rangle} {\catcode`_=12 \gdef\usplain{\def\_{_}}} \let\plainCident=\Cident \def\Cident#1{\def\idstart{#1}\futurelet\next\testid} \let\\=\Cident % cwebx only; a \hbox would prevent us from seeing "subscript" \def\testid {\ifx_\next \def\next_##1% {\hyperref\usplain{id}{\idstart##1}{\plainCident\idstart_{##1}}}% \else \hyperref\usplain{id}\idstart{\plainCident\idstart}% \let\next=\empty \fi\next } \def\indextestid {\ifx_\next \def\next_##1% {{\usplain\xdef\next{{id}{\idstart##1}}}% \expandafter\hyperdef\next{\plainCident\idstart_{##1}}% }% \else {\usplain\xdef\next{{id}{\idstart}}}% \expandafter\hyperdef\next{\plainCident\idstart}% \let\next=\empty \fi\next } {\let \inxentry=\relax % remove \outer-ness \toks0=\expandafter{\inx \let\testid=\indextestid}\xdef\inx{\the\toks0 } } \outer\def\inxentry#1#2, #3.% #1 is `h' or `m' for horiz/math mode {\par\hangindent2em\noindent \if#1m$#2$\else#2\relax\fi % \relax avoids fatal \next=\fi in \indextestid :\kern1em\indexrefs#3, .% } \def\indexrefs#1, #2.% {\hyperref{\let\*=\empty\def\[##1]{##1}}{sec}{#1}{#1}% \ifx|#2|\let\next=\empty\else, \let\next=\indexrefs\fi\next#2.% } \def\contentsline#1#2#3#4% #1=depth, #2=title, #3=secno, #4=pageno {\line{\hyperref{}{sec}{#3}{\ignorespaces#2} \leaders\hbox to .5em{.\hfil}\hfil \ \hyperref{}{sec}{#3}{#3 \hbox to3em{\hss#4}}% }% }