peg-0.1.18/0002755000175000000620000000000012744267363012076 5ustar piumartastaffpeg-0.1.18/LICENSE.txt0000644000175000000620000000142112744267363013715 0ustar piumartastaffCopyright (c) 2007-2013, Ian Piumarta All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, provided that the above copyright notice(s) and this permission notice appear in all copies or substantial portions of the Software. Inclusion of the above copyright notice(s) and this permission notice in supporting documentation would be appreciated but is not required. THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. peg-0.1.18/examples/0002755000175000000620000000000012744267362013713 5ustar piumartastaffpeg-0.1.18/examples/wc.ref0000644000175000000620000000057612744267362015030 0ustar piumartastaff
22 lines 425 chars 52 words peg-0.1.18/examples/accept.c0000644000175000000620000000016212744267362015313 0ustar piumartastaff#include #include #include "accept.peg.c" int main() { while (yyparse()); return 0; } peg-0.1.18/examples/localleg.leg0000644000175000000620000000051212744267362016162 0ustar piumartastaff%{ #define YY_CTX_LOCAL 1 #define YY_CTX_MEMBERS \ int count; %} Char = ('\n' | '\r\n' | '\r') { yy->count++ } | . %% #include #include int main() { yycontext yy; memset(&yy, 0, sizeof(yy)); while (yyparse(&yy)) ; printf("%d newlines\n", yy.count); yyrelease(&yy); return 0; } peg-0.1.18/examples/wc.leg0000644000175000000620000000065112744267362015015 0ustar piumartastaff%{ #include int lines= 0, words= 0, chars= 0; %} start = (line | word | char) line = < (( '\n' '\r'* ) | ( '\r' '\n'* )) > { lines++; chars += yyleng; } word = < [a-zA-Z]+ > { words++; chars += yyleng; printf("<%s>\n", yytext); } char = . { chars++; } %% int main() { while (yyparse()) ; printf("%d lines\n", lines); printf("%d chars\n", chars); printf("%d words\n", words); return 0; } peg-0.1.18/examples/username.leg0000644000175000000620000000024212744267362016217 0ustar piumartastaff%{ #include %} start = "username" { printf("%s", getlogin()); } | < . 
> { putchar(yytext[0]); } %% int main() { while (yyparse()); return 0; } peg-0.1.18/examples/localleg.ref0000644000175000000620000000001412744267362016164 0ustar piumartastaff24 newlines peg-0.1.18/examples/dc.ref0000644000175000000620000000000312744267362014766 0ustar piumartastaff42 peg-0.1.18/examples/dcv.c0000644000175000000620000000045512744267362014635 0ustar piumartastaff#include #include int stack[1024]; int stackp= -1; int var= 0; int vars[26]; int push(int n) { return stack[++stackp]= n; } int pop(void) { return stack[stackp--]; } int top(void) { return stack[stackp]; } #include "dcv.peg.c" int main() { while (yyparse()); return 0; } peg-0.1.18/examples/left.peg0000644000175000000620000000004312744267362015335 0ustar piumartastaff# Grammar S <- (S 'a' / 'a') !'a' peg-0.1.18/examples/rule.ref0000644000175000000620000000026012744267362015354 0ustar piumartastaffA 24 B 24 C 24 ABC 24 B 24 C 24 D 24 BCD 24 A 24 B 24 C 24 ABC 24 B 24 C 24 D 24 BCD 24 A 24 B 24 C 24 ABC 24 B 24 C 24 D 24 BCD 24 A 24 B 24 C 24 ABC 24 B 24 C 24 D 24 BCD 24 peg-0.1.18/examples/bench.bas0000644000175000000620000000024512744267362015460 0ustar piumartastaff100 let n=100000 120 let m=0 110 let s=0 130 let m=m+1 140 let s=s+m 150 if m typedef struct line line; struct line { int number; int length; char *text; }; line *lines= 0; int numLines= 0; int pc= -1, epc= -1; int batch= 0; int nextline(char *buf, int max); # define min(x, y) ((x) < (y) ? 
(x) : (y)) # define YY_INPUT(buf, result, max_size) \ { \ if ((pc >= 0) && (pc < numLines)) \ { \ line *linep= lines+pc++; \ result= min(max_size, linep->length); \ memcpy(buf, linep->text, result); \ } \ else \ result= nextline(buf, max_size); \ } union value { int number; char *string; int (*binop)(int lhs, int rhs); }; # define YYSTYPE union value int variables[26]; void accept(int number, char *line); void save(char *name); void load(char *name); void type(char *name); int lessThan(int lhs, int rhs) { return lhs < rhs; } int lessEqual(int lhs, int rhs) { return lhs <= rhs; } int notEqual(int lhs, int rhs) { return lhs != rhs; } int equalTo(int lhs, int rhs) { return lhs == rhs; } int greaterEqual(int lhs, int rhs) { return lhs >= rhs; } int greaterThan(int lhs, int rhs) { return lhs > rhs; } int input(void); int stack[1024], sp= 0; char *help; void error(char *fmt, ...); int findLine(int n, int create); %} line = - s:statement CR | - n:number < ( !CR . )* CR > { accept(n.number, yytext); } | - CR | - < ( !CR . )* CR > { epc= pc; error("syntax error"); } | - !. { exit(0); } statement = 'print'- expr-list | 'if'- e1:expression r:relop e2:expression { if (!r.binop(e1.number, e2.number)) yythunkpos= 0; } 'then'- statement | 'goto'- e:expression { epc= pc; if ((pc= findLine(e.number, 0)) < 0) error("no such line"); } | 'input'- var-list | 'let'- v:var EQUAL e:expression { variables[v.number]= e.number; } | 'gosub'- e:expression { epc= pc; if (sp < 1024) stack[sp++]= pc, pc= findLine(e.number, 0); else error("too many gosubs"); if (pc < 0) error("no such line"); } | 'return'- { epc= pc; if ((pc= sp ? stack[--sp] : -1) < 0) error("no gosub"); } | 'clear'- { while (numLines) accept(lines->number, "\n"); } | 'list'- { int i; for (i= 0; i < numLines; ++i) printf("%5d %s", lines[i].number, lines[i].text); } | 'run'- s:string { load(s.string); pc= 0; } | 'run'- { pc= 0; } | 'end'- { pc= -1; if (batch) exit(0); } | 'rem'- ( !CR . 
)* | ('bye'|'quit'|'exit')- { exit(0); } | 'save'- s:string { save(s.string); } | 'load'- s:string { load(s.string); } | 'type'- s:string { type(s.string); } | 'dir'- { system("ls *.bas"); } | 'help'- { fprintf(stderr, "%s", help); } expr-list = ( e:string { printf("%s", e.string); } | e:expression { printf("%d", e.number); } )? ( COMMA ( e:string { printf("%s", e.string); } | e:expression { printf("%d", e.number); } ) )* ( COMMA | !COMMA { printf("\n"); } ) var-list = v:var { variables[v.number]= input(); } ( COMMA v:var { variables[v.number]= input(); } )* expression = ( PLUS? l:term | MINUS l:term { l.number = -l.number } ) ( PLUS r:term { l.number += r.number } | MINUS r:term { l.number -= r.number } )* { $$.number = l.number } term = l:factor ( STAR r:factor { l.number *= r.number } | SLASH r:factor { l.number /= r.number } )* { $$.number = l.number } factor = v:var { $$.number = variables[v.number] } | n:number | OPEN expression CLOSE var = < [a-z] > - { $$.number = yytext[0] - 'a' } number = < digit+ > - { $$.number = atoi(yytext); } digit = [0-9] string = '"' < [^\"]* > '"' - { $$.string = yytext; } relop = '<=' - { $$.binop= lessEqual; } | '<>' - { $$.binop= notEqual; } | '<' - { $$.binop= lessThan; } | '>=' - { $$.binop= greaterEqual; } | '>' - { $$.binop= greaterThan; } | '=' - { $$.binop= equalTo; } EQUAL = '=' - CLOSE = ')' - OPEN = '(' - SLASH = '/' - STAR = '*' - MINUS = '-' - PLUS = '+' - COMMA = ',' - - = [ \t]* CR = '\n' | '\r' | '\r\n' %% #include #include char *help= "print | [, | ...] [,]\n" "if <|<=|<>|=|>=|> then \n" "input [, ...] let = \n" "goto gosub \n" "end return\n" "list clear\n" "run [\"filename\"] rem \n" "dir type \"filename\"\n" "save \"filename\" load \"filename\"\n" "bye|quit|exit help\n" ; void error(char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); if (epc > 0) fprintf(stderr, "\nline %d: %s", lines[epc-1].number, lines[epc-1].text); else fprintf(stderr, "\n"); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); epc= pc= -1; } #ifdef USE_READLINE # include # include #endif int nextline(char *buf, int max) { pc= -1; if (batch) exit(0); if (isatty(fileno(stdin))) { # ifdef USE_READLINE char *line= readline(">"); if (line) { int len= strlen(line); if (len >= max) len= max - 1; strncpy(buf, line, len); (buf)[len]= '\n'; add_history(line); free(line); return len + 1; } else { printf("\n"); return 0; } # endif putchar('>'); fflush(stdout); } return fgets(buf, max, stdin) ? strlen(buf) : 0; } int maxLines= 0; int findLine(int n, int create) { int lo= 0, hi= numLines - 1; while (lo <= hi) { int mid= (lo + hi) / 2, lno= lines[mid].number; if (lno > n) hi= mid - 1; else if (lno < n) lo= mid + 1; else return mid; } if (create) { if (numLines == maxLines) { maxLines *= 2; lines= realloc(lines, sizeof(line) * maxLines); } if (lo < numLines) memmove(lines + lo + 1, lines + lo, sizeof(line) * (numLines - lo)); ++numLines; lines[lo].number= n; lines[lo].text= 0; return lo; } return -1; } void accept(int n, char *s) { if (s[0] < 32) /* delete */ { int lno= findLine(n, 0); if (lno >= 0) { if (lno < numLines - 1) memmove(lines + lno, lines + lno + 1, sizeof(line) * (numLines - lno - 1)); --numLines; } } else /* insert */ { int lno= findLine(n, 1); if (lines[lno].text) free(lines[lno].text); lines[lno].length= strlen(s); lines[lno].text= strdup(s); } } char *extend(char *name) { static char path[1024]; int len= strlen(name); sprintf(path, "%s%s", name, (((len > 4) && !strcasecmp(".bas", name + len - 4)) ? 
"" : ".bas")); return path; } void save(char *name) { FILE *f= fopen(name= extend(name), "w"); if (!f) perror(name); else { int i; for (i= 0; i < numLines; ++i) fprintf(f, "%d %s", lines[i].number, lines[i].text); fclose(f); } } void load(char *name) { FILE *f= fopen(name= extend(name), "r"); if (!f) perror(name); else { int lineNumber; char lineText[1024]; while ((1 == fscanf(f, " %d ", &lineNumber)) && fgets(lineText, sizeof(lineText), f)) accept(lineNumber, lineText); fclose(f); } } void type(char *name) { FILE *f= fopen(name= extend(name), "r"); if (!f) perror(name); else { int c, d; while ((c= getc(f)) >= 0) putchar(d= c); fclose(f); if ('\n' != d && '\r' != d) putchar('\n'); } } int input(void) { char line[32]; fgets(line, sizeof(line), stdin); return atoi(line); } int main(int argc, char **argv) { lines= malloc(sizeof(line) * (maxLines= 32)); numLines= 0; if (argc > 1) { batch= 1; while (argc-- > 1) load(*++argv); pc= 0; } while (!feof(stdin)) yyparse(); return 0; } peg-0.1.18/examples/test.bas0000644000175000000620000000024312744267362015356 0ustar piumartastaff10 let i=1 20 gosub 100 30 let i=i+1 40 if i<=10 then goto 20 50 end 100 let j=1 110 print " ", i*j, 120 let j=j+1 130 if j<=i then goto 110 140 print 150 return peg-0.1.18/examples/basic.ref0000644000175000000620000000024312744267362015467 0ustar piumartastaff 1 2 4 3 6 9 4 8 12 16 5 10 15 20 25 6 12 18 24 30 36 7 14 21 28 35 42 49 8 16 24 32 40 48 56 64 9 18 27 36 45 54 63 72 81 10 20 30 40 50 60 70 80 90 100 peg-0.1.18/examples/dcv.ref0000644000175000000620000000000712744267362015160 0ustar piumartastaff6 7 42 peg-0.1.18/examples/rule.c0000644000175000000620000000016012744267362015021 0ustar piumartastaff#include #include #include "rule.peg.c" int main() { while (yyparse()); return 0; } peg-0.1.18/examples/localpeg.c0000644000175000000620000000025412744267362015644 0ustar piumartastaff#include #define YY_CTX_LOCAL #include "test.peg.c" int main() { yycontext ctx; memset(&ctx, 0, 
sizeof(yycontext)); while (yyparse(&ctx)); return 0; } peg-0.1.18/examples/left.c0000644000175000000620000000043512744267362015011 0ustar piumartastaff#include #define YY_INPUT(buf, result, max) \ { \ int c= getchar(); \ result= (EOF == c) ? 0 : (*(buf)= c, 1); \ if (EOF != c) printf("<%c>\n", c); \ } #include "left.peg.c" int main() { printf(yyparse() ? "success\n" : "failure\n"); return 0; } peg-0.1.18/examples/accept.peg0000644000175000000620000000051412744267362015645 0ustar piumartastaffstart <- abcd+ abcd <- 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); } &{YYACCEPT} / 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); } &{YYACCEPT} bc <- 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); } cd <- 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); } peg-0.1.18/examples/test.ref0000644000175000000620000000017412744267362015370 0ustar piumartastaffa1 ab1 . a2 ac2 . a3 ad3 . a3 ae3 . a4 af4 afg4 . a4 af5 afh5 . a4 af4 afg4 . a4 af5 afh5 . af6 afi6 a6 . af6 af7 afj7 a6 . peg-0.1.18/examples/Makefile0000644000175000000620000000423212744267362015352 0ustar piumartastaffEXAMPLES = test rule accept wc dc dcv calc basic localpeg localleg erract CFLAGS = -g -O3 DIFF = diff TEE = cat > all : $(EXAMPLES) test : .FORCE ../peg -o test.peg.c test.peg $(CC) $(CFLAGS) -o test test.c echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' 
| ./$@ | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo rule : .FORCE ../peg -o rule.peg.c rule.peg $(CC) $(CFLAGS) -o rule rule.c echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo accept : .FORCE ../peg -o accept.peg.c accept.peg $(CC) $(CFLAGS) -o accept accept.c echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo wc : .FORCE ../leg -o wc.leg.c wc.leg $(CC) $(CFLAGS) -o wc wc.leg.c cat wc.leg | ./$@ | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo dc : .FORCE ../peg -o dc.peg.c dc.peg $(CC) $(CFLAGS) -o dc dc.c echo ' 2 *3 *(3+ 4) ' | ./dc | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo dcv : .FORCE ../peg -o dcv.peg.c dcv.peg $(CC) $(CFLAGS) -o dcv dcv.c echo 'a = 6; b = 7; a * b' | ./dcv | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo calc : .FORCE ../leg -o calc.leg.c calc.leg $(CC) $(CFLAGS) -o calc calc.leg.c echo 'a = 6; b = 7; a * b' | ./calc | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo basic : .FORCE ../leg -o basic.leg.c basic.leg $(CC) $(CFLAGS) -o basic basic.leg.c ( echo 'load "test"'; echo "run" ) | ./basic | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo localpeg : .FORCE ../peg -o test.peg.c test.peg $(CC) $(CFLAGS) -o localpeg localpeg.c echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' 
| ./$@ | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo localleg : .FORCE ../leg -o localleg.leg.c localleg.leg $(CC) $(CFLAGS) -o localleg localleg.leg.c ./$@ < localleg.leg | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo erract : .FORCE ../leg -o erract.leg.c erract.leg $(CC) $(CFLAGS) -o erract erract.leg.c echo '6*9' | ./$@ | $(TEE) $@.out $(DIFF) $@.ref $@.out rm -f $@.out @echo clean : .FORCE rm -f *~ *.o *.[pl]eg.[cd] $(EXAMPLES) rm -rf *.dSYM spotless : clean .FORCE : peg-0.1.18/examples/dcv.peg0000644000175000000620000000153512744267362015166 0ustar piumartastaff# Grammar Stmt <- SPACE Expr EOL { printf("%d\n", pop()); } / (!EOL .)* EOL { printf("error\n"); } Expr <- ID { var= yytext[0] } ASSIGN Sum { vars[var - 'a']= top(); } / Sum Sum <- Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); } / MINUS Product { int r= pop(), l= pop(); push(l - r); } )* Product <- Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); } / DIVIDE Value { int r= pop(), l= pop(); push(l / r); } )* Value <- NUMBER { push(atoi(yytext)); } / < ID > !ASSIGN { push(vars[yytext[0] - 'a']); } / OPEN Expr CLOSE # Lexemes NUMBER <- < [0-9]+ > SPACE ID <- < [a-z] > SPACE ASSIGN <- '=' SPACE PLUS <- '+' SPACE MINUS <- '-' SPACE TIMES <- '*' SPACE DIVIDE <- '/' SPACE OPEN <- '(' SPACE CLOSE <- ')' SPACE SPACE <- [ \t]* EOL <- '\n' / '\r\n' / '\r' / ';' peg-0.1.18/examples/erract.ref0000644000175000000620000000011012744267362015657 0ustar piumartastafffail at PLUS fail at subtraction got multiplication fail at subtraction peg-0.1.18/examples/fibonacci.bas0000644000175000000620000000037112744267362016316 0ustar piumartastaff100 let n=32 110 gosub 200 120 print "fibonacci(",n,") = ", m 130 end 200 let c=n 210 let b=1 220 if c<2 then goto 400 230 let c=c-1 240 let a=1 300 let c=c-1 310 let d=a+b 320 let a=b 330 let b=d+1 340 if c<>0 then goto 300 400 let m=b 410 return peg-0.1.18/examples/dc.peg0000644000175000000620000000122712744267362014776 0ustar 
piumartastaff# Grammar Expr <- SPACE Sum EOL { printf("%d\n", pop()); } / (!EOL .)* EOL { printf("error\n"); } Sum <- Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); } / MINUS Product { int r= pop(), l= pop(); push(l - r); } )* Product <- Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); } / DIVIDE Value { int r= pop(), l= pop(); push(l / r); } )* Value <- NUMBER { push(atoi(yytext)); } / OPEN Sum CLOSE # Lexemes NUMBER <- < [0-9]+ > SPACE PLUS <- '+' SPACE MINUS <- '-' SPACE TIMES <- '*' SPACE DIVIDE <- '/' SPACE OPEN <- '(' SPACE CLOSE <- ')' SPACE SPACE <- [ \t]* EOL <- '\n' / '\r\n' / '\r' peg-0.1.18/examples/test.peg0000644000175000000620000000101512744267362015362 0ustar piumartastaffstart <- body '.' { printf(".\n"); } body <- 'a' { printf("a1 "); } 'b' { printf("ab1 "); } / 'a' { printf("a2 "); } 'c' { printf("ac2 "); } / 'a' { printf("a3 "); } ( 'd' { printf("ad3 "); } / 'e' { printf("ae3 "); } ) / 'a' { printf("a4 "); } ( 'f' { printf("af4 "); } 'g' { printf("afg4 "); } / 'f' { printf("af5 "); } 'h' { printf("afh5 "); } ) / 'a' { printf("a6 "); } ( 'f' &{ printf("af6 ") } 'i' &{ printf("afi6 ") } / 'f' &{ printf("af7 ") } 'j' &{ printf("afj7 ") } ) peg-0.1.18/examples/dc.c0000644000175000000620000000035212744267362014443 0ustar piumartastaff#include #include int stack[1024]; int stackp= -1; int push(int n) { return stack[++stackp]= n; } int pop(void) { return stack[stackp--]; } #include "dc.peg.c" int main() { while (yyparse()); return 0; } peg-0.1.18/examples/accept.ref0000644000175000000620000000022012744267362015640 0ustar piumartastaffA 3 B 3 C 3 ABC 3 B 3 C 3 D 3 BCD 3 A 3 B 3 C 3 ABC 3 B 3 C 3 D 3 BCD 3 A 3 B 3 C 3 ABC 3 B 3 C 3 D 3 BCD 3 A 3 B 3 C 3 ABC 3 B 3 C 3 D 3 BCD 3 peg-0.1.18/examples/localpeg.ref0000644000175000000620000000017412744267362016177 0ustar piumartastaffa1 ab1 . a2 ac2 . a3 ad3 . a3 ae3 . a4 af4 afg4 . a4 af5 afh5 . a4 af4 afg4 . a4 af5 afh5 . af6 afi6 a6 . af6 af7 afj7 a6 . 
peg-0.1.18/examples/calc.leg0000644000175000000620000000144012744267362015303 0ustar piumartastaff%{ #include int vars[26]; %} Stmt = - e:Expr EOL { printf("%d\n", e); } | ( !EOL . )* EOL { printf("error\n"); } Expr = i:ID ASSIGN s:Sum { $$= vars[i]= s; } | s:Sum { $$= s; } Sum = l:Product ( PLUS r:Product { l += r; } | MINUS r:Product { l -= r; } )* { $$= l; } Product = l:Value ( TIMES r:Value { l *= r; } | DIVIDE r:Value { l /= r; } )* { $$= l; } Value = i:NUMBER { $$= atoi(yytext); } | i:ID !ASSIGN { $$= vars[i]; } | OPEN i:Expr CLOSE { $$= i; } NUMBER = < [0-9]+ > - { $$= atoi(yytext); } ID = < [a-z] > - { $$= yytext[0] - 'a'; } ASSIGN = '=' - PLUS = '+' - MINUS = '-' - TIMES = '*' - DIVIDE = '/' - OPEN = '(' - CLOSE = ')' - - = [ \t]* EOL = '\n' | '\r\n' | '\r' | ';' %% int main() { while (yyparse()); return 0; } peg-0.1.18/examples/calc.ref0000644000175000000620000000000712744267362015306 0ustar piumartastaff6 7 42 peg-0.1.18/examples/erract.leg0000644000175000000620000000112712744267362015663 0ustar piumartastaff%{ #include %} Expr = a:NUMBER PLUS ~{ printf("fail at PLUS\n") } b:NUMBER { printf("got addition\n"); } | ( a:NUMBER MINUS b:NUMBER { printf("got subtraction\n"); } ) ~{ printf("fail at subtraction\n") } | a:NUMBER TIMES b:NUMBER { printf("got multiplication\n"); } | a:NUMBER DIVIDE b:NUMBER { printf("got division\n"); } NUMBER = < [0-9]+ > - { $$= atoi(yytext); } PLUS = '+' - MINUS = '-' - TIMES = '*' - DIVIDE = '/' - - = (SPACE | EOL)* SPACE = [ \t] EOL = '\n' | '\r\n' | '\r' | ';' %% int main() { while (yyparse()); return 0; } peg-0.1.18/examples/test.c0000644000175000000620000000013212744267362015030 0ustar piumartastaff#include #include "test.peg.c" int main() { while (yyparse()); return 0; } peg-0.1.18/README.txt0000644000175000000620000000216212744267363013573 0ustar piumartastaffBuilding on a Unix-like system ------------------------------ Type 'make' or 'make test'. 
The latter builds all the examples and runs them, comparing their output with the expected output. Type 'make install' to install the binaries and manual page under /usr/local. (Type 'make uninstall' to remove them.) You may have to do this using 'sudo' or while logged in as root. Edit 'Makefile' to change the way things are built and/or the places where things are installed. Building on MacOS X ------------------- Run the 'build-mac.sh' script from a terminal or by double-clicking on it in the Finder. You will need Xcode. The provided project is known to work with Xcode versions 3.2.6 and 4.3.2. Modify build-mac.sh and/or peg.xcodeproj to change the way things are built. Building on Windows ------------------- Run the 'build-win.cmd' script. You will need Visual Studio 2010 Express. Modify build-win.cmd, leg.vcxproj, leg.vcxproj.filters, peg.gyp, peg.sln, peg.vcxproj and/or peg.vcxproj.filters to change the way things are built. Local implementations of getopt() and basename() are provided in the 'win' directory. peg-0.1.18/ChangeLog0000644000175000000620000000722612744267363013655 0ustar piumartastaff2016-07-22 Ian Piumarta * src/version.h (PEG_LEVEL): Version 0.1.18. * src/tree.h: Rule_compile_c takes nolines argument for option -P. * src/compile.c, src/peg.c, src/leg.leg (main): Add option -P. Generate #line directives for all actions. * src/peg.1: Document option -P. 2016-07-14 Ian Piumarta * src/version.h: 0.1.17 * src/leg.leg: Remember line numbers for headers and trailer. Emit #line directives in the generated file. 2016-06-25 piumarta * src/version.h: 0.1.16 * src/tree.[ch], src/compile.c, src/leg.leg: Add @-actions. * src/peg.1: Explain @-actions. * src/peg/peg-c, src/leg.c: Regenerate C source. 2013-12-18 piumarta * src/version.h: 0.1.15 * src/compile.c: YY_FREE takes context and pointer as arguments. * YYRELEASE: Pass yyctx and pointer to YY_FREE. 
2013-12-01 Ian Piumarta * src/version.h: 0.1.14 * src/peg.1: Fix several typos and escape backslashes (thanks to Giulio Paci). * LICENSE.txt: Replace "the the" with "the". 2013-08-16 Ian Piumarta * src/compile.c: Predicate actions can refer to yytext (thanks to Gregory Pakosz). * src/leg.leg: Hexadecimal character escapes are supported by leg (thanks to Hugo Etchegoyen). 2013-07-20 Ian Piumarta * src/getopt.c: Use BSD-licensed getopt() in Windows build. * src/compile.c: Verbose mode handles Variable nodes. 2013-06-03 Ian Piumarta * src/leg.leg, src/compile.c: Add error actions via "~" operator. * src/compile.c: Support declaration of local variables at the top level of semantic actions. Dynamically grow data structures to remove artificial limits on rule recursion (thanks to Alex Klinkhamer). Many small changes to better support C++. * src/peg.1: Update manual page to describe new features. Add build files for Win32 and MacOS thanks to Fyodor Sheremetyev). 2012-04-29 Ian Piumarta * compile.c: Move global state into a structure to facilitate reentrant and thread-safe parsers (thanks to Dmitry Lipovoi). 2012-03-29 Ian Piumarta * leg.leg: Allow nested, matched braces within actions. 2011-11-25 Ian Piumarta * compile.c: Fix matching of 8-bit chars to allow utf-8 sequences in matching expressions (thanks to Gregory Pakosz). 2011-11-24 Ian Piumarta * compile.c: Allow octal escapes in character classes. 2011-11-24 Ian Piumarta * Makefile: Remove dwarf sym dirs when cleaning. * compile.c: Fix size calculation when resizing text buffers. * leg.leg, peg.peg: Backslash can be escaped. 2009-08-26 Ian Piumarta * leg.leg: Fix match of a single single quote character. * examples/basic.leg: Rename getline -> nextline to avoid C namespace conflict. 2007-09-13 Ian Piumarta * leg.leg: Allow matched braces inside leg actions. Handle empty rules. Handle empty grammars. 2007-08-31 Ian Piumarta * compile.c: Grow buffers while (not if) they are too small. 
Remove dependencies on grammar files. Add more basic examples. 2007-05-15 Ian Piumarta First public release. peg-0.1.18/build-mac.sh0000755000175000000620000000016712744267363014274 0ustar piumartastaff#!/bin/bash xcodebuild -project peg.xcodeproj -configuration Release cp build/Release/peg ./ cp build/Release/leg ./ peg-0.1.18/Makefile0000644000175000000620000000265512744267363013544 0ustar piumartastaffCFLAGS = -g -Wall $(OFLAGS) $(XFLAGS) -Isrc OFLAGS = -O3 -DNDEBUG #OFLAGS = -pg OBJS = tree.o compile.o all : peg leg peg : peg.o $(OBJS) $(CC) $(CFLAGS) -o $@-new peg.o $(OBJS) mv $@-new $@ leg : leg.o $(OBJS) $(CC) $(CFLAGS) -o $@-new leg.o $(OBJS) mv $@-new $@ ROOT = PREFIX = /usr/local BINDIR = $(ROOT)$(PREFIX)/bin MANDIR = $(ROOT)$(PREFIX)/man/man1 install : $(BINDIR) $(BINDIR)/peg $(BINDIR)/leg $(MANDIR) $(MANDIR)/peg.1 $(BINDIR) : mkdir -p $(BINDIR) $(BINDIR)/% : % cp -p $< $@ strip $@ $(MANDIR) : mkdir -p $(MANDIR) $(MANDIR)/% : src/% cp -p $< $@ uninstall : .FORCE rm -f $(BINDIR)/peg rm -f $(BINDIR)/leg rm -f $(MANDIR)/peg.1 %.o : src/%.c $(CC) $(CFLAGS) -c -o $@ $< peg.o : src/peg.c src/peg.peg-c leg.o : src/leg.c check : check-peg check-leg check-peg : peg.peg-c .FORCE diff src/peg.peg-c peg.peg-c check-leg : leg.c .FORCE diff src/leg.c leg.c peg.peg-c : src/peg.peg peg ./peg -o $@ $< leg.c : src/leg.leg leg ./leg -o $@ $< new : newpeg newleg newpeg : peg.peg-c mv src/peg.peg-c src/peg.peg-c- mv peg.peg-c src/. newleg : leg.c mv src/leg.c src/leg.c- mv leg.c src/. 
test examples : peg leg .FORCE $(SHELL) -ec '(cd examples; $(MAKE))' clean : .FORCE rm -f src/*~ *~ *.o *.peg.[cd] *.leg.[cd] peg.peg-c leg.c $(SHELL) -ec '(cd examples; $(MAKE) $@)' spotless : clean .FORCE rm -f src/*- rm -rf build rm -f peg rm -f leg $(SHELL) -ec '(cd examples; $(MAKE) $@)' .FORCE : peg-0.1.18/peg.sln0000644000175000000620000000250512744267363013367 0ustar piumartastaffMicrosoft Visual Studio Solution File, Format Version 11.00 # Visual Studio 2010 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "leg", "leg.vcxproj", "{5ECEC9E5-8F23-47B6-93E0-C3B328B3BE66}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "peg", "peg.vcxproj", "{5ECEC9E5-8F23-47B6-93E0-C3B328B3BE65}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 Release|Win32 = Release|Win32 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE65}.Debug|Win32.ActiveCfg = Debug|Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE65}.Debug|Win32.Build.0 = Debug|Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE65}.Release|Win32.ActiveCfg = Release|Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE65}.Release|Win32.Build.0 = Release|Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE66}.Debug|Win32.ActiveCfg = Debug|Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE66}.Debug|Win32.Build.0 = Debug|Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE66}.Release|Win32.ActiveCfg = Release|Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE66}.Release|Win32.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution EndGlobalSection EndGlobal peg-0.1.18/leg.vcxproj.filters0000644000175000000620000000113212744267363015724 0ustar piumartastaff {47FC5EC4-15EB-E92F-89D7-AFE51CF838A9} win peg-0.1.18/peg.gyp0000644000175000000620000000300612744267363013367 0ustar piumartastaff{ 'targets': [ { 'target_name': 'peg', 'type': 
'executable', 'msvs_guid': '5ECEC9E5-8F23-47B6-93E0-C3B328B3BE65', 'sources': [ 'peg.c', 'tree.c', 'compile.c', ], 'conditions': [ ['OS=="win"', { 'include_dirs': [ 'win', ], 'sources': [ 'win/getopt.c', ], }], ], }, { 'target_name': 'leg', 'type': 'executable', 'msvs_guid': '5ECEC9E5-8F23-47B6-93E0-C3B328B3BE66', 'sources': [ 'leg.c', 'tree.c', 'compile.c', ], 'conditions': [ ['OS=="win"', { 'include_dirs': [ 'win', ], 'sources': [ 'win/getopt.c', ], }], ], }, ], 'target_defaults': { 'configurations': { 'Debug': { 'defines': [ 'DEBUG', ], }, 'Release': { 'defines': [ 'NDEBUG', ], }, }, }, # define default project settings 'conditions': [ ['OS=="win"', { 'target_defaults': { 'defines': [ 'WIN32', '_WINDOWS', ], 'msvs_settings': { 'VCLinkerTool': { 'GenerateDebugInformation': 'true', # SubSystem values: # 0 == not set # 1 == /SUBSYSTEM:CONSOLE # 2 == /SUBSYSTEM:WINDOWS 'SubSystem': '1', }, }, }, }], ], } peg-0.1.18/win/0002755000175000000620000000000012744267362012672 5ustar piumartastaffpeg-0.1.18/win/getopt.c0000644000175000000620000003634612744267362014352 0ustar piumartastaff/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */ /* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */ /* * Copyright (c) 2002 Todd C. Miller * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* * Sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F39502-99-1-0512. */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Dieter Baron and Thomas Klausner. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ #ifdef REPLACE_GETOPT int opterr = 1; /* if error message should be printed */ int optind = 1; /* index into parent argv vector */ int optopt = '?'; /* character checked for validity */ #undef optreset /* see getopt.h */ #define optreset __mingw_optreset int optreset; /* reset getopt */ char *optarg; /* argument associated with option */ #endif #define PRINT_ERROR ((opterr) && (*options != ':')) #define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ #define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ #define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ /* return values */ #define BADCH (int)'?' #define BADARG ((*options == ':') ? (int)':' : (int)'?') #define INORDER (int)1 #ifndef __CYGWIN__ #define __progname __argv[0] #else extern char __declspec(dllimport) *__progname; #endif #ifdef __CYGWIN__ static char EMSG[] = ""; #else #define EMSG "" #endif static int getopt_internal(int, char * const *, const char *, const struct option *, int *, int); static int parse_long_options(char * const *, const char *, const struct option *, int *, int); static int gcd(int, int); static void permute_args(int, int, int, char * const *); static char *place = EMSG; /* option letter processing */ /* XXX: set optreset to 1 rather than these two */ static int nonopt_start = -1; /* first non option argument (for permute) */ static int nonopt_end = -1; /* first option after non options (for permute) */ /* Error messages */ static const char recargchar[] = "option requires an argument -- %c"; static const char recargstring[] = "option requires an argument -- %s"; static const char ambig[] = "ambiguous option -- %.*s"; static const char noarg[] = "option doesn't take an argument -- %.*s"; static const char illoptchar[] = "unknown option -- %c"; static const char illoptstring[] = "unknown option -- %s"; static 
void _vwarnx(const char *fmt, va_list ap)
{
	/*
	 * Write "<program name>: <formatted message>\n" to stderr.
	 * A NULL fmt prints only the prefix and the newline.
	 */
	(void)fprintf(stderr, "%s: ", __progname);
	if (fmt != NULL)
		(void)vfprintf(stderr, fmt, ap);
	(void)fprintf(stderr, "\n");
}

/*
 * Variadic front end to _vwarnx(): report a printf-style diagnostic
 * on stderr.
 */
static void
warnx(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	_vwarnx(fmt, ap);
	va_end(ap);
}

/*
 * Compute the greatest common divisor of a and b.
 *
 * Euclid's algorithm, written so that the remainder is only taken when
 * the divisor is non-zero: gcd(a, 0) returns a instead of evaluating
 * a % 0, which is undefined.  For b != 0 the result is the same as the
 * classic "c = a % b; while (c != 0) ..." formulation.
 */
static int
gcd(int a, int b)
{
	int c;

	while (b != 0) {
		c = a % b;
		a = b;
		b = c;
	}
	return (a);
}

/*
 * Exchange the block from nonopt_start to nonopt_end with the block
 * from nonopt_end to opt_end (keeping the same order of arguments
 * in each block).
 *
 * panonopt_start  index of the first skipped non-option in nargv
 * panonopt_end    index one past the last skipped non-option
 * opt_end         index one past the last option to move in front
 * nargv           argument vector, rearranged in place
 */
static void
permute_args(int panonopt_start, int panonopt_end, int opt_end,
	char * const *nargv)
{
	int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
	char *swap;

	/*
	 * compute lengths of blocks and number and size of cycles
	 */
	nnonopts = panonopt_end - panonopt_start;
	nopts = opt_end - panonopt_end;

	/*
	 * If either block is empty there is nothing to exchange; bail out
	 * before ncycle could become zero and be used as a divisor.
	 */
	if (nnonopts <= 0 || nopts <= 0)
		return;

	ncycle = gcd(nnonopts, nopts);
	cyclelen = (opt_end - panonopt_start) / ncycle;

	/* Rotate the range one cycle at a time, swapping through cstart. */
	for (i = 0; i < ncycle; i++) {
		cstart = panonopt_end+i;
		pos = cstart;
		for (j = 0; j < cyclelen; j++) {
			if (pos >= panonopt_end)
				pos -= nnonopts;
			else
				pos += nopts;
			swap = nargv[pos];
			/* LINTED const cast */
			((char **) nargv)[pos] = nargv[cstart];
			/* LINTED const cast */
			((char **)nargv)[cstart] = swap;
		}
	}
}

/*
 * parse_long_options --
 *	Parse long options in argc/argv argument vector.
 * Returns -1 if short_too is set and the option does not match long_options.
*/
/*
 * The option text is taken from the file-scope `place' pointer and is
 * split at the first '=' into a name and an inline argument.  optind is
 * advanced past the current argv element on entry; the error paths that
 * must leave it alone undo that with --optind.
 *
 * nargv         argument vector (read for a separate option argument)
 * options       short-option string; only inspected by PRINT_ERROR/BADARG
 * long_options  table terminated by an entry whose name is NULL
 * idx           if non-NULL, receives the index of the matched entry
 * short_too     non-zero when the text could also be a short option
 *
 * Return value:
 *   BADCH   ambiguous abbreviation or unknown option (optopt set to 0)
 *   BADARG  argument supplied to a no_argument option, or a
 *           required_argument option with no argument available
 *   -1      unknown option and short_too is set (optind restored)
 *   0       match whose `flag' is non-NULL (*flag is set to val)
 *   val     match whose `flag' is NULL
 */
static int
parse_long_options(char * const *nargv, const char *options,
	const struct option *long_options, int *idx, int short_too)
{
	char *current_argv, *has_equal;
	size_t current_argv_len;
	int i, ambiguous, match;

/* True when two table entries would be reported identically to the caller. */
#define IDENTICAL_INTERPRETATION(_x, _y)                                \
	(long_options[(_x)].has_arg == long_options[(_y)].has_arg &&    \
	 long_options[(_x)].flag == long_options[(_y)].flag &&          \
	 long_options[(_x)].val == long_options[(_y)].val)

	current_argv = place;

	match = -1;
	ambiguous = 0;

	/* consume this argv element up front */
	optind++;

	if ((has_equal = strchr(current_argv, '=')) != NULL) {
		/* argument found (--option=arg) */
		current_argv_len = has_equal - current_argv;
		has_equal++;	/* now points at the argument text */
	} else
		current_argv_len = strlen(current_argv);

	for (i = 0; long_options[i].name; i++) {
		/* find matching long option */
		if (strncmp(current_argv, long_options[i].name,
		    current_argv_len))
			continue;

		if (strlen(long_options[i].name) == current_argv_len) {
			/* exact match */
			match = i;
			ambiguous = 0;
			break;
		}
		/*
		 * If this is a known short option, don't allow
		 * a partial match of a single character.
		 */
		if (short_too && current_argv_len == 1)
			continue;

		if (match == -1)	/* partial match */
			match = i;
		else if (!IDENTICAL_INTERPRETATION(i, match))
			/* second prefix match that means something different */
			ambiguous = 1;
	}
	if (ambiguous) {
		/* ambiguous abbreviation */
		if (PRINT_ERROR)
			warnx(ambig, (int)current_argv_len,
			     current_argv);
		optopt = 0;
		return (BADCH);
	}
	if (match != -1) {		/* option found */
		if (long_options[match].has_arg == no_argument
		    && has_equal) {
			/* "--name=value" given for an option that takes none */
			if (PRINT_ERROR)
				warnx(noarg, (int)current_argv_len,
				     current_argv);
			/*
			 * XXX: GNU sets optopt to val regardless of flag
			 */
			if (long_options[match].flag == NULL)
				optopt = long_options[match].val;
			else
				optopt = 0;
			return (BADARG);
		}
		if (long_options[match].has_arg == required_argument ||
		    long_options[match].has_arg == optional_argument) {
			if (has_equal)
				optarg = has_equal;
			else if (long_options[match].has_arg ==
			    required_argument) {
				/*
				 * optional argument doesn't use next nargv
				 */
				optarg = nargv[optind++];
			}
		}
		if ((long_options[match].has_arg == required_argument)
		    && (optarg == NULL)) {
			/*
			 * Missing argument; leading ':' indicates no error
			 * should be generated.
			 */
			if (PRINT_ERROR)
				warnx(recargstring,
				    current_argv);
			/*
			 * XXX: GNU sets optopt to val regardless of flag
			 */
			if (long_options[match].flag == NULL)
				optopt = long_options[match].val;
			else
				optopt = 0;
			/* undo the optind++ done when fetching the argument */
			--optind;
			return (BADARG);
		}
	} else {			/* unknown option */
		if (short_too) {
			/* let the caller retry the text as short options */
			--optind;
			return (-1);
		}
		if (PRINT_ERROR)
			warnx(illoptstring, current_argv);
		optopt = 0;
		return (BADCH);
	}
	if (idx)
		*idx = match;
	if (long_options[match].flag) {
		*long_options[match].flag = long_options[match].val;
		return (0);
	} else
		return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}

/*
 * getopt_internal --
 *	Parse argc/argv argument vector.  Called by user level routines.
*/
/*
 * Shared engine behind getopt(), getopt_long() and getopt_long_only().
 * Scanning state lives in the file-scope variables `place',
 * `nonopt_start' and `nonopt_end' and in the public optind / optarg /
 * optopt / optreset variables.
 *
 * nargc, nargv  argument count and vector
 * options       short-option string; a leading '-' turns on FLAG_ALLARGS,
 *               a leading '+' turns off FLAG_PERMUTE
 * long_options  long-option table, or NULL for short options only
 * idx           passed through to parse_long_options()
 * flags         combination of FLAG_PERMUTE / FLAG_ALLARGS / FLAG_LONGONLY
 *
 * Return value:
 *   -1       options is NULL, end of the argument vector, "--", or a
 *            non-option when permutation is disabled
 *   INORDER  a non-option returned through optarg (FLAG_ALLARGS)
 *   BADCH    unknown option character (optopt holds it)
 *   BADARG   option is missing its required argument
 *   other    the option character, or the parse_long_options() result
 */
static int
getopt_internal(int nargc, char * const *nargv, const char *options,
	const struct option *long_options, int *idx, int flags)
{
	char *oli;				/* option letter list index */
	int optchar, short_too;
	static int posixly_correct = -1;	/* -1: environment not checked yet */

	if (options == NULL)
		return (-1);

	/*
	 * XXX Some GNU programs (like cvs) set optind to 0 instead of
	 * XXX using optreset.  Work around this braindamage.
	 */
	if (optind == 0)
		optind = optreset = 1;

	/*
	 * Disable GNU extensions if POSIXLY_CORRECT is set or options
	 * string begins with a '+'.
	 *
	 * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
	 *                 optreset != 0 for GNU compatibility.
	 */
	if (posixly_correct == -1 || optreset != 0)
		posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
	if (*options == '-')
		flags |= FLAG_ALLARGS;
	else if (posixly_correct || *options == '+')
		flags &= ~FLAG_PERMUTE;
	/* skip the mode character so `options' starts at the first letter */
	if (*options == '+' || *options == '-')
		options++;

	optarg = NULL;
	if (optreset)
		nonopt_start = nonopt_end = -1;
start:
	if (optreset || !*place) {		/* update scanning pointer */
		optreset = 0;
		if (optind >= nargc) {          /* end of argument vector */
			place = EMSG;
			if (nonopt_end != -1) {
				/* do permutation, if we have to */
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				optind -= nonopt_end - nonopt_start;
			}
			else if (nonopt_start != -1) {
				/*
				 * If we skipped non-options, set optind
				 * to the first of them.
				 */
				optind = nonopt_start;
			}
			nonopt_start = nonopt_end = -1;
			return (-1);
		}
		if (*(place = nargv[optind]) != '-' ||
		    (place[1] == '\0' && strchr(options, '-') == NULL)) {
			place = EMSG;		/* found non-option */
			if (flags & FLAG_ALLARGS) {
				/*
				 * GNU extension:
				 * return non-option as argument to option 1
				 */
				optarg = nargv[optind++];
				return (INORDER);
			}
			if (!(flags & FLAG_PERMUTE)) {
				/*
				 * If no permutation wanted, stop parsing
				 * at first non-option.
				 */
				return (-1);
			}
			/* do permutation */
			if (nonopt_start == -1)
				nonopt_start = optind;
			else if (nonopt_end != -1) {
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				nonopt_start = optind -
				    (nonopt_end - nonopt_start);
				nonopt_end = -1;
			}
			optind++;
			/* process next argument */
			goto start;
		}
		/* first option after a run of skipped non-options */
		if (nonopt_start != -1 && nonopt_end == -1)
			nonopt_end = optind;

		/*
		 * If we have "-" do nothing, if "--" we are done.
		 */
		if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
			optind++;
			place = EMSG;
			/*
			 * We found an option (--), so if we skipped
			 * non-options, we have to permute.
			 */
			if (nonopt_end != -1) {
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				optind -= nonopt_end - nonopt_start;
			}
			nonopt_start = nonopt_end = -1;
			return (-1);
		}
	}

	/*
	 * Check long options if:
	 *  1) we were passed some
	 *  2) the arg is not just "-"
	 *  3) either the arg starts with -- or we are getopt_long_only()
	 */
	if (long_options != NULL && place != nargv[optind] &&
	    (*place == '-' || (flags & FLAG_LONGONLY))) {
		short_too = 0;
		if (*place == '-')
			place++;		/* --foo long option */
		else if (*place != ':' && strchr(options, *place) != NULL)
			short_too = 1;		/* could be short option too */

		optchar = parse_long_options(nargv, options, long_options,
		    idx, short_too);
		/* -1 means "not a long option": fall through to short parsing */
		if (optchar != -1) {
			place = EMSG;
			return (optchar);
		}
	}

	if ((optchar = (int)*place++) == (int)':' ||
	    (optchar == (int)'-' && *place != '\0') ||
	    (oli = strchr(options, optchar)) == NULL) {
		/*
		 * If the user specified "-" and  '-' isn't listed in
		 * options, return -1 (non-option) as per POSIX.
		 * Otherwise, it is an unknown option character (or ':').
		 */
		if (optchar == (int)'-' && *place == '\0')
			return (-1);
		/* this argv element is used up; move on to the next one */
		if (!*place)
			++optind;
		if (PRINT_ERROR)
			warnx(illoptchar, optchar);
		optopt = optchar;
		return (BADCH);
	}
	if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
		/* -W long-option */
		if (*place)			/* no space */
			/* NOTHING */;
		else if (++optind >= nargc) {	/* no arg */
			place = EMSG;
			if (PRINT_ERROR)
				warnx(recargchar, optchar);
			optopt = optchar;
			return (BADARG);
		} else				/* white space */
			place = nargv[optind];
		optchar = parse_long_options(nargv, options, long_options,
		    idx, 0);
		place = EMSG;
		return (optchar);
	}
	if (*++oli != ':') {			/* doesn't take argument */
		if (!*place)
			++optind;
	} else {				/* takes (optional) argument */
		optarg = NULL;
		if (*place)			/* no white space */
			optarg = place;
		else if (oli[1] != ':') {	/* arg not optional */
			if (++optind >= nargc) {	/* no arg */
				place = EMSG;
				if (PRINT_ERROR)
					warnx(recargchar, optchar);
				optopt = optchar;
				return (BADARG);
			} else
				optarg = nargv[optind];
		}
		place = EMSG;
		++optind;
	}
	/* dump back option letter */
	return (optchar);
}

#ifdef REPLACE_GETOPT
/*
 * getopt --
 *	Parse argc/argv argument vector.
 *
 * [eventually this will replace the BSD getopt]
 */
int
getopt(int nargc, char * const *nargv, const char *options)
{

	/*
	 * We don't pass FLAG_PERMUTE to getopt_internal() since
	 * the BSD getopt(3) (unlike GNU) has never done this.
	 *
	 * Furthermore, since many privileged programs call getopt()
	 * before dropping privileges it makes sense to keep things
	 * as simple (and bug-free) as possible.
	 */
	return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */

/*
 * getopt_long --
 *	Parse argc/argv argument vector.
 *
 * Calls getopt_internal() with FLAG_PERMUTE, so non-option arguments
 * may be moved behind the options.
 */
int
getopt_long(int nargc, char * const *nargv, const char *options,
    const struct option *long_options, int *idx)
{

	return (getopt_internal(nargc, nargv, options, long_options, idx,
	    FLAG_PERMUTE));
}

/*
 * getopt_long_only --
 *	Parse argc/argv argument vector.
*/ int getopt_long_only(int nargc, char * const *nargv, const char *options, const struct option *long_options, int *idx) { return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE|FLAG_LONGONLY)); } peg-0.1.18/win/libgen.h0000644000175000000620000000015412744267362014301 0ustar piumartastaff#ifndef _LIBGEN_H #define _LIBGEN_H char * basename (char *fname) { return fname; } #endif // _LIBGEN_H peg-0.1.18/win/getopt.h0000644000175000000620000000600612744267362014345 0ustar piumartastaff#ifndef __GETOPT_H__ /** * DISCLAIMER * This file has no copyright assigned and is placed in the Public Domain. * This file is a part of the w64 mingw-runtime package. * * The w64 mingw-runtime package and its code is distributed in the hope that it * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ #define __GETOPT_H__ /* All the headers include this file. */ #include #ifdef __cplusplus extern "C" { #endif extern int optind; /* index of first non-option in argv */ extern int optopt; /* single option character, as parsed */ extern int opterr; /* flag to enable built-in diagnostics... */ /* (user may set to zero, to suppress) */ extern char *optarg; /* pointer to argument of current option */ extern int getopt(int nargc, char * const *nargv, const char *options); #ifdef _BSD_SOURCE /* * BSD adds the non-standard `optreset' feature, for reinitialisation * of `getopt' parsing. We support this feature, for applications which * proclaim their BSD heritage, before including this header; however, * to maintain portability, developers are advised to avoid it. */ # define optreset __mingw_optreset extern int optreset; #endif #ifdef __cplusplus } #endif /* * POSIX requires the `getopt' API to be specified in `unistd.h'; * thus, `unistd.h' includes this header. 
However, we do not want * to expose the `getopt_long' or `getopt_long_only' APIs, when * included in this manner. Thus, close the standard __GETOPT_H__ * declarations block, and open an additional __GETOPT_LONG_H__ * specific block, only when *not* __UNISTD_H_SOURCED__, in which * to declare the extended API. */ #endif /* !defined(__GETOPT_H__) */ #if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) #define __GETOPT_LONG_H__ #ifdef __cplusplus extern "C" { #endif struct option /* specification for a long form option... */ { const char *name; /* option name, without leading hyphens */ int has_arg; /* does it take an argument? */ int *flag; /* where to save its status, or NULL */ int val; /* its associated status value */ }; enum /* permitted values for its `has_arg' field... */ { no_argument = 0, /* option never takes an argument */ required_argument, /* option always requires an argument */ optional_argument /* option may take an argument */ }; extern int getopt_long(int nargc, char * const *nargv, const char *options, const struct option *long_options, int *idx); extern int getopt_long_only(int nargc, char * const *nargv, const char *options, const struct option *long_options, int *idx); /* * Previous MinGW implementation had... */ #ifndef HAVE_DECL_GETOPT /* * ...for the long form API only; keep this for compatibility. 
*/ # define HAVE_DECL_GETOPT 1 #endif #ifdef __cplusplus } #endif #endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ peg-0.1.18/win/unistd.h0000644000175000000620000000011612744267362014345 0ustar piumartastaff#ifndef _UNISTD_H #define _UNISTD_H #include "getopt.h" #endif // _UNISTD_H peg-0.1.18/leg.vcxproj0000644000175000000620000000711212744267363014261 0ustar piumartastaff Debug Win32 Release Win32 {5ECEC9E5-8F23-47B6-93E0-C3B328B3BE66} Win32Proj leg $(ProjectName) Application $(ExecutablePath);$(MSBuildProjectDirectory)\.\bin\;$(MSBuildProjectDirectory)\.\bin\ $(Configuration)\obj\$(ProjectName)\ $(SolutionDir)$(Configuration)\ win;%(AdditionalIncludeDirectories) WIN32;_WINDOWS;DEBUG;%(PreprocessorDefinitions) true $(OutDir)$(ProjectName).exe Console win;%(AdditionalIncludeDirectories) WIN32;_WINDOWS;DEBUG;%(PreprocessorDefinitions);%(PreprocessorDefinitions) win;%(AdditionalIncludeDirectories) WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions) true $(OutDir)$(ProjectName).exe Console win;%(AdditionalIncludeDirectories) WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions);%(PreprocessorDefinitions) peg-0.1.18/build-win.cmd0000644000175000000620000000016512744267363014455 0ustar piumartastaff@echo off call "%VS100COMNTOOLS%vsvars32.bat" msbuild peg.sln /p:Configuration=Release xcopy /Y /D Release\*.exe .\ peg-0.1.18/src/0002755000175000000620000000000012744267362012664 5ustar piumartastaffpeg-0.1.18/src/compile.c0000644000175000000620000005167612744267362014475 0ustar piumartastaff/* Copyright (c) 2007--2013 by Ian Piumarta * All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the 'Software'), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, provided that the above copyright notice(s) and this * permission notice appear in all copies of the Software. Acknowledgement * of the use of this Software in supporting documentation would be * appreciated but is not required. * * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. * * Last edited: 2016-07-22 09:43:05 by piumarta on zora.local */ #include #include #include #include #ifdef WIN32 # undef inline # define inline __inline #endif #include "version.h" #include "tree.h" static int yyl(void) { static int prev= 0; return ++prev; } static void charClassSet (unsigned char bits[], int c) { bits[c >> 3] |= (1 << (c & 7)); } static void charClassClear(unsigned char bits[], int c) { bits[c >> 3] &= ~(1 << (c & 7)); } typedef void (*setter)(unsigned char bits[], int c); static inline int oigit(int c) { return ('0' <= c && c <= '7'); } static inline int higit(int c) { return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f'); } static inline int hexval(int c) { if ('0' <= c && c <= '9') return c - '0'; if ('A' <= c && c <= 'F') return 10 - 'A' + c; if ('a' <= c && c <= 'f') return 10 - 'a' + c; return 0; } static int cnext(unsigned char **ccp) { unsigned char *cclass= *ccp; int c= *cclass++; if (c) { if ('\\' == c && *cclass) { switch (c= *cclass++) { case 'a': c= '\a'; break; /* bel */ case 'b': c= '\b'; break; /* bs */ case 'e': c= '\033'; break; /* esc */ case 'f': c= '\f'; break; /* ff */ case 'n': c= '\n'; break; /* nl */ case 'r': c= '\r'; break; /* cr */ case 't': c= '\t'; break; /* ht */ case 'v': c= '\v'; break; /* vt */ case 
'x': c= 0; if (higit(*cclass)) c= (c << 4) + hexval(*cclass++); if (higit(*cclass)) c= (c << 4) + hexval(*cclass++); break; default: if (oigit(c)) { c -= '0'; if (oigit(*cclass)) c= (c << 3) + *cclass++ - '0'; if (oigit(*cclass)) c= (c << 3) + *cclass++ - '0'; } break; } } *ccp= cclass; } return c; } static char *makeCharClass(unsigned char *cclass) { unsigned char bits[32]; setter set; int c, prev= -1; static char string[256]; char *ptr; if ('^' == *cclass) { memset(bits, 255, 32); set= charClassClear; ++cclass; } else { memset(bits, 0, 32); set= charClassSet; } while (*cclass) { if ('-' == *cclass && cclass[1] && prev >= 0) { ++cclass; for (c= cnext(&cclass); prev <= c; ++prev) set(bits, prev); prev= -1; } else { c= cnext(&cclass); set(bits, prev= c); } } ptr= string; for (c= 0; c < 32; ++c) ptr += sprintf(ptr, "\\%03o", bits[c]); return string; } static void begin(void) { fprintf(output, "\n {"); } static void end(void) { fprintf(output, "\n }"); } static void label(int n) { fprintf(output, "\n l%d:;\t", n); } static void jump(int n) { fprintf(output, " goto l%d;", n); } static void save(int n) { fprintf(output, " int yypos%d= yy->__pos, yythunkpos%d= yy->__thunkpos;", n, n); } static void restore(int n) { fprintf(output, " yy->__pos= yypos%d; yy->__thunkpos= yythunkpos%d;", n, n); } static void Node_compile_c_ko(Node *node, int ko) { assert(node); switch (node->type) { case Rule: fprintf(stderr, "\ninternal error #1 (%s)\n", node->rule.name); exit(1); break; case Dot: fprintf(output, " if (!yymatchDot(yy)) goto l%d;", ko); break; case Name: fprintf(output, " if (!yy_%s(yy)) goto l%d;", node->name.rule->rule.name, ko); if (node->name.variable) fprintf(output, " yyDo(yy, yySet, %d, 0);", node->name.variable->variable.offset); break; case Character: case String: { int len= strlen(node->string.value); if (1 == len) { if ('\'' == node->string.value[0]) fprintf(output, " if (!yymatchChar(yy, '\\'')) goto l%d;", ko); else fprintf(output, " if (!yymatchChar(yy, '%s')) 
goto l%d;", node->string.value, ko); } else if (2 == len && '\\' == node->string.value[0]) fprintf(output, " if (!yymatchChar(yy, '%s')) goto l%d;", node->string.value, ko); else fprintf(output, " if (!yymatchString(yy, \"%s\")) goto l%d;", node->string.value, ko); } break; case Class: fprintf(output, " if (!yymatchClass(yy, (unsigned char *)\"%s\")) goto l%d;", makeCharClass(node->cclass.value), ko); break; case Action: fprintf(output, " yyDo(yy, yy%s, yy->__begin, yy->__end);", node->action.name); break; case Inline: fprintf(output, " yyText(yy, yy->__begin, yy->__end);\n"); fprintf(output, "#define yytext yy->__text\n"); fprintf(output, "#define yyleng yy->__textlen\n"); fprintf(output, "%s;\n", node->inLine.text); fprintf(output, "#undef yytext\n"); fprintf(output, "#undef yyleng\n"); break; case Predicate: fprintf(output, " yyText(yy, yy->__begin, yy->__end); {\n"); fprintf(output, "#define yytext yy->__text\n"); fprintf(output, "#define yyleng yy->__textlen\n"); fprintf(output, "if (!(%s)) goto l%d;\n", node->predicate.text, ko); fprintf(output, "#undef yytext\n"); fprintf(output, "#undef yyleng\n"); fprintf(output, " }"); break; case Error: { int eok= yyl(), eko= yyl(); Node_compile_c_ko(node->error.element, eko); jump(eok); label(eko); fprintf(output, " yyText(yy, yy->__begin, yy->__end); {\n"); fprintf(output, "#define yytext yy->__text\n"); fprintf(output, "#define yyleng yy->__textlen\n"); fprintf(output, " %s;\n", node->error.text); fprintf(output, "#undef yytext\n"); fprintf(output, "#undef yyleng\n"); fprintf(output, " }"); jump(ko); label(eok); } break; case Alternate: { int ok= yyl(); begin(); save(ok); for (node= node->alternate.first; node; node= node->alternate.next) if (node->alternate.next) { int next= yyl(); Node_compile_c_ko(node, next); jump(ok); label(next); restore(ok); } else Node_compile_c_ko(node, ko); end(); label(ok); } break; case Sequence: for (node= node->sequence.first; node; node= node->sequence.next) Node_compile_c_ko(node, ko); 
break; case PeekFor: { int ok= yyl(); begin(); save(ok); Node_compile_c_ko(node->peekFor.element, ko); restore(ok); end(); } break; case PeekNot: { int ok= yyl(); begin(); save(ok); Node_compile_c_ko(node->peekFor.element, ok); jump(ko); label(ok); restore(ok); end(); } break; case Query: { int qko= yyl(), qok= yyl(); begin(); save(qko); Node_compile_c_ko(node->query.element, qko); jump(qok); label(qko); restore(qko); end(); label(qok); } break; case Star: { int again= yyl(), out= yyl(); label(again); begin(); save(out); Node_compile_c_ko(node->star.element, out); jump(again); label(out); restore(out); end(); } break; case Plus: { int again= yyl(), out= yyl(); Node_compile_c_ko(node->plus.element, ko); label(again); begin(); save(out); Node_compile_c_ko(node->plus.element, out); jump(again); label(out); restore(out); end(); } break; default: fprintf(stderr, "\nNode_compile_c_ko: illegal node type %d\n", node->type); exit(1); } } static int countVariables(Node *node) { int count= 0; while (node) { ++count; node= node->variable.next; } return count; } static void defineVariables(Node *node) { int count= 0; while (node) { fprintf(output, "#define %s yy->__val[%d]\n", node->variable.name, --count); node->variable.offset= count; node= node->variable.next; } fprintf(output, "#define __ yy->__\n"); fprintf(output, "#define yypos yy->__pos\n"); fprintf(output, "#define yythunkpos yy->__thunkpos\n"); } static void undefineVariables(Node *node) { fprintf(output, "#undef yythunkpos\n"); fprintf(output, "#undef yypos\n"); fprintf(output, "#undef yy\n"); while (node) { fprintf(output, "#undef %s\n", node->variable.name); node= node->variable.next; } } static void Rule_compile_c2(Node *node) { assert(node); assert(Rule == node->type); if (!node->rule.expression) fprintf(stderr, "rule '%s' used but not defined\n", node->rule.name); else { int ko= yyl(), safe; if ((!(RuleUsed & node->rule.flags)) && (node != start)) fprintf(stderr, "rule '%s' defined but not used\n", 
node->rule.name); safe= ((Query == node->rule.expression->type) || (Star == node->rule.expression->type)); fprintf(output, "\nYY_RULE(int) yy_%s(yycontext *yy)\n{", node->rule.name); if (!safe) save(0); if (node->rule.variables) fprintf(output, " yyDo(yy, yyPush, %d, 0);", countVariables(node->rule.variables)); fprintf(output, "\n yyprintf((stderr, \"%%s\\n\", \"%s\"));", node->rule.name); Node_compile_c_ko(node->rule.expression, ko); fprintf(output, "\n yyprintf((stderr, \" ok %%s @ %%s\\n\", \"%s\", yy->__buf+yy->__pos));", node->rule.name); if (node->rule.variables) fprintf(output, " yyDo(yy, yyPop, %d, 0);", countVariables(node->rule.variables)); fprintf(output, "\n return 1;"); if (!safe) { label(ko); restore(0); fprintf(output, "\n yyprintf((stderr, \" fail %%s @ %%s\\n\", \"%s\", yy->__buf+yy->__pos));", node->rule.name); fprintf(output, "\n return 0;"); } fprintf(output, "\n}"); } if (node->rule.next) Rule_compile_c2(node->rule.next); } static char *header= "\ #include \n\ #include \n\ #include \n\ "; static char *preamble= "\ #ifndef YY_MALLOC\n\ #define YY_MALLOC(C, N) malloc(N)\n\ #endif\n\ #ifndef YY_REALLOC\n\ #define YY_REALLOC(C, P, N) realloc(P, N)\n\ #endif\n\ #ifndef YY_FREE\n\ #define YY_FREE(C, P) free(P)\n\ #endif\n\ #ifndef YY_LOCAL\n\ #define YY_LOCAL(T) static T\n\ #endif\n\ #ifndef YY_ACTION\n\ #define YY_ACTION(T) static T\n\ #endif\n\ #ifndef YY_RULE\n\ #define YY_RULE(T) static T\n\ #endif\n\ #ifndef YY_PARSE\n\ #define YY_PARSE(T) T\n\ #endif\n\ #ifndef YYPARSE\n\ #define YYPARSE yyparse\n\ #endif\n\ #ifndef YYPARSEFROM\n\ #define YYPARSEFROM yyparsefrom\n\ #endif\n\ #ifndef YYRELEASE\n\ #define YYRELEASE yyrelease\n\ #endif\n\ #ifndef YY_BEGIN\n\ #define YY_BEGIN ( yy->__begin= yy->__pos, 1)\n\ #endif\n\ #ifndef YY_END\n\ #define YY_END ( yy->__end= yy->__pos, 1)\n\ #endif\n\ #ifdef YY_DEBUG\n\ # define yyprintf(args) fprintf args\n\ #else\n\ # define yyprintf(args)\n\ #endif\n\ #ifndef YYSTYPE\n\ #define YYSTYPE int\n\ #endif\n\ 
#ifndef YY_STACK_SIZE\n\ #define YY_STACK_SIZE 128\n\ #endif\n\ \n\ #ifndef YY_BUFFER_SIZE\n\ #define YY_BUFFER_SIZE 1024\n\ #endif\n\ \n\ #ifndef YY_PART\n\ \n\ typedef struct _yycontext yycontext;\n\ typedef void (*yyaction)(yycontext *yy, char *yytext, int yyleng);\n\ typedef struct _yythunk { int begin, end; yyaction action; struct _yythunk *next; } yythunk;\n\ \n\ struct _yycontext {\n\ char *__buf;\n\ int __buflen;\n\ int __pos;\n\ int __limit;\n\ char *__text;\n\ int __textlen;\n\ int __begin;\n\ int __end;\n\ int __textmax;\n\ yythunk *__thunks;\n\ int __thunkslen;\n\ int __thunkpos;\n\ YYSTYPE __;\n\ YYSTYPE *__val;\n\ YYSTYPE *__vals;\n\ int __valslen;\n\ #ifdef YY_CTX_MEMBERS\n\ YY_CTX_MEMBERS\n\ #endif\n\ };\n\ \n\ #ifdef YY_CTX_LOCAL\n\ #define YY_CTX_PARAM_ yycontext *yyctx,\n\ #define YY_CTX_PARAM yycontext *yyctx\n\ #define YY_CTX_ARG_ yyctx,\n\ #define YY_CTX_ARG yyctx\n\ #ifndef YY_INPUT\n\ #define YY_INPUT(yy, buf, result, max_size) \\\n\ { \\\n\ int yyc= getchar(); \\\n\ result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\\n\ yyprintf((stderr, \"<%c>\", yyc)); \\\n\ }\n\ #endif\n\ #else\n\ #define YY_CTX_PARAM_\n\ #define YY_CTX_PARAM\n\ #define YY_CTX_ARG_\n\ #define YY_CTX_ARG\n\ yycontext _yyctx= { 0, 0 };\n\ yycontext *yyctx= &_yyctx;\n\ #ifndef YY_INPUT\n\ #define YY_INPUT(buf, result, max_size) \\\n\ { \\\n\ int yyc= getchar(); \\\n\ result= (EOF == yyc) ? 
0 : (*(buf)= yyc, 1); \\\n\ yyprintf((stderr, \"<%c>\", yyc)); \\\n\ }\n\ #endif\n\ #endif\n\ \n\ YY_LOCAL(int) yyrefill(yycontext *yy)\n\ {\n\ int yyn;\n\ while (yy->__buflen - yy->__pos < 512)\n\ {\n\ yy->__buflen *= 2;\n\ yy->__buf= (char *)YY_REALLOC(yy, yy->__buf, yy->__buflen);\n\ }\n\ #ifdef YY_CTX_LOCAL\n\ YY_INPUT(yy, (yy->__buf + yy->__pos), yyn, (yy->__buflen - yy->__pos));\n\ #else\n\ YY_INPUT((yy->__buf + yy->__pos), yyn, (yy->__buflen - yy->__pos));\n\ #endif\n\ if (!yyn) return 0;\n\ yy->__limit += yyn;\n\ return 1;\n\ }\n\ \n\ YY_LOCAL(int) yymatchDot(yycontext *yy)\n\ {\n\ if (yy->__pos >= yy->__limit && !yyrefill(yy)) return 0;\n\ ++yy->__pos;\n\ return 1;\n\ }\n\ \n\ YY_LOCAL(int) yymatchChar(yycontext *yy, int c)\n\ {\n\ if (yy->__pos >= yy->__limit && !yyrefill(yy)) return 0;\n\ if ((unsigned char)yy->__buf[yy->__pos] == c)\n\ {\n\ ++yy->__pos;\n\ yyprintf((stderr, \" ok yymatchChar(yy, %c) @ %s\\n\", c, yy->__buf+yy->__pos));\n\ return 1;\n\ }\n\ yyprintf((stderr, \" fail yymatchChar(yy, %c) @ %s\\n\", c, yy->__buf+yy->__pos));\n\ return 0;\n\ }\n\ \n\ YY_LOCAL(int) yymatchString(yycontext *yy, const char *s)\n\ {\n\ int yysav= yy->__pos;\n\ while (*s)\n\ {\n\ if (yy->__pos >= yy->__limit && !yyrefill(yy)) return 0;\n\ if (yy->__buf[yy->__pos] != *s)\n\ {\n\ yy->__pos= yysav;\n\ return 0;\n\ }\n\ ++s;\n\ ++yy->__pos;\n\ }\n\ return 1;\n\ }\n\ \n\ YY_LOCAL(int) yymatchClass(yycontext *yy, unsigned char *bits)\n\ {\n\ int c;\n\ if (yy->__pos >= yy->__limit && !yyrefill(yy)) return 0;\n\ c= (unsigned char)yy->__buf[yy->__pos];\n\ if (bits[c >> 3] & (1 << (c & 7)))\n\ {\n\ ++yy->__pos;\n\ yyprintf((stderr, \" ok yymatchClass @ %s\\n\", yy->__buf+yy->__pos));\n\ return 1;\n\ }\n\ yyprintf((stderr, \" fail yymatchClass @ %s\\n\", yy->__buf+yy->__pos));\n\ return 0;\n\ }\n\ \n\ YY_LOCAL(void) yyDo(yycontext *yy, yyaction action, int begin, int end)\n\ {\n\ while (yy->__thunkpos >= yy->__thunkslen)\n\ {\n\ yy->__thunkslen *= 2;\n\ yy->__thunks= 
(yythunk *)YY_REALLOC(yy, yy->__thunks, sizeof(yythunk) * yy->__thunkslen);\n\ }\n\ yy->__thunks[yy->__thunkpos].begin= begin;\n\ yy->__thunks[yy->__thunkpos].end= end;\n\ yy->__thunks[yy->__thunkpos].action= action;\n\ ++yy->__thunkpos;\n\ }\n\ \n\ YY_LOCAL(int) yyText(yycontext *yy, int begin, int end)\n\ {\n\ int yyleng= end - begin;\n\ if (yyleng <= 0)\n\ yyleng= 0;\n\ else\n\ {\n\ while (yy->__textlen < (yyleng + 1))\n\ {\n\ yy->__textlen *= 2;\n\ yy->__text= (char *)YY_REALLOC(yy, yy->__text, yy->__textlen);\n\ }\n\ memcpy(yy->__text, yy->__buf + begin, yyleng);\n\ }\n\ yy->__text[yyleng]= '\\0';\n\ return yyleng;\n\ }\n\ \n\ YY_LOCAL(void) yyDone(yycontext *yy)\n\ {\n\ int pos;\n\ for (pos= 0; pos < yy->__thunkpos; ++pos)\n\ {\n\ yythunk *thunk= &yy->__thunks[pos];\n\ int yyleng= thunk->end ? yyText(yy, thunk->begin, thunk->end) : thunk->begin;\n\ yyprintf((stderr, \"DO [%d] %p %s\\n\", pos, thunk->action, yy->__text));\n\ thunk->action(yy, yy->__text, yyleng);\n\ }\n\ yy->__thunkpos= 0;\n\ }\n\ \n\ YY_LOCAL(void) yyCommit(yycontext *yy)\n\ {\n\ if ((yy->__limit -= yy->__pos))\n\ {\n\ memmove(yy->__buf, yy->__buf + yy->__pos, yy->__limit);\n\ }\n\ yy->__begin -= yy->__pos;\n\ yy->__end -= yy->__pos;\n\ yy->__pos= yy->__thunkpos= 0;\n\ }\n\ \n\ YY_LOCAL(int) yyAccept(yycontext *yy, int tp0)\n\ {\n\ if (tp0)\n\ {\n\ fprintf(stderr, \"accept denied at %d\\n\", tp0);\n\ return 0;\n\ }\n\ else\n\ {\n\ yyDone(yy);\n\ yyCommit(yy);\n\ }\n\ return 1;\n\ }\n\ \n\ YY_LOCAL(void) yyPush(yycontext *yy, char *text, int count)\n\ {\n\ yy->__val += count;\n\ while (yy->__valslen <= yy->__val - yy->__vals)\n\ {\n\ long offset= yy->__val - yy->__vals;\n\ yy->__valslen *= 2;\n\ yy->__vals= (YYSTYPE *)YY_REALLOC(yy, yy->__vals, sizeof(YYSTYPE) * yy->__valslen);\n\ yy->__val= yy->__vals + offset;\n\ }\n\ }\n\ YY_LOCAL(void) yyPop(yycontext *yy, char *text, int count) { yy->__val -= count; }\n\ YY_LOCAL(void) yySet(yycontext *yy, char *text, int count) { yy->__val[count]= 
yy->__; }\n\ \n\ #endif /* YY_PART */\n\ \n\ #define YYACCEPT yyAccept(yy, yythunkpos0)\n\ \n\ "; static char *footer= "\n\ \n\ #ifndef YY_PART\n\ \n\ typedef int (*yyrule)(yycontext *yy);\n\ \n\ YY_PARSE(int) YYPARSEFROM(YY_CTX_PARAM_ yyrule yystart)\n\ {\n\ int yyok;\n\ if (!yyctx->__buflen)\n\ {\n\ yyctx->__buflen= YY_BUFFER_SIZE;\n\ yyctx->__buf= (char *)YY_MALLOC(yyctx, yyctx->__buflen);\n\ yyctx->__textlen= YY_BUFFER_SIZE;\n\ yyctx->__text= (char *)YY_MALLOC(yyctx, yyctx->__textlen);\n\ yyctx->__thunkslen= YY_STACK_SIZE;\n\ yyctx->__thunks= (yythunk *)YY_MALLOC(yyctx, sizeof(yythunk) * yyctx->__thunkslen);\n\ yyctx->__valslen= YY_STACK_SIZE;\n\ yyctx->__vals= (YYSTYPE *)YY_MALLOC(yyctx, sizeof(YYSTYPE) * yyctx->__valslen);\n\ yyctx->__begin= yyctx->__end= yyctx->__pos= yyctx->__limit= yyctx->__thunkpos= 0;\n\ }\n\ yyctx->__begin= yyctx->__end= yyctx->__pos;\n\ yyctx->__thunkpos= 0;\n\ yyctx->__val= yyctx->__vals;\n\ yyok= yystart(yyctx);\n\ if (yyok) yyDone(yyctx);\n\ yyCommit(yyctx);\n\ return yyok;\n\ }\n\ \n\ YY_PARSE(int) YYPARSE(YY_CTX_PARAM)\n\ {\n\ return YYPARSEFROM(YY_CTX_ARG_ yy_%s);\n\ }\n\ \n\ YY_PARSE(yycontext *) YYRELEASE(yycontext *yyctx)\n\ {\n\ if (yyctx->__buflen)\n\ {\n\ yyctx->__buflen= 0;\n\ YY_FREE(yyctx, yyctx->__buf);\n\ YY_FREE(yyctx, yyctx->__text);\n\ YY_FREE(yyctx, yyctx->__thunks);\n\ YY_FREE(yyctx, yyctx->__vals);\n\ }\n\ return yyctx;\n\ }\n\ \n\ #endif\n\ "; void Rule_compile_c_header(void) { fprintf(output, "/* A recursive-descent parser generated by peg %d.%d.%d */\n", PEG_MAJOR, PEG_MINOR, PEG_LEVEL); fprintf(output, "\n"); fprintf(output, "%s", header); fprintf(output, "#define YYRULECOUNT %d\n", ruleCount); } int consumesInput(Node *node) { if (!node) return 0; switch (node->type) { case Rule: { int result= 0; if (RuleReached & node->rule.flags) fprintf(stderr, "possible infinite left recursion in rule '%s'\n", node->rule.name); else { node->rule.flags |= RuleReached; result= consumesInput(node->rule.expression); 
node->rule.flags &= ~RuleReached; } return result; } break; case Dot: return 1; case Name: return consumesInput(node->name.rule); case Character: case String: return strlen(node->string.value) > 0; case Class: return 1; case Action: return 0; case Inline: return 0; case Predicate: return 0; case Error: return consumesInput(node->error.element); case Alternate: { Node *n; for (n= node->alternate.first; n; n= n->alternate.next) if (!consumesInput(n)) return 0; } return 1; case Sequence: { Node *n; for (n= node->alternate.first; n; n= n->alternate.next) if (consumesInput(n)) return 1; } return 0; case PeekFor: return 0; case PeekNot: return 0; case Query: return 0; case Star: return 0; case Plus: return consumesInput(node->plus.element); default: fprintf(stderr, "\nconsumesInput: illegal node type %d\n", node->type); exit(1); } return 0; } void Rule_compile_c(Node *node, int nolines) { Node *n; for (n= rules; n; n= n->rule.next) consumesInput(n); fprintf(output, "%s", preamble); for (n= node; n; n= n->rule.next) fprintf(output, "YY_RULE(int) yy_%s(yycontext *yy); /* %d */\n", n->rule.name, n->rule.id); fprintf(output, "\n"); for (n= actions; n; n= n->action.list) { fprintf(output, "YY_ACTION(void) yy%s(yycontext *yy, char *yytext, int yyleng)\n{\n", n->action.name); defineVariables(n->action.rule->rule.variables); fprintf(output, " yyprintf((stderr, \"do yy%s\\n\"));\n", n->action.name); fprintf(output, " {\n"); if (!nolines) fprintf(output, "#line %i\n", n->action.line); fprintf(output, " %s;\n", n->action.text); fprintf(output, " }\n"); undefineVariables(n->action.rule->rule.variables); fprintf(output, "}\n"); } Rule_compile_c2(node); fprintf(output, footer, start->rule.name); } peg-0.1.18/src/tree.c0000644000175000000620000002022012744267362013761 0ustar piumartastaff/* Copyright (c) 2007 by Ian Piumarta * All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the 'Software'), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, provided that the above copyright notice(s) and this * permission notice appear in all copies of the Software. Acknowledgement * of the use of this Software in supporting documentation would be * appreciated but is not required. * * THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. * * Last edited: 2016-07-15 10:25:14 by piumarta on zora */ #include #include #include #include #ifdef WIN32 # undef inline # define inline __inline #endif #include "tree.h" Node *actions= 0; Node *rules= 0; Node *thisRule= 0; Node *start= 0; FILE *output= 0; int actionCount= 0; int ruleCount= 0; int lastToken= -1; static inline Node *_newNode(int type, int size) { Node *node= calloc(1, size); node->type= type; return node; } #define newNode(T) _newNode(T, sizeof(struct T)) Node *makeRule(char *name) { Node *node= newNode(Rule); node->rule.name= strdup(name); node->rule.id= ++ruleCount; node->rule.flags= 0; node->rule.next= rules; rules= node; return node; } Node *findRule(char *name) { Node *n; char *ptr; for (ptr= name; *ptr; ptr++) if ('-' == *ptr) *ptr= '_'; for (n= rules; n; n= n->any.next) { assert(Rule == n->type); if (!strcmp(name, n->rule.name)) return n; } return makeRule(name); } Node *beginRule(Node *rule) { actionCount= 0; return thisRule= rule; } void Rule_setExpression(Node *node, Node *expression) { assert(node); #ifdef DEBUG Node_print(node); fprintf(stderr, " [%d]<- ", node->type); Node_print(expression); fprintf(stderr, "\n"); #endif assert(Rule == node->type); node->rule.expression= expression; if (!start || !strcmp(node->rule.name, "start")) start= node; } Node 
*makeVariable(char *name) { Node *node; assert(thisRule); for (node= thisRule->rule.variables; node; node= node->variable.next) if (!strcmp(name, node->variable.name)) return node; node= newNode(Variable); node->variable.name= strdup(name); node->variable.next= thisRule->rule.variables; thisRule->rule.variables= node; return node; } Node *makeName(Node *rule) { Node *node= newNode(Name); node->name.rule= rule; node->name.variable= 0; rule->rule.flags |= RuleUsed; return node; } Node *makeDot(void) { return newNode(Dot); } Node *makeCharacter(char *text) { Node *node= newNode(Character); node->character.value= strdup(text); return node; } Node *makeString(char *text) { Node *node= newNode(String); node->string.value= strdup(text); return node; } Node *makeClass(char *text) { Node *node= newNode(Class); node->cclass.value= (unsigned char *)strdup(text); return node; } Node *makeAction(int lineNumber, char *text) { Node *node= newNode(Action); char name[1024]; assert(thisRule); sprintf(name, "_%d_%s", ++actionCount, thisRule->rule.name); node->action.name= strdup(name); node->action.text= strdup(text); node->action.list= actions; node->action.rule= thisRule; node->action.line= lineNumber; actions= node; { char *ptr; for (ptr= node->action.text; *ptr; ++ptr) if ('$' == ptr[0] && '$' == ptr[1]) ptr[1]= ptr[0]= '_'; } return node; } Node *makeInline(char *text) { Node *node= newNode(Inline); node->inLine.text= strdup(text); return node; } Node *makePredicate(char *text) { Node *node= newNode(Predicate); node->predicate.text= strdup(text); return node; } Node *makeError(Node *e, char *text) { Node *node= newNode(Error); node->error.element= e; node->error.text= strdup(text); return node; } Node *makeAlternate(Node *e) { if (Alternate != e->type) { Node *node= newNode(Alternate); assert(e); assert(!e->any.next); node->alternate.first= node->alternate.last= e; return node; } return e; } Node *Alternate_append(Node *a, Node *e) { assert(a); a= makeAlternate(a); 
assert(a->alternate.last); assert(e); a->alternate.last->any.next= e; a->alternate.last= e; return a; } Node *makeSequence(Node *e) { if (Sequence != e->type) { Node *node= newNode(Sequence); assert(e); assert(!e->any.next); node->sequence.first= node->sequence.last= e; return node; } return e; } Node *Sequence_append(Node *a, Node *e) { assert(a); a= makeSequence(a); assert(a->sequence.last); assert(e); a->sequence.last->any.next= e; a->sequence.last= e; return a; } Node *makePeekFor(Node *e) { Node *node= newNode(PeekFor); node->peekFor.element= e; return node; } Node *makePeekNot(Node *e) { Node *node= newNode(PeekNot); node->peekNot.element= e; return node; } Node *makeQuery(Node *e) { Node *node= newNode(Query); node->query.element= e; return node; } Node *makeStar(Node *e) { Node *node= newNode(Star); node->star.element= e; return node; } Node *makePlus(Node *e) { Node *node= newNode(Plus); node->plus.element= e; return node; } static Node *stack[1024]; static Node **stackPointer= stack; #ifdef DEBUG static void dumpStack(void) { Node **p; for (p= stack + 1; p <= stackPointer; ++p) { fprintf(stderr, "### %d\t", p - stack); Node_print(*p); fprintf(stderr, "\n"); } } #endif Node *push(Node *node) { assert(node); assert(stackPointer < stack + 1023); #ifdef DEBUG dumpStack(); fprintf(stderr, " PUSH "); Node_print(node); fprintf(stderr, "\n"); #endif return *++stackPointer= node; } Node *top(void) { assert(stackPointer > stack); return *stackPointer; } Node *pop(void) { assert(stackPointer > stack); #ifdef DEBUG dumpStack(); fprintf(stderr, " POP\n"); #endif return *stackPointer--; } static void Node_fprint(FILE *stream, Node *node) { assert(node); switch (node->type) { case Rule: fprintf(stream, " %s", node->rule.name); break; case Variable: fprintf(stream, " %s:", node->variable.name); break; case Name: fprintf(stream, " %s", node->name.rule->rule.name); break; case Dot: fprintf(stream, " ."); break; case Character: fprintf(stream, " '%s'", 
node->character.value); break; case String: fprintf(stream, " \"%s\"", node->string.value); break; case Class: fprintf(stream, " [%s]", node->cclass.value); break; case Action: fprintf(stream, " { %s }", node->action.text); break; case Predicate: fprintf(stream, " ?{ %s }", node->action.text); break; case Alternate: node= node->alternate.first; fprintf(stream, " ("); Node_fprint(stream, node); while ((node= node->any.next)) { fprintf(stream, " |"); Node_fprint(stream, node); } fprintf(stream, " )"); break; case Sequence: node= node->sequence.first; fprintf(stream, " ("); Node_fprint(stream, node); while ((node= node->any.next)) Node_fprint(stream, node); fprintf(stream, " )"); break; case PeekFor: fprintf(stream, "&"); Node_fprint(stream, node->query.element); break; case PeekNot: fprintf(stream, "!"); Node_fprint(stream, node->query.element); break; case Query: Node_fprint(stream, node->query.element); fprintf(stream, "?"); break; case Star: Node_fprint(stream, node->query.element); fprintf(stream, "*"); break; case Plus: Node_fprint(stream, node->query.element); fprintf(stream, "+"); break; default: fprintf(stream, "\nunknown node type %d\n", node->type); exit(1); } } void Node_print(Node *node) { Node_fprint(stderr, node); } static void Rule_fprint(FILE *stream, Node *node) { assert(node); assert(Rule == node->type); fprintf(stream, "%s.%d =", node->rule.name, node->rule.id); if (node->rule.expression) Node_fprint(stream, node->rule.expression); else fprintf(stream, " UNDEFINED"); fprintf(stream, " ;\n"); } void Rule_print(Node *node) { Rule_fprint(stderr, node); } peg-0.1.18/src/peg.10000644000175000000620000010463412744267362013527 0ustar piumartastaff.\" Copyright (c) 2007,2016 by Ian Piumarta .\" All rights reserved. 
.\" .\" Permission is hereby granted, free of charge, to any person obtaining a .\" copy of this software and associated documentation files (the 'Software'), .\" to deal in the Software without restriction, including without limitation .\" the rights to use, copy, modify, merge, publish, distribute, and/or sell .\" copies of the Software, and to permit persons to whom the Software is .\" furnished to do so, provided that the above copyright notice(s) and this .\" permission notice appear in all copies of the Software. Acknowledgement .\" of the use of this Software in supporting documentation would be .\" appreciated but is not required. .\" .\" THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. .\" .\" Last edited: 2016-07-22 09:47:29 by piumarta on zora.local .\" .TH PEG 1 "September 2013" "Version 0.1" .SH NAME peg, leg \- parser generators .SH SYNOPSIS .B peg .B [\-hvV \-ooutput] .I [filename ...] .sp 0 .B leg .B [\-hvV \-ooutput] .I [filename ...] .SH DESCRIPTION .I peg and .I leg are tools for generating recursive\-descent parsers: programs that perform pattern matching on text. They process a Parsing Expression Grammar (PEG) [Ford 2004] to produce a program that recognises legal sentences of that grammar. .I peg processes PEGs written using the original syntax described by Ford; .I leg processes PEGs written using slightly different syntax and conventions that are intended to make it an attractive replacement for parsers built with .IR lex (1) and .IR yacc (1). Unlike .I lex and .IR yacc , .I peg and .I leg support unlimited backtracking, provide ordered choice as a means for disambiguation, and can combine scanning (lexical analysis) and parsing (syntactic analysis) into a single activity. .PP .I peg reads the specified .IR filename s, or standard input if no .IR filename s are given, for a grammar describing the parser to generate. .I peg then generates a C source file that defines a function .IR yyparse(). 
This C source file can be included in, or compiled and then linked with, a client program. Each time the client program calls .IR yyparse () the parser consumes input text according to the parsing rules, starting from the first rule in the grammar. .IR yyparse () returns non\-zero if the input could be parsed according to the grammar; it returns zero if the input could not be parsed. .PP The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols in the generated parser. This is intended to reduce the risk of namespace pollution in client programs. (The choice of 'yy' is historical; see .IR lex (1) and .IR yacc (1), for example.) .SH OPTIONS .I peg and .I leg provide the following options: .TP .B \-h prints a summary of available options and then exits. .TP .B \-ooutput writes the generated parser to the file .B output instead of the standard output. .TP .B \-P suppresses #line directives in the output. .TP .B \-v writes verbose information to standard error while working. .TP .B \-V writes version information to standard error then exits. .SH A SIMPLE EXAMPLE The following .I peg input specifies a grammar with a single rule (called 'start') that is satisfied when the input contains the string "username". .nf start <\- "username" .fi (The quotation marks are .I not part of the matched text; they serve to indicate a literal string to be matched.) In other words, .IR yyparse () in the generated C source will return non\-zero only if the next eight characters read from the input spell the word "username". If the input contains anything else, .IR yyparse () returns zero and no input will have been consumed. (Subsequent calls to .IR yyparse () will also return zero, since the parser is effectively blocked looking for the string "username".) To ensure progress we can add an alternative clause to the 'start' rule that will match any single character if "username" is not found. .nf start <\- "username" / . 
.fi .IR yyparse () now always returns non\-zero (except at the very end of the input). To do something useful we can add actions to the rules. These actions are performed after a complete match is found (starting from the first rule) and are chosen according to the 'path' taken through the grammar to match the input. (Linguists would call this path a 'phrase marker'.) .nf start <\- "username" { printf("%s\\n", getlogin()); } / < . > { putchar(yytext[0]); } .fi The first line instructs the parser to print the user's login name whenever it sees "username" in the input. If that match fails, the second line tells the parser to echo the next character on the input to the standard output. Our parser is now performing useful work: it will copy the input to the output, replacing all occurrences of "username" with the user's account name. .PP Note the angle brackets ('<' and '>') that were added to the second alternative. These have no effect on the meaning of the rule, but serve to delimit the text made available to the following action in the variable .IR yytext . .PP If the above grammar is placed in the file .BR username.peg , running the command .nf peg \-o username.c username.peg .fi will save the corresponding parser in the file .BR username.c . To create a complete program this parser could be included by a C program as follows. .nf #include <stdio.h> /* printf(), putchar() */ #include <unistd.h> /* getlogin() */ #include "username.c" /* yyparse() */ int main() { while (yyparse()) /* repeat until EOF */ ; return 0; } .fi .SH PEG GRAMMARS A grammar consists of a set of named rules. .nf name <\- pattern .fi The .B pattern contains one or more of the following elements. .TP .B name The element stands for the entire pattern in the rule with the given .BR name . .TP .BR \(dq characters \(dq A character or string enclosed in double quotes is matched literally. The ANSI C escape sequences are recognised within the .IR characters .
.TP .BR ' characters ' A character or string enclosed in single quotes is matched literally, as above. .TP .BR [ characters ] A set of characters enclosed in square brackets matches any single character from the set, with escape characters recognised as above. If the set begins with an uparrow (^) then the set is negated (the element matches any character .I not in the set). Any pair of characters separated with a dash (\-) represents the range of characters from the first to the second, inclusive. A single alphabetic character or underscore is matched by the following set. .nf [a\-zA\-Z_] .fi Similarly, the following matches any single non\-digit character. .nf [^0\-9] .fi .TP .B . A dot matches any character. Note that the only time this fails is at the end of file, where there is no character to match. .TP .BR ( \ pattern\ ) Parentheses are used for grouping (modifying the precedence of the operators described below). .TP .BR { \ action\ } Curly braces surround actions. The action is arbitrary C source code to be executed at the end of matching. Any braces within the action must be properly nested. Any input text that was matched before the action and delimited by angle brackets (see below) is made available within the action as the contents of the character array .IR yytext . The length of (number of characters in) .I yytext is available in the variable .IR yyleng . (These variable names are historical; see .IR lex (1).) .TP .B < An opening angle bracket always matches (consuming no input) and causes the parser to begin accumulating matched text. This text will be made available to actions in the variable .IR yytext . .TP .B > A closing angle bracket always matches (consuming no input) and causes the parser to stop accumulating text for .IR yytext . .PP The above .IR element s can be made optional and/or repeatable with the following suffixes: .TP .RB element\ ? The element is optional. If present on the input, it is consumed and the match succeeds. 
If not present on the input, no text is consumed and the match succeeds anyway. .TP .RB element\ + The element is repeatable. If present on the input, one or more occurrences of .I element are consumed and the match succeeds. If no occurrences of .I element are present on the input, the match fails. .TP .RB element\ * The element is optional and repeatable. If present on the input, one or more occurrences of .I element are consumed and the match succeeds. If no occurrences of .I element are present on the input, the match succeeds anyway. .PP The above elements and suffixes can be converted into predicates (that match arbitrary input text and subsequently succeed or fail .I without consuming that input) with the following prefixes: .TP .BR & \ element The predicate succeeds only if .I element can be matched. Input text scanned while matching .I element is not consumed from the input and remains available for subsequent matching. .TP .BR ! \ element The predicate succeeds only if .I element cannot be matched. Input text scanned while matching .I element is not consumed from the input and remains available for subsequent matching. A popular idiom is .nf !. .fi which matches the end of file, after the last character of the input has already been consumed. .PP A special form of the '&' predicate is provided: .TP .BR & {\ expression\ } In this predicate the simple C .I expression .RB ( not statement) is evaluated immediately when the parser reaches the predicate. If the .I expression yields non\-zero (true) the 'match' succeeds and the parser continues with the next element in the pattern. If the .I expression yields zero (false) the 'match' fails and the parser backs up to look for an alternative parse of the input. .PP Several elements (with or without prefixes and suffixes) can be combined into a .I sequence by writing them one after the other. The entire sequence matches only if each individual element within it matches, from left to right. 
.PP Sequences can be separated into disjoint alternatives by the alternation operator '/'. .TP .RB sequence\-1\ / \ sequence\-2\ / \ ...\ / \ sequence\-N Each sequence is tried in turn until one of them matches, at which time matching for the overall pattern succeeds. If none of the sequences matches then the match of the overall pattern fails. .PP Finally, the pound sign (#) introduces a comment (discarded) that continues until the end of the line. .PP To summarise the above, the parser tries to match the input text against a pattern containing literals, names (representing other rules), and various operators (written as prefixes, suffixes, juxtaposition for sequencing and an infix alternation operator) that modify how the elements within the pattern are matched. Matches are made from left to right, 'descending' into named sub\-rules as they are encountered. If the matching process fails, the parser 'back tracks' ('rewinding' the input appropriately in the process) to find the nearest alternative 'path' through the grammar. In other words the parser performs a depth\-first, left\-to\-right search for the first successfully\-matching path through the rules. If found, the actions along the successful path are executed (in the order they were encountered). .PP Note that predicates are evaluated .I immediately during the search for a successful match, since they contribute to the success or failure of the search. Actions, however, are evaluated only after a successful match has been found. .SH PEG GRAMMAR FOR PEG GRAMMARS The grammar for .I peg grammars is shown below. This will both illustrate and formalise the above description. .nf Grammar <\- Spacing Definition+ EndOfFile Definition <\- Identifier LEFTARROW Expression Expression <\- Sequence ( SLASH Sequence )* Sequence <\- Prefix* Prefix <\- AND Action / ( AND / NOT )? Suffix Suffix <\- Primary ( QUERY / STAR / PLUS )?
Primary <\- Identifier !LEFTARROW / OPEN Expression CLOSE / Literal / Class / DOT / Action / BEGIN / END Identifier <\- < IdentStart IdentCont* > Spacing IdentStart <\- [a\-zA\-Z_] IdentCont <\- IdentStart / [0\-9] Literal <\- ['] < ( !['] Char )* > ['] Spacing / ["] < ( !["] Char )* > ["] Spacing Class <\- '[' < ( !']' Range )* > ']' Spacing Range <\- Char '\-' Char / Char Char <\- '\\\\' [abefnrtv'"\\[\\]\\\\] / '\\\\' [0\-3][0\-7][0\-7] / '\\\\' [0\-7][0\-7]? / '\\\\' '\-' / !'\\\\' . LEFTARROW <\- '<\-' Spacing SLASH <\- '/' Spacing AND <\- '&' Spacing NOT <\- '!' Spacing QUERY <\- '?' Spacing STAR <\- '*' Spacing PLUS <\- '+' Spacing OPEN <\- '(' Spacing CLOSE <\- ')' Spacing DOT <\- '.' Spacing Spacing <\- ( Space / Comment )* Comment <\- '#' ( !EndOfLine . )* EndOfLine Space <\- ' ' / '\\t' / EndOfLine EndOfLine <\- '\\r\\n' / '\\n' / '\\r' EndOfFile <\- !. Action <\- '{' < [^}]* > '}' Spacing BEGIN <\- '<' Spacing END <\- '>' Spacing .fi .SH LEG GRAMMARS .I leg is a variant of .I peg that adds some features of .IR lex (1) and .IR yacc (1). It differs from .I peg in the following ways. .TP .BI %{\ text... \ %} A declaration section can appear anywhere that a rule definition is expected. The .I text between the delimiters '%{' and '%}' is copied verbatim to the generated C parser code .I before the code that implements the parser itself. .TP .IB name\ = \ pattern The 'assignment' operator replaces the left arrow operator '<\-'. .TP .B rule\-name Hyphens can appear as letters in the names of rules. Each hyphen is converted into an underscore in the generated C source code. A single hyphen '\-' is a legal rule name. .nf \- = [ \\t\\n\\r]* number = [0\-9]+ \- name = [a\-zA\-Z_][a\-zA\-Z_0\-9]* \- l\-paren = '(' \- r\-paren = ')' \- .fi This example shows how ignored whitespace can be obvious when reading the grammar and yet unobtrusive when placed liberally at the end of every rule associated with a lexical element.
.TP .IB seq\-1\ | \ seq\-2 The alternation operator is vertical bar '|' rather than forward slash '/'. The .I peg rule .nf name <\- sequence\-1 / sequence\-2 / sequence\-3 .fi is therefore written .nf name = sequence\-1 | sequence\-2 | sequence\-3 ; .fi in .I leg (with the final semicolon being optional, as described next). .TP .IB @{\ action\ } Actions prefixed with an 'at' symbol will be performed during parsing, at the time they are encountered while matching the input text with a rule. Because of back-tracking in the PEG parsing algorithm, actions prefixed with '@' might be performed multiple times for the same input text. (The usual behaviour of actions is that they are saved up until matching is complete, and then those that are part of the final derivation are performed in left-to-right order.) The variable .I yytext is available within these actions. .TP .IB exp \ ~ \ {\ action\ } A postfix operator .BI ~ {\ action\ } can be placed after any expression and will behave like a normal action (arbitrary C code) except that it is invoked only when .I exp fails. It binds less tightly than any other operator except alternation and sequencing, and is intended to make error handling and recovery code easier to write. Note that .I yytext and .I yyleng are not available inside these actions, but the pointer variable .I yy is available to give the code access to any user\-defined members of the parser state (see "CUSTOMISING THE PARSER" below). Note also that .I exp is always a single expression; to invoke an error action for any failure within a sequence, parentheses must be used to group the sequence into a single expression. .nf rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); } | ... rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); } | ... .fi .TP .IB pattern\ ; A semicolon punctuator can optionally terminate a .IR pattern . .TP .BI %% \ text... A double percent '%%' terminates the rules (and declarations) section of the grammar.
All .I text following '%%' is copied verbatim to the generated C parser code .I after the parser implementation code. .TP .BI $$\ = \ value A sub\-rule can return a semantic .I value from an action by assigning it to the pseudo\-variable '$$'. All semantic values must have the same type (which defaults to 'int'). This type can be changed by defining YYSTYPE in a declaration section. .TP .IB identifier : name The semantic value returned (by assigning to '$$') from the sub\-rule .I name is associated with the .I identifier and can be referred to in subsequent actions. .PP The desk calculator example below illustrates the use of '$$' and ':'. .SH LEG EXAMPLE: A DESK CALCULATOR The extensions in .I leg described above allow useful parsers and evaluators (including declarations, grammar rules, and supporting C functions such as 'main') to be kept within a single source file. To illustrate this we show a simple desk calculator supporting the four common arithmetic operators and named variables. The intermediate results of arithmetic evaluation will be accumulated on an implicit stack by returning them as semantic values from sub\-rules. .nf %{ #include /* printf() */ #include /* atoi() */ int vars[26]; %} Stmt = \- e:Expr EOL { printf("%d\\n", e); } | ( !EOL . 
)* EOL { printf("error\\n"); } Expr = i:ID ASSIGN s:Sum { $$ = vars[i] = s; } | s:Sum { $$ = s; } Sum = l:Product ( PLUS r:Product { l += r; } | MINUS r:Product { l \-= r; } )* { $$ = l; } Product = l:Value ( TIMES r:Value { l *= r; } | DIVIDE r:Value { l /= r; } )* { $$ = l; } Value = i:NUMBER { $$ = atoi(yytext); } | i:ID !ASSIGN { $$ = vars[i]; } | OPEN i:Expr CLOSE { $$ = i; } NUMBER = < [0\-9]+ > \- { $$ = atoi(yytext); } ID = < [a\-z] > \- { $$ = yytext[0] \- 'a'; } ASSIGN = '=' \- PLUS = '+' \- MINUS = '\-' \- TIMES = '*' \- DIVIDE = '/' \- OPEN = '(' \- CLOSE = ')' \- \- = [ \\t]* EOL = '\\n' | '\\r\\n' | '\\r' | ';' %% int main() { while (yyparse()) ; return 0; } .fi .SH LEG GRAMMAR FOR LEG GRAMMARS The grammar for .I leg grammars is shown below. This will both illustrate and formalise the above description. .nf grammar = \- ( declaration | definition )+ trailer? end\-of\-file declaration = '%{' < ( !'%}' . )* > RPERCENT trailer = '%%' < .* > definition = identifier EQUAL expression SEMICOLON? expression = sequence ( BAR sequence )* sequence = error+ error = prefix ( TILDE action )? prefix = AND action | ( AND | NOT )? suffix suffix = primary ( QUERY | STAR | PLUS )? primary = identifier COLON identifier !EQUAL | identifier !EQUAL | OPEN expression CLOSE | literal | class | DOT | action | BEGIN | END identifier = < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \- literal = ['] < ( !['] char )* > ['] \- | ["] < ( !["] char )* > ["] \- class = '[' < ( !']' range )* > ']' \- range = char '\-' char | char char = '\\\\' [abefnrtv'"\\[\\]\\\\] | '\\\\' [0\-3][0\-7][0\-7] | '\\\\' [0\-7][0\-7]? | !'\\\\' . action = '{' < braces* > '}' \- braces = '{' braces* '}' | !'}' . EQUAL = '=' \- COLON = ':' \- SEMICOLON = ';' \- BAR = '|' \- AND = '&' \- NOT = '!' \- QUERY = '?' \- STAR = '*' \- PLUS = '+' \- OPEN = '(' \- CLOSE = ')' \- DOT = '.' 
\- BEGIN = '<' \- END = '>' \- TILDE = '~' \- RPERCENT = '%}' \- \- = ( space | comment )* space = ' ' | '\\t' | end\-of\-line comment = '#' ( !end\-of\-line . )* end\-of\-line end\-of\-line = '\\r\\n' | '\\n' | '\\r' end\-of\-file = !. .fi .SH CUSTOMISING THE PARSER The following symbols can be redefined in declaration sections to modify the generated parser code. .TP .B YYSTYPE The semantic value type. The pseudo\-variable '$$' and the identifiers 'bound' to rule results with the colon operator ':' should all be considered as being declared to have this type. The default value is 'int'. .TP .B YYPARSE The name of the main entry point to the parser. The default value is 'yyparse'. .TP .B YYPARSEFROM The name of an alternative entry point to the parser. This function expects one argument: the function corresponding to the rule from which the search for a match should begin. The default is 'yyparsefrom'. Note that yyparse() is defined as .nf int yyparse() { return yyparsefrom(yy_foo); } .fi where 'foo' is the name of the first rule in the grammar. .TP .BI YY_INPUT( buf , \ result , \ max_size ) This macro is invoked by the parser to obtain more input text. .I buf points to an area of memory that can hold at most .I max_size characters. The macro should copy input text to .I buf and then assign the integer variable .I result to indicate the number of characters copied. If no more input is available, the macro should assign 0 to .IR result . By default, the YY_INPUT macro is defined as follows. .nf #define YY_INPUT(buf, result, max_size) \\ { \\ int yyc= getchar(); \\ result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\ } .fi Note that if YY_CTX_LOCAL is defined (see below) then an additional first argument, containing the parser context, is passed to YY_INPUT. .TP .B YY_DEBUG If this symbol is defined then additional code will be included in the parser that prints vast quantities of arcane information to the standard error while the parser is running.
.TP .B YY_BEGIN This macro is invoked to mark the start of input text that will be made available in actions as 'yytext'. This corresponds to occurrences of '<' in the grammar. These are converted into predicates that are expected to succeed. The default definition .nf #define YY_BEGIN (yybegin= yypos, 1) .fi therefore saves the current input position and returns 1 ('true') as the result of the predicate. .TP .B YY_END This macro corresponds to '>' in the grammar. Again, it is a predicate so the default definition saves the input position before 'succeeding'. .nf #define YY_END (yyend= yypos, 1) .fi .TP .BI YY_PARSE( T ) This macro declares the parser entry points (yyparse and yyparsefrom) to be of type .IR T . The default definition .nf #define YY_PARSE(T) T .fi leaves yyparse() and yyparsefrom() with global visibility. If they should not be externally visible in other source files, this macro can be redefined to declare them 'static'. .nf #define YY_PARSE(T) static T .fi .TP .B YY_CTX_LOCAL If this symbol is defined during compilation of a generated parser then global parser state will be kept in a structure of type 'yycontext' which can be declared as a local variable. This allows multiple instances of parsers to coexist and to be thread\-safe. The parsing function .IR yyparse () will be declared to expect a first argument of type 'yycontext *', an instance of the structure holding the global state for the parser. This instance must be allocated and initialised to zero by the client. A trivial but complete example is as follows. .nf #include <string.h> #define YY_CTX_LOCAL #include "the\-generated\-parser.peg.c" int main() { yycontext ctx; memset(&ctx, 0, sizeof(yycontext)); while (yyparse(&ctx)); return 0; } .fi Note that if this symbol is undefined then the compiled parser will statically allocate its global state and will be neither reentrant nor thread\-safe.
Note also that the parser yycontext structure is initialised automatically the first time .IR yyparse () is called; this structure .B must therefore be properly initialised to zero before the first call to .IR yyparse (). .TP .B YY_CTX_MEMBERS If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS can be defined to expand to any additional member field declarations that the client would like included in the declaration of the 'yycontext' structure type. These additional members are otherwise ignored by the generated parser. The instance of 'yycontext' associated with the currently\-active parser is available within actions as the pointer variable .IR yy . .TP .B YY_BUFFER_SIZE The initial size of the text buffer, in bytes. The default is 1024 and the buffer size is doubled whenever required to meet demand during parsing. An application that typically parses much longer strings could increase this to avoid unnecessary buffer reallocation. .TP .B YY_STACK_SIZE The initial size of the variable and action stacks. The default is 128, which is doubled whenever required to meet demand during parsing. Applications that have deep call stacks with many local variables, or that perform many actions after a single successful match, could increase this to avoid unnecessary buffer reallocation. .TP .BI YY_MALLOC( YY , \ SIZE ) The memory allocator for all parser\-related storage. The parameters are the current yycontext structure and the number of bytes to allocate. The default definition is: .RI malloc( SIZE ) .TP .BI YY_REALLOC( YY , \ PTR , \ SIZE ) The memory reallocator for dynamically\-grown storage (such as text buffers and variable stacks). The parameters are the current yycontext structure, the previously\-allocated storage, and the number of bytes to which that storage should be grown. The default definition is: .RI realloc( PTR , \ SIZE ) .TP .BI YY_FREE( YY , \ PTR ) The memory deallocator. 
The parameters are the current yycontext structure and the storage to deallocate. The default definition is: .RI free( PTR ) .TP .B YYRELEASE The name of the function that releases all resources held by a yycontext structure. The default value is 'yyrelease'. .PP The following variables can be referred to within actions. .TP .B char *yybuf This variable points to the parser's input buffer used to store input text that has not yet been matched. .TP .B int yypos This is the offset (in yybuf) of the next character to be matched and consumed. .TP .B char *yytext The most recently matched text delimited by '<' and '>' is stored in this variable. .TP .B int yyleng This variable indicates the number of characters in 'yytext'. .TP .B yycontext *yy This variable points to the instance of 'yycontext' associated with the currently\-active parser. .PP Programs that wish to release all the resources associated with a parser can use the following function. .TP .BI yyrelease(yycontext * yy ) Returns all parser\-allocated storage associated with .I yy to the system. The storage will be reallocated on the next call to .IR yyparse (). .PP Note that the storage for the yycontext structure itself is never allocated or reclaimed implicitly. The application must allocate these structures in automatic storage, or use .IR calloc () and .IR free () to manage them explicitly. The example in the following section demonstrates one approach to resource management. .SH LEG EXAMPLE: EXTENDING THE PARSER'S CONTEXT The .I yy variable passed to actions contains the state of the parser plus any additional fields defined by YY_CTX_MEMBERS. These fields can be used to store application\-specific information that is global to a particular call of .IR yyparse (). A trivial but complete .I leg example follows in which the yycontext structure is extended with a .I count of the number of newline characters seen in the input so far (the grammar otherwise consumes and ignores the entire input).
The caller of .IR yyparse () uses .I count to print the number of lines of input that were read. .nf %{ #define YY_CTX_LOCAL 1 #define YY_CTX_MEMBERS \\ int count; %} Char = ('\\n' | '\\r\\n' | '\\r') { yy\->count++ } | . %% #include <stdio.h> #include <string.h> int main() { /* create a local parser context in automatic storage */ yycontext yy; /* the context *must* be initialised to zero before first use */ memset(&yy, 0, sizeof(yy)); while (yyparse(&yy)) ; printf("%d newlines\\n", yy.count); /* release all resources associated with the context */ yyrelease(&yy); return 0; } .fi .SH DIAGNOSTICS .I peg and .I leg warn about the following conditions while converting a grammar into a parser. .TP .B syntax error The input grammar was malformed in some way. The error message will include the text about to be matched (often backed up a huge amount from the actual location of the error) and the line number of the most recently considered character (which is often the real location of the problem). .TP .B rule 'foo' used but not defined The grammar referred to a rule named 'foo' but no definition for it was given. Attempting to use the generated parser will likely result in errors from the linker due to undefined symbols associated with the missing rule. .TP .B rule 'foo' defined but not used The grammar defined a rule named 'foo' and then ignored it. The code associated with the rule is included in the generated parser which will in all other respects be healthy. .TP .B possible infinite left recursion in rule 'foo' There exists at least one path through the grammar that leads from the rule 'foo' back to (a recursive invocation of) the same rule without consuming any input. .PP Left recursion, especially that found in standards documents, is often 'direct' and implies trivial repetition. .nf # (6.7.6) direct\-abstract\-declarator = LPAREN abstract\-declarator RPAREN | direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET | direct\-abstract\-declarator?
LBRACKET STAR RBRACKET | direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN .fi The recursion can easily be eliminated by converting the parts of the pattern following the recursion into a repeatable suffix. .nf # (6.7.6) direct\-abstract\-declarator = direct\-abstract\-declarator\-head? direct\-abstract\-declarator\-tail* direct\-abstract\-declarator\-head = LPAREN abstract\-declarator RPAREN direct\-abstract\-declarator\-tail = LBRACKET assign\-expr? RBRACKET | LBRACKET STAR RBRACKET | LPAREN param\-type\-list? RPAREN .fi .SH CAVEATS A parser that accepts empty input will .I always succeed. Consider the following example, not atypical of a first attempt to write a PEG\-based parser: .nf Program = Expression* Expression = "whatever" %% int main() { while (yyparse()) puts("success!"); return 0; } .fi This program loops forever, no matter what (if any) input is provided on stdin. Many fixes are possible, the easiest being to insist that the parser always consumes some non\-empty input. Changing the first line to .nf Program = Expression+ .fi accomplishes this. If the parser is expected to consume the entire input, then explicitly requiring the end\-of\-file is also highly recommended: .nf Program = Expression+ !. .fi This works because the parser will only fail to match ("!" predicate) any character at all ("." expression) when it attempts to read beyond the end of the input. .SH BUGS You have to type 'man peg' to read the manual page for .IR leg (1). .PP The 'yy' and 'YY' prefixes cannot be changed. .PP Left recursion is detected in the input grammar but is not handled correctly in the generated parser. .PP Diagnostics for errors in the input grammar are obscure and not particularly helpful. .PP The operators .BR ! \ \c and .B ~ should really be named the other way around. .PP Several commonly\-used .IR lex (1) features (yywrap(), yyin, etc.) are completely absent. 
.PP The generated parser does not contain '#line' directives to direct C compiler errors back to the grammar description when appropriate. .SH SEE ALSO D. Val Schorre, .I META II, a syntax\-oriented compiler writing language, 19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311. Describes a self\-implementing parser generator for analytic grammars with no backtracking. .PP Alexander Birman, .I The TMG Recognition Schema, Ph.D. dissertation, Princeton, 1970. A mathematical treatment of the power and complexity of recursive\-descent parsing with backtracking. .PP Bryan Ford, .I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation, ACM SIGPLAN Symposium on Principles of Programming Languages, 2004. Defines PEGs and analyses them in relation to context\-free and regular grammars. Introduces the syntax adopted in .IR peg . .PP The standard Unix utilities .IR lex (1) and .IR yacc (1) which influenced the syntax and features of .IR leg . .PP The source code for .I peg and .I leg whose grammar parsers are written using themselves. .PP The latest version of this software and documentation: .nf http://piumarta.com/software/peg .fi .SH AUTHOR .IR peg , .I leg and this manual page were written by Ian Piumarta (first\-name at last\-name dot com) while investigating the viability of regular and parsing\-expression grammars for efficiently extracting type and signature information from C header files. .PP Please send bug reports and suggestions for improvements to the author at the above address. peg-0.1.18/src/peg.peg0000644000175000000620000000571612744267362014143 0ustar piumartastaff# PE Grammar for PE Grammars # # Adapted from [1] by Ian Piumarta . # # Local modifications (marked '#ikp') to support: # C text in '{ ... }' copied verbatim to output as 'semantic action' # input consumed between '<' and '>' is 'char yytext[]' in semantic actions # # Best viewed using 140 columns monospaced with tabs every 8. # # [1] Bryan Ford. 
"Parsing Expression Grammars: A Recognition-Based Syntactic # Foundation." Symposium on Principles of Programming Languages, # January 14--16, 2004, Venice, Italy. # # Last edited: 2016-07-15 10:27:27 by piumarta on zora # Hierarchical syntax Grammar <- Spacing Definition+ EndOfFile Definition <- Identifier { if (push(beginRule(findRule(yytext)))->rule.expression) fprintf(stderr, "rule '%s' redefined\n", yytext); } LEFTARROW Expression { Node *e= pop(); Rule_setExpression(pop(), e); } &{ YYACCEPT } Expression <- Sequence (SLASH Sequence { Node *f= pop(); push(Alternate_append(pop(), f)); } )* Sequence <- Prefix (Prefix { Node *f= pop(); push(Sequence_append(pop(), f)); } #ikp expanded from 'Seq <- Prefix*' )* / { push(makePredicate("1")); } #ikp added Prefix <- AND Action { push(makePredicate(yytext)); } #ikp added / AND Suffix { push(makePeekFor(pop())); } #ikp expanded from 'Prefix <- (AND/NOT)? Suffix' / NOT Suffix { push(makePeekNot(pop())); } / Suffix Suffix <- Primary (QUESTION { push(makeQuery(pop())); } / STAR { push(makeStar (pop())); } / PLUS { push(makePlus (pop())); } )? Primary <- Identifier !LEFTARROW { push(makeName(findRule(yytext))); } / OPEN Expression CLOSE / Literal { push(makeString(yytext)); } / Class { push(makeClass(yytext)); } / DOT { push(makeDot()); } / Action { push(makeAction(0, yytext)); } #ikp added / BEGIN { push(makePredicate("YY_BEGIN")); } #ikp added / END { push(makePredicate("YY_END")); } #ikp added # Lexical syntax Identifier <- < IdentStart IdentCont* > Spacing #ikp inserted < ... > IdentStart <- [a-zA-Z_] IdentCont <- IdentStart / [0-9] Literal <- ['] < (!['] Char )* > ['] Spacing #ikp inserted < ... > / ["] < (!["] Char )* > ["] Spacing #ikp inserted < ... > Class <- '[' < (!']' Range)* > ']' Spacing #ikp inserted < ... > Range <- Char '-' Char / Char Char <- '\\' [abefnrtv'"\[\]\\] #ikp added missing ANSI escapes: abefv / '\\' [0-3][0-7][0-7] / '\\' [0-7][0-7]? / '\\' '-' #ikp added / !'\\' . 
LEFTARROW <- '<-' Spacing SLASH <- '/' Spacing AND <- '&' Spacing NOT <- '!' Spacing QUESTION <- '?' Spacing STAR <- '*' Spacing PLUS <- '+' Spacing OPEN <- '(' Spacing CLOSE <- ')' Spacing DOT <- '.' Spacing Spacing <- (Space / Comment)* Comment <- '#' (!EndOfLine .)* EndOfLine Space <- ' ' / '\t' / EndOfLine EndOfLine <- '\r\n' / '\n' / '\r' EndOfFile <- !. Action <- '{' < [^}]* > '}' Spacing #ikp added BEGIN <- '<' Spacing #ikp added END <- '>' Spacing #ikp added peg-0.1.18/src/leg.leg0000644000175000000620000001704012744267362014124 0ustar piumartastaff# LE Grammar for LE Grammars # # Copyright (c) 2007 by Ian Piumarta # All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the 'Software'), # to deal in the Software without restriction, including without limitation # the rights to use, copy, modify, merge, publish, distribute, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, provided that the above copyright notice(s) and this # permission notice appear in all copies of the Software. Acknowledgement # of the use of this Software in supporting documentation would be # appreciated but is not required. # # THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK. 
#
# Last edited: 2016-07-22 09:45:53 by piumarta on zora.local

%{
/* C prologue: types, globals and parser-configuration macros used by the
 * actions in the rules below and by the C code in the trailer section. */

# include "tree.h"
# include "version.h"

/* NOTE(review): the six directives below carry no header name in this copy
 * of the file.  They look like system-header includes whose operands were
 * lost; restore them before building. */
# include
# include
# include
# include
# include
# include

  /* One captured declaration block, kept as a node of a singly-linked list. */
  typedef struct Header Header;

  struct Header {
    int line;       /* value passed as 'line' to makeHeader() */
    char *text;     /* text of the block */
    Header *next;   /* next node in the list */
  };

  FILE *input= 0;   /* stream that YY_INPUT reads the grammar from */

  int verboseFlag= 0;
  int nolinesFlag= 0;

  static int lineNumber= 0;     /* incremented by the end-of-line rule's action */
  static int headerLine= 0;     /* set by the declaration and trailer rules */
  static int actionLine= 0;     /* set by the action rule */
  static char *fileName= 0;     /* printed by yyerror() as the message prefix */
  static int trailerLine= 0;    /* set by makeTrailer() */
  static char *trailer= 0;      /* set by makeTrailer() */
  static Header *headers= 0;    /* head of the Header list; makeHeader() prepends */

  void makeHeader(int line, char *text);
  void makeTrailer(int line, char *text);

  void yyerror(char *message);

/* Input hook for the parser: fetch a single character from 'input' with
 * getc().  'result' becomes 0 at EOF; otherwise the character is stored at
 * *buf and 'result' becomes 1.  'max' is not used.  The line-counting
 * statement is deliberately left commented out. */
# define YY_INPUT(buf, result, max)                     \
  {                                                     \
    int c= getc(input);                                 \
    /* if ('\n' == c || '\r' == c) ++lineNumber; */     \
    result= (EOF == c) ? 0 : (*(buf)= c, 1);            \
  }

/* Both expand to 'static T'. */
# define YY_LOCAL(T) static T
# define YY_RULE(T) static T
%}

# Hierarchical syntax

# A whole grammar file: leading spacing, then one or more declaration blocks
# or rule definitions, an optional trailer, and finally end of input.
grammar=        - ( declaration | definition )+ trailer? end-of-file

# A '%{ ... %}' block.  The first action copies lineNumber into headerLine;
# the text up to the closing '%}' is captured (line ends are matched through
# end-of-line, which carries the ++lineNumber action) and passed to
# makeHeader().
declaration=    '%{'                                    { headerLine= lineNumber; }
                < ( !'%}' (end-of-line | .) )* >
                RPERCENT                                { makeHeader(headerLine, yytext); }             #{YYACCEPT}

# Everything following '%%' up to the end of input, passed to makeTrailer()
# together with the line number saved in headerLine.
trailer=        '%%'                                    { headerLine= lineNumber }
                < .* >                                  { makeTrailer(headerLine, yytext); }            #{YYACCEPT}

# name '=' expression, optionally terminated by ';'.  The first action looks
# the rule up by name, pushes it, and prints "rule '...' redefined" on stderr
# if it already has an expression.  The second action pops the parsed
# expression and the rule and attaches the former to the latter.
definition=     identifier                              { if (push(beginRule(findRule(yytext)))->rule.expression) fprintf(stderr, "rule '%s' redefined\n", yytext); }
                        EQUAL expression                { Node *e= pop(); Rule_setExpression(pop(), e); }
                        SEMICOLON?                                                                      #{YYACCEPT}

# One or more sequences separated by '|'; each further sequence is appended
# to the alternation on top of the stack.
expression=     sequence (BAR sequence                  { Node *f= pop(); push(Alternate_append(pop(), f)); }
                            )*

# One or more 'error' items in a row; each further item is appended to the
# sequence on top of the stack.
sequence=       error (error                            { Node *f= pop(); push(Sequence_append(pop(), f)); }
                        )*

# A prefix optionally followed by '~' and an action; when present, the popped
# node and the action text are combined with makeError().
error=          prefix (TILDE action                    { push(makeError(pop(), yytext)); }
                        )?

# '@' action, '&' action, '&' suffix, '!' suffix, or a bare suffix.
# '&' action is listed before '&' suffix, so '&{ ... }' becomes a predicate
# (makePredicate) rather than a peek-for wrapped around an action.
prefix=         AT action                               { push(makeInline(yytext)); }
|               AND action                              { push(makePredicate(yytext)); }
|               AND suffix                              { push(makePeekFor(pop())); }
|               NOT suffix                              { push(makePeekNot(pop())); }
|               suffix

# A primary optionally followed by one of '?', '*' or '+'.
suffix=         primary (QUESTION                       { push(makeQuery(pop())); }
                             | STAR                     { push(makeStar (pop())); }
                             | PLUS                     { push(makePlus (pop())); }
                           )?
# The atomic element of an expression.  Alternatives, in the order tried:
#   variable ':' rule   - pushes a variable node for the first identifier,
#                         then builds a name node for the rule and stores the
#                         popped variable in its name.variable field
#   rule                - a reference to a rule by name
#   '(' expression ')'
#   literal, class, '.', action
#   '<' and '>'         - become the predicates YY_BEGIN and YY_END
# '!EQUAL' rejects an identifier that is directly followed by '=', i.e. one
# that starts the next definition.
primary=        identifier                              { push(makeVariable(yytext)); }
                COLON identifier !EQUAL                 { Node *name= makeName(findRule(yytext)); name->name.variable= pop(); push(name); }
|               identifier !EQUAL                       { push(makeName(findRule(yytext))); }
|               OPEN expression CLOSE
|               literal                                 { push(makeString(yytext)); }
|               class                                   { push(makeClass(yytext)); }
|               DOT                                     { push(makeDot()); }
|               action                                  { push(makeAction(actionLine, yytext)); }
|               BEGIN                                   { push(makePredicate("YY_BEGIN")); }
|               END                                     { push(makePredicate("YY_END")); }

# Lexical syntax

# A name: first character is '-', a letter or '_'; later characters may also
# be digits.  The name (without trailing spacing) is captured as yytext.
identifier=     < [-a-zA-Z_][-a-zA-Z_0-9]* > -

# A string in single or double quotes; the text between the quotes is
# captured as yytext.
literal=        ['] < ( !['] char )* > ['] -
|               ["] < ( !["] char )* > ["] -

# A character class in square brackets; the text between the brackets is
# captured as yytext.
class=          '[' < ( !']' range )* > ']' -

# Either 'char-char' or a single char.
range=          char '-' char | char

# One (possibly escaped) character.  Alternatives, in the order tried:
# backslash + one of the listed escape characters; backslash + 'x' + two hex
# digits; backslash + 'x' + one hex digit; backslash + three octal digits
# (first digit 0-3); backslash + one or two octal digits; any character that
# is not a backslash.
char=           '\\' [-abefnrtv'"\[\]\\]
|               '\\' 'x'[0-9A-Fa-f][0-9A-Fa-f]
|               '\\' 'x'[0-9A-Fa-f]
|               '\\' [0-3][0-7][0-7]
|               '\\' [0-7][0-7]?
|               !'\\' .

# A '{ ... }' block.  lineNumber is copied into actionLine after the opening
# brace; the text between the outer braces is captured as yytext.
action=         '{'                                     { actionLine= lineNumber }
                < braces* > '}' -

# Content of an action: a nested, balanced '{ ... }' group, or any single
# character other than '}'.  end-of-line is listed before '.' so line ends
# are matched through the rule that carries the ++lineNumber action.
braces=         '{' braces* '}'
|               !'}' ( end-of-line | . )

# Punctuation tokens.  Each is followed by '-', i.e. optional spacing.
EQUAL=          '=' -
COLON=          ':' -
SEMICOLON=      ';' -
BAR=            '|' -
AND=            '&' -
NOT=            '!' -
AT=             '@' -
QUESTION=       '?' -
STAR=           '*' -
PLUS=           '+' -
OPEN=           '(' -
CLOSE=          ')' -
DOT=            '.' -
BEGIN=          '<' -
END=            '>' -
TILDE=          '~' -
RPERCENT=       '%}' -

# Spacing: any run (possibly empty) of spaces and comments.
-=              (space | comment)*
space=          ' ' | '\t' | end-of-line
# A comment runs from '#' up to and including the end of the line.
comment=        '#' (!end-of-line .)* end-of-line
# A line terminator in any of its three forms; the action increments lineNumber.
end-of-line=    ( '\r\n' | '\n' | '\r' )                { ++lineNumber }
# Matches only where no further character can be read.
end-of-file=    !.
%% void yyerror(char *message) { fprintf(stderr, "%s:%d: %s", fileName, lineNumber, message); if (yyctx->__text[0]) fprintf(stderr, " near token '%s'", yyctx->__text); if (yyctx->__pos < yyctx->__limit || !feof(input)) { yyctx->__buf[yyctx->__limit]= '\0'; fprintf(stderr, " before text \""); while (yyctx->__pos < yyctx->__limit) { if ('\n' == yyctx->__buf[yyctx->__pos] || '\r' == yyctx->__buf[yyctx->__pos]) break; fputc(yyctx->__buf[yyctx->__pos++], stderr); } if (yyctx->__pos == yyctx->__limit) { int c; while (EOF != (c= fgetc(input)) && '\n' != c && '\r' != c) fputc(c, stderr); } fputc('\"', stderr); } fprintf(stderr, "\n"); exit(1); } void makeHeader(int line, char *text) { Header *header= (Header *)malloc(sizeof(Header)); header->line= line; header->text= strdup(text); header->next= headers; headers= header; } void makeTrailer(int line, char *text) { trailerLine= line; trailer= strdup(text); } static void version(char *name) { printf("%s version %d.%d.%d\n", name, PEG_MAJOR, PEG_MINOR, PEG_LEVEL); } static void usage(char *name) { version(name); fprintf(stderr, "usage: %s [