./0000755000175000017500000000000011620140775007662 5ustar renerene./assert.c0000644000175000017500000002406711620140753011334 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "tune.h" #include #include #include #include #include #include "ucppi.h" #include "mem.h" #include "nhash.h" /* * Assertion support. Each assertion is indexed by its predicate, and * the list of 'questions' which yield a true answer. 
*/ static HTT assertions; static int assertions_init_done = 0; static struct assert *new_assertion(void) { struct assert *a = getmem(sizeof(struct assert)); a->nbval = 0; return a; } static void del_token_fifo(struct token_fifo *tf) { size_t i; for (i = 0; i < tf->nt; i ++) if (S_TOKEN(tf->t[i].type)) freemem(tf->t[i].name); if (tf->nt) freemem(tf->t); } static void del_assertion(void *va) { struct assert *a = va; size_t i; for (i = 0; i < a->nbval; i ++) del_token_fifo(a->val + i); if (a->nbval) freemem(a->val); freemem(a); } /* * print the contents of a token list */ static void print_token_fifo(struct token_fifo *tf) { size_t i; for (i = 0; i < tf->nt; i ++) if (ttMWS(tf->t[i].type)) fputc(' ', emit_output); else fputs(token_name(tf->t + i), emit_output); } /* * print all assertions related to a given name */ static void print_assert(void *va) { struct assert *a = va; size_t i; for (i = 0; i < a->nbval; i ++) { fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(a)); print_token_fifo(a->val + i); fprintf(emit_output, ")\n"); } } /* * compare two token_fifo, return 0 if they are identical, 1 otherwise. * All whitespace tokens are considered identical, but sequences of * whitespace are not shrinked. */ int cmp_token_list(struct token_fifo *f1, struct token_fifo *f2) { size_t i; if (f1->nt != f2->nt) return 1; for (i = 0; i < f1->nt; i ++) { if (ttMWS(f1->t[i].type) && ttMWS(f2->t[i].type)) continue; if (f1->t[i].type != f2->t[i].type) return 1; if (f1->t[i].type == MACROARG && f1->t[i].line != f2->t[i].line) return 1; if (S_TOKEN(f1->t[i].type) && strcmp(f1->t[i].name, f2->t[i].name)) return 1; } return 0; } /* * for #assert * Assertions are not part of the ISO-C89 standard, but they are sometimes * encountered, for instance in Solaris standard include files. 
*/ int handle_assert(struct lexer_state *ls) { int ina = 0, ltww; struct token t; struct token_fifo *atl = 0; struct assert *a; char *aname; int ret = -1; long l = ls->line; int nnp; size_t i; while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) { if (!(a = HTT_get(&assertions, ls->ctok->name))) { a = new_assertion(); aname = sdup(ls->ctok->name); ina = 1; } goto handle_assert_next; } error(l, "illegal assertion name for #assert"); goto handle_assert_warp_ign; } goto handle_assert_trunc; handle_assert_next: while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type != LPAR) { error(l, "syntax error in #assert"); goto handle_assert_warp_ign; } goto handle_assert_next2; } goto handle_assert_trunc; handle_assert_next2: atl = getmem(sizeof(struct token_fifo)); atl->art = atl->nt = 0; for (nnp = 1, ltww = 1; nnp && !next_token(ls);) { if (ls->ctok->type == NEWLINE) break; if (ltww && ttMWS(ls->ctok->type)) continue; ltww = ttMWS(ls->ctok->type); if (ls->ctok->type == LPAR) nnp ++; else if (ls->ctok->type == RPAR) { if (!(-- nnp)) goto handle_assert_next3; } t.type = ls->ctok->type; if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); aol(atl->t, atl->nt, t, TOKEN_LIST_MEMG); } goto handle_assert_trunc; handle_assert_next3: while (!next_token(ls) && ls->ctok->type != NEWLINE) { if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(l, "trailing garbage in #assert"); } } if (atl->nt && ttMWS(atl->t[atl->nt - 1].type) && (-- atl->nt) == 0) freemem(atl->t); if (atl->nt == 0) { error(l, "void assertion in #assert"); goto handle_assert_error; } for (i = 0; i < a->nbval && cmp_token_list(atl, a->val + i); i ++); if (i != a->nbval) { /* we already have it */ ret = 0; goto handle_assert_error; } /* This is a new assertion. Let's keep it. 
*/ aol(a->val, a->nbval, *atl, TOKEN_LIST_MEMG); if (ina) { HTT_put(&assertions, a, aname); freemem(aname); } if (emit_assertions) { fprintf(emit_output, "#assert %s(", HASH_ITEM_NAME(a)); print_token_fifo(atl); fputs(")\n", emit_output); } freemem(atl); return 0; handle_assert_trunc: error(l, "unfinished #assert"); handle_assert_error: if (atl) { del_token_fifo(atl); freemem(atl); } if (ina) { freemem(aname); freemem(a); } return ret; handle_assert_warp_ign: while (!next_token(ls) && ls->ctok->type != NEWLINE); if (ina) { freemem(aname); freemem(a); } return ret; } /* * for #unassert */ int handle_unassert(struct lexer_state *ls) { int ltww; struct token t; struct token_fifo atl; struct assert *a; int ret = -1; long l = ls->line; int nnp; size_t i; atl.art = atl.nt = 0; while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) { if (!(a = HTT_get(&assertions, ls->ctok->name))) { ret = 0; goto handle_unassert_warp; } goto handle_unassert_next; } error(l, "illegal assertion name for #unassert"); goto handle_unassert_warp; } goto handle_unassert_trunc; handle_unassert_next: while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type != LPAR) { error(l, "syntax error in #unassert"); goto handle_unassert_warp; } goto handle_unassert_next2; } if (emit_assertions) fprintf(emit_output, "#unassert %s\n", HASH_ITEM_NAME(a)); HTT_del(&assertions, HASH_ITEM_NAME(a)); return 0; handle_unassert_next2: for (nnp = 1, ltww = 1; nnp && !next_token(ls);) { if (ls->ctok->type == NEWLINE) break; if (ltww && ttMWS(ls->ctok->type)) continue; ltww = ttMWS(ls->ctok->type); if (ls->ctok->type == LPAR) nnp ++; else if (ls->ctok->type == RPAR) { if (!(-- nnp)) goto handle_unassert_next3; } t.type = ls->ctok->type; if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); aol(atl.t, atl.nt, t, TOKEN_LIST_MEMG); } goto handle_unassert_trunc; handle_unassert_next3: while 
(!next_token(ls) && ls->ctok->type != NEWLINE) { if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(l, "trailing garbage in #unassert"); } } if (atl.nt && ttMWS(atl.t[atl.nt - 1].type) && (-- atl.nt) == 0) freemem(atl.t); if (atl.nt == 0) { error(l, "void assertion in #unassert"); return ret; } for (i = 0; i < a->nbval && cmp_token_list(&atl, a->val + i); i ++); if (i != a->nbval) { /* we have it, undefine it */ del_token_fifo(a->val + i); if (i < (a->nbval - 1)) mmvwo(a->val + i, a->val + i + 1, (a->nbval - i - 1) * sizeof(struct token_fifo)); if ((-- a->nbval) == 0) freemem(a->val); if (emit_assertions) { fprintf(emit_output, "#unassert %s(", HASH_ITEM_NAME(a)); print_token_fifo(&atl); fputs(")\n", emit_output); } } ret = 0; goto handle_unassert_finish; handle_unassert_trunc: error(l, "unfinished #unassert"); handle_unassert_finish: if (atl.nt) del_token_fifo(&atl); return ret; handle_unassert_warp: while (!next_token(ls) && ls->ctok->type != NEWLINE); return ret; } /* * Add the given assertion (as string). */ int make_assertion(char *aval) { struct lexer_state lls; size_t n = strlen(aval) + 1; char *c = sdup(aval); int ret; *(c + n - 1) = '\n'; init_buf_lexer_state(&lls, 0); lls.flags = DEFAULT_LEXER_FLAGS; lls.input = 0; lls.input_string = (unsigned char *)c; lls.pbuf = 0; lls.ebuf = n; lls.line = -1; ret = handle_assert(&lls); freemem(c); free_lexer_state(&lls); return ret; } /* * Remove the given assertion (as string). 
*/ int destroy_assertion(char *aval) { struct lexer_state lls; size_t n = strlen(aval) + 1; char *c = sdup(aval); int ret; *(c + n - 1) = '\n'; init_buf_lexer_state(&lls, 0); lls.flags = DEFAULT_LEXER_FLAGS; lls.input = 0; lls.input_string = (unsigned char *)c; lls.pbuf = 0; lls.ebuf = n; lls.line = -1; ret = handle_unassert(&lls); freemem(c); free_lexer_state(&lls); return ret; } /* * erase the assertion table */ void wipe_assertions(void) { if (assertions_init_done) HTT_kill(&assertions); assertions_init_done = 0; } /* * initialize the assertion table */ void init_assertions(void) { wipe_assertions(); HTT_init(&assertions, del_assertion); assertions_init_done = 1; } /* * retrieve an assertion from the hash table */ struct assert *get_assertion(char *name) { return HTT_get(&assertions, name); } /* * print already defined assertions */ void print_assertions(void) { HTT_scan(&assertions, print_assert); } ./lexer.c0000644000175000017500000006135311620140753011151 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "tune.h" #include #include #include #include #include "ucppi.h" #include "mem.h" #ifdef UCPP_MMAP #include #include #include #endif /* * Character classes for description of the automaton. * The characters used for representing classes should not appear * explicitly in an automaton rule. */ #define SPC ' ' /* whitespace characters */ #define ALP 'Z' /* A-Z, a-z, _ */ #define NUM '9' /* 0-9 */ #define ANY 'Y' /* any character */ #define VCH 'F' /* void character (for end of input) */ /* * flags and macros to test those flags * STO: the currently read string is a complete token * PUT: the currently read character must be added to the string * FRZ: the currently read character must be kept and read again */ #define MOD_MK 255 #define noMOD(x) ((x) & 255) #define STO(x) ((x) | 256) #define ttSTO(x) ((x) & 256) #define FRZ(x) ((x) | 512) #define ttFRZ(x) ((x) & 512) #define PUT(x) ((x) | 1024) #define ttPUT(x) ((x) & 1024) /* order is important: the states listed before MSTATE are the row indices of the cppm[][] and cppm_vch[] tables; the values listed after MSTATE are pseudo-states which read_token() intercepts and converts itself; CMT() relies on S_COMMENT .. S_COMMENT5 being contiguous */ enum { S_START, S_SPACE, S_BANG, S_STRING, S_STRING2, S_COLON, S_SHARP, S_PCT, S_PCT2, S_PCT3, S_AMPER, S_CHAR, S_CHAR2, S_STAR, S_PLUS, S_MINUS, S_DOT, S_DOT2, S_SLASH, S_NUMBER, S_NUMBER2, S_LT, S_LT2, S_EQ, S_GT, S_GT2, S_CIRC, S_PIPE, S_BACKSLASH, S_COMMENT, S_COMMENT2, S_COMMENT3, S_COMMENT4, S_COMMENT5, S_NAME, S_NAME_BS, S_LCHAR, MSTATE, S_ILL, S_DDOT, S_DDSHARP, S_BS, S_ROGUE_BS, S_BEHEAD, S_DECAY, S_TRUNC, S_TRUNCC, S_OUCH }; #define CMT(x) ((x) >= S_COMMENT && (x) <= S_COMMENT5) #define CMCR 
2 /* * This is the description of the automaton. It is not used "as is" * but copied at execution time into a table. * * To my utmost displeasure, there are a few hacks in read_token() * (which uses the transformed automaton) about the special handling * of slashes, sharps, and the letter L. * * Each rule gives a state, up to CMCR input characters (either literal * characters or one of the class characters SPC, ALP, NUM, ANY, VCH) and * the resulting state, possibly wrapped in STO(), FRZ() and/or PUT(). * init_cppm() applies the rules in the order they are listed, so a rule * for a specific character overrides an earlier ANY rule of the same * state; it stops at the first rule whose input[0] is 0. */ static struct machine_state { int state; unsigned char input[CMCR]; int new_state; } cppms[] = { /* S_START is the generic beginning state */ { S_START, { ANY }, S_ILL }, #ifdef SEMPER_FIDELIS { S_START, { SPC }, PUT(S_SPACE) }, #else { S_START, { SPC }, S_SPACE }, #endif { S_START, { '\n' }, STO(NEWLINE) }, { S_START, { '!' }, S_BANG }, { S_START, { '"' }, PUT(S_STRING) }, { S_START, { '#' }, S_SHARP }, { S_START, { '%' }, S_PCT }, { S_START, { '&' }, S_AMPER }, { S_START, { '\'' }, PUT(S_CHAR) }, { S_START, { '(' }, STO(LPAR) }, { S_START, { ')' }, STO(RPAR) }, { S_START, { '*' }, S_STAR }, { S_START, { '+' }, S_PLUS }, { S_START, { ',' }, STO(COMMA) }, { S_START, { '-' }, S_MINUS }, { S_START, { '.' }, PUT(S_DOT) }, #ifdef SEMPER_FIDELIS { S_START, { '/' }, PUT(S_SLASH) }, #else { S_START, { '/' }, S_SLASH }, #endif { S_START, { NUM }, PUT(S_NUMBER) }, { S_START, { ':' }, S_COLON }, { S_START, { ';' }, STO(SEMIC) }, { S_START, { '<' }, S_LT }, { S_START, { '=' }, S_EQ }, { S_START, { '>' }, S_GT }, { S_START, { '?' }, STO(QUEST) }, { S_START, { ALP }, PUT(S_NAME) }, { S_START, { 'L' }, PUT(S_LCHAR) }, { S_START, { '[' }, STO(LBRK) }, { S_START, { ']' }, STO(RBRK) }, { S_START, { '^' }, S_CIRC }, { S_START, { '{' }, STO(LBRA) }, { S_START, { '|' }, S_PIPE }, { S_START, { '}' }, STO(RBRA) }, { S_START, { '~' }, STO(NOT) }, { S_START, { '\\' }, S_BACKSLASH }, /* after a space */ { S_SPACE, { ANY }, FRZ(STO(NONE)) }, #ifdef SEMPER_FIDELIS { S_SPACE, { SPC }, PUT(S_SPACE) }, #else { S_SPACE, { SPC }, S_SPACE }, #endif /* after a ! 
*/ { S_BANG, { ANY }, FRZ(STO(LNOT)) }, { S_BANG, { '=' }, STO(NEQ) }, /* after a " */ { S_STRING, { ANY }, PUT(S_STRING) }, { S_STRING, { VCH }, FRZ(S_TRUNC) }, { S_STRING, { '\n' }, FRZ(S_BEHEAD) }, { S_STRING, { '\\' }, PUT(S_STRING2) }, { S_STRING, { '"' }, PUT(STO(STRING)) }, { S_STRING2, { ANY }, PUT(S_STRING) }, { S_STRING2, { VCH }, FRZ(S_TRUNC) }, /* after a # */ { S_SHARP, { ANY }, FRZ(STO(SHARP)) }, { S_SHARP, { '#' }, STO(DSHARP) }, /* after a : */ { S_COLON, { ANY }, FRZ(STO(COLON)) }, { S_COLON, { '>' }, STO(DIG_RBRK) }, /* after a % */ { S_PCT, { ANY }, FRZ(STO(PCT)) }, { S_PCT, { '=' }, STO(ASPCT) }, { S_PCT, { '>' }, STO(DIG_RBRA) }, { S_PCT, { ':' }, S_PCT2 }, /* after a %: */ { S_PCT2, { ANY }, FRZ(STO(DIG_SHARP)) }, { S_PCT2, { '%' }, S_PCT3 }, /* after a %:% */ { S_PCT3, { ANY }, FRZ(S_DDSHARP) }, { S_PCT3, { ':' }, STO(DIG_DSHARP) }, /* after a & */ { S_AMPER, { ANY }, FRZ(STO(AND)) }, { S_AMPER, { '=' }, STO(ASAND) }, { S_AMPER, { '&' }, STO(LAND) }, /* after a ' */ { S_CHAR, { ANY }, PUT(S_CHAR) }, { S_CHAR, { VCH }, FRZ(S_TRUNC) }, { S_CHAR, { '\'' }, PUT(STO(CHAR)) }, { S_CHAR, { '\\' }, PUT(S_CHAR2) }, /* after a \ in a character constant useful only for '\'' */ { S_CHAR2, { ANY }, PUT(S_CHAR) }, { S_CHAR2, { VCH }, FRZ(S_TRUNC) }, /* after a * */ { S_STAR, { ANY }, FRZ(STO(STAR)) }, { S_STAR, { '=' }, STO(ASSTAR) }, /* after a + */ { S_PLUS, { ANY }, FRZ(STO(PLUS)) }, { S_PLUS, { '+' }, STO(PPLUS) }, { S_PLUS, { '=' }, STO(ASPLUS) }, /* after a - */ { S_MINUS, { ANY }, FRZ(STO(MINUS)) }, { S_MINUS, { '-' }, STO(MMINUS) }, { S_MINUS, { '=' }, STO(ASMINUS) }, { S_MINUS, { '>' }, STO(ARROW) }, /* after a . */ { S_DOT, { ANY }, FRZ(STO(DOT)) }, { S_DOT, { NUM }, PUT(S_NUMBER) }, { S_DOT, { '.' }, S_DOT2 }, /* after .. */ { S_DOT2, { ANY }, FRZ(S_DDOT) }, { S_DOT2, { '.' 
}, STO(MDOTS) }, /* after a / */ { S_SLASH, { ANY }, FRZ(STO(SLASH)) }, { S_SLASH, { '=' }, STO(ASSLASH) }, #ifdef SEMPER_FIDELIS { S_SLASH, { '*' }, PUT(S_COMMENT) }, { S_SLASH, { '/' }, PUT(S_COMMENT5) }, #else { S_SLASH, { '*' }, S_COMMENT }, { S_SLASH, { '/' }, S_COMMENT5 }, #endif /* * There is a little hack in read_token() to disable * this last rule, if C++ (C99) comments are not enabled. */ /* after a number */ { S_NUMBER, { ANY }, FRZ(STO(NUMBER)) }, { S_NUMBER, { ALP, NUM }, PUT(S_NUMBER) }, { S_NUMBER, { '.' }, PUT(S_NUMBER) }, { S_NUMBER, { 'E', 'e' }, PUT(S_NUMBER2) }, { S_NUMBER, { 'P', 'p' }, PUT(S_NUMBER2) }, { S_NUMBER2, { ANY }, FRZ(STO(NUMBER)) }, { S_NUMBER2, { ALP, NUM }, PUT(S_NUMBER) }, { S_NUMBER2, { '+', '-' }, PUT(S_NUMBER) }, /* after a < */ { S_LT, { ANY }, FRZ(STO(LT)) }, { S_LT, { '=' }, STO(LEQ) }, { S_LT, { '<' }, S_LT2 }, { S_LT, { ':' }, STO(DIG_LBRK) }, { S_LT, { '%' }, STO(DIG_LBRA) }, { S_LT2, { ANY }, FRZ(STO(LSH)) }, { S_LT2, { '=' }, STO(ASLSH) }, /* after a > */ { S_GT, { ANY }, FRZ(STO(GT)) }, { S_GT, { '=' }, STO(GEQ) }, { S_GT, { '>' }, S_GT2 }, { S_GT2, { ANY }, FRZ(STO(RSH)) }, { S_GT2, { '=' }, STO(ASRSH) }, /* after a = */ { S_EQ, { ANY }, FRZ(STO(ASGN)) }, { S_EQ, { '=' }, STO(SAME) }, #ifdef CAST_OP { S_EQ, { '>' }, STO(CAST) }, #endif /* after a \ */ { S_BACKSLASH, { ANY }, FRZ(S_BS) }, { S_BACKSLASH, { 'U', 'u' }, FRZ(S_NAME_BS) }, /* after a letter */ { S_NAME, { ANY }, FRZ(STO(NAME)) }, { S_NAME, { ALP, NUM }, PUT(S_NAME) }, { S_NAME, { '\\' }, S_NAME_BS }, /* after a \ in an identifier */ { S_NAME_BS, { ANY }, FRZ(S_ROGUE_BS) }, { S_NAME_BS, { 'u', 'U' }, PUT(S_NAME) }, /* after a L */ { S_LCHAR, { ANY }, FRZ(S_NAME) }, { S_LCHAR, { '"' }, PUT(S_STRING) }, { S_LCHAR, { '\'' }, PUT(S_CHAR) }, /* after a ^ */ { S_CIRC, { ANY }, FRZ(STO(CIRC)) }, { S_CIRC, { '=' }, STO(ASCIRC) }, /* after a | */ { S_PIPE, { ANY }, FRZ(STO(OR)) }, { S_PIPE, { '=' }, STO(ASOR) }, { S_PIPE, { '|' }, STO(LOR) }, /* after a / and * */ 
#ifdef SEMPER_FIDELIS { S_COMMENT, { ANY }, PUT(S_COMMENT) }, { S_COMMENT, { VCH }, FRZ(S_TRUNCC) }, { S_COMMENT, { '*' }, PUT(S_COMMENT2) }, { S_COMMENT2, { ANY }, FRZ(S_COMMENT) }, { S_COMMENT2, { VCH }, FRZ(S_TRUNCC) }, { S_COMMENT2, { '*' }, PUT(S_COMMENT2) }, { S_COMMENT2, { '/' }, STO(PUT(COMMENT)) }, { S_COMMENT5, { ANY }, PUT(S_COMMENT5) }, { S_COMMENT5, { VCH }, FRZ(S_DECAY) }, { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT)) }, #else { S_COMMENT, { ANY }, S_COMMENT }, { S_COMMENT, { VCH }, FRZ(S_TRUNCC) }, { S_COMMENT, { '*' }, S_COMMENT2 }, { S_COMMENT2, { ANY }, FRZ(S_COMMENT) }, { S_COMMENT2, { VCH }, FRZ(S_TRUNCC) }, { S_COMMENT2, { '*' }, S_COMMENT2 }, { S_COMMENT2, { '/' }, STO(COMMENT) }, { S_COMMENT5, { ANY }, S_COMMENT5 }, { S_COMMENT5, { VCH }, FRZ(S_DECAY) }, { S_COMMENT5, { '\n' }, FRZ(STO(COMMENT)) }, #endif /* dummy end of machine description */ { 0, { 0 }, 0 } }; /* * cppm is the table used to store the automaton: if we are in state s * and we read character c, we apply the action cppm[s][c] (jumping to * another state, or emitting a token). * cppm_vch is the table for the special virtual character "end of input" */ static int cppm[MSTATE][MAX_CHAR_VAL]; static int cppm_vch[MSTATE]; /* * init_cppm() fills cppm[][] with the information stored in cppms[]. * It must be called before beginning the lexing process. 
*/ void init_cppm(void) { int i, j, k, c; static unsigned char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; static unsigned char lower[] = "abcdefghijklmnopqrstuvwxyz"; unsigned char *cp; for (i = 0; i < MSTATE; i ++) { for (j = 0; j < MAX_CHAR_VAL; j ++) cppm[i][j] = S_OUCH; cppm_vch[i] = S_OUCH; } for (i = 0; cppms[i].input[0]; i ++) for (k = 0; k < CMCR; k ++) { int s = cppms[i].state; int ns = cppms[i].new_state; switch (c = cppms[i].input[k]) { case 0: break; case SPC: /* see space_char() also */ cppm[s][' '] = ns; cppm[s]['\t'] = ns; cppm[s]['\v'] = ns; cppm[s]['\f'] = ns; #ifdef UNBREAKABLE_SPACE if (MAX_CHAR_VAL > UNBREAKABLE_SPACE) cppm[s][UNBREAKABLE_SPACE] = ns; #endif break; case ALP: for (cp = upper; *cp; cp ++) cppm[s][(int)*cp] = ns; for (cp = lower; *cp; cp ++) cppm[s][(int)*cp] = ns; cppm[s]['_'] = ns; break; case NUM: for (j = '0'; j <= '9'; j ++) cppm[s][j] = ns; break; case ANY: for (j = 0; j < MAX_CHAR_VAL; j ++) cppm[s][j] = ns; cppm_vch[s] = ns; break; case VCH: cppm_vch[s] = ns; break; default: cppm[s][c] = ns; break; } } } /* * Make some character as equivalent to a letter for identifiers. */ void set_identifier_char(int c) { cppm[S_START][c] = PUT(S_NAME); cppm[S_NAME][c] = PUT(S_NAME); } /* * Remove the "identifier" status from a character. */ void unset_identifier_char(int c) { cppm[S_START][c] = S_ILL; cppm[S_NAME][c] = FRZ(STO(NAME)); } int space_char(int c) { if (c == ' ' || c == '\t' || c == '\v' || c == '\f' #ifdef UNBREAKABLE_SPACE || c == UNBREAKABLE_SPACE #endif ) return 1; return 0; } #ifndef NO_UCPP_BUF /* * our output buffer is full, flush it */ void flush_output(struct lexer_state *ls) { size_t x = ls->sbuf, y = 0, z; if (ls->sbuf == 0) return; do { z = fwrite(ls->output_buf + y, 1, x, ls->output); x -= z; y += z; } while (z && x > 0); if (!y) { error(ls->line, "could not flush output (disk full ?)"); die(); } ls->sbuf = 0; } #endif /* * Output one character; flush the buffer if needed. 
* This function should not be called, except by put_char(). */ static inline void write_char(struct lexer_state *ls, unsigned char c) { #ifndef NO_UCPP_BUF ls->output_buf[ls->sbuf ++] = c; if (ls->sbuf == OUTPUT_BUF_MEMG) flush_output(ls); #else if (putc((int)c, ls->output) == EOF) { error(ls->line, "output write error (disk full ?)"); die(); } #endif if (c == '\n') { ls->oline ++; } } /* * schedule a character for output */ void put_char(struct lexer_state *ls, unsigned char c) { if (ls->flags & KEEP_OUTPUT) write_char(ls, c); } /* * get next raw input character */ static inline int read_char(struct lexer_state *ls) { unsigned char c; if (!ls->input) { return ((ls->pbuf ++) < ls->ebuf) ? ls->input_string[ls->pbuf - 1] : -1; } while (1) { #ifndef NO_UCPP_BUF if (ls->pbuf == ls->ebuf) { #ifdef UCPP_MMAP if (ls->from_mmap) { munmap((void *)ls->input_buf, ls->ebuf); ls->from_mmap = 0; ls->input_buf = ls->input_buf_sav; } #endif ls->ebuf = fread(ls->input_buf, 1, INPUT_BUF_MEMG, ls->input); ls->pbuf = 0; } if (ls->ebuf == 0) return -1; c = ls->input_buf[ls->pbuf ++]; #else int x = getc(ls->input); if (x == EOF) return -1; c = x; #endif if (ls->flags & COPY_LINE) { if (c == '\n') { ls->copy_line[ls->cli] = 0; ls->cli = 0; } else if (ls->cli < (COPY_LINE_LENGTH - 1)) { ls->copy_line[ls->cli ++] = c; } } if (ls->macfile && c == '\n') { ls->macfile = 0; continue; } ls->macfile = 0; if (c == '\r') { /* * We found a '\r'; we handle it as a newline * and ignore the next newline. This should work * with all combinations of Msdos, MacIntosh and * Unix files on these three platforms. On other * platforms, native file formats are always * supported. 
*/ ls->macfile = 1; c = '\n'; } break; } return c; } /* * next_fifo_char(), char_lka1() and char_lka2() give a two character * look-ahead on the input stream; this is needed for trigraphs */ static inline int next_fifo_char(struct lexer_state *ls) { int c; if (ls->nlka != 0) { c = ls->lka[0]; ls->lka[0] = ls->lka[1]; ls->nlka --; } else c = read_char(ls); return c; } static inline int char_lka1(struct lexer_state *ls) { if (ls->nlka == 0) { ls->lka[0] = read_char(ls); ls->nlka ++; } return ls->lka[0]; } static inline int char_lka2(struct lexer_state *ls) { #ifdef AUDIT if (ls->nlka == 0) ouch("always in motion future is"); #endif if (ls->nlka == 1) { ls->lka[1] = read_char(ls); ls->nlka ++; } return ls->lka[1]; } static struct trigraph { int old, new; } trig[9] = { { '=', '#' }, { '/', '\\' }, { '\'', '^' }, { '(', '[' }, { ')', ']' }, { '!', '|' }, { '<', '{' }, { '>', '}' }, { '-', '~' } }; /* * Returns the next character, after treatment of trigraphs and terminating * backslashes. Return value is -1 if there is no more input. */ static inline int next_char(struct lexer_state *ls) { int c; if (!ls->discard) return ls->last; ls->discard = 0; do { c = next_fifo_char(ls); /* check trigraphs */ if (c == '?' && char_lka1(ls) == '?' 
&& (ls->flags & HANDLE_TRIGRAPHS)) { int i, d; d = char_lka2(ls); for (i = 0; i < 9; i ++) if (d == trig[i].old) { if (ls->flags & WARN_TRIGRAPHS) { ls->count_trigraphs ++; } if (ls->flags & WARN_TRIGRAPHS_MORE) { warning(ls->line, "trigraph ?""?%c " "encountered", d); } next_fifo_char(ls); next_fifo_char(ls); c = trig[i].new; break; } } if (c == '\\' && char_lka1(ls) == '\n') { ls->line ++; next_fifo_char(ls); } else if (c == '\r' && char_lka1(ls) == '\n') { ls->line ++; next_fifo_char(ls); c = '\n'; return c; } else { ls->last = c; return c; } } while (1); } /* * wrapper for next_char(), to be called from outside * (used by #error, #include directives) */ int grap_char(struct lexer_state *ls) { return next_char(ls); } /* * Discard the current character, so that the next call to next_char() * will step into the input stream. */ void discard_char(struct lexer_state *ls) { #ifdef AUDIT if (ls->discard) ouch("overcollecting garbage"); #endif ls->discard = 1; ls->utf8 = 0; if (ls->last == '\n') ls->line ++; } /* * Convert an UTF-8 encoded character to a Universal Character Name * using \u (or \U when appropriate). 
*/ static int utf8_to_string(unsigned char buf[], unsigned long utf8) { unsigned long val = 0; static char hex[16] = "0123456789abcdef"; if (utf8 & 0x80UL) { unsigned long x1, x2, x3, x4; x1 = (utf8 >> 24) & 0x7fUL; x2 = (utf8 >> 16) & 0x7fUL; x3 = (utf8 >> 8) & 0x7fUL; x4 = (utf8) & 0x3fUL; x1 &= 0x07UL; if (x2 & 0x40UL) x2 &= 0x0fUL; if (x3 & 0x40UL) x3 &= 0x1fUL; val = x4 | (x3 << 6) | (x2 << 12) | (x1 << 16); } else val = utf8; if (val < 128) { buf[0] = val; buf[1] = 0; return 1; } else if (val < 0xffffUL) { buf[0] = '\\'; buf[1] = 'u'; buf[2] = hex[(size_t)(val >> 12)]; buf[3] = hex[(size_t)((val >> 8) & 0xfU)]; buf[4] = hex[(size_t)((val >> 4) & 0xfU)]; buf[5] = hex[(size_t)(val & 0xfU)]; buf[6] = 0; return 6; } buf[0] = '\\'; buf[1] = 'U'; buf[2] = '0'; buf[3] = '0'; buf[4] = hex[(size_t)(val >> 20)]; buf[5] = hex[(size_t)((val >> 16) & 0xfU)]; buf[6] = hex[(size_t)((val >> 12) & 0xfU)]; buf[7] = hex[(size_t)((val >> 8) & 0xfU)]; buf[8] = hex[(size_t)((val >> 4) & 0xfU)]; buf[9] = hex[(size_t)(val & 0xfU)]; buf[10] = 0; return 10; } /* * Scan the identifier and put it in canonical form: * -- tranform \U0000xxxx into \uxxxx * -- inside \u and \U, make letters low case * -- report (some) incorrect use of UCN */ static void canonize_id(struct lexer_state *ls, char *id) { char *c, *d; for (c = d = id; *c;) { if (*c == '\\') { int i; if (!*(c + 1)) goto canon_error; if (*(c + 1) == 'U') { for (i = 0; i < 8 && *(c + i + 2); i ++); if (i != 8) goto canon_error; *(d ++) = '\\'; c += 2; for (i = 0; i < 4 && *(c + i) == '0'; i ++); if (i == 4) { *(d ++) = 'u'; c += 4; } else { *(d ++) = 'U'; i = 8; } for (; i > 0; i --) { switch (*c) { case 'A': *(d ++) = 'a'; break; case 'B': *(d ++) = 'b'; break; case 'C': *(d ++) = 'c'; break; case 'D': *(d ++) = 'd'; break; case 'E': *(d ++) = 'e'; break; case 'F': *(d ++) = 'f'; break; default: *(d ++) = *c; break; } c ++; } } else if (*(c + 1) == 'u') { for (i = 0; i < 4 && *(c + i + 2); i ++); if (i != 4) goto canon_error; *(d 
++) = '\\'; *(d ++) = 'u'; c += 2; for (; i > 0; i --) { switch (*c) { case 'A': *(d ++) = 'a'; break; case 'B': *(d ++) = 'b'; break; case 'C': *(d ++) = 'c'; break; case 'D': *(d ++) = 'd'; break; case 'E': *(d ++) = 'e'; break; case 'F': *(d ++) = 'f'; break; default: *(d ++) = *c; break; } c ++; } } else goto canon_error; continue; } *(d ++) = *(c ++); } *d = 0; return; canon_error: for (; *c; *(d ++) = *(c ++)); if (ls->flags & WARN_STANDARD) { warning(ls->line, "malformed identifier with UCN: '%s'", id); } *d = 0; } /* * Run the automaton, in order to get the next token. * This function should not be called, except by next_token() * * return value: 1 on error, 2 on end-of-file, 0 otherwise. */ static inline int read_token(struct lexer_state *ls) { int cstat = S_START, nstat; size_t ltok = 0; int c, outc = 0, ucn_in_id = 0; int shift_state; unsigned long utf8; long l = ls->line; ls->ctok->line = l; if (ls->pending_token) { if ((ls->ctok->type = ls->pending_token) == BUNCH) { ls->ctok->name[0] = '\\'; ls->ctok->name[1] = 0; } ls->pending_token = 0; return 0; } if (ls->flags & UTF8_SOURCE) { utf8 = ls->utf8; shift_state = 0; } if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT)) for (; ls->line > ls->oline;) put_char(ls, '\n'); do { c = next_char(ls); if (c < 0) { if ((ls->flags & UTF8_SOURCE) && shift_state) { if (ls->flags & WARN_STANDARD) warning(ls->line, "truncated UTF-8 " "character"); shift_state = 0; utf8 = 0; } if (cstat == S_START) return 2; nstat = cppm_vch[cstat]; } else { if (ls->flags & UTF8_SOURCE) { if (shift_state) { if ((c & 0xc0) != 0x80) { if (ls->flags & WARN_STANDARD) warning(ls->line, "truncated " "UTF-8 " "character"); shift_state = 0; utf8 = 0; c = '_'; } else { utf8 = (utf8 << 8) | c; if (-- shift_state) { ls->discard = 1; continue; } c = '_'; } } else if ((c & 0xc0) == 0xc0) { if ((c & 0x30) == 0x30) { shift_state = 3; } else if (c & 0x20) { shift_state = 2; } else { shift_state = 1; } utf8 = c; ls->discard = 1; continue; } else utf8 
= 0; } nstat = cppm[cstat][c < MAX_CHAR_VAL ? c : 0]; } #ifdef AUDIT if (nstat == S_OUCH) { ouch("bad move..."); } #endif /* * disable C++-like comments */ if (nstat == S_COMMENT5 && !(ls->flags & CPLUSPLUS_COMMENTS)) nstat = FRZ(STO(SLASH)); if (noMOD(nstat) >= MSTATE && !ttSTO(nstat)) switch (noMOD(nstat)) { case S_ILL: if (ls->flags & CCHARSET) { error(ls->line, "illegal character '%c'", c); return 1; } nstat = PUT(STO(BUNCH)); break; case S_BS: ls->ctok->name[0] = '\\'; ltok ++; nstat = FRZ(STO(BUNCH)); if (!(ls->flags & LEXER)) put_char(ls, '\\'); break; case S_ROGUE_BS: ls->pending_token = BUNCH; nstat = FRZ(STO(NAME)); break; case S_DDOT: ls->pending_token = DOT; nstat = FRZ(STO(DOT)); break; case S_DDSHARP: ls->pending_token = PCT; nstat = FRZ(STO(DIG_SHARP)); break; case S_BEHEAD: error(l, "unfinished string at end of line"); return 1; case S_DECAY: warning(l, "unterminated // comment"); nstat = FRZ(STO(COMMENT)); break; case S_TRUNC: error(l, "truncated token"); return 1; case S_TRUNCC: error(l, "truncated comment"); return 1; #ifdef AUDIT case S_OUCH: ouch("machine went out of control"); break; #endif } if (!ttFRZ(nstat)) { discard_char(ls); if (!(ls->flags & LEXER) && ls->condcomp) { int z = ttSTO(nstat) ? 
S_ILL : noMOD(nstat); if (cstat == S_NAME || z == S_NAME || ((CMT(cstat) || CMT(z)) && (ls->flags & DISCARD_COMMENTS))) { outc = 0; } else if (z == S_LCHAR || z == S_SLASH || (z == S_SHARP && ls->ltwnl) || (z == S_PCT && ls->ltwnl) || (z == S_BACKSLASH)) { outc = c; } else if (z == S_PCT2 && ls->ltwnl) { outc = -1; } else if (z == S_PCT3 && ls->ltwnl) { /* we have %:% but this still might not be a %:%: */ outc = -2; } else { if (outc < 0) { put_char(ls, '%'); put_char(ls, ':'); if (outc == -2) put_char(ls, '%'); outc = 0; } else if (outc) { put_char(ls, outc); outc = 0; } put_char(ls, c); } } } else if (outc == '/' && !(ls->flags & LEXER) && ls->condcomp) { /* this is a hack: we need to dump a pending slash */ put_char(ls, outc); outc = 0; } if (ttPUT(nstat)) { if (cstat == S_NAME_BS) { ucn_in_id = 1; wan(ls->ctok->name, ltok, '\\', ls->tknl); } if ((ls->flags & UTF8_SOURCE) && utf8) { unsigned char buf[11]; int i, j; for (i = 0, j = utf8_to_string(buf, utf8); i < j; i ++) wan(ls->ctok->name, ltok, buf[i], ls->tknl); /* if (j > 1) ucn_in_id = 1; */ } else wan(ls->ctok->name, ltok, (unsigned char)c, ls->tknl); } if (ttSTO(nstat)) { if (S_TOKEN(noMOD(nstat))) { wan(ls->ctok->name, ltok, (unsigned char)0, ls->tknl); } ls->ctok->type = noMOD(nstat); break; } cstat = noMOD(nstat); } while (1); if (!(ls->flags & LEXER) && (ls->flags & DISCARD_COMMENTS) && ls->ctok->type == COMMENT) put_char(ls, ' '); if (ucn_in_id && ls->ctok->type == NAME) canonize_id(ls, ls->ctok->name); return 0; } /* * fills ls->ctok with the next token * * When the READ_AGAIN flag is set in ls->flags, the flag is cleared and * the token already held in ls->ctok is delivered again; read_token() * is not called. In non-LEXER mode the text of that token is also * written out with put_char(): an OPT_NONE token is turned into NONE * and written as a single space; NAME tokens are not written here; * SHARP and DIG_SHARP tokens are not written when ls->ltwnl is set. * In the READ_AGAIN case the function returns 0; otherwise it returns * the value returned by read_token(). */ int next_token(struct lexer_state *ls) { if (ls->flags & READ_AGAIN) { ls->flags &= ~READ_AGAIN; if (!(ls->flags & LEXER)) { char *c = S_TOKEN(ls->ctok->type) ?
ls->ctok->name : token_name(ls->ctok); if (ls->ctok->type == OPT_NONE) { ls->ctok->type = NONE; #ifdef SEMPER_FIDELIS ls->ctok->name[0] = ' '; ls->ctok->name[1] = 0; #endif put_char(ls, ' '); } else if (ls->ctok->type != NAME && !(ls->ltwnl && (ls->ctok->type == SHARP || ls->ctok->type == DIG_SHARP))) for (; *c; c ++) put_char(ls, *c); } return 0; } return read_token(ls); } ./arith.h0000644000175000017500000002422311620140753011141 0ustar renerene/* * Integer arithmetic evaluation, header file. * * (c) Thomas Pornin 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* */ /* * This arithmetic evaluator uses two files: this header file (arith.h) * and the source file (arith.c). To use this code, the source file should * be included from another .c file which defines some macros (see below). * Then the functions defined in the arith.c file become available to the * including source file. If those functions are defined with external * linkage (that is, `ARITH_FUNCTION_HEADER' does not contain `static'), * it is possible for other source files to use the arithmetic functions * by including the arith.h header only. The source file which includes * arith.c should *not* include arith.h. * * If the #include is for arith.h, the following macros should be * defined: * * -- If the evaluator is supposed to use a native type: * NATIVE_SIGNED the native signed integer type * NATIVE_UNSIGNED the native unsigned integer type * * -- If the evaluator is supposed to use an emulated type: * SIMUL_ARITH_SUBTYPE the native unsigned type used for the simulation * SIMUL_SUBTYPE_BITS the native unsigned type size * SIMUL_NUMBITS the emulated type size * * -- For both cases: * ARITH_TYPENAME the central arithmetic type name * ARITH_FUNCTION_HEADER the qualifiers to add to function definitions * * The presence (respectively absence) of the NATIVE_SIGNED macro triggers * the use of the native type evaluator (respectively simulated type * evaluator). 
* * If the #include is for arith.c, the macros for arith.h should be defined, * and the following should be defined as well: * * -- If the evaluator is supposed to use a native type: * NATIVE_UNSIGNED_BITS the native unsigned type size * NATIVE_SIGNED_MIN the native signed minimum value * NATIVE_SIGNED_MAX the native signed maximum value * (the last two macros must evaluate to signed constant expressions) * * -- For both cases: * ARITH_WARNING(type) code to perform on warning * ARITH_ERROR(type) code to perform on error * * The macro ARITH_WARNING() and ARITH_ERROR() are invoked with a * numerical argument which is one of the enumeration constants * defined below (ARITH_EXCEP_*) that identifies the specific problem. * * If the #include is for arith.c, the macro ARITHMETIC_CHECKS may be * defined. When this macro is defined, checks are performed so that all * operation which would lead to undefined or implementation-defined * behaviour are first reported through ARITH_WARNING(). Code is smaller * and faster without these checks, of course. Regardless of the status * of that macro, divisions by 0 and overflows on signed division are * reported as errors through ARITH_ERROR(). 
* */ #ifndef ARITH_H__ #define ARITH_H__ enum { /* Warnings */ ARITH_EXCEP_CONV_O, /* overflow on conversion */ ARITH_EXCEP_NEG_O, /* overflow on unary minus */ ARITH_EXCEP_NOT_T, /* trap representation on bitwise inversion */ ARITH_EXCEP_PLUS_O, /* overflow on addition */ ARITH_EXCEP_PLUS_U, /* underflow on addition */ ARITH_EXCEP_MINUS_O, /* overflow on subtraction */ ARITH_EXCEP_MINUS_U, /* underflow on subtraction */ ARITH_EXCEP_AND_T, /* trap representation on bitwise and */ ARITH_EXCEP_XOR_T, /* trap representation on bitwise xor */ ARITH_EXCEP_OR_T, /* trap representation on bitwise or */ ARITH_EXCEP_LSH_W, /* left shift by type width or more */ ARITH_EXCEP_LSH_C, /* left shift by negative count */ ARITH_EXCEP_LSH_O, /* overflow on left shift */ ARITH_EXCEP_LSH_U, /* underflow on left shift */ ARITH_EXCEP_RSH_W, /* right shift by type width or more */ ARITH_EXCEP_RSH_C, /* right shift by negative count */ ARITH_EXCEP_RSH_N, /* right shift of negative value */ ARITH_EXCEP_STAR_O, /* overflow on multiplication */ ARITH_EXCEP_STAR_U, /* underflow on multiplication */ /* Errors */ ARITH_EXCEP_SLASH_D, /* division by 0 */ ARITH_EXCEP_SLASH_O, /* overflow on division */ ARITH_EXCEP_PCT_D, /* division by 0 on modulus operator */ ARITH_EXCEP_CONST_O /* constant too large */ }; #define arith_strc_(x, y) x ## y #define arith_strc(x, y) arith_strc_(x, y) #define arith_u arith_strc(u_, ARITH_TYPENAME) #define arith_s arith_strc(s_, ARITH_TYPENAME) #define arith_op_u(op) arith_strc(ARITH_TYPENAME, arith_strc(_u_, op)) #define arith_op_s(op) arith_strc(ARITH_TYPENAME, arith_strc(_s_, op)) #define ARITH_DECL_MONO_U_U(op) ARITH_FUNCTION_HEADER arith_u \ arith_op_u(op)(arith_u x) #define ARITH_DECL_MONO_U_S(op) ARITH_FUNCTION_HEADER arith_s \ arith_op_u(op)(arith_u x) #define ARITH_DECL_MONO_U_I(op) ARITH_FUNCTION_HEADER int \ arith_op_u(op)(arith_u x) #define ARITH_DECL_MONO_U_L(op) ARITH_FUNCTION_HEADER unsigned long \ arith_op_u(op)(arith_u x) #define 
ARITH_DECL_MONO_S_U(op) ARITH_FUNCTION_HEADER arith_u \ arith_op_s(op)(arith_s x) #define ARITH_DECL_MONO_S_S(op) ARITH_FUNCTION_HEADER arith_s \ arith_op_s(op)(arith_s x) #define ARITH_DECL_MONO_S_I(op) ARITH_FUNCTION_HEADER int \ arith_op_s(op)(arith_s x) #define ARITH_DECL_MONO_S_L(op) ARITH_FUNCTION_HEADER long \ arith_op_s(op)(arith_s x) #define ARITH_DECL_MONO_I_U(op) ARITH_FUNCTION_HEADER arith_u \ arith_op_u(op)(int x) #define ARITH_DECL_MONO_L_U(op) ARITH_FUNCTION_HEADER arith_u \ arith_op_u(op)(unsigned long x) #define ARITH_DECL_MONO_I_S(op) ARITH_FUNCTION_HEADER arith_s \ arith_op_s(op)(int x) #define ARITH_DECL_MONO_L_S(op) ARITH_FUNCTION_HEADER arith_s \ arith_op_s(op)(long x) #define ARITH_DECL_MONO_ST_US(op) ARITH_FUNCTION_HEADER char *arith_op_u(op) \ (char *c, arith_u *ru, arith_s *rs, int *sp) #define ARITH_DECL_BI_UU_U(op) ARITH_FUNCTION_HEADER arith_u \ arith_op_u(op)(arith_u x, arith_u y) #define ARITH_DECL_BI_UI_U(op) ARITH_FUNCTION_HEADER arith_u \ arith_op_u(op)(arith_u x, int y) #define ARITH_DECL_BI_UU_I(op) ARITH_FUNCTION_HEADER int \ arith_op_u(op)(arith_u x, arith_u y) #define ARITH_DECL_BI_SS_S(op) ARITH_FUNCTION_HEADER arith_s \ arith_op_s(op)(arith_s x, arith_s y) #define ARITH_DECL_BI_SI_S(op) ARITH_FUNCTION_HEADER arith_s \ arith_op_s(op)(arith_s x, int y) #define ARITH_DECL_BI_SS_I(op) ARITH_FUNCTION_HEADER int \ arith_op_s(op)(arith_s x, arith_s y) #endif #ifdef NATIVE_SIGNED typedef NATIVE_SIGNED arith_s; typedef NATIVE_UNSIGNED arith_u; #else #if SIMUL_NUMBITS > (2 * SIMUL_SUBTYPE_BITS) #error Native subtype too small for arithmetic simulation. 
#endif #define SIMUL_MSW_WIDTH (SIMUL_NUMBITS / 2) #define SIMUL_LSW_WIDTH ((SIMUL_NUMBITS + 1) / 2) typedef struct { SIMUL_ARITH_SUBTYPE msw, lsw; } arith_u, arith_s; #endif /* functions with the unsigned type */ ARITH_DECL_MONO_S_U(to_u); ARITH_DECL_MONO_I_U(fromint); ARITH_DECL_MONO_L_U(fromulong); ARITH_DECL_MONO_U_I(toint); ARITH_DECL_MONO_U_L(toulong); ARITH_DECL_MONO_U_U(neg); ARITH_DECL_MONO_U_U(not); ARITH_DECL_MONO_U_I(lnot); ARITH_DECL_MONO_U_I(lval); ARITH_DECL_BI_UU_U(plus); ARITH_DECL_BI_UU_U(minus); ARITH_DECL_BI_UI_U(lsh); ARITH_DECL_BI_UI_U(rsh); ARITH_DECL_BI_UU_I(lt); ARITH_DECL_BI_UU_I(leq); ARITH_DECL_BI_UU_I(gt); ARITH_DECL_BI_UU_I(geq); ARITH_DECL_BI_UU_I(same); ARITH_DECL_BI_UU_I(neq); ARITH_DECL_BI_UU_U(and); ARITH_DECL_BI_UU_U(xor); ARITH_DECL_BI_UU_U(or); ARITH_DECL_BI_UU_U(star); ARITH_DECL_BI_UU_U(slash); ARITH_DECL_BI_UU_U(pct); /* functions with the signed type */ ARITH_DECL_MONO_U_S(to_s); ARITH_DECL_MONO_I_S(fromint); ARITH_DECL_MONO_L_S(fromlong); ARITH_DECL_MONO_S_I(toint); ARITH_DECL_MONO_S_L(tolong); ARITH_DECL_MONO_S_S(neg); ARITH_DECL_MONO_S_S(not); ARITH_DECL_MONO_S_I(lnot); ARITH_DECL_MONO_S_I(lval); ARITH_DECL_BI_SS_S(plus); ARITH_DECL_BI_SS_S(minus); ARITH_DECL_BI_SI_S(lsh); ARITH_DECL_BI_SI_S(rsh); ARITH_DECL_BI_SS_I(lt); ARITH_DECL_BI_SS_I(leq); ARITH_DECL_BI_SS_I(gt); ARITH_DECL_BI_SS_I(geq); ARITH_DECL_BI_SS_I(same); ARITH_DECL_BI_SS_I(neq); ARITH_DECL_BI_SS_S(and); ARITH_DECL_BI_SS_S(xor); ARITH_DECL_BI_SS_S(or); ARITH_DECL_BI_SS_S(star); ARITH_DECL_BI_SS_S(slash); ARITH_DECL_BI_SS_S(pct); /* conversions from string */ ARITH_DECL_MONO_ST_US(octconst); ARITH_DECL_MONO_ST_US(hexconst); ARITH_DECL_MONO_ST_US(decconst); ./tune.h0000644000175000017500000004140211620140753011003 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #ifndef UCPP__TUNE__ #define UCPP__TUNE__ #ifdef UCPP_CONFIG #include "config.h" #else /* ====================================================================== */ /* * The LOW_MEM macro triggers the use of macro storage which uses less * memory. It actually also improves performance on large, modern machines * (due to less cache pressure). This option implies no limitation (except * on the number of arguments a macro may, which is then limited to 32766) * so it is on by default. Non-LOW_MEM code is considered deprecated. 
*/ #define LOW_MEM /* ====================================================================== */ /* * Define AMIGA for systems using "drive letters" at the beginning of * some paths; define MSDOS on systems with drive letters and using * backslashes to separate directory components. */ /* #define AMIGA */ /* #define MSDOS */ /* ====================================================================== */ /* * Define this if your compiler does not know the strftime() function; * TurboC 2.01 under Msdos does not know strftime(). */ /* #define NOSTRFTIME */ /* ====================================================================== */ /* * Buffering: there are two levels of buffering on input and output streams: * the standard libc buffering (manageable with setbuf() and setvbuf()) * and some buffering provided by ucpp itself. The ucpp buffering uses * two buffers, of size respectively INPUT_BUF_MEMG and OUTPUT_BUF_MEMG * (as defined below). * You can disable one or both of these bufferings by defining the macros * NO_LIBC_BUF and NO_UCPP_BUF. */ /* #define NO_LIBC_BUF */ /* #define NO_UCPP_BUF */ /* * On Unix stations, the system call mmap() might be used on input files. * This option is a subclause of ucpp internal buffering. On one station, * a 10% speed improvement was observed. Do not define this unless the * host architecture has the following characteristics: * -- Posix / Single Unix compliance * -- Text files correspond one to one with memory representation * If a file is not seekable or not mmapable, ucpp will revert to the * standard fread() solution. * * This feature is still considered beta quality. On some systems where * files can be bigger than memory address space (mainly, 32-bit systems * with files bigger than 4 GB), this option makes ucpp fail to operate * on those extremely large files. */ #define UCPP_MMAP /* * Performance issues: * -- On memory-starved systems, such as Minix-i86, do not use ucpp * buffering; keep only libc buffering.
* -- If you do not use libc buffering, activate the UCPP_MMAP option. * Note that the UCPP_MMAP option is ignored if ucpp buffering is not * activated. * * On an Athlon 1200 running FreeBSD 4.7, the best performances are * achieved when libc buffering is activated and/or UCPP_MMAP is on. */ /* ====================================================================== */ /* * Define this if you want ucpp to generate tokenized PRAGMA tokens; * otherwise, it will generate raw string contents. This setting is * irrelevant to the stand-alone version of ucpp. */ #define PRAGMA_TOKENIZE /* * Define this to the special character that marks the end of tokens with * a string value inside a tokenized PRAGMA token. The #pragma and _Pragma() * directives which use this character will be a bit more difficult to * decode (but ucpp will not mind). 0 cannot be used. '\n' is fine because * it cannot appear inside a #pragma or _Pragma(), since newlines cannot be * embedded inside tokens, neither directly nor by macro substitution and * stringization. Besides, '\n' is portable. */ #define PRAGMA_TOKEN_END ((unsigned char)'\n') /* * Define this if you want ucpp to include encountered #pragma directives * in its output in non-lexer mode; _Pragma() are translated to equivalent * #pragma directives. */ #define PRAGMA_DUMP /* * According to my interpretation of the C99 standard, _Pragma() are * evaluated wherever macro expansion could take place. However, Neil Booth, * whose mother language is English (contrary to me) and who is well aware * of the C99 standard (and especially the C preprocessor) told me that * it was unclear whether _Pragma() are evaluated inside directives such * as #if, #include and #line. If you want to disable the evaluation of * _Pragma() inside such directives, define the following macro. */ /* #define NO_PRAGMA_IN_DIRECTIVE */ /* * The C99 standard mandates that the operator `##' must yield a single, * valid token, lest undefined behaviour befall upon thy head. 
Hence, * for instance, `+ ## +=' is forbidden, because `++=' is not a valid * token (although it is a valid list of two tokens, `++' and `='). * However, ucpp only emits a warning for such sin, and unmerges the * tokens (thus emitting `+' then `+=' for that example). When ucpp * produces text output, those two tokens will be separated by a space * character so that the basic rule of text output is preserved: when * parsed again, text output yields the exact same stream of tokens. * That extra space is virtual: it does not count as a true whitespace * token for stringization. * * However, it might be desirable, for some uses other than preprocessing * C source code, not to emit that extra space at all. To make ucpp behave * that way, define the DSHARP_TOKEN_MERGE macro. Please note that this * can trigger spurious token merging. For instance, with that macro * activated, `+ ## +=' will be output as `++=' which, if preprocessed * again, will read as `++' followed by `='. * * All this is irrelevant to lexer mode; and trying to merge incompatible * tokens is a shooting offence, anyway. */ /* #define DSHARP_TOKEN_MERGE */ /* ====================================================================== */ /* * Define INMACRO_FLAG to include two flags to the structure lexer_state, * that tell whether tokens come from a macro-replacement, and count those * macro-replacements. */ /* #define INMACRO_FLAG */ /* ====================================================================== */ /* * Paths where files are looked for by default, when #include is used. * Typical path is /usr/local/include and /usr/include, in that order. * If you want to set up no path, define the macro to 0. * * For Linux, get gcc includes too, or you will miss things like stddef.h. * The exact path varies much, depending on the distribution. 
*/ #define STD_INCLUDE_PATH "/usr/local/include", "/usr/include" /* ====================================================================== */ /* * Arithmetic code for evaluation of #if expressions. Evaluation * uses either a native machine type, or an emulated two's complement * type. Division by 0 and overflow on division are considered as errors * and reported as such. If ARITHMETIC_CHECKS is defined, all other * operations that imply undefined or implementation-defined behaviour * are reported as warnings but otherwise performed nonetheless. * * For native type evaluation, the following macros should be defined: * NATIVE_SIGNED the native signed type * NATIVE_UNSIGNED the native corresponding unsigned type * NATIVE_UNSIGNED_BITS the native unsigned type width, in bits * NATIVE_SIGNED_MIN the native signed type minimum value * NATIVE_SIGNED_MAX the native signed type maximum value * * The code in the arith.c file performs some tricky detection * operations on the native type representation and possible existence * of a trap representation. These operations assume a C99-compliant * compiler; on a C90-only compiler, the operations are valid but may * yield incorrect results. You may force those settings with some * more macros: see the comments in arith.c (look for "ARCH_DEFINED"). * Remember that this is mostly a non-issue, unless you are building * ucpp with a pre-C99 cross-compiler and either the host or target * architecture uses a non-two's complement representation of signed * integers. Such a combination is pretty rare nowadays, so the best * you can do is forgetting completely this paragraph and live in peace. * * * If you do not have a handy native type (for instance, you compile ucpp * with a C90 compiler which lacks the "long long" type, or you compile * ucpp for a cross-compiler which should support an evaluation integer * type of a size that is not available on the host machine), you may use * a simulated type. 
The type uses two's complement representation and * may have any width from 2 bits to twice the underlying native type * width, inclusive (odd widths are allowed). To use an emulated type, * make sure that NATIVE_SIGNED is not defined, and define the following * macros: * SIMUL_ARITH_SUBTYPE the native underlying type to use * SIMUL_SUBTYPE_BITS the native underlying type width * SIMUL_NUMBITS the emulated type width * * Undefined and implementation-defined behaviours are warned upon, if * ARITHMETIC_CHECKS is defined. Results are truncated to the type * width; shift count for the << and >> operators is reduced modulo the * emulated type width; right shifting of a signed negative value performs * sign extension (the result is left-padded with bits set to 1). */ /* * For native type evaluation with a 64-bit "long long" type. */ #define NATIVE_SIGNED long long #define NATIVE_UNSIGNED unsigned long long #define NATIVE_UNSIGNED_BITS 64 #define NATIVE_SIGNED_MIN (-9223372036854775807LL - 1) #define NATIVE_SIGNED_MAX 9223372036854775807LL /* * For emulation of a 64-bit type using a native 32-bit "unsigned long" * type. #undef NATIVE_SIGNED #define SIMUL_ARITH_SUBTYPE unsigned long #define SIMUL_SUBTYPE_BITS 32 #define SIMUL_NUMBITS 64 */ /* * Comment out the following line if you want to deactivate arithmetic * checks (warnings upon undefined and implementation-defined * behaviour). Arithmetic checks slow down a bit arithmetic operations, * especially multiplications, but this should not be an issue with * typical C source code. */ #define ARITHMETIC_CHECKS /* ====================================================================== */ /* * To force signedness of wide character constants, define WCHAR_SIGNEDNESS * to 0 for unsigned, 1 for signed. By default, wide character constants * are signed if the native `char' type is signed, and unsigned otherwise. #define WCHAR_SIGNEDNESS 0 */ /* * Standard assertions.
They should include one cpu() assertion, one machine() * assertion (identical to cpu()), and one or more system() assertions. * * for Linux/PC: cpu(i386), machine(i386), system(unix), system(linux) * for Linux/Alpha: cpu(alpha), machine(alpha), system(unix), system(linux) * for Sparc/Solaris: cpu(sparc), machine(sparc), system(unix), system(solaris) * * These are only suggestions. On Solaris, machine() should be defined * for i386 or sparc (standard system header use such an assertion). For * cross-compilation, define assertions related to the target architecture. * * If you want no standard assertion, define STD_ASSERT to 0. */ /* #define STD_ASSERT "cpu(i386)", "machine(i386)", "system(unix)", \ "system(freebsd)" */ /* ====================================================================== */ /* * System predefined macros. Nothing really mandatory, but some programs * might rely on those. * Each string must be either "name" or "name=token-list". If you want * no predefined macro, define STD_MACROS to 0. */ /* #define STD_MACROS "__FreeBSD=4", "__unix", "__i386", \ "__FreeBSD__=4", "__unix__", "__i386__" */ /* ====================================================================== */ /* * Default flags; HANDLE_ASSERTIONS is required for Solaris system headers. * See cpp.h for the definition of these flags. */ #define DEFAULT_CPP_FLAGS (DISCARD_COMMENTS | WARN_STANDARD \ | WARN_PRAGMA | FAIL_SHARP | MACRO_VAARG \ | CPLUSPLUS_COMMENTS | LINE_NUM | TEXT_OUTPUT \ | KEEP_OUTPUT | HANDLE_TRIGRAPHS \ | HANDLE_ASSERTIONS) #define DEFAULT_LEXER_FLAGS (DISCARD_COMMENTS | WARN_STANDARD | FAIL_SHARP \ | MACRO_VAARG | CPLUSPLUS_COMMENTS | LEXER \ | HANDLE_TRIGRAPHS | HANDLE_ASSERTIONS) /* ====================================================================== */ /* * Define this to use sigsetjmp()/siglongjmp() instead of setjmp()/longjmp(). * This is non-ANSI, but it improves performance on some POSIX system. * On typical C source code, such improvement is completely negligeable. 
*/ /* #define POSIX_JMP */ /* ====================================================================== */ /* * Maximum value (plus one) of a character handled by the lexer; 128 is * alright for ASCII native source code, but 256 is needed for EBCDIC. * 256 is safe in both cases; you will have big problems if you set * this value to INT_MAX or above. On Minix-i86 or Msdos (small memory * model), define MAX_CHAR_VAL to 128. * * Set MAX_CHAR_VAL to a power of two to increase lexing speed. Beware * that lexer.c defines a static array of size MSTATE * MAX_CHAR_VAL * values of type int (MSTATE is defined in lexer.c and is about 40). */ #define MAX_CHAR_VAL 128 /* * If you want some extra character to be considered as whitespace, * define this macro to that space. On ISO-8859-1 machines, 160 is * the code for the unbreakable space. */ /* #define UNBREAKABLE_SPACE 160 */ /* * If you want whitespace tokens contents to be recorded (making them * tokens with a string content), define this. The macro STRING_TOKEN * will be adjusted accordingly. * Without this option, whitespace tokens are not even returned by the * lex() function. This is irrelevant for the non-lexer mode (almost -- * it might slow down a bit ucpp, and with this option, comments will be * kept inside #pragma directives). */ /* #define SEMPER_FIDELIS */ #endif /* End of options overridable by UCPP_CONFIG and config.h */ /* ====================================================================== */ /* * Some constants used for memory increment granularity. Increasing these * values reduces the number of calls to malloc() but increases memory * consumption. * * Values should be powers of 2. 
*/ /* for cpp.c */ #define COPY_LINE_LENGTH 80 #define INPUT_BUF_MEMG 8192 #define OUTPUT_BUF_MEMG 8192 #define TOKEN_NAME_MEMG 64 /* must be at least 4 */ #define TOKEN_LIST_MEMG 32 #define INCPATH_MEMG 16 #define GARBAGE_LIST_MEMG 32 #define LS_STACK_MEMG 4 #define FNAME_MEMG 32 /* ====================================================================== */ /* To protect the innocent. */ #if defined(NO_UCPP_BUF) && defined(UCPP_MMAP) #undef UCPP_MMAP #endif #if defined(UCPP_MMAP) || defined(POSIX_JMP) #ifndef _POSIX_SOURCE #define _POSIX_SOURCE 1 #endif #endif /* * C90 does not know about the "inline" keyword, but C99 does know, * and some C90 compilers know it as an extension. This part detects * these occurrences. */ #ifndef INLINE #if __STDC__ && __STDC_VERSION__ >= 199901L /* this is a C99 compiler, keep inline unchanged */ #elif defined(__GNUC__) /* this is GNU gcc; modify inline. The semantics is not identical to C99 but the differences are irrelevant as long as inline functions are static */ #undef inline #define inline __inline__ #elif defined(__DECC) && defined(__linux__) /* this is Compaq C under Linux, use __inline__ */ #undef inline #define inline __inline__ #else /* unknown compiler -> deactivate inline */ #undef inline #define inline #endif #else /* INLINE has been set, use its value */ #undef inline #define inline INLINE #endif #endif ./ucpp.10000644000175000017500000001025311620140753010710 0ustar renerene.TH UCPP 1 "Oct 21 2000" .SH NAME ucpp \- C preprocessor .SH SYNOPSIS .B ucpp [ .I options ] [ .I file ] .SH DESCRIPTION .LP .B ucpp is a C preprocessor mostly compatible with ISO-C99. It is rather strict and uses only a small amount of memory. It uses standard input as primary input if no file argument is given. .SH OPTIONS There are several classes of options. .TP .B Language Options .TP .BI \-C keep comments in the output. .TP .BI \-s if a rogue '#' is encountered, do not emit an error and keep it in the output. 
.TP .BI \-l suppress the emission of '#line' directives in the output. .TP .BI \-lg convert the '#line' to the gcc-style equivalent. .TP .BI \-CC disable C++-like comments (a '//' begins a comment, up to the end of the line). Use this option to get closer to C90 behaviour. .TP .B \-a, \-na handle assertions (defined with #assert); .B \-a also defines the standard assertions .I #machine , .I #cpu and .I #system (see .B \-e to get the local definition of such assertions). .TP .BI \-a0 disable assertion support. .TP .BI \-V disable support for macros with a variable number of arguments: in C99, a macro may be declared with .I ... as the last argument; inside the replacement list, .I __VA_ARGS__ is replaced with the optional extra arguments given in the call to the macro. Use this option to get closer to C90 behaviour. .TP .BI \-u enable UTF-8 support: with this option, the source is considered as an ISO/10646 source, encoded in UTF-8. Characters represented as two bytes or more are considered as alphabetic characters, like letters, and therefore usable in identifiers. These characters hold the same syntactic value as the corresponding Universal Character Names. .TP .BI \-X enable .B \-a, \-u and .B \-Y. This should make .B ucpp behave closer to what is requested from a "modern" C preprocessor. .TP .BI \-c90 enable .B \-V and .B \-CC, and do not define .B __STDC_VERSION__. This should make .B ucpp mimic older C90 behaviour. .TP .BI \-t disable trigraph support; this seems to be required for some legacy code. .TP .B Warning Options .TP .BI \-wt emit a final warning when trigraphs are encountered. .TP .BI \-wtt emit warnings for each trigraph encountered. .TP .BI \-wa emit annoying warnings (these are usually useless). .TP .BI \-w0 suppress standard warnings. .TP .B Directory Options .TP .BI \-I directory .TP .BI "\-I " directory add .I directory to the include path, before the standard include path.
.TP .BI \-J directory .TP .BI "\-J " directory add .I directory to the include path, after the standard include path. .TP .BI \-zI do not use the standard (compile-time) include path. .TP .BI \-M emit only the names of encountered files, separated by spaces; this is intended for automatic generation of Makefile dependencies. .TP .BI \-Ma do the same as .B \-M but also for system files. .TP .BI "\-o " file direct the output to .I file instead of standard output. .TP .B Macro Options .TP .BI \-D macro predefine .I macro with content .B 1. .TP .BI \-D macro=def predefine .I macro with the content .I def. .TP .BI \-U macro undefine .I macro. .TP .BI \-Y predefine system-dependent macros. .TP .BI \-Z do not predefine special macros such as .B __TIME__. .TP .BI \-A foo(bar) add .I foo(bar) to the list of assertions. .TP .BI \-B foo(bar) remove .I foo(bar) from the list of assertions; you may also use .BI \-B foo to remove all .BI \-B foo(xxx) from the list of assertions. .TP .BI \-d instead of normal output, emit '#define' directives representing all macros defined during processing. .TP .BI \-e instead of normal output, emit '#assert' directives representing all assertions defined during processing. .TP .B Miscellaneous Options .TP .BI \-v print version number, include path and (optionally) defined assertions. .TP .BI \-h print some help. .SH ENVIRONMENT .PP .B ucpp is not itself affected by environment variables. However, it uses library functions that might be affected, depending on the system. .SH AUTHOR Thomas Pornin .SH BUGS .PP .B ucpp is considered stable software. However improbable it is, please report bugs to the author (possibly with a file that exhibits the problem) if the latest version, available from this site: .TP http://pornin.nerim.net/ucpp/ .PP has the bug.
./CHANGELOG0000644000175000017500000000142311620140753011070 0ustar renereneucpp-1.3.2 * Fixed Issue 8, Included files missing a "terminating carriage return character" will interrupt preprocessing in sample.c/LEXER mode. (http://code.google.com/p/ucpp/issues/detail?id=8) ucpp-1.3.1 * Fixed Issue 5, "\r\n" carriage return characters are double counted. (http://code.google.com/p/ucpp/issues/detail?id=5) * Fixed Issue 6, Included files missing a "terminating carriage return character" will interrupt preprocessing in ucpp (STAND_ALONE mode). (http://code.google.com/p/ucpp/issues/detail?id=6) * Fixed Issue 7, STD_MACROS & STD_ASSERTS undefined when trying to build ucpp -DSTAND_ALONE. (http://code.google.com/p/ucpp/issues/detail?id=7) * Build ucpp & libucpp with 'make'. ucpp-1.3 * Original import into svn at code.google.com/p/ucpp ./Makefile0000644000175000017500000000715411620140753011325 0ustar renerene# Makefile for ucpp # # (c) Thomas Pornin 1999 - 2002 # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 4. The name of the authors may not be used to endorse or promote # products derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
# OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.POSIX:

# ----- user configurable part -----

# Edit the variables to suit your system.
#
#   use -DAUDIT to enable some internal sanity checks
#   use -DMEM_CHECK to check the return value of malloc()
#      (superseded by AUDIT)
#   use -DMEM_DEBUG to enable memory leak detection (warning: this
#      slows down ucpp a bit, and greatly increases memory consumption)
#   use -DINLINE=foobar to enable use of the 'foobar'
#      non standard qualifier, as an equivalent to the C99 'inline'
#      qualifier. See tune.h for details.
#
# Two FLAGS lines are given for each system type; choose the first one for
# debug, the second one for a fast binary.
# for a generic compiler called cc #CC = cc #FLAGS = -DAUDIT #FLAGS = -O -DMEM_CHECK # for Minix-86 #CC = cc #LDFLAGS = -i #FLAGS = -m -DAUDIT #FLAGS = -O -m -DMEM_CHECK # for gcc CC = gcc FLAGS = -O3 -W -Wall -ansi #FLAGS = -g -W -Wall -ansi -DAUDIT -DMEM_DEBUG #FLAGS = -O3 -mcpu=pentiumpro -fomit-frame-pointer -W -Wall -ansi -DMEM_CHECK #FLAGS = -O -pg -W -Wall -ansi -DMEM_CHECK #LDFLAGS = -pg # for the Compaq C compiler on Alpha/Linux #CC = ccc #FLAGS = -w0 -g -DAUDIT #FLAGS = -w0 -fast -DMEM_CHECK # for the Sun Workshop C Compiler #CC = cc #FLAGS = -g -Xa -DAUDIT #FLAGS = -Xa -fast -DMEM_CHECK # flags for the link step LIBS = #LIBS = libefence.a #LIBS = -lgc_dbg STAND_ALONE = -DSTAND_ALONE ifdef STAND_ALONE CSRC = mem.c nhash.c cpp.c lexer.c assert.c macro.c eval.c FINAL_STEP = $(CC) $(LDFLAGS) -DUCPP_CONFIG $(STAND_ALONE) -o ucpp $(CSRC) $(LIBS) endif # ----- nothing should be changed below this line ----- COBJ = mem.o nhash.o cpp.o lexer.o assert.o macro.o eval.o CFLAGS = $(FLAGS) all: ucpp @ar cq libucpp.a *.o clean: @rm -f *.o ucpp core *.a ucpp: $(COBJ) @$(FINAL_STEP) assert.o: tune.h ucppi.h cpp.h nhash.h mem.h @$(CC) $(CFLAGS) -c assert.c cpp.o: tune.h ucppi.h cpp.h nhash.h mem.h @$(CC) $(CFLAGS) -c cpp.c eval.o: tune.h ucppi.h cpp.h nhash.h mem.h arith.c arith.h @$(CC) $(CFLAGS) -c eval.c lexer.o: tune.h ucppi.h cpp.h nhash.h mem.h @$(CC) $(CFLAGS) -c lexer.c macro.o: tune.h ucppi.h cpp.h nhash.h mem.h @$(CC) $(CFLAGS) -c macro.c mem.o: mem.h @$(CC) $(CFLAGS) -c mem.c nhash.o: nhash.h mem.h @$(CC) $(CFLAGS) -c nhash.c ./cpp.h0000644000175000017500000002033411620140753010613 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #ifndef UCPP__CPP__ #define UCPP__CPP__ /* * Uncomment the following if you want ucpp to use externally provided * error-reporting functions (ucpp_warning(), ucpp_error() and ucpp_ouch()) */ /* #define NO_UCPP_ERROR_FUNCTIONS */ /* * Tokens (do not change the order unless checking operators_name[] in cpp.c) * * It is important that the token NONE is 0 * Check the STRING_TOKEN macro */ #define CPPERR 512 enum { NONE, /* whitespace */ NEWLINE, /* newline */ COMMENT, /* comment */ NUMBER, /* number constant */ NAME, /* identifier */ BUNCH, /* non-C characters */ PRAGMA, /* a #pragma directive */ CONTEXT, /* new file or #line */ STRING, /* constant "xxx" */ CHAR, /* constant 'xxx' */ SLASH, /* / */ ASSLASH, /* /= */ MINUS, /* - */ MMINUS, /* -- */ ASMINUS, /* -= */ ARROW, /* -> */ PLUS, /* + */ PPLUS, /* ++ */ ASPLUS, /* += */ LT, /* < */ LEQ, /* <= */ LSH, /* << */ ASLSH, /* <<= */ GT, /* > */ GEQ, /* >= */ RSH, /* >> */ ASRSH, /* >>= */ ASGN, /* = */ SAME, /* == */ #ifdef CAST_OP CAST, /* => */ #endif NOT, /* ~ */ NEQ, /* != */ AND, /* & */ LAND, /* && */ ASAND, /* &= */ OR, /* | */ LOR, /* || */ ASOR, /* |= */ PCT, /* % */ ASPCT, /* %= */ STAR, /* * */ ASSTAR, /* *= */ CIRC, /* ^ */ ASCIRC, /* ^= */ LNOT, /* ! */ LBRA, /* { */ RBRA, /* } */ LBRK, /* [ */ RBRK, /* ] */ LPAR, /* ( */ RPAR, /* ) */ COMMA, /* , */ QUEST, /* ? */ SEMIC, /* ; */ COLON, /* : */ DOT, /* . */ MDOTS, /* ... 
*/ SHARP, /* # */ DSHARP, /* ## */ OPT_NONE, /* optional space to separate tokens in text output */ DIGRAPH_TOKENS, /* there begin digraph tokens */ /* for DIG_*, do not change order, unless checking undig() in cpp.c */ DIG_LBRK, /* <: */ DIG_RBRK, /* :> */ DIG_LBRA, /* <% */ DIG_RBRA, /* %> */ DIG_SHARP, /* %: */ DIG_DSHARP, /* %:%: */ DIGRAPH_TOKENS_END, /* digraph tokens end here */ LAST_MEANINGFUL_TOKEN, /* reserved words will go there */ MACROARG, /* special token for representing macro arguments */ UPLUS = CPPERR, /* unary + */ UMINUS /* unary - */ }; #include "tune.h" #include #include struct token { int type; long line; char *name; }; struct token_fifo { struct token *t; size_t nt, art; }; struct lexer_state { /* input control */ FILE *input; #ifndef NO_UCPP_BUF unsigned char *input_buf; #ifdef UCPP_MMAP int from_mmap; unsigned char *input_buf_sav; #endif #endif unsigned char *input_string; size_t ebuf; size_t pbuf; int lka[2]; int nlka; int macfile; int last; int discard; unsigned long utf8; unsigned char copy_line[COPY_LINE_LENGTH]; int cli; /* output control */ FILE *output; struct token_fifo *output_fifo, *toplevel_of; #ifndef NO_UCPP_BUF unsigned char *output_buf; #endif size_t sbuf; /* token control */ struct token *ctok; struct token *save_ctok; size_t tknl; int ltwnl; int pending_token; #ifdef INMACRO_FLAG int inmacro; long macro_count; #endif /* lexer options */ long line; long oline; unsigned long flags; long count_trigraphs; struct garbage_fifo *gf; int ifnest; int condnest; int condcomp; int condmet; unsigned long condf[2]; }; /* * Flags for struct lexer_state */ /* warning flags */ #define WARN_STANDARD 0x000001UL /* emit standard warnings */ #define WARN_ANNOYING 0x000002UL /* emit annoying warnings */ #define WARN_TRIGRAPHS 0x000004UL /* warn when trigraphs are used */ #define WARN_TRIGRAPHS_MORE 0x000008UL /* extra-warn for trigraphs */ #define WARN_PRAGMA 0x000010UL /* warn for pragmas in non-lexer mode */ /* error flags */ #define 
FAIL_SHARP 0x000020UL /* emit errors on rogue '#' */ #define CCHARSET 0x000040UL /* emit errors on non-C characters */ /* emission flags */ #define DISCARD_COMMENTS 0x000080UL /* discard comments from text output */ #define CPLUSPLUS_COMMENTS 0x000100UL /* understand C++-like comments */ #define LINE_NUM 0x000200UL /* emit #line directives in output */ #define GCC_LINE_NUM 0x000400UL /* same as #line, with gcc-syntax */ /* language flags */ #define HANDLE_ASSERTIONS 0x000800UL /* understand assertions */ #define HANDLE_PRAGMA 0x001000UL /* emit PRAGMA tokens in lexer mode */ #define MACRO_VAARG 0x002000UL /* understand macros with '...' */ #define UTF8_SOURCE 0x004000UL /* identifiers are in UTF8 encoding */ #define HANDLE_TRIGRAPHS 0x008000UL /* handle trigraphs */ /* global ucpp behaviour */ #define LEXER 0x010000UL /* behave as a lexer */ #define KEEP_OUTPUT 0x020000UL /* emit the result of preprocessing */ #define COPY_LINE 0x040000UL /* make a copy of the parsed line */ /* internal flags */ #define READ_AGAIN 0x080000UL /* emit again the last token */ #define TEXT_OUTPUT 0x100000UL /* output text */ /* * Public function prototypes */ #ifndef NO_UCPP_BUF void flush_output(struct lexer_state *); #endif void init_assertions(void); int make_assertion(char *); int destroy_assertion(char *); void print_assertions(void); void init_macros(void); int define_macro(struct lexer_state *, char *); int undef_macro(struct lexer_state *, char *); void print_defines(void); void set_init_filename(char *, int); void init_cpp(void); void init_include_path(char *[]); void init_lexer_state(struct lexer_state *); void init_lexer_mode(struct lexer_state *); void free_lexer_state(struct lexer_state *); void wipeout(void); int lex(struct lexer_state *); int check_cpp_errors(struct lexer_state *); void add_incpath(char *); void init_tables(int); int enter_file(struct lexer_state *, unsigned long); int cpp(struct lexer_state *); void set_identifier_char(int c); void 
unset_identifier_char(int c); #ifdef UCPP_MMAP FILE *fopen_mmap_file(char *); void set_input_file(struct lexer_state *, FILE *); #endif struct stack_context { char *long_name, *name; long line; }; struct stack_context *report_context(void); extern int no_special_macros, system_macros, emit_dependencies, emit_defines, emit_assertions; extern int c99_compliant, c99_hosted; extern FILE *emit_output; extern char *current_filename, *current_long_filename; extern char *operators_name[]; extern struct protect { char *macro; int state; struct found_file *ff; } protect_detect; void ucpp_ouch(char *, ...); void ucpp_error(long, char *, ...); void ucpp_warning(long, char *, ...); extern int *transient_characters; /* * Errors from CPPERR_EOF and above are not real erros, only show-stoppers. * Errors below CPPERR_EOF are real ones. */ #define CPPERR_NEST 900 #define CPPERR_EOF 1000 /* * This macro tells whether the name field of a given token type is * relevant, or not. Irrelevant name field means that it might point * to outerspace. */ #ifdef SEMPER_FIDELIS #define STRING_TOKEN(x) ((x) == NONE || ((x) >= COMMENT && (x) <= CHAR)) #else #define STRING_TOKEN(x) ((x) >= NUMBER && (x) <= CHAR) #endif #endif ./hash.h0000644000175000017500000000420011620140753010746 0ustar renerene/* * (c) Thomas Pornin 1998, 1999, 2000 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #ifndef UCPP__HASH__ #define UCPP__HASH__ struct hash_item; struct HT { struct hash_item **lists; int nb_lists; int (*cmpdata)(void *, void *); int (*hash)(void *); void (*deldata)(void *); }; int hash_string(char *); struct HT *newHT(int, int (*)(void *, void *), int (*)(void *), void (*)(void *)); void *putHT(struct HT *, void *); void *forceputHT(struct HT *, void *); void *getHT(struct HT *, void *); int delHT(struct HT *, void *); void killHT(struct HT *); void saveHT(struct HT *, void **); void restoreHT(struct HT *, void **); void tweakHT(struct HT *, void **, void *); void scanHT(struct HT *, void (*)(void *)); int hash_struct(void *); int cmp_struct(void *, void *); #endif ./ucppi.h0000644000175000017500000001340511620140753011152 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #ifndef UCPP__UCPPI__ #define UCPP__UCPPI__ #include "tune.h" #include "cpp.h" #include "nhash.h" /* * A macro represented in a compact form; simple tokens are represented * by one byte, containing their number. Tokens with a string value are * followed by the value (string finished by a 0). Macro arguments are * followed by the argument number (in one byte -- thus implying a hard * limit of 254 arguments (number 255 is for __VA_ARGS__). */ struct comp_token_fifo { size_t length; size_t rp; unsigned char *t; }; /* These declarations are used only internally by ucpp */ /* * S_TOKEN(x) checks whether x is a token type with an embedded string * ttMWS(x) checks whether x is macro whitespace (space, comment...) 
* ttWHI(x) checks whether x is whitespace (MWS or newline) */ #define S_TOKEN(x) STRING_TOKEN(x) #define ttMWS(x) ((x) == NONE || (x) == COMMENT || (x) == OPT_NONE) #define ttWHI(x) (ttMWS(x) || (x) == NEWLINE) /* * Function prototypes */ /* * from lexer.c */ #define init_cppm ucpp_init_cppm #define put_char ucpp_put_char #define discard_char ucpp_discard_char #define next_token ucpp_next_token #define grap_char ucpp_grap_char #define space_char ucpp_space_char void init_cppm(void); void put_char(struct lexer_state *, unsigned char); void discard_char(struct lexer_state *); int next_token(struct lexer_state *); int grap_char(struct lexer_state *); int space_char(int); /* * from assert.c */ struct assert { hash_item_header head; /* first field */ size_t nbval; struct token_fifo *val; }; #define cmp_token_list ucpp_cmp_token_list #define handle_assert ucpp_handle_assert #define handle_unassert ucpp_handle_unassert #define get_assertion ucpp_get_assertion #define wipe_assertions ucpp_wipe_assertions int cmp_token_list(struct token_fifo *, struct token_fifo *); int handle_assert(struct lexer_state *); int handle_unassert(struct lexer_state *); struct assert *get_assertion(char *); void wipe_assertions(void); /* * from macro.c */ struct macro { hash_item_header head; /* first field */ int narg; char **arg; int nest; int vaarg; #ifdef LOW_MEM struct comp_token_fifo cval; #else struct token_fifo val; #endif }; #define print_token ucpp_print_token #define handle_define ucpp_handle_define #define handle_undef ucpp_handle_undef #define handle_ifdef ucpp_handle_ifdef #define handle_ifndef ucpp_handle_ifndef #define substitute_macro ucpp_substitute_macro #define get_macro ucpp_get_macro #define wipe_macros ucpp_wipe_macros #define dsharp_lexer ucpp_dsharp_lexer #define compile_time ucpp_compile_time #define compile_date ucpp_compile_date #ifdef PRAGMA_TOKENIZE #define tokenize_lexer ucpp_tokenize_lexer #endif void print_token(struct lexer_state *, struct token *, long); int 
handle_define(struct lexer_state *); int handle_undef(struct lexer_state *); int handle_ifdef(struct lexer_state *); int handle_ifndef(struct lexer_state *); int substitute_macro(struct lexer_state *, struct macro *, struct token_fifo *, int, int, long); struct macro *get_macro(char *); void wipe_macros(void); extern struct lexer_state dsharp_lexer; extern char compile_time[], compile_date[]; #ifdef PRAGMA_TOKENIZE extern struct lexer_state tokenize_lexer; #endif /* * from eval.c */ #define strtoconst ucpp_strtoconst #define eval_expr ucpp_eval_expr #define eval_line ucpp_eval_line unsigned long strtoconst(char *); unsigned long eval_expr(struct token_fifo *, int *, int); extern long eval_line; #define eval_exception ucpp_eval_exception #ifdef POSIX_JMP #define JMP_BUF sigjmp_buf #define catch(x) sigsetjmp((x), 0) #define throw(x) siglongjmp((x), 1) #else #define JMP_BUF jmp_buf #define catch(x) setjmp((x)) #define throw(x) longjmp((x), 1) #endif extern JMP_BUF eval_exception; /* * from cpp.c */ #define token_name ucpp_token_name #define throw_away ucpp_throw_away #define garbage_collect ucpp_garbage_collect #define init_buf_lexer_state ucpp_init_buf_lexer_state #ifdef PRAGMA_TOKENIZE #define compress_token_list ucpp_compress_token_list #endif char *token_name(struct token *); void throw_away(struct garbage_fifo *, char *); void garbage_collect(struct garbage_fifo *); void init_buf_lexer_state(struct lexer_state *, int); #ifdef PRAGMA_TOKENIZE struct comp_token_fifo compress_token_list(struct token_fifo *); #endif #define ouch ucpp_ouch #define error ucpp_error #define warning ucpp_warning #endif ./mem.h0000644000175000017500000001105611620140753010610 0ustar renerene/* * (c) Thomas Pornin 1998 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #ifndef UCPP__MEM__ #define UCPP__MEM__ #include void die(void); #if defined AUDIT || defined MEM_CHECK || defined MEM_DEBUG void *getmem(size_t); #else #define getmem malloc #endif #if defined MEM_DEBUG void *getmem_debug(size_t, char *, int); #undef getmem #define getmem(x) getmem_debug(x, __FILE__, __LINE__) #endif #if defined AUDIT || defined MEM_DEBUG void freemem(void *); #else #define freemem free #endif #if defined MEM_DEBUG void freemem_debug(void *, char *, int); #undef freemem #define freemem(x) freemem_debug(x, __FILE__, __LINE__) #endif void *incmem(void *, size_t, size_t); char *sdup(char *); #if defined MEM_DEBUG void *incmem_debug(void *, size_t, size_t, char *, int); #undef incmem #define incmem(x, y, z) incmem_debug(x, y, z, __FILE__, __LINE__) void report_leaks(void); char *sdup_debug(char *, char *, int); #define sdup(x) sdup_debug(x, __FILE__, __LINE__) #endif #ifdef AUDIT void *mmv(void *, void *, size_t); void *mmvwo(void *, void *, size_t); #else #define mmv memcpy #define mmvwo memmove #endif /* * this macro adds the object obj at the end of the array list, handling * memory allocation when needed; ptr contains the number of elements in * the array, and memg is the granularity of memory allocations (a power * of 2 is recommanded, for optimization reasons). * * list and ptr may be updated, and thus need to be lvalues. */ #define aol(list, ptr, obj, memg) do { \ if (((ptr) % (memg)) == 0) { \ if ((ptr) != 0) { \ (list) = incmem((list), (ptr) * sizeof(obj), \ ((ptr) + (memg)) * sizeof(obj)); \ } else { \ (list) = getmem((memg) * sizeof(obj)); \ } \ } \ (list)[(ptr) ++] = (obj); \ } while (0) /* * bol() does the same as aol(), but adds the new item at the beginning * of the list; beware, the computational cost is greater. 
*/ #define bol(list, ptr, obj, memg) do { \ if (((ptr) % (memg)) == 0) { \ if ((ptr) != 0) { \ (list) = incmem((list), (ptr) * sizeof(obj), \ ((ptr) + (memg)) * sizeof(obj)); \ } else { \ (list) = getmem((memg) * sizeof(obj)); \ } \ } \ if ((ptr) != 0) \ mmvwo((list) + 1, (list), (ptr) * sizeof(obj)); \ (ptr) ++; \ (list)[0] = (obj); \ } while (0) /* * mbol() does the same as bol(), but adds the new item at the given * emplacement; bol() is equivalent to mbol with 0 as last argument. */ #define mbol(list, ptr, obj, memg, n) do { \ if (((ptr) % (memg)) == 0) { \ if ((ptr) != 0) { \ (list) = incmem((list), (ptr) * sizeof(obj), \ ((ptr) + (memg)) * sizeof(obj)); \ } else { \ (list) = getmem((memg) * sizeof(obj)); \ } \ } \ if ((ptr) > n) \ mmvwo((list) + n + 1, (list) + n, \ ((ptr) - n) * sizeof(obj)); \ (ptr) ++; \ (list)[n] = (obj); \ } while (0) /* * this macro adds the object obj at the end of the array list, doubling * the size of list when needed; as for aol(), ptr and list must be * lvalues, and so must be llng */ #define wan(list, ptr, obj, llng) do { \ if ((ptr) == (llng)) { \ (llng) += (llng); \ (list) = incmem((list), (ptr) * sizeof(obj), \ (llng) * sizeof(obj)); \ } \ (list)[(ptr) ++] = (obj); \ } while (0) #endif ./eval.c0000644000175000017500000004203611620140753010756 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "tune.h" #include #include #include #include #include "ucppi.h" #include "mem.h" JMP_BUF eval_exception; long eval_line; static int emit_eval_warnings; /* * If you want to hardcode a conversion table, define a static array * of 256 int, and make transient_characters point to it. */ int *transient_characters = 0; #define OCTAL(x) ((x) >= '0' && (x) <= '7') #define DECIM(x) ((x) >= '0' && (x) <= '9') #define HEXAD(x) (DECIM(x) \ || (x) == 'a' || (x) == 'b' || (x) == 'c' \ || (x) == 'd' || (x) == 'e' || (x) == 'f' \ || (x) == 'A' || (x) == 'B' || (x) == 'C' \ || (x) == 'D' || (x) == 'E' || (x) == 'F') #define OVAL(x) ((int)((x) - '0')) #define DVAL(x) ((int)((x) - '0')) #define HVAL(x) (DECIM(x) ? DVAL(x) \ : (x) == 'a' || (x) == 'A' ? 10 \ : (x) == 'b' || (x) == 'B' ? 11 \ : (x) == 'c' || (x) == 'C' ? 12 \ : (x) == 'd' || (x) == 'D' ? 13 \ : (x) == 'e' || (x) == 'E' ? 
14 : 15) #define ARITH_TYPENAME big #define ARITH_FUNCTION_HEADER static inline #define ARITH_ERROR(type) z_error(type) static void z_error(int type); #ifdef ARITHMETIC_CHECKS #define ARITH_WARNING(type) z_warn(type) static void z_warn(int type); #endif #include "arith.c" static void z_error(int type) { switch (type) { case ARITH_EXCEP_SLASH_D: error(eval_line, "division by 0"); break; case ARITH_EXCEP_SLASH_O: error(eval_line, "overflow on division"); break; case ARITH_EXCEP_PCT_D: error(eval_line, "division by 0 on modulus operator"); break; case ARITH_EXCEP_CONST_O: error(eval_line, "constant too large for destination type"); break; #ifdef AUDIT default: ouch("erroneous integer error: %d", type); #endif } throw(eval_exception); } #ifdef ARITHMETIC_CHECKS static void z_warn(int type) { switch (type) { case ARITH_EXCEP_CONV_O: warning(eval_line, "overflow on integer conversion"); break; case ARITH_EXCEP_NEG_O: warning(eval_line, "overflow on unary minus"); break; case ARITH_EXCEP_NOT_T: warning(eval_line, "bitwise inversion yields trap representation"); break; case ARITH_EXCEP_PLUS_O: warning(eval_line, "overflow on addition"); break; case ARITH_EXCEP_PLUS_U: warning(eval_line, "underflow on addition"); break; case ARITH_EXCEP_MINUS_O: warning(eval_line, "overflow on subtraction"); break; case ARITH_EXCEP_MINUS_U: warning(eval_line, "underflow on subtraction"); break; case ARITH_EXCEP_AND_T: warning(eval_line, "bitwise AND yields trap representation"); break; case ARITH_EXCEP_XOR_T: warning(eval_line, "bitwise XOR yields trap representation"); break; case ARITH_EXCEP_OR_T: warning(eval_line, "bitwise OR yields trap representation"); break; case ARITH_EXCEP_LSH_W: warning(eval_line, "left shift count greater than " "or equal to type width"); break; case ARITH_EXCEP_LSH_C: warning(eval_line, "left shift count negative"); break; case ARITH_EXCEP_LSH_O: warning(eval_line, "overflow on left shift"); break; case ARITH_EXCEP_RSH_W: warning(eval_line, "right shift count 
greater than " "or equal to type width"); break; case ARITH_EXCEP_RSH_C: warning(eval_line, "right shift count negative"); break; case ARITH_EXCEP_RSH_N: warning(eval_line, "right shift of negative value"); break; case ARITH_EXCEP_STAR_O: warning(eval_line, "overflow on multiplication"); break; case ARITH_EXCEP_STAR_U: warning(eval_line, "underflow on multiplication"); break; #ifdef AUDIT default: ouch("erroneous integer warning: %d", type); #endif } } #endif typedef struct { int sign; union { u_big uv; s_big sv; } u; } ppval; static int boolval(ppval x) { return x.sign ? big_s_lval(x.u.sv) : big_u_lval(x.u.uv); } #if !defined(WCHAR_SIGNEDNESS) # if CHAR_MIN == 0 # define WCHAR_SIGNEDNESS 0 # else # define WCHAR_SIGNEDNESS 1 # endif #endif /* * Check the suffix, return 1 if it is signed, 0 otherwise. 1 is * returned for a void suffix. Legal suffixes are: * unsigned: u U ul uL Ul UL lu Lu lU LU ull uLL Ull ULL llu LLu llU LLU * signed: l L ll LL */ static int pp_suffix(char *d, char *refc) { if (!*d) return 1; if (*d == 'u' || *d == 'U') { if (!*(++ d)) return 0; if (*d == 'l' || *d == 'L') { char *e = d + 1; if (*e && *e != *d) goto suffix_error; if (!*e || !*(e + 1)) return 0; goto suffix_error; } goto suffix_error; } if (*d == 'l' || *d == 'L') { if (!*(++ d)) return 1; if (*d == *(d - 1)) { d ++; if (!*d) return 1; } if (*d == 'u' || *d == 'U') { d ++; if (!*d) return 0; } goto suffix_error; } suffix_error: error(eval_line, "invalid integer constant '%s'", refc); throw(eval_exception); return 666; } static unsigned long pp_char(char *c, char *refc) { unsigned long r = 0; c ++; if (*c == '\\') { int i; c ++; switch (*c) { case 'n': r = '\n'; c ++; break; case 't': r = '\t'; c ++; break; case 'v': r = '\v'; c ++; break; case 'b': r = '\b'; c ++; break; case 'r': r = '\r'; c ++; break; case 'f': r = '\f'; c ++; break; case 'a': r = '\a'; c ++; break; case '\\': r = '\\'; c ++; break; case '\?': r = '\?'; c ++; break; case '\'': r = '\''; c ++; break; case '\"': r = 
'\"'; c ++; break; case 'u': for (i = 0, c ++; i < 4 && HEXAD(*c); i ++, c ++) { r = (r * 16) + HVAL(*c); } if (i != 4) { error(eval_line, "malformed UCN in %s", refc); throw(eval_exception); } break; case 'U': for (i = 0, c ++; i < 8 && HEXAD(*c); i ++, c ++) { r = (r * 16) + HVAL(*c); } if (i != 8) { error(eval_line, "malformed UCN in %s", refc); throw(eval_exception); } break; case 'x': for (c ++; HEXAD(*c); c ++) r = (r * 16) + HVAL(*c); break; default: if (OCTAL(*c)) { r = OVAL(*(c ++)); if (OCTAL(*c)) r = (r * 8) + OVAL(*(c ++)); if (OCTAL(*c)) r = (r * 8) + OVAL(*(c ++)); } else { error(eval_line, "invalid escape sequence " "'\\%c'", *c); throw(eval_exception); } } } else if (*c == '\'') { error(eval_line, "empty character constant"); throw(eval_exception); } else { r = *((unsigned char *)(c ++)); } if (transient_characters && r < 256) { r = transient_characters[(size_t)r]; } if (*c != '\'' && emit_eval_warnings) { warning(eval_line, "multicharacter constant"); } return r; } static ppval pp_strtoconst(char *refc) { ppval q; char *c = refc, *d; u_big ru; s_big rs; int sp, dec; if (*c == '\'' || *c == 'L') { q.sign = (*c == 'L') ? 
WCHAR_SIGNEDNESS : 1; if (*c == 'L' && *(++ c) != '\'') { error(eval_line, "invalid wide character constant: %s", refc); throw(eval_exception); } if (q.sign) { q.u.sv = big_s_fromlong(pp_char(c, refc)); } else { q.u.uv = big_u_fromulong(pp_char(c, refc)); } return q; } if (*c == '0') { /* octal or hexadecimal */ dec = 0; c ++; if (*c == 'x' || *c == 'X') { c ++; d = big_u_hexconst(c, &ru, &rs, &sp); } else { d = big_u_octconst(c, &ru, &rs, &sp); } } else { dec = 1; d = big_u_decconst(c, &ru, &rs, &sp); } q.sign = pp_suffix(d, refc); if (q.sign) { if (!sp) { if (dec) { error(eval_line, "constant too large " "for destination type"); throw(eval_exception); } else { warning(eval_line, "constant is so large " "that it is unsigned"); } q.u.uv = ru; q.sign = 0; } else { q.u.sv = rs; } } else { q.u.uv = ru; } return q; } /* * Used by #line directives -- anything beyond what can be put in an * unsigned long, is considered absurd. */ unsigned long strtoconst(char *c) { ppval q = pp_strtoconst(c); if (q.sign) q.u.uv = big_s_to_u(q.u.sv); return big_u_toulong(q.u.uv); } #define OP_UN(x) ((x) == LNOT || (x) == NOT || (x) == UPLUS \ || (x) == UMINUS) static ppval eval_opun(int op, ppval v) { if (op == LNOT) { v.sign = 1; v.u.sv = big_s_fromint(big_s_lnot(v.u.sv)); return v; } if (v.sign) { switch (op) { case NOT: v.u.sv = big_s_not(v.u.sv); break; case UPLUS: break; case UMINUS: v.u.sv = big_s_neg(v.u.sv); break; } } else { switch (op) { case NOT: v.u.uv = big_u_not(v.u.uv); break; case UPLUS: break; case UMINUS: v.u.uv = big_u_neg(v.u.uv); break; } } return v; } #define OP_BIN(x) ((x) == STAR || (x) == SLASH || (x) == PCT \ || (x) == PLUS || (x) == MINUS || (x) == LSH \ || (x) == RSH || (x) == LT || (x) == LEQ \ || (x) == GT || (x) == GEQ || (x) == SAME \ || (x) == NEQ || (x) == AND || (x) == CIRC \ || (x) == OR || (x) == LAND || (x) == LOR \ || (x) == COMMA) static ppval eval_opbin(int op, ppval v1, ppval v2) { ppval r; int iv2 = 0; switch (op) { case STAR: case SLASH: case 
PCT: case PLUS: case MINUS: case AND: case CIRC: case OR: /* promote operands, adjust signedness of result */ if (!v1.sign || !v2.sign) { if (v1.sign) { v1.u.uv = big_s_to_u(v1.u.sv); v1.sign = 0; } else if (v2.sign) { v2.u.uv = big_s_to_u(v2.u.sv); v2.sign = 0; } r.sign = 0; } else { r.sign = 1; } break; case LT: case LEQ: case GT: case GEQ: case SAME: case NEQ: /* promote operands */ if (!v1.sign || !v2.sign) { if (v1.sign) { v1.u.uv = big_s_to_u(v1.u.sv); v1.sign = 0; } else if (v2.sign) { v2.u.uv = big_s_to_u(v2.u.sv); v2.sign = 0; } } /* fall through */ case LAND: case LOR: /* result is signed anyway */ r.sign = 1; break; case LSH: case RSH: /* result is as signed as left operand; convert right operand to int */ r.sign = v1.sign; if (v2.sign) { iv2 = big_s_toint(v2.u.sv); } else { iv2 = big_u_toint(v2.u.uv); } break; case COMMA: if (emit_eval_warnings) { warning(eval_line, "ISO C forbids evaluated comma " "operators in #if expressions"); } r.sign = v2.sign; break; #ifdef AUDIT default: ouch("a good operator is a dead operator"); #endif } #define SBINOP(x) if (r.sign) r.u.sv = big_s_ ## x (v1.u.sv, v2.u.sv); \ else r.u.uv = big_u_ ## x (v1.u.uv, v2.u.uv); #define NSSBINOP(x) if (v1.sign) r.u.sv = big_s_fromint(big_s_ ## x \ (v1.u.sv, v2.u.sv)); else r.u.sv = big_s_fromint( \ big_u_ ## x (v1.u.uv, v2.u.uv)); #define LBINOP(x) if (v1.sign) r.u.sv = big_s_fromint( \ big_s_lval(v1.u.sv) x big_s_lval(v2.u.sv)); \ else r.u.sv = big_s_fromint( \ big_u_lval(v1.u.uv) x big_u_lval(v2.u.uv)); #define ABINOP(x) if (r.sign) r.u.sv = big_s_ ## x (v1.u.sv, iv2); \ else r.u.uv = big_u_ ## x (v1.u.uv, iv2); switch (op) { case STAR: SBINOP(star); break; case SLASH: SBINOP(slash); break; case PCT: SBINOP(pct); break; case PLUS: SBINOP(plus); break; case MINUS: SBINOP(minus); break; case LSH: ABINOP(lsh); break; case RSH: ABINOP(rsh); break; case LT: NSSBINOP(lt); break; case LEQ: NSSBINOP(leq); break; case GT: NSSBINOP(gt); break; case GEQ: NSSBINOP(geq); break; case SAME: 
NSSBINOP(same); break; case NEQ: NSSBINOP(neq); break; case AND: SBINOP(and); break; case CIRC: SBINOP(xor); break; case OR: SBINOP(or); break; case LAND: LBINOP(&&); break; case LOR: LBINOP(||); break; case COMMA: r = v2; break; } return r; } #define ttOP(x) (OP_UN(x) || OP_BIN(x) || (x) == QUEST || (x) == COLON) static int op_prec(int op) { switch (op) { case LNOT: case NOT: case UPLUS: case UMINUS: return 13; case STAR: case SLASH: case PCT: return 12; case PLUS: case MINUS: return 11; case LSH: case RSH: return 10; case LT: case LEQ: case GT: case GEQ: return 9; case SAME: case NEQ: return 8; case AND: return 7; case CIRC: return 6; case OR: return 5; case LAND: return 4; case LOR: return 3; case QUEST: return 2; case COMMA: return 1; } #ifdef AUDIT ouch("an unknown species should have a higher precedence"); #endif return 666; } /* * Perform the hard work of evaluation. * * This function works because: * -- all unary operators are right to left associative, and with * identical precedence * -- all binary operators are left to right associative * -- there is only one non-unary and non-binary operator: the quest-colon * * If do_eval is 0, the evaluation of operators is not done. This is * for sequence point operators (&&, || and ?:). 
*/ static ppval eval_shrd(struct token_fifo *tf, int minprec, int do_eval) { ppval top; struct token *ct; top.sign = 1; if (tf->art == tf->nt) goto trunc_err; ct = tf->t + (tf->art ++); if (ct->type == LPAR) { top = eval_shrd(tf, 0, do_eval); if (tf->art == tf->nt) goto trunc_err; ct = tf->t + (tf->art ++); if (ct->type != RPAR) { error(eval_line, "a right parenthesis was expected"); throw(eval_exception); } } else if (ct->type == NUMBER || ct->type == CHAR) { top = pp_strtoconst(ct->name); } else if (OP_UN(ct->type)) { top = eval_opun(ct->type, eval_shrd(tf, op_prec(ct->type), do_eval)); goto eval_loop; } else if (ttOP(ct->type)) goto rogue_op_err; else { goto invalid_token_err; } eval_loop: if (tf->art == tf->nt) { return top; } ct = tf->t + (tf->art ++); if (OP_BIN(ct->type)) { int bp = op_prec(ct->type); if (bp > minprec) { ppval tr; if ((ct->type == LOR && boolval(top)) || (ct->type == LAND && !boolval(top))) { tr = eval_shrd(tf, bp, 0); if (do_eval) { top.sign = 1; if (ct->type == LOR) top.u.sv = big_s_fromint(1); if (ct->type == LAND) top.u.sv = big_s_fromint(0); } } else { tr = eval_shrd(tf, bp, do_eval); if (do_eval) top = eval_opbin(ct->type, top, tr); } goto eval_loop; } } else if (ct->type == QUEST) { int bp = op_prec(QUEST); ppval r1, r2; if (bp >= minprec) { int qv = boolval(top); r1 = eval_shrd(tf, bp, qv ? do_eval : 0); if (tf->art == tf->nt) goto trunc_err; ct = tf->t + (tf->art ++); if (ct->type != COLON) { error(eval_line, "a colon was expected"); throw(eval_exception); } r2 = eval_shrd(tf, bp, qv ? 
0 : do_eval); if (do_eval) { if (qv) top = r1; else top = r2; } goto eval_loop; } } tf->art --; return top; trunc_err: error(eval_line, "truncated constant integral expression"); throw(eval_exception); rogue_op_err: error(eval_line, "rogue operator '%s' in constant integral " "expression", operators_name[ct->type]); throw(eval_exception); invalid_token_err: error(eval_line, "invalid token in constant integral expression"); throw(eval_exception); } #define UNARY(x) ((x) != NUMBER && (x) != NAME && (x) != CHAR \ && (x) != RPAR) /* * Evaluate the integer expression contained in the given token_fifo. * Evaluation is made by precedence of operators, as described in the * Dragon Book. The unary + and - are distinguished from their binary * counterparts using the Fortran way: a + or a - is considered unary * if it does not follow a constant, an identifier or a right parenthesis. */ unsigned long eval_expr(struct token_fifo *tf, int *ret, int ew) { size_t sart; ppval r; emit_eval_warnings = ew; if (catch(eval_exception)) goto eval_err; /* first, distinguish unary + and - from binary + and - */ for (sart = tf->art; tf->art < tf->nt; tf->art ++) { if (tf->t[tf->art].type == PLUS) { if (sart == tf->art || UNARY(tf->t[tf->art - 1].type)) tf->t[tf->art].type = UPLUS; } else if (tf->t[tf->art].type == MINUS) { if (sart == tf->art || UNARY(tf->t[tf->art - 1].type)) tf->t[tf->art].type = UMINUS; } } tf->art = sart; r = eval_shrd(tf, 0, 1); if (tf->art < tf->nt) { error(eval_line, "trailing garbage in constant integral " "expression"); goto eval_err; } *ret = 0; return boolval(r); eval_err: *ret = 1; return 0; } ./sample.c0000644000175000017500000000610011620140753011300 0ustar renerene/* * Sample code showing how to use ucpp as an integrated lexer. * This file is public domain. */ /* * This is an example of how to use ucpp as a preprocessor and lexer * into another project. The steps are those described in ucpp README * file. 
To use this code, compile the ucpp source files with * STAND_ALONE not defined, and link them with this code. The resulting * binary will take a C source file as standard input, preprocess it, * and output each non-whitespace token on stdout, with its numerical * value (defined as an enum in cpp.h) and its contents. This code * defines no system include path. * * This code supposes that the ucpp files are compiled with PRAGMA_TOKENIZE * enabled (see the tune.h file). */ #include #include #include #include "mem.h" #include "cpp.h" int main(int argc, char *argv[]) { int i, r; struct lexer_state ls; /* step 1 */ init_cpp(); /* step 2 */ no_special_macros = 0; emit_defines = emit_assertions = 0; /* step 3 -- with assertions */ init_tables(1); /* step 4 -- no default include path */ init_include_path(0); /* step 5 -- no need to reset the two emit_* variables set in 2 */ emit_dependencies = 0; /* step 6 -- we work with stdin, this is not a real filename */ set_init_filename("[stdin]", 0); /* step 7 -- we make sure that assertions are on, and pragma are handled */ init_lexer_state(&ls); init_lexer_mode(&ls); ls.flags |= HANDLE_ASSERTIONS | HANDLE_PRAGMA | LINE_NUM; /* step 8 -- input is from stdin */ ls.input = stdin; /* step 9 -- we do not have any macro to define, but we add any argument as an include path */ for (i = 1; i < argc; i ++) add_incpath(argv[i]); /* step 10 -- we are a lexer and we want CONTEXT tokens */ enter_file(&ls, ls.flags); /* read tokens until end-of-input is reached -- errors (non-zero return values different from CPPERR_EOF) are ignored */ while ((r = lex(&ls)) < CPPERR_EOF) { if (r) { /* error condition -- no token was retrieved */ continue; } /* we print each token: its numerical value, and its string content; if this is a PRAGMA token, the string content is in fact a compressed token list, that we uncompress and print. 
*/ if (ls.ctok->type == PRAGMA) { unsigned char *c = (unsigned char *)(ls.ctok->name); printf("line %ld: <#pragma>\n", ls.line); for (; *c; c ++) { int t = *c; if (STRING_TOKEN(t)) { printf(" <%2d> ", t); for (c ++; *c != PRAGMA_TOKEN_END; c ++) putchar(*c); putchar('\n'); } else { printf(" <%2d> `%s'\n", t, operators_name[t]); } } } else if (ls.ctok->type == CONTEXT) { printf("new context: file '%s', line %ld\n", ls.ctok->name, ls.ctok->line); } else if (ls.ctok->type == NEWLINE) { printf("[newline]\n"); } else { printf("line %ld: <%2d> `%s'\n", ls.ctok->line, ls.ctok->type, STRING_TOKEN(ls.ctok->type) ? ls.ctok->name : operators_name[ls.ctok->type]); } } /* give back memory and exit */ wipeout(); free_lexer_state(&ls); #ifdef MEM_DEBUG report_leaks(); #endif return 0; } ./cpp.c0000644000175000017500000017643411620140753010623 0ustar renerene/* * C and T preprocessor, and integrated lexer * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #define VERS_MAJ 1 #define VERS_MIN 3 /* uncomment the following if you cannot set it with a compiler flag */ /* #define STAND_ALONE */ #include "tune.h" #include #include #include #include #include #include #include #include "ucppi.h" #include "mem.h" #include "nhash.h" #ifdef UCPP_MMAP #include #include #include #include #endif /* * The standard path where includes are looked for. */ #ifdef STAND_ALONE static char *include_path_std[] = { STD_INCLUDE_PATH, 0 }; #endif static char **include_path; static size_t include_path_nb = 0; int no_special_macros = 0; int emit_dependencies = 0, emit_defines = 0, emit_assertions = 0; FILE *emit_output; #ifdef STAND_ALONE static char *system_macros_def[] = { STD_MACROS, 0 }; static char *system_assertions_def[] = { STD_ASSERT, 0 }; #endif char *current_filename = 0, *current_long_filename = 0; static int current_incdir = -1; #ifndef NO_UCPP_ERROR_FUNCTIONS /* * "ouch" is the name for an internal ucpp error. If AUDIT is not defined, * no code calling this function will be generated; a "ouch" may still be * emitted by getmem() (in mem.c) if MEM_CHECK is defined, but this "ouch" * does not use this function. */ void ucpp_ouch(char *fmt, ...) { va_list ap; va_start(ap, fmt); fprintf(stderr, "%s: ouch, ", current_filename); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); die(); } /* * report an error, with current_filename, line, and printf-like syntax */ void ucpp_error(long line, char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); if (line > 0) fprintf(stderr, "%s: line %ld: ", current_filename, line); else if (line == 0) fprintf(stderr, "%s: ", current_filename); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); if (line >= 0) { struct stack_context *sc = report_context(); size_t i; for (i = 0; sc[i].line >= 0; i ++) fprintf(stderr, "\tincluded from %s:%ld\n", sc[i].long_name ? sc[i].long_name : sc[i].name, sc[i].line); freemem(sc); } va_end(ap); } /* * like error(), with the mention "warning" */ void ucpp_warning(long line, char *fmt, ...) { va_list ap; va_start(ap, fmt); if (line > 0) fprintf(stderr, "%s: warning: line %ld: ", current_filename, line); else if (line == 0) fprintf(stderr, "%s: warning: ", current_filename); else fprintf(stderr, "warning: "); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); if (line >= 0) { struct stack_context *sc = report_context(); size_t i; for (i = 0; sc[i].line >= 0; i ++) fprintf(stderr, "\tincluded from %s:%ld\n", sc[i].long_name ? sc[i].long_name : sc[i].name, sc[i].line); freemem(sc); } va_end(ap); } #endif /* NO_UCPP_ERROR_FUNCTIONS */ /* * Some memory allocations are manually garbage-collected; essentially, * strings duplicated in the process of macro replacement. Each such * string is referenced in the garbage_fifo, which is cleared when all * nested macros have been resolved. 
/*
 * Release every string currently referenced in `gf', and mark the
 * list as empty. The array of references itself is kept.
 */
void garbage_collect(struct garbage_fifo *gf)
{
	size_t idx = 0;

	while (idx < gf->ngarb) {
		freemem(gf->garbage[idx]);
		idx ++;
	}
	gf->ngarb = 0;
}
*/ struct found_file { hash_item_header head; /* first field */ char *name; char *protect; }; /* * For files from system include path. */ struct found_file_sys { hash_item_header head; /* first field */ struct found_file *rff; int incdir; }; static HTT found_files, found_files_sys; static int found_files_init_done = 0, found_files_sys_init_done = 0; static struct found_file *new_found_file(void) { struct found_file *ff = getmem(sizeof(struct found_file)); ff->name = 0; ff->protect = 0; return ff; } static void del_found_file(void *m) { struct found_file *ff = (struct found_file *)m; if (ff->name) freemem(ff->name); if (ff->protect) freemem(ff->protect); freemem(ff); } static struct found_file_sys *new_found_file_sys(void) { struct found_file_sys *ffs = getmem(sizeof(struct found_file_sys)); ffs->rff = 0; ffs->incdir = -1; return ffs; } static void del_found_file_sys(void *m) { struct found_file_sys *ffs = (struct found_file_sys *)m; freemem(ffs); } /* * To keep up with the #ifndef/#define/#endif protection mechanism * detection. */ struct protect protect_detect; static struct protect *protect_detect_stack = 0; void set_init_filename(char *x, int real_file) { if (current_filename) freemem(current_filename); current_filename = sdup(x); current_long_filename = 0; current_incdir = -1; if (real_file) { protect_detect.macro = 0; protect_detect.state = 1; protect_detect.ff = new_found_file(); protect_detect.ff->name = sdup(x); HTT_put(&found_files, protect_detect.ff, x); } else { protect_detect.state = 0; } } static void init_found_files(void) { if (found_files_init_done) HTT_kill(&found_files); HTT_init(&found_files, del_found_file); found_files_init_done = 1; if (found_files_sys_init_done) HTT_kill(&found_files_sys); HTT_init(&found_files_sys, del_found_file_sys); found_files_sys_init_done = 1; } /* * Set the lexer state at the beginning of a file. */ static void reinit_lexer_state(struct lexer_state *ls, int wb) { #ifndef NO_UCPP_BUF ls->input_buf = wb ? 
getmem(INPUT_BUF_MEMG) : 0; #ifdef UCPP_MMAP ls->from_mmap = 0; #endif #endif ls->input = 0; ls->ebuf = ls->pbuf = 0; ls->nlka = 0; ls->macfile = 0; ls->discard = 1; ls->last = 0; /* we suppose '\n' is not 0 */ ls->line = 1; ls->ltwnl = 1; ls->oline = 1; ls->pending_token = 0; ls->cli = 0; ls->copy_line[COPY_LINE_LENGTH - 1] = 0; ls->ifnest = 0; ls->condf[0] = ls->condf[1] = 0; } /* * Initialize the struct lexer_state, with optional input and output buffers. */ void init_buf_lexer_state(struct lexer_state *ls, int wb) { reinit_lexer_state(ls, wb); #ifndef NO_UCPP_BUF ls->output_buf = wb ? getmem(OUTPUT_BUF_MEMG) : 0; #endif ls->sbuf = 0; ls->output_fifo = 0; ls->ctok = getmem(sizeof(struct token)); ls->ctok->name = getmem(ls->tknl = TOKEN_NAME_MEMG); ls->pending_token = 0; ls->flags = 0; ls->count_trigraphs = 0; ls->gf = getmem(sizeof(struct garbage_fifo)); init_garbage_fifo(ls->gf); ls->condcomp = 1; ls->condnest = 0; #ifdef INMACRO_FLAG ls->inmacro = 0; ls->macro_count = 0; #endif } /* * Initialize the (complex) struct lexer_state. */ void init_lexer_state(struct lexer_state *ls) { init_buf_lexer_state(ls, 1); ls->input = 0; } /* * Restore what is needed from a lexer_state. This is used for #include. 
*/ static void restore_lexer_state(struct lexer_state *ls, struct lexer_state *lsbak) { #ifndef NO_UCPP_BUF freemem(ls->input_buf); ls->input_buf = lsbak->input_buf; #ifdef UCPP_MMAP ls->from_mmap = lsbak->from_mmap; ls->input_buf_sav = lsbak->input_buf_sav; #endif #endif ls->input = lsbak->input; ls->ebuf = lsbak->ebuf; ls->pbuf = lsbak->pbuf; ls->nlka = lsbak->nlka; ls->discard = lsbak->discard; ls->line = lsbak->line; ls->oline = lsbak->oline; ls->ifnest = lsbak->ifnest; ls->condf[0] = lsbak->condf[0]; ls->condf[1] = lsbak->condf[1]; } /* * close input file operations on a struct lexer_state */ static void close_input(struct lexer_state *ls) { #ifdef UCPP_MMAP if (ls->from_mmap) { munmap((void *)ls->input_buf, ls->ebuf); ls->from_mmap = 0; ls->input_buf = ls->input_buf_sav; } #endif if (ls->input) { fclose(ls->input); ls->input = 0; } } /* * file_context (and the two functions push_ and pop_) are used to save * all that is needed when including a file. */ static struct file_context { struct lexer_state ls; char *name, *long_name; int incdir; } *ls_stack; static size_t ls_depth = 0; static void push_file_context(struct lexer_state *ls) { struct file_context fc; fc.name = current_filename; fc.long_name = current_long_filename; fc.incdir = current_incdir; mmv(&(fc.ls), ls, sizeof(struct lexer_state)); aol(ls_stack, ls_depth, fc, LS_STACK_MEMG); ls_depth --; aol(protect_detect_stack, ls_depth, protect_detect, LS_STACK_MEMG); protect_detect.macro = 0; } static void pop_file_context(struct lexer_state *ls) { #ifdef AUDIT if (ls_depth <= 0) ouch("prepare to meet thy creator"); #endif close_input(ls); restore_lexer_state(ls, &(ls_stack[-- ls_depth].ls)); if (protect_detect.macro) freemem(protect_detect.macro); protect_detect = protect_detect_stack[ls_depth]; if (current_filename) freemem(current_filename); current_filename = ls_stack[ls_depth].name; current_long_filename = ls_stack[ls_depth].long_name; current_incdir = ls_stack[ls_depth].incdir; if (ls_depth == 0) { 
freemem(ls_stack); freemem(protect_detect_stack); } } /* * report_context() returns the list of successive includers of the * current file, ending with a dummy entry with a negative line number. * The caller is responsible for freeing the returned pointer. */ struct stack_context *report_context(void) { struct stack_context *sc; size_t i; sc = getmem((ls_depth + 1) * sizeof(struct stack_context)); for (i = 0; i < ls_depth; i ++) { sc[i].name = ls_stack[ls_depth - i - 1].name; sc[i].long_name = ls_stack[ls_depth - i - 1].long_name; sc[i].line = ls_stack[ls_depth - i - 1].ls.line - 1; } sc[ls_depth].line = -1; return sc; } /* * init_lexer_mode() is used to end initialization of a struct lexer_state * if it must be used for a lexer */ void init_lexer_mode(struct lexer_state *ls) { ls->flags = DEFAULT_LEXER_FLAGS; ls->output_fifo = getmem(sizeof(struct token_fifo)); ls->output_fifo->art = ls->output_fifo->nt = 0; ls->toplevel_of = ls->output_fifo; ls->save_ctok = ls->ctok; } /* * release memory used by a struct lexer_state; this implies closing * any input stream held by this structure. */ void free_lexer_state(struct lexer_state *ls) { close_input(ls); #ifndef NO_UCPP_BUF if (ls->input_buf) { freemem(ls->input_buf); ls->input_buf = 0; } if (ls->output_buf) { freemem(ls->output_buf); ls->output_buf = 0; } #endif if (ls->ctok && (!ls->output_fifo || ls->output_fifo->nt == 0)) { freemem(ls->ctok->name); freemem(ls->ctok); ls->ctok = 0; } if (ls->gf) { free_garbage_fifo(ls->gf); ls->gf = 0; } if (ls->output_fifo) { freemem(ls->output_fifo); ls->output_fifo = 0; } } /* * Print line information. */ static void print_line_info(struct lexer_state *ls, unsigned long flags) { char *fn = current_long_filename ? 
current_long_filename : current_filename; char *b, *d; b = getmem(50 + strlen(fn)); if (flags & GCC_LINE_NUM) { sprintf(b, "# %ld \"%s\"\n", ls->line, fn); } else { sprintf(b, "#line %ld \"%s\"\n", ls->line, fn); } for (d = b; *d; d ++) put_char(ls, (unsigned char)(*d)); freemem(b); } /* * Enter a file; this implies the possible emission of a #line directive. * The flags used are passed as second parameter instead of being * extracted from the struct lexer_state. * * As a command-line option, gcc-like directives (with only a '#', * without 'line') may be produced. * * enter_file() returns 1 if a (CONTEXT) token was produced, 0 otherwise. */ int enter_file(struct lexer_state *ls, unsigned long flags) { char *fn = current_long_filename ? current_long_filename : current_filename; if (!(flags & LINE_NUM)) return 0; if ((flags & LEXER) && !(flags & TEXT_OUTPUT)) { struct token t; t.type = CONTEXT; t.line = ls->line; t.name = fn; print_token(ls, &t, 0); return 1; } print_line_info(ls, flags); ls->oline --; /* emitted #line troubled oline */ return 0; } #ifdef UCPP_MMAP /* * We open() the file, then fdopen() it and fseek() to its end. If the * fseek() worked, we try to mmap() the file, up to the point where we * arrived. * On an architecture where end-of-lines are multibytes and translated * into single '\n', bad things could happen. We strongly hope that, if * we could fseek() to the end but could not mmap(), then we can get back. */ static void *find_file_map; static size_t map_length; FILE *fopen_mmap_file(char *name) { FILE *f; int fd; long l; find_file_map = 0; fd = open(name, O_RDONLY, 0); if (fd < 0) return 0; l = lseek(fd, 0, SEEK_END); f = fdopen(fd, "r"); if (!f) { close(fd); return 0; } if (l < 0) return f; /* not seekable */ map_length = l; if ((find_file_map = mmap(0, map_length, PROT_READ, MAP_PRIVATE, fd, 0)) == MAP_FAILED) { /* we could not mmap() the file; get back */ find_file_map = 0; if (fseek(f, 0, SEEK_SET)) { /* bwaah... can't get back. 
This file is cursed. */ fclose(f); return 0; } } return f; } void set_input_file(struct lexer_state *ls, FILE *f) { ls->input = f; if (find_file_map) { ls->from_mmap = 1; ls->input_buf_sav = ls->input_buf; ls->input_buf = find_file_map; ls->pbuf = 0; ls->ebuf = map_length; } else { ls->from_mmap = 0; } } #endif /* * Find a file by looking through the include path. * return value: a FILE * on the file, opened in "r" mode, or 0. * * find_file_error will contain: * FF_ERROR on error (file not found or impossible to read) * FF_PROTECT file is protected and therefore useless to read * FF_KNOWN file is already known * FF_UNKNOWN file was not already known */ static int find_file_error; enum { FF_ERROR, FF_PROTECT, FF_KNOWN, FF_UNKNOWN }; static FILE *find_file(char *name, int localdir) { FILE *f; int i, incdir = -1; size_t nl = strlen(name); char *s = 0; struct found_file *ff = 0, *nff; int lf = 0; int nffa = 0; find_file_error = FF_ERROR; protect_detect.state = -1; protect_detect.macro = 0; if (localdir) { int i; char *rfn = current_long_filename ? current_long_filename : current_filename; for (i = strlen(rfn) - 1; i >= 0; i --) #ifdef MSDOS if (rfn[i] == '\\') break; #else if (rfn[i] == '/') break; #endif #if defined MSDOS if (i >= 0 && *name != '\\' && (nl < 2 || name[1] != ':')) #elif defined AMIGA if (i >= 0 && *name != '/' && (nl < 2 || name[1] != ':')) #else if (i >= 0 && *name != '/') #endif { /* * current file is somewhere else, and the provided * file name is not absolute, so we must adjust the * base for looking for the file; besides, * found_files and found_files_loc are irrelevant * for this search. 
*/ s = getmem(i + 2 + nl); mmv(s, rfn, i); #ifdef MSDOS s[i] = '\\'; #else s[i] = '/'; #endif mmv(s + i + 1, name, nl); s[i + 1 + nl] = 0; ff = HTT_get(&found_files, s); } else ff = HTT_get(&found_files, name); } if (!ff) { struct found_file_sys *ffs = HTT_get(&found_files_sys, name); if (ffs) { ff = ffs->rff; incdir = ffs->incdir; } } /* * At that point: if the file was found in the cache, ff points to * the cached descriptive structure; its name is s if s is not 0, * name otherwise. */ if (ff) goto found_file_cache; /* * This is the first time we find the file, or it was not protected. */ protect_detect.ff = new_found_file(); nffa = 1; if (localdir && #ifdef UCPP_MMAP (f = fopen_mmap_file(s ? s : name)) #else (f = fopen(s ? s : name, "r")) #endif ) { lf = 1; goto found_file; } /* * If s contains a name, that name is now irrelevant: it was a * filename for a search in the current directory, and the file * was not found. */ if (s) { freemem(s); s = 0; } for (i = 0; (size_t)i < include_path_nb; i ++) { size_t ni = strlen(include_path[i]); s = getmem(ni + nl + 2); mmv(s, include_path[i], ni); #ifdef AMIGA /* contributed by Volker Barthelmann */ if (ni == 1 && *s == '.') { *s = 0; ni = 0; } if (ni > 0 && s[ni - 1] != ':' && s[ni - 1] != '/') { s[ni] = '/'; mmv(s + ni + 1, name, nl + 1); } else { mmv(s + ni, name, nl + 1); } #else s[ni] = '/'; mmv(s + ni + 1, name, nl + 1); #endif #ifdef MSDOS /* on msdos systems, replace all / by \ */ { char *c; for (c = s; *c; c ++) if (*c == '/') *c = '\\'; } #endif incdir = i; if ((ff = HTT_get(&found_files, s)) != 0) { /* * The file is known, but not as a system include * file under the name provided. 
*/ struct found_file_sys *ffs = new_found_file_sys(); ffs->rff = ff; ffs->incdir = incdir; HTT_put(&found_files_sys, ffs, name); freemem(s); s = 0; if (nffa) { del_found_file(protect_detect.ff); protect_detect.ff = 0; nffa = 0; } goto found_file_cache; } #ifdef UCPP_MMAP f = fopen_mmap_file(s); #else f = fopen(s, "r"); #endif if (f) goto found_file; freemem(s); s = 0; } zero_out: if (s) freemem(s); if (nffa) { del_found_file(protect_detect.ff); protect_detect.ff = 0; nffa = 0; } return 0; /* * This part is invoked when the file was found in the * cache. */ found_file_cache: if (ff->protect) { if (get_macro(ff->protect)) { /* file is protected, do not include it */ find_file_error = FF_PROTECT; goto zero_out; } /* file is protected but the guardian macro is not available; disable guardian detection. */ protect_detect.state = 0; } protect_detect.ff = ff; #ifdef UCPP_MMAP f = fopen_mmap_file(HASH_ITEM_NAME(ff)); #else f = fopen(HASH_ITEM_NAME(ff), "r"); #endif if (!f) goto zero_out; find_file_error = FF_KNOWN; goto found_file_2; /* * This part is invoked when we found a new file, which was not * yet referenced. If lf == 1, then the file was found directly, * otherwise it was found in some system include directory. * A new found_file structure has been allocated and is in * protect_detect.ff */ found_file: if (f && ((emit_dependencies == 1 && lf && current_incdir == -1) || emit_dependencies == 2)) { fprintf(emit_output, " %s", s ? s : name); } nff = protect_detect.ff; nff->name = sdup(name); #ifdef AUDIT if ( #endif HTT_put(&found_files, nff, s ? 
s : name) #ifdef AUDIT ) ouch("filename collided with a wraith") #endif ; if (!lf) { struct found_file_sys *ffs = new_found_file_sys(); ffs->rff = nff; ffs->incdir = incdir; HTT_put(&found_files_sys, ffs, name); } if (s) freemem(s); s = 0; find_file_error = FF_UNKNOWN; ff = nff; found_file_2: if (s) freemem(s); current_long_filename = HASH_ITEM_NAME(ff); #ifdef NO_LIBC_BUF setbuf(f, 0); #endif current_incdir = incdir; return f; } /* * Find the named file by looking through the end of the include path. * This is for #include_next directives. * #include_next and #include_next "foo" are considered identical, * for all practical purposes. */ static FILE *find_file_next(char *name) { int i; size_t nl = strlen(name); FILE *f; struct found_file *ff; find_file_error = FF_ERROR; protect_detect.state = -1; protect_detect.macro = 0; for (i = current_incdir + 1; (size_t)i < include_path_nb; i ++) { char *s; size_t ni = strlen(include_path[i]); s = getmem(ni + nl + 2); mmv(s, include_path[i], ni); s[ni] = '/'; mmv(s + ni + 1, name, nl + 1); #ifdef MSDOS /* on msdos systems, replace all / by \ */ { char *c; for (c = s; *c; c ++) if (*c == '/') *c = '\\'; } #endif ff = HTT_get(&found_files, s); if (ff) { /* file was found in the cache */ if (ff->protect) { if (get_macro(ff->protect)) { find_file_error = FF_PROTECT; freemem(s); return 0; } /* file is protected but the guardian macro is not available; disable guardian detection. */ protect_detect.state = 0; } protect_detect.ff = ff; #ifdef UCPP_MMAP f = fopen_mmap_file(HASH_ITEM_NAME(ff)); #else f = fopen(HASH_ITEM_NAME(ff), "r"); #endif if (!f) { /* file is referenced but yet unavailable. 
*/ freemem(s); return 0; } find_file_error = FF_KNOWN; freemem(s); s = HASH_ITEM_NAME(ff); } else { #ifdef UCPP_MMAP f = fopen_mmap_file(s); #else f = fopen(s, "r"); #endif if (f) { if (emit_dependencies == 2) { fprintf(emit_output, " %s", s); } ff = protect_detect.ff = new_found_file(); ff->name = sdup(s); #ifdef AUDIT if ( #endif HTT_put(&found_files, ff, s) #ifdef AUDIT ) ouch("filename collided with a wraith") #endif ; find_file_error = FF_UNKNOWN; freemem(s); s = HASH_ITEM_NAME(ff); } } if (f) { current_long_filename = s; current_incdir = i; return f; } freemem(s); } return 0; } /* * The #if directive. This function parse the expression, performs macro * expansion (and handles the "defined" operator), and call eval_expr. * return value: 1 if the expression is true, 0 if it is false, -1 on error. */ static int handle_if(struct lexer_state *ls) { struct token_fifo tf, tf1, tf2, tf3, *save_tf; long l = ls->line; unsigned long z; int ret = 0, ltww = 1; /* first, get the whole line */ tf.art = tf.nt = 0; while (!next_token(ls) && ls->ctok->type != NEWLINE) { struct token t; if (ltww && ttMWS(ls->ctok->type)) continue; ltww = ttMWS(ls->ctok->type); t.type = ls->ctok->type; t.line = l; if (S_TOKEN(ls->ctok->type)) { t.name = sdup(ls->ctok->name); throw_away(ls->gf, t.name); } aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); } if (ltww && tf.nt) if ((-- tf.nt) == 0) freemem(tf.t); if (tf.nt == 0) { error(l, "void condition for a #if/#elif"); return -1; } /* handle the "defined" operator */ tf1.art = tf1.nt = 0; while (tf.art < tf.nt) { struct token *ct, rt; struct macro *m; size_t nidx, eidx; ct = tf.t + (tf.art ++); if (ct->type == NAME && !strcmp(ct->name, "defined")) { if (tf.art >= tf.nt) goto store_token; nidx = tf.art; if (ttMWS(tf.t[nidx].type)) if (++ nidx >= tf.nt) goto store_token; if (tf.t[nidx].type == NAME) { eidx = nidx; goto check_macro; } if (tf.t[nidx].type != LPAR) goto store_token; if (++ nidx >= tf.nt) goto store_token; if (ttMWS(tf.t[nidx].type)) if (++ nidx 
>= tf.nt) goto store_token; if (tf.t[nidx].type != NAME) goto store_token; eidx = nidx + 1; if (eidx >= tf.nt) goto store_token; if (ttMWS(tf.t[eidx].type)) if (++ eidx >= tf.nt) goto store_token; if (tf.t[eidx].type != RPAR) goto store_token; goto check_macro; } store_token: aol(tf1.t, tf1.nt, *ct, TOKEN_LIST_MEMG); continue; check_macro: m = get_macro(tf.t[nidx].name); rt.type = NUMBER; rt.name = m ? "1L" : "0L"; aol(tf1.t, tf1.nt, rt, TOKEN_LIST_MEMG); tf.art = eidx + 1; } freemem(tf.t); if (tf1.nt == 0) { error(l, "void condition (after expansion) for a #if/#elif"); return -1; } /* perform all macro substitutions */ tf2.art = tf2.nt = 0; save_tf = ls->output_fifo; ls->output_fifo = &tf2; while (tf1.art < tf1.nt) { struct token *ct; ct = tf1.t + (tf1.art ++); if (ct->type == NAME) { struct macro *m = get_macro(ct->name); if (m) { if (substitute_macro(ls, m, &tf1, 0, #ifdef NO_PRAGMA_IN_DIRECTIVE 1, #else 0, #endif ct->line)) { ls->output_fifo = save_tf; goto error1; } continue; } } else if ((ct->type == SHARP || ct->type == DIG_SHARP) && (ls->flags & HANDLE_ASSERTIONS)) { /* we have an assertion; parse it */ int nnp, ltww = 1; size_t i = tf1.art; struct token_fifo atl; char *aname; struct assert *a; int av = 0; struct token rt; atl.art = atl.nt = 0; while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++; if (i >= tf1.nt) goto assert_error; if (tf1.t[i].type != NAME) goto assert_error; aname = tf1.t[i ++].name; while (i < tf1.nt && ttMWS(tf1.t[i].type)) i ++; if (i >= tf1.nt) goto assert_generic; if (tf1.t[i].type != LPAR) goto assert_generic; i ++; for (nnp = 1; nnp && i < tf1.nt; i ++) { if (ltww && ttMWS(tf1.t[i].type)) continue; if (tf1.t[i].type == LPAR) nnp ++; else if (tf1.t[i].type == RPAR && (-- nnp) == 0) { tf1.art = i + 1; break; } ltww = ttMWS(tf1.t[i].type); aol(atl.t, atl.nt, tf1.t[i], TOKEN_LIST_MEMG); } if (nnp) goto assert_error; if (ltww && atl.nt && (-- atl.nt) == 0) freemem(atl.t); if (atl.nt == 0) goto assert_error; /* the assertion is in aname and 
atl; check it */ a = get_assertion(aname); if (a) for (i = 0; i < a->nbval; i ++) if (!cmp_token_list(&atl, a->val + i)) { av = 1; break; } rt.type = NUMBER; rt.name = av ? "1" : "0"; aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG); if (atl.nt) freemem(atl.t); continue; assert_generic: tf1.art = i; rt.type = NUMBER; rt.name = get_assertion(aname) ? "1" : "0"; aol(tf2.t, tf2.nt, rt, TOKEN_LIST_MEMG); continue; assert_error: error(l, "syntax error for assertion in #if"); ls->output_fifo = save_tf; goto error1; } aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG); } ls->output_fifo = save_tf; freemem(tf1.t); if (tf2.nt == 0) { error(l, "void condition (after expansion) for a #if/#elif"); return -1; } /* * suppress whitespace and replace rogue identifiers by 0 */ tf3.art = tf3.nt = 0; while (tf2.art < tf2.nt) { struct token *ct = tf2.t + (tf2.art ++); if (ttMWS(ct->type)) continue; if (ct->type == NAME) { /* * a rogue identifier; we replace it with "0". */ struct token rt; rt.type = NUMBER; rt.name = "0"; aol(tf3.t, tf3.nt, rt, TOKEN_LIST_MEMG); continue; } aol(tf3.t, tf3.nt, *ct, TOKEN_LIST_MEMG); } freemem(tf2.t); if (tf3.nt == 0) { error(l, "void condition (after expansion) for a #if/#elif"); return -1; } eval_line = l; z = eval_expr(&tf3, &ret, (ls->flags & WARN_STANDARD) != 0); freemem(tf3.t); if (ret) return -1; return (z != 0); error1: if (tf1.nt) freemem(tf1.t); if (tf2.nt) freemem(tf2.t); return -1; } /* * A #include was found; parse the end of line, replace macros if * necessary. 
* * If nex is set to non-zero, the directive is considered as a #include_next * (extension to C99, mimicked from GNU) */ static int handle_include(struct lexer_state *ls, unsigned long flags, int nex) { int c, string_fname = 0; char *fname; unsigned char *fname2; size_t fname_ptr = 0; long l = ls->line; int x, y; FILE *f; struct token_fifo tf, tf2, *save_tf; size_t nl; int tgd; struct lexer_state alt_ls; #define left_angle(t) ((t) == LT || (t) == LEQ || (t) == LSH \ || (t) == ASLSH || (t) == DIG_LBRK || (t) == LBRA) #define right_angle(t) ((t) == GT || (t) == RSH || (t) == ARROW \ || (t) == DIG_RBRK || (t) == DIG_RBRA) while ((c = grap_char(ls)) >= 0 && c != '\n') { if (space_char(c)) { discard_char(ls); continue; } if (c == '<') { discard_char(ls); while ((c = grap_char(ls)) >= 0) { discard_char(ls); if (c == '\n') goto include_last_chance; if (c == '>') break; aol(fname, fname_ptr, (char)c, FNAME_MEMG); } aol(fname, fname_ptr, (char)0, FNAME_MEMG); string_fname = 0; goto do_include; } else if (c == '"') { discard_char(ls); while ((c = grap_char(ls)) >= 0) { discard_char(ls); if (c == '\n') { /* macro replacements won't save that one */ if (fname_ptr) freemem(fname); goto include_error; } if (c == '"') break; aol(fname, fname_ptr, (char)c, FNAME_MEMG); } aol(fname, fname_ptr, (char)0, FNAME_MEMG); string_fname = 1; goto do_include; } goto include_macro; } include_last_chance: /* * We found a '<' but not the trailing '>'; so we tokenize the * line, and try to act upon it. The standard lets us free in that * matter, and no sane programmer would use such a construct, but * it is no reason not to support it. */ if (fname_ptr == 0) goto include_error; fname2 = getmem(fname_ptr + 1); mmv(fname2 + 1, fname, fname_ptr); fname2[0] = '<'; /* * We merely copy the lexer_state structure; this should be ok, * since we do want to share the memory structure (garbage_fifo), * and do not touch any other context-full thing. 
*/ alt_ls = *ls; alt_ls.input = 0; alt_ls.input_string = fname2; alt_ls.pbuf = 0; alt_ls.ebuf = fname_ptr + 1; tf.art = tf.nt = 0; while (!next_token(&alt_ls)) { if (!ttMWS(alt_ls.ctok->type)) { struct token t; t.type = alt_ls.ctok->type; t.line = l; if (S_TOKEN(alt_ls.ctok->type)) { t.name = sdup(alt_ls.ctok->name); throw_away(alt_ls.gf, t.name); } aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); } } freemem(fname2); if (alt_ls.pbuf < alt_ls.ebuf) goto include_error; /* tokenizing failed */ goto include_macro2; include_error: error(l, "invalid '#include'"); return 1; include_macro: tf.art = tf.nt = 0; while (!next_token(ls) && ls->ctok->type != NEWLINE) { if (!ttMWS(ls->ctok->type)) { struct token t; t.type = ls->ctok->type; t.line = l; if (S_TOKEN(ls->ctok->type)) { t.name = sdup(ls->ctok->name); throw_away(ls->gf, t.name); } aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); } } include_macro2: tf2.art = tf2.nt = 0; save_tf = ls->output_fifo; ls->output_fifo = &tf2; while (tf.art < tf.nt) { struct token *ct; ct = tf.t + (tf.art ++); if (ct->type == NAME) { struct macro *m = get_macro(ct->name); if (m) { if (substitute_macro(ls, m, &tf, 0, #ifdef NO_PRAGMA_IN_DIRECTIVE 1, #else 0, #endif ct->line)) { ls->output_fifo = save_tf; return -1; } continue; } } aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG); } freemem(tf.t); ls->output_fifo = save_tf; for (x = 0; (size_t)x < tf2.nt && ttWHI(tf2.t[x].type); x ++); for (y = tf2.nt - 1; y >= 0 && ttWHI(tf2.t[y].type); y --); if ((size_t)x >= tf2.nt) goto include_macro_err; if (tf2.t[x].type == STRING) { if (y != x) goto include_macro_err; if (tf2.t[x].name[0] == 'L') { if (ls->flags & WARN_STANDARD) warning(l, "wide string for #include"); fname = sdup(tf2.t[x].name); nl = strlen(fname); *(fname + nl - 1) = 0; mmvwo(fname, fname + 2, nl - 2); } else { fname = sdup(tf2.t[x].name); nl = strlen(fname); *(fname + nl - 1) = 0; mmvwo(fname, fname + 1, nl - 1); } string_fname = 1; } else if (left_angle(tf2.t[x].type) && right_angle(tf2.t[y].type)) { int i, j; 
if (ls->flags & WARN_ANNOYING) warning(l, "reconstruction " "of in #include"); for (j = 0, i = x; i <= y; i ++) if (!ttWHI(tf2.t[i].type)) j += strlen(tname(tf2.t[i])); fname = getmem(j + 1); for (j = 0, i = x; i <= y; i ++) { if (ttWHI(tf2.t[i].type)) continue; strcpy(fname + j, tname(tf2.t[i])); j += strlen(tname(tf2.t[i])); } *(fname + j - 1) = 0; mmvwo(fname, fname + 1, j); string_fname = 0; } else goto include_macro_err; freemem(tf2.t); goto do_include_next; include_macro_err: error(l, "macro expansion did not produce a valid filename " "for #include"); if (tf2.nt) freemem(tf2.t); return 1; do_include: tgd = 1; while (!next_token(ls)) { if (tgd && !ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(l, "trailing garbage in #include"); tgd = 0; } if (ls->ctok->type == NEWLINE) break; } /* the increment of ls->line is intended so that the line numbering is reported correctly in report_context() even if the #include is at the end of the file with no trailing newline */ if (ls->ctok->type != NEWLINE) ls->line ++; do_include_next: if (!(ls->flags & LEXER) && (ls->flags & KEEP_OUTPUT)) put_char(ls, '\n'); push_file_context(ls); reinit_lexer_state(ls, 1); #ifdef MSDOS /* on msdos systems, replace all / by \ */ { char *d; for (d = fname; *d; d ++) if (*d == '/') *d = '\\'; } #endif f = nex ? 
find_file_next(fname) : find_file(fname, string_fname); if (!f) { current_filename = 0; pop_file_context(ls); if (find_file_error == FF_ERROR) { error(l, "file '%s' not found", fname); freemem(fname); return 1; } /* file was found, but it is useless to include it again */ freemem(fname); return 0; } #ifdef UCPP_MMAP set_input_file(ls, f); #else ls->input = f; #endif current_filename = fname; enter_file(ls, flags); return 0; #undef left_angle #undef right_angle } /* * for #line directives */ static int handle_line(struct lexer_state *ls, unsigned long flags) { char *fname; long l = ls->line; struct token_fifo tf, tf2, *save_tf; size_t nl, j; unsigned long z; tf.art = tf.nt = 0; while (!next_token(ls) && ls->ctok->type != NEWLINE) { if (!ttMWS(ls->ctok->type)) { struct token t; t.type = ls->ctok->type; t.line = l; if (S_TOKEN(ls->ctok->type)) { t.name = sdup(ls->ctok->name); throw_away(ls->gf, t.name); } aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); } } tf2.art = tf2.nt = 0; save_tf = ls->output_fifo; ls->output_fifo = &tf2; while (tf.art < tf.nt) { struct token *ct; ct = tf.t + (tf.art ++); if (ct->type == NAME) { struct macro *m = get_macro(ct->name); if (m) { if (substitute_macro(ls, m, &tf, 0, #ifdef NO_PRAGMA_IN_DIRECTIVE 1, #else 0, #endif ct->line)) { ls->output_fifo = save_tf; return -1; } continue; } } aol(tf2.t, tf2.nt, *ct, TOKEN_LIST_MEMG); } freemem(tf.t); for (tf2.art = 0; tf2.art < tf2.nt && ttWHI(tf2.t[tf2.art].type); tf2.art ++); ls->output_fifo = save_tf; if (tf2.art == tf2.nt || (tf2.t[tf2.art].type != NUMBER && tf2.t[tf2.art].type != CHAR)) { error(l, "not a valid number for #line"); goto line_macro_err; } for (j = 0; tf2.t[tf2.art].name[j]; j ++) if (tf2.t[tf2.art].name[j] < '0' || tf2.t[tf2.art].name[j] > '9') if (ls->flags & WARN_STANDARD) warning(l, "non-standard line number in #line"); if (catch(eval_exception)) goto line_macro_err; z = strtoconst(tf2.t[tf2.art].name); if (j > 10 || z > 2147483647U) { error(l, "out-of-bound line number for #line"); 
goto line_macro_err; } ls->oline = ls->line = z; if ((++ tf2.art) < tf2.nt) { size_t i; for (i = tf2.art; i < tf2.nt && ttMWS(tf2.t[i].type); i ++); if (i < tf2.nt) { if (tf2.t[i].type != STRING) { error(l, "not a valid filename for #line"); goto line_macro_err; } if (tf2.t[i].name[0] == 'L') { if (ls->flags & WARN_STANDARD) { warning(l, "wide string for #line"); } fname = sdup(tf2.t[i].name); nl = strlen(fname); *(fname + nl - 1) = 0; mmvwo(fname, fname + 2, nl - 2); } else { fname = sdup(tf2.t[i].name); nl = strlen(fname); *(fname + nl - 1) = 0; mmvwo(fname, fname + 1, nl - 1); } if (current_filename) freemem(current_filename); current_filename = fname; } for (i ++; i < tf2.nt && ttMWS(tf2.t[i].type); i ++); if (i < tf2.nt && (ls->flags & WARN_STANDARD)) { warning(l, "trailing garbage in #line"); } } freemem(tf2.t); enter_file(ls, flags); return 0; line_macro_err: if (tf2.nt) freemem(tf2.t); return 1; } /* * a #error directive: we emit the message without any modification * (except the usual backslash+newline and trigraphs) */ static void handle_error(struct lexer_state *ls) { int c; size_t p = 0, lp = 128; long l = ls->line; unsigned char *buf = getmem(lp); while ((c = grap_char(ls)) >= 0 && c != '\n') { discard_char(ls); wan(buf, p, (unsigned char)c, lp); } wan(buf, p, 0, lp); error(l, "#error%s", buf); freemem(buf); } /* * convert digraph tokens to their standard equivalent. */ static int undig(int type) { static int ud[6] = { LBRK, RBRK, LBRA, RBRA, SHARP, DSHARP }; return ud[type - DIG_LBRK]; } #ifdef PRAGMA_TOKENIZE /* * Make a compressed representation of a token list; the contents of * the token_fifo are freed. Values equal to 0 are replaced by * PRAGMA_TOKEN_END (by default, (unsigned char)'\n') and the compressed * string is padded by a 0 (so that it may be * handled like a string). * Digraph tokens are replaced by their non-digraph equivalents. 
 */
struct comp_token_fifo compress_token_list(struct token_fifo *tf)
{
	struct comp_token_fifo ct;
	size_t l;

	/* first pass: one byte per token, plus name and terminator if any */
	for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) {
		l ++;
		if (S_TOKEN(tf->t[tf->art].type))
			l += strlen(tf->t[tf->art].name) + 1;
	}
	ct.t = getmem((ct.length = l) + 1);
	/* second pass: emit the bytes and release the token names */
	for (l = 0, tf->art = 0; tf->art < tf->nt; tf->art ++) {
		int tt = tf->t[tf->art].type;

		if (tt == 0) tt = PRAGMA_TOKEN_END;
		if (tt > DIGRAPH_TOKENS && tt < DIGRAPH_TOKENS_END)
			tt = undig(tt);
		ct.t[l ++] = tt;
		if (S_TOKEN(tt)) {
			char *tn = tf->t[tf->art].name;
			size_t sl = strlen(tn);

			mmv(ct.t + l, tn, sl);
			l += sl;
			ct.t[l ++] = PRAGMA_TOKEN_END;
			freemem(tn);
		}
	}
	ct.t[l] = 0;
	if (tf->nt) freemem(tf->t);
	ct.rp = 0;
	return ct;
}
#endif

/*
 * A #pragma directive: we make a PRAGMA token containing the rest of
 * the line.
 *
 * We strongly hope that we are called only in LEXER mode.
 *
 * The token is appended to ls->output_fifo and its payload is handed to
 * throw_away(); an empty pragma produces no token at all.
 */
static void handle_pragma(struct lexer_state *ls)
{
	unsigned char *buf;
	struct token t;
	long l = ls->line;

#ifdef PRAGMA_TOKENIZE
	struct token_fifo tf;

	tf.art = tf.nt = 0;
	/* skip leading whitespace tokens */
	while (!next_token(ls) && ls->ctok->type != NEWLINE)
		if (!ttMWS(ls->ctok->type)) break;
	if (ls->ctok->type != NEWLINE) {
		do {
			struct token t;

			t.type = ls->ctok->type;
			if (ttMWS(t.type)) continue;
			if (S_TOKEN(t.type)) t.name = sdup(ls->ctok->name);
			aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG);
		} while (!next_token(ls) && ls->ctok->type != NEWLINE);
	}
	if (tf.nt == 0) {
		/* void pragma are silently ignored */
		return;
	}
	buf = (compress_token_list(&tf)).t;
#else
	/* x: number of bytes stored in buf; y: size argument passed to wan() */
	int c, x = 1, y = 32;

	/* skip leading whitespace; c keeps the first significant character */
	while ((c = grap_char(ls)) >= 0 && c != '\n') {
		discard_char(ls);
		if (!space_char(c)) break;
	}
	/* void #pragma are ignored */
	if (c == '\n') return;
	buf = getmem(y);
	buf[0] = c;
	while ((c = grap_char(ls)) >= 0 && c != '\n') {
		discard_char(ls);
		wan(buf, x, c, y);
	}
	/* trim trailing whitespace, then terminate the string */
	for (x --; x >= 0 && space_char(buf[x]); x --);
	x ++;
	wan(buf, x, 0, y);
#endif
	t.type = PRAGMA;
	t.line = l;
	t.name = (char *)buf;
	aol(ls->output_fifo->t, ls->output_fifo->nt, t, TOKEN_LIST_MEMG);
	throw_away(ls->gf, (char *)buf);
}

/*
 * We saw a # at the beginning of a line (or preceded only by whitespace).
 * We check the directive name and act accordingly.
 *
 * sharp_type is the token type of the introducing '#' (SHARP or
 * DIG_SHARP at the call sites in cpp()); it is used when the '#' has to
 * be printed back.
 * The LEXER flag is forced on while the directive is read, and ls->flags
 * is restored from save_flags on the exit paths that go through
 * handle_exit2.
 * return value: "ret", which the branches below set to 0 or 1 or to the
 * result of the directive handler they call.
 */
static int handle_cpp(struct lexer_state *ls, int sharp_type)
{
	/* set / clear / test bit x of ls->condf (one bit per #if depth) */
#define condfset(x)	do { \
		ls->condf[(x) / 32] |= 1UL << ((x) % 32); \
	} while (0)
#define condfclr(x)	do { \
		ls->condf[(x) / 32] &= ~(1UL << ((x) % 32)); \
	} while (0)
#define condfval(x)	((ls->condf[(x) / 32] & (1UL << ((x) % 32))) != 0)

	long l = ls->line;
	unsigned long save_flags = ls->flags;
	int ret = 0;

	save_flags = ls->flags;
	ls->flags |= LEXER;
	while (!next_token(ls)) {
		int t = ls->ctok->type;

		switch (t) {
		case COMMENT:
			if (ls->flags & WARN_ANNOYING) {
				warning(l, "comment in the middle of "
					"a cpp directive");
			}
			/* fall through */
		case NONE:
			continue;
		case NEWLINE:
			/* null directive */
			if (ls->flags & WARN_ANNOYING) {
				/* truly an annoying warning; null directives
				   are rare but may increase readability of
				   some source files, and they are legal */
				warning(l, "null cpp directive");
			}
			if (!(ls->flags & LEXER)) put_char(ls, '\n');
			goto handle_exit2;
		case NAME:
			break;
		default:
			if (ls->flags & FAIL_SHARP) {
				/* LPS 20050602 - ignores '#!' if on the first line */
				if( ( l == 1 ) &&
				    ( ls->condcomp ) )
				{
					ret = 1;
				}
				else
				/* LPS 20050602 */
				if (ls->condcomp) {
					error(l, "rogue '#'");
					ret = 1;
				} else {
					if (ls->flags & WARN_STANDARD) {
						warning(l, "rogue '#' in code "
							"compiled out");
						ret = 0;
					}
				}
				ls->flags = save_flags;
				goto handle_warp_ign;
			} else {
				struct token u;

				/* print the '#' and the current token back */
				u.type = sharp_type;
				u.line = l;
				ls->flags = save_flags;
				print_token(ls, &u, 0);
				print_token(ls, ls->ctok, 0);
				if (ls->flags & WARN_ANNOYING) {
					warning(l, "rogue '#' dumped");
				}
				goto handle_exit3;
			}
		}
		if (ls->condcomp) {
			/* ls->condcomp is set: every directive is acted upon */
			if (!strcmp(ls->ctok->name, "define")) {
				ret = handle_define(ls);
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "undef")) {
				ret = handle_undef(ls);
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "if")) {
				if ((++ ls->ifnest) > 63) goto too_many_if;
				condfclr(ls->ifnest - 1);
				ret = handle_if(ls);
				if (ret > 0) ret = 0;
				else if (ret == 0) {
					/* false condition: stop compiling */
					ls->condcomp = 0;
					ls->condmet = 0;
					ls->condnest = ls->ifnest - 1;
				}
				else ret = 1;
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "ifdef")) {
				if ((++ ls->ifnest) > 63) goto too_many_if;
				condfclr(ls->ifnest - 1);
				ret = handle_ifdef(ls);
				if (ret > 0) ret = 0;
				else if (ret == 0) {
					ls->condcomp = 0;
					ls->condmet = 0;
					ls->condnest = ls->ifnest - 1;
				}
				else ret = 1;
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "ifndef")) {
				if ((++ ls->ifnest) > 63) goto too_many_if;
				condfclr(ls->ifnest - 1);
				ret = handle_ifndef(ls);
				if (ret > 0) ret = 0;
				else if (ret == 0) {
					ls->condcomp = 0;
					ls->condmet = 0;
					ls->condnest = ls->ifnest - 1;
				}
				else ret = 1;
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "else")) {
				/* the condf bit records that #else was seen */
				if (ls->ifnest == 0
					|| condfval(ls->ifnest - 1)) {
					error(l, "rogue #else");
					ret = 1;
					goto handle_warp;
				}
				condfset(ls->ifnest - 1);
				if (ls->ifnest == 1) protect_detect.state = 0;
				ls->condcomp = 0;
				ls->condmet = 1;
				ls->condnest = ls->ifnest - 1;
				goto handle_warp;
			} else if (!strcmp(ls->ctok->name, "elif")) {
				if (ls->ifnest == 0
					|| condfval(ls->ifnest - 1)) {
					error(l, "rogue #elif");
					ret = 1;
					goto handle_warp_ign;
				}
				if (ls->ifnest == 1) protect_detect.state = 0;
				ls->condcomp = 0;
				ls->condmet = 1;
				ls->condnest = ls->ifnest - 1;
				goto handle_warp_ign;
			} else if (!strcmp(ls->ctok->name, "endif")) {
				if (ls->ifnest == 0) {
					error(l, "unmatched #endif");
					ret = 1;
					goto handle_warp;
				}
				if ((-- ls->ifnest) == 0
					&& protect_detect.state == 2) {
					protect_detect.state = 3;
				}
				goto handle_warp;
			} else if (!strcmp(ls->ctok->name, "include")) {
				ret = handle_include(ls, save_flags, 0);
				goto handle_exit3;
			} else if (!strcmp(ls->ctok->name, "include_next")) {
				ret = handle_include(ls, save_flags, 1);
				goto handle_exit3;
			} else if (!strcmp(ls->ctok->name, "pragma")) {
				if (!(save_flags & LEXER)) {
#ifdef PRAGMA_DUMP
					/* dump #pragma in output */
					struct token u;

					u.type = sharp_type;
					u.line = l;
					ls->flags = save_flags;
					print_token(ls, &u, 0);
					print_token(ls, ls->ctok, 0);
					/* LEXER is set for reading each token
					   and cleared again for printing it */
					while (ls->flags |= LEXER,
						!next_token(ls)) {
						long save_line;

						ls->flags &= ~LEXER;
						save_line = ls->line;
						ls->line = l;
						print_token(ls, ls->ctok, 0);
						ls->line = save_line;
						if (ls->ctok->type == NEWLINE)
							break;
					}
					goto handle_exit3;
#else
					if (ls->flags & WARN_PRAGMA)
						warning(l, "#pragma ignored "
							"and not dumped");
					goto handle_warp_ign;
#endif
				}
				if (!(ls->flags & HANDLE_PRAGMA))
					goto handle_warp_ign;
				handle_pragma(ls);
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "error")) {
				ret = 1;
				handle_error(ls);
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "line")) {
				ret = handle_line(ls, save_flags);
				goto handle_exit;
			} else if ((ls->flags & HANDLE_ASSERTIONS)
				&& !strcmp(ls->ctok->name, "assert")) {
				ret = handle_assert(ls);
				goto handle_exit;
			} else if ((ls->flags & HANDLE_ASSERTIONS)
				&& !strcmp(ls->ctok->name, "unassert")) {
				ret = handle_unassert(ls);
				goto handle_exit;
			}
		} else {
			/* ls->condcomp is clear: only conditionals are tracked */
			if (!strcmp(ls->ctok->name, "else")) {
				if (condfval(ls->ifnest - 1)
					&& (ls->flags & WARN_STANDARD)) {
					warning(l, "rogue #else in code "
						"compiled out");
				}
				if (ls->condnest == ls->ifnest - 1) {
					if (!ls->condmet) ls->condcomp = 1;
				}
				condfset(ls->ifnest - 1);
				if (ls->ifnest == 1) protect_detect.state = 0;
				goto handle_warp;
			} else if (!strcmp(ls->ctok->name, "elif")) {
				if (condfval(ls->ifnest - 1)
					&& (ls->flags & WARN_STANDARD)) {
					warning(l, "rogue #elif in code "
						"compiled out");
				}
				/* evaluate only at the level that disabled
				   output, and only if no branch was taken */
				if (ls->condnest != ls->ifnest - 1
					|| ls->condmet)
					goto handle_warp_ign;
				if (ls->ifnest == 1) protect_detect.state = 0;
				ret = handle_if(ls);
				if (ret > 0) {
					ls->condcomp = 1;
					ls->condmet = 1;
					ret = 0;
				} else if (ret < 0) ret = 1;
				goto handle_exit;
			} else if (!strcmp(ls->ctok->name, "endif")) {
				if ((-- ls->ifnest) == ls->condnest) {
					if (ls->ifnest == 0 &&
						protect_detect.state == 2)
						protect_detect.state = 3;
					ls->condcomp = 1;
				}
				goto handle_warp;
			} else if (!strcmp(ls->ctok->name, "if")
				|| !strcmp(ls->ctok->name, "ifdef")
				|| !strcmp(ls->ctok->name, "ifndef")) {
				/* nested conditional: only count its depth */
				if ((++ ls->ifnest) > 63) goto too_many_if;
				condfclr(ls->ifnest - 1);
			}
			goto handle_warp_ign;
		}
		/*
		 * Unrecognized directive. We emit either an error or
		 * an annoying warning, depending on a command-line switch.
		 */
		if (ls->flags & FAIL_SHARP) {
			error(l, "unknown cpp directive '#%s'",
				ls->ctok->name);
			goto handle_warp_ign;
		} else {
			struct token u;

			u.type = sharp_type;
			u.line = l;
			ls->flags = save_flags;
			print_token(ls, &u, 0);
			print_token(ls, ls->ctok, 0);
			if (ls->flags & WARN_ANNOYING) {
				warning(l, "rogue '#' dumped");
			}
		}
	}
	return 1;

	/* skip the rest of the line without any diagnostic */
handle_warp_ign:
	while (!next_token(ls)) if (ls->ctok->type == NEWLINE) break;
	goto handle_exit;
	/* skip the rest of the line, warning about non-whitespace tokens */
handle_warp:
	while (!next_token(ls)) {
		if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) {
			warning(l, "trailing garbage in "
				"preprocessing directive");
		}
		if (ls->ctok->type == NEWLINE) break;
	}
handle_exit:
	if (!(ls->flags & LEXER)) put_char(ls, '\n');
handle_exit3:
	if (protect_detect.state == 1) {
		protect_detect.state = 0;
	} else if (protect_detect.state == -1) {
		/* just after the #include */
		protect_detect.state = 1;
	}
handle_exit2:
	ls->flags = save_flags;
	return ret;
too_many_if:
	error(l, "too many levels of conditional inclusion (max 63)");
	ret = 1;
	goto handle_warp;
#undef condfset
#undef condfclr
#undef condfval
}

/*
 * This is the main entry function. It maintains count of #, and call the
 * appropriate functions when it encounters a cpp directive or a macro
 * name.
 * return value: positive on error; CPPERR_EOF means "end of input reached"
 *
 * When the token read is neither a directive nor a macro name, the final
 * statement returns -1 (or the pending error code r); llex() treats a
 * negative value as "a token is available in ls->ctok".
 */
int cpp(struct lexer_state *ls)
{
	int r = 0;

	/* a non-zero next_token() means the current input is exhausted */
	while (next_token(ls)) {
		if (protect_detect.state == 3) {
			/*
			 * At that point, protect_detect.ff->protect might
			 * be non-zero, if the file has been recursively
			 * included, and a guardian detected.
			 */
			if (!protect_detect.ff->protect) {
				/* Cool ! A new guardian has been detected. */
				protect_detect.ff->protect =
					protect_detect.macro;
			} else if (protect_detect.macro) {
				/* We found a guardian but an old one. */
				freemem(protect_detect.macro);
			}
			protect_detect.macro = 0;
		}
		if (ls->ifnest) {
			error(ls->line, "unterminated #if construction "
				"(depth %ld)", ls->ifnest);
			r = CPPERR_NEST;
		}
		if (ls_depth == 0) return CPPERR_EOF;
		/* leave the finished file and resume the including one */
		close_input(ls);
		if (!(ls->flags & LEXER) && !ls->ltwnl) {
			put_char(ls, '\n');
			ls->ltwnl = 1;
		}
		pop_file_context(ls);
		ls->oline ++;
		if (enter_file(ls, ls->flags)) {
			ls->ctok->type = NEWLINE;
			ls->ltwnl = 1;
			break;
		}
	}
	if (!(ls->ltwnl && (ls->ctok->type == SHARP
		|| ls->ctok->type == DIG_SHARP))
		&& protect_detect.state == 1 && !ttWHI(ls->ctok->type)) {
		/* the first non-whitespace token encountered is not
		   a sharp introducing a cpp directive */
		protect_detect.state = 0;
	}
	if (protect_detect.state == 3 && !ttWHI(ls->ctok->type)) {
		/* a non-whitespace token encountered after the #endif */
		protect_detect.state = 0;
	}
	if (ls->condcomp) {
		/* ltwnl: presumably "last token was newline" -- a '#' only
		   introduces a directive when it is set */
		if (ls->ltwnl && (ls->ctok->type == SHARP
			|| ls->ctok->type == DIG_SHARP)) {
			int x = handle_cpp(ls, ls->ctok->type);

			ls->ltwnl = 1;
			return r ? r : x;
		}
		if (ls->ctok->type == NAME) {
			struct macro *m;

			if ((m = get_macro(ls->ctok->name)) != 0) {
				int x;

				x = substitute_macro(ls, m, 0, 1, 0,
					ls->ctok->line);
				if (!(ls->flags & LEXER))
					garbage_collect(ls->gf);
				return r ? r : x;
			}
			if (!(ls->flags & LEXER))
				print_token(ls, ls->ctok, 0);
		}
	} else {
		/* compiled-out code: directives are still processed */
		if (ls->ltwnl && (ls->ctok->type == SHARP
			|| ls->ctok->type == DIG_SHARP)) {
			int x = handle_cpp(ls, ls->ctok->type);

			ls->ltwnl = 1;
			return r ? r : x;
		}
	}
	if (ls->ctok->type == NEWLINE) ls->ltwnl = 1;
	else if (!ttWHI(ls->ctok->type)) ls->ltwnl = 0;
	return r ? r : -1;
}

#ifndef STAND_ALONE
/*
 * llex() and lex() are the lexing functions, when the preprocessor is
 * linked to another code. llex() should be called only by lex().
*/ static int llex(struct lexer_state *ls) { struct token_fifo *tf = ls->output_fifo; int r; if (tf->nt != 0) { if (tf->art < tf->nt) { #ifdef INMACRO_FLAG if (!ls->inmacro) { ls->inmacro = 1; ls->macro_count ++; } #endif ls->ctok = tf->t + (tf->art ++); if (ls->ctok->type > DIGRAPH_TOKENS && ls->ctok->type < DIGRAPH_TOKENS_END) { ls->ctok->type = undig(ls->ctok->type); } return 0; } else { #ifdef INMACRO_FLAG ls->inmacro = 0; #endif freemem(tf->t); tf->art = tf->nt = 0; garbage_collect(ls->gf); ls->ctok = ls->save_ctok; } } r = cpp(ls); if (ls->ctok->type > DIGRAPH_TOKENS && ls->ctok->type < LAST_MEANINGFUL_TOKEN) { ls->ctok->type = undig(ls->ctok->type); } if (r > 0) return r; if (r < 0) return 0; return llex(ls); } /* * lex() reads the next token from the processed stream and stores it * into ls->ctok. * return value: non zero on error (including CPPERR_EOF, which is not * quite an error) */ int lex(struct lexer_state *ls) { int r; do { r = llex(ls); #ifdef SEMPER_FIDELIS } while (!r && !ls->condcomp); #else } while (!r && (!ls->condcomp || (ttWHI(ls->ctok->type) && (!(ls->flags & LINE_NUM) || ls->ctok->type != NEWLINE)))); #endif return r; } #endif /* * check_cpp_errors() must be called when the end of input is reached; * it checks pending errors due to truncated constructs (actually none, * this is reserved for future evolutions). */ int check_cpp_errors(struct lexer_state *ls) { if (ls->flags & KEEP_OUTPUT) { put_char(ls, '\n'); } if (emit_dependencies) fputc('\n', emit_output); #ifndef NO_UCPP_BUF if (!(ls->flags & LEXER)) { flush_output(ls); } #endif if ((ls->flags & WARN_TRIGRAPHS) && ls->count_trigraphs) warning(0, "%ld trigraph(s) encountered", ls->count_trigraphs); return 0; } /* * init_cpp() initializes static tables inside ucpp. It needs not be * called more than once. */ void init_cpp(void) { init_cppm(); } /* * (re)init the global tables. * If standard_assertions is non 0, init the assertions table. 
*/ void init_tables(int with_assertions) { time_t t; struct tm *ct; init_buf_lexer_state(&dsharp_lexer, 0); #ifdef PRAGMA_TOKENIZE init_buf_lexer_state(&tokenize_lexer, 0); #endif time(&t); ct = localtime(&t); #ifdef NOSTRFTIME /* we have a quite old compiler, that does not know the (standard since 1990) strftime() function. */ { char *c = asctime(ct); compile_time[0] = '"'; mmv(compile_time + 1, c + 11, 8); compile_time[9] = '"'; compile_time[10] = 0; compile_date[0] = '"'; mmv(compile_date + 1, c + 4, 7); mmv(compile_date + 8, c + 20, 4); compile_date[12] = '"'; compile_date[13] = 0; } #else strftime(compile_time, 12, "\"%H:%M:%S\"", ct); strftime(compile_date, 24, "\"%b %d %Y\"", ct); #endif init_macros(); if (with_assertions) init_assertions(); init_found_files(); } /* * Resets the include path. */ void init_include_path(char *incpath[]) { if (include_path_nb) { size_t i; for (i = 0; i < include_path_nb; i ++) freemem(include_path[i]); freemem(include_path); include_path_nb = 0; } if (incpath) { int i; for (i = 0; incpath[i]; i ++) aol(include_path, include_path_nb, sdup(incpath[i]), INCPATH_MEMG); } } /* * add_incpath() adds "path" to the standard include path. */ void add_incpath(char *path) { aol(include_path, include_path_nb, sdup(path), INCPATH_MEMG); } /* * This function cleans the memory. It should release all allocated * memory structures and may be called even if the current pre-processing * is not finished or reported an error. 
*/ void wipeout() { struct lexer_state ls; if (include_path_nb > 0) { size_t i; for (i = 0; i < include_path_nb; i ++) freemem(include_path[i]); freemem(include_path); include_path = 0; include_path_nb = 0; } if (current_filename) freemem(current_filename); current_filename = 0; current_long_filename = 0; current_incdir = -1; protect_detect.state = 0; if (protect_detect.macro) freemem(protect_detect.macro); protect_detect.macro = 0; protect_detect.ff = 0; init_lexer_state(&ls); while (ls_depth > 0) pop_file_context(&ls); free_lexer_state(&ls); free_lexer_state(&dsharp_lexer); #ifdef PRAGMA_TOKENIZE free_lexer_state(&tokenize_lexer); #endif if (found_files_init_done) HTT_kill(&found_files); found_files_init_done = 0; if (found_files_sys_init_done) HTT_kill(&found_files_sys); found_files_sys_init_done = 0; wipe_macros(); wipe_assertions(); } #ifdef STAND_ALONE /* * print some help */ static void usage(char *command_name) { fprintf(stderr, "Usage: %s [options] [file]\n" "language options:\n" " -C keep comments in output\n" " -s keep '#' when no cpp directive is recognized\n" " -l do not emit line numbers\n" " -lg emit gcc-like line numbers\n" " -CC disable C++-like comments\n" " -a, -na, -a0 handle (or not) assertions\n" " -V disable macros with extra arguments\n" " -u understand UTF-8 in source\n" " -X enable -a, -u and -Y\n" " -c90 mimic C90 behaviour\n" " -t disable trigraph support\n" "warning options:\n" " -wt emit a final warning when trigaphs are encountered\n" " -wtt emit warnings for each trigaph encountered\n" " -wa emit warnings that are usually useless\n" " -w0 disable standard warnings\n" "directory options:\n" " -I directory add 'directory' before the standard include path\n" " -J directory add 'directory' after the standard include path\n" " -zI do not use the standard include path\n" " -M emit Makefile-like dependencies instead of normal " "output\n" " -Ma emit also dependancies for system files\n" " -o file store output in file\n" "macro and assertion 
options:\n" " -Dmacro predefine 'macro'\n" " -Dmacro=def predefine 'macro' with 'def' content\n" " -Umacro undefine 'macro'\n" " -Afoo(bar) assert foo(bar)\n" " -Bfoo(bar) unassert foo(bar)\n" " -Y predefine system-dependant macros\n" " -Z do not predefine special macros\n" " -d emit defined macros\n" " -e emit assertions\n" "misc options:\n" " -v print version number and settings\n" " -h show this help\n", command_name); } /* * print version and compile-time settings */ static void version(void) { size_t i; fprintf(stderr, "ucpp version %d.%d\n", VERS_MAJ, VERS_MIN); fprintf(stderr, "search path:\n"); for (i = 0; i < include_path_nb; i ++) fprintf(stderr, " %s\n", include_path[i]); } /* * parse_opt() initializes many things according to the command-line * options. * Return values: * 0 on success * 1 on semantic error (redefinition of a special macro, for instance) * 2 on syntaxic error (unknown options for instance) */ static int parse_opt(int argc, char *argv[], struct lexer_state *ls) { int i, ret = 0; char *filename = 0; int with_std_incpath = 1; int print_version = 0, print_defs = 0, print_asserts = 0; int system_macros = 0, standard_assertions = 1; init_lexer_state(ls); ls->flags = DEFAULT_CPP_FLAGS; emit_output = ls->output = stdout; for (i = 1; i < argc; i ++) if (argv[i][0] == '-') { if (!strcmp(argv[i], "-h")) { return 2; } else if (!strcmp(argv[i], "-C")) { ls->flags &= ~DISCARD_COMMENTS; } else if (!strcmp(argv[i], "-CC")) { ls->flags &= ~CPLUSPLUS_COMMENTS; } else if (!strcmp(argv[i], "-a")) { ls->flags |= HANDLE_ASSERTIONS; } else if (!strcmp(argv[i], "-na")) { ls->flags |= HANDLE_ASSERTIONS; standard_assertions = 0; } else if (!strcmp(argv[i], "-a0")) { ls->flags &= ~HANDLE_ASSERTIONS; } else if (!strcmp(argv[i], "-V")) { ls->flags &= ~MACRO_VAARG; } else if (!strcmp(argv[i], "-u")) { ls->flags |= UTF8_SOURCE; } else if (!strcmp(argv[i], "-X")) { ls->flags |= HANDLE_ASSERTIONS; ls->flags |= UTF8_SOURCE; system_macros = 1; } else if (!strcmp(argv[i], 
"-c90")) { ls->flags &= ~MACRO_VAARG; ls->flags &= ~CPLUSPLUS_COMMENTS; c99_compliant = 0; c99_hosted = -1; } else if (!strcmp(argv[i], "-t")) { ls->flags &= ~HANDLE_TRIGRAPHS; } else if (!strcmp(argv[i], "-wt")) { ls->flags |= WARN_TRIGRAPHS; } else if (!strcmp(argv[i], "-wtt")) { ls->flags |= WARN_TRIGRAPHS_MORE; } else if (!strcmp(argv[i], "-wa")) { ls->flags |= WARN_ANNOYING; } else if (!strcmp(argv[i], "-w0")) { ls->flags &= ~WARN_STANDARD; ls->flags &= ~WARN_PRAGMA; } else if (!strcmp(argv[i], "-s")) { ls->flags &= ~FAIL_SHARP; } else if (!strcmp(argv[i], "-l")) { ls->flags &= ~LINE_NUM; } else if (!strcmp(argv[i], "-lg")) { ls->flags |= GCC_LINE_NUM; } else if (!strcmp(argv[i], "-M")) { ls->flags &= ~KEEP_OUTPUT; emit_dependencies = 1; } else if (!strcmp(argv[i], "-Ma")) { ls->flags &= ~KEEP_OUTPUT; emit_dependencies = 2; } else if (!strcmp(argv[i], "-Y")) { system_macros = 1; } else if (!strcmp(argv[i], "-Z")) { no_special_macros = 1; } else if (!strcmp(argv[i], "-d")) { ls->flags &= ~KEEP_OUTPUT; print_defs = 1; } else if (!strcmp(argv[i], "-e")) { ls->flags &= ~KEEP_OUTPUT; print_asserts = 1; } else if (!strcmp(argv[i], "-zI")) { with_std_incpath = 0; } else if (!strcmp(argv[i], "-I") || !strcmp(argv[i], "-J")) { i ++; } else if (!strcmp(argv[i], "-o")) { if ((++ i) >= argc) { error(-1, "missing filename after -o"); return 2; } if (argv[i][0] == '-' && argv[i][1] == 0) { emit_output = ls->output = stdout; } else { ls->output = fopen(argv[i], "w"); if (!ls->output) { error(-1, "failed to open for " "writing: %s", argv[i]); return 2; } emit_output = ls->output; } } else if (!strcmp(argv[i], "-v")) { print_version = 1; } else if (argv[i][1] != 'I' && argv[i][1] != 'J' && argv[i][1] != 'D' && argv[i][1] != 'U' && argv[i][1] != 'A' && argv[i][1] != 'B') warning(-1, "unknown option '%s'", argv[i]); } else { if (filename != 0) { error(-1, "spurious filename '%s'", argv[i]); return 2; } filename = argv[i]; } init_tables(ls->flags & HANDLE_ASSERTIONS); 
init_include_path(0); if (filename) { #ifdef UCPP_MMAP FILE *f = fopen_mmap_file(filename); ls->input = 0; if (f) set_input_file(ls, f); #else ls->input = fopen(filename, "r"); #endif if (!ls->input) { error(-1, "file '%s' not found", filename); return 1; } #ifdef NO_LIBC_BUF setbuf(ls->input, 0); #endif set_init_filename(filename, 1); } else { ls->input = stdin; set_init_filename("", 0); } for (i = 1; i < argc; i ++) if (argv[i][0] == '-' && argv[i][1] == 'I') add_incpath(argv[i][2] ? argv[i] + 2 : argv[i + 1]); if (system_macros) for (i = 0; system_macros_def[i]; i ++) ret = ret || define_macro(ls, system_macros_def[i]); for (i = 1; i < argc; i ++) if (argv[i][0] == '-' && argv[i][1] == 'D') ret = ret || define_macro(ls, argv[i] + 2); for (i = 1; i < argc; i ++) if (argv[i][0] == '-' && argv[i][1] == 'U') ret = ret || undef_macro(ls, argv[i] + 2); if (ls->flags & HANDLE_ASSERTIONS) { if (standard_assertions) for (i = 0; system_assertions_def[i]; i ++) make_assertion(system_assertions_def[i]); for (i = 1; i < argc; i ++) if (argv[i][0] == '-' && argv[i][1] == 'A') ret = ret || make_assertion(argv[i] + 2); for (i = 1; i < argc; i ++) if (argv[i][0] == '-' && argv[i][1] == 'B') ret = ret || destroy_assertion(argv[i] + 2); } else { for (i = 1; i < argc; i ++) if (argv[i][0] == '-' && (argv[i][1] == 'A' || argv[i][1] == 'B')) warning(-1, "assertions disabled"); } if (with_std_incpath) { for (i = 0; include_path_std[i]; i ++) add_incpath(include_path_std[i]); } for (i = 1; i < argc; i ++) if (argv[i][0] == '-' && argv[i][1] == 'J') add_incpath(argv[i][2] ? 
argv[i] + 2 : argv[i + 1]); if (print_version) { version(); return 1; } if (print_defs) { print_defines(); emit_defines = 1; } if (print_asserts && (ls->flags & HANDLE_ASSERTIONS)) { print_assertions(); emit_assertions = 1; } return ret; } int main(int argc, char *argv[]) { struct lexer_state ls; int r, fr = 0; init_cpp(); if ((r = parse_opt(argc, argv, &ls)) != 0) { if (r == 2) usage(argv[0]); return EXIT_FAILURE; } enter_file(&ls, ls.flags); while ((r = cpp(&ls)) < CPPERR_EOF) fr = fr || (r > 0); fr = fr || check_cpp_errors(&ls); free_lexer_state(&ls); wipeout(); #ifdef MEM_DEBUG report_leaks(); #endif return fr ? EXIT_FAILURE : EXIT_SUCCESS; } #endif ./arith.c0000644000175000017500000011354711620140753011144 0ustar renerene/* * Integer arithmetic evaluation. * * (c) Thomas Pornin 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include #include "arith.h" #define ARITH_OCTAL(x) ((x) >= '0' && (x) <= '7') #define ARITH_OVAL(x) ((x) - '0') #define ARITH_DECIM(x) ((x) >= '0' && (x) <= '9') #define ARITH_DVAL(x) ((x) - '0') #define ARITH_HEXAD(x) (ARITH_DECIM(x) \ || (x) == 'a' || (x) == 'A' \ || (x) == 'b' || (x) == 'B' \ || (x) == 'c' || (x) == 'C' \ || (x) == 'd' || (x) == 'D' \ || (x) == 'e' || (x) == 'E' \ || (x) == 'f' || (x) == 'F') #define ARITH_HVAL(x) (ARITH_DECIM(x) ? ARITH_DVAL(x) \ : (x) == 'a' || (x) == 'A' ? 10 \ : (x) == 'b' || (x) == 'B' ? 11 \ : (x) == 'c' || (x) == 'C' ? 12 \ : (x) == 'd' || (x) == 'D' ? 13 \ : (x) == 'e' || (x) == 'E' ? 14 : 15) #ifdef NATIVE_SIGNED /* ====================================================================== */ /* Arithmetics with native types */ /* ====================================================================== */ /* * The following properties are imposed by the C standard: * * -- Arithmetics on the unsigned type should never overflow; every * result is reduced modulo some power of 2. The macro NATIVE_UNSIGNED_BITS * should have been defined to that specific exponent. * * -- The signed type should use either two's complement, one's complement * or a sign bit and a magnitude. There should be an integer N such that * the maximum signed value is (2^N)-1 and the minimum signed value is * either -(2^N) or -((2^N)-1). -(2^N) is possible only for two's complement. 
*
 * -- The maximum signed value is at most equal to the maximum unsigned
 * value.
 *
 * -- Trap representations can only be:
 * ** In two's complement, 1 as sign bit and 0 for all value bits.
 * This can happen only if the minimum signed value is -((2^N)-1).
 * ** In one's complement, all bits set to 1.
 * ** In mantissa + sign, sign bit to 1 and 0 for all value bits.
 * Unsigned values have no trap representation achievable with numerical
 * operators. Only signed values can have such representations, with
 * operators &, |, ^, ~, << and >>. If trap representations are possible,
 * such occurrences are reported as warnings.
 *
 * -- The operators +, -, * and << may overflow or underflow on signed
 * quantities, which is potentially an error. A warning is emitted.
 *
 * -- The operator >> yields an implementation-defined result on
 * signed negative quantities. Usually, the sign is extended, but this
 * is not guaranteed. A warning is emitted.
 *
 * -- The operators / and % used with a second operand of 0 cannot work.
 * An error is emitted when such a call is performed. Furthermore, in
 * two's complement representation, with NATIVE_SIGNED_MIN == -(2^N)
 * for some N, the expression `NATIVE_SIGNED_MIN / (-1)' yields an
 * unrepresentable result, which is also an error.
 *
 *
 * For the value checks, we need to consider those different cases. So
 * we calculate the following macros:
 * -- TWOS_COMPLEMENT: is 1 if representation is two's complement, 0
 * otherwise.
 * -- ONES_COMPLEMENT: is 1 if representation is one's complement, 0
 * otherwise.
 * -- SIGNED_IS_BIGGER: 1 if the maximum signed value is equal to the
 * maximum unsigned value, 0 otherwise. NATIVE_SIGNED_MAX cannot
 * exceed the maximum unsigned value. If SIGNED_IS_BIGGER is 0, then
 * the maximum unsigned value is strictly greater than twice the
 * value of NATIVE_SIGNED_MAX (e.g. 65535 to 32767).
 * -- TRAP_REPRESENTATION: 1 if a trap representation is possible, 0
 * otherwise.
The only way trap representations are guaranteed * impossible is when TWOS_COMPLEMENT is set, and NATIVE_SIGNED_MIN * is equal to -NATIVE_SIGNED_MAX - 1. * * Those macros are calculated by some preprocessor directives. This * supposes that the implementation conforms to C99. Rules on preprocessing * were quite looser in C90, and it could be that an old compiler, used * for a cross-compiling task, does not get those right. Therefore, if * ARCH_DEFINED is defined prior to the inclusion of this file, those * four macros are supposed to be already defined. Otherwise they are * (re)defined. The macro ARCH_TRAP_DEFINED has the same meaning, but * is limited to the TRAP_REPRESENTATION macro (if ARCH_TRAP_DEFINED is * defined, the macro TRAP_REPRESENTATION is supposed to be already * defined; the three other macros are recalculated). * * * To sum up: * -- Whenever a division operator (/ or %) is invoked and would yield * an unrepresentable result, ARITH_ERROR() is invoked. * -- With ARITHMETIC_CHECKS undefined, ARITH_WARNING() is never invoked. * -- With ARITHMETIC_CHECKS defined: * ** If ARCH_DEFINED is defined, the including context must provide * the macros TWOS_COMPLEMENT, ONES_COMPLEMENT, SIGNED_IS_BIGGER * and TRAP_REPRESENTATION. * ** Otherwise, if ARCH_TRAP_DEFINED is defined, the including context * must provide the macro TRAP_REPRESENTATION. * The code then detects all operator invokations that would yield an * overflow, underflow, trap representation, or any implementation * defined result or undefined behaviour. The macro ARITH_WARNING() is * invoked for each detection. * -- Trap representation detection code supposes that the operands are * _not_ trap representation. */ #ifndef ARCH_DEFINED #undef TWOS_COMPLEMENT #undef ONES_COMPLEMENT #undef SIGNED_IS_BIGGER #ifndef ARCH_TRAP_DEFINED #undef TRAP_REPRESENTATION #endif #if (-1) & 3 == 3 /* * Two's complement. 
*/ #define TWOS_COMPLEMENT 1 #define ONES_COMPLEMENT 0 #ifndef ARCH_TRAP_DEFINED #if NATIVE_SIGNED_MIN < -NATIVE_SIGNED_MAX #define TRAP_REPRESENTATION 0 #else #define TRAP_REPRESENTATION 1 #endif #endif #elif (-1) & 3 == 2 /* * One's complement. */ #define TWOS_COMPLEMENT 0 #define ONES_COMPLEMENT 1 #ifndef ARCH_TRAP_DEFINED #define TRAP_REPRESENTATION 1 #endif #else /* * Mantissa + sign. */ #define TWOS_COMPLEMENT 0 #define ONES_COMPLEMENT 0 #ifndef ARCH_TRAP_DEFINED #define TRAP_REPRESENTATION 1 #endif #endif /* * Maximum native unsigned value. The first macro is for #if directives, * the second macro is for use as constant expression in C code. */ #define NATIVE_UNSIGNED_MAX ((((1U << (NATIVE_UNSIGNED_BITS - 1)) - 1U) \ << 1) + 1U) #define NATIVE_UNSIGNED_MAX_A (((((arith_u)1 << (NATIVE_UNSIGNED_BITS - 1)) \ - (arith_u)1) << 1) + (arith_u)1) #if NATIVE_SIGNED_MAX == NATIVE_UNSIGNED_MAX #define SIGNED_IS_BIGGER 1 #else #define SIGNED_IS_BIGGER 0 #endif #endif #undef NEGATIVE_IS_BIGGER #if NATIVE_SIGNED_MIN < -NATIVE_SIGNED_MAX #define NEGATIVE_IS_BIGGER 1 #else #define NEGATIVE_IS_BIGGER 0 #endif /* sanity check: we cannot have a trap representation if we have two's complement with NATIVE_SIGNED_MIN < -NATIVE_SIGNED_MAX */ #if TRAP_REPRESENTATION && NEGATIVE_IS_BIGGER #error Impossible to get trap representations. 
#endif /* operations on the unsigned type */ ARITH_DECL_MONO_S_U(to_u) { return (arith_u)x; } ARITH_DECL_MONO_I_U(fromint) { return (arith_u)x; } ARITH_DECL_MONO_L_U(fromulong) { return (arith_u)x; } ARITH_DECL_MONO_U_I(toint) { #if NATIVE_UNSIGNED_MAX > INT_MAX if (x > (arith_u)INT_MAX) return INT_MAX; #endif return (int)x; } ARITH_DECL_MONO_U_L(toulong) { #if NATIVE_UNSIGNED_MAX > LONG_MAX if (x > (arith_u)LONG_MAX) return LONG_MAX; #endif return (long)x; } ARITH_DECL_MONO_U_U(neg) { return -x; } ARITH_DECL_MONO_U_U(not) { return ~x; } ARITH_DECL_MONO_U_I(lnot) { return !x; } ARITH_DECL_MONO_U_I(lval) { return x != 0; } ARITH_DECL_BI_UU_U(plus) { return x + y; } ARITH_DECL_BI_UU_U(minus) { return x - y; } ARITH_DECL_BI_UU_I(lt) { return x < y; } ARITH_DECL_BI_UU_I(leq) { return x <= y; } ARITH_DECL_BI_UU_I(gt) { return x > y; } ARITH_DECL_BI_UU_I(geq) { return x >= y; } ARITH_DECL_BI_UU_I(same) { return x == y; } ARITH_DECL_BI_UU_I(neq) { return x != y; } ARITH_DECL_BI_UU_U(and) { return x & y; } ARITH_DECL_BI_UU_U(xor) { return x ^ y; } ARITH_DECL_BI_UU_U(or) { return x | y; } ARITH_DECL_BI_UU_U(star) { return x * y; } ARITH_DECL_BI_UI_U(lsh) { #ifdef ARITHMETIC_CHECKS if (y >= NATIVE_UNSIGNED_BITS) ARITH_WARNING(ARITH_EXCEP_LSH_W); else if (y < 0) ARITH_WARNING(ARITH_EXCEP_LSH_C); #endif return x << y; } ARITH_DECL_BI_UI_U(rsh) { #ifdef ARITHMETIC_CHECKS if (y >= NATIVE_UNSIGNED_BITS) ARITH_WARNING(ARITH_EXCEP_RSH_W); else if (y < 0) ARITH_WARNING(ARITH_EXCEP_RSH_C); #endif return x >> y; } ARITH_DECL_BI_UU_U(slash) { if (y == 0) ARITH_ERROR(ARITH_EXCEP_SLASH_D); return x / y; } ARITH_DECL_BI_UU_U(pct) { if (y == 0) ARITH_ERROR(ARITH_EXCEP_PCT_D); return x % y; } /* operations on the signed type */ ARITH_DECL_MONO_U_S(to_s) { #ifdef ARITHMETIC_CHECKS #if !SIGNED_IS_BIGGER if (x > (arith_u)NATIVE_SIGNED_MAX) ARITH_WARNING(ARITH_EXCEP_CONV_O); #endif #endif return (arith_s)x; } ARITH_DECL_MONO_I_S(fromint) { return (arith_s)x; } ARITH_DECL_MONO_L_S(fromlong) { 
return (arith_s)x;
}

/* signed -> int, clamped to [INT_MIN, INT_MAX] when the native type is wider */
ARITH_DECL_MONO_S_I(toint)
{
#if NATIVE_SIGNED_MIN < INT_MIN
	if (x < (arith_s)INT_MIN) return INT_MIN;
#endif
#if NATIVE_SIGNED_MAX > INT_MAX
	if (x > (arith_s)INT_MAX) return INT_MAX;
#endif
	return (int)x;
}

/* signed -> long, clamped to [LONG_MIN, LONG_MAX] when the native type is wider */
ARITH_DECL_MONO_S_L(tolong)
{
#if NATIVE_SIGNED_MIN < LONG_MIN
	if (x < (arith_s)LONG_MIN) return LONG_MIN;
#endif
#if NATIVE_SIGNED_MAX > LONG_MAX
	if (x > (arith_s)LONG_MAX) return LONG_MAX;
#endif
	return (long)x;
}

/* negation; only NATIVE_SIGNED_MIN can overflow, and only if it has no opposite */
ARITH_DECL_MONO_S_S(neg)
{
#ifdef ARITHMETIC_CHECKS
#if NEGATIVE_IS_BIGGER
	if (x == NATIVE_SIGNED_MIN)
		ARITH_WARNING(ARITH_EXCEP_NEG_O);
#endif
#endif
	return -x;
}

/*
 * bitwise complement; warns on the single operand value (depending on the
 * representation) whose complement is the trap representation
 */
ARITH_DECL_MONO_S_S(not)
{
#ifdef ARITHMETIC_CHECKS
#if TRAP_REPRESENTATION
	if (
#if TWOS_COMPLEMENT
		(x == NATIVE_SIGNED_MAX)
#elif ONES_COMPLEMENT
		(x == 0)
#else
		(x == NATIVE_SIGNED_MAX)
#endif
		) ARITH_WARNING(ARITH_EXCEP_NOT_T);
#endif
#endif
	return ~x;
}

ARITH_DECL_MONO_S_I(lnot)
{
	return !x;
}

ARITH_DECL_MONO_S_I(lval)
{
	return x != 0;
}

/*
 * Addition of signed values:
 * -- overflows occur only when both operands are strictly positive
 * -- underflows occur only when both operands are strictly negative
 * -- overflow check (both operands > 0):
 * ** if SIGNED_IS_BIGGER == 1, overflows are kept as such in the
 * unsigned world (if the signed addition overflows, so does the
 * unsigned, and vice versa)
 * ** if SIGNED_IS_BIGGER == 0, no overflow can happen in the unsigned
 * world
 * -- underflow check (both operands < 0):
 * ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement)
 * ++ we have a guaranteed underflow if one of the operand is equal
 * to NATIVE_SIGNED_MIN; otherwise, -x and -y are valid integers,
 * and we cast them into the unsigned world
 * ++ if SIGNED_IS_BIGGER == 1, underflows become unsigned overflows
 * with a non-zero result
 * ++ if SIGNED_IS_BIGGER == 0, no overflow happens in the unsigned
 * world; we use the fact that -NATIVE_SIGNED_MIN is then
 * exactly 1 more than NATIVE_SIGNED_MAX
 * ** if NEGATIVE_IS_BIGGER == 0, underflow check is identical to
 *
overflow check on (signed) -x and -y.
 */
ARITH_DECL_BI_SS_S(plus)
{
#ifdef ARITHMETIC_CHECKS
	/* overflow: both operands strictly positive */
	if (x > 0 && y > 0 && (
#if SIGNED_IS_BIGGER
		((arith_u)((arith_u)x + (arith_u)y) < (arith_u)x)
#else
		(((arith_u)x + (arith_u)y) > (arith_u)NATIVE_SIGNED_MAX)
#endif
		)) ARITH_WARNING(ARITH_EXCEP_PLUS_O);
	/* underflow: both operands strictly negative; the NATIVE_SIGNED_MIN
	   test comes first so that -x and -y are only evaluated when valid */
	else if (x < 0 && y < 0 && (
#if NEGATIVE_IS_BIGGER
		(x == NATIVE_SIGNED_MIN || y == NATIVE_SIGNED_MIN) ||
#if SIGNED_IS_BIGGER
		(((arith_u)(-x) + (arith_u)(-y) != 0)
			&& (arith_u)((arith_u)(-x) + (arith_u)(-y))
			< (arith_u)(-x))
#else
		(((arith_u)(-x) + (arith_u)(-y))
			> ((arith_u)1 + (arith_u)NATIVE_SIGNED_MAX))
#endif
#else
#if SIGNED_IS_BIGGER
		((arith_u)((arith_u)(-x) + (arith_u)(-y)) < (arith_u)(-x))
#else
		(((arith_u)(-x) + (arith_u)(-y)) > (arith_u)NATIVE_SIGNED_MAX)
#endif
#endif
		)) ARITH_WARNING(ARITH_EXCEP_PLUS_U);
#endif
	return x + y;
}

/*
 * Subtraction of signed values:
 * -- overflow: only if x > 0 and y < 0
 * ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement) and
 * y == NATIVE_SIGNED_MIN then overflow
 * ** otherwise, cast x and -y to unsigned, then add and check
 * for overflows
 * -- underflow: only if x < 0 and y > 0
 * ** if NEGATIVE_IS_BIGGER == 1 (must be two's complement):
 * ++ if x == NATIVE_SIGNED_MIN then underflow
 * ++ cast -x and y to unsigned, then add. If SIGNED_IS_BIGGER == 0,
 * just check. Otherwise, check for overflow with non-zero result.
 * ** if NEGATIVE_IS_BIGGER == 0: cast -x and y to unsigned, then
 * add. Overflow check as in addition.
*/
ARITH_DECL_BI_SS_S(minus)
{
#ifdef ARITHMETIC_CHECKS
	/* overflow: positive minus negative */
	if (x > 0 && y < 0 && (
#if NEGATIVE_IS_BIGGER
		(y == NATIVE_SIGNED_MIN) ||
#endif
#if SIGNED_IS_BIGGER
		((arith_u)((arith_u)x + (arith_u)(-y)) < (arith_u)x)
#else
		(((arith_u)x + (arith_u)(-y)) > (arith_u)NATIVE_SIGNED_MAX)
#endif
		)) ARITH_WARNING(ARITH_EXCEP_MINUS_O);
	/* underflow: negative minus positive */
	else if (x < 0 && y > 0 && (
#if NEGATIVE_IS_BIGGER
		(x == NATIVE_SIGNED_MIN) ||
#if SIGNED_IS_BIGGER
		((((arith_u)(-x) + (arith_u)y) != 0)
			&& ((arith_u)((arith_u)(-x) + (arith_u)y)
			< (arith_u)(-x)))
#else
		(((arith_u)(-x) + (arith_u)y)
			> ((arith_u)1 + (arith_u)NATIVE_SIGNED_MAX))
#endif
#else
#if SIGNED_IS_BIGGER
		((arith_u)((arith_u)(-x) + (arith_u)y) < (arith_u)(-x))
#else
		(((arith_u)(-x) + (arith_u)y) > (arith_u)NATIVE_SIGNED_MAX)
#endif
#endif
		)) ARITH_WARNING(ARITH_EXCEP_MINUS_U);
#endif
	return x - y;
}

/* signed comparisons: direct use of the native operators */
ARITH_DECL_BI_SS_I(lt)
{
	return x < y;
}

ARITH_DECL_BI_SS_I(leq)
{
	return x <= y;
}

ARITH_DECL_BI_SS_I(gt)
{
	return x > y;
}

ARITH_DECL_BI_SS_I(geq)
{
	return x >= y;
}

ARITH_DECL_BI_SS_I(same)
{
	return x == y;
}

ARITH_DECL_BI_SS_I(neq)
{
	return x != y;
}

/*
 * Provided neither x nor y is a trap representation:
 * -- one's complement: impossible to get a trap representation
 * -- two's complement and sign + mantissa: trap representation if and
 * only if x and y are strictly negative and (-x) & (-y) == 0
 * (in two's complement, -x is safe because overflow would occur only
 * if x was already a trap representation).
*/
ARITH_DECL_BI_SS_S(and)
{
#ifdef ARITHMETIC_CHECKS
#if TRAP_REPRESENTATION && !ONES_COMPLEMENT
	if (x < 0 && y < 0 && ((-x) & (-y)) == 0)
		ARITH_WARNING(ARITH_EXCEP_AND_T);
#endif
#endif
	return x & y;
}

/*
 * Provided neither x nor y is a trap representation:
 * -- two's complement: trap if and only if x != NATIVE_SIGNED_MAX && ~x == y
 * -- one's complement: trap if and only if x != 0 && ~x == y
 * -- mantissa + sign: trap if and only if x != 0 && -x == y
 */
ARITH_DECL_BI_SS_S(xor)
{
#ifdef ARITHMETIC_CHECKS
#if TRAP_REPRESENTATION
	if (
#if TWOS_COMPLEMENT
		(x != NATIVE_SIGNED_MAX && ~x == y)
#elif ONES_COMPLEMENT
		(x != 0 && ~x == y)
#else
		(x != 0 && -x == y)
#endif
		) ARITH_WARNING(ARITH_EXCEP_XOR_T);
#endif
#endif
	return x ^ y;
}

/*
 * Provided neither x nor y is a trap representation:
 * -- two's complement: impossible to trap
 * -- one's complement: trap if and only if x != 0 && y != 0 && (~x & ~y) == 0
 * -- mantissa + sign: impossible to trap
 */
ARITH_DECL_BI_SS_S(or)
{
#ifdef ARITHMETIC_CHECKS
#if TRAP_REPRESENTATION
#if ONES_COMPLEMENT
	if (x != 0 && y != 0 && (~x & ~y) == 0)
		ARITH_WARNING(ARITH_EXCEP_OR_T);
#endif
#endif
#endif
	return x | y;
}

/*
 * Left-shifting by a negative or greater than type width count is
 * forbidden. Left-shifting a negative value is forbidden (underflow).
 * Left-shifting a positive value can trigger an overflow. We check it
 * by casting into the unsigned world and simulating a truncation.
 *
 * If SIGNED_IS_BIGGER is set, then the signed type width is 1 more
 * than the unsigned type width (the sign bit is included in the width);
 * otherwise, if W is the signed type width, 1U << (W-1) is equal to
 * NATIVE_SIGNED_MAX + 1.
*/
ARITH_DECL_BI_SI_S(lsh)
{
#ifdef ARITHMETIC_CHECKS
	/* the tests are ordered so that at most one warning is emitted */
	if (y < 0) ARITH_WARNING(ARITH_EXCEP_LSH_C);
	else if (
#if SIGNED_IS_BIGGER
		y > NATIVE_UNSIGNED_BITS
#else
		y >= NATIVE_UNSIGNED_BITS
		|| (y > 0 && (((arith_u)1 << (y - 1))
			> (arith_u)NATIVE_SIGNED_MAX))
#endif
		) ARITH_WARNING(ARITH_EXCEP_LSH_W);
	else if (x < 0) ARITH_WARNING(ARITH_EXCEP_LSH_U);
	/* overflow if shifting back the truncated result does not give x */
	else if (x > 0 && ((((arith_u)x << y) & NATIVE_SIGNED_MAX) >> y)
		!= (arith_u)x) ARITH_WARNING(ARITH_EXCEP_LSH_O);
#endif
	return x << y;
}

/*
 * Right-shifting is handled as left-shifting, except that the problem
 * is somehow simpler: there is no possible overflow or underflow. Only
 * right-shifting a negative value yields an implementation defined
 * result (_not_ an undefined behaviour).
 */
ARITH_DECL_BI_SI_S(rsh)
{
#ifdef ARITHMETIC_CHECKS
	if (y < 0) ARITH_WARNING(ARITH_EXCEP_RSH_C);
	else if (
#if SIGNED_IS_BIGGER
		y > NATIVE_UNSIGNED_BITS
#else
		y >= NATIVE_UNSIGNED_BITS
		|| (y > 0 && (((arith_u)1 << (y - 1))
			> (arith_u)NATIVE_SIGNED_MAX))
#endif
		) ARITH_WARNING(ARITH_EXCEP_RSH_W);
	else if (x < 0) ARITH_WARNING(ARITH_EXCEP_RSH_N);
#endif
	return x >> y;
}

/*
 * Overflow can happen only if both operands have the same sign.
 * Underflow can happen only if both operands have opposite signs.
 *
 * Overflow checking: this is done quite inefficiently by performing
 * a division on the result and check if it matches the initial operand.
*/ ARITH_DECL_BI_SS_S(star) { #ifdef ARITHMETIC_CHECKS if (x == 0 || y == 0) return 0; if (x > 0 && y > 0) { if ((((arith_u)x * (arith_u)y) & (arith_u)NATIVE_SIGNED_MAX) / (arith_u)y != (arith_u)x) ARITH_WARNING(ARITH_EXCEP_STAR_O); } else if (x < 0 && y < 0) { if ( #if NEGATIVE_IS_BIGGER (x == NATIVE_SIGNED_MIN || y == NATIVE_SIGNED_MIN) || #endif (((arith_u)(-x) * (arith_u)(-y)) & (arith_u)NATIVE_SIGNED_MAX) / (arith_u)(-y) != (arith_u)(-x)) ARITH_WARNING(ARITH_EXCEP_STAR_O); } else if (x > 0 && y < 0) { if ((arith_u)x > (arith_u)1 && ( #if NEGATIVE_IS_BIGGER y == NATIVE_SIGNED_MIN || #endif (((arith_u)x * (arith_u)(-y)) & (arith_u)NATIVE_SIGNED_MAX) / (arith_u)(-y) != (arith_u)x)) ARITH_WARNING(ARITH_EXCEP_STAR_U); } else { if ((arith_u)y > (arith_u)1 && ( #if NEGATIVE_IS_BIGGER x == NATIVE_SIGNED_MIN || #endif (((arith_u)y * (arith_u)(-x)) & (arith_u)NATIVE_SIGNED_MAX) / (arith_u)(-x) != (arith_u)y)) ARITH_WARNING(ARITH_EXCEP_STAR_U); } #endif return x * y; } /* * Division by 0 is an error. The only other possible problem is an * overflow of the result. Such an overflow can only happen in two's * complement representation, when NEGATIVE_IS_BIGGER is set, and * one attempts to divide NATIVE_SIGNED_MIN by -1: the result is then * -NATIVE_SIGNED_MIN, which is not representable by the type. This is * considered as an error, not a warning, because it actually triggers * an exception on modern Pentium-based PC. */ ARITH_DECL_BI_SS_S(slash) { if (y == 0) ARITH_ERROR(ARITH_EXCEP_SLASH_D); #if NEGATIVE_IS_BIGGER else if (x == NATIVE_SIGNED_MIN && y == (arith_s)(-1)) ARITH_ERROR(ARITH_EXCEP_SLASH_O); #endif return x / y; } /* * Only division by 0 needs to be checked. 
*/ ARITH_DECL_BI_SS_S(pct) { if (y == 0) ARITH_ERROR(ARITH_EXCEP_PCT_D); return x % y; } ARITH_DECL_MONO_ST_US(octconst) { arith_u z = 0; for (; ARITH_OCTAL(*c); c ++) { arith_u w = ARITH_OVAL(*c); if (z > (NATIVE_UNSIGNED_MAX_A / 8)) ARITH_ERROR(ARITH_EXCEP_CONST_O); z *= 8; #if 0 /* obsolete */ /* NATIVE_UNSIGNED_MAX_A is 2^N - 1, 0 <= w <= 7 and 8 divides z */ if (z > (NATIVE_UNSIGNED_MAX_A - w)) ARITH_ERROR(ARITH_EXCEP_CONST_O); #endif z += w; } *ru = z; #if SIGNED_IS_BIGGER *rs = z; *sp = 1; #else if (z > NATIVE_SIGNED_MAX) { *sp = 0; } else { *rs = z; *sp = 1; } #endif return c; } ARITH_DECL_MONO_ST_US(decconst) { arith_u z = 0; for (; ARITH_DECIM(*c); c ++) { arith_u w = ARITH_DVAL(*c); if (z > (NATIVE_UNSIGNED_MAX_A / 10)) ARITH_ERROR(ARITH_EXCEP_CONST_O); z *= 10; if (z > (NATIVE_UNSIGNED_MAX_A - w)) ARITH_ERROR(ARITH_EXCEP_CONST_O); z += w; } *ru = z; #if SIGNED_IS_BIGGER *rs = z; *sp = 1; #else if (z > NATIVE_SIGNED_MAX) { *sp = 0; } else { *rs = z; *sp = 1; } #endif return c; } ARITH_DECL_MONO_ST_US(hexconst) { arith_u z = 0; for (; ARITH_HEXAD(*c); c ++) { arith_u w = ARITH_HVAL(*c); if (z > (NATIVE_UNSIGNED_MAX_A / 16)) ARITH_ERROR(ARITH_EXCEP_CONST_O); z *= 16; #if 0 /* obsolete */ /* NATIVE_UNSIGNED_MAX_A is 2^N - 1, 0 <= w <= 15 and 16 divides z */ if (z > (NATIVE_UNSIGNED_MAX_A - w)) ARITH_ERROR(ARITH_EXCEP_CONST_O); #endif z += w; } *ru = z; #if SIGNED_IS_BIGGER *rs = z; *sp = 1; #else if (z > NATIVE_SIGNED_MAX) { *sp = 0; } else { *rs = z; *sp = 1; } #endif return c; } #else /* ====================================================================== */ /* Arithmetics with a simple simulated type */ /* ====================================================================== */ /* * We simulate a type with the following characteristics: * -- the signed type width is equal to the unsigned type width (which * means that there is one less value bit in the signed type); * -- the signed type uses two's complement representation; * -- there is no trap 
representation; * -- overflows and underflows are truncated (but a warning is emitted * if ARITHMETIC_CHECKS is defined); * -- overflow on integer division is still an error; * -- right-shifting of a negative value extends the sign; * -- the shift count value is first cast to unsigned, then reduced modulo * the type size. * * These characteristics follow what is usually found on modern * architectures. * * The maximum emulated type size is twice the size of the unsigned native * type which is used to emulate the type. */ #undef SIMUL_ONE_TMP #undef SIMUL_MSW_TMP1 #undef SIMUL_MSW_MASK #undef SIMUL_LSW_TMP1 #undef SIMUL_LSW_MASK #define SIMUL_ONE_TMP ((SIMUL_ARITH_SUBTYPE)1) #define SIMUL_MSW_TMP1 (SIMUL_ONE_TMP << (SIMUL_MSW_WIDTH - 1)) #define SIMUL_MSW_MASK (SIMUL_MSW_TMP1 | (SIMUL_MSW_TMP1 - SIMUL_ONE_TMP)) #define SIMUL_LSW_TMP1 (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1)) #define SIMUL_LSW_MASK (SIMUL_LSW_TMP1 | (SIMUL_LSW_TMP1 - SIMUL_ONE_TMP)) #undef TMSW #undef TLSW #define TMSW(x) ((x) & SIMUL_MSW_MASK) #define TLSW(x) ((x) & SIMUL_LSW_MASK) #undef SIMUL_ZERO #undef SIMUL_ONE #define SIMUL_ZERO arith_strc(ARITH_TYPENAME, _zero) #define SIMUL_ONE arith_strc(ARITH_TYPENAME, _one) static arith_u SIMUL_ZERO = { 0, 0 }; static arith_u SIMUL_ONE = { 0, 1 }; /* * We use the fact that both the signed and unsigned type are the same * structure. The difference between the signed and the unsigned type * is a type information, and, as such, is considered compile-time and * not maintained in the value structure itself. This is a job for * the programmer / compiler. */ ARITH_DECL_MONO_S_U(to_u) { return x; } ARITH_DECL_MONO_I_U(fromint) { arith_u z; if (x < 0) return arith_op_u(neg)(arith_op_u(fromint)(-x)); /* * This code works because types smaller than int are promoted * by the C compiler before evaluating the >> operator. 
*/ z.msw = TMSW(((SIMUL_ARITH_SUBTYPE)x >> (SIMUL_LSW_WIDTH - 1)) >> 1); z.lsw = TLSW((SIMUL_ARITH_SUBTYPE)x); return z; } ARITH_DECL_MONO_L_U(fromulong) { arith_u z; #if (ULONG_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1 == 0 z.msw = 0; z.lsw = x; #else z.msw = TMSW(x >> SIMUL_LSW_WIDTH); z.lsw = TLSW((SIMUL_ARITH_SUBTYPE)x); #endif return z; } ARITH_DECL_MONO_U_I(toint) { #if ((INT_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1) == 0 if (x.msw != 0 || x.lsw > (SIMUL_ARITH_SUBTYPE)INT_MAX) return INT_MAX; return (int)x.lsw; #else #if (INT_MAX >> (SIMUL_SUBTYPE_BITS - 1)) == 0 if (x.msw > (SIMUL_ARITH_SUBTYPE)(INT_MAX >> SIMUL_LSW_WIDTH)) return INT_MAX; #endif return ((int)x.msw << SIMUL_LSW_WIDTH) | (int)x.lsw; #endif } ARITH_DECL_MONO_U_L(toulong) { #if ((ULONG_MAX >> (SIMUL_LSW_WIDTH - 1)) >> 1) == 0 if (x.msw != 0 || x.lsw > (SIMUL_ARITH_SUBTYPE)ULONG_MAX) return ULONG_MAX; return (unsigned long)x.lsw; #else #if (ULONG_MAX >> (SIMUL_SUBTYPE_BITS - 1)) == 0 if (x.msw > (SIMUL_ARITH_SUBTYPE)(ULONG_MAX >> SIMUL_LSW_WIDTH)) return ULONG_MAX; #endif return ((unsigned long)x.msw << SIMUL_LSW_WIDTH) | (unsigned long)x.lsw; #endif } ARITH_DECL_MONO_U_U(neg) { x = arith_op_u(not)(x); return arith_op_u(plus)(x, SIMUL_ONE); } ARITH_DECL_MONO_U_U(not) { x.msw = TMSW(~x.msw); x.lsw = TLSW(~x.lsw); return x; } ARITH_DECL_MONO_U_I(lnot) { return x.msw == 0 && x.lsw == 0; } ARITH_DECL_MONO_U_I(lval) { return x.msw != 0 || x.lsw != 0; } ARITH_DECL_BI_UU_U(plus) { x.lsw = TLSW(x.lsw + y.lsw); x.msw = TMSW(x.msw + y.msw); if (x.lsw < y.lsw) x.msw = TMSW(x.msw + 1); return x; } ARITH_DECL_BI_UU_U(minus) { return arith_op_u(plus)(x, arith_op_u(neg)(y)); } ARITH_DECL_BI_UI_U(lsh) { if (y == 0) return x; #ifdef ARITHMETIC_CHECKS if (y < 0) ARITH_WARNING(ARITH_EXCEP_LSH_C); else if (y >= SIMUL_NUMBITS) ARITH_WARNING(ARITH_EXCEP_LSH_W); #endif y = (unsigned)y % SIMUL_NUMBITS; if (y >= SIMUL_LSW_WIDTH) { /* * We use here the fact that the LSW size is always * equal to or greater than the MSW size. 
*/ x.msw = TMSW(x.lsw << (y - SIMUL_LSW_WIDTH)); x.lsw = 0; return x; } x.msw = TMSW((x.msw << y) | (x.lsw >> (SIMUL_LSW_WIDTH - y))); x.lsw = TLSW(x.lsw << y); return x; } ARITH_DECL_BI_UI_U(rsh) { #ifdef ARITHMETIC_CHECKS if (y < 0) ARITH_WARNING(ARITH_EXCEP_RSH_C); else if (y >= SIMUL_NUMBITS) ARITH_WARNING(ARITH_EXCEP_RSH_W); #endif y = (unsigned)y % SIMUL_NUMBITS; if (y >= SIMUL_LSW_WIDTH) { x.lsw = x.msw >> (y - SIMUL_LSW_WIDTH); x.msw = 0; return x; } x.lsw = TLSW((x.lsw >> y) | (x.msw << (SIMUL_LSW_WIDTH - y))); x.msw >>= y; return x; } ARITH_DECL_BI_UU_I(lt) { return x.msw < y.msw || (x.msw == y.msw && x.lsw < y.lsw); } ARITH_DECL_BI_UU_I(leq) { return x.msw < y.msw || (x.msw == y.msw && x.lsw <= y.lsw); } ARITH_DECL_BI_UU_I(gt) { return arith_op_u(lt)(y, x); } ARITH_DECL_BI_UU_I(geq) { return arith_op_u(leq)(y, x); } ARITH_DECL_BI_UU_I(same) { return x.msw == y.msw && x.lsw == y.lsw; } ARITH_DECL_BI_UU_I(neq) { return !arith_op_u(same)(x, y); } ARITH_DECL_BI_UU_U(and) { x.msw &= y.msw; x.lsw &= y.lsw; return x; } ARITH_DECL_BI_UU_U(xor) { x.msw ^= y.msw; x.lsw ^= y.lsw; return x; } ARITH_DECL_BI_UU_U(or) { x.msw |= y.msw; x.lsw |= y.lsw; return x; } #undef SIMUL_LSW_ODDLEN #undef SIMUL_LSW_HALFLEN #undef SIMUL_LSW_HALFMASK #define SIMUL_LSW_ODDLEN (SIMUL_LSW_WIDTH & 1) #define SIMUL_LSW_HALFLEN (SIMUL_LSW_WIDTH / 2) #define SIMUL_LSW_HALFMASK (~(~(SIMUL_ARITH_SUBTYPE)0 << SIMUL_LSW_HALFLEN)) ARITH_DECL_BI_UU_U(star) { arith_u z; SIMUL_ARITH_SUBTYPE a = x.lsw, b = y.lsw, t00, t01, t10, t11, c = 0, t; #if SIMUL_LSW_ODDLEN SIMUL_ARITH_SUBTYPE bms = b & (SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1)); b &= ~(SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1)); #endif t00 = (a & SIMUL_LSW_HALFMASK) * (b & SIMUL_LSW_HALFMASK); t01 = (a & SIMUL_LSW_HALFMASK) * (b >> SIMUL_LSW_HALFLEN); t10 = (a >> SIMUL_LSW_HALFLEN) * (b & SIMUL_LSW_HALFMASK); t11 = (a >> SIMUL_LSW_HALFLEN) * (b >> SIMUL_LSW_HALFLEN); t = z.lsw = t00; z.lsw = TLSW(z.lsw + (t01 << SIMUL_LSW_HALFLEN)); if (t > 
z.lsw) c ++;
	t = z.lsw;
	z.lsw = TLSW(z.lsw + (t10 << SIMUL_LSW_HALFLEN));
	if (t > z.lsw) c ++;
#if SIMUL_LSW_ODDLEN
	t = z.lsw;
	z.lsw = TLSW(z.lsw + (t11 << (2 * SIMUL_LSW_HALFLEN)));
	if (t > z.lsw) c ++;
	if (bms && (a & SIMUL_ONE_TMP)) {
		t = z.lsw;
		z.lsw = TLSW(z.lsw + b);
		if (t > z.lsw) c ++;
	}
#endif
	/* c holds the carries out of the lower word collected above */
	z.msw = TMSW(x.lsw * y.msw + x.msw * y.lsw + c
		+ (t01 >> (SIMUL_LSW_WIDTH - SIMUL_LSW_HALFLEN))
		+ (t10 >> (SIMUL_LSW_WIDTH - SIMUL_LSW_HALFLEN))
		+ (t11 >> (SIMUL_LSW_WIDTH - (2 * SIMUL_LSW_HALFLEN))));
	return z;
}

/*
 * This function calculates the unsigned integer division, yielding
 * both quotient and remainder. The divider (y) MUST be non-zero.
 */
static void arith_op_u(udiv)(arith_u x, arith_u y, arith_u *q, arith_u *r)
{
	int i, j;
	arith_u a;

	*q = SIMUL_ZERO;
	/* find the index i of the highest bit set in y */
	for (i = SIMUL_NUMBITS - 1; i >= 0; i --) {
		if (i >= (int)SIMUL_LSW_WIDTH
			&& (y.msw & (SIMUL_ONE_TMP << (i - SIMUL_LSW_WIDTH))))
			break;
		if (i < (int)SIMUL_LSW_WIDTH && (y.lsw & (SIMUL_ONE_TMP << i)))
			break;
	}
	/* shift-and-subtract division, one quotient bit per iteration:
	   first the bits which land in the upper word, then the others */
	a = arith_op_u(lsh)(y, SIMUL_NUMBITS - 1 - i);
	for (j = SIMUL_NUMBITS - 1 - i; j >= SIMUL_LSW_WIDTH; j --) {
		if (arith_op_u(leq)(a, x)) {
			x = arith_op_u(minus)(x, a);
			q->msw |= SIMUL_ONE_TMP << (j - SIMUL_LSW_WIDTH);
		}
		a = arith_op_u(rsh)(a, 1);
	}
	for (; j >= 0; j --) {
		if (arith_op_u(leq)(a, x)) {
			x = arith_op_u(minus)(x, a);
			q->lsw |= SIMUL_ONE_TMP << j;
		}
		a = arith_op_u(rsh)(a, 1);
	}
	*r = x;
}

/* unsigned division; a zero divisor is reported through ARITH_ERROR() */
ARITH_DECL_BI_UU_U(slash)
{
	arith_u q, r;

	if (arith_op_u(same)(y, SIMUL_ZERO))
		ARITH_ERROR(ARITH_EXCEP_SLASH_D);
	arith_op_u(udiv)(x, y, &q, &r);
	return q;
}

/* unsigned remainder; a zero divisor is reported through ARITH_ERROR() */
ARITH_DECL_BI_UU_U(pct)
{
	arith_u q, r;

	if (arith_op_u(same)(y, SIMUL_ZERO))
		ARITH_ERROR(ARITH_EXCEP_PCT_D);
	arith_op_u(udiv)(x, y, &q, &r);
	return r;
}

#undef SIMUL_TRAP
#undef SIMUL_TRAPL

/* top bit of the upper word (tested below as the sign bit) and top bit
   of the lower word */
#define SIMUL_TRAP	(SIMUL_ONE_TMP << (SIMUL_MSW_WIDTH - 1))
#define SIMUL_TRAPL	(SIMUL_ONE_TMP << (SIMUL_LSW_WIDTH - 1))

/* unsigned -> signed: same bits; warns when the sign bit is set */
ARITH_DECL_MONO_U_S(to_s)
{
#ifdef ARITHMETIC_CHECKS
	if (x.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_CONV_O);
#endif
	return x;
}
ARITH_DECL_MONO_I_S(fromint) { return arith_op_u(fromint)(x); } ARITH_DECL_MONO_L_S(fromlong) { if (x < 0) return arith_op_u(neg)( arith_op_u(fromulong)((unsigned long)(-x))); return arith_op_u(fromulong)((unsigned long)x); } ARITH_DECL_MONO_S_I(toint) { if (x.msw & SIMUL_TRAP) return -arith_op_u(toint)(arith_op_u(neg)(x)); return arith_op_u(toint)(x); } ARITH_DECL_MONO_S_L(tolong) { if (x.msw & SIMUL_TRAP) return -(long)arith_op_u(toulong)(arith_op_u(neg)(x)); return (long)arith_op_u(toulong)(x); } ARITH_DECL_MONO_S_S(neg) { #ifdef ARITHMETIC_CHECKS if (x.lsw == 0 && x.msw == SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_NEG_O); #endif return arith_op_u(neg)(x); } ARITH_DECL_MONO_S_S(not) { return arith_op_u(not)(x); } ARITH_DECL_MONO_S_I(lnot) { return arith_op_u(lnot)(x); } ARITH_DECL_MONO_S_I(lval) { return arith_op_u(lval)(x); } ARITH_DECL_BI_SS_S(plus) { arith_u z = arith_op_u(plus)(x, y); #ifdef ARITHMETIC_CHECKS if (x.msw & y.msw & ~z.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_PLUS_U); else if (~x.msw & ~y.msw & z.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_PLUS_O); #endif return z; } ARITH_DECL_BI_SS_S(minus) { arith_s z = arith_op_u(minus)(x, y); #ifdef ARITHMETIC_CHECKS if (x.msw & ~y.msw & ~z.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_MINUS_U); else if (~x.msw & y.msw & z.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_MINUS_O); #endif return z; } /* * Since signed and unsigned widths are equal for the simulated type, * we can use the unsigned left shift function, which performs the * the checks on the type width. */ ARITH_DECL_BI_SI_S(lsh) { arith_s z = arith_op_u(lsh)(x, y); #ifdef ARITHMETIC_CHECKS if (x.msw & SIMUL_TRAP) ARITH_WARNING(ARITH_EXCEP_LSH_U); else { /* * To check for possible overflow, we right shift the * result. We need to make the shift count proper so that * we do not emit a double-warning. Besides, the left shift * could have been untruncated but yet affet the sign bit, * so we must test this explicitly. 
*/ arith_s w = arith_op_u(rsh)(z, (unsigned)y % SIMUL_NUMBITS); if ((z.msw & SIMUL_TRAP) || w.msw != x.msw || w.lsw != x.lsw) ARITH_WARNING(ARITH_EXCEP_LSH_O); } #endif return z; } /* * We define that right shifting a negative value, besides being worth a * warning, duplicates the sign bit. This is the most useful and most * usually encountered behaviour, and the standard allows it. */ ARITH_DECL_BI_SI_S(rsh) { int xn = (x.msw & SIMUL_TRAP) != 0; arith_s z = arith_op_u(rsh)(x, y); int gy = (unsigned)y % SIMUL_NUMBITS; #ifdef ARITHMETIC_CHECKS if (xn) ARITH_WARNING(ARITH_EXCEP_RSH_N); #endif if (xn && gy > 0) { if (gy <= SIMUL_MSW_WIDTH) { z.msw |= TMSW(~(SIMUL_MSW_MASK >> gy)); } else { z.msw = SIMUL_MSW_MASK; z.lsw |= TLSW(~(SIMUL_LSW_MASK >> (gy - SIMUL_MSW_WIDTH))); } } return z; } ARITH_DECL_BI_SS_I(lt) { int xn = (x.msw & SIMUL_TRAP) != 0; int yn = (y.msw & SIMUL_TRAP) != 0; if (xn == yn) { return x.msw < y.msw || (x.msw == y.msw && x.lsw < y.lsw); } else { return xn; } } ARITH_DECL_BI_SS_I(leq) { int xn = (x.msw & SIMUL_TRAP) != 0; int yn = (y.msw & SIMUL_TRAP) != 0; if (xn == yn) { return x.msw < y.msw || (x.msw == y.msw && x.lsw <= y.lsw); } else { return xn; } } ARITH_DECL_BI_SS_I(gt) { return arith_op_s(lt)(y, x); } ARITH_DECL_BI_SS_I(geq) { return arith_op_s(leq)(y, x); } ARITH_DECL_BI_SS_I(same) { return x.msw == y.msw && x.lsw == y.lsw; } ARITH_DECL_BI_SS_I(neq) { return !arith_op_s(same)(x, y); } ARITH_DECL_BI_SS_S(and) { return arith_op_u(and)(x, y); } ARITH_DECL_BI_SS_S(xor) { return arith_op_u(xor)(x, y); } ARITH_DECL_BI_SS_S(or) { return arith_op_u(or)(x, y); } /* * This function calculates the signed integer division, yielding * both quotient and remainder. The divider (y) MUST be non-zero. 
*/ static void arith_op_s(sdiv)(arith_s x, arith_s y, arith_s *q, arith_s *r) { arith_u a = x, b = y, c, d; int xn = 0, yn = 0; if (x.msw & SIMUL_TRAP) { a = arith_op_u(neg)(x); xn = 1; } if (y.msw & SIMUL_TRAP) { b = arith_op_u(neg)(y); yn = 1; } arith_op_u(udiv)(a, b, &c, &d); if (xn != yn) *q = arith_op_u(neg)(c); else *q = c; if (xn != yn) *r = arith_op_u(neg)(d); else *r = d; } /* * Overflow/underflow check is done the following way: obvious cases * are checked (both upper words non-null, both upper words null...) * and border-line occurrences are verified with an unsigned division * (which is quite computationaly expensive). */ ARITH_DECL_BI_SS_S(star) { #ifdef ARITHMETIC_CHECKS arith_s z = arith_op_u(star)(x, y); int warn = 0; if (x.msw > 0) { if (y.msw > 0 #if SIMUL_LSW_ODDLEN || (y.lsw & SIMUL_TRAPL) #endif ) warn = 1; } #if SIMUL_LSW_ODDLEN else if (y.msw > 0 && (x.lsw & SIMUL_TRAPL)) warn = 1; #endif if (!warn && (x.msw > 0 || y.msw > 0 #if SIMUL_LSW_ODDLEN || ((x.lsw | y.lsw) & SIMUL_TRAPL) #endif )) { if (x.msw == SIMUL_MSW_MASK && x.lsw == SIMUL_LSW_MASK) { if (y.msw == SIMUL_TRAP && y.lsw == 0) warn = 1; } else if (!(x.msw == 0 && x.lsw == 0) && !arith_op_s(same)(arith_op_s(slash)(z, x), y)) { } warn = 1; } if (warn) ARITH_WARNING(((x.msw ^ y.msw) & SIMUL_TRAP) ? 
ARITH_EXCEP_STAR_U : ARITH_EXCEP_STAR_O); return z; #else return arith_op_u(star)(x, y); #endif } ARITH_DECL_BI_SS_S(slash) { arith_s q, r; if (arith_op_s(same)(y, SIMUL_ZERO)) ARITH_ERROR(ARITH_EXCEP_SLASH_D); else if (x.msw == SIMUL_TRAP && x.lsw == 0 && y.msw == SIMUL_MSW_MASK && y.lsw == SIMUL_LSW_MASK) ARITH_ERROR(ARITH_EXCEP_SLASH_O); arith_op_s(sdiv)(x, y, &q, &r); return q; } ARITH_DECL_BI_SS_S(pct) { arith_s q, r; if (arith_op_s(same)(y, SIMUL_ZERO)) ARITH_ERROR(ARITH_EXCEP_PCT_D); arith_op_s(sdiv)(x, y, &q, &r); return r; } ARITH_DECL_MONO_ST_US(octconst) { arith_u z = { 0, 0 }; for (; ARITH_OCTAL(*c); c ++) { unsigned w = ARITH_OVAL(*c); if (z.msw > (SIMUL_MSW_MASK / 8)) ARITH_ERROR(ARITH_EXCEP_CONST_O); z = arith_op_u(lsh)(z, 3); z.lsw |= w; } *ru = z; if (z.msw & SIMUL_TRAP) { *sp = 0; } else { *rs = z; *sp = 1; } return c; } ARITH_DECL_MONO_ST_US(decconst) { #define ARITH_ALPHA_TRAP (1U << (SIMUL_MSW_WIDTH - 1)) #define ARITH_ALPHA_MASK (ARITH_ALPHA_TRAP | (ARITH_ALPHA_TRAP - 1)) #define ARITH_ALPHA ((ARITH_ALPHA_MASK - 10 * (ARITH_ALPHA_TRAP / 5)) + 1) #define ARITH_ALPHA_A ((SIMUL_MSW_MASK - 10 * (SIMUL_TRAP / 5)) + 1) arith_u z = { 0, 0 }; for (; ARITH_DECIM(*c); c ++) { unsigned w = ARITH_DVAL(*c); SIMUL_ARITH_SUBTYPE t; if (z.msw > (SIMUL_MSW_MASK / 10) || (z.msw == (SIMUL_MSW_MASK / 10) && /* ARITH_ALPHA is between 1 and 9, inclusive. 
*/ #if ARITH_ALPHA == 5 z.lsw >= SIMUL_TRAPL #else z.lsw > ((SIMUL_TRAPL / 5) * ARITH_ALPHA_A + ((SIMUL_TRAPL % 5) * ARITH_ALPHA_A) / 5) #endif )) ARITH_ERROR(ARITH_EXCEP_CONST_O); z = arith_op_u(plus)(arith_op_u(lsh)(z, 3), arith_op_u(lsh)(z, 1)); t = TLSW(z.lsw + w); if (t < z.lsw) z.msw ++; z.lsw = t; } *ru = z; if (z.msw & SIMUL_TRAP) { *sp = 0; } else { *rs = z; *sp = 1; } return c; #undef ARITH_ALPHA_A #undef ARITH_ALPHA #undef ARITH_ALPHA_TRAP #undef ARITH_ALPHA_MASK } ARITH_DECL_MONO_ST_US(hexconst) { arith_u z = { 0, 0 }; for (; ARITH_HEXAD(*c); c ++) { unsigned w = ARITH_HVAL(*c); if (z.msw > (SIMUL_MSW_MASK / 16)) ARITH_ERROR(ARITH_EXCEP_CONST_O); z = arith_op_u(lsh)(z, 4); z.lsw |= w; } *ru = z; if (z.msw & SIMUL_TRAP) { *sp = 0; } else { *rs = z; *sp = 1; } return c; } #endif #undef ARITH_HVAL #undef ARITH_HEXAD #undef ARITH_DVAL #undef ARITH_DECIM #undef ARITH_OVAL #undef ARITH_OCTAL ./nhash.c0000644000175000017500000003135311620140753011130 0ustar renerene/* * Mixed hash table / binary tree code. * (c) Thomas Pornin 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include #include #include #include "nhash.h" #include "mem.h" /* * Hash a string into an `unsigned' value. This function is derived * from the hash function used in the ELF binary object file format * hash tables. The result size is a 32-bit number if the `unsigned' * type is big enough to hold 32-bit arbitrary numbers, a 16-bit number * otherwise. */ static unsigned hash_string(char *name) { unsigned h = 0; for (h = 0; *name; name ++) { unsigned g; h = (h << 4) + *(unsigned char *)name; #if UINT_MAX >= 0xffffffffU g = h & 0xF0000000U; h ^= (g >> 24); #else g = h & 0xF000U; h ^= (g >> 12); #endif h &= ~g; } return h; } /* * Each item in the table is a structure beginning with a `hash_item_header' * structure. Those headers define binary trees such that all left-descendants * (respectively right-descendants) of a given tree node have an associated * hash value strictly smaller (respectively greater) than the hash value * associated with this node. * * The `ident' field points to an array of char. The `sizeof(unsigned)' * first `char' contain a copy of an `unsigned' value which is the hashed * string, except the least significant bit. When this bit is set to 0, * the node contains the unique item using that hash value. If the bit * is set to 1, then there are several items with that hash value. * * When several items share the same hash value, they are linked together * in a linked list by their `left' field. 
The node contains no data; * it is a "fake item". * * The `char' following the hash value encode the item name for true items. * For fake items, they contain the pointer to the first true item of the * corresponding link list (suitably aligned). * * There are HTT_NUM_TREES trees; the items are sorted among trees by the * lest significant bits of their hash value. */ static void internal_init(HTT *htt, void (*deldata)(void *), int reduced) { htt->deldata = deldata; if (reduced) { HTT2 *htt2 = (HTT2 *)htt; htt2->tree[0] = htt2->tree[1] = NULL; } else { unsigned u; for (u = 0; u < HTT_NUM_TREES; u ++) htt->tree[u] = NULL; } } /* see nhash.h */ void HTT_init(HTT *htt, void (*deldata)(void *)) { internal_init(htt, deldata, 0); } /* see nhash.h */ void HTT2_init(HTT2 *htt, void (*deldata)(void *)) { internal_init((HTT *)htt, deldata, 1); } #define PTR_SHIFT (sizeof(hash_item_header *) * \ ((sizeof(unsigned) + sizeof(hash_item_header *) - 1) / \ sizeof(hash_item_header *))) #define TREE(u) (*(reduced ? ((HTT2 *)htt)->tree + ((u) & 1) \ : htt->tree + ((u) & (HTT_NUM_TREES - 1)))) /* * Find a node for the given hash value. If `father' is not NULL, fill * `*father' with a pointer to the node's father. * If the return value is NULL, then no existing node was found; if `*father' * is also NULL, the tree is empty. If the return value is not NULL but * `*father' is NULL, then the found node is the tree root. * * If `father' is not NULL, then `*leftson' is filled with 1 if the node * was looked for as the father left son, 0 otherwise. 
*/ static hash_item_header *find_node(HTT *htt, unsigned u, hash_item_header **father, int *leftson, int reduced) { hash_item_header *node = TREE(u); hash_item_header *nodef = NULL; int ls; u &= ~1U; while (node != NULL) { unsigned v = *(unsigned *)(node->ident); unsigned w = v & ~1U; if (u == w) break; nodef = node; if (u < w) { node = node->left; ls = 1; } else { node = node->right; ls = 0; } } if (father != NULL) { *father = nodef; *leftson = ls; } return node; } static void *internal_get(HTT *htt, char *name, int reduced) { unsigned u = hash_string(name), v; hash_item_header *node = find_node(htt, u, NULL, NULL, reduced); if (node == NULL) return NULL; v = *(unsigned *)(node->ident); if ((v & 1U) == 0) { return (strcmp(HASH_ITEM_NAME(node), name) == 0) ? node : NULL; } node = *(hash_item_header **)(node->ident + PTR_SHIFT); while (node != NULL) { if (strcmp(HASH_ITEM_NAME(node), name) == 0) return node; node = node->left; } return NULL; } /* see nhash.h */ void *HTT_get(HTT *htt, char *name) { return internal_get(htt, name, 0); } /* see nhash.h */ void *HTT2_get(HTT2 *htt, char *name) { return internal_get((HTT *)htt, name, 1); } /* * Make an item identifier from its name and its hash value. */ static char *make_ident(char *name, unsigned u) { size_t n = strlen(name) + 1; char *ident = getmem(n + sizeof(unsigned)); *(unsigned *)ident = u & ~1U; memcpy(ident + sizeof(unsigned), name, n); return ident; } /* * Make an identifier for a fake item, pointing to a true item. */ static char *make_fake_ident(unsigned u, hash_item_header *next) { char *ident = getmem(PTR_SHIFT + sizeof(hash_item_header *)); *(unsigned *)ident = u | 1U; *(hash_item_header **)(ident + PTR_SHIFT) = next; return ident; } /* * Adding an item is straightforward: * 1. look for its emplacement * 2. if no node is found, use the item as a new node and link it to the tree * 3. if a node is found: * 3.1. 
if the node is real, check for name inequality, then create a * fake node and assemble the two-element linked list * 3.2. if the node is fake, look for the name in the list; if not found, * add the node at the list end */ static void *internal_put(HTT *htt, void *item, char *name, int reduced) { unsigned u = hash_string(name), v; int ls; hash_item_header *father; hash_item_header *node = find_node(htt, u, &father, &ls, reduced); hash_item_header *itemg = item, *pnode; if (node == NULL) { itemg->left = itemg->right = NULL; itemg->ident = make_ident(name, u); if (father == NULL) { TREE(u) = itemg; } else if (ls) { father->left = itemg; } else { father->right = itemg; } return NULL; } v = *(unsigned *)(node->ident); if ((v & 1U) == 0) { if (strcmp(HASH_ITEM_NAME(node), name) == 0) return node; pnode = getmem(sizeof *pnode); pnode->left = node->left; pnode->right = node->right; pnode->ident = make_fake_ident(u, node); node->left = itemg; node->right = NULL; itemg->left = itemg->right = NULL; itemg->ident = make_ident(name, u); if (father == NULL) { TREE(u) = pnode; } else if (ls) { father->left = pnode; } else { father->right = pnode; } return NULL; } node = *(hash_item_header **)(node->ident + PTR_SHIFT); while (node != NULL) { if (strcmp(HASH_ITEM_NAME(node), name) == 0) return node; pnode = node; node = node->left; } itemg->left = itemg->right = NULL; itemg->ident = make_ident(name, u); pnode->left = itemg; return NULL; } /* see nhash.h */ void *HTT_put(HTT *htt, void *item, char *name) { return internal_put(htt, item, name, 0); } /* see nhash.h */ void *HTT2_put(HTT2 *htt, void *item, char *name) { return internal_put((HTT *)htt, item, name, 1); } /* * A fake node subnode list has shrunk to one item only; make the * node real again. 
* fnode the fake node * node the last remaining node * father the fake node father (NULL if the fake node is root) * leftson 1 if the fake node is a left son, 0 otehrwise * u the hash value for this node */ static void shrink_node(HTT *htt, hash_item_header *fnode, hash_item_header *node, hash_item_header *father, int leftson, unsigned u, int reduced) { node->left = fnode->left; node->right = fnode->right; if (father == NULL) { TREE(u) = node; } else if (leftson) { father->left = node; } else { father->right = node; } freemem(fnode->ident); freemem(fnode); } /* * Deletion algorithm: * 1. look for the node; if not found, exit * 2. if the node is real: * 2.1. check for equality; exit otherwise * 2.2. delete the node * 2.3. promote the leftest of right descendants or rightest of left * descendants * 3. if the node is fake: * 3.1. check the list items for equality; exit otherwise * 3.2. delete the correct item * 3.3. if there remains only one item, supress the fake node */ static int internal_del(HTT *htt, char *name, int reduced) { unsigned u = hash_string(name), v; int ls; hash_item_header *father; hash_item_header *node = find_node(htt, u, &father, &ls, reduced); hash_item_header *pnode, *fnode, *znode; char *tmp; if (node == NULL) return 0; v = *(unsigned *)(node->ident); if ((v & 1U) != 0) { fnode = node; node = znode = *(hash_item_header **)(node->ident + PTR_SHIFT); pnode = NULL; while (node != NULL) { if (strcmp(HASH_ITEM_NAME(node), name) == 0) break; pnode = node; node = node->left; } if (node == NULL) return 0; if (pnode == NULL) { /* * We supress the first item in the list. 
*/ *(hash_item_header **)(fnode->ident + PTR_SHIFT) = node->left; if (node->left->left == NULL) { shrink_node(htt, fnode, node->left, father, ls, u, reduced); } } else { pnode->left = node->left; if (pnode->left == NULL && znode == pnode) { shrink_node(htt, fnode, pnode, father, ls, u, reduced); } } } else { if (strcmp(HASH_ITEM_NAME(node), name) != 0) return 0; if (node->left != NULL) { for (znode = node, pnode = node->left; pnode->right; znode = pnode, pnode = pnode->right); if (znode != node) { znode->right = pnode->left; pnode->left = node->left; } pnode->right = node->right; } else if (node->right != NULL) { for (znode = node, pnode = node->right; pnode->left; znode = pnode, pnode = pnode->left); if (znode != node) { znode->left = pnode->right; pnode->right = node->right; } pnode->left = node->left; } else pnode = NULL; if (father == NULL) { TREE(u) = pnode; } else if (ls) { father->left = pnode; } else { father->right = pnode; } } tmp = node->ident; htt->deldata(node); freemem(tmp); return 1; } /* see nhash.h */ int HTT_del(HTT *htt, char *name) { return internal_del(htt, name, 0); } /* see nhash.h */ int HTT2_del(HTT2 *htt, char *name) { return internal_del((HTT *)htt, name, 1); } /* * Apply `action()' on all nodes of the tree whose root is given as * parameter `node'. If `wipe' is non-zero, the nodes are removed * from memory. 
*/ static void scan_node(hash_item_header *node, void (*action)(void *), int wipe) { unsigned v; if (node == NULL) return; scan_node(node->left, action, wipe); scan_node(node->right, action, wipe); v = *(unsigned *)(node->ident); if ((v & 1U) != 0) { hash_item_header *pnode, *nnode; for (pnode = *(hash_item_header **)(node->ident + PTR_SHIFT); pnode != NULL; pnode = nnode) { char *tmp = pnode->ident; nnode = pnode->left; action(pnode); if (wipe) freemem(tmp); } if (wipe) { freemem(node->ident); freemem(node); } } else { char *tmp = node->ident; action(node); if (wipe) freemem(tmp); } } /* see nhash.h */ void HTT_scan(HTT *htt, void (*action)(void *)) { unsigned u; for (u = 0; u < HTT_NUM_TREES; u ++) { scan_node(htt->tree[u], action, 0); } } /* see nhash.h */ void HTT2_scan(HTT2 *htt, void (*action)(void *)) { scan_node(htt->tree[0], action, 0); scan_node(htt->tree[1], action, 0); } /* see nhash.h */ void HTT_kill(HTT *htt) { unsigned u; for (u = 0; u < HTT_NUM_TREES; u ++) { scan_node(htt->tree[u], htt->deldata, 1); } } /* see nhash.h */ void HTT2_kill(HTT2 *htt) { scan_node(htt->tree[0], htt->deldata, 1); scan_node(htt->tree[1], htt->deldata, 1); } ./config.h0000644000175000017500000003620311620140753011300 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * (c) Louis P. Santillan 2011 * This file is derived from tune.h * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* ====================================================================== */ /* * The LOW_MEM macro triggers the use of macro storage which uses less * memory. It actually also improves performance on large, modern machines * (due to less cache pressure). This option implies no limitation (except * on the number of arguments a macro may, which is then limited to 32766) * so it is on by default. Non-LOW_MEM code is considered deprecated. */ #define LOW_MEM /* ====================================================================== */ /* * Define AMIGA for systems using "drive letters" at the beginning of * some paths; define MSDOS on systems with drive letters and using * backslashes to seperate directory components. */ /* #define AMIGA */ /* #define MSDOS */ /* ====================================================================== */ /* * Define this if your compiler does not know the strftime() function; * TurboC 2.01 under Msdos does not know strftime(). 
*/ /* #define NOSTRFTIME */ /* ====================================================================== */ /* * Buffering: there are two levels of buffering on input and output streams: * the standard libc buffering (manageable with setbuf() and setvbuf()) * and some buffering provided by ucpp itself. The ucpp buffering uses * two buffers, of size respectively INPUT_BUF_MEMG and OUTPUT_BUF_MEMG * (as defined below). * You can disable one or both of these bufferings by defining the macros * NO_LIBC_BUF and NO_UCPP_BUF. */ /* #define NO_LIBC_BUF */ /* #define NO_UCPP_BUF */ /* * On Unix stations, the system call mmap() might be used on input files. * This option is a subclause of ucpp internal buffering. On one station, * a 10% speed improvement was observed. Do not define this unless the * host architecture has the following characteristics: * -- Posix / Single Unix compliance * -- Text files correspond one to one with memory representation * If a file is not seekable or not mmapable, ucpp will revert to the * standard fread() solution. * * This feature is still considered beta quality. On some systems where * files can be bigger than memory address space (mainly, 32-bit systems * with files bigger than 4 GB), this option makes ucpp fail to operate * on those extremely large files. */ #define UCPP_MMAP /* * Performance issues: * -- On memory-starved systems, such as Minix-i86, do not use ucpp * buffering; keep only libc buffering. * -- If you do not use libc buffering, activate the UCPP_MMAP option. * Note that the UCPP_MMAP option is ignored if ucpp buffering is not * activated. * * On an Athlon 1200 running FreeBSD 4.7, the best performances are * achieved when libc buffering is activated and/or UCPP_MMAP is on. */ /* ====================================================================== */ /* * Define this if you want ucpp to generate tokenized PRAGMA tokens; * otherwise, it will generate raw string contents. 
This setting is * irrelevant to the stand-alone version of ucpp. */ #define PRAGMA_TOKENIZE /* * Define this to the special character that marks the end of tokens with * a string value inside a tokenized PRAGMA token. The #pragma and _Pragma() * directives which use this character will be a bit more difficult to * decode (but ucpp will not mind). 0 cannot be used. '\n' is fine because * it cannot appear inside a #pragma or _Pragma(), since newlines cannot be * embedded inside tokens, neither directly nor by macro substitution and * stringization. Besides, '\n' is portable. */ #define PRAGMA_TOKEN_END ((unsigned char)'\n') /* * Define this if you want ucpp to include encountered #pragma directives * in its output in non-lexer mode; _Pragma() are translated to equivalent * #pragma directives. */ #define PRAGMA_DUMP /* * According to my interpretation of the C99 standard, _Pragma() are * evaluated wherever macro expansion could take place. However, Neil Booth, * whose mother language is English (contrary to me) and who is well aware * of the C99 standard (and especially the C preprocessor) told me that * it was unclear whether _Pragma() are evaluated inside directives such * as #if, #include and #line. If you want to disable the evaluation of * _Pragma() inside such directives, define the following macro. */ /* #define NO_PRAGMA_IN_DIRECTIVE */ /* * The C99 standard mandates that the operator `##' must yield a single, * valid token, lest undefined behaviour befall upon thy head. Hence, * for instance, `+ ## +=' is forbidden, because `++=' is not a valid * token (although it is a valid list of two tokens, `++' and `='). * However, ucpp only emits a warning for such sin, and unmerges the * tokens (thus emitting `+' then `+=' for that example). When ucpp * produces text output, those two tokens will be separated by a space * character so that the basic rule of text output is preserved: when * parsed again, text output yields the exact same stream of tokens. 
* That extra space is virtual: it does not count as a true whitespace * token for stringization. * * However, it might be desirable, for some uses other than preprocessing * C source code, not to emit that extra space at all. To make ucpp behave * that way, define the DSHARP_TOKEN_MERGE macro. Please note that this * can trigger spurious token merging. For instance, with that macro * activated, `+ ## +=' will be output as `++=' which, if preprocessed * again, will read as `++' followed by `='. * * All this is irrelevant to lexer mode; and trying to merge incompatible * tokens is a shooting offence, anyway. */ /* #define DSHARP_TOKEN_MERGE */ /* ====================================================================== */ /* * Define INMACRO_FLAG to include two flags to the structure lexer_state, * that tell whether tokens come from a macro-replacement, and count those * macro-replacements. */ /* #define INMACRO_FLAG */ /* ====================================================================== */ /* * Paths where files are looked for by default, when #include is used. * Typical path is /usr/local/include and /usr/include, in that order. * If you want to set up no path, define the macro to 0. * * For Linux, get gcc includes too, or you will miss things like stddef.h. * The exact path varies much, depending on the distribution. */ #define STD_INCLUDE_PATH "/usr/local/include", "/usr/include" /* ====================================================================== */ /* * Arithmetic code for evaluation of #if expressions. Evaluation * uses either a native machine type, or an emulated two's complement * type. Division by 0 and overflow on division are considered as errors * and reported as such. If ARITHMETIC_CHECKS is defined, all other * operations that imply undefined or implementation-defined behaviour * are reported as warnings but otherwise performed nonetheless. 
* * For native type evaluation, the following macros should be defined: * NATIVE_SIGNED the native signed type * NATIVE_UNSIGNED the native corresponding unsigned type * NATIVE_UNSIGNED_BITS the native unsigned type width, in bits * NATIVE_SIGNED_MIN the native signed type minimum value * NATIVE_SIGNED_MAX the native signed type maximum value * * The code in the arith.c file performs some tricky detection * operations on the native type representation and possible existence * of a trap representation. These operations assume a C99-compliant * compiler; on a C90-only compiler, the operations are valid but may * yield incorrect results. You may force those settings with some * more macros: see the comments in arith.c (look for "ARCH_DEFINED"). * Remember that this is mostly a non-issue, unless you are building * ucpp with a pre-C99 cross-compiler and either the host or target * architecture uses a non-two's complement representation of signed * integers. Such a combination is pretty rare nowadays, so the best * you can do is forgetting completely this paragraph and live in peace. * * * If you do not have a handy native type (for instance, you compile ucpp * with a C90 compiler which lacks the "long long" type, or you compile * ucpp for a cross-compiler which should support an evaluation integer * type of a size that is not available on the host machine), you may use * a simulated type. The type uses two's complement representation and * may have any width from 2 bits to twice the underlying native type * width, inclusive (odd widths are allowed). To use an emulated type, * make sure that NATIVE_SIGNED is not defined, and define the following * macros: * SIMUL_ARITH_SUBTYPE the native underlying type to use * SIMUL_SUBTYPE_BITS the native underlying type width * SIMUL_NUMBITS the emulated type width * * Undefined and implementation-defined behaviours are warned upon, if * ARITHMETIC_CHECKS is defined. 
Results are truncated to the type * width; shift count for the << and >> operators is reduced modulo the * emulatd type width; right shifting of a signed negative value performs * sign extension (the result is left-padded with bits set to 1). */ /* * For native type evaluation with a 64-bit "long long" type. */ #define NATIVE_SIGNED long long #define NATIVE_UNSIGNED unsigned long long #define NATIVE_UNSIGNED_BITS 64 #define NATIVE_SIGNED_MIN (-9223372036854775807LL - 1) #define NATIVE_SIGNED_MAX 9223372036854775807LL /* * For emulation of a 64-bit type using a native 32-bit "unsigned long" * type. #undef NATIVE_SIGNED #define SIMUL_ARITH_SUBTYPE unsigned long #define SIMUL_SUBTYPE_BITS 32 #define SIMUL_NUMBITS 64 */ /* * Comment out the following line if you want to deactivate arithmetic * checks (warnings upon undefined and implementation-defined * behaviour). Arithmetic checks slow down a bit arithmetic operations, * especially multiplications, but this should not be an issue with * typical C source code. */ #define ARITHMETIC_CHECKS /* ====================================================================== */ /* * To force signedness of wide character constants, define WCHAR_SIGNEDNESS * to 0 for unsigned, 1 for signed. By default, wide character constants * are signed if the native `char' type is signed, and unsigned otherwise. #define WCHAR_SIGNEDNESS 0 */ /* * Standard assertions. They should include one cpu() assertion, one machine() * assertion (identical to cpu()), and one or more system() assertions. * * for Linux/PC: cpu(i386), machine(i386), system(unix), system(linux) * for Linux/Alpha: cpu(alpha), machine(alpha), system(unix), system(linux) * for Sparc/Solaris: cpu(sparc), machine(sparc), system(unix), system(solaris) * * These are only suggestions. On Solaris, machine() should be defined * for i386 or sparc (standard system header use such an assertion). For * cross-compilation, define assertions related to the target architecture. 
* * If you want no standard assertion, define STD_ASSERT to 0. */ #define STD_ASSERT 0 /* #define STD_ASSERT "cpu(i386)", "machine(i386)", "system(unix)", \ "system(freebsd)" */ /* ====================================================================== */ /* * System predefined macros. Nothing really mandatory, but some programs * might rely on those. * Each string must be either "name" or "name=token-list". If you want * no predefined macro, define STD_MACROS to 0. */ #define STD_MACROS 0 /* #define STD_MACROS "__FreeBSD=4", "__unix", "__i386", \ "__FreeBSD__=4", "__unix__", "__i386__" */ /* ====================================================================== */ /* * Default flags; HANDLE_ASSERTIONS is required for Solaris system headers. * See cpp.h for the definition of these flags. */ #define DEFAULT_CPP_FLAGS (DISCARD_COMMENTS | WARN_STANDARD \ | WARN_PRAGMA | FAIL_SHARP | MACRO_VAARG \ | CPLUSPLUS_COMMENTS | LINE_NUM | TEXT_OUTPUT \ | KEEP_OUTPUT | HANDLE_TRIGRAPHS \ | HANDLE_ASSERTIONS) #define DEFAULT_LEXER_FLAGS (DISCARD_COMMENTS | WARN_STANDARD | FAIL_SHARP \ | MACRO_VAARG | CPLUSPLUS_COMMENTS | LEXER \ | HANDLE_TRIGRAPHS | HANDLE_ASSERTIONS) /* ====================================================================== */ /* * Define this to use sigsetjmp()/siglongjmp() instead of setjmp()/longjmp(). * This is non-ANSI, but it improves performance on some POSIX system. * On typical C source code, such improvement is completely negligeable. */ /* #define POSIX_JMP */ /* ====================================================================== */ /* * Maximum value (plus one) of a character handled by the lexer; 128 is * alright for ASCII native source code, but 256 is needed for EBCDIC. * 256 is safe in both cases; you will have big problems if you set * this value to INT_MAX or above. On Minix-i86 or Msdos (small memory * model), define MAX_CHAR_VAL to 128. * * Set MAX_CHAR_VAL to a power of two to increase lexing speed. 
Beware * that lexer.c defines a static array of size MSTATE * MAX_CHAR_VAL * values of type int (MSTATE is defined in lexer.c and is about 40). */ #define MAX_CHAR_VAL 128 /* * If you want some extra character to be considered as whitespace, * define this macro to that space. On ISO-8859-1 machines, 160 is * the code for the unbreakable space. */ /* #define UNBREAKABLE_SPACE 160 */ /* * If you want whitespace tokens contents to be recorded (making them * tokens with a string content), define this. The macro STRING_TOKEN * will be adjusted accordingly. * Without this option, whitespace tokens are not even returned by the * lex() function. This is irrelevant for the non-lexer mode (almost -- * it might slow down a bit ucpp, and with this option, comments will be * kept inside #pragma directives). */ /* #define SEMPER_FIDELIS */ /* End of options overridable by UCPP_CONFIG and config.h */ ./atest.c0000644000175000017500000001346411620140753011152 0ustar renerene#include #include #include #include #if defined TEST_NATIVE #define NATIVE_SIGNED int #define NATIVE_UNSIGNED unsigned #define NATIVE_UNSIGNED_BITS 32 #define NATIVE_SIGNED_MIN LONG_MIN #define NATIVE_SIGNED_MAX LONG_MAX #elif defined TEST_SIMUL #define SIMUL_ARITH_SUBTYPE unsigned short #define SIMUL_SUBTYPE_BITS 16 #define SIMUL_NUMBITS 31 #else #error ====== Either TEST_NATIVE or TEST_SIMUL must be defined. 
#endif #define ARITH_TYPENAME zoinx #define ARITH_FUNCTION_HEADER static inline #define ARITH_WARNING(type) z_warn(type) #define ARITH_ERROR(type) z_error(type) void z_warn(int type); void z_error(int type); #include "arith.c" #if defined TEST_NATIVE static inline u_zoinx unsigned_to_uz(unsigned x) { return (u_zoinx)x; } static inline s_zoinx int_to_sz(int x) { return (s_zoinx)x; } static inline void print_uz(u_zoinx x) { printf("%u", x); } static inline void print_sz(s_zoinx x) { printf("%d", x); } #else static inline u_zoinx unsigned_to_uz(unsigned x) { u_zoinx v; v.msw = (x >> 16) & 0x7FFFU; v.lsw = x & 0xFFFFU; return v; } static inline s_zoinx int_to_sz(int x) { return unsigned_to_uz((unsigned)x); } static inline void print_uz(u_zoinx x) { printf("%u", ((unsigned)(x.msw) << 16) + (unsigned)(x.lsw)); } static inline void print_sz(s_zoinx x) { if (x.msw & 0x4000U) { putchar('-'); x = zoinx_u_neg(x); } print_uz(x); } #endif static inline void print_int(int x) { printf("%d", x); } static jmp_buf jbuf; void z_warn(int type) { switch (type) { case ARITH_EXCEP_CONV_O: fputs("[overflow on conversion] ", stdout); break; case ARITH_EXCEP_NEG_O: fputs("[overflow on unary minus] ", stdout); break; case ARITH_EXCEP_NOT_T: fputs("[trap representation on bitwise inversion] ", stdout); break; case ARITH_EXCEP_PLUS_O: fputs("[overflow on addition] ", stdout); break; case ARITH_EXCEP_PLUS_U: fputs("[underflow on addition] ", stdout); break; case ARITH_EXCEP_MINUS_O: fputs("[overflow on subtraction] ", stdout); break; case ARITH_EXCEP_MINUS_U: fputs("[underflow on subtraction] ", stdout); break; case ARITH_EXCEP_AND_T: fputs("[trap representation on bitwise and] ", stdout); break; case ARITH_EXCEP_XOR_T: fputs("[trap representation on bitwise xor] ", stdout); break; case ARITH_EXCEP_OR_T: fputs("[trap representation on bitwise or] ", stdout); break; case ARITH_EXCEP_LSH_W: fputs("[left shift by type width or more] ", stdout); break; case ARITH_EXCEP_LSH_C: fputs("[left shift by 
negative count] ", stdout); break; case ARITH_EXCEP_LSH_O: fputs("[overflow on left shift] ", stdout); break; case ARITH_EXCEP_LSH_U: fputs("[underflow on left shift] ", stdout); break; case ARITH_EXCEP_RSH_W: fputs("[right shift by type width or more] ", stdout); break; case ARITH_EXCEP_RSH_C: fputs("[right shift by negative count] ", stdout); break; case ARITH_EXCEP_RSH_N: fputs("[right shift of negative value] ", stdout); break; case ARITH_EXCEP_STAR_O: fputs("[overflow on multiplication] ", stdout); break; case ARITH_EXCEP_STAR_U: fputs("[underflow on multiplication] ", stdout); break; default: fprintf(stdout, "UNKNOWN WARNING TYPE: %d\n", type); exit(EXIT_FAILURE); } } void z_error(int type) { switch (type) { case ARITH_EXCEP_SLASH_D: fputs("division by 0\n", stdout); break; case ARITH_EXCEP_SLASH_O: fputs("overflow on division\n", stdout); break; case ARITH_EXCEP_PCT_D: fputs("division by 0 on modulus operator\n", stdout); break; default: fprintf(stdout, "UNKNOWN ERROR TYPE: %d\n", type); exit(EXIT_FAILURE); } longjmp(jbuf, 1); } int main(void) { #define OPTRY_GEN(op, x, y, convx, convy, printz) do { \ printf("%s %s %s -> ", #x, #op, #y); \ if (!setjmp(jbuf)) { \ printz(zoinx_ ## op (convx(x), convy(y))); \ putchar('\n'); \ } \ } while (0) #define IDENT(x) x #define OPTRY_UU_U(op, x, y) \ OPTRY_GEN(op, x, y, unsigned_to_uz, unsigned_to_uz, print_uz) #define OPTRY_UI_U(op, x, y) \ OPTRY_GEN(op, x, y, unsigned_to_uz, IDENT, print_uz) #define OPTRY_UU_I(op, x, y) \ OPTRY_GEN(op, x, y, unsigned_to_uz, unsigned_to_uz, print_int) #define OPTRY_SS_S(op, x, y) \ OPTRY_GEN(op, x, y, int_to_sz, int_to_sz, print_sz) #define OPTRY_SI_S(op, x, y) \ OPTRY_GEN(op, x, y, int_to_sz, IDENT, print_sz) #define OPTRY_SS_I(op, x, y) \ OPTRY_GEN(op, x, y, int_to_sz, int_to_sz, print_int) OPTRY_UU_U(u_plus, 3, 4); OPTRY_UU_U(u_plus, 1549587182, 1790478233); OPTRY_UU_U(u_minus, 1549587182, 1790478233); OPTRY_UU_U(u_minus, 1790478233, 1549587182); OPTRY_UU_U(u_star, 432429875, 
347785487); OPTRY_UU_U(u_slash, 432429875, 34487); OPTRY_UU_U(u_pct, 432429875, 34487); OPTRY_UI_U(u_lsh, 1783, 19); OPTRY_UI_U(u_lsh, 1783, 20); OPTRY_UI_U(u_lsh, 1783, 21); OPTRY_UI_U(u_rsh, 475902857, 7); OPTRY_UI_U(u_rsh, 475902857, 17); OPTRY_UI_U(u_rsh, 475902857, 38); OPTRY_SS_S(s_plus, 3, 4); OPTRY_SS_S(s_plus, 1549587182, 1790478233); OPTRY_SS_S(s_plus, -1549587182, -1790478233); OPTRY_SS_S(s_minus, 1549587182, 1790478233); OPTRY_SS_S(s_minus, 1790478233, 1549587182); OPTRY_SS_S(s_minus, -1790478233, -1549587182); OPTRY_SS_S(s_minus, -1790478233, 1549587182); OPTRY_SS_S(s_star, 432429875, 347785487); OPTRY_SS_S(s_star, 432429875, -347785487); OPTRY_SS_S(s_slash, 432429875, 34487); OPTRY_SS_S(s_slash, -432429875, 34487); OPTRY_SS_S(s_slash, 432429875, -34487); OPTRY_SS_S(s_slash, -432429875, -34487); OPTRY_SS_S(s_slash, 432429875, 0); OPTRY_SS_S(s_slash, -2147483647 - 1, -1); OPTRY_SS_S(s_pct, 432429875, 34487); OPTRY_SS_S(s_pct, 432429875, 0); OPTRY_SI_S(s_lsh, -1, 10); OPTRY_SI_S(s_lsh, 1783, 19); OPTRY_SI_S(s_lsh, 1783, 20); OPTRY_SI_S(s_lsh, 1783, 21); OPTRY_SI_S(s_rsh, -1024, 8); OPTRY_SI_S(s_rsh, 475902857, 7); OPTRY_SI_S(s_rsh, 475902857, 17); return 0; } ./README0000644000175000017500000011301311620140753010535 0ustar renereneucpp-1.3 is a C preprocessor compliant to ISO-C99. Author: Thomas Pornin Main site: http://pornin.nerim.net/ucpp/ INTRODUCTION ------------ A C preprocessor is a part of a C compiler responsible for macro replacement, conditional compilation and inclusion of header files. It is often found as a stand-alone program on Unix systems. ucpp is such a preprocessor; it is designed to be quick and light, but anyway fully compliant to the ISO standard 9899:1999, also known as C99. ucpp can be compiled as a stand-alone program, or linked to some other code; in the latter case, ucpp will output tokens, one at a time, on demand, as an integrated lexer. 
ucpp operates in two modes: -- lexer mode: ucpp is linked to some other code and outputs a stream of tokens (each call to the lex() function will yield one token) -- non-lexer mode: ucpp preprocesses text and outputs the resulting text to a file descriptor; if linked to some other code, the cpp() function must be called repeatedly, otherwise ucpp is a stand-alone binary. INSTALLATION ------------ 1. Uncompress the archive file and extract the source files. 2. Edit tune.h. Here is a short explanation of compile-time options: LOW_MEM Enable memory-saving functions; this is for low-end and old systems, but seems to be good for larger systems too. Keep it. NO_LIBC_BUF NO_UCPP_BUF Two options used to disable the two bufferings inside ucpp. Define both options for maximum memory savings but you will probably want to keep libc buffering for decent performance. Define none on large systems (modern 32 or 64-bit systems). UCPP_MMAP With this option, if ucpp internal buffering is active, ucpp will try to mmap() the input files. This might yield a slight performance improvement, but will work only on a limited set of architectures. PRAGMA_TOKENIZE Make ucpp generate tokenized PRAGMA tokens on #pragma and _Pragma(); tokenization is made this way: tokens are assembled as a null terminated array of unsigned chars; if a token has a string value (as defined by the STRING_TOKEN macro), the value follows the token, terminated by PRAGMA_TOKEN_END (by default, a newline character cast to unsigned char). Whitespace tokens are skipped. The "name" value of the PRAGMA token is a pointer to that array. This setting is irrelevant in non-lexer mode. PRAGMA_DUMP In non-lexer mode, keep #pragma in output; non-void _Pragma() are translated to the equivalent #pragma. Irrelevant in lexer mode. NO_PRAGMA_IN_DIRECTIVE Do not evaluate _Pragma() inside #if, #include, #include_next and #line directives; instead, emit an error (since the remaining _Pragma will surely imply a syntax error). 
DSHARP_TOKEN_MERGE When two tokens are to be merged with the `##' operator, but fail because they do not merge into a single valid token, ucpp keeps those two tokens separate by adding an extra space between them in text output. With this option on, that extra space is not added, which means that some tokens may merge partially if the text output is preprocessed again. See tune.h for details. INMACRO_FLAG In lexer mode, set the inmacro flag to 1 if the current token comes from a macro replacement, 0 otherwise. macro_count maintains an increasing counter of such replacements. CONTEXT tokens count as one macro replacement each. #pragma, and _Pragma() that do not come from a macro replacement, also count as one macro replacement each. This setting is irrelevant in non-lexer mode. STD_INCLUDE_PATH Default include path in stand-alone ucpp. STD_MACROS Default predefined macros in stand-alone ucpp. STD_ASSERT Default assertions in stand-alone ucpp. NATIVE_SIGNED NATIVE_UNSIGNED NATIVE_UNSIGNED_BITS NATIVE_SIGNED_MIN NATIVE_SIGNED_MAX SIMUL_ARITH_SUBTYPE SIMUL_SUBTYPE_BITS SIMUL_NUMBITS WCHAR_SIGNEDNESS Those options define how #if expressions are evaluated; see the cross-compilation section of this file for more info, and the comments in tune.h. Extra info is found in arith.h and arith.c, at the possible expense of your mental health. DEFAULT_LEXER_FLAGS DEFAULT_CPP_FLAGS Default flags in respectively lexer and non-lexer modes. POSIX_JMP Define this if your architecture defines sigsetjmp() and siglongjmp(); it is known to (very slightly) improve performance on AIX systems. MAX_CHAR_VAL ucpp will consider characters whose value is equal or above MAX_CHAR_VAL as outside the C source charset (so they will be treated just like '@', for instance). For ASCII systems, 128 is fine. 256 is a safer value, but uses more (static) memory. For performance reasons, use a power of two. If MAX_CHAR_VAL is correctly adjusted, ucpp should be compatible with any character set. 
UNBREAKABLE_SPACE If you want an extra-whitespace character, define this macro to that character. For instance, define this to 160 on an ISO-8859-1 system if you want the 'unbreakable space' to be considered as whitespace. SEMPER_FIDELIS With this option set, ucpp, when used as a lexer, will pass whitespace tokens to its caller, and those tokens will have their true content; this is intended for reconstruction of the source line. Beware that some comments may have embedded newlines. COPY_LINE_LENGTH ucpp can maintain a copy of the current source line, up to that length. Irrelevant to stand-alone version. *_MEMG Those settings modify ucpp behaviour, wrt memory allocations. With higher values, ucpp will perform less malloc() calls and will run faster, but it will use more memory. Reduce INPUT_BUF_MEMG and OUTPUT_BUF_MEMG on low-memory systems, if you kept ucpp buffering (see NO_UCPP_BUF option). 3. Edit the Makefile. You should define the variables CC and FLAGS; there are the following options: -DAUDIT Enable internal sanity checks; this slows down a bit ucpp. Do not define unless you plan to debug ucpp. -DMEM_CHECK With this setting, ucpp will check for the return value of malloc() and exit with a diagnostic when out of memory. MEM_CHECK is implied by AUDIT. -DMEM_DEBUG Enable memory debug code. This will track memory leaks and several occurrences of memory management errors; it will also slow down things and increase memory consumption, so you probably do not want to use this option. -DINLINE=foobar The ucpp code uses "inline" qualifier for some functions; by default, that qualifier is macro-replaced with nothing. Define INLINE to the correct replacement for your compiler, if supported. Note that all "inline" functions in ucpp are also "static". For any C99-compliant compiler, the GNU compiler (gcc), and the Compaq C compiler under Linux/Alpha, no -DINLINE is needed (see tune.h for details). 4. Compile by typing "make". This should produce the ucpp executable file. 
You might see some warning messages, especially with gcc: gcc believes some variables might be used prior to their initialization; ignore those messages. 5. Install wherever you want the binary and the man page ucpp.1. I have not provided an install sequence because I didn't bother. 6. If you do not have the make utility, compile each file separately and link them together. The exact details depend on your compiler. You must define the macro STAND_ALONE when compiling cpp.c (there is such a definition, commented out, in cpp.c, line 34). There is no "configure" script because: -- I do not like the very idea of a "configure" script. -- ucpp is written in ANSI-C and should be fairly portable. -- There is no such thing as "standard" settings for a C preprocessor. The predefined system macros, standard assertions,... must be tuned by the sysadmin. -- The primary goal of ucpp is to be included in compilers. The stand-alone version is mainly a debugging tool. Please note that you need an ISO-C90 (formerly ANSI) C compiler suite (including the standard library) to compile ucpp. If your compiler is not C99 (or later), read the cross-compilation section in this README file. The C90 and C99 standards state that external linkage names might be considered equal or different based upon only their first 6 characters; this rule might make ucpp not compile on a conformant C implementation. I have yet to see such an implementation, however. If you want to use ucpp as an integrated preprocessor and lexer, see the section REUSE. Compiling ucpp as a library is an exercise left to the reader. With the LOW_MEM code enabled, ucpp can run on a Minix-i86 or Msdos 16-bit small-memory-model machine. It will not be fully compliant on such an architecture to C99, since C99 states that at least one source code with 4095 simultaneously defined macros must be processed; ucpp will be limited to about 1500 macros (at most) due to memory restrictions. 
At least ucpp can preprocess its own code in these conditions. LOW_MEM is on by default because it seems to improve performance on large systems. LICENSE ------- The copyright notice and license is at the beginning of the Makefile and each source file. It is basically a BSD license, without the advertising subclause (which BSD dropped recently anyway) and with no reference to Berkeley (since the code is all mine, written from scratch). Informally, this means that you can reuse and redistribute the code as you want, provided that you state in the documentation (or any substantial part of the software) of redistributed code that I am the original author. (If you press a cdrom with 200 software packages, I do not insist on having my name on the cover of the cdrom -- just keep a Readme file somewhere on the cdrom, with the copyright notice included.) As a courteous gesture, if you reuse my code, please drop me a mail. It raises my self-esteem. REUSE ----- The code has been thought as part of a bigger project; it might be used as an integrated lexer, that will read files, process them as a C preprocessor, and output a stream of C tokens. To include this code into a project, compile with STAND_ALONE undefined. To use the preprocessor and lexer, several steps should be performed. See the file 'sample.c' for an example. 1. call init_cpp(). This function initializes the lexer automaton. 2. set the following global variables: no_special_macros non-zero if the special macros (__FILE__ and others) should not be defined. This is a global flag since it affects the redefinition of such macros (which are allowed if the special macros are not defined) c99_compliant if non-zero, define __STDC_VERSION__ to 199901L; this is the default; otherwise, do not define __STDC_VERSION__. Note that ucpp will accept to undefine __STDC_VERSION__ with a #undef directive. c99_hosted if strictly positive, define __STDC_HOSTED__ to 1. If zero, define __STDC_HOSTED__ to 0. 
If negative, do not define __STDC_HOSTED__. The default is 1. emit_defines and emit_assertions should be set to 0 for step 3. 3. call init_tables(). This function initializes the macro table and other things; it will initialize assertions if it has a non-zero argument. 4. call init_include_path(). This function will reset the include path to the list of paths given as argument. 5. set the following global variables emit_dependencies set to 1 if dependencies should be emitted during preprocessing set to 2 if dependencies should also be emitted for system include files emit_defines set to non-zero if #define macro definitions should be emitted when macros are defined emit_assertions set to non-zero if #assert directives should be emitted when assertions are made emit_output the FILE * where the above items are sent if one of the three emit_ variables is set to non zero transient_characters this is for some cross-compilation; see the relevant part in this README file for details 6. call set_init_filename() with the initial filename as argument; the second argument indicates whether the filename is real or conventional ("real" means "an fopen() on it will work"). 7.
initialize your struct lexer_state: call init_lexer_state() call init_lexer_mode() if the preprocessor is supposed to output a list of tokens, otherwise set the flags field to DEFAULT_CPP_FLAGS and set the output field to the FILE * where output should be sent (init_lexer_mode(), if called at all, must be called after init_lexer_state()) adjust the flags field; here is the meaning of flags: WARN_STANDARD emit the standard warnings WARN_ANNOYING emit the useless and annoying warnings WARN_TRIGRAPHS count trigraphs encountered; it is up to the caller to emit a warning if some trigraphs were indeed encountered; the count is stored in the count_trigraphs field of the struct lexer_state WARN_TRIGRAPHS_MORE emit a warning for each trigraph encountered WARN_PRAGMA emit a warning for each non-void _Pragma encountered in non-lexer mode (because these are dumped as #pragma in the output) and for each #pragma too, if ucpp was compiled without PRAGMA_DUMP FAIL_SHARP emit errors on '#' tokens beginning a line and not followed by a valid cpp directive CCHARSET emit errors when non-C characters are encountered; if this flag is not set, each non-C character will be considered as a BUNCH token (since C99 states that non-C characters are allowed as long as they "disappear" during preprocessing [through macro replacement and stringification for instance], this flag must not be set, for maximum C99 compliance) DISCARD_COMMENTS do not keep comments in output (irrelevant in lexer mode) CPLUSPLUS_COMMENTS understand new style comments (//) (mandatory for C99) LINE_NUM emit #line directives when entering a file, if not in lexer mode; emit CONTEXT token in lexer mode for #line and new files GCC_LINE_NUM if LINE_NUM is set, emit gcc-like directives instead of #line HANDLE_ASSERTIONS understand assertions in #if expressions (and #assert, #unassert) HANDLE_PRAGMA make PRAGMA tokens for #pragma; irrelevant in non-lexer mode (handling of some pragmas is required in C99 but is not of the 
competence of the preprocessor; without this flag, ucpp will ignore the contents of #pragma and _Pragma directives) MACRO_VAARG understand macros with a variable number of arguments (mandatory for C99) UTF8_SOURCE understand UTF-8 encoding: multibyte characters are considered equivalent to letters as far as syntax is concerned (they can be used in identifiers) LEXER act as a lexer, outputting tokens TEXT_OUTPUT this flag should be set to 0 if ucpp works as a lexer, 1 otherwise. It is somehow redundant with the LEXER flag, but the presence of those two different flags is needed in ucpp. KEEP_OUTPUT in non-lexer mode, emit the result of preprocessing COPY_LINE maintain a copy of the last read line in the copy_line field of the struct lexer_state ; see below for how to use this buffer HANDLE_TRIGRAPHS understand trigraphs, such as ??/ for \. This option should be set by default, except for some legacy code. There are other flags, but they are for private usage of ucpp. 8. adjust the input field in the lexer_state to the FILE * from where source file is read. If you use the UCPP_MMAP compile-time option, and your input file is eligible to mmap(), then you can call fopen_mmap_file() to open it, then set_input_file() to set ls->input and some other internal options. Do not call set_input_file() unless you just called fopen_mmap_file() just before on the same file. 9. call add_incpath() to add an include path, define_macro() and undef_macro() to add or remove macros, make_assertion() and destroy_assertion() to add or remove assertions. 10. call enter_file() (this is needed only in non-lexer mode, or if LINE_NUM is set). Afterwards: -- if you are in lexer mode, call lex(); each call will make the ctok field point to the next token. A non-zero return value is an error. lex() skips whitespace tokens. The memory used by the string value of some tokens (identifiers, numbers...) 
is automatically freed, so copy the contents of each such token if you want to keep it (tokens with a string content are identified by the STRING_TOKEN macro applied to their type). When lex() returned a non-zero value: if it is CPPERR_EOF, then end-of-input was reached. Otherwise, it is a genuine error and ls->ctok is an undefined token; skip it and call lex() again to ignore the error. -- otherwise, call cpp(); each call will analyze one or more tokens (one token if it did find neither a cpp directive nor a macro name). A positive return value is an error. For both functions, if the return value is CPPERR_EOF (which is a strictly positive value), then it means that the end of file was reached. Call check_cpp_errors() after end of file for pending errors (unfinished #if constructions for instance). In non-lexer mode, call flush_output(). In the struct lexer_state, the following fields might be read: line the current input line number oline the current output line number (in non-lexer mode) flags the flags described above count_trigraphs the number of trigraphs encountered inmacro the current token comes from a macro macro_count the current macro counter "flags" is an unsigned long and might be modified; the three others are of long type. To perform another preprocessing: use free_lexer_state() to release memory used by the buffers referenced in lexer_state, and go back to step 2. The different tables (macros, assertions...) should be reset to their respective initial contents. There is also the wipeout() function: when called, it should release (almost) all memory blocks allocated dynamically. After a wipeout(), ucpp should be back to its state at step 2 (init_cpp() initializes only static tables, that are never freed nor modified afterwards). The COPY_LINE buffer: the struct lexer_state contains two interesting fields, copy_line[] and cli. 
If the COPY_LINE flag is on, each read line is stored in this buffer, up to (at most) COPY_LINE_LENGTH - 1 characters (COPY_LINE_LENGTH is defined in tune.h). The last character of the buffer is always a zero, and if the line was read entirely, it is zero terminated; the trailing newline is not included. The purpose of this buffer is error-reporting. When an error occurs (cpp() returns a strictly positive value, or lex() returns a non-zero value), if your struct lexer_state is called ls, use this code: if (ls.cli != 0) ls.copy_line[ls.cli] = 0; This will add a trailing 0 if the line was not read entirely. ucpp may be configured at runtime to accept alternate characters as possible parts of identifiers. Typical intended usage is for the '$' and '@' characters. The two relevant functions are set_identifier_char() and unset_identifier_char(). When this call is issued: set_identifier_char('$'); then for all the remaining input, the '$' character will be considered as just another letter, as far as identifier tokenizing is concerned. This is for identifiers only; numeric constants are not modified by that setting. This call resets things back: unset_identifier_char('$'); Those two functions modify the static table which is initialized by init_cpp(). You may call init_cpp() at any time to restore the table to its standard state. When using this feature, take care of the following points: -- Do NOT use a character whose numeric value (as an `unsigned char' cast into an `int') is greater than or equal to MAX_CHAR_VAL (in tune.h). This would lead to unpredictable results, including an abrupt crash of ucpp. ucpp makes absolutely no check whatsoever on that matter: this is the programmer's responsibility. -- If you use a standard character such as '+' or '{', tokens which begin with those characters cease to exist. This can be troublesome. If you use set_identifier_char() on the '<' character, the handling of #include directives will be greatly disturbed. 
Therefore the use of any standard C character in set_identifier_char() or unset_identifier_char() is declared unsupported, forbidden and altogether unwise. -- Stricto sensu, when an extra character is declared as part of an identifier, ucpp behaviour ceases to conform to C99, which mandates that characters such as '$' or '@' must be treated as independent tokens of their own. Therefore, if your purpose is to use ucpp in a conformant C implementation, the use of set_identifier_char() should be made at least a runtime option. -- When enabling a new character in the middle of a macro replacement, the effect of that replacement may be delayed up to the end of that macro (but this is a "may"!). If you wish to trigger this feature with a custom #pragma or _Pragma(), you should remember it (for instance, using _Pragma() in a macro replacement, and then the extra character in the same macro replacement, is not reliable). COMPATIBILITY NOTES ------------------- The C language has a lengthening history. Nowadays, C comes in three flavours: -- Traditional C, aka "K&R". This is the language first described by Brian Kernighan and Dennis Ritchie, and implemented in the first C compiler that was ever coded. There are actually several dialects of K&R, and all of them are considered deprecated. -- ISO 9899:1990, aka C90, aka C89, aka ANSI-C. Formalized by ANSI in 1989 and adopted by ISO the next year, it is the C flavour many C compilers understand. It is mostly backward compatible with K&R C, but with enhancements, clarifications and several new features. -- ISO 9899:1999, aka C99. This is an evolution on C90, almost fully backward compatible with C90. C99 introduces many new and useful features, however, including in the preprocessor. There was also a normative addendum in 1995, that added a few features to C90 (for instance, digraphs) that are also present in C99. It is sometimes referred to as "C95" or "AMD 1".
ucpp implements the C99 standard, but can be used in a stricter mode, to enforce C90 compatibility (it will, however, still recognize some constructions that are not in plain C90). ucpp also knows about several extensions to C99: -- Assertions: this is an extension to the defined() operator, with its own namespace. Assertions seem to be used in several places, therefore ucpp knows about them. It is recommended to enable assertions by default on Solaris systems. -- Unicode: the C99 norm specifies that extended characters, from the ISO-10646 charset (aka "unicode") can be used in identifiers with the notations \u and \U. ucpp also accepts (with the proper flag) the UTF-8 encoding in the source file for such characters. -- #include_next directive: it works as a #include, but will look for files only in the directories specified in the include path after the one in which the current file was found. This is a GNU-ism that is useful for writing transparent wrappers around header files. Assertions and unicode are activated by specific flags; the #include_next support is always active. The ucpp code itself should be compatible with any ISO-C90 compiler. The cpp.c file is rather big (~ 64kB), it might confuse old 16-bit C compilers; the macro.c file is somewhat large also (~ 47kB). The evaluation of #if expressions is subject to some subtleties, see the section "cross-compilation". The lexer code makes no assumption about the source character set, but the following: source characters (those which have a syntactic value in C; comment and string literal contents are not concerned) must have a strictly positive value that is strictly lower than MAX_CHAR_VAL. The strict positivity is already assured by the C standard, so you just need to adjust MAX_CHAR_VAL. ucpp has been tested successfully on ASCII/ISO-8859-1 and EBCDIC systems. Beware that UTF-8 is NOT compatible with EBCDIC.
Pragma handling: when used in non-lexer mode, ucpp tries to output a source text that, when read again, will yield the exact same stream of tokens. This is not completely true with regards to line numbering in some tricky macro replacements, but it should work correctly otherwise, especially with pragma directives if the compile-time option PRAGMA_DUMP was set: #pragma are dumped, non-void _Pragma() are converted to the corresponding #pragma and dumped also. ucpp does not macro-replace the contents of #pragma and _Pragma(); If you want a macro-replaced pragma, use this: #define pragma_(x) _Pragma(#x) #define pragma(x) pragma_(x) Anyway, pragmas do not nest (an _Pragma() cannot be evaluated if it is inside a #pragma or another _Pragma). I wrote ucpp according to what is found in "The C Programming Language" from Brian Kernighan and Dennis Ritchie (2nd edition) and the C99 standard; but I could have misinterpreted some points. On some tricky points I got help from the helpful people from the comp.std.c newsgroup. For assertions and #include_next, I mimicked the behaviour of GNU cpp, as is stated in the GNU cpp info documentation. An open question is related to the following code: #define undefined ! #define makeun(x) un ## x #if makeun(defined foo) qux #else bar #endif ucpp will replace 'defined foo' with 0 first (since foo is not defined), then it will replace the macro makeun, and the expression will become 'un0', which is replaced by 0 since this is a remaining identifier. The expression evaluates to false, and 'bar' is emitted. However, some other preprocessors will replace makeun first, considering that it is not part of a 'defined' operator application; this will produce the macro 'undefined', which is replaced, and the expression becomes '!foo'. 'foo' is replaced by 0, the expression evaluates to true, and 'qux' is emitted. 
My opinion is that the behaviour is undefined, because use of the 'defined' operator does not match an allowed form prior to macro replacement (I mean, its syntax matches, but the operator is then made nonexistent by the replacement and therefore no longer matches). Other people think that the behaviour is well-specified, and contrary to what ucpp does. The only thing clear to me is that the wording of the standard (paragraph 6.10.1.3) is unclear. Since the ucpp behaviour makes ucpp code simpler and cleaner, and since it is unlikely that any real-life code would ever be disturbed by that interpretation of the standard, ucpp will keep its current behaviour until convincing evidence of my misinterpretation of the standard is given to me. The problem can only occur if one uses ## to make a 'defined' operator disappear from a #if expression (everybody agrees that the generation of a 'defined' operator triggers undefined behaviour). Another point about macro replacement has been discussed at length on several occasions. It is about the following code: #define CAT(a, b) CAT_(a, b) #define CAT_(a, b) a ## b #define AB(x, y) CAT(x, y) CAT(A, B)(X, Y) ucpp will produce `CAT(X,Y)' as replacement for the last line, whereas some other preprocessors output `XY'. The answer to the question "which behaviour is correct" seems to be "this is not defined by the C standard". It is the answer that has been actually given by the C standardization committee in 1992, to the defect report #017, question 23, which asked that very same question. Since the wording of the standard has not changed in these parts from the 1990 to the 1999 version, the preprocessor behaviour on the above-stated code should still be considered as undefined. It seems, however, that there used to be a time (around 1988) when the committee members agreed upon a precise macro-replacement algorithm, which specified quite clearly the preprocessor behaviour in such a situation.
ucpp behaviour is occasionally claimed as "incorrect" with regard to that algorithm. Since that macro replacement algorithm has never been published, and the committee itself backed out from it in 1992, I decided to disregard those feeble claims. It is possible, however, that at some point in the future I rewrite the ucpp macro replacement code, since that code is a bit messy and might be made to use less memory in some occasions. It is then possible that, in the aftermath of such a rewrite, the ucpp behaviour for the above stated code becomes tunable. Don't hold your breath, though. About _Pragma: the standard is not clear about when this operator is evaluated, and if it is allowed inside #if directives and such. For ucpp, I coded _Pragma as a special macro with lazy replacement: it will be evaluated wherever a macro could be replaced, and only at the end of the macro replacement (for practical purposes, _Pragma can be considered as a macro taking one argument, and being replaced by nothing, except for some tricky uses of the # and ## operators). This means that, by default, ucpp will evaluate _Pragma inside some directives (mainly, #if, #include, #include_next and #line), but it can be taught not to do so by defining NO_PRAGMA_IN_DIRECTIVE in tune.h. CROSS-COMPILATION ----------------- If compiled with a C99 development suite, ucpp should be fully C99-compliant on the host platform (up to my own understanding of the standard -- remember that this software is distributed as-is, without any guarantee).
However, if a pre-C99 compiler is used, or if the target machine is not the host machine (for instance when you build a cross-compiler), the evaluation of #if expressions is subject to some cross-compiling issues: -- character constants: when evaluating expressions, character constants are interpreted in the source character set context; this is allowed by the standard but this can lead to problems with code that expects this interpretation to match the one made in the C code. To ease cross-compilation, you can define a conversion array, and make the global variable transient_characters point to it. The array should contain 256 int; transient_characters[x] is the value of the character whose value is x in the source character set. This facility is provided for inclusion of ucpp inside another code; if you want a stand-alone ucpp with that conversion, hard-code the conversion table into eval.c and make transient_characters[] statically point to it. Alternatively, you could provide an option syntax to provide such a table on command-line, if you feel like it. -- wide character constants signedness: by default, ucpp makes wide characters as signed as what plain chars are on the build host. To force wide character constant signedness, define WCHAR_SIGNEDNESS to 0 (for unsigned) or 1 (for signed). Beware, however, that "native" wide character constants, even signed, are considered positive. Non-wide character constants are, according to the C99 standard, of type int, and therefore always signed. -- evaluation type: C90 states that all constants in #if expressions are considered as either long or unsigned long, and that the evaluation is performed with operands of that size. In C99, the situation is equivalent, except that the types used are intmax_t and uintmax_t, as defined in <stdint.h>.
ucpp can use two expression evaluators: one uses native integer types (one signed and one unsigned), the other evaluator emulates big integer numbers by representing them with two values of some unsigned type. The emulated type handles signed values in two's complement representation, and can be any width ranging from 2 bits to twice the size of the underlying native unsigned type used. An odd width is allowed. When right shifting an emulated signed negative value, it is left-padded with bits set to 1 (this is sign extension). When the ARITHMETIC_CHECKS macro is defined in tune.h, all occurrences of implementation-defined or undefined behaviour during arithmetic evaluation are reported as errors or warned upon. This includes all overflows and underflows on signed quantities, constants too large, and so on. Errors (which terminate immediately evaluation) are emitted for division by 0 (on / and % operators) and overflow (on / operator); otherwise, warnings are emitted and the faulty evaluation takes place. This prevents ucpp from crashing on typical x86 machines, while still allowing to use some extensions. FUTURE EVOLUTIONS ----------------- ucpp is quite complete now. There was a longstanding project of "traditional" preprocessing, but I dropped it because it would not map cleanly on the token-based ucpp structure. Maybe I will code a string-based preprocessor one day; it would certainly use some of the code from lexer.c, eval.c, mem.c and nhash.c. However, making such a tool is almost irrelevant nowadays. If one wants to handle such project, using ucpp as code base, I would happily provide some help, if needed. 
CHANGES ------- From 1.2 to 1.3: * brand new integer evaluation code, with precise evaluation and checks * new hash table implementation, with binary trees * relaxed attitude on failed `##' operators * bugfix on macro definition on command-line wrt nesting macros * support for up to 32766 macro arguments in LOW_MEM code * support for optional additional "identifier" characters such as '$' or '@' * bugfix: memory leak on void #assert From 1.1 to 1.2: * bugfix: numerous memory leaks * new function: wipeout(); this should release all malloc() blocks * bugfix: missing "newline" and trailing "context" tokens * improved included files name caching * included memory leak detection code From 1.0 to 1.1: * bugfix: missing newline when exiting from a non-newline-terminated file * bugfix: crash when resetting due to definition of the _Pragma pseudo-macro * bugfix: handling of additional "optional" whitespace with SEMPER_FIDELIS * improved handling of unreplaced arg macros wrt output line * tricky handling of utterly tricky #include * bugfix: spurious token `~=' eliminated From 0.9 to 1.0: * bugfix: crash after erroneous #assert * changed ERR_SHARP to FAIL_SHARP, EMUL_UINTMAX to SIMUL_UINTMAX * made "inline" default on gcc and DEC ccc (Linux/Alpha) * semantic of -I is now Unix-like (added directories are looked first) * added -J flag (to add include directories after the system ones) * cleaned up non-ascii issues * bugfix: missing brace in no-LOW_MEM code * bugfix: argument number check in variadic macros * bugfix: crash in non-lexer mode after some cases of unreplaced macro * bugfix: _Pragma() handling wrt # and ## * made evaluation of _Pragma() optional in #if, #include and #line * bugfix: re-dump of multiline #pragma * added the inmacro and macro_count flags * added mmap() support * added option to retain whitespace content in lexer mode From 0.8 to 0.9: * added check for division by 0 in #if evaluation * added check for non-standard line numbers * added check for trailing 
garbage in most directives * corrected signedness of char constants (always int, therefore always signed) * made LOW_MEM code, so that ucpp runs smoothly on low memory architectures * multiple bugfixes (using the GNU cpp testsuite) * added handling of _Pragma (as a macro) * added tokenization of pragma directives * added conservation of pragma directives in text output * produced Msdos 16-bit small memory model executable * produced Minix-86 executable From 0.7 to 0.8: * added some support for Amiga systems * fixed extra spacing in stringified tokens * fixed bug related to %:% and tolerated rogue sharps * namespace cleanup * bugfix for macro redefinition * added warning for evaluated comma operators in #if (ISO requirement) * -Dfoo now defines foo with content 1 (and not void content) * trigraphs can be disabled (for incorrect but legacy code) * fixed semantics for #include "file" (local directory) * fixed detection of protected files * produced a Msdos 16-bit executable From 0.6 to 0.7: * officially changed the goal to full C99 compliance * added the CONTEXT token and let NEWLINE tokens go * added report_context() for error reporting * enforced matching of #if/#endif (file-global nesting level = 0) * added support of C99 digraphs * added UTF-8 encoding support * added universal character names * rewrote #if expressions (sizes fixed, bignum, signed/unsigned fixed) * fixed incomplete evaluation of #if expressions * added transient_characters[] From 0.5 to 0.6: * disappearance of error_nonl() * added extra optional warnings for trigraphs * some bugfixes, especially in lexer mode * handled MacIntosh files correctly From 0.4 to 0.5: * nicer #pragma handling (a token can be emitted) * bugfix in lexer mode after #line and #error * sample.c an example of code linked with ucpp * made #if expressions conforming to standard signed/unsigned handling * added the copy_line[] buffer feature From 0.3 to 0.4: * relaxed interpretation of '#include foo' when foo ends up, after macro 
substitution, with a '' content * corrected the 'double-dot' bug * corrected two bugs related to the treatment of macro aborted calls (due to lack of arguments) * some namespaces cleanup, to ease integration into other code * documented the way to include ucpp into another program * made newlines embedded into strings illegal (and reported as such) From 0.2 to 0.3: * added support for system predefined macros * made several bugfixes * checked C99 compliance for most of the features * ucpp now accepts non-C characters on standard when used stand-alone * removed many useless spaces in the output From 0.1 to 0.2: * added support for assertions * added support for macros with variable arguments * split the pharaonic cpp.c file into many * made several bugfixes * relaxed the behaviour with regards to the void arguments * made C++-like comments an option THANKS TO --------- Volker Barthelmann, Neil Booth, Stephen Davies, Stéphane Ecolivet, Marc Espie, Marcus Holland-Moritz, Antoine Leca, Cyrille Lefevre, Dave Rivers, Loic Tortay and Laurent Wacrenier, for suggestions and beta-testing. Paul Eggert, Douglas A. Gwyn, Clive D.W. Feather, and the other guys from comp.std.c, for explanations about the standard. Dave Brolley, Jamie Lokier and Neil Booth, for discussion about tricky points on nesting macros. Brian Kernighan and Dennis Ritchie, for bringing C to mortal Men. ./macro.c0000644000175000017500000013542311620140753011133 0ustar renerene/* * (c) Thomas Pornin 1999 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. 
The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "tune.h" #include #include #include #include #include "ucppi.h" #include "mem.h" #include "nhash.h" /* * we store macros in a hash table, and retrieve them using their name * as identifier. 
*/ static HTT macros; static int macros_init_done = 0; static void del_macro(void *m) { struct macro *n = m; size_t i; for (i = 0; (int)i < n->narg; i ++) freemem(n->arg[i]); if (n->narg > 0) freemem(n->arg); #ifdef LOW_MEM if (n->cval.length) freemem(n->cval.t); #else if (n->val.nt) { for (i = 0; i < n->val.nt; i ++) if (S_TOKEN(n->val.t[i].type)) freemem(n->val.t[i].name); freemem(n->val.t); } #endif freemem(n); } static inline struct macro *new_macro(void) { struct macro *m = getmem(sizeof(struct macro)); m->narg = -1; m->nest = 0; #ifdef LOW_MEM m->cval.length = 0; #else m->val.nt = m->val.art = 0; #endif m->vaarg = 0; return m; } /* * for special macros, and the "defined" operator */ enum { MAC_NONE, MAC_DEFINED, MAC_LINE, MAC_FILE, MAC_DATE, MAC_TIME, MAC_STDC, MAC_PRAGMA }; #define MAC_SPECIAL MAC_LINE /* * returns 1 for "defined" * returns x > 1 for a special macro such as __FILE__ * returns 0 otherwise */ static inline int check_special_macro(char *name) { if (!strcmp(name, "defined")) return MAC_DEFINED; if (*name != '_') return MAC_NONE; if (*(name + 1) == 'P') { if (!strcmp(name, "_Pragma")) return MAC_PRAGMA; return MAC_NONE; } else if (*(name + 1) != '_') return MAC_NONE; if (no_special_macros) return MAC_NONE; if (!strcmp(name, "__LINE__")) return MAC_LINE; else if (!strcmp(name, "__FILE__")) return MAC_FILE; else if (!strcmp(name, "__DATE__")) return MAC_DATE; else if (!strcmp(name, "__TIME__")) return MAC_TIME; else if (!strcmp(name, "__STDC__")) return MAC_STDC; return MAC_NONE; } int c99_compliant = 1; int c99_hosted = 1; /* * add the special macros to the macro table */ static void add_special_macros(void) { struct macro *m; HTT_put(¯os, new_macro(), "__LINE__"); HTT_put(¯os, new_macro(), "__FILE__"); HTT_put(¯os, new_macro(), "__DATE__"); HTT_put(¯os, new_macro(), "__TIME__"); HTT_put(¯os, new_macro(), "__STDC__"); m = new_macro(); m->narg = 1; m->arg = getmem(sizeof(char *)); m->arg[0] = sdup("foo"); HTT_put(¯os, m, "_Pragma"); if 
(c99_compliant) { #ifndef LOW_MEM struct token t; #endif m = new_macro(); #ifdef LOW_MEM m->cval.t = getmem(9); m->cval.t[0] = NUMBER; mmv(m->cval.t + 1, "199901L", 8); m->cval.length = 9; #else t.type = NUMBER; t.line = 0; t.name = sdup("199901L"); aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG); #endif HTT_put(¯os, m, "__STDC_VERSION__"); } if (c99_hosted) { #ifndef LOW_MEM struct token t; #endif m = new_macro(); #ifdef LOW_MEM m->cval.t = getmem(3); m->cval.t[0] = NUMBER; mmv(m->cval.t + 1, "1", 2); m->cval.length = 3; #else t.type = NUMBER; t.line = 0; t.name = sdup("1"); aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG); #endif HTT_put(¯os, m, "__STDC_HOSTED__"); } } #ifdef LOW_MEM /* * We store macro arguments as a single-byte token MACROARG, followed * by the argument number as a one or two-byte value. If the argument * number is between 0 and 127 (inclusive), it is stored as such in * a single byte. Otherwise, it is supposed to be a 14-bit number, with * the 7 upper bits stored in the first byte (with the high bit set to 1) * and the 7 lower bits in the second byte. */ #endif /* * print the content of a macro, in #define form */ static void print_macro(void *vm) { struct macro *m = vm; char *mname = HASH_ITEM_NAME(m); int x = check_special_macro(mname); size_t i; if (x != MAC_NONE) { fprintf(emit_output, "/* #define %s */ /* special */\n", mname); return; } fprintf(emit_output, "#define %s", mname); if (m->narg >= 0) { fprintf(emit_output, "("); for (i = 0; i < (size_t)(m->narg); i ++) { fprintf(emit_output, i ? ", %s" : "%s", m->arg[i]); } if (m->vaarg) { fputs(m->narg ? ", ..." 
: "...", emit_output); } fprintf(emit_output, ")"); } #ifdef LOW_MEM if (m->cval.length == 0) { fputc('\n', emit_output); return; } fputc(' ', emit_output); for (i = 0; i < m->cval.length;) { int tt = m->cval.t[i ++]; if (tt == MACROARG) { unsigned anum = m->cval.t[i]; if (anum >= 128) anum = ((anum & 127U) << 8) | m->cval.t[++ i]; if (anum == (unsigned)m->narg) fputs("__VA_ARGS__", emit_output); else fputs(m->arg[anum], emit_output); i ++; } else if (S_TOKEN(tt)) { fputs((char *)(m->cval.t + i), emit_output); i += 1 + strlen((char *)(m->cval.t + i)); } else fputs(operators_name[tt], emit_output); } #else if (m->val.nt == 0) { fputc('\n', emit_output); return; } fputc(' ', emit_output); for (i = 0; i < m->val.nt; i ++) { if (m->val.t[i].type == MACROARG) { if (m->val.t[i].line == m->narg) fputs("__VA_ARGS__", emit_output); else fputs(m->arg[(size_t)(m->val.t[i].line)], emit_output); } else fputs(token_name(m->val.t + i), emit_output); } #endif fputc('\n', emit_output); } /* * Send a token to the output (a token_fifo in lexer mode, the output * buffer in stand alone mode). */ void print_token(struct lexer_state *ls, struct token *t, long uz_line) { char *x = t->name; if (uz_line && t->line < 0) t->line = uz_line; if (ls->flags & LEXER) { struct token at; at = *t; if (S_TOKEN(t->type)) { at.name = sdup(at.name); throw_away(ls->gf, at.name); } aol(ls->output_fifo->t, ls->output_fifo->nt, at, TOKEN_LIST_MEMG); return; } if (ls->flags & KEEP_OUTPUT) { for (; ls->oline < ls->line;) put_char(ls, '\n'); } if (!S_TOKEN(t->type)) x = operators_name[t->type]; for (; *x; x ++) put_char(ls, *x); } /* * Send a token to the output at a given line (this is for text output * and unreplaced macros due to lack of arguments). 
*/ static void print_token_nailed(struct lexer_state *ls, struct token *t, long nail_line) { char *x = t->name; if (ls->flags & LEXER) { print_token(ls, t, 0); return; } if (ls->flags & KEEP_OUTPUT) { for (; ls->oline < nail_line;) put_char(ls, '\n'); } if (!S_TOKEN(t->type)) x = operators_name[t->type]; for (; *x; x ++) put_char(ls, *x); } /* * send a reduced whitespace token to the output */ #define print_space(ls) do { \ struct token lt; \ lt.type = OPT_NONE; \ lt.line = (ls)->line; \ print_token((ls), <, 0); \ } while (0) /* * We found a #define directive; parse the end of the line, perform * sanity checks, store the new macro into the "macros" hash table. * * In case of a redefinition of a macro: we enforce the rule that a * macro should be redefined identically, including the spelling of * parameters. We emit an error on offending code; dura lex, sed lex. * After all, it is easy to avoid such problems, with a #undef directive. */ int handle_define(struct lexer_state *ls) { struct macro *m = 0, *n; #ifdef LOW_MEM struct token_fifo mv; #endif int ltwws = 1, redef = 0; char *mname = 0; int narg; size_t nt; long l = ls->line; #ifdef LOW_MEM mv.art = mv.nt = 0; #endif /* find the next non-white token on the line, this should be the macro name */ while (!next_token(ls) && ls->ctok->type != NEWLINE) { if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) mname = sdup(ls->ctok->name); break; } if (mname == 0) { error(l, "missing macro name"); return 1; } if (check_special_macro(mname)) { error(l, "trying to redefine the special macro %s", mname); goto warp_error; } /* * If a macro with this name was already defined: the K&R * states that the new macro should be identical to the old one * (with some arcane rule of equivalence of whitespace); otherwise, * redefining the macro is an error. Most preprocessors would * only emit a warning (or nothing at all) on an unidentical * redefinition. 
* * Since it is easy to avoid this error (with a #undef directive), * we choose to enforce the rule and emit an error. */ if ((n = HTT_get(¯os, mname)) != 0) { /* redefinition of a macro: we must check that we define it identical */ redef = 1; #ifdef LOW_MEM n->cval.rp = 0; #endif freemem(mname); mname = 0; } if (!redef) { m = new_macro(); m->narg = -1; #ifdef LOW_MEM #define mval mv #else #define mval (m->val) #endif } if (next_token(ls)) goto define_end; /* * Check if the token immediately following the macro name is * a left parenthesis; if so, then this is a macro with arguments. * Collect their names and try to match the next parenthesis. */ if (ls->ctok->type == LPAR) { int i, j; int need_comma = 0, saw_mdots = 0; narg = 0; while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) { error(l, "truncated macro definition"); goto define_error; } if (ls->ctok->type == COMMA) { if (saw_mdots) { error(l, "'...' must end the macro " "argument list"); goto warp_error; } if (!need_comma) { error(l, "void macro argument"); goto warp_error; } need_comma = 0; continue; } else if (ls->ctok->type == NAME) { if (saw_mdots) { error(l, "'...' must end the macro " "argument list"); goto warp_error; } if (need_comma) { error(l, "missing comma in " "macro argument list"); goto warp_error; } if (!redef) { aol(m->arg, narg, sdup(ls->ctok->name), 8); /* we must keep track of m->narg so that cleanup in case of error works. 
*/ m->narg = narg; if (narg == 128 && (ls->flags & WARN_STANDARD)) warning(l, "more arguments to " "macro than the ISO " "limit (127)"); #ifdef LOW_MEM if (narg == 32767) { error(l, "too many arguments " "in macro definition " "(max 32766)"); goto warp_error; } #endif } else { /* this is a redefinition of the macro; check equality between old and new definitions */ if (narg >= n->narg) goto redef_error; if (strcmp(ls->ctok->name, n->arg[narg ++])) goto redef_error; } need_comma = 1; continue; } else if ((ls->flags & MACRO_VAARG) && ls->ctok->type == MDOTS) { if (need_comma) { error(l, "missing comma before '...'"); goto warp_error; } if (redef && !n->vaarg) goto redef_error; if (!redef) m->vaarg = 1; saw_mdots = 1; need_comma = 1; continue; } else if (ls->ctok->type == RPAR) { if (narg > 0 && !need_comma) { error(l, "void macro argument"); goto warp_error; } if (redef && n->vaarg && !saw_mdots) goto redef_error; break; } else if (ttMWS(ls->ctok->type)) { continue; } error(l, "invalid macro argument"); goto warp_error; } if (!redef) { for (i = 1; i < narg; i ++) for (j = 0; j < i; j ++) if (!strcmp(m->arg[i], m->arg[j])) { error(l, "duplicate macro " "argument"); goto warp_error; } } if (!redef) m->narg = narg; } else { if (!ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) warning(ls->line, "identifier not followed by " "whitespace in #define"); ls->flags |= READ_AGAIN; narg = 0; } if (redef) nt = 0; /* now, we have the arguments. Let's get the macro contents. */ while (!next_token(ls) && ls->ctok->type != NEWLINE) { struct token t; t.type = ls->ctok->type; if (ltwws && ttMWS(t.type)) continue; t.line = 0; if (t.type == NAME) { int i; if ((ls->flags & MACRO_VAARG) && !strcmp(ls->ctok->name, "__VA_ARGS__")) { if (redef) { if (!n->vaarg) goto redef_error; } else if (!m->vaarg) { error(l, "'__VA_ARGS__' is forbidden " "in macros with a fixed " "number of arguments"); goto warp_error; } t.type = MACROARG; t.line = redef ? 
n->narg : m->narg; } for (i = 0; i < narg; i ++) if (!strcmp(redef ? n->arg[i] : m->arg[i], ls->ctok->name)) { t.type = MACROARG; /* this is a hack: we store the argument number in the line field */ t.line = i; break; } } if (!redef && S_TOKEN(t.type)) t.name = sdup(ls->ctok->name); if (ttMWS(t.type)) { if (ltwws) continue; #ifdef SEMPER_FIDELIS t.type = OPT_NONE; #else t.type = NONE; #endif ltwws = 1; } else ltwws = 0; if (!redef) { /* we ensure that each macro token has a correct line number */ if (t.type != MACROARG) t.line = 1; aol(mval.t, mval.nt, t, TOKEN_LIST_MEMG); } else { #ifdef LOW_MEM int tt; if (n->cval.rp >= n->cval.length) { #ifdef SEMPER_FIDELIS if (t.type != OPT_NONE) goto redef_error; #else if (t.type != NONE) goto redef_error; #endif } else if (t.type != n->cval.t[n->cval.rp]) { goto redef_error; } else if (t.type == MACROARG) { unsigned anum = n->cval.t[n->cval.rp + 1]; if (anum >= 128U) anum = ((anum & 127U) << 8) | m->cval.t[n->cval.rp + 2]; if (anum != (unsigned)t.line) goto redef_error; } else if (S_TOKEN(t.type) && strcmp(ls->ctok->name, (char *)(n->cval.t + n->cval.rp + 1))) { goto redef_error; } tt = n->cval.t[n->cval.rp ++]; if (S_TOKEN(tt)) n->cval.rp += 1 + strlen((char *)(n->cval.t + n->cval.rp)); else if (tt == MACROARG) { if (n->cval.t[++ n->cval.rp] >= 128) n->cval.rp ++; } #else if (nt >= n->val.nt) { #ifdef SEMPER_FIDELIS if (t.type != OPT_NONE) goto redef_error; #else if (t.type != NONE) goto redef_error; #endif } else if (t.type != n->val.t[nt].type || (t.type == MACROARG && t.line != n->val.t[nt].line) || (S_TOKEN(t.type) && strcmp(ls->ctok->name, n->val.t[nt].name))) { goto redef_error; } #endif nt ++; } } if (redef) { #ifdef LOW_MEM if (n->cval.rp < n->cval.length) goto redef_error_2; #else if (nt < n->val.nt) goto redef_error_2; #endif return 0; } /* now we have the complete macro; perform some checks about the operators # and ##, and, if everything is ok, store the macro into the hash table */ define_end: #ifdef 
SEMPER_FIDELIS if (mval.nt && mval.t[mval.nt - 1].type == OPT_NONE) { #else if (mval.nt && mval.t[mval.nt - 1].type == NONE) { #endif mval.nt --; if (mval.nt == 0) freemem(mval.t); } if (mval.nt != 0) { size_t i; /* some checks about the macro */ if (mval.t[0].type == DSHARP || mval.t[0].type == DIG_DSHARP || mval.t[mval.nt - 1].type == DSHARP || mval.t[mval.nt - 1].type == DIG_DSHARP) { error(l, "operator '##' may neither begin " "nor end a macro"); goto define_error; } if (m->narg >= 0) for (i = 0; i < mval.nt; i ++) if ((mval.t[i].type == SHARP || mval.t[i].type == DIG_SHARP) && (i == (mval.nt - 1) || (ttMWS(mval.t[i + 1].type) && (i == mval.nt - 2 || mval.t[i + 2].type != MACROARG)) || (!ttMWS(mval.t[i + 1].type) && mval.t[i + 1].type != MACROARG))) { error(l, "operator '#' not followed " "by a macro argument"); goto define_error; } } #ifdef LOW_MEM { size_t i, l; for (i = 0, l = 0; i < mval.nt; i ++) { l ++; if (S_TOKEN(mval.t[i].type)) l += 1 + strlen(mval.t[i].name); else if (mval.t[i].type == MACROARG) { l ++; if (mval.t[i].line >= 128) l ++; } } m->cval.length = l; if (l) m->cval.t = getmem(l); for (i = 0, l = 0; i < mval.nt; i ++) { m->cval.t[l ++] = mval.t[i].type; if (S_TOKEN(mval.t[i].type)) { size_t x = 1 + strlen(mval.t[i].name); mmv(m->cval.t + l, mval.t[i].name, x); l += x; freemem(mval.t[i].name); } else if (mval.t[i].type == MACROARG) { unsigned anum = mval.t[i].line; if (anum >= 128) { m->cval.t[l ++] = 128 | (anum >> 8); m->cval.t[l ++] = anum & 0xFF; } else { m->cval.t[l ++] = anum; } } } if (mval.nt) freemem(mval.t); } #endif HTT_put(¯os, m, mname); freemem(mname); if (emit_defines) print_macro(m); return 0; redef_error: while (ls->ctok->type != NEWLINE && !next_token(ls)); redef_error_2: error(l, "macro '%s' redefined unidentically", HASH_ITEM_NAME(n)); return 1; warp_error: while (ls->ctok->type != NEWLINE && !next_token(ls)); define_error: if (m) del_macro(m); if (mname) freemem(mname); #ifdef LOW_MEM if (mv.nt) { size_t i; for (i = 0; i < 
mv.nt; i ++) if (S_TOKEN(mv.t[i].type)) freemem(mv.t[i].name); freemem(mv.t); } #endif return 1; #undef mval } /* * Get the arguments for a macro. This code is tricky because there can * be multiple sources for these arguments, if we are in the middle of * a macro replacement; arguments are macro-replaced before inclusion * into the macro replacement. * * return value: * 1 no argument (last token read from next_token()) * 2 no argument (last token read from tfi) * 3 no argument (nothing read) * 4 error * * Void arguments are allowed in C99. */ static int collect_arguments(struct lexer_state *ls, struct token_fifo *tfi, int penury, struct token_fifo *atl, int narg, int vaarg, int *wr) { int ltwws = 1, npar = 0, i; struct token *ct = 0; int read_from_fifo = 0; long begin_line = ls->line; #define unravel(ls) (read_from_fifo = 0, !((tfi && tfi->art < tfi->nt \ && (read_from_fifo = 1) != 0 && (ct = tfi->t + (tfi->art ++))) \ || ((!tfi || penury) && !next_token(ls) && (ct = (ls)->ctok)))) /* * collect_arguments() is assumed to setup correctly atl * (this is not elegant, but it works) */ for (i = 0; i < narg; i ++) atl[i].art = atl[i].nt = 0; if (vaarg) atl[narg].art = atl[narg].nt = 0; *wr = 0; while (!unravel(ls)) { if (!read_from_fifo && ct->type == NEWLINE) ls->ltwnl = 1; if (ttWHI(ct->type)) { *wr = 1; continue; } if (ct->type == LPAR) { npar = 1; } break; } if (!npar) { if (ct == ls->ctok) return 1; if (read_from_fifo) return 2; return 3; } if (!read_from_fifo && ct == ls->ctok) ls->ltwnl = 0; i = 0; if ((narg + vaarg) == 0) { while(!unravel(ls)) { if (ttWHI(ct->type)) continue; if (ct->type == RPAR) goto harvested; npar = 1; goto too_many_args; } } while (!unravel(ls)) { struct token t; if (ct->type == LPAR) npar ++; else if (ct->type == RPAR && (-- npar) == 0) { if (atl[i].nt != 0 && ttMWS(atl[i].t[atl[i].nt - 1].type)) atl[i].nt --; i ++; /* * C99 standard states that at least one argument * should be present for the ... 
part; to relax * this behaviour, change 'narg + vaarg' to 'narg'. */ if (i < (narg + vaarg)) { error(begin_line, "not enough arguments " "to macro"); return 4; } if (i > narg) { if (!(ls->flags & MACRO_VAARG) || !vaarg) goto too_many_args; } goto harvested; } else if (ct->type == COMMA && npar <= 1 && i < narg) { if (atl[i].nt != 0 && ttMWS(atl[i].t[atl[i].nt - 1].type)) atl[i].nt --; if (++ i == narg) { if (!(ls->flags & MACRO_VAARG) || !vaarg) goto too_many_args; } if (i > 30000) goto too_many_args; ltwws = 1; continue; } else if (ltwws && ttWHI(ct->type)) continue; t.type = ct->type; if (!read_from_fifo) t.line = ls->line; else t.line = ct->line; /* * Stringification applies only to macro arguments; * so we handle here OPT_NONE. * OPT_NONE is kept, but does not count as whitespace, * and merges with other whitespace to give a fully * qualified NONE token. Two OPT_NONE tokens merge. * Initial and final OPT_NONE are discarded (initial * is already done, as OPT_NONE is matched by ttWHI). */ if (ttWHI(t.type)) { if (t.type != OPT_NONE) { t.type = NONE; #ifdef SEMPER_FIDELIS t.name = sdup(" "); throw_away(ls->gf, t.name); #endif ltwws = 1; } if (atl[i].nt > 0 && atl[i].t[atl[i].nt - 1].type == OPT_NONE) atl[i].nt --; } else { ltwws = 0; if (S_TOKEN(t.type)) { t.name = ct->name; if (ct == (ls)->ctok) { t.name = sdup(t.name); throw_away(ls->gf, t.name); } } } aol(atl[i].t, atl[i].nt, t, TOKEN_LIST_MEMG); } error(begin_line, "unfinished macro call"); return 4; too_many_args: error(begin_line, "too many arguments to macro"); while (npar && !unravel(ls)) { if (ct->type == LPAR) npar ++; else if (ct->type == RPAR) npar --; } return 4; harvested: if (i > 127 && (ls->flags & WARN_STANDARD)) warning(begin_line, "macro call with %d arguments (ISO " "specifies 127 max)", i); return 0; #undef unravel } /* * concat_token() is called when the ## operator is used. It uses * the struct lexer_state dsharp_lexer to parse the result of the * concatenation. 
* * Law enforcement: if the whole string does not produce a valid * single token, an error (non-zero result) is returned. */ struct lexer_state dsharp_lexer; static inline int concat_token(struct token *t1, struct token *t2) { char *n1 = token_name(t1), *n2 = token_name(t2); size_t l1 = strlen(n1), l2 = strlen(n2); unsigned char *x = getmem(l1 + l2 + 1); int r; mmv(x, n1, l1); mmv(x + l1, n2, l2); x[l1 + l2] = 0; dsharp_lexer.input = 0; dsharp_lexer.input_string = x; dsharp_lexer.pbuf = 0; dsharp_lexer.ebuf = l1 + l2; dsharp_lexer.discard = 1; dsharp_lexer.flags = DEFAULT_LEXER_FLAGS; dsharp_lexer.pending_token = 0; r = next_token(&dsharp_lexer); freemem(x); return (r == 1 || dsharp_lexer.pbuf < (l1 + l2) || dsharp_lexer.pending_token || (dsharp_lexer.pbuf == (l1 + l2) && !dsharp_lexer.discard)); } #ifdef PRAGMA_TOKENIZE /* * tokenize_string() takes a string as input, and split it into tokens, * reassembling the tokens into a single compressed string generated by * compress_token_list(); this function is used for _Pragma processing. */ struct lexer_state tokenize_lexer; static char *tokenize_string(struct lexer_state *ls, char *buf) { struct token_fifo tf; size_t bl = strlen(buf); int r; tokenize_lexer.input = 0; tokenize_lexer.input_string = (unsigned char *)buf; tokenize_lexer.pbuf = 0; tokenize_lexer.ebuf = bl; tokenize_lexer.discard = 1; tokenize_lexer.flags = ls->flags | LEXER; tokenize_lexer.pending_token = 0; tf.art = tf.nt = 0; while (!(r = next_token(&tokenize_lexer))) { struct token t, *ct = tokenize_lexer.ctok; if (ttWHI(ct->type)) continue; t = *ct; if (S_TOKEN(t.type)) t.name = sdup(t.name); aol(tf.t, tf.nt, t, TOKEN_LIST_MEMG); } if (tokenize_lexer.pbuf < bl) goto tokenize_error; return (char *)((compress_token_list(&tf)).t); tokenize_error: if (tf.nt) { for (tf.art = 0; tf.art < tf.nt; tf.art ++) if (S_TOKEN(tf.t[tf.art].type)) freemem(tf.t[tf.art].name); freemem(tf.t); } return 0; } #endif /* * stringify_string() has a self-explanatory name. 
It is called when * the # operator is used in a macro and a string constant must be * stringified. */ static inline char *stringify_string(char *x) { size_t l; int i, inside_str = 0, inside_cc = 0, must_quote, has_quoted = 0; char *y, *d; for (i = 0; i < 2; i ++) { if (i) d[0] = '"'; for (l = 1, y = x; *y; y ++, l ++) { must_quote = 0; if (inside_cc) { if (*y == '\\') { must_quote = 1; has_quoted = 1; } else if (!has_quoted && *y == '\'') inside_cc = 0; } else if (inside_str) { if (*y == '"' || *y == '\\') must_quote = 1; if (*y == '\\') has_quoted = 1; else if (!has_quoted && *y == '"') inside_str = 0; } else if (*y == '"') { inside_str = 1; must_quote = 1; } else if (*y == '\'') { inside_cc = 1; } if (must_quote) { if (i) d[l] = '\\'; l ++; } if (i) d[l] = *y; } if (!i) d = getmem(l + 2); if (i) { d[l] = '"'; d[l + 1] = 0; } } return d; } /* * stringify() produces a constant string, result of the # operator * on a list of tokens. */ static char *stringify(struct token_fifo *tf) { size_t tlen; size_t i; char *x, *y; for (tlen = 0, i = 0; i < tf->nt; i ++) if (tf->t[i].type < CPPERR && tf->t[i].type != OPT_NONE) tlen += strlen(token_name(tf->t + i)); if (tlen == 0) return sdup("\"\""); x = getmem(tlen + 1); for (tlen = 0, i = 0; i < tf->nt; i ++) { if (tf->t[i].type >= CPPERR || tf->t[i].type == OPT_NONE) continue; strcpy(x + tlen, token_name(tf->t + i)); tlen += strlen(token_name(tf->t + i)); } /* no need to add a trailing 0: strcpy() did that (and the string is not empty) */ y = stringify_string(x); freemem(x); return y; } /* * Two strings evaluated at initialization time, to handle the __TIME__ * and __DATE__ special macros. * * C99 specifies that these macros should remain constant throughout * the whole preprocessing. */ char compile_time[12], compile_date[24]; /* * substitute_macro() performs the macro substitution. 
It is called when * an identifier recognized as a macro name has been found; this function * tries to collect the arguments (if needed), applies # and ## operators * and perform recursive and nested macro expansions. * * In the substitution of a macro, we remove all newlines that were in the * arguments. This might confuse error reporting (which could report * erroneous line numbers) or have worse effect is the preprocessor is * used for another language pickier than C. Since the interface between * the preprocessor and the compiler is not fully specified, I believe * that this is no violation of the standard. Comments welcome. * * We take tokens from tfi. If tfi has no more tokens to give: we may * take some tokens from ls to complete a call (fetch arguments) if * and only if penury is non zero. */ int substitute_macro(struct lexer_state *ls, struct macro *m, struct token_fifo *tfi, int penury, int reject_nested, long l) { char *mname = HASH_ITEM_NAME(m); struct token_fifo *atl, etl; struct token t, *ct; int i, save_nest = m->nest; size_t save_art, save_tfi, etl_limit; int ltwds, ntwds, ltwws; int pragma_op = 0; /* * Reject the replacement, if we are already inside the macro. */ if (m->nest > reject_nested) { t.type = NAME; t.line = ls->line; t.name = mname; print_token(ls, &t, 0); return 0; } /* * put a separation from preceeding tokens */ print_space(ls); /* * Check if the macro is a special one. */ if ((i = check_special_macro(mname)) >= MAC_SPECIAL) { /* we have a special macro */ switch (i) { char buf[30], *bbuf, *cfn; case MAC_LINE: t.type = NUMBER; t.line = l; sprintf(buf, "%ld", l); t.name = buf; print_space(ls); print_token(ls, &t, 0); break; case MAC_FILE: t.type = STRING; t.line = l; cfn = current_long_filename ? 
current_long_filename : current_filename; bbuf = getmem(2 * strlen(cfn) + 3); { char *c, *d; int lcwb = 0; bbuf[0] = '"'; for (c = cfn, d = bbuf + 1; *c; c ++) { if (*c == '\\') { if (lcwb) continue; *(d ++) = '\\'; lcwb = 1; } else lcwb = 0; *(d ++) = *c; } *(d ++) = '"'; *(d ++) = 0; } t.name = bbuf; print_space(ls); print_token(ls, &t, 0); freemem(bbuf); break; case MAC_DATE: t.type = STRING; t.line = l; t.name = compile_date; print_space(ls); print_token(ls, &t, 0); break; case MAC_TIME: t.type = STRING; t.line = l; t.name = compile_time; print_space(ls); print_token(ls, &t, 0); break; case MAC_STDC: t.type = NUMBER; t.line = l; t.name = "1"; print_space(ls); print_token(ls, &t, 0); break; case MAC_PRAGMA: if (reject_nested > 0) { /* do not replace _Pragma() unless toplevel */ t.type = NAME; t.line = ls->line; t.name = mname; print_token(ls, &t, 0); return 0; } pragma_op = 1; goto collect_args; #ifdef AUDIT default: ouch("unbekanntes fliegendes macro"); #endif } return 0; } /* * If the macro has arguments, collect them. */ collect_args: if (m->narg >= 0) { unsigned long save_flags = ls->flags; int wr = 0; ls->flags |= LEXER; if (m->narg > 0 || m->vaarg) atl = getmem((m->narg + m->vaarg) * sizeof(struct token_fifo)); switch (collect_arguments(ls, tfi, penury, atl, m->narg, m->vaarg, &wr)) { case 1: /* the macro expected arguments, but we did not find any; the last read token should be read again. */ ls->flags = save_flags | READ_AGAIN; goto no_argument_next; case 2: tfi->art --; /* fall through */ case 3: ls->flags = save_flags; no_argument_next: t.type = NAME; t.line = l; t.name = mname; print_token_nailed(ls, &t, l); if (wr) { t.type = NONE; t.line = l; #ifdef SEMPER_FIDELIS t.name = " "; #endif print_token(ls, &t, 0); goto exit_macro_2; } goto exit_macro_1; case 4: ls->flags = save_flags; goto exit_error_1; } ls->flags = save_flags; } /* * If the macro is _Pragma, and we got here, then we have * exactly one argument. 
We check it, unstringize it, and * emit a PRAGMA token. */ if (pragma_op) { char *pn; if (atl[0].nt != 1 || atl[0].t[0].type != STRING) { error(ls->line, "invalid argument to _Pragma"); if (atl[0].nt) freemem(atl[0].t); freemem(atl); goto exit_error; } pn = atl[0].t[0].name; if ((pn[0] == '"' && pn[1] == '"') || (pn[0] == 'L' && pn[1] == '"' && pn[2] == '"')) { /* void pragma -- just ignore it */ freemem(atl[0].t); freemem(atl); return 0; } if (ls->flags & TEXT_OUTPUT) { #ifdef PRAGMA_DUMP /* * This code works because we actually evaluate arguments in a * lazy way: we scan a macro argument only if it appears in the * output, and exactly as many times as it appears. Therefore, * _Pragma() will get evaluated just like they should. */ char *c = atl[0].t[0].name, *d; for (d = "\n#pragma "; *d; d ++) put_char(ls, *d); d = (*c == 'L') ? c + 2 : c + 1; for (; *d != '"'; d ++) { if (*d == '\\' && (*(d + 1) == '\\' || *(d + 1) == '"')) { d ++; } put_char(ls, *d); } put_char(ls, '\n'); ls->oline = ls->line; enter_file(ls, ls->flags); #else if (ls->flags & WARN_PRAGMA) warning(ls->line, "_Pragma() ignored and not dumped"); #endif } else if (ls->flags & HANDLE_PRAGMA) { char *c = atl[0].t[0].name, *d, *buf; struct token t; /* a wide string is a string */ if (*c == 'L') c ++; c ++; for (buf = d = getmem(strlen(c)); *c != '"'; c ++) { if (*c == '\\' && (*(c + 1) == '\\' || *(c + 1) == '"')) { *(d ++) = *(++ c); } else *(d ++) = *c; } *d = 0; t.type = PRAGMA; t.line = ls->line; #ifdef PRAGMA_TOKENIZE t.name = tokenize_string(ls, buf); freemem(buf); buf = t.name; if (!buf) { freemem(atl[0].t); freemem(atl); goto exit_error; } #else t.name = buf; #endif aol(ls->toplevel_of->t, ls->toplevel_of->nt, t, TOKEN_LIST_MEMG); throw_away(ls->gf, buf); } freemem(atl[0].t); freemem(atl); return 0; } /* * Now we expand and replace the arguments in the macro; we * also handle '#' and '##'. If we find an argument, that has * to be replaced, we expand it in its own token list, then paste * it. 
Tricky point: when we paste an argument, we must scan * again the resulting list for further replacements. This * implies problems with regards to nesting self-referencing * macros. * * We do then YAUH (yet another ugly hack): if a macro is replaced, * and nested replacement exhibit the same macro, we mark it with * a negative line number. All produced negative line numbers * must be cleaned in the end. */ #define ZAP_LINE(t) do { \ if ((t).type == NAME) { \ struct macro *zlm = HTT_get(¯os, (t).name); \ if (zlm && zlm->nest > reject_nested) \ (t).line = -1 - (t).line; \ } \ } while (0) #ifdef LOW_MEM save_art = m->cval.rp; m->cval.rp = 0; #else save_art = m->val.art; m->val.art = 0; #endif etl.art = etl.nt = 0; m->nest = reject_nested + 1; ltwds = ntwds = 0; #ifdef LOW_MEM while (m->cval.rp < m->cval.length) { #else while (m->val.art < m->val.nt) { #endif size_t next, z; #ifdef LOW_MEM struct token uu; ct = &uu; ct->line = 1; t.type = ct->type = m->cval.t[m->cval.rp ++]; if (ct->type == MACROARG) { unsigned anum = m->cval.t[m->cval.rp ++]; if (anum >= 128U) anum = ((anum & 127U) << 8) | (unsigned)m->cval.t[m->cval.rp ++]; ct->line = anum; } else if (S_TOKEN(ct->type)) { t.name = ct->name = (char *)(m->cval.t + m->cval.rp); m->cval.rp += 1 + strlen(ct->name); } #ifdef SEMPER_FIDELIS else if (ct->type == OPT_NONE) { t.type = ct->type = NONE; t.name = ct->name = " "; } #endif t.line = ls->line; next = m->cval.rp; if ((next < m->cval.length && (m->cval.t[z = next] == DSHARP || m->cval.t[z = next] == DIG_DSHARP)) || ((next + 1) < m->cval.length && ttWHI(m->cval.t[next]) && (m->cval.t[z = next + 1] == DSHARP || m->cval.t[z = next + 1] == DIG_DSHARP))) { ntwds = 1; m->cval.rp = z; } else ntwds = 0; #else ct = m->val.t + (m->val.art ++); next = m->val.art; t.type = ct->type; t.line = ls->line; #ifdef SEMPER_FIDELIS if (t.type == OPT_NONE) { t.type = NONE; t.name = " "; } else #endif t.name = ct->name; if ((next < m->val.nt && (m->val.t[z = next].type == DSHARP || 
m->val.t[z = next].type == DIG_DSHARP)) || ((next + 1) < m->val.nt && ttWHI(m->val.t[next].type) && (m->val.t[z = next + 1].type == DSHARP || m->val.t[z = next + 1].type == DIG_DSHARP))) { ntwds = 1; m->val.art = z; } else ntwds = 0; #endif if (ct->type == MACROARG) { #ifdef DSHARP_TOKEN_MERGE int need_opt_space = 1; #endif z = ct->line; /* the argument number is there */ if (ltwds && atl[z].nt != 0 && etl.nt) { if (concat_token(etl.t + (-- etl.nt), atl[z].t)) { warning(ls->line, "operator '##' " "produced the invalid token " "'%s%s'", token_name(etl.t + etl.nt), token_name(atl[z].t)); #if 0 /* obsolete */ #ifdef LOW_MEM m->cval.rp = save_art; #else m->val.art = save_art; #endif etl.nt ++; goto exit_error_2; #endif etl.nt ++; atl[z].art = 0; #ifdef DSHARP_TOKEN_MERGE need_opt_space = 0; #endif } else { if (etl.nt == 0) freemem(etl.t); else if (!ttWHI(etl.t[etl.nt - 1] .type)) { t.type = OPT_NONE; t.line = ls->line; aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG); } t.type = dsharp_lexer.ctok->type; t.line = ls->line; if (S_TOKEN(t.type)) { t.name = sdup(dsharp_lexer .ctok->name); throw_away(ls->gf, t.name); } ZAP_LINE(t); aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG); atl[z].art = 1; } } else atl[z].art = 0; if ( #ifdef DSHARP_TOKEN_MERGE need_opt_space && #endif atl[z].art < atl[z].nt && (!etl.nt || !ttWHI(etl.t[etl.nt - 1].type))) { t.type = OPT_NONE; t.line = ls->line; aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG); } if (ltwds || ntwds) { while (atl[z].art < atl[z].nt) { t = atl[z].t[atl[z].art ++]; t.line = ls->line; ZAP_LINE(t); aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG); } } else { struct token_fifo *save_tf; unsigned long save_flags; int ret = 0; atl[z].art = 0; save_tf = ls->output_fifo; ls->output_fifo = &etl; save_flags = ls->flags; ls->flags |= LEXER; while (atl[z].art < atl[z].nt) { struct macro *nm; struct token *cct; cct = atl[z].t + (atl[z].art ++); if (cct->type == NAME && cct->line >= 0 && (nm = HTT_get(¯os, cct->name)) && nm->nest <= (reject_nested + 1)) { ret |= 
substitute_macro(ls, nm, atl + z, 0, reject_nested + 1, l); continue; } t = *cct; ZAP_LINE(t); aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG); } ls->output_fifo = save_tf; ls->flags = save_flags; if (ret) { #ifdef LOW_MEM m->cval.rp = save_art; #else m->val.art = save_art; #endif goto exit_error_2; } } if (!ntwds && (!etl.nt || !ttWHI(etl.t[etl.nt - 1].type))) { t.type = OPT_NONE; t.line = ls->line; aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG); } ltwds = 0; continue; } /* * This code is definitely cursed. * * For the extremely brave reader who tries to understand * what is happening: ltwds is a flag meaning "last token * was double-sharp" and ntwds means "next token will be * double-sharp". The tokens are from the macro definition, * and scanned from left to right. Arguments that are * not implied into a #/## construction are macro-expanded * seperately, then included into the token stream. */ if (ct->type == DSHARP || ct->type == DIG_DSHARP) { if (ltwds) { error(ls->line, "quad sharp"); #ifdef LOW_MEM m->cval.rp = save_art; #else m->val.art = save_art; #endif goto exit_error_2; } #ifdef LOW_MEM if (m->cval.rp < m->cval.length && ttMWS(m->cval.t[m->cval.rp])) m->cval.rp ++; #else if (m->val.art < m->val.nt && ttMWS(m->val.t[m->val.art].type)) m->val.art ++; #endif ltwds = 1; continue; } else if (ltwds && etl.nt != 0) { if (concat_token(etl.t + (-- etl.nt), ct)) { warning(ls->line, "operator '##' produced " "the invalid token '%s%s'", token_name(etl.t + etl.nt), token_name(ct)); #if 0 /* obsolete */ #ifdef LOW_MEM m->cval.rp = save_art; #else m->val.art = save_art; #endif etl.nt ++; goto exit_error_2; #endif etl.nt ++; } else { if (etl.nt == 0) freemem(etl.t); t.type = dsharp_lexer.ctok->type; t.line = ls->line; if (S_TOKEN(t.type)) { t.name = sdup(dsharp_lexer.ctok->name); throw_away(ls->gf, t.name); } ct = &t; } } ltwds = 0; #ifdef LOW_MEM if ((ct->type == SHARP || ct->type == DIG_SHARP) && next < m->cval.length && (m->cval.t[next] == MACROARG || (ttMWS(m->cval.t[next]) && 
(next + 1) < m->cval.length && m->cval.t[next + 1] == MACROARG))) { unsigned anum; #else if ((ct->type == SHARP || ct->type == DIG_SHARP) && next < m->val.nt && (m->val.t[next].type == MACROARG || (ttMWS(m->val.t[next].type) && (next + 1) < m->val.nt && m->val.t[next + 1].type == MACROARG))) { #endif /* * We have a # operator followed by (an optional * whitespace and) a macro argument; this means * stringification. So be it. */ #ifdef LOW_MEM if (ttMWS(m->cval.t[next])) m->cval.rp ++; #else if (ttMWS(m->val.t[next].type)) m->val.art ++; #endif t.type = STRING; #ifdef LOW_MEM anum = m->cval.t[++ m->cval.rp]; if (anum >= 128U) anum = ((anum & 127U) << 8) | (unsigned)m->cval.t[++ m->cval.rp]; t.name = stringify(atl + anum); m->cval.rp ++; #else t.name = stringify(atl + (size_t)(m->val.t[m->val.art ++].line)); #endif throw_away(ls->gf, t.name); ct = &t; /* * There is no need for extra spaces here. */ } t = *ct; ZAP_LINE(t); aol(etl.t, etl.nt, t, TOKEN_LIST_MEMG); } #ifdef LOW_MEM m->cval.rp = save_art; #else m->val.art = save_art; #endif /* * Now etl contains the expanded macro, to be parsed again for * further expansions -- much easier, since '#' and '##' have * already been handled. * However, we might need some input from tfi. So, we paste * the contents of tfi after etl, and we put back what was * not used. * * Some adjacent spaces are merged; only unique NONE, or sequences * OPT_NONE NONE are emitted. 
*/ etl_limit = etl.nt; if (tfi) { save_tfi = tfi->art; while (tfi->art < tfi->nt) aol(etl.t, etl.nt, tfi->t[tfi->art ++], TOKEN_LIST_MEMG); } ltwws = 0; while (etl.art < etl_limit) { struct macro *nm; ct = etl.t + (etl.art ++); if (ct->type == NAME && ct->line >= 0 && (nm = HTT_get(¯os, ct->name))) { if (substitute_macro(ls, nm, &etl, penury, reject_nested, l)) { m->nest = save_nest; goto exit_error_2; } ltwws = 0; continue; } if (ttMWS(ct->type)) { if (ltwws == 1) { if (ct->type == OPT_NONE) continue; ltwws = 2; } else if (ltwws == 2) continue; else if (ct->type == OPT_NONE) ltwws = 1; else ltwws = 2; } else ltwws = 0; if (ct->line >= 0) ct->line = l; print_token(ls, ct, reject_nested ? 0 : l); } if (etl.nt) freemem(etl.t); if (tfi) { tfi->art = save_tfi + (etl.art - etl_limit); } exit_macro_1: print_space(ls); exit_macro_2: for (i = 0; i < (m->narg + m->vaarg); i ++) if (atl[i].nt) freemem(atl[i].t); if (m->narg > 0 || m->vaarg) freemem(atl); m->nest = save_nest; return 0; exit_error_2: if (etl.nt) freemem(etl.t); exit_error_1: for (i = 0; i < (m->narg + m->vaarg); i ++) if (atl[i].nt) freemem(atl[i].t); if (m->narg > 0 || m->vaarg) freemem(atl); m->nest = save_nest; exit_error: return 1; } /* * print already defined macros */ void print_defines(void) { HTT_scan(¯os, print_macro); } /* * define_macro() defines a new macro, whom definition is given in * the command-line syntax: macro=def * The '=def' part is optional. * * It returns non-zero on error. 
*/ int define_macro(struct lexer_state *ls, char *def) { char *c = sdup(def), *d; int with_def = 0; int ret = 0; for (d = c; *d && *d != '='; d ++); if (*d) { *d = ' '; with_def = 1; } if (with_def) { struct lexer_state lls; size_t n = strlen(c) + 1; if (c == d) { error(-1, "void macro name"); ret = 1; } else { *(c + n - 1) = '\n'; init_buf_lexer_state(&lls, 0); lls.flags = ls->flags | LEXER; lls.input = 0; lls.input_string = (unsigned char *)c; lls.pbuf = 0; lls.ebuf = n; lls.line = -1; ret = handle_define(&lls); free_lexer_state(&lls); } } else { struct macro *m; if (!*c) { error(-1, "void macro name"); ret = 1; } else if ((m = HTT_get(¯os, c)) #ifdef LOW_MEM && (m->cval.length != 3 || m->cval.t[0] != NUMBER || strcmp((char *)(m->cval.t + 1), "1"))) { #else && (m->val.nt != 1 || m->val.t[0].type != NUMBER || strcmp(m->val.t[0].name, "1"))) { #endif error(-1, "macro %s already defined", c); ret = 1; } else { #ifndef LOW_MEM struct token t; #endif m = new_macro(); #ifdef LOW_MEM m->cval.length = 3; m->cval.t = getmem(3); m->cval.t[0] = NUMBER; m->cval.t[1] = '1'; m->cval.t[2] = 0; #else t.type = NUMBER; t.name = sdup("1"); aol(m->val.t, m->val.nt, t, TOKEN_LIST_MEMG); #endif HTT_put(¯os, m, c); } } freemem(c); return ret; } /* * undef_macro() undefines the macro whom name is given as "def"; * it is not an error to try to undef a macro that does not exist. * * It returns non-zero on error (undefinition of a special macro, * void macro name). */ int undef_macro(struct lexer_state *ls, char *def) { char *c = def; if (!*c) { error(-1, "void macro name"); return 1; } if (HTT_get(¯os, c)) { if (check_special_macro(c)) { error(-1, "trying to undef special macro %s", c); return 1; } else HTT_del(¯os, c); } return 0; } /* * We saw a #ifdef directive. Parse the line. 
* return value: 1 if the macro is defined, 0 if it is not, -1 on error */ int handle_ifdef(struct lexer_state *ls) { while (!next_token(ls)) { int tgd = 1; if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) { int x = (HTT_get(¯os, ls->ctok->name) != 0); while (!next_token(ls) && ls->ctok->type != NEWLINE) if (tgd && !ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(ls->line, "trailing garbage " "in #ifdef"); tgd = 0; } return x; } error(ls->line, "illegal macro name for #ifdef"); while (!next_token(ls) && ls->ctok->type != NEWLINE) if (tgd && !ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(ls->line, "trailing garbage in " "#ifdef"); tgd = 0; } return -1; } error(ls->line, "unfinished #ifdef"); return -1; } /* * for #undef * return value: 1 on error, 0 on success. Undefining a macro that was * already not defined is not an error. */ int handle_undef(struct lexer_state *ls) { while (!next_token(ls)) { if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) { struct macro *m = HTT_get(¯os, ls->ctok->name); int tgd = 1; if (m != 0) { if (check_special_macro(ls->ctok->name)) { error(ls->line, "trying to undef " "special macro %s", ls->ctok->name); goto undef_error; } if (emit_defines) fprintf(emit_output, "#undef %s\n", ls->ctok->name); HTT_del(¯os, ls->ctok->name); } while (!next_token(ls) && ls->ctok->type != NEWLINE) if (tgd && !ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(ls->line, "trailing garbage " "in #undef"); tgd = 0; } return 0; } error(ls->line, "illegal macro name for #undef"); undef_error: while (!next_token(ls) && ls->ctok->type != NEWLINE); return 1; } error(ls->line, "unfinished #undef"); return 1; } /* * for #ifndef * return value: 0 if the macro is defined, 1 if it is not, -1 on error. 
*/ int handle_ifndef(struct lexer_state *ls) { while (!next_token(ls)) { int tgd = 1; if (ls->ctok->type == NEWLINE) break; if (ttMWS(ls->ctok->type)) continue; if (ls->ctok->type == NAME) { int x = (HTT_get(¯os, ls->ctok->name) == 0); while (!next_token(ls) && ls->ctok->type != NEWLINE) if (tgd && !ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(ls->line, "trailing garbage " "in #ifndef"); tgd = 0; } if (protect_detect.state == 1) { protect_detect.state = 2; protect_detect.macro = sdup(ls->ctok->name); } return x; } error(ls->line, "illegal macro name for #ifndef"); while (!next_token(ls) && ls->ctok->type != NEWLINE) if (tgd && !ttWHI(ls->ctok->type) && (ls->flags & WARN_STANDARD)) { warning(ls->line, "trailing garbage in " "#ifndef"); tgd = 0; } return -1; } error(ls->line, "unfinished #ifndef"); return -1; } /* * erase the macro table. */ void wipe_macros(void) { if (macros_init_done) HTT_kill(¯os); macros_init_done = 0; } /* * initialize the macro table */ void init_macros(void) { wipe_macros(); HTT_init(¯os, del_macro); macros_init_done = 1; if (!no_special_macros) add_special_macros(); } /* * find a macro from its name */ struct macro *get_macro(char *name) { return HTT_get(¯os, name); } ./nhash.h0000644000175000017500000001112011620140753011123 0ustar renerene/* * (c) Thomas Pornin 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. 
* * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #ifndef UCPP__NHASH__ #define UCPP__NHASH__ /* * Each item stored in the hash table should be a structure beginning * with the following header. */ typedef struct hash_item_header_ { char *ident; struct hash_item_header_ *left, *right; } hash_item_header; /* * This macro takes as argument a pointer to a hash table item (a * structure beginning with `hash_item_header') and returns a pointer to * the item name. This name should be considered as read-only. The * retrieved pointer can become invalid whenever a new item is inserted * in or removed from the table. */ #define HASH_ITEM_NAME(s) (((hash_item_header *)(s))->ident + sizeof(unsigned)) /* * Number of lists for the primary hash step. Can be reduced to save more * memory, or increased to speed things up. It should be a power of 2 * greater or equal than 2 and smaller than UINT_MAX. */ #define HTT_NUM_TREES 128 /* * Type for a hash table. */ typedef struct { void (*deldata)(void *); hash_item_header *tree[HTT_NUM_TREES]; } HTT; /* * Type for a reduced version of HTT with only two binary trees. That * version has a lower initialization time and is suitable for situation * where only a limited number of elements will be stored, but new tables * need frequent initializations. 
*/ typedef struct { void (*deldata)(void *); hash_item_header *tree[2]; } HTT2; /* * Initialize a hash table. The `deldata' parameter should point to a * function which will be invoked on any item removed from the table; * that function should take care of the release of memory allocated for * that item (except the hash_item_header contents, which are handled * internally). */ void HTT_init(HTT *htt, void (*deldata)(void *)); /* * Link an item into the hash table under the given name. If another * item of identical name is already present in the table, a pointer to * that item is returned; otherwise, the new item is linked into the * table and NULL is returned. The object pointed to by `item' is * linked from the table, but not the string pointed to by `name'. */ void *HTT_put(HTT *htt, void *item, char *name); /* * Retrieve an item by name from the hash table. NULL is returned if * the object is not found. */ void *HTT_get(HTT *htt, char *name); /* * Remove an item from the hash table. 1 is returned if the item was * removed, 0 if it was not found. */ int HTT_del(HTT *htt, char *name); /* * For all items stored within the hash table, invoke the provided * function with the item as parameter. The function may abort the * scan by performing a longjmp() to a context encapsulating the * call to that function. */ void HTT_scan(HTT *htt, void (*action)(void *)); /* * Release the whole table contents. After a call to this function, * the table is ready to accept new items. */ void HTT_kill(HTT *htt); /* * The following functions are identical to the HTT_*() functions, except * that they operate on the reduced HTT2 tables. 
*/ void HTT2_init(HTT2 *htt, void (*deldata)(void *)); void *HTT2_put(HTT2 *htt, void *item, char *name); void *HTT2_get(HTT2 *htt, char *name); int HTT2_del(HTT2 *htt, char *name); void HTT2_scan(HTT2 *htt, void (*action)(void *)); void HTT2_kill(HTT2 *htt); #endif ./mem.c0000644000175000017500000001567111620140753010612 0ustar renerene/* * Memory manipulation routines * (c) Thomas Pornin 1998 - 2002 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "mem.h" #include #include #include /* * Shifting a pointer of that some bytes is supposed to satisfy * alignment requirements. 
This is *not* guaranteed by the standard * but should work everywhere anyway. */ #define ALIGNSHIFT (sizeof(long) > sizeof(long double) \ ? sizeof(long) : sizeof(long double)) #ifdef AUDIT void die(void) { abort(); } static void suicide(unsigned long e) { fprintf(stderr, "ouch: Schrodinger's beef is not dead ! %lx\n", e); die(); } #else void die(void) { exit(EXIT_FAILURE); } #endif #if defined AUDIT || defined MEM_CHECK || defined MEM_DEBUG /* * This function is equivalent to a malloc(), but will display an error * message and exit if the wanted memory is not available */ #ifdef MEM_DEBUG static void *getmem_raw(size_t x) #else void *(getmem)(size_t x) #endif { void *m; #ifdef AUDIT m = malloc(x + ALIGNSHIFT); #else m = malloc(x); #endif if (m == 0) { fprintf(stderr, "ouch: malloc() failed\n"); die(); } #ifdef AUDIT *((unsigned long *)m) = 0xdeadbeefUL; return (void *)(((char *)m) + ALIGNSHIFT); #else return m; #endif } #endif #ifndef MEM_DEBUG /* * This function is equivalent to a realloc(); if the realloc() call * fails, it will try a malloc() and a memcpy(). 
If not enough memory is * available, the program exits with an error message */ void *(incmem)(void *m, size_t x, size_t nx) { void *nm; #ifdef AUDIT m = (void *)(((char *)m) - ALIGNSHIFT); if (*((unsigned long *)m) != 0xdeadbeefUL) suicide(*((unsigned long *)m)); x += ALIGNSHIFT; nx += ALIGNSHIFT; #endif if (!(nm = realloc(m, nx))) { if (x > nx) x = nx; nm = (getmem)(nx); memcpy(nm, m, x); /* free() and not freemem(), because of the Schrodinger beef */ free(m); } #ifdef AUDIT return (void *)(((char *)nm) + ALIGNSHIFT); #else return nm; #endif } #endif #if defined AUDIT || defined MEM_DEBUG /* * This function frees the given block */ #ifdef MEM_DEBUG static void freemem_raw(void *x) #else void (freemem)(void *x) #endif { #ifdef AUDIT void *y = (void *)(((char *)x) - ALIGNSHIFT); if ((*((unsigned long *)y)) != 0xdeadbeefUL) suicide(*((unsigned long *)y)); *((unsigned long *)y) = 0xfeedbabeUL; free(y); #else free(x); #endif } #endif #ifdef AUDIT /* * This function copies n bytes from src to dest */ void *mmv(void *dest, void *src, size_t n) { return memcpy(dest, src, n); } /* * This function copies n bytes from src to dest */ void *mmvwo(void *dest, void *src, size_t n) { return memmove(dest, src, n); } #endif #ifndef MEM_DEBUG /* * This function creates a new char * and fills it with a copy of src */ char *(sdup)(char *src) { size_t n = 1 + strlen(src); char *x = getmem(n); mmv(x, src, n); return x; } #endif #ifdef MEM_DEBUG /* * We include here special versions of getmem(), freemem() and incmem() * that track allocations and are used to detect memory leaks. * * Each allocation is referenced in a list, with a serial number. */ /* * Define "true" functions for applications that need pointers * to such functions. 
*/ void *(getmem)(size_t n) { return getmem(n); } void (freemem)(void *x) { freemem(x); } void *(incmem)(void *x, size_t s, size_t ns) { return incmem(x, s, ns); } char *(sdup)(char *s) { return sdup(s); } static long current_serial = 0L; /* must be a power of two */ #define MEMDEBUG_MEMG 128U static struct mem_track { void *block; long serial; char *file; int line; } *mem = 0; static size_t meml = 0; static unsigned int current_ptr = 0; static void *true_incmem(void *x, size_t old_size, size_t new_size) { void * y = realloc(x, new_size); if (y == 0) { y = malloc(new_size); if (y == 0) { fprintf(stderr, "ouch: malloc() failed\n"); die(); } mmv(y, x, old_size < new_size ? old_size : new_size); free(x); } return y; } static long find_free_block(void) { unsigned int n; size_t i; for (i = 0, n = current_ptr; i < meml; i ++) { if (mem[n].block == 0) { current_ptr = n; return n; } n = (n + 1) & (meml - 1U); } if (meml == 0) { size_t j; meml = MEMDEBUG_MEMG; mem = malloc(meml * sizeof(struct mem_track)); current_ptr = 0; for (j = 0; j < meml ; j ++) mem[j].block = 0; } else { size_t j; mem = true_incmem(mem, meml * sizeof(struct mem_track), 2 * meml * sizeof(struct mem_track)); current_ptr = meml; for (j = meml; j < 2 * meml ; j ++) mem[j].block = 0; meml *= 2; } return current_ptr; } void *getmem_debug(size_t n, char *file, int line) { void *x = getmem_raw(n + ALIGNSHIFT); long i = find_free_block(); *(long *)x = i; mem[i].block = x; mem[i].serial = current_serial ++; mem[i].file = file; mem[i].line = line; return (void *)((unsigned char *)x + ALIGNSHIFT); } void freemem_debug(void *x, char *file, int line) { void *y = (unsigned char *)x - ALIGNSHIFT; long i = *(long *)y; if (i < 0 || (size_t)i >= meml || mem[i].block != y) { fprintf(stderr, "ouch: freeing free people (from %s:%d)\n", file, line); die(); } mem[i].block = 0; freemem_raw(y); } void *incmem_debug(void *x, size_t ol, size_t nl, char *file, int line) { void *y = getmem_debug(nl, file, line); mmv(y, x, ol < nl 
? ol : nl); freemem_debug(x, file, line); return y; } char *sdup_debug(char *src, char *file, int line) { size_t n = 1 + strlen(src); char *x = getmem_debug(n, file, line); mmv(x, src, n); return x; } void report_leaks(void) { size_t i; for (i = 0; i < meml; i ++) { if (mem[i].block) fprintf(stderr, "leak: serial %ld, %s:%d\n", mem[i].serial, mem[i].file, mem[i].line); } } #endif ./hash.c0000644000175000017500000001736511620140753010761 0ustar renerene/* * Generic hash table routines. * (c) Thomas Pornin 1998, 1999, 2000 * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. The name of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include #include "hash.h" #include "mem.h" #include "tune.h" /* * hash_string() is a sample hash function for strings */ int hash_string(char *s) { #ifdef FAST_HASH unsigned h = 0, g; while (*s) { h = (h << 4) + *(unsigned char *)(s ++); if ((g = h & 0xF000U) != 0) h ^= (g >> 12); h &= ~g; } return (h ^ (h >> 9)) & 127U; #else unsigned char h = 0; for (; *s; s ++) h ^= (unsigned char)(*s); return ((int)h); #endif } /* * struct hash_item is the basic data type to internally handle hash tables */ struct hash_item { void *data; struct hash_item *next; }; /* * This function adds an entry to the struct hash_item list */ static struct hash_item *add_entry(struct hash_item *blist, void *data) { struct hash_item *t = getmem(sizeof(struct hash_item)); t->data = data; t->next = blist; return t; } /* * This function finds a struct hash_item in a list, using the * comparison function provided as cmpdata (*cmpdata() returns * non-zero if the two parameters are to be considered identical). * * It returns 0 if the item is not found. */ static struct hash_item *get_entry(struct hash_item *blist, void *data, int (*cmpdata)(void *, void *)) { while (blist) { if ((*cmpdata)(data, blist->data)) return blist; blist = blist->next; } return 0; } /* * This function acts like get_entry but deletes the found item, using * the provided function deldata(); it returns 0 if the given data was * not found. 
*/ static struct hash_item *del_entry(struct hash_item *blist, void *data, int (*cmpdata)(void *, void *), void (*deldata)(void *)) { struct hash_item *prev = 0, *save = blist; while (blist) { if ((*cmpdata)(data, blist->data)) { if (deldata) (*deldata)(blist->data); if (prev) prev->next = blist->next; if (save == blist) save = blist->next; freemem(blist); return save; } prev = blist; blist = blist->next; } return 0; } /* * This function creates a new hashtable, with the hashing and comparison * functions given as parameters */ struct HT *newHT(int n, int (*cmpdata)(void *, void *), int (*hash)(void *), void (*deldata)(void *)) { struct HT *t = getmem(sizeof(struct HT)); int i; t->lists = getmem(n * sizeof(struct hash_item *)); for (i = 0; i < n; i ++) t->lists[i] = 0; t->nb_lists = n; t->cmpdata = cmpdata; t->hash = hash; t->deldata = deldata; return t; } /* * This function adds a new entry in the hashtable ht; it returns 0 * on success, or a pointer to the already present item otherwise. */ void *putHT(struct HT *ht, void *data) { int h; struct hash_item *d; h = ((*(ht->hash))(data)); #ifndef FAST_HASH h %= ht->nb_lists; #endif if ((d = get_entry(ht->lists[h], data, ht->cmpdata))) return d->data; ht->lists[h] = add_entry(ht->lists[h], data); return 0; } /* * This function adds a new entry in the hashtable ht, even if an equal * entry is already there. Exercise caution ! * The new entry will "hide" the old one, which means that the new will be * found upon lookup/delete, not the old one. 
*/ void *forceputHT(struct HT *ht, void *data) { int h; h = ((*(ht->hash))(data)); #ifndef FAST_HASH h %= ht->nb_lists; #endif ht->lists[h] = add_entry(ht->lists[h], data); return 0; } /* * This function finds the entry corresponding to *data in the * hashtable ht (using the comparison function given as argument * to newHT) */ void *getHT(struct HT *ht, void *data) { int h; struct hash_item *t; h = ((*(ht->hash))(data)); #ifndef FAST_HASH h %= ht->nb_lists; #endif if ((t = get_entry(ht->lists[h], data, ht->cmpdata)) == 0) return 0; return (t->data); } /* * This function finds and delete the entry corresponding to *data * in the hashtable ht (using the comparison function given as * argument to newHT). */ int delHT(struct HT *ht, void *data) { int h; h = ((*(ht->hash))(data)); #ifndef FAST_HASH h %= ht->nb_lists; #endif ht->lists[h] = del_entry(ht->lists[h], data, ht->cmpdata, ht->deldata); return 1; } /* * This function completely eradicates from memory a given hash table, * releasing all objects */ void killHT(struct HT *ht) { int i; struct hash_item *t, *n; void (*dd)(void *) = ht->deldata; for (i = 0; i < ht->nb_lists; i ++) for (t = ht->lists[i]; t;) { n = t->next; if (dd) (*dd)(t->data); freemem(t); t = n; } freemem(ht->lists); freemem(ht); } /* * This function stores a backup of the hash table, for context stacking. */ void saveHT(struct HT *ht, void **buffer) { struct hash_item **b = (struct hash_item **)buffer; mmv(b, ht->lists, ht->nb_lists * sizeof(struct hash_item *)); } /* * This function restores the saved state of the hash table. * Do NOT use if some of the entries that were present before the backup * have been removed (even temporarily). 
*/ void restoreHT(struct HT *ht, void **buffer) { struct hash_item **b = (struct hash_item **)buffer; int i; for (i = 0; i < ht->nb_lists; i ++) { struct hash_item *t = ht->lists[i], *n; while (t != b[i]) { n = t->next; (*(ht->deldata))(t->data); freemem(t); t = n; } ht->lists[i] = b[i]; } } /* * This function is evil. It inserts a new item in a saved hash table, * tweaking the save buffer and the hash table in order to keep things * stable. There are no checks. */ void tweakHT(struct HT *ht, void **buffer, void *data) { int h; struct hash_item *d, *e; h = ((*(ht->hash))(data)); #ifndef FAST_HASH h %= ht->nb_lists; #endif for (d = ht->lists[h]; d != buffer[h]; d = d->next); d = add_entry(buffer[h], data); if (buffer[h] == ht->lists[h]) { buffer[h] = ht->lists[h] = d; return; } for (e = ht->lists[h]; e->next != buffer[h]; e = e->next); e->next = d; buffer[h] = d; } /* * This function scans the whole table and calls the given function on * each entry. */ void scanHT(struct HT *ht, void (*action)(void *)) { int i; for (i = 0; i < ht->nb_lists; i ++) { struct hash_item *t = ht->lists[i]; while (t) { (*action)(t->data); t = t->next; } } } /* * The two following fonctions are generic for storing structures * uniquely identified by their name, which must be the first * field of the structure. */ int hash_struct(void *m) { char *n = *(char **)m; #ifdef FAST_HASH return hash_string(n); #else return hash_string(n) & 127; #endif } int cmp_struct(void *m1, void *m2) { char *n1 = *(char **)m1, *n2 = *(char **)m2; return !strcmp(n1, n2); }