coco-cpp_20120102/zipsources.bat0000644000175000017500000000011011700310730014210 0ustar mlmljar -cfM CocoSourcesCPP.zip *.atg *.frame *.bat *.h *.cpp *.sh Makefile coco-cpp_20120102/Scanner.h0000644000175000017500000002114011700304130013057 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-----------------------------------------------------------------------*/ #if !defined(Coco_COCO_SCANNER_H__) #define Coco_COCO_SCANNER_H__ #include #include #include #include #include // io.h and fcntl are used to ensure binary read from streams on windows #if _MSC_VER >= 1300 #include #include #endif #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s #elif _MSC_VER >= 1300 #define coco_swprintf _snwprintf #elif defined __MINGW32__ #define coco_swprintf _snwprintf #else // assume every other compiler knows swprintf #define coco_swprintf swprintf #endif #define COCO_WCHAR_MAX 65535 #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) #define COCO_CPP_NAMESPACE_SEPARATOR L':' namespace Coco { // string handling, wide character wchar_t* coco_string_create(const wchar_t *value); wchar_t* coco_string_create(const wchar_t *value, int startIndex); wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length); wchar_t* coco_string_create_upper(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen); wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2); wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value); void coco_string_delete(wchar_t* &data); int coco_string_length(const wchar_t* data); bool coco_string_endswith(const wchar_t* data, const wchar_t *value); int coco_string_indexof(const wchar_t* data, const wchar_t value); int coco_string_lastindexof(const wchar_t* data, const wchar_t value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); int coco_string_compareto(const wchar_t* data1, const wchar_t* data2); int coco_string_hash(const wchar_t* data); // string handling, ascii character wchar_t* coco_string_create(const char *value); char* 
coco_string_create_char(const wchar_t *value); void coco_string_delete(char* &data); class Token { public: int kind; // token kind int pos; // token position in bytes in the source text (starting at 0) int charPos; // token position in characters in the source text (starting at 0) int col; // token column (starting at 1) int line; // token line (starting at 1) wchar_t* val; // token value Token *next; // ML 2005-03-11 Peek tokens are kept in linked list Token(); ~Token(); }; class Buffer { // This Buffer supports the following cases: // 1) seekable stream (file) // a) whole stream in buffer // b) part of stream in buffer // 2) non seekable stream (network, console) private: unsigned char *buf; // input buffer int bufCapacity; // capacity of buf int bufStart; // position of first byte in buffer relative to input stream int bufLen; // length of buffer int fileLen; // length of input stream (may change if the stream is no file) int bufPos; // current position in buffer FILE* stream; // input stream (seekable) bool isUserStream; // was the stream opened by the user? 
int ReadNextStreamChunk(); bool CanSeek(); // true if stream can be seeked otherwise false public: static const int EoF = COCO_WCHAR_MAX + 1; Buffer(FILE* s, bool isUserStream); Buffer(const unsigned char* buf, int len); Buffer(Buffer *b); virtual ~Buffer(); virtual void Close(); virtual int Read(); virtual int Peek(); virtual wchar_t* GetString(int beg, int end); virtual int GetPos(); virtual void SetPos(int value); }; class UTF8Buffer : public Buffer { public: UTF8Buffer(Buffer *b) : Buffer(b) {}; virtual int Read(); }; //----------------------------------------------------------------------------------- // StartStates -- maps characters to start states of tokens //----------------------------------------------------------------------------------- class StartStates { private: class Elem { public: int key, val; Elem *next; Elem(int key, int val) { this->key = key; this->val = val; next = NULL; } }; Elem **tab; public: StartStates() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } virtual ~StartStates() { for (int i = 0; i < 128; ++i) { Elem *e = tab[i]; while (e != NULL) { Elem *next = e->next; delete e; e = next; } } delete [] tab; } void set(int key, int val) { Elem *e = new Elem(key, val); int k = ((unsigned int) key) % 128; e->next = tab[k]; tab[k] = e; } int state(int key) { Elem *e = tab[((unsigned int) key) % 128]; while (e != NULL && e->key != key) e = e->next; return e == NULL ? 
0 : e->val; } }; //------------------------------------------------------------------------------------------- // KeywordMap -- maps strings to integers (identifiers to keyword kinds) //------------------------------------------------------------------------------------------- class KeywordMap { private: class Elem { public: wchar_t *key; int val; Elem *next; Elem(const wchar_t *key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; } virtual ~Elem() { coco_string_delete(key); } }; Elem **tab; public: KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } virtual ~KeywordMap() { for (int i = 0; i < 128; ++i) { Elem *e = tab[i]; while (e != NULL) { Elem *next = e->next; delete e; e = next; } } delete [] tab; } void set(const wchar_t *key, int val) { Elem *e = new Elem(key, val); int k = coco_string_hash(key) % 128; e->next = tab[k]; tab[k] = e; } int get(const wchar_t *key, int defaultVal) { Elem *e = tab[coco_string_hash(key) % 128]; while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; return e == NULL ? 
defaultVal : e->val; } }; class Scanner { private: void *firstHeap; void *heap; void *heapTop; void **heapEnd; unsigned char EOL; int eofSym; int noSym; int maxT; int charSetSize; StartStates start; KeywordMap keywords; Token *t; // current token wchar_t *tval; // text of current token int tvalLength; // length of text of current token int tlen; // length of current token Token *tokens; // list of tokens already peeked (first token is a dummy) Token *pt; // current peek token int ch; // current input character int pos; // byte position of current character int charPos; // position by unicode characters starting with 0 int line; // line number of current character int col; // column number of current character int oldEols; // EOLs that appeared in a comment; void CreateHeapBlock(); Token* CreateToken(); void AppendVal(Token *t); void SetScannerBehindT(); void Init(); void NextCh(); void AddCh(); bool Comment0(); bool Comment1(); Token* NextToken(); public: Buffer *buffer; // scanner buffer Scanner(const unsigned char* buf, int len); Scanner(const wchar_t* fileName); Scanner(FILE* s); ~Scanner(); Token* Scan(); Token* Peek(); void ResetPeek(); }; // end Scanner } // namespace #endif coco-cpp_20120102/Scanner.cpp0000644000175000017500000005331111700304130013417 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ #include #include #include "Scanner.h" namespace Coco { // string handling, wide character wchar_t* coco_string_create(const wchar_t* value) { return coco_string_create(value, 0); } wchar_t* coco_string_create(const wchar_t *value, int startIndex) { int valueLen = 0; int len = 0; if (value) { valueLen = wcslen(value); len = valueLen - startIndex; } return coco_string_create(value, startIndex, len); } wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { int len = 0; wchar_t* data; if (value) { len = length; } data = new wchar_t[len + 1]; wcsncpy(data, &(value[startIndex]), len); data[len] = 0; return data; } wchar_t* coco_string_create_upper(const wchar_t* data) { if (!data) { return NULL; } int dataLen = 0; if (data) { dataLen = wcslen(data); } wchar_t *newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { if ((L'a' <= data[i]) && (data[i] <= L'z')) { newData[i] = data[i] + (L'A' - L'a'); } else { newData[i] = data[i]; } } newData[dataLen] = L'\0'; return newData; } wchar_t* coco_string_create_lower(const wchar_t* data) { if (!data) { return NULL; } int dataLen = wcslen(data); return coco_string_create_lower(data, 0, dataLen); } wchar_t* coco_string_create_lower(const wchar_t* 
data, int startIndex, int dataLen) { if (!data) { return NULL; } wchar_t* newData = new wchar_t[dataLen + 1]; for (int i = 0; i <= dataLen; i++) { wchar_t ch = data[startIndex + i]; if ((L'A' <= ch) && (ch <= L'Z')) { newData[i] = ch - (L'A' - L'a'); } else { newData[i] = ch; } } newData[dataLen] = L'\0'; return newData; } wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2) { wchar_t* data; int data1Len = 0; int data2Len = 0; if (data1) { data1Len = wcslen(data1); } if (data2) {data2Len = wcslen(data2); } data = new wchar_t[data1Len + data2Len + 1]; if (data1) { wcscpy(data, data1); } if (data2) { wcscpy(data + data1Len, data2); } data[data1Len + data2Len] = 0; return data; } wchar_t* coco_string_create_append(const wchar_t *target, const wchar_t appendix) { int targetLen = coco_string_length(target); wchar_t* data = new wchar_t[targetLen + 2]; wcsncpy(data, target, targetLen); data[targetLen] = appendix; data[targetLen + 1] = 0; return data; } void coco_string_delete(wchar_t* &data) { delete [] data; data = NULL; } int coco_string_length(const wchar_t* data) { if (data) { return wcslen(data); } return 0; } bool coco_string_endswith(const wchar_t* data, const wchar_t *end) { int dataLen = wcslen(data); int endLen = wcslen(end); return (endLen <= dataLen) && (wcscmp(data + dataLen - endLen, end) == 0); } int coco_string_indexof(const wchar_t* data, const wchar_t value) { const wchar_t* chr = wcschr(data, value); if (chr) { return (chr-data); } return -1; } int coco_string_lastindexof(const wchar_t* data, const wchar_t value) { const wchar_t* chr = wcsrchr(data, value); if (chr) { return (chr-data); } return -1; } void coco_string_merge(wchar_t* &target, const wchar_t* appendix) { if (!appendix) { return; } wchar_t* data = coco_string_create_append(target, appendix); delete [] target; target = data; } bool coco_string_equal(const wchar_t* data1, const wchar_t* data2) { return wcscmp( data1, data2 ) == 0; } int coco_string_compareto(const 
wchar_t* data1, const wchar_t* data2) { return wcscmp(data1, data2); } int coco_string_hash(const wchar_t *data) { int h = 0; if (!data) { return 0; } while (*data != 0) { h = (h * 7) ^ *data; ++data; } if (h < 0) { h = -h; } return h; } // string handling, ascii character wchar_t* coco_string_create(const char* value) { int len = 0; if (value) { len = strlen(value); } wchar_t* data = new wchar_t[len + 1]; for (int i = 0; i < len; ++i) { data[i] = (wchar_t) value[i]; } data[len] = 0; return data; } char* coco_string_create_char(const wchar_t *value) { int len = coco_string_length(value); char *res = new char[len + 1]; for (int i = 0; i < len; ++i) { res[i] = (char) value[i]; } res[len] = 0; return res; } void coco_string_delete(char* &data) { delete [] data; data = NULL; } Token::Token() { kind = 0; pos = 0; col = 0; line = 0; val = NULL; next = NULL; } Token::~Token() { coco_string_delete(val); } Buffer::Buffer(FILE* s, bool isUserStream) { // ensure binary read on windows #if _MSC_VER >= 1300 _setmode(_fileno(s), _O_BINARY); #endif stream = s; this->isUserStream = isUserStream; if (CanSeek()) { fseek(s, 0, SEEK_END); fileLen = ftell(s); fseek(s, 0, SEEK_SET); bufLen = (fileLen < COCO_MAX_BUFFER_LENGTH) ? fileLen : COCO_MAX_BUFFER_LENGTH; bufStart = INT_MAX; // nothing in the buffer so far } else { fileLen = bufLen = bufStart = 0; } bufCapacity = (bufLen>0) ? 
bufLen : COCO_MIN_BUFFER_LENGTH; buf = new unsigned char[bufCapacity]; if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && CanSeek()) Close(); } Buffer::Buffer(Buffer *b) { buf = b->buf; bufCapacity = b->bufCapacity; b->buf = NULL; bufStart = b->bufStart; bufLen = b->bufLen; fileLen = b->fileLen; bufPos = b->bufPos; stream = b->stream; b->stream = NULL; isUserStream = b->isUserStream; } Buffer::Buffer(const unsigned char* buf, int len) { this->buf = new unsigned char[len]; memcpy(this->buf, buf, len*sizeof(unsigned char)); bufStart = 0; bufCapacity = bufLen = len; fileLen = len; bufPos = 0; stream = NULL; } Buffer::~Buffer() { Close(); if (buf != NULL) { delete [] buf; buf = NULL; } } void Buffer::Close() { if (!isUserStream && stream != NULL) { fclose(stream); stream = NULL; } } int Buffer::Read() { if (bufPos < bufLen) { return buf[bufPos++]; } else if (GetPos() < fileLen) { SetPos(GetPos()); // shift buffer start to Pos return buf[bufPos++]; } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { return buf[bufPos++]; } else { return EoF; } } int Buffer::Peek() { int curPos = GetPos(); int ch = Read(); SetPos(curPos); return ch; } // beg .. begin, zero-based, inclusive, in byte // end .. end, zero-based, exclusive, in byte wchar_t* Buffer::GetString(int beg, int end) { int len = 0; wchar_t *buf = new wchar_t[end - beg]; int oldPos = GetPos(); SetPos(beg); while (GetPos() < end) buf[len++] = (wchar_t) Read(); SetPos(oldPos); wchar_t *res = coco_string_create(buf, 0, len); coco_string_delete(buf); return res; } int Buffer::GetPos() { return bufPos + bufStart; } void Buffer::SetPos(int value) { if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { // Wanted position is after buffer and the stream // is not seek-able e.g. network or console, // thus we have to read the stream manually till // the wanted position is in sight. 
while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); } if ((value < 0) || (value > fileLen)) { wprintf(L"--- buffer out of bounds access, position: %d\n", value); exit(1); } if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer bufPos = value - bufStart; } else if (stream != NULL) { // must be swapped in fseek(stream, value, SEEK_SET); bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); bufStart = value; bufPos = 0; } else { bufPos = fileLen - bufStart; // make Pos return fileLen } } // Read the next chunk of bytes from the stream, increases the buffer // if needed and updates the fields fileLen and bufLen. // Returns the number of bytes read. int Buffer::ReadNextStreamChunk() { int free = bufCapacity - bufLen; if (free == 0) { // in the case of a growing input stream // we can neither seek in the stream, nor can we // foresee the maximum length, thus we must adapt // the buffer size on demand. bufCapacity = bufLen * 2; unsigned char *newBuf = new unsigned char[bufCapacity]; memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); delete [] buf; buf = newBuf; free = bufLen; } int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); if (read > 0) { fileLen = bufLen = (bufLen + read); return read; } // end of stream reached return 0; } bool Buffer::CanSeek() { return (stream != NULL) && (ftell(stream) != -1); } int UTF8Buffer::Read() { int ch; do { ch = Buffer::Read(); // until we find a utf8 start (0xxxxxxx or 11xxxxxx) } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); if (ch < 128 || ch == EoF) { // nothing to do, first 127 chars are the same in ascii and utf8 // 0xxxxxxx or end of file character } else if ((ch & 0xF0) == 0xF0) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx int c1 = ch & 0x07; ch = Buffer::Read(); int c2 = ch & 0x3F; ch = Buffer::Read(); int c3 = ch & 0x3F; ch = Buffer::Read(); int c4 = ch & 0x3F; ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; } else if ((ch & 0xE0) == 0xE0) { // 1110xxxx 
10xxxxxx 10xxxxxx int c1 = ch & 0x0F; ch = Buffer::Read(); int c2 = ch & 0x3F; ch = Buffer::Read(); int c3 = ch & 0x3F; ch = (((c1 << 6) | c2) << 6) | c3; } else if ((ch & 0xC0) == 0xC0) { // 110xxxxx 10xxxxxx int c1 = ch & 0x1F; ch = Buffer::Read(); int c2 = ch & 0x3F; ch = (c1 << 6) | c2; } return ch; } Scanner::Scanner(const unsigned char* buf, int len) { buffer = new Buffer(buf, len); Init(); } Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { wprintf(L"--- Cannot open file %ls\n", fileName); exit(1); } coco_string_delete(chFileName); buffer = new Buffer(stream, false); Init(); } Scanner::Scanner(FILE* s) { buffer = new Buffer(s, true); Init(); } Scanner::~Scanner() { char* cur = (char*) firstHeap; while(cur != NULL) { cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); free(firstHeap); firstHeap = cur; } delete [] tval; delete buffer; } void Scanner::Init() { EOL = '\n'; eofSym = 0; maxT = 41; noSym = 41; int i; for (i = 65; i <= 90; ++i) start.set(i, 1); for (i = 95; i <= 95; ++i) start.set(i, 1); for (i = 97; i <= 122; ++i) start.set(i, 1); for (i = 48; i <= 57; ++i) start.set(i, 2); start.set(34, 12); start.set(39, 5); start.set(36, 13); start.set(61, 16); start.set(46, 31); start.set(43, 17); start.set(45, 18); start.set(60, 32); start.set(62, 20); start.set(124, 23); start.set(40, 33); start.set(41, 24); start.set(91, 25); start.set(93, 26); start.set(123, 27); start.set(125, 28); start.set(Buffer::EoF, -1); keywords.set(L"COMPILER", 6); keywords.set(L"IGNORECASE", 7); keywords.set(L"CHARACTERS", 8); keywords.set(L"TOKENS", 9); keywords.set(L"PRAGMAS", 10); keywords.set(L"COMMENTS", 11); keywords.set(L"FROM", 12); keywords.set(L"TO", 13); keywords.set(L"NESTED", 14); keywords.set(L"IGNORE", 15); keywords.set(L"PRODUCTIONS", 16); keywords.set(L"END", 19); keywords.set(L"ANY", 23); keywords.set(L"WEAK", 29); keywords.set(L"SYNC", 36); 
keywords.set(L"IF", 37); keywords.set(L"CONTEXT", 38); tvalLength = 128; tval = new wchar_t[tvalLength]; // text of current token // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); firstHeap = heap; heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); exit(1); } pos = -1; line = 1; col = 0; charPos = -1; oldEols = 0; NextCh(); if (ch == 0xEF) { // check optional byte order mark for UTF-8 NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { wprintf(L"Illegal byte order mark at start of file"); exit(1); } Buffer *oldBuf = buffer; buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; delete oldBuf; oldBuf = NULL; NextCh(); } pt = tokens = CreateToken(); // first token is a dummy } void Scanner::NextCh() { if (oldEols > 0) { ch = EOL; oldEols--; } else { pos = buffer->GetPos(); // buffer reads unicode chars, if UTF8 has been detected ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; if (ch == EOL) { line++; col = 0; } } } void Scanner::AddCh() { if (tlen >= tvalLength) { tvalLength *= 2; wchar_t *newBuf = new wchar_t[tvalLength]; memcpy(newBuf, tval, tlen*sizeof(wchar_t)); delete [] tval; tval = newBuf; } if (ch != Buffer::EoF) { tval[tlen++] = ch; NextCh(); } } bool Scanner::Comment0() { int level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); if (ch == L'/') { NextCh(); for(;;) { if (ch == 10) { level--; if (level == 0) { oldEols = line - line0; NextCh(); return true; } NextCh(); } else if (ch == buffer->EoF) return false; else NextCh(); } } else { buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } return false; } bool Scanner::Comment1() { int 
level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos; NextCh(); if (ch == L'*') { NextCh(); for(;;) { if (ch == L'*') { NextCh(); if (ch == L'/') { level--; if (level == 0) { oldEols = line - line0; NextCh(); return true; } NextCh(); } } else if (ch == L'/') { NextCh(); if (ch == L'*') { level++; NextCh(); } } else if (ch == buffer->EoF) return false; else NextCh(); } } else { buffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0; } return false; } void Scanner::CreateHeapBlock() { void* newHeap; char* cur = (char*) firstHeap; while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); free(firstHeap); firstHeap = cur; } // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); *heapEnd = newHeap; heapEnd = (void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); *heapEnd = 0; heap = newHeap; heapTop = heap; } Token* Scanner::CreateToken() { Token *t; if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { CreateHeapBlock(); } t = (Token*) heapTop; heapTop = (void*) ((char*) heapTop + sizeof(Token)); t->val = NULL; t->next = NULL; return t; } void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { wprintf(L"--- Too long token value\n"); exit(1); } CreateHeapBlock(); } t->val = (wchar_t*) heapTop; heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); t->val[tlen] = L'\0'; } Token* Scanner::NextToken() { while (ch == ' ' || (ch >= 9 && ch <= 10) || ch == 13 ) NextCh(); if ((ch == L'/' && Comment0()) || (ch == L'/' && Comment1())) return NextToken(); int recKind = noSym; int recEnd = pos; t = CreateToken(); t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; int state = start.state(ch); tlen = 0; AddCh(); switch (state) { case -1: { t->kind = eofSym; 
break; } // NextCh already done case 0: { case_0: if (recKind != noSym) { tlen = recEnd - t->pos; SetScannerBehindT(); } t->kind = recKind; break; } // NextCh already done case 1: case_1: recEnd = pos; recKind = 1; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_1;} else {t->kind = 1; wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;} case 2: case_2: recEnd = pos; recKind = 2; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_2;} else {t->kind = 2; break;} case 3: case_3: {t->kind = 3; break;} case 4: case_4: {t->kind = 4; break;} case 5: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'&') || (ch >= L'(' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_6;} else if (ch == 92) {AddCh(); goto case_7;} else {goto case_0;} case 6: case_6: if (ch == 39) {AddCh(); goto case_9;} else {goto case_0;} case 7: case_7: if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_8;} else {goto case_0;} case 8: case_8: if ((ch >= L'0' && ch <= L'9') || (ch >= L'a' && ch <= L'f')) {AddCh(); goto case_8;} else if (ch == 39) {AddCh(); goto case_9;} else {goto case_0;} case 9: case_9: {t->kind = 5; break;} case 10: case_10: recEnd = pos; recKind = 42; if ((ch >= L'0' && ch <= L'9') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_10;} else {t->kind = 42; break;} case 11: case_11: recEnd = pos; recKind = 43; if ((ch >= L'-' && ch <= L'.') || (ch >= L'0' && ch <= L':') || (ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_11;} else {t->kind = 43; break;} case 12: case_12: if (ch <= 9 || (ch >= 11 && ch <= 12) || (ch >= 14 && ch <= L'!') || (ch >= L'#' && ch <= L'[') || (ch >= L']' && ch <= 65535)) {AddCh(); goto case_12;} else if (ch == 10 || ch == 13) {AddCh(); goto case_4;} else if (ch == L'"') {AddCh(); goto 
case_3;} else if (ch == 92) {AddCh(); goto case_14;} else {goto case_0;} case 13: recEnd = pos; recKind = 42; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} else {t->kind = 42; break;} case 14: case_14: if ((ch >= L' ' && ch <= L'~')) {AddCh(); goto case_12;} else {goto case_0;} case 15: case_15: recEnd = pos; recKind = 42; if ((ch >= L'0' && ch <= L'9')) {AddCh(); goto case_10;} else if ((ch >= L'A' && ch <= L'Z') || ch == L'_' || (ch >= L'a' && ch <= L'z')) {AddCh(); goto case_15;} else if (ch == L'=') {AddCh(); goto case_11;} else {t->kind = 42; break;} case 16: {t->kind = 17; break;} case 17: {t->kind = 20; break;} case 18: {t->kind = 21; break;} case 19: case_19: {t->kind = 22; break;} case 20: {t->kind = 25; break;} case 21: case_21: {t->kind = 26; break;} case 22: case_22: {t->kind = 27; break;} case 23: {t->kind = 28; break;} case 24: {t->kind = 31; break;} case 25: {t->kind = 32; break;} case 26: {t->kind = 33; break;} case 27: {t->kind = 34; break;} case 28: {t->kind = 35; break;} case 29: case_29: {t->kind = 39; break;} case 30: case_30: {t->kind = 40; break;} case 31: recEnd = pos; recKind = 18; if (ch == L'.') {AddCh(); goto case_19;} else if (ch == L'>') {AddCh(); goto case_22;} else if (ch == L')') {AddCh(); goto case_30;} else {t->kind = 18; break;} case 32: recEnd = pos; recKind = 24; if (ch == L'.') {AddCh(); goto case_21;} else {t->kind = 24; break;} case 33: recEnd = pos; recKind = 30; if (ch == L'.') {AddCh(); goto case_29;} else {t->kind = 30; break;} } AppendVal(t); return t; } void Scanner::SetScannerBehindT() { buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; charPos = t->charPos; for (int i = 0; i < tlen; i++) NextCh(); } // get the next token (possibly a token already seen during peeking) Token* Scanner::Scan() { if (tokens->next == NULL) { return pt = tokens = NextToken(); } else { pt = tokens = tokens->next; 
return tokens; } } // peek for the next token, ignore pragmas Token* Scanner::Peek() { do { if (pt->next == NULL) { pt->next = NextToken(); } pt = pt->next; } while (pt->kind > maxT); // skip pragmas return pt; } // make sure that peeking starts at the current scan position void Scanner::ResetPeek() { pt = tokens; } } // namespace coco-cpp_20120102/Parser.h0000644000175000017500000000744111700304130012732 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-----------------------------------------------------------------------*/


#if !defined(Coco_COCO_PARSER_H__)
#define Coco_COCO_PARSER_H__

#include "Tab.h"
#include "DFA.h"
#include "ParserGen.h"


#include "Scanner.h"

namespace Coco {


// Error reporting interface used by the parser.
// NOTE(review): the implementations are not part of this header; the
// per-method behaviour should be confirmed against Parser.cpp.
class Errors {
public:
	int count;			// number of errors detected

	Errors();
	void SynErr(int line, int col, int n);	// syntax error number n at line/col
	void Error(int line, int col, const wchar_t *s);
	void Warning(int line, int col, const wchar_t *s);
	void Warning(const wchar_t *s);
	void Exception(const wchar_t *s);

}; // Errors

// Parser for Coco/R attributed grammars.
class Parser {
private:
	// Token kinds referenced by name in the parser.
	enum {
		_EOF=0,
		_ident=1,
		_number=2,
		_string=3,
		_badString=4,
		_char=5,
		_ddtSym=42,
		_optionSym=43
	};
	int maxT;	// Get() only stops at tokens whose kind is <= maxT

	Token *dummyToken;
	int errDist;	// incremented by Get() for each token, reset to 0 when an error is reported
	int minErrDist;	// errors are only reported if errDist >= minErrDist

	void SynErr(int n);
	void Get();
	void Expect(int n);
	bool StartOf(int s);
	void ExpectWeak(int n, int follow);
	bool WeakSeparator(int n, int syFol, int repFol);

public:
	Scanner *scanner;
	Errors  *errors;

	Token *t;			// last recognized token
	Token *la;			// lookahead token

int id;
	int str;

	FILE* trace;		// other Coco objects referenced in this ATG
	Tab *tab;
	DFA *dfa;
	ParserGen *pgen;

	bool genScanner;
	wchar_t* tokenString;  // used in declarations of literal tokens
	wchar_t* noString;     // used in declarations of literal tokens

	// This method will be called by the constructor if it exists.
	// This support is specific to the C++ version of Coco/R.
	void Init() {
		tab = NULL;
		dfa = NULL;
		pgen = NULL;
		id  = 0;
		str = 1;
		tokenString = NULL;
		noString = coco_string_create(L"-none-");	// allocated with new[]; not released in this header
	}

	// Uncomment this method if cleanup is necessary,
	// this method will be called by the destructor if it exists.
	// This support is specific to the C++ version of Coco/R.
	//
	// void Destroy() {
	//   nothing to do
	// }
/*-------------------------------------------------------------------------*/


	Parser(Scanner *scanner);
	~Parser();
	void SemErr(const wchar_t* msg);	// reports a semantic error at the position of t

	// One parsing method per grammar production.
	void Coco();
	void SetDecl();
	void TokenDecl(int typ);
	void TokenExpr(Graph* &g);
	void Set(CharSet* &s);
	void AttrDecl(Symbol *sym);
	void SemText(Position* &pos);
	void Expression(Graph* &g);
	void SimSet(CharSet* &s);
	void Char(int &n);
	void Sym(wchar_t* &name, int &kind);
	void Term(Graph* &g);
	void Resolver(Position* &pos);
	void Factor(Graph* &g);
	void Attribs(Node *p);
	void Condition();
	void TokenTerm(Graph* &g);
	void TokenFactor(Graph* &g);

	void Parse();

}; // end Parser

} // namespace


#endif

coco-cpp_20120102/Parser.cpp0000644000175000017500000006711111700304130013265 0ustar  mlml/*----------------------------------------------------------------------
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
extended by M. Loeberbauer & A. Woess, Univ. of Linz
ported to C++ by Csaba Balazs, University of Szeged
with improvements by Pat Terry, Rhodes University

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.

If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ #include #include "Parser.h" #include "Scanner.h" namespace Coco { void Parser::SynErr(int n) { if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); errDist = 0; } void Parser::SemErr(const wchar_t* msg) { if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); errDist = 0; } void Parser::Get() { for (;;) { t = la; la = scanner->Scan(); if (la->kind <= maxT) { ++errDist; break; } if (la->kind == _ddtSym) { tab->SetDDT(la->val); } if (la->kind == _optionSym) { tab->SetOption(la->val); } if (dummyToken != t) { dummyToken->kind = t->kind; dummyToken->pos = t->pos; dummyToken->col = t->col; dummyToken->line = t->line; dummyToken->next = NULL; coco_string_delete(dummyToken->val); dummyToken->val = coco_string_create(t->val); t = dummyToken; } la = t; } } void Parser::Expect(int n) { if (la->kind==n) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { if (la->kind == n) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); } } bool Parser::WeakSeparator(int n, int syFol, int repFol) { if (la->kind == n) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { Get(); } return StartOf(syFol); } } void Parser::Coco() { Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s; int beg = la->pos; int line = la->line; while (StartOf(1)) { Get(); } if (la->pos != beg) { pgen->usingPos = new Position(beg, t->pos + coco_string_length(t->val), 0, line); } Expect(6 /* "COMPILER" */); genScanner = true; tab->ignored = new CharSet(); Expect(_ident); gramName = coco_string_create(t->val); beg = la->pos; line = la->line; while (StartOf(2)) { Get(); } tab->semDeclPos = new Position(beg, la->pos, 0, line); if (la->kind == 7 /* 
"IGNORECASE" */) { Get(); dfa->ignoreCase = true; } if (la->kind == 8 /* "CHARACTERS" */) { Get(); while (la->kind == _ident) { SetDecl(); } } if (la->kind == 9 /* "TOKENS" */) { Get(); while (la->kind == _ident || la->kind == _string || la->kind == _char) { TokenDecl(Node::t); } } if (la->kind == 10 /* "PRAGMAS" */) { Get(); while (la->kind == _ident || la->kind == _string || la->kind == _char) { TokenDecl(Node::pr); } } while (la->kind == 11 /* "COMMENTS" */) { Get(); bool nested = false; Expect(12 /* "FROM" */); TokenExpr(g1); Expect(13 /* "TO" */); TokenExpr(g2); if (la->kind == 14 /* "NESTED" */) { Get(); nested = true; } dfa->NewComment(g1->l, g2->l, nested); } while (la->kind == 15 /* "IGNORE" */) { Get(); Set(s); tab->ignored->Or(s); } while (!(la->kind == _EOF || la->kind == 16 /* "PRODUCTIONS" */)) {SynErr(42); Get();} Expect(16 /* "PRODUCTIONS" */); if (genScanner) dfa->MakeDeterministic(); tab->DeleteNodes(); while (la->kind == _ident) { Get(); sym = tab->FindSym(t->val); bool undef = (sym == NULL); if (undef) sym = tab->NewSym(Node::nt, t->val, t->line); else { if (sym->typ == Node::nt) { if (sym->graph != NULL) SemErr(L"name declared twice"); } else SemErr(L"this symbol kind not allowed on left side of production"); sym->line = t->line; } bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) { AttrDecl(sym); } if (!undef) if (noAttrs != (sym->attrPos == NULL)) SemErr(L"attribute mismatch between declaration and use of this symbol"); if (la->kind == 39 /* "(." */) { SemText(sym->semPos); } ExpectWeak(17 /* "=" */, 3); Expression(g); sym->graph = g->l; tab->Finish(g); ExpectWeak(18 /* "." 
*/, 4); } Expect(19 /* "END" */); Expect(_ident); if (!coco_string_equal(gramName, t->val)) SemErr(L"name does not match grammar name"); tab->gramSy = tab->FindSym(gramName); if (tab->gramSy == NULL) SemErr(L"missing production for grammar name"); else { sym = tab->gramSy; if (sym->attrPos != NULL) SemErr(L"grammar symbol must not have attributes"); } tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors->count == 0) { wprintf(L"checking\n"); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); if (tab->GrammarOk()) { wprintf(L"parser"); pgen->WriteParser(); if (genScanner) { wprintf(L" + scanner"); dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } wprintf(L" generated\n"); if (tab->ddt[8]) pgen->WriteStatistics(); } } if (tab->ddt[6]) tab->PrintSymbolTable(); Expect(18 /* "." */); } void Parser::SetDecl() { CharSet *s; Expect(_ident); wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); if (c != NULL) SemErr(L"name declared twice"); Expect(17 /* "=" */); Set(s); if (s->Elements() == 0) SemErr(L"character set must not be empty"); tab->NewCharClass(name, s); Expect(18 /* "." */); } void Parser::TokenDecl(int typ) { wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; Sym(name, kind); sym = tab->FindSym(name); if (sym != NULL) SemErr(L"name declared twice"); else { sym = tab->NewSym(typ, name, t->line); sym->tokenKind = Symbol::fixedToken; } tokenString = NULL; while (!(StartOf(5))) {SynErr(43); Get();} if (la->kind == 17 /* "=" */) { Get(); TokenExpr(g); Expect(18 /* "." 
*/); if (kind == str) SemErr(L"a literal must not be declared with a structure"); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string if ((*(tab->literals))[tokenString] != NULL) SemErr(L"token string declared twice"); tab->literals->Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } } else if (StartOf(6)) { if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); } else SynErr(44); if (la->kind == 39 /* "(." */) { SemText(sym->semPos); if (typ != Node::pr) SemErr(L"semantic action not allowed here"); } } void Parser::TokenExpr(Graph* &g) { Graph *g2; TokenTerm(g); bool first = true; while (WeakSeparator(28 /* "|" */,8,7) ) { TokenTerm(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); } } void Parser::Set(CharSet* &s) { CharSet *s2; SimSet(s); while (la->kind == 20 /* "+" */ || la->kind == 21 /* "-" */) { if (la->kind == 20 /* "+" */) { Get(); SimSet(s2); s->Or(s2); } else { Get(); SimSet(s2); s->Subtract(s2); } } } void Parser::AttrDecl(Symbol *sym) { if (la->kind == 24 /* "<" */) { Get(); int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(9)) { if (StartOf(10)) { Get(); } else { Get(); SemErr(L"bad string in attributes"); } } Expect(25 /* ">" */); if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); } else if (la->kind == 26 /* "<." */) { Get(); int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(11)) { if (StartOf(12)) { Get(); } else { Get(); SemErr(L"bad string in attributes"); } } Expect(27 /* ".>" */); if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); } else SynErr(45); } void Parser::SemText(Position* &pos) { Expect(39 /* "(." 
*/); int beg = la->pos; int col = la->col; int line = t->line; while (StartOf(13)) { if (StartOf(14)) { Get(); } else if (la->kind == _badString) { Get(); SemErr(L"bad string in semantic action"); } else { Get(); SemErr(L"missing end of previous semantic action"); } } Expect(40 /* ".)" */); pos = new Position(beg, t->pos, col, line); } void Parser::Expression(Graph* &g) { Graph *g2; Term(g); bool first = true; while (WeakSeparator(28 /* "|" */,16,15) ) { Term(g2); if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); } } void Parser::SimSet(CharSet* &s) { int n1, n2; s = new CharSet(); if (la->kind == _ident) { Get(); CharClass *c = tab->FindCharClass(t->val); if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); } else if (la->kind == _string) { Get(); wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t *name = tab->Unescape(subName2); coco_string_delete(subName2); wchar_t ch; int len = coco_string_length(name); for(int i=0; i < len; i++) { ch = name[i]; if (dfa->ignoreCase) { if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() } s->Set(ch); } coco_string_delete(name); } else if (la->kind == _char) { Char(n1); s->Set(n1); if (la->kind == 22 /* ".." 
*/) { Get(); Char(n2); for (int i = n1; i <= n2; i++) s->Set(i); } } else if (la->kind == 23 /* "ANY" */) { Get(); s = new CharSet(); s->Fill(); } else SynErr(46); } void Parser::Char(int &n) { Expect(_char); n = 0; wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t* name = tab->Unescape(subName); coco_string_delete(subName); // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ if (coco_string_length(name) <= 1) n = name[0]; else SemErr(L"unacceptable character value"); coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; } void Parser::Sym(wchar_t* &name, int &kind) { name = coco_string_create(L"???"); kind = id; if (la->kind == _ident) { Get(); kind = id; coco_string_delete(name); name = coco_string_create(t->val); } else if (la->kind == _string || la->kind == _char) { if (la->kind == _string) { Get(); coco_string_delete(name); name = coco_string_create(t->val); } else { Get(); wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); coco_string_delete(name); name = coco_string_create_append(L"\"", subName); coco_string_delete(subName); coco_string_merge(name, L"\""); } kind = str; if (dfa->ignoreCase) { wchar_t *oldName = name; name = coco_string_create_lower(name); coco_string_delete(oldName); } if (coco_string_indexof(name, ' ') >= 0) SemErr(L"literal tokens must not contain blanks"); } else SynErr(47); } void Parser::Term(Graph* &g) { Graph *g2; Node *rslv = NULL; g = NULL; if (StartOf(17)) { if (la->kind == 37 /* "IF" */) { rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); Resolver(rslv->pos); g = new Graph(rslv); } Factor(g2); if (rslv != NULL) tab->MakeSequence(g, g2); else g = g2; while (StartOf(18)) { Factor(g2); tab->MakeSequence(g, g2); } } else if (StartOf(19)) { g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); } else SynErr(48); if (g == NULL) // invalid start of Term g = new Graph(tab->NewNode(Node::eps, 
(Symbol*)NULL, 0)); } void Parser::Resolver(Position* &pos) { Expect(37 /* "IF" */); Expect(30 /* "(" */); int beg = la->pos; int col = la->col; int line = la->line; Condition(); pos = new Position(beg, t->pos, col, line); } void Parser::Factor(Graph* &g) { wchar_t* name = NULL; int kind; Position *pos; bool weak = false; g = NULL; switch (la->kind) { case _ident: case _string: case _char: case 29 /* "WEAK" */: { if (la->kind == 29 /* "WEAK" */) { Get(); weak = true; } Sym(name, kind); Symbol *sym = tab->FindSym(name); if (sym == NULL && kind == str) sym = (Symbol*)((*(tab->literals))[name]); bool undef = (sym == NULL); if (undef) { if (kind == id) sym = tab->NewSym(Node::nt, name, 0); // forward nt else if (genScanner) { sym = tab->NewSym(Node::t, name, t->line); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production SemErr(L"undefined string in production"); sym = tab->eofSy; // dummy } } int typ = sym->typ; if (typ != Node::t && typ != Node::nt) SemErr(L"this symbol kind is not allowed in a production"); if (weak) { if (typ == Node::t) typ = Node::wt; else SemErr(L"only terminals may be weak"); } Node *p = tab->NewNode(typ, sym, t->line); g = new Graph(p); if (la->kind == 24 /* "<" */ || la->kind == 26 /* "<." */) { Attribs(p); if (kind != id) SemErr(L"a literal must not have attributes"); } if (undef) sym->attrPos = p->pos; // dummy else if ((p->pos == NULL) != (sym->attrPos == NULL)) SemErr(L"attribute mismatch between declaration and use of this symbol"); break; } case 30 /* "(" */: { Get(); Expression(g); Expect(31 /* ")" */); break; } case 32 /* "[" */: { Get(); Expression(g); Expect(33 /* "]" */); tab->MakeOption(g); break; } case 34 /* "{" */: { Get(); Expression(g); Expect(35 /* "}" */); tab->MakeIteration(g); break; } case 39 /* "(." 
*/: { SemText(pos); Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0); p->pos = pos; g = new Graph(p); break; } case 23 /* "ANY" */: { Get(); Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys g = new Graph(p); break; } case 36 /* "SYNC" */: { Get(); Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0); g = new Graph(p); break; } default: SynErr(49); break; } if (g == NULL) // invalid start of Factor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); } void Parser::Attribs(Node *p) { if (la->kind == 24 /* "<" */) { Get(); int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(9)) { if (StartOf(10)) { Get(); } else { Get(); SemErr(L"bad string in attributes"); } } Expect(25 /* ">" */); if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); } else if (la->kind == 26 /* "<." */) { Get(); int beg = la->pos; int col = la->col; int line = la->line; while (StartOf(11)) { if (StartOf(12)) { Get(); } else { Get(); SemErr(L"bad string in attributes"); } } Expect(27 /* ".>" */); if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); } else SynErr(50); } void Parser::Condition() { while (StartOf(20)) { if (la->kind == 30 /* "(" */) { Get(); Condition(); } else { Get(); } } Expect(31 /* ")" */); } void Parser::TokenTerm(Graph* &g) { Graph *g2; TokenFactor(g); while (StartOf(8)) { TokenFactor(g2); tab->MakeSequence(g, g2); } if (la->kind == 38 /* "CONTEXT" */) { Get(); Expect(30 /* "(" */); TokenExpr(g2); tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true; tab->MakeSequence(g, g2); Expect(31 /* ")" */); } } void Parser::TokenFactor(Graph* &g) { wchar_t* name = NULL; int kind; g = NULL; if (la->kind == _ident || la->kind == _string || la->kind == _char) { Sym(name, kind); if (kind == id) { CharClass *c = tab->FindCharClass(name); if (c == NULL) { SemErr(L"undefined name"); c = tab->NewCharClass(name, new CharSet()); } Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n; g = new 
Graph(p); tokenString = coco_string_create(noString); } else { // str g = tab->StrToGraph(name); if (tokenString == NULL) tokenString = coco_string_create(name); else tokenString = coco_string_create(noString); } } else if (la->kind == 30 /* "(" */) { Get(); TokenExpr(g); Expect(31 /* ")" */); } else if (la->kind == 32 /* "[" */) { Get(); TokenExpr(g); Expect(33 /* "]" */); tab->MakeOption(g); tokenString = coco_string_create(noString); } else if (la->kind == 34 /* "{" */) { Get(); TokenExpr(g); Expect(35 /* "}" */); tab->MakeIteration(g); tokenString = coco_string_create(noString); } else SynErr(51); if (g == NULL) // invalid start of TokenFactor g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); } // If the user declared a method Init and a mehtod Destroy they should // be called in the contructur and the destructor respctively. // // The following templates are used to recognize if the user declared // the methods Init and Destroy. template struct ParserInitExistsRecognizer { template struct ExistsIfInitIsDefinedMarker{}; struct InitIsMissingType { char dummy1; }; struct InitExistsType { char dummy1; char dummy2; }; // exists always template static InitIsMissingType is_here(...); // exist only if ExistsIfInitIsDefinedMarker is defined template static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; }; template struct ParserDestroyExistsRecognizer { template struct ExistsIfDestroyIsDefinedMarker{}; struct DestroyIsMissingType { char dummy1; }; struct DestroyExistsType { char dummy1; char dummy2; }; // exists always template static DestroyIsMissingType is_here(...); // exist only if ExistsIfDestroyIsDefinedMarker is defined template static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; }; // The folloing templates are used to call the Init and Destroy methods if they exist. 
// Generic case of the ParserInitCaller, gets used if the Init method is missing template::InitExists> struct ParserInitCaller { static void CallInit(T *t) { // nothing to do } }; // True case of the ParserInitCaller, gets used if the Init method exists template struct ParserInitCaller { static void CallInit(T *t) { t->Init(); } }; // Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing template::DestroyExists> struct ParserDestroyCaller { static void CallDestroy(T *t) { // nothing to do } }; // True case of the ParserDestroyCaller, gets used if the Destroy method exists template struct ParserDestroyCaller { static void CallDestroy(T *t) { t->Destroy(); } }; void Parser::Parse() { t = NULL; la = dummyToken = new Token(); la->val = coco_string_create(L"Dummy Token"); Get(); Coco(); Expect(0); } Parser::Parser(Scanner *scanner) { maxT = 41; ParserInitCaller::CallInit(this); dummyToken = NULL; t = la = NULL; minErrDist = 2; errDist = minErrDist; this->scanner = scanner; errors = new Errors(); } bool Parser::StartOf(int s) { const bool T = true; const bool x = false; static bool set[21][43] = { {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, {x,T,T,T, T,T,x,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, {x,T,T,T, T,T,T,x, x,x,x,x, T,T,T,x, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,T,x, x,x,x,T, x,x,x,x, T,T,T,x, T,x,T,x, T,T,x,T, x,x,x}, {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,T, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, {T,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, {x,T,x,T, x,T,x,x, x,x,T,T, x,x,x,T, T,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x}, {x,x,x,x, x,x,x,x, x,x,x,T, x,T,T,T, T,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, 
x,x,T,x, T,x,T,x, x,x,x,x, x,x,x}, {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,x,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,T,T, T,T,x}, {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, x,T,x}, {x,T,T,T, x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, x,T,x}, {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, x,x,x,x, x,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,T, x,x,x,x, T,T,T,T, T,T,T,T, T,T,x,T, x,x,x}, {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,T,x,T, x,x,x}, {x,T,x,T, x,T,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,T, x,x,x,x, x,T,T,x, T,x,T,x, T,x,x,T, x,x,x}, {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x, x,x,T,x, x,x,x,x, x,x,x,x, T,x,x,T, x,T,x,T, x,x,x,x, x,x,x}, {x,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,T, T,T,T,x, T,T,T,T, T,T,T,T, T,T,x} }; return set[s][la->kind]; } Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); delete errors; delete dummyToken; } Errors::Errors() { count = 0; } void Errors::SynErr(int line, int col, int n) { wchar_t* s; switch (n) { case 0: s = coco_string_create(L"EOF expected"); break; case 1: s = coco_string_create(L"ident expected"); break; case 2: s = coco_string_create(L"number expected"); break; case 3: s = coco_string_create(L"string expected"); break; case 4: s = coco_string_create(L"badString expected"); break; case 5: s = coco_string_create(L"char expected"); break; case 6: s = coco_string_create(L"\"COMPILER\" expected"); break; case 7: s = coco_string_create(L"\"IGNORECASE\" expected"); break; case 8: s = coco_string_create(L"\"CHARACTERS\" expected"); 
break; case 9: s = coco_string_create(L"\"TOKENS\" expected"); break; case 10: s = coco_string_create(L"\"PRAGMAS\" expected"); break; case 11: s = coco_string_create(L"\"COMMENTS\" expected"); break; case 12: s = coco_string_create(L"\"FROM\" expected"); break; case 13: s = coco_string_create(L"\"TO\" expected"); break; case 14: s = coco_string_create(L"\"NESTED\" expected"); break; case 15: s = coco_string_create(L"\"IGNORE\" expected"); break; case 16: s = coco_string_create(L"\"PRODUCTIONS\" expected"); break; case 17: s = coco_string_create(L"\"=\" expected"); break; case 18: s = coco_string_create(L"\".\" expected"); break; case 19: s = coco_string_create(L"\"END\" expected"); break; case 20: s = coco_string_create(L"\"+\" expected"); break; case 21: s = coco_string_create(L"\"-\" expected"); break; case 22: s = coco_string_create(L"\"..\" expected"); break; case 23: s = coco_string_create(L"\"ANY\" expected"); break; case 24: s = coco_string_create(L"\"<\" expected"); break; case 25: s = coco_string_create(L"\">\" expected"); break; case 26: s = coco_string_create(L"\"<.\" expected"); break; case 27: s = coco_string_create(L"\".>\" expected"); break; case 28: s = coco_string_create(L"\"|\" expected"); break; case 29: s = coco_string_create(L"\"WEAK\" expected"); break; case 30: s = coco_string_create(L"\"(\" expected"); break; case 31: s = coco_string_create(L"\")\" expected"); break; case 32: s = coco_string_create(L"\"[\" expected"); break; case 33: s = coco_string_create(L"\"]\" expected"); break; case 34: s = coco_string_create(L"\"{\" expected"); break; case 35: s = coco_string_create(L"\"}\" expected"); break; case 36: s = coco_string_create(L"\"SYNC\" expected"); break; case 37: s = coco_string_create(L"\"IF\" expected"); break; case 38: s = coco_string_create(L"\"CONTEXT\" expected"); break; case 39: s = coco_string_create(L"\"(.\" expected"); break; case 40: s = coco_string_create(L"\".)\" expected"); break; case 41: s = coco_string_create(L"??? 
expected"); break; case 42: s = coco_string_create(L"this symbol not expected in Coco"); break; case 43: s = coco_string_create(L"this symbol not expected in TokenDecl"); break; case 44: s = coco_string_create(L"invalid TokenDecl"); break; case 45: s = coco_string_create(L"invalid AttrDecl"); break; case 46: s = coco_string_create(L"invalid SimSet"); break; case 47: s = coco_string_create(L"invalid Sym"); break; case 48: s = coco_string_create(L"invalid Term"); break; case 49: s = coco_string_create(L"invalid Factor"); break; case 50: s = coco_string_create(L"invalid Attribs"); break; case 51: s = coco_string_create(L"invalid TokenFactor"); break; default: { wchar_t format[20]; coco_swprintf(format, 20, L"error %d", n); s = coco_string_create(format); } break; } wprintf(L"-- line %d col %d: %ls\n", line, col, s); coco_string_delete(s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { wprintf(L"-- line %d col %d: %ls\n", line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { wprintf(L"-- line %d col %d: %ls\n", line, col, s); } void Errors::Warning(const wchar_t *s) { wprintf(L"%ls\n", s); } void Errors::Exception(const wchar_t* s) { wprintf(L"%ls", s); exit(1); } } // namespace coco-cpp_20120102/build.bat0000644000175000017500000000005611700304070013112 0ustar mlmlcl *.cpp -FeCoco.exe /O2 /wd4996 del *.obj coco-cpp_20120102/Coco.cpp0000644000175000017500000001412111700303324012711 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ /*------------------------------------------------------------------------- Trace output options 0 | A: prints the states of the scanner automaton 1 | F: prints the First and Follow sets of all nonterminals 2 | G: prints the syntax graph of the productions 3 | I: traces the computation of the First sets 4 | J: prints the sets associated with ANYs and synchronisation sets 6 | S: prints the symbol table (terminals, nonterminals, pragmas) 7 | X: prints a cross reference list of all syntax symbols 8 | P: prints statistics about the Coco run Trace output can be switched on by the pragma $ { digit | letter } in the attributed grammar or as a command-line option -------------------------------------------------------------------------*/ #include #include "Scanner.h" #include "Parser.h" #include "Tab.h" using namespace Coco; #ifdef _WIN32 int wmain(int argc, wchar_t *argv[]) { #elif defined __GNUC__ int main(int argc, char *argv_[]) { wchar_t ** argv = new wchar_t*[argc]; for (int i = 0; i < argc; ++i) { argv[i] = coco_string_create(argv_[i]); } #else #error unknown compiler! 
#endif wprintf(L"Coco/R (Jan 02, 2012)\n"); wchar_t *srcName = NULL, *nsName = NULL, *frameDir = NULL, *ddtString = NULL, *traceFileName = NULL; wchar_t *outDir = NULL; char *chTrFileName = NULL; bool emitLines = false; for (int i = 1; i < argc; i++) { if (coco_string_equal(argv[i], L"-namespace") && i < argc - 1) nsName = coco_string_create(argv[++i]); else if (coco_string_equal(argv[i], L"-frames") && i < argc - 1) frameDir = coco_string_create(argv[++i]); else if (coco_string_equal(argv[i], L"-trace") && i < argc - 1) ddtString = coco_string_create(argv[++i]); else if (coco_string_equal(argv[i], L"-o") && i < argc - 1) outDir = coco_string_create_append(argv[++i], L"/"); else if (coco_string_equal(argv[i], L"-lines")) emitLines = true; else srcName = coco_string_create(argv[i]); } #if defined __GNUC__ for (int i = 0; i < argc; ++i) { coco_string_delete(argv[i]); } delete [] argv; argv = NULL; #endif if (argc > 0 && srcName != NULL) { int pos = coco_string_lastindexof(srcName, '/'); if (pos < 0) pos = coco_string_lastindexof(srcName, '\\'); wchar_t* file = coco_string_create(srcName); wchar_t* srcDir = coco_string_create(srcName, 0, pos+1); Coco::Scanner *scanner = new Coco::Scanner(file); Coco::Parser *parser = new Coco::Parser(scanner); traceFileName = coco_string_create_append(srcDir, L"trace.txt"); chTrFileName = coco_string_create_char(traceFileName); if ((parser->trace = fopen(chTrFileName, "w")) == NULL) { wprintf(L"-- could not open %hs\n", chTrFileName); exit(1); } parser->tab = new Coco::Tab(parser); parser->dfa = new Coco::DFA(parser); parser->pgen = new Coco::ParserGen(parser); parser->tab->srcName = coco_string_create(srcName); parser->tab->srcDir = coco_string_create(srcDir); parser->tab->nsName = nsName ? coco_string_create(nsName) : NULL; parser->tab->frameDir = coco_string_create(frameDir); parser->tab->outDir = coco_string_create(outDir != NULL ? 
outDir : srcDir); parser->tab->emitLines = emitLines; if (ddtString != NULL) parser->tab->SetDDT(ddtString); parser->Parse(); fclose(parser->trace); // obtain the FileSize parser->trace = fopen(chTrFileName, "r"); fseek(parser->trace, 0, SEEK_END); long fileSize = ftell(parser->trace); fclose(parser->trace); if (fileSize == 0) { remove(chTrFileName); } else { wprintf(L"trace output is in %hs\n", chTrFileName); } wprintf(L"%d errors detected\n", parser->errors->count); if (parser->errors->count != 0) { exit(1); } delete parser->pgen; delete parser->dfa; delete parser->tab; delete parser; delete scanner; coco_string_delete(file); coco_string_delete(srcDir); } else { wprintf(L"Usage: Coco Grammar.ATG {Option}\n"); wprintf(L"Options:\n"); wprintf(L" -namespace \n"); wprintf(L" -frames \n"); wprintf(L" -trace \n"); wprintf(L" -o \n"); wprintf(L" -lines\n"); wprintf(L"Valid characters in the trace string:\n"); wprintf(L" A trace automaton\n"); wprintf(L" F list first/follow sets\n"); wprintf(L" G print syntax graph\n"); wprintf(L" I trace computation of first sets\n"); wprintf(L" J list ANY and SYNC sets\n"); wprintf(L" P print statistics\n"); wprintf(L" S list symbol table\n"); wprintf(L" X list cross reference table\n"); wprintf(L"Scanner.frame and Parser.frame files needed in ATG directory\n"); wprintf(L"or in a directory specified in the -frames option.\n"); } coco_string_delete(srcName); coco_string_delete(nsName); coco_string_delete(frameDir); coco_string_delete(ddtString); coco_string_delete(chTrFileName); coco_string_delete(traceFileName); return 0; } coco-cpp_20120102/Tab.cpp0000644000175000017500000010230711700302216012537 0ustar mlml/*------------------------------------------------------------------------- Tab -- Symbol Table Management Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include #include "Tab.h" #include "Parser.h" #include "BitArray.h" #include "Scanner.h" namespace Coco { const char* Tab::nTyp[] = {" ", "t ", "pr ", "nt ", "clas", "chr ", "wt ", "any ", "eps ", "sync", "sem ", "alt ", "iter", "opt ", "rslv"}; const char* Tab::tKind[] = {"fixedToken", "classToken", "litToken", "classLitToken"}; Tab::Tab(Parser *parser) { for (int i=0; i<10; i++) ddt[i] = false; terminals = new ArrayList(); pragmas = new ArrayList(); nonterminals = new ArrayList(); nodes = new ArrayList(); dummyNode = NULL; classes= new ArrayList(); dummyName = 'A'; this->parser = parser; trace = parser->trace; errors = parser->errors; eofSy = NewSym(Node::t, L"EOF", 0); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); literals = new HashTable(); checkEOF = true; } Symbol* Tab::NewSym(int typ, const wchar_t* name, int line) { if (coco_string_length(name) == 2 && name[0] == '"') { parser->SemErr(L"empty token not allowed"); name = coco_string_create(L"???"); } Symbol *sym = new Symbol(typ, name, line); if (typ == Node::t) { sym->n = terminals->Count; terminals->Add(sym); } else if (typ == Node::pr) { pragmas->Add(sym); } else if (typ == Node::nt) { sym->n = nonterminals->Count; nonterminals->Add(sym); } return sym; } Symbol* Tab::FindSym(const wchar_t* name) { Symbol *s; int i; for (i=0; iCount; i++) { s = (Symbol*)((*terminals)[i]); if (coco_string_equal(s->name, name)) return s; } for (i=0; iCount; i++) { s = (Symbol*)((*nonterminals)[i]); if (coco_string_equal(s->name, name)) return s; } return NULL; } int Tab::Num(Node *p) { if (p == NULL) return 0; else return p->n; } void Tab::PrintSym(Symbol *sym) { wchar_t *paddedName = Name(sym->name); fwprintf(trace, L"%3d %14s %ls", sym->n, paddedName, nTyp[sym->typ]); coco_string_delete(paddedName); if (sym->attrPos==NULL) fwprintf(trace, L" false "); else fwprintf(trace, L" true "); if (sym->typ == Node::nt) { fwprintf(trace, L"%5d", 
Num(sym->graph)); if (sym->deletable) fwprintf(trace, L" true "); else fwprintf(trace, L" false "); } else fwprintf(trace, L" "); fwprintf(trace, L"%5d %ls\n", sym->line, tKind[sym->tokenKind]); } void Tab::PrintSymbolTable() { fwprintf(trace, L"Symbol Table:\n"); fwprintf(trace, L"------------\n\n"); fwprintf(trace, L" nr name typ hasAt graph del line tokenKind\n"); Symbol *sym; int i; for (i=0; iCount; i++) { sym = (Symbol*)((*terminals)[i]); PrintSym(sym); } for (i=0; iCount; i++) { sym = (Symbol*)((*pragmas)[i]); PrintSym(sym); } for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); PrintSym(sym); } fwprintf(trace, L"\nLiteral Tokens:\n"); fwprintf(trace, L"--------------\n"); Iterator *iter = literals->GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); fwprintf(trace, L"_%ls = %ls.\n", ((Symbol*) (e->val))->name, e->key); } fwprintf(trace, L"\n"); } void Tab::PrintSet(BitArray *s, int indent) { int col, len; col = indent; Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*terminals)[i]); if ((*s)[sym->n]) { len = coco_string_length(sym->name); if (col + len >= 80) { fwprintf(trace, L"\n"); for (col = 1; col < indent; col++) fwprintf(trace, L" "); } fwprintf(trace, L"%ls ", sym->name); col += len + 1; } } if (col == indent) fwprintf(trace, L"-- empty set --"); fwprintf(trace, L"\n"); } //--------------------------------------------------------------------- // Syntax graph management //--------------------------------------------------------------------- Node* Tab::NewNode(int typ, Symbol *sym, int line) { Node* node = new Node(typ, sym, line); node->n = nodes->Count; nodes->Add(node); return node; } Node* Tab::NewNode(int typ, Node* sub) { Node* node = NewNode(typ, (Symbol*)NULL, 0); node->sub = sub; return node; } Node* Tab::NewNode(int typ, int val, int line) { Node* node = NewNode(typ, (Symbol*)NULL, line); node->val = val; return node; } void Tab::MakeFirstAlt(Graph *g) { g->l = NewNode(Node::alt, g->l); g->l->line 
= g->l->sub->line; g->r->up = true; g->l->next = g->r; g->r = g->l; } // The result will be in g1 void Tab::MakeAlternative(Graph *g1, Graph *g2) { g2->l = NewNode(Node::alt, g2->l); g2->l->line = g2->l->sub->line; g2->l->up = true; g2->r->up = true; Node *p = g1->l; while (p->down != NULL) p = p->down; p->down = g2->l; p = g1->r; while (p->next != NULL) p = p->next; // append alternative to g1 end list p->next = g2->l; // append g2 end list to g1 end list g2->l->next = g2->r; } // The result will be in g1 void Tab::MakeSequence(Graph *g1, Graph *g2) { Node *p = g1->r->next; g1->r->next = g2->l; // link head node while (p != NULL) { // link substructure Node *q = p->next; p->next = g2->l; p = q; } g1->r = g2->r; } void Tab::MakeIteration(Graph *g) { g->l = NewNode(Node::iter, g->l); g->r->up = true; Node *p = g->r; g->r = g->l; while (p != NULL) { Node *q = p->next; p->next = g->l; p = q; } } void Tab::MakeOption(Graph *g) { g->l = NewNode(Node::opt, g->l); g->r->up = true; g->l->next = g->r; g->r = g->l; } void Tab::Finish(Graph *g) { Node *p = g->r; while (p != NULL) { Node *q = p->next; p->next = NULL; p = q; } } void Tab::DeleteNodes() { nodes = new ArrayList(); dummyNode = NewNode(Node::eps, (Symbol*)NULL, 0); } Graph* Tab::StrToGraph(const wchar_t* str) { wchar_t *subStr = coco_string_create(str, 1, coco_string_length(str)-2); wchar_t *s = Unescape(subStr); coco_string_delete(subStr); if (coco_string_length(s) == 0) parser->SemErr(L"empty token not allowed"); Graph *g = new Graph(); g->r = dummyNode; for (int i = 0; i < coco_string_length(s); i++) { Node *p = NewNode(Node::chr, (int)s[i], 0); g->r->next = p; g->r = p; } g->l = dummyNode->next; dummyNode->next = NULL; coco_string_delete(s); return g; } void Tab::SetContextTrans(Node *p) { // set transition code in the graph rooted at p while (p != NULL) { if (p->typ == Node::chr || p->typ == Node::clas) { p->code = Node::contextTrans; } else if (p->typ == Node::opt || p->typ == Node::iter) { 
SetContextTrans(p->sub); } else if (p->typ == Node::alt) { SetContextTrans(p->sub); SetContextTrans(p->down); } if (p->up) break; p = p->next; } } //------------ graph deletability check ----------------- bool Tab::DelGraph(Node* p) { return p == NULL || (DelNode(p) && DelGraph(p->next)); } bool Tab::DelSubGraph(Node* p) { return p == NULL || (DelNode(p) && (p->up || DelSubGraph(p->next))); } bool Tab::DelNode(Node* p) { if (p->typ == Node::nt) { return p->sym->deletable; } else if (p->typ == Node::alt) { return DelSubGraph(p->sub) || (p->down != NULL && DelSubGraph(p->down)); } else { return p->typ == Node::iter || p->typ == Node::opt || p->typ == Node::sem || p->typ == Node::eps || p->typ == Node::rslv || p->typ == Node::sync; } } //----------------- graph printing ---------------------- int Tab::Ptr(Node *p, bool up) { if (p == NULL) return 0; else if (up) return -(p->n); else return p->n; } wchar_t* Tab::Pos(Position *pos) { wchar_t* format = new wchar_t[10]; if (pos == NULL) { coco_swprintf(format, 10, L" "); } else { coco_swprintf(format, 10, L"%5d", pos->beg); } return format; } wchar_t* Tab::Name(const wchar_t *name) { wchar_t *name2 = coco_string_create_append(name, L" "); wchar_t *subName2 = coco_string_create(name2, 0, 12); coco_string_delete(name2); return subName2; // found no simpler way to get the first 12 characters of the name // padded with blanks on the right } void Tab::PrintNodes() { fwprintf(trace, L"Graph nodes:\n"); fwprintf(trace, L"----------------------------------------------------\n"); fwprintf(trace, L" n type name next down sub pos line\n"); fwprintf(trace, L" val code\n"); fwprintf(trace, L"----------------------------------------------------\n"); Node *p; for (int i=0; iCount; i++) { p = (Node*)((*nodes)[i]); fwprintf(trace, L"%4d %ls ", p->n, (nTyp[p->typ])); if (p->sym != NULL) { wchar_t *paddedName = Name(p->sym->name); fwprintf(trace, L"%12s ", paddedName); coco_string_delete(paddedName); } else if (p->typ == Node::clas) { 
CharClass *c = (CharClass*)(*classes)[p->val]; wchar_t *paddedName = Name(c->name); fwprintf(trace, L"%12s ", paddedName); coco_string_delete(paddedName); } else fwprintf(trace, L" "); fwprintf(trace, L"%5d ", Ptr(p->next, p->up)); if (p->typ == Node::t || p->typ == Node::nt || p->typ == Node::wt) { fwprintf(trace, L" %5s", Pos(p->pos)); } if (p->typ == Node::chr) { fwprintf(trace, L"%5d %5d ", p->val, p->code); } if (p->typ == Node::clas) { fwprintf(trace, L" %5d ", p->code); } if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { fwprintf(trace, L"%5d %5d ", Ptr(p->down, false), Ptr(p->sub, false)); } if (p->typ == Node::sem) { fwprintf(trace, L" %5s", Pos(p->pos)); } if (p->typ == Node::eps || p->typ == Node::any || p->typ == Node::sync) { fwprintf(trace, L" "); } fwprintf(trace, L"%5d\n", p->line); } fwprintf(trace, L"\n"); } //--------------------------------------------------------------------- // Character class management //--------------------------------------------------------------------- CharClass* Tab::NewCharClass(const wchar_t* name, CharSet *s) { CharClass *c; if (coco_string_equal(name, L"#")) { wchar_t* temp = coco_string_create_append(name, (wchar_t) dummyName++); c = new CharClass(temp, s); coco_string_delete(temp); } else { c = new CharClass(name, s); } c->n = classes->Count; classes->Add(c); return c; } CharClass* Tab::FindCharClass(const wchar_t* name) { CharClass *c; for (int i=0; iCount; i++) { c = (CharClass*)((*classes)[i]); if (coco_string_equal(c->name, name)) return c; } return NULL; } CharClass* Tab::FindCharClass(CharSet *s) { CharClass *c; for (int i=0; iCount; i++) { c = (CharClass*)((*classes)[i]); if (s->Equals(c->set)) return c; } return NULL; } CharSet* Tab::CharClassSet(int i) { return ((CharClass*)((*classes)[i]))->set; } //----------- character class printing wchar_t* Tab::Ch(const wchar_t ch) { wchar_t* format = new wchar_t[10]; if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') { 
coco_swprintf(format, 10, L"%d", ch); return format; } else { coco_swprintf(format, 10, L"'%lc'", ch); return format; } } void Tab::WriteCharSet(CharSet *s) { for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from < r->to) { wchar_t *from = Ch(r->from); wchar_t *to = Ch(r->to); fwprintf(trace, L"%ls .. %ls ", from, to); delete [] from; delete [] to; } else { wchar_t *from = Ch(r->from); fwprintf(trace, L"%ls ", from); delete [] from; } } } void Tab::WriteCharClasses () { CharClass *c; for (int i=0; iCount; i++) { c = (CharClass*)((*classes)[i]); wchar_t* format2 = coco_string_create_append(c->name, L" "); wchar_t* format = coco_string_create(format2, 0, 10); coco_string_merge(format, L": "); fwprintf(trace, format); WriteCharSet(c->set); fwprintf(trace, L"\n"); coco_string_delete(format); coco_string_delete(format2); } fwprintf(trace, L"\n"); } //--------------------------------------------------------------------- // Symbol set computations //--------------------------------------------------------------------- /* Computes the first set for the given Node. 
*/ BitArray* Tab::First0(Node *p, BitArray *mark) { BitArray *fs = new BitArray(terminals->Count); while (p != NULL && !((*mark)[p->n])) { mark->Set(p->n, true); if (p->typ == Node::nt) { if (p->sym->firstReady) { fs->Or(p->sym->first); } else { BitArray *fs0 = First0(p->sym->graph, mark); fs->Or(fs0); delete fs0; } } else if (p->typ == Node::t || p->typ == Node::wt) { fs->Set(p->sym->n, true); } else if (p->typ == Node::any) { fs->Or(p->set); } else if (p->typ == Node::alt) { BitArray *fs0 = First0(p->sub, mark); fs->Or(fs0); delete fs0; fs0 = First0(p->down, mark); fs->Or(fs0); delete fs0; } else if (p->typ == Node::iter || p->typ == Node::opt) { BitArray *fs0 = First0(p->sub, mark); fs->Or(fs0); delete fs0; } if (!DelNode(p)) break; p = p->next; } return fs; } BitArray* Tab::First(Node *p) { BitArray *mark = new BitArray(nodes->Count); BitArray *fs = First0(p, mark); delete mark; if (ddt[3]) { fwprintf(trace, L"\n"); if (p != NULL) fwprintf(trace, L"First: node = %d\n", p->n ); else fwprintf(trace, L"First: node = null\n"); PrintSet(fs, 0); } return fs; } void Tab::CompFirstSets() { Symbol *sym; int i; for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); sym->first = new BitArray(terminals->Count); sym->firstReady = false; } for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); sym->first = First(sym->graph); sym->firstReady = true; } } void Tab::CompFollow(Node *p) { while (p != NULL && !((*visited)[p->n])) { visited->Set(p->n, true); if (p->typ == Node::nt) { BitArray *s = First(p->next); p->sym->follow->Or(s); if (DelGraph(p->next)) p->sym->nts->Set(curSy->n, true); } else if (p->typ == Node::opt || p->typ == Node::iter) { CompFollow(p->sub); } else if (p->typ == Node::alt) { CompFollow(p->sub); CompFollow(p->down); } p = p->next; } } void Tab::Complete(Symbol *sym) { if (!((*visited)[sym->n])) { visited->Set(sym->n, true); Symbol *s; for (int i=0; iCount; i++) { s = (Symbol*)((*nonterminals)[i]); if ((*(sym->nts))[s->n]) { Complete(s); 
sym->follow->Or(s->follow); if (sym == curSy) sym->nts->Set(s->n, false); } } } } void Tab::CompFollowSets() { Symbol *sym; int i; for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); sym->follow = new BitArray(terminals->Count); sym->nts = new BitArray(nonterminals->Count); } gramSy->follow->Set(eofSy->n, true); visited = new BitArray(nodes->Count); for (i=0; iCount; i++) { // get direct successors of nonterminals sym = (Symbol*)((*nonterminals)[i]); curSy = sym; CompFollow(sym->graph); } for (i=0; iCount; i++) { // add indirect successors to followers sym = (Symbol*)((*nonterminals)[i]); visited = new BitArray(nonterminals->Count); curSy = sym; Complete(sym); } } Node* Tab::LeadingAny(Node *p) { if (p == NULL) return NULL; Node *a = NULL; if (p->typ == Node::any) a = p; else if (p->typ == Node::alt) { a = LeadingAny(p->sub); if (a == NULL) a = LeadingAny(p->down); } else if (p->typ == Node::opt || p->typ == Node::iter) a = LeadingAny(p->sub); if (a == NULL && DelNode(p) && !p->up) a = LeadingAny(p->next); return a; } void Tab::FindAS(Node *p) { // find ANY sets Node *a; while (p != NULL) { if (p->typ == Node::opt || p->typ == Node::iter) { FindAS(p->sub); a = LeadingAny(p->sub); if (a != NULL) Sets::Subtract(a->set, First(p->next)); } else if (p->typ == Node::alt) { BitArray *s1 = new BitArray(terminals->Count); Node *q = p; while (q != NULL) { FindAS(q->sub); a = LeadingAny(q->sub); if (a != NULL) { BitArray *tmp = First(q->down); tmp->Or(s1); Sets::Subtract(a->set, tmp); } else { BitArray *f = First(q->sub); s1->Or(f); delete f; } q = q->down; } } // Remove alternative terminals before ANY, in the following // examples a and b must be removed from the ANY set: // [a] ANY, or {a|b} ANY, or [a][b] ANY, or (a|) ANY, or // A = [a]. A ANY if (DelNode(p)) { a = LeadingAny(p->next); if (a != NULL) { Node *q = (p->typ == Node::nt) ? 
p->sym->graph : p->sub; Sets::Subtract(a->set, First(q)); } } if (p->up) break; p = p->next; } } void Tab::CompAnySets() { Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); FindAS(sym->graph); } } BitArray* Tab::Expected(Node *p, Symbol *curSy) { BitArray *s = First(p); if (DelGraph(p)) s->Or(curSy->follow); return s; } // does not look behind resolvers; only called during LL(1) test and in CheckRes BitArray* Tab::Expected0(Node *p, Symbol *curSy) { if (p->typ == Node::rslv) return new BitArray(terminals->Count); else return Expected(p, curSy); } void Tab::CompSync(Node *p) { while (p != NULL && !(visited->Get(p->n))) { visited->Set(p->n, true); if (p->typ == Node::sync) { BitArray *s = Expected(p->next, curSy); s->Set(eofSy->n, true); allSyncSets->Or(s); p->set = s; } else if (p->typ == Node::alt) { CompSync(p->sub); CompSync(p->down); } else if (p->typ == Node::opt || p->typ == Node::iter) CompSync(p->sub); p = p->next; } } void Tab::CompSyncSets() { allSyncSets = new BitArray(terminals->Count); allSyncSets->Set(eofSy->n, true); visited = new BitArray(nodes->Count); Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); curSy = sym; CompSync(curSy->graph); } } void Tab::SetupAnys() { Node *p; for (int i=0; iCount; i++) { p = (Node*)((*nodes)[i]); if (p->typ == Node::any) { p->set = new BitArray(terminals->Count, true); p->set->Set(eofSy->n, false); } } } void Tab::CompDeletableSymbols() { bool changed; Symbol *sym; int i; do { changed = false; for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); if (!sym->deletable && sym->graph != NULL && DelGraph(sym->graph)) { sym->deletable = true; changed = true; } } } while (changed); for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); if (sym->deletable) wprintf(L" %ls deletable\n", sym->name); } } void Tab::RenumberPragmas() { int n = terminals->Count; Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*pragmas)[i]); sym->n = n++; } } void 
Tab::CompSymbolSets() { CompDeletableSymbols(); CompFirstSets(); CompAnySets(); CompFollowSets(); CompSyncSets(); if (ddt[1]) { fwprintf(trace, L"\n"); fwprintf(trace, L"First & follow symbols:\n"); fwprintf(trace, L"----------------------\n\n"); Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); fwprintf(trace, L"%ls\n", sym->name); fwprintf(trace, L"first: "); PrintSet(sym->first, 10); fwprintf(trace, L"follow: "); PrintSet(sym->follow, 10); fwprintf(trace, L"\n"); } } if (ddt[4]) { fwprintf(trace, L"\n"); fwprintf(trace, L"ANY and SYNC sets:\n"); fwprintf(trace, L"-----------------\n"); Node *p; for (int i=0; iCount; i++) { p = (Node*)((*nodes)[i]); if (p->typ == Node::any || p->typ == Node::sync) { fwprintf(trace, L"%4d %4s ", p->n, nTyp[p->typ]); PrintSet(p->set, 11); } } } } //--------------------------------------------------------------------- // String handling //--------------------------------------------------------------------- wchar_t Tab::Hex2Char(const wchar_t* s) { int val = 0; int len = coco_string_length(s); for (int i = 0; i < len; i++) { wchar_t ch = s[i]; if ('0' <= ch && ch <= '9') val = 16 * val + (ch - '0'); else if ('a' <= ch && ch <= 'f') val = 16 * val + (10 + ch - 'a'); else if ('A' <= ch && ch <= 'F') val = 16 * val + (10 + ch - 'A'); else parser->SemErr(L"bad escape sequence in string or character"); } if (val >= COCO_WCHAR_MAX) {/* pdt */ parser->SemErr(L"bad escape sequence in string or character"); } return (wchar_t) val; } wchar_t* Tab::Char2Hex(const wchar_t ch) { wchar_t* format = new wchar_t[10]; coco_swprintf(format, 10, L"\\0x%04x", ch); return format; } wchar_t* Tab::Unescape (const wchar_t* s) { /* replaces escape sequences in s by their Unicode values. 
*/ StringBuilder buf = StringBuilder(); int i = 0; int len = coco_string_length(s); while (i < len) { if (s[i] == '\\') { switch (s[i+1]) { case L'\\': buf.Append(L'\\'); i += 2; break; case L'\'': buf.Append(L'\''); i += 2; break; case L'\"': buf.Append(L'\"'); i += 2; break; case L'r': buf.Append(L'\r'); i += 2; break; case L'n': buf.Append(L'\n'); i += 2; break; case L't': buf.Append(L'\t'); i += 2; break; case L'0': buf.Append(L'\0'); i += 2; break; case L'a': buf.Append(L'\a'); i += 2; break; case L'b': buf.Append(L'\b'); i += 2; break; case L'f': buf.Append(L'\f'); i += 2; break; case L'v': buf.Append(L'\v'); i += 2; break; case L'u': case L'x': if (i + 6 <= coco_string_length(s)) { wchar_t *subS = coco_string_create(s, i+2, 4); buf.Append(Hex2Char(subS)); i += 6; break; coco_string_delete(subS); } else { parser->SemErr(L"bad escape sequence in string or character"); i = coco_string_length(s); break; } default: parser->SemErr(L"bad escape sequence in string or character"); i += 2; break; } } else { buf.Append(s[i]); i++; } } return buf.ToString(); } wchar_t* Tab::Escape (const wchar_t* s) { StringBuilder buf = StringBuilder(); wchar_t ch; int len = coco_string_length(s); for (int i=0; i < len; i++) { ch = s[i]; switch(ch) { case L'\\': buf.Append(L"\\\\"); break; case L'\'': buf.Append(L"\\'"); break; case L'\"': buf.Append(L"\\\""); break; case L'\t': buf.Append(L"\\t"); break; case L'\r': buf.Append(L"\\r"); break; case L'\n': buf.Append(L"\\n"); break; default: if ((ch < L' ') || (ch > 0x7f)) { wchar_t* res = Char2Hex(ch); buf.Append(res); delete [] res; } else buf.Append(ch); break; } } return buf.ToString(); } //--------------------------------------------------------------------- // Grammar checks //--------------------------------------------------------------------- bool Tab::GrammarOk() { bool ok = NtsComplete() && AllNtReached() && NoCircularProductions() && AllNtToTerm(); if (ok) { CheckResolvers(); CheckLL1(); } return ok; } //--------------- 
check for circular productions ---------------------- void Tab::GetSingles(Node *p, ArrayList *singles) { if (p == NULL) return; // end of graph if (p->typ == Node::nt) { if (p->up || DelGraph(p->next)) singles->Add(p->sym); } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { if (p->up || DelGraph(p->next)) { GetSingles(p->sub, singles); if (p->typ == Node::alt) GetSingles(p->down, singles); } } if (!p->up && DelNode(p)) GetSingles(p->next, singles); } bool Tab::NoCircularProductions() { bool ok, changed, onLeftSide, onRightSide; ArrayList *list = new ArrayList(); Symbol *sym; int i; for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); ArrayList *singles = new ArrayList(); GetSingles(sym->graph, singles); // get nonterminals s such that sym-->s Symbol *s; for (int j=0; jCount; j++) { s = (Symbol*)((*singles)[j]); list->Add(new CNode(sym, s)); } } CNode *n; do { changed = false; for (i = 0; i < list->Count; i++) { n = (CNode*)(*list)[i]; onLeftSide = false; onRightSide = false; CNode *m; for (int j=0; jCount; j++) { m = (CNode*)((*list)[j]); if (n->left == m->right) onRightSide = true; if (n->right == m->left) onLeftSide = true; } if (!onLeftSide || !onRightSide) { list->Remove(n); i--; changed = true; } } } while(changed); ok = true; for (i=0; iCount; i++) { n = (CNode*)((*list)[i]); ok = false; errors->count++; wprintf(L" %ls --> %ls", n->left->name, n->right->name); } return ok; } //--------------- check for LL(1) errors ---------------------- void Tab::LL1Error(int cond, Symbol *sym) { wprintf(L" LL1 warning in %ls: ", curSy->name); if (sym != NULL) wprintf(L"%ls is ", sym->name); switch (cond) { case 1: wprintf(L"start of several alternatives\n"); break; case 2: wprintf(L"start & successor of deletable structure\n"); break; case 3: wprintf(L"an ANY node that matches no symbol\n"); break; case 4: wprintf(L"contents of [...] 
or {...} must not be deletable\n"); break; } } void Tab::CheckOverlap(BitArray *s1, BitArray *s2, int cond) { Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*terminals)[i]); if ((*s1)[sym->n] && (*s2)[sym->n]) { LL1Error(cond, sym); } } } void Tab::CheckAlts(Node *p) { BitArray *s1, *s2; while (p != NULL) { if (p->typ == Node::alt) { Node *q = p; s1 = new BitArray(terminals->Count); while (q != NULL) { // for all alternatives s2 = Expected0(q->sub, curSy); CheckOverlap(s1, s2, 1); s1->Or(s2); CheckAlts(q->sub); q = q->down; } } else if (p->typ == Node::opt || p->typ == Node::iter) { if (DelSubGraph(p->sub)) LL1Error(4, NULL); // e.g. [[...]] else { s1 = Expected0(p->sub, curSy); s2 = Expected(p->next, curSy); CheckOverlap(s1, s2, 2); } CheckAlts(p->sub); } else if (p->typ == Node::any) { if (Sets::Elements(p->set) == 0) LL1Error(3, NULL); // e.g. {ANY} ANY or [ANY] ANY or ( ANY | ANY ) } if (p->up) break; p = p->next; } } void Tab::CheckLL1() { Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); curSy = sym; CheckAlts(curSy->graph); } } //------------- check if resolvers are legal -------------------- void Tab::ResErr(Node *p, const wchar_t* msg) { errors->Warning(p->line, p->pos->col, msg); } void Tab::CheckRes(Node *p, bool rslvAllowed) { while (p != NULL) { Node *q; if (p->typ == Node::alt) { BitArray *expected = new BitArray(terminals->Count); for (q = p; q != NULL; q = q->down) expected->Or(Expected0(q->sub, curSy)); BitArray *soFar = new BitArray(terminals->Count); for (q = p; q != NULL; q = q->down) { if (q->sub->typ == Node::rslv) { BitArray *fs = Expected(q->sub->next, curSy); if (Sets::Intersect(fs, soFar)) ResErr(q->sub, L"Warning: Resolver will never be evaluated. 
Place it at previous conflicting alternative."); if (!Sets::Intersect(fs, expected)) ResErr(q->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); } else soFar->Or(Expected(q->sub, curSy)); CheckRes(q->sub, true); } } else if (p->typ == Node::iter || p->typ == Node::opt) { if (p->sub->typ == Node::rslv) { BitArray *fs = First(p->sub->next); BitArray *fsNext = Expected(p->next, curSy); if (!Sets::Intersect(fs, fsNext)) ResErr(p->sub, L"Warning: Misplaced resolver: no LL(1) conflict."); } CheckRes(p->sub, true); } else if (p->typ == Node::rslv) { if (!rslvAllowed) ResErr(p, L"Warning: Misplaced resolver: no alternative."); } if (p->up) break; p = p->next; rslvAllowed = false; } } void Tab::CheckResolvers() { for (int i=0; iCount; i++) { curSy = (Symbol*)((*nonterminals)[i]); CheckRes(curSy->graph, false); } } //------------- check if every nts has a production -------------------- bool Tab::NtsComplete() { bool complete = true; Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); if (sym->graph == NULL) { complete = false; errors->count++; wprintf(L" No production for %ls\n", sym->name); } } return complete; } //-------------- check if every nts can be reached ----------------- void Tab::MarkReachedNts(Node *p) { while (p != NULL) { if (p->typ == Node::nt && !((*visited)[p->sym->n])) { // new nt reached visited->Set(p->sym->n, true); MarkReachedNts(p->sym->graph); } else if (p->typ == Node::alt || p->typ == Node::iter || p->typ == Node::opt) { MarkReachedNts(p->sub); if (p->typ == Node::alt) MarkReachedNts(p->down); } if (p->up) break; p = p->next; } } bool Tab::AllNtReached() { bool ok = true; visited = new BitArray(nonterminals->Count); visited->Set(gramSy->n, true); MarkReachedNts(gramSy->graph); Symbol *sym; for (int i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); if (!((*visited)[sym->n])) { ok = false; errors->count++; wprintf(L" %ls cannot be reached\n", sym->name); } } return ok; } //--------- check if every nts can be 
derived to terminals ------------ bool Tab::IsTerm(Node *p, BitArray *mark) { // true if graph can be derived to terminals while (p != NULL) { if (p->typ == Node::nt && !((*mark)[p->sym->n])) return false; if (p->typ == Node::alt && !IsTerm(p->sub, mark) && (p->down == NULL || !IsTerm(p->down, mark))) return false; if (p->up) break; p = p->next; } return true; } bool Tab::AllNtToTerm() { bool changed, ok = true; BitArray *mark = new BitArray(nonterminals->Count); // a nonterminal is marked if it can be derived to terminal symbols Symbol *sym; int i; do { changed = false; for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); if (!((*mark)[sym->n]) && IsTerm(sym->graph, mark)) { mark->Set(sym->n, true); changed = true; } } } while (changed); for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); if (!((*mark)[sym->n])) { ok = false; errors->count++; wprintf(L" %ls cannot be derived to terminals\n", sym->name); } } return ok; } //--------------------------------------------------------------------- // Cross reference list //--------------------------------------------------------------------- void Tab::XRef() { SortedList *xref = new SortedList(); // collect lines where symbols have been defined Symbol *sym; int i, j; for (i=0; iCount; i++) { sym = (Symbol*)((*nonterminals)[i]); ArrayList *list = (ArrayList*)(xref->Get(sym)); if (list == NULL) {list = new ArrayList(); xref->Set(sym, list);} int *intg = new int(- sym->line); list->Add(intg); } // collect lines where symbols have been referenced Node *n; for (i=0; iCount; i++) { n = (Node*)((*nodes)[i]); if (n->typ == Node::t || n->typ == Node::wt || n->typ == Node::nt) { ArrayList *list = (ArrayList*)(xref->Get(n->sym)); if (list == NULL) {list = new ArrayList(); xref->Set(n->sym, list);} int *intg = new int(n->line); list->Add(intg); } } // print cross reference list fwprintf(trace, L"\n"); fwprintf(trace, L"Cross reference list:\n"); fwprintf(trace, L"--------------------\n\n"); for (i=0; iCount; i++) 
{ sym = (Symbol*)(xref->GetKey(i)); wchar_t *paddedName = Name(sym->name); fwprintf(trace, L" %12ls", paddedName); coco_string_delete(paddedName); ArrayList *list = (ArrayList*)(xref->Get(sym)); int col = 14; int line; for (j=0; jCount; j++) { line = *(int*)((*list)[j]); if (col + 5 > 80) { fwprintf(trace, L"\n"); for (col = 1; col <= 14; col++) fwprintf(trace, L" "); } fwprintf(trace, L"%5d", line); col += 5; } fwprintf(trace, L"\n"); } fwprintf(trace, L"\n\n"); } void Tab::SetDDT(const wchar_t* s) { wchar_t* st = coco_string_create_upper(s); wchar_t ch; int len = coco_string_length(st); for (int i = 0; i < len; i++) { ch = st[i]; if (L'0' <= ch && ch <= L'9') ddt[ch - L'0'] = true; else switch (ch) { case L'A' : ddt[0] = true; break; // trace automaton case L'F' : ddt[1] = true; break; // list first/follow sets case L'G' : ddt[2] = true; break; // print syntax graph case L'I' : ddt[3] = true; break; // trace computation of first sets case L'J' : ddt[4] = true; break; // print ANY and SYNC sets case L'P' : ddt[8] = true; break; // print statistics case L'S' : ddt[6] = true; break; // list symbol table case L'X' : ddt[7] = true; break; // list cross reference table default : break; } } coco_string_delete(st); } void Tab::SetOption(const wchar_t* s) { // example: $namespace=xxx // index of '=' is 10 => nameLenght = 10 // start index of xxx = 11 int nameLenght = coco_string_indexof(s, '='); int valueIndex = nameLenght + 1; wchar_t *name = coco_string_create(s, 0, nameLenght); wchar_t *value = coco_string_create(s, valueIndex); if (coco_string_equal(L"$namespace", name)) { if (nsName == NULL) nsName = coco_string_create(value); } else if (coco_string_equal(L"$checkEOF", name)) { checkEOF = coco_string_equal(L"true", value); } delete [] name; delete [] value; } }; // namespace coco-cpp_20120102/ParserGen.h0000644000175000017500000000574511700302070013372 0ustar mlml/*------------------------------------------------------------------------- ParserGen -- Generation of 
the Recursive Descent Parser Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz ported to C++ by Csaba Balazs, University of Szeged extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #if !defined(COCO_PARSERGEN_H__) #define COCO_PARSERGEN_H__ #include "Position.h" #include "Tab.h" #include "Symbol.h" #include "Scanner.h" #include "DFA.h" namespace Coco { class Errors; class Parser; class BitArray; class ParserGen { public: int maxTerm; // sets of size < maxTerm are enumerated char CR; char LF; int tErr; // error codes int altErr; int syncErr; Position *usingPos; // "using" definitions from the attributed grammar int errorNr; // highest parser error number Symbol *curSy; // symbol whose production is currently generated FILE* fram; // parser frame file FILE* gen; // generated parser source file wchar_t* err; // generated parser error messages ArrayList *symSet; Tab *tab; // other Coco objects FILE* trace; Errors *errors; Buffer *buffer; void Indent(int n); bool UseSwitch(Node *p); void CopyFramePart(const wchar_t* stop); void CopySourcePart(Position *pos, int indent); int GenNamespaceOpen(const wchar_t* nsName); void GenNamespaceClose(int nrOfNs); void GenErrorMsg(int errTyp, Symbol *sym); int NewCondSet(BitArray *s); void GenCond(BitArray *s, Node *p); void PutCaseLabels(BitArray *s); void GenCode(Node *p, int indent, BitArray *isChecked); void GenTokens(); void GenTokensHeader(); void GenPragmas(); void GenPragmasHeader(); void GenCodePragmas(); void GenProductions(); void GenProductionsHeader(); void InitSets(); void OpenGen(const wchar_t* genName, bool backUp); void WriteParser(); void WriteStatistics(); void WriteSymbolOrCode(FILE *gen, const Symbol *sym); ParserGen (Parser *parser); }; }; // namespace #endif // !defined(COCO_PARSERGEN_H__) coco-cpp_20120102/ParserGen.cpp0000644000175000017500000003522311700301442013721 0ustar mlml/*------------------------------------------------------------------------- ParserGen -- Generation of the Recursive Descent Parser Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz ported to 
C++ by Csaba Balazs, University of Szeged extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include #include "ArrayList.h" #include "ParserGen.h" #include "Parser.h" #include "BitArray.h" #include "Scanner.h" #include "Generator.h" namespace Coco { void ParserGen::Indent (int n) { for (int i = 1; i <= n; i++) fwprintf(gen, L"\t"); } // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning bool ParserGen::UseSwitch (Node *p) { BitArray *s1, *s2; if (p->typ != Node::alt) return false; int nAlts = 0; s1 = new BitArray(tab->terminals->Count); while (p != NULL) { s2 = tab->Expected0(p->sub, curSy); // must not optimize with switch statement, if there are ll1 warnings if (s1->Overlaps(s2)) { return false; } s1->Or(s2); ++nAlts; // must not optimize with switch-statement, if alt uses a resolver expression if (p->sub->typ == Node::rslv) return false; p = p->down; } return nAlts > 5; } int ParserGen::GenNamespaceOpen(const wchar_t *nsName) { if (nsName == NULL || coco_string_length(nsName) == 0) { return 0; } const int len = coco_string_length(nsName); int startPos = 0; int nrOfNs = 0; do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); fwprintf(gen, L"namespace %ls {\n", curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { ++startPos; } ++nrOfNs; } while (startPos < len); return nrOfNs; } void ParserGen::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { fwprintf(gen, L"} // namespace\n"); } } void ParserGen::CopySourcePart (Position *pos, int indent) { // Copy text described by pos from atg to gen int ch, i; if (pos != NULL) { buffer->SetPos(pos->beg); ch = buffer->Read(); if (tab->emitLines && pos->line) { fwprintf(gen, L"\n#line %d \"%ls\"\n", pos->line, tab->srcName); } Indent(indent); while 
(buffer->GetPos() <= pos->end) { while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF fwprintf(gen, L"\n"); Indent(indent); if (ch == CR) { ch = buffer->Read(); } // skip CR if (ch == LF) { ch = buffer->Read(); } // skip LF for (i = 1; i <= pos->col && (ch == ' ' || ch == '\t'); i++) { // skip blanks at beginning of line ch = buffer->Read(); } if (buffer->GetPos() > pos->end) goto done; } fwprintf(gen, L"%lc", ch); ch = buffer->Read(); } done: if (indent > 0) fwprintf(gen, L"\n"); } } void ParserGen::GenErrorMsg (int errTyp, Symbol *sym) { errorNr++; const int formatLen = 1000; wchar_t format[formatLen]; coco_swprintf(format, formatLen, L"\t\t\tcase %d: s = coco_string_create(L\"", errorNr); coco_string_merge(err, format); if (errTyp == tErr) { if (sym->name[0] == L'"') { coco_swprintf(format, formatLen, L"%ls expected", tab->Escape(sym->name)); coco_string_merge(err, format); } else { coco_swprintf(format, formatLen, L"%ls expected", sym->name); coco_string_merge(err, format); } } else if (errTyp == altErr) { coco_swprintf(format, formatLen, L"invalid %ls", sym->name); coco_string_merge(err, format); } else if (errTyp == syncErr) { coco_swprintf(format, formatLen, L"this symbol not expected in %ls", sym->name); coco_string_merge(err, format); } coco_swprintf(format, formatLen, L"\"); break;\n"); coco_string_merge(err, format); } int ParserGen::NewCondSet (BitArray *s) { for (int i = 1; i < symSet->Count; i++) // skip symSet[0] (reserved for union of SYNC sets) if (Sets::Equals(s, (BitArray*)(*symSet)[i])) return i; symSet->Add(s->Clone()); return symSet->Count - 1; } void ParserGen::GenCond (BitArray *s, Node *p) { if (p->typ == Node::rslv) CopySourcePart(p->pos, 0); else { int n = Sets::Elements(s); if (n == 0) fwprintf(gen, L"false"); // happens if an ANY set matches no symbol else if (n <= maxTerm) { Symbol *sym; for (int i=0; iterminals->Count; i++) { sym = (Symbol*)((*(tab->terminals))[i]); if ((*s)[sym->n]) { fwprintf(gen, L"la->kind == "); 
WriteSymbolOrCode(gen, sym); --n; if (n > 0) fwprintf(gen, L" || "); } } } else fwprintf(gen, L"StartOf(%d)", NewCondSet(s)); } } void ParserGen::PutCaseLabels (BitArray *s) { Symbol *sym; for (int i=0; iterminals->Count; i++) { sym = (Symbol*)((*(tab->terminals))[i]); if ((*s)[sym->n]) { fwprintf(gen, L"case "); WriteSymbolOrCode(gen, sym); fwprintf(gen, L": "); } } } void ParserGen::GenCode (Node *p, int indent, BitArray *isChecked) { Node *p2; BitArray *s1, *s2; while (p != NULL) { if (p->typ == Node::nt) { Indent(indent); fwprintf(gen, L"%ls(", p->sym->name); CopySourcePart(p->pos, 0); fwprintf(gen, L");\n"); } else if (p->typ == Node::t) { Indent(indent); // assert: if isChecked[p->sym->n] is true, then isChecked contains only p->sym->n if ((*isChecked)[p->sym->n]) fwprintf(gen, L"Get();\n"); else { fwprintf(gen, L"Expect("); WriteSymbolOrCode(gen, p->sym); fwprintf(gen, L");\n"); } } if (p->typ == Node::wt) { Indent(indent); s1 = tab->Expected(p->next, curSy); s1->Or(tab->allSyncSets); fwprintf(gen, L"ExpectWeak("); WriteSymbolOrCode(gen, p->sym); fwprintf(gen, L", %d);\n", NewCondSet(s1)); } if (p->typ == Node::any) { Indent(indent); int acc = Sets::Elements(p->set); if (tab->terminals->Count == (acc + 1) || (acc > 0 && Sets::Equals(p->set, isChecked))) { // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here fwprintf(gen, L"Get();\n"); } else { GenErrorMsg(altErr, curSy); if (acc > 0) { fwprintf(gen, L"if ("); GenCond(p->set, p); fwprintf(gen, L") Get(); else SynErr(%d);\n", errorNr); } else fwprintf(gen, L"SynErr(%d); // ANY node that matches no symbol\n", errorNr); } } if (p->typ == Node::eps) { // nothing } if (p->typ == Node::rslv) { // nothing } if (p->typ == Node::sem) { CopySourcePart(p->pos, indent); } if (p->typ == Node::sync) { Indent(indent); GenErrorMsg(syncErr, curSy); s1 = p->set->Clone(); fwprintf(gen, L"while (!("); GenCond(s1, p); fwprintf(gen, L")) {"); fwprintf(gen, L"SynErr(%d); Get();", errorNr); 
fwprintf(gen, L"}\n"); } if (p->typ == Node::alt) { s1 = tab->First(p); bool equal = Sets::Equals(s1, isChecked); bool useSwitch = UseSwitch(p); if (useSwitch) { Indent(indent); fwprintf(gen, L"switch (la->kind) {\n"); } p2 = p; while (p2 != NULL) { s1 = tab->Expected(p2->sub, curSy); Indent(indent); if (useSwitch) { PutCaseLabels(s1); fwprintf(gen, L"{\n"); } else if (p2 == p) { fwprintf(gen, L"if ("); GenCond(s1, p2->sub); fwprintf(gen, L") {\n"); } else if (p2->down == NULL && equal) { fwprintf(gen, L"} else {\n"); } else { fwprintf(gen, L"} else if ("); GenCond(s1, p2->sub); fwprintf(gen, L") {\n"); } GenCode(p2->sub, indent + 1, s1); if (useSwitch) { Indent(indent); fwprintf(gen, L"\tbreak;\n"); Indent(indent); fwprintf(gen, L"}\n"); } p2 = p2->down; } Indent(indent); if (equal) { fwprintf(gen, L"}\n"); } else { GenErrorMsg(altErr, curSy); if (useSwitch) { fwprintf(gen, L"default: SynErr(%d); break;\n", errorNr); Indent(indent); fwprintf(gen, L"}\n"); } else { fwprintf(gen, L"} "); fwprintf(gen, L"else SynErr(%d);\n", errorNr); } } } if (p->typ == Node::iter) { Indent(indent); p2 = p->sub; fwprintf(gen, L"while ("); if (p2->typ == Node::wt) { s1 = tab->Expected(p2->next, curSy); s2 = tab->Expected(p->next, curSy); fwprintf(gen, L"WeakSeparator("); WriteSymbolOrCode(gen, p2->sym); fwprintf(gen, L",%d,%d) ", NewCondSet(s1), NewCondSet(s2)); s1 = new BitArray(tab->terminals->Count); // for inner structure if (p2->up || p2->next == NULL) p2 = NULL; else p2 = p2->next; } else { s1 = tab->First(p2); GenCond(s1, p2); } fwprintf(gen, L") {\n"); GenCode(p2, indent + 1, s1); Indent(indent); fwprintf(gen, L"}\n"); } if (p->typ == Node::opt) { s1 = tab->First(p->sub); Indent(indent); fwprintf(gen, L"if ("); GenCond(s1, p->sub); fwprintf(gen, L") {\n"); GenCode(p->sub, indent + 1, s1); Indent(indent); fwprintf(gen, L"}\n"); } if (p->typ != Node::eps && p->typ != Node::sem && p->typ != Node::sync) isChecked->SetAll(false); // = new BitArray(Symbol.terminals.Count); if 
(p->up) break; p = p->next; } } void ParserGen::GenTokensHeader() { Symbol *sym; int i; bool isFirst = true; fwprintf(gen, L"\tenum {\n"); // tokens for (i=0; iterminals->Count; i++) { sym = (Symbol*)((*(tab->terminals))[i]); if (!isalpha(sym->name[0])) { continue; } if (isFirst) { isFirst = false; } else { fwprintf(gen , L",\n"); } fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); } // pragmas for (i=0; ipragmas->Count; i++) { if (isFirst) { isFirst = false; } else { fwprintf(gen , L",\n"); } sym = (Symbol*)((*(tab->pragmas))[i]); fwprintf(gen , L"\t\t_%ls=%d", sym->name, sym->n); } fwprintf(gen, L"\n\t};\n"); } void ParserGen::GenCodePragmas() { Symbol *sym; for (int i=0; ipragmas->Count; i++) { sym = (Symbol*)((*(tab->pragmas))[i]); fwprintf(gen, L"\t\tif (la->kind == "); WriteSymbolOrCode(gen, sym); fwprintf(gen, L") {\n"); CopySourcePart(sym->semPos, 4); fwprintf(gen, L"\t\t}\n"); } } void ParserGen::WriteSymbolOrCode(FILE *gen, const Symbol *sym) { if (!isalpha(sym->name[0])) { fwprintf(gen, L"%d /* %ls */", sym->n, sym->name); } else { fwprintf(gen, L"_%ls", sym->name); } } void ParserGen::GenProductionsHeader() { Symbol *sym; for (int i=0; inonterminals->Count; i++) { sym = (Symbol*)((*(tab->nonterminals))[i]); curSy = sym; fwprintf(gen, L"\tvoid %ls(", sym->name); CopySourcePart(sym->attrPos, 0); fwprintf(gen, L");\n"); } } void ParserGen::GenProductions() { Symbol *sym; for (int i=0; inonterminals->Count; i++) { sym = (Symbol*)((*(tab->nonterminals))[i]); curSy = sym; fwprintf(gen, L"void Parser::%ls(", sym->name); CopySourcePart(sym->attrPos, 0); fwprintf(gen, L") {\n"); CopySourcePart(sym->semPos, 2); GenCode(sym->graph, 2, new BitArray(tab->terminals->Count)); fwprintf(gen, L"}\n"); fwprintf(gen, L"\n"); } } void ParserGen::InitSets() { fwprintf(gen, L"\tstatic bool set[%d][%d] = {\n", symSet->Count, tab->terminals->Count+1); for (int i = 0; i < symSet->Count; i++) { BitArray *s = (BitArray*)(*symSet)[i]; fwprintf(gen, L"\t\t{"); int j = 0; Symbol 
*sym; for (int k=0; kterminals->Count; k++) { sym = (Symbol*)((*(tab->terminals))[k]); if ((*s)[sym->n]) fwprintf(gen, L"T,"); else fwprintf(gen, L"x,"); ++j; if (j%4 == 0) fwprintf(gen, L" "); } if (i == symSet->Count-1) fwprintf(gen, L"x}\n"); else fwprintf(gen, L"x},\n"); } fwprintf(gen, L"\t};\n\n"); } void ParserGen::WriteParser () { Generator g = Generator(tab, errors); int oldPos = buffer->GetPos(); // Pos is modified by CopySourcePart symSet->Add(tab->allSyncSets); fram = g.OpenFrame(L"Parser.frame"); gen = g.OpenGen(L"Parser.h"); Symbol *sym; for (int i=0; iterminals->Count; i++) { sym = (Symbol*)((*(tab->terminals))[i]); GenErrorMsg(tErr, sym); } g.GenCopyright(); g.SkipFramePart(L"-->begin"); g.CopyFramePart(L"-->prefix"); g.GenPrefixFromNamespace(); g.CopyFramePart(L"-->prefix"); g.GenPrefixFromNamespace(); g.CopyFramePart(L"-->headerdef"); if (usingPos != NULL) {CopySourcePart(usingPos, 0); fwprintf(gen, L"\n");} g.CopyFramePart(L"-->namespace_open"); int nrOfNs = GenNamespaceOpen(tab->nsName); g.CopyFramePart(L"-->constantsheader"); GenTokensHeader(); /* ML 2002/09/07 write the token kinds */ fwprintf(gen, L"\tint maxT;\n"); g.CopyFramePart(L"-->declarations"); CopySourcePart(tab->semDeclPos, 0); g.CopyFramePart(L"-->productionsheader"); GenProductionsHeader(); g.CopyFramePart(L"-->namespace_close"); GenNamespaceClose(nrOfNs); g.CopyFramePart(L"-->implementation"); fclose(gen); // Source gen = g.OpenGen(L"Parser.cpp"); g.GenCopyright(); g.SkipFramePart(L"-->begin"); g.CopyFramePart(L"-->namespace_open"); nrOfNs = GenNamespaceOpen(tab->nsName); g.CopyFramePart(L"-->pragmas"); GenCodePragmas(); g.CopyFramePart(L"-->productions"); GenProductions(); g.CopyFramePart(L"-->parseRoot"); fwprintf(gen, L"\t%ls();\n", tab->gramSy->name); if (tab->checkEOF) fwprintf(gen, L"\tExpect(0);"); g.CopyFramePart(L"-->constants"); fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count-1); g.CopyFramePart(L"-->initialization"); InitSets(); g.CopyFramePart(L"-->errors"); 
fwprintf(gen, L"%ls", err); g.CopyFramePart(L"-->namespace_close"); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); fclose(gen); buffer->SetPos(oldPos); } void ParserGen::WriteStatistics () { fwprintf(trace, L"\n"); fwprintf(trace, L"%d terminals\n", tab->terminals->Count); fwprintf(trace, L"%d symbols\n", tab->terminals->Count + tab->pragmas->Count + tab->nonterminals->Count); fwprintf(trace, L"%d nodes\n", tab->nodes->Count); fwprintf(trace, L"%d sets\n", symSet->Count); } ParserGen::ParserGen (Parser *parser) { maxTerm = 3; CR = '\r'; LF = '\n'; tErr = 0; altErr = 1; syncErr = 2; tab = parser->tab; errors = parser->errors; trace = parser->trace; buffer = parser->scanner->buffer; errorNr = -1; usingPos = NULL; symSet = new ArrayList(); err = NULL; } }; // namespace coco-cpp_20120102/DFA.cpp0000644000175000017500000006536411700277370012452 0ustar mlml/*------------------------------------------------------------------------- DFA -- Generation of the Scanner Automaton Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include #include #include "DFA.h" #include "Tab.h" #include "Parser.h" #include "BitArray.h" #include "Scanner.h" #include "Generator.h" namespace Coco { //---------- Output primitives wchar_t* DFA::Ch(wchar_t ch) { wchar_t* format = new wchar_t[10]; if (ch < L' ' || ch >= 127 || ch == L'\'' || ch == L'\\') coco_swprintf(format, 10, L"%d\0", (int) ch); else coco_swprintf(format, 10, L"L'%lc'\0", (int) ch); return format; } wchar_t* DFA::ChCond(wchar_t ch) { wchar_t* format = new wchar_t[20]; wchar_t* res = Ch(ch); coco_swprintf(format, 20, L"ch == %ls\0", res); delete [] res; return format; } void DFA::PutRange(CharSet *s) { for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (r->from == r->to) { wchar_t *from = Ch((wchar_t) r->from); fwprintf(gen, L"ch == %ls", from); delete [] from; } else if (r->from == 0) { wchar_t *to = Ch((wchar_t) r->to); fwprintf(gen, L"ch <= %ls", to); delete [] to; } else { wchar_t *from = Ch((wchar_t) r->from); wchar_t *to = Ch((wchar_t) r->to); fwprintf(gen, L"(ch >= %ls && ch <= %ls)", from, to); delete [] from; delete [] to; } if (r->next != NULL) fwprintf(gen, L" || "); } } //---------- State handling State* DFA::NewState() { State *s = new State(); s->nr = ++lastStateNr; if (firstState == NULL) firstState = s; else lastState->next = s; lastState = s; return s; } void DFA::NewTransition(State *from, State *to, int typ, int sym, int tc) { Target *t = new Target(to); Action *a = new Action(typ, sym, tc); a->target = t; from->AddAction(a); if (typ == Node::clas) curSy->tokenKind = Symbol::classToken; } void DFA::CombineShifts() { State *state; Action *a, *b, *c; CharSet *seta, *setb; for (state = 
firstState; state != NULL; state = state->next) { for (a = state->firstAction; a != NULL; a = a->next) { b = a->next; while (b != NULL) if (a->target->state == b->target->state && a->tc == b->tc) { seta = a->Symbols(tab); setb = b->Symbols(tab); seta->Or(setb); a->ShiftWith(seta, tab); c = b; b = b->next; state->DetachAction(c); } else b = b->next; } } } void DFA::FindUsedStates(State *state, BitArray *used) { if ((*used)[state->nr]) return; used->Set(state->nr, true); for (Action *a = state->firstAction; a != NULL; a = a->next) FindUsedStates(a->target->state, used); } void DFA::DeleteRedundantStates() { //State *newState = new State[State::lastNr + 1]; State **newState = (State**) malloc (sizeof(State*) * (lastStateNr + 1)); BitArray *used = new BitArray(lastStateNr + 1); FindUsedStates(firstState, used); // combine equal final states for (State *s1 = firstState->next; s1 != NULL; s1 = s1->next) // firstState cannot be final if ((*used)[s1->nr] && s1->endOf != NULL && s1->firstAction == NULL && !(s1->ctx)) for (State *s2 = s1->next; s2 != NULL; s2 = s2->next) if ((*used)[s2->nr] && s1->endOf == s2->endOf && s2->firstAction == NULL && !(s2->ctx)) { used->Set(s2->nr, false); newState[s2->nr] = s1; } State *state; for (state = firstState; state != NULL; state = state->next) if ((*used)[state->nr]) for (Action *a = state->firstAction; a != NULL; a = a->next) if (!((*used)[a->target->state->nr])) a->target->state = newState[a->target->state->nr]; // delete unused states lastState = firstState; lastStateNr = 0; // firstState has number 0 for (state = firstState->next; state != NULL; state = state->next) if ((*used)[state->nr]) {state->nr = ++lastStateNr; lastState = state;} else lastState->next = state->next; free (newState); delete used; } State* DFA::TheState(Node *p) { State *state; if (p == NULL) {state = NewState(); state->endOf = curSy; return state;} else return p->state; } void DFA::Step(State *from, Node *p, BitArray *stepped) { if (p == NULL) return; 
stepped->Set(p->n, true); if (p->typ == Node::clas || p->typ == Node::chr) { NewTransition(from, TheState(p->next), p->typ, p->val, p->code); } else if (p->typ == Node::alt) { Step(from, p->sub, stepped); Step(from, p->down, stepped); } else if (p->typ == Node::iter) { if (tab->DelSubGraph(p->sub)) { parser->SemErr(L"contents of {...} must not be deletable"); return; } if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); Step(from, p->sub, stepped); if (p->state != from) { BitArray *newStepped = new BitArray(tab->nodes->Count); Step(p->state, p, newStepped); delete newStepped; } } else if (p->typ == Node::opt) { if (p->next != NULL && !((*stepped)[p->next->n])) Step(from, p->next, stepped); Step(from, p->sub, stepped); } } // Assigns a state n.state to every node n. There will be a transition from // n.state to n.next.state triggered by n.val. All nodes in an alternative // chain are represented by the same state. // Numbering scheme: // - any node after a chr, clas, opt, or alt, must get a new number // - if a nested structure starts with an iteration the iter node must get a new number // - if an iteration follows an iteration, it must get a new number void DFA::NumberNodes(Node *p, State *state, bool renumIter) { if (p == NULL) return; if (p->state != NULL) return; // already visited; if ((state == NULL) || ((p->typ == Node::iter) && renumIter)) state = NewState(); p->state = state; if (tab->DelGraph(p)) state->endOf = curSy; if (p->typ == Node::clas || p->typ == Node::chr) { NumberNodes(p->next, NULL, false); } else if (p->typ == Node::opt) { NumberNodes(p->next, NULL, false); NumberNodes(p->sub, state, true); } else if (p->typ == Node::iter) { NumberNodes(p->next, state, true); NumberNodes(p->sub, state, true); } else if (p->typ == Node::alt) { NumberNodes(p->next, NULL, false); NumberNodes(p->sub, state, true); NumberNodes(p->down, state, renumIter); } } void DFA::FindTrans (Node *p, bool start, BitArray *marked) { if (p == NULL || 
(*marked)[p->n]) return; marked->Set(p->n, true); if (start) { BitArray *stepped = new BitArray(tab->nodes->Count); Step(p->state, p, stepped); // start of group of equally numbered nodes delete stepped; } if (p->typ == Node::clas || p->typ == Node::chr) { FindTrans(p->next, true, marked); } else if (p->typ == Node::opt) { FindTrans(p->next, true, marked); FindTrans(p->sub, false, marked); } else if (p->typ == Node::iter) { FindTrans(p->next, false, marked); FindTrans(p->sub, false, marked); } else if (p->typ == Node::alt) { FindTrans(p->sub, false, marked); FindTrans(p->down, false, marked); } } void DFA::ConvertToStates(Node *p, Symbol *sym) { curGraph = p; curSy = sym; if (tab->DelGraph(curGraph)) { parser->SemErr(L"token might be empty"); return; } NumberNodes(curGraph, firstState, true); FindTrans(curGraph, true, new BitArray(tab->nodes->Count)); if (p->typ == Node::iter) { BitArray *stepped = new BitArray(tab->nodes->Count); Step(firstState, p, stepped); delete stepped; } } // match string against current automaton; store it either as a fixedToken or as a litToken void DFA::MatchLiteral(wchar_t* s, Symbol *sym) { wchar_t *subS = coco_string_create(s, 1, coco_string_length(s)-2); s = tab->Unescape(subS); coco_string_delete(subS); int i, len = coco_string_length(s); State *state = firstState; Action *a = NULL; for (i = 0; i < len; i++) { // try to match s against existing DFA a = FindAction(state, s[i]); if (a == NULL) break; state = a->target->state; } // if s was not totally consumed or leads to a non-final state => make new DFA from it if (i != len || state->endOf == NULL) { state = firstState; i = 0; a = NULL; dirtyDFA = true; } for (; i < len; i++) { // make new DFA for s[i..len-1] State *to = NewState(); NewTransition(state, to, Node::chr, s[i], Node::normalTrans); state = to; } coco_string_delete(s); Symbol *matchedSym = state->endOf; if (state->endOf == NULL) { state->endOf = sym; } else if (matchedSym->tokenKind == Symbol::fixedToken || (a != NULL && 
a->tc == Node::contextTrans)) { // s matched a token with a fixed definition or a token with an appendix that will be cut off wchar_t format[200]; coco_swprintf(format, 200, L"tokens %ls and %ls cannot be distinguished", sym->name, matchedSym->name); parser->SemErr(format); } else { // matchedSym == classToken || classLitToken matchedSym->tokenKind = Symbol::classLitToken; sym->tokenKind = Symbol::litToken; } } void DFA::SplitActions(State *state, Action *a, Action *b) { Action *c; CharSet *seta, *setb, *setc; seta = a->Symbols(tab); setb = b->Symbols(tab); if (seta->Equals(setb)) { a->AddTargets(b); state->DetachAction(b); } else if (seta->Includes(setb)) { setc = seta->Clone(); setc->Subtract(setb); b->AddTargets(a); a->ShiftWith(setc, tab); } else if (setb->Includes(seta)) { setc = setb->Clone(); setc->Subtract(seta); a->AddTargets(b); b->ShiftWith(setc, tab); } else { setc = seta->Clone(); setc->And(setb); seta->Subtract(setc); setb->Subtract(setc); a->ShiftWith(seta, tab); b->ShiftWith(setb, tab); c = new Action(0, 0, Node::normalTrans); // typ and sym are set in ShiftWith c->AddTargets(a); c->AddTargets(b); c->ShiftWith(setc, tab); state->AddAction(c); } } bool DFA::Overlap(Action *a, Action *b) { CharSet *seta, *setb; if (a->typ == Node::chr) if (b->typ == Node::chr) return (a->sym == b->sym); else {setb = tab->CharClassSet(b->sym); return setb->Get(a->sym);} else { seta = tab->CharClassSet(a->sym); if (b->typ == Node::chr) return seta->Get(b->sym); else {setb = tab->CharClassSet(b->sym); return seta->Intersects(setb);} } } bool DFA::MakeUnique(State *state) { // return true if actions were split bool changed = false; for (Action *a = state->firstAction; a != NULL; a = a->next) for (Action *b = a->next; b != NULL; b = b->next) if (Overlap(a, b)) { SplitActions(state, a, b); changed = true; } return changed; } void DFA::MeltStates(State *state) { bool changed, ctx; BitArray *targets; Symbol *endOf; for (Action *action = state->firstAction; action != NULL; 
action = action->next) { if (action->target->next != NULL) { GetTargetStates(action, targets, endOf, ctx); Melted *melt = StateWithSet(targets); if (melt == NULL) { State *s = NewState(); s->endOf = endOf; s->ctx = ctx; for (Target *targ = action->target; targ != NULL; targ = targ->next) s->MeltWith(targ->state); do {changed = MakeUnique(s);} while (changed); melt = NewMelted(targets, s); } action->target->next = NULL; action->target->state = melt->state; } } } void DFA::FindCtxStates() { for (State *state = firstState; state != NULL; state = state->next) for (Action *a = state->firstAction; a != NULL; a = a->next) if (a->tc == Node::contextTrans) a->target->state->ctx = true; } void DFA::MakeDeterministic() { State *state; bool changed; lastSimState = lastState->nr; maxStates = 2 * lastSimState; // heuristic for set size in Melted.set FindCtxStates(); for (state = firstState; state != NULL; state = state->next) do {changed = MakeUnique(state);} while (changed); for (state = firstState; state != NULL; state = state->next) MeltStates(state); DeleteRedundantStates(); CombineShifts(); } void DFA::PrintStates() { fwprintf(trace, L"\n"); fwprintf(trace, L"---------- states ----------\n"); for (State *state = firstState; state != NULL; state = state->next) { bool first = true; if (state->endOf == NULL) fwprintf(trace, L" "); else { wchar_t *paddedName = tab->Name(state->endOf->name); fwprintf(trace, L"E(%12s)", paddedName); coco_string_delete(paddedName); } fwprintf(trace, L"%3d:", state->nr); if (state->firstAction == NULL) fwprintf(trace, L"\n"); for (Action *action = state->firstAction; action != NULL; action = action->next) { if (first) {fwprintf(trace, L" "); first = false;} else fwprintf(trace, L" "); if (action->typ == Node::clas) fwprintf(trace, L"%ls", ((CharClass*)(*tab->classes)[action->sym])->name); else fwprintf(trace, L"%3s", Ch((wchar_t)action->sym)); for (Target *targ = action->target; targ != NULL; targ = targ->next) { fwprintf(trace, L"%3d", 
targ->state->nr); } if (action->tc == Node::contextTrans) fwprintf(trace, L" context\n"); else fwprintf(trace, L"\n"); } } fwprintf(trace, L"\n---------- character classes ----------\n"); tab->WriteCharClasses(); } //---------------------------- actions -------------------------------- Action* DFA::FindAction(State *state, wchar_t ch) { for (Action *a = state->firstAction; a != NULL; a = a->next) if (a->typ == Node::chr && ch == a->sym) return a; else if (a->typ == Node::clas) { CharSet *s = tab->CharClassSet(a->sym); if (s->Get(ch)) return a; } return NULL; } void DFA::GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx) { // compute the set of target states targets = new BitArray(maxStates); endOf = NULL; ctx = false; for (Target *t = a->target; t != NULL; t = t->next) { int stateNr = t->state->nr; if (stateNr <= lastSimState) { targets->Set(stateNr, true); } else { targets->Or(MeltedSet(stateNr)); } if (t->state->endOf != NULL) { if (endOf == NULL || endOf == t->state->endOf) { endOf = t->state->endOf; } else { wprintf(L"Tokens %ls and %ls cannot be distinguished\n", endOf->name, t->state->endOf->name); errors->count++; } } if (t->state->ctx) { ctx = true; // The following check seems to be unnecessary. It reported an error // if a symbol + context was the prefix of another symbol, e.g. // s1 = "a" "b" "c". // s2 = "a" CONTEXT("b"). // But this is ok. 
// if (t.state.endOf != null) { // Console.WriteLine("Ambiguous context clause"); // Errors.count++; // } } } } //------------------------- melted states ------------------------------ Melted* DFA::NewMelted(BitArray *set, State *state) { Melted *m = new Melted(set, state); m->next = firstMelted; firstMelted = m; return m; } BitArray* DFA::MeltedSet(int nr) { Melted *m = firstMelted; while (m != NULL) { if (m->state->nr == nr) return m->set; else m = m->next; } //Errors::Exception("-- compiler error in Melted::Set"); //throw new Exception("-- compiler error in Melted::Set"); return NULL; } Melted* DFA::StateWithSet(BitArray *s) { for (Melted *m = firstMelted; m != NULL; m = m->next) if (Sets::Equals(s, m->set)) return m; return NULL; } //------------------------ comments -------------------------------- wchar_t* DFA::CommentStr(Node *p) { StringBuilder s = StringBuilder(); while (p != NULL) { if (p->typ == Node::chr) { s.Append((wchar_t)p->val); } else if (p->typ == Node::clas) { CharSet *set = tab->CharClassSet(p->val); if (set->Elements() != 1) parser->SemErr(L"character set contains more than 1 character"); s.Append((wchar_t) set->First()); } else parser->SemErr(L"comment delimiters may not be structured"); p = p->next; } if (s.GetLength() == 0 || s.GetLength() > 2) { parser->SemErr(L"comment delimiters must be 1 or 2 characters long"); s = StringBuilder(L"?"); } return s.ToString(); } void DFA::NewComment(Node *from, Node *to, bool nested) { Comment *c = new Comment(CommentStr(from), CommentStr(to), nested); c->next = firstComment; firstComment = c; } //------------------------ scanner generation ---------------------- void DFA::GenComBody(Comment *com) { fwprintf(gen, L"\t\tfor(;;) {\n"); wchar_t* res = ChCond(com->stop[0]); fwprintf(gen, L"\t\t\tif (%ls) ", res); fwprintf(gen, L"{\n"); delete [] res; if (coco_string_length(com->stop) == 1) { fwprintf(gen, L"\t\t\t\tlevel--;\n"); fwprintf(gen, L"\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); 
return true; }\n"); fwprintf(gen, L"\t\t\t\tNextCh();\n"); } else { fwprintf(gen, L"\t\t\t\tNextCh();\n"); wchar_t* res = ChCond(com->stop[1]); fwprintf(gen, L"\t\t\t\tif (%ls) {\n", res); delete [] res; fwprintf(gen, L"\t\t\t\t\tlevel--;\n"); fwprintf(gen, L"\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }\n"); fwprintf(gen, L"\t\t\t\t\tNextCh();\n"); fwprintf(gen, L"\t\t\t\t}\n"); } if (com->nested) { fwprintf(gen, L"\t\t\t}"); wchar_t* res = ChCond(com->start[0]); fwprintf(gen, L" else if (%ls) ", res); delete [] res; fwprintf(gen, L"{\n"); if (coco_string_length(com->stop) == 1) fwprintf(gen, L"\t\t\t\tlevel++; NextCh();\n"); else { fwprintf(gen, L"\t\t\t\tNextCh();\n"); wchar_t* res = ChCond(com->start[1]); fwprintf(gen, L"\t\t\t\tif (%ls) ", res); delete [] res; fwprintf(gen, L"{\n"); fwprintf(gen, L"\t\t\t\t\tlevel++; NextCh();\n"); fwprintf(gen, L"\t\t\t\t}\n"); } } fwprintf(gen, L"\t\t\t} else if (ch == buffer->EoF) return false;\n"); fwprintf(gen, L"\t\t\telse NextCh();\n"); fwprintf(gen, L"\t\t}\n"); } void DFA::GenCommentHeader(Comment *com, int i) { fwprintf(gen, L"\tbool Comment%d();\n", i); } void DFA::GenComment(Comment *com, int i) { fwprintf(gen, L"\n"); fwprintf(gen, L"bool Scanner::Comment%d() ", i); fwprintf(gen, L"{\n"); fwprintf(gen, L"\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;\n"); if (coco_string_length(com->start) == 1) { fwprintf(gen, L"\tNextCh();\n"); GenComBody(com); } else { fwprintf(gen, L"\tNextCh();\n"); wchar_t* res = ChCond(com->start[1]); fwprintf(gen, L"\tif (%ls) ", res); delete [] res; fwprintf(gen, L"{\n"); fwprintf(gen, L"\t\tNextCh();\n"); GenComBody(com); fwprintf(gen, L"\t} else {\n"); fwprintf(gen, L"\t\tbuffer->SetPos(pos0); NextCh(); line = line0; col = col0; charPos = charPos0;\n"); fwprintf(gen, L"\t}\n"); fwprintf(gen, L"\treturn false;\n"); } fwprintf(gen, L"}\n"); } wchar_t* DFA::SymName(Symbol *sym) { // real name value is stored in Tab.literals if 
(('a'<=sym->name[0] && sym->name[0]<='z') || ('A'<=sym->name[0] && sym->name[0]<='Z')) { //Char::IsLetter(sym->name[0]) Iterator *iter = tab->literals->GetIterator(); while (iter->HasNext()) { DictionaryEntry *e = iter->Next(); if (e->val == sym) { return e->key; } } } return sym->name; } void DFA::GenLiterals () { Symbol *sym; ArrayList *ts[2]; ts[0] = tab->terminals; ts[1] = tab->pragmas; for (int i = 0; i < 2; ++i) { for (int j = 0; j < ts[i]->Count; j++) { sym = (Symbol*) ((*(ts[i]))[j]); if (sym->tokenKind == Symbol::litToken) { wchar_t* name = coco_string_create(SymName(sym)); if (ignoreCase) { wchar_t *oldName = name; name = coco_string_create_lower(name); coco_string_delete(oldName); } // sym.name stores literals with quotes, e.g. "\"Literal\"" fwprintf(gen, L"\tkeywords.set(L"); // write keyword, escape non printable characters for (int k = 0; name[k] != L'\0'; k++) { wchar_t c = name[k]; fwprintf(gen, (c >= 32 && c <= 127) ? L"%lc" : L"\\x%04x", c); } fwprintf(gen, L", %d);\n", sym->n); coco_string_delete(name); } } } } int DFA::GenNamespaceOpen(const wchar_t *nsName) { if (nsName == NULL || coco_string_length(nsName) == 0) { return 0; } const int len = coco_string_length(nsName); int startPos = 0; int nrOfNs = 0; do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); fwprintf(gen, L"namespace %ls {\n", curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; if (startPos < len && nsName[startPos] == COCO_CPP_NAMESPACE_SEPARATOR) { ++startPos; } ++nrOfNs; } while (startPos < len); return nrOfNs; } void DFA::GenNamespaceClose(int nrOfNs) { for (int i = 0; i < nrOfNs; ++i) { fwprintf(gen, L"} // namespace\n"); } } void DFA::CheckLabels() { int i; State *state; Action *action; for (i=0; i < lastStateNr+1; i++) { existLabel[i] = false; } for (state = firstState->next; state != NULL; state = state->next) { 
for (action = state->firstAction; action != NULL; action = action->next) { existLabel[action->target->state->nr] = true; } } } void DFA::WriteState(State *state) { Symbol *endOf = state->endOf; fwprintf(gen, L"\t\tcase %d:\n", state->nr); if (existLabel[state->nr]) fwprintf(gen, L"\t\t\tcase_%d:\n", state->nr); if (endOf != NULL && state->firstAction != NULL) { fwprintf(gen, L"\t\t\trecEnd = pos; recKind = %d;\n", endOf->n); } bool ctxEnd = state->ctx; for (Action *action = state->firstAction; action != NULL; action = action->next) { if (action == state->firstAction) fwprintf(gen, L"\t\t\tif ("); else fwprintf(gen, L"\t\t\telse if ("); if (action->typ == Node::chr) { wchar_t* res = ChCond((wchar_t)action->sym); fwprintf(gen, L"%ls", res); delete [] res; } else PutRange(tab->CharClassSet(action->sym)); fwprintf(gen, L") {"); if (action->tc == Node::contextTrans) { fwprintf(gen, L"apx++; "); ctxEnd = false; } else if (state->ctx) fwprintf(gen, L"apx = 0; "); fwprintf(gen, L"AddCh(); goto case_%d;", action->target->state->nr); fwprintf(gen, L"}\n"); } if (state->firstAction == NULL) fwprintf(gen, L"\t\t\t{"); else fwprintf(gen, L"\t\t\telse {"); if (ctxEnd) { // final context state: cut appendix fwprintf(gen, L"\n"); fwprintf(gen, L"\t\t\t\ttlen -= apx;\n"); fwprintf(gen, L"\t\t\t\tSetScannerBehindT();"); fwprintf(gen, L"\t\t\t\tbuffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col;\n"); fwprintf(gen, L"\t\t\t\tfor (int i = 0; i < tlen; i++) NextCh();\n"); fwprintf(gen, L"\t\t\t\t"); } if (endOf == NULL) { fwprintf(gen, L"goto case_0;}\n"); } else { fwprintf(gen, L"t->kind = %d; ", endOf->n); if (endOf->tokenKind == Symbol::classLitToken) { if (ignoreCase) { fwprintf(gen, L"wchar_t *literal = coco_string_create_lower(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); coco_string_delete(literal); break;}\n"); } else { fwprintf(gen, L"wchar_t *literal = coco_string_create(tval, 0, tlen); t->kind = keywords.get(literal, t->kind); 
coco_string_delete(literal); break;}\n"); } } else { fwprintf(gen, L"break;}\n"); } } } void DFA::WriteStartTab() { bool firstRange = true; for (Action *action = firstState->firstAction; action != NULL; action = action->next) { int targetState = action->target->state->nr; if (action->typ == Node::chr) { fwprintf(gen, L"\tstart.set(%d, %d);\n", action->sym, targetState); } else { CharSet *s = tab->CharClassSet(action->sym); for (CharSet::Range *r = s->head; r != NULL; r = r->next) { if (firstRange) { firstRange = false; fwprintf(gen, L"\tint i;\n"); } fwprintf(gen, L"\tfor (i = %d; i <= %d; ++i) start.set(i, %d);\n", r->from, r->to, targetState); } } } fwprintf(gen, L"\t\tstart.set(Buffer::EoF, -1);\n"); } void DFA::WriteScanner() { Generator g = Generator(tab, errors); fram = g.OpenFrame(L"Scanner.frame"); gen = g.OpenGen(L"Scanner.h"); if (dirtyDFA) MakeDeterministic(); // Header g.GenCopyright(); g.SkipFramePart(L"-->begin"); g.CopyFramePart(L"-->prefix"); g.GenPrefixFromNamespace(); g.CopyFramePart(L"-->prefix"); g.GenPrefixFromNamespace(); g.CopyFramePart(L"-->namespace_open"); int nrOfNs = GenNamespaceOpen(tab->nsName); g.CopyFramePart(L"-->casing0"); if (ignoreCase) { fwprintf(gen, L"\twchar_t valCh; // current input character (for token.val)\n"); } g.CopyFramePart(L"-->commentsheader"); Comment *com = firstComment; int cmdIdx = 0; while (com != NULL) { GenCommentHeader(com, cmdIdx); com = com->next; cmdIdx++; } g.CopyFramePart(L"-->namespace_close"); GenNamespaceClose(nrOfNs); g.CopyFramePart(L"-->implementation"); fclose(gen); // Source gen = g.OpenGen(L"Scanner.cpp"); g.GenCopyright(); g.SkipFramePart(L"-->begin"); g.CopyFramePart(L"-->namespace_open"); nrOfNs = GenNamespaceOpen(tab->nsName); g.CopyFramePart(L"-->declarations"); fwprintf(gen, L"\tmaxT = %d;\n", tab->terminals->Count - 1); fwprintf(gen, L"\tnoSym = %d;\n", tab->noSym->n); WriteStartTab(); GenLiterals(); g.CopyFramePart(L"-->initialization"); g.CopyFramePart(L"-->casing1"); if (ignoreCase) { 
fwprintf(gen, L"\t\tvalCh = ch;\n"); fwprintf(gen, L"\t\tif ('A' <= ch && ch <= 'Z') ch = ch - 'A' + 'a'; // ch.ToLower()"); } g.CopyFramePart(L"-->casing2"); fwprintf(gen, L"\t\ttval[tlen++] = "); if (ignoreCase) fwprintf(gen, L"valCh;"); else fwprintf(gen, L"ch;"); g.CopyFramePart(L"-->comments"); com = firstComment; cmdIdx = 0; while (com != NULL) { GenComment(com, cmdIdx); com = com->next; cmdIdx++; } g.CopyFramePart(L"-->scan1"); fwprintf(gen, L"\t\t\t"); if (tab->ignored->Elements() > 0) { PutRange(tab->ignored); } else { fwprintf(gen, L"false"); } g.CopyFramePart(L"-->scan2"); if (firstComment != NULL) { fwprintf(gen, L"\tif ("); com = firstComment; cmdIdx = 0; while (com != NULL) { wchar_t* res = ChCond(com->start[0]); fwprintf(gen, L"(%ls && Comment%d())", res, cmdIdx); delete [] res; if (com->next != NULL) { fwprintf(gen, L" || "); } com = com->next; cmdIdx++; } fwprintf(gen, L") return NextToken();"); } if (hasCtxMoves) { fwprintf(gen, L"\n"); fwprintf(gen, L"\tint apx = 0;"); } /* pdt */ g.CopyFramePart(L"-->scan3"); /* CSB 02-10-05 check the Labels */ existLabel = new bool[lastStateNr+1]; CheckLabels(); for (State *state = firstState->next; state != NULL; state = state->next) WriteState(state); delete [] existLabel; g.CopyFramePart(L"-->namespace_close"); GenNamespaceClose(nrOfNs); g.CopyFramePart(NULL); fclose(gen); } DFA::DFA(Parser *parser) { this->parser = parser; tab = parser->tab; errors = parser->errors; trace = parser->trace; firstState = NULL; lastState = NULL; lastStateNr = -1; firstState = NewState(); firstMelted = NULL; firstComment = NULL; ignoreCase = false; dirtyDFA = false; hasCtxMoves = false; } }; // namespace coco-cpp_20120102/build.sh0000644000175000017500000000004711634127360012771 0ustar mlml#!/bin/sh g++ *.cpp -o Coco -g -Wall coco-cpp_20120102/Scanner.frame0000644000175000017500000005477411616171604013763 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 
1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-----------------------------------------------------------------------*/ /*---------------------------------------------------------------------- Scanner.h Specification -----------------------------------------------------------------------*/ -->begin #if !defined(-->prefixCOCO_SCANNER_H__) #define -->prefixCOCO_SCANNER_H__ #include #include #include #include #include // io.h and fcntl are used to ensure binary read from streams on windows #if _MSC_VER >= 1300 #include #include #endif #if _MSC_VER >= 1400 #define coco_swprintf swprintf_s #elif _MSC_VER >= 1300 #define coco_swprintf _snwprintf #elif defined __MINGW32__ #define coco_swprintf _snwprintf #else // assume every other compiler knows swprintf #define coco_swprintf swprintf #endif #define COCO_WCHAR_MAX 65535 #define COCO_MIN_BUFFER_LENGTH 1024 #define COCO_MAX_BUFFER_LENGTH (64*COCO_MIN_BUFFER_LENGTH) #define COCO_HEAP_BLOCK_SIZE (64*1024) #define COCO_CPP_NAMESPACE_SEPARATOR L':' -->namespace_open // string handling, wide character wchar_t* coco_string_create(const wchar_t *value); wchar_t* coco_string_create(const wchar_t *value, int startIndex); wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length); wchar_t* coco_string_create_upper(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data); wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen); wchar_t* coco_string_create_append(const wchar_t* data1, const wchar_t* data2); wchar_t* coco_string_create_append(const wchar_t* data, const wchar_t value); void coco_string_delete(wchar_t* &data); int coco_string_length(const wchar_t* data); bool coco_string_endswith(const wchar_t* data, const wchar_t *value); int coco_string_indexof(const wchar_t* data, const wchar_t value); int coco_string_lastindexof(const wchar_t* data, const wchar_t value); void coco_string_merge(wchar_t* &data, const wchar_t* value); bool coco_string_equal(const wchar_t* data1, const wchar_t* data2); int 
coco_string_compareto(const wchar_t* data1, const wchar_t* data2); int coco_string_hash(const wchar_t* data); // string handling, ascii character wchar_t* coco_string_create(const char *value); char* coco_string_create_char(const wchar_t *value); void coco_string_delete(char* &data); class Token { public: int kind; // token kind int pos; // token position in bytes in the source text (starting at 0) int charPos; // token position in characters in the source text (starting at 0) int col; // token column (starting at 1) int line; // token line (starting at 1) wchar_t* val; // token value Token *next; // ML 2005-03-11 Peek tokens are kept in linked list Token(); ~Token(); }; class Buffer { // This Buffer supports the following cases: // 1) seekable stream (file) // a) whole stream in buffer // b) part of stream in buffer // 2) non seekable stream (network, console) private: unsigned char *buf; // input buffer int bufCapacity; // capacity of buf int bufStart; // position of first byte in buffer relative to input stream int bufLen; // length of buffer int fileLen; // length of input stream (may change if the stream is no file) int bufPos; // current position in buffer FILE* stream; // input stream (seekable) bool isUserStream; // was the stream opened by the user? 
int ReadNextStreamChunk(); bool CanSeek(); // true if stream can be seeked otherwise false public: static const int EoF = COCO_WCHAR_MAX + 1; Buffer(FILE* s, bool isUserStream); Buffer(const unsigned char* buf, int len); Buffer(Buffer *b); virtual ~Buffer(); virtual void Close(); virtual int Read(); virtual int Peek(); virtual wchar_t* GetString(int beg, int end); virtual int GetPos(); virtual void SetPos(int value); }; class UTF8Buffer : public Buffer { public: UTF8Buffer(Buffer *b) : Buffer(b) {}; virtual int Read(); }; //----------------------------------------------------------------------------------- // StartStates -- maps characters to start states of tokens //----------------------------------------------------------------------------------- class StartStates { private: class Elem { public: int key, val; Elem *next; Elem(int key, int val) { this->key = key; this->val = val; next = NULL; } }; Elem **tab; public: StartStates() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } virtual ~StartStates() { for (int i = 0; i < 128; ++i) { Elem *e = tab[i]; while (e != NULL) { Elem *next = e->next; delete e; e = next; } } delete [] tab; } void set(int key, int val) { Elem *e = new Elem(key, val); int k = ((unsigned int) key) % 128; e->next = tab[k]; tab[k] = e; } int state(int key) { Elem *e = tab[((unsigned int) key) % 128]; while (e != NULL && e->key != key) e = e->next; return e == NULL ? 
0 : e->val; } }; //------------------------------------------------------------------------------------------- // KeywordMap -- maps strings to integers (identifiers to keyword kinds) //------------------------------------------------------------------------------------------- class KeywordMap { private: class Elem { public: wchar_t *key; int val; Elem *next; Elem(const wchar_t *key, int val) { this->key = coco_string_create(key); this->val = val; next = NULL; } virtual ~Elem() { coco_string_delete(key); } }; Elem **tab; public: KeywordMap() { tab = new Elem*[128]; memset(tab, 0, 128 * sizeof(Elem*)); } virtual ~KeywordMap() { for (int i = 0; i < 128; ++i) { Elem *e = tab[i]; while (e != NULL) { Elem *next = e->next; delete e; e = next; } } delete [] tab; } void set(const wchar_t *key, int val) { Elem *e = new Elem(key, val); int k = coco_string_hash(key) % 128; e->next = tab[k]; tab[k] = e; } int get(const wchar_t *key, int defaultVal) { Elem *e = tab[coco_string_hash(key) % 128]; while (e != NULL && !coco_string_equal(e->key, key)) e = e->next; return e == NULL ? 
defaultVal : e->val; } }; class Scanner { private: void *firstHeap; void *heap; void *heapTop; void **heapEnd; unsigned char EOL; int eofSym; int noSym; int maxT; int charSetSize; StartStates start; KeywordMap keywords; Token *t; // current token wchar_t *tval; // text of current token int tvalLength; // length of text of current token int tlen; // length of current token Token *tokens; // list of tokens already peeked (first token is a dummy) Token *pt; // current peek token int ch; // current input character -->casing0 int pos; // byte position of current character int charPos; // position by unicode characters starting with 0 int line; // line number of current character int col; // column number of current character int oldEols; // EOLs that appeared in a comment; void CreateHeapBlock(); Token* CreateToken(); void AppendVal(Token *t); void SetScannerBehindT(); void Init(); void NextCh(); void AddCh(); -->commentsheader Token* NextToken(); public: Buffer *buffer; // scanner buffer Scanner(const unsigned char* buf, int len); Scanner(const wchar_t* fileName); Scanner(FILE* s); ~Scanner(); Token* Scan(); Token* Peek(); void ResetPeek(); }; // end Scanner -->namespace_close #endif -->implementation /*---------------------------------------------------------------------- Scanner.cpp Specification -----------------------------------------------------------------------*/ -->begin #include #include #include "Scanner.h" -->namespace_open // string handling, wide character wchar_t* coco_string_create(const wchar_t* value) { return coco_string_create(value, 0); } wchar_t* coco_string_create(const wchar_t *value, int startIndex) { int valueLen = 0; int len = 0; if (value) { valueLen = wcslen(value); len = valueLen - startIndex; } return coco_string_create(value, startIndex, len); } wchar_t* coco_string_create(const wchar_t *value, int startIndex, int length) { int len = 0; wchar_t* data; if (value) { len = length; } data = new wchar_t[len + 1]; wcsncpy(data, 
// Returns a newly allocated, NUL-terminated copy of the dataLen characters
// of `data` starting at startIndex, with 'A'..'Z' mapped to 'a'..'z'.
// Returns NULL when data is NULL. The caller releases the result with
// coco_string_delete() / delete [].
// Only the requested range is read, so `data` need not be NUL-terminated
// (the generated scanner passes its token buffer, which is not).
wchar_t* coco_string_create_lower(const wchar_t* data, int startIndex, int dataLen) {
	if (!data) { return NULL; }
	if (dataLen < 0) { dataLen = 0; }   // treat a negative length as empty

	wchar_t* newData = new wchar_t[dataLen + 1];

	// i < dataLen: the terminator is written below, so data[startIndex + dataLen]
	// (possibly one past the end of the caller's buffer) is never touched
	for (int i = 0; i < dataLen; i++) {
		wchar_t ch = data[startIndex + i];
		if ((L'A' <= ch) && (ch <= L'Z')) {
			newData[i] = ch - (L'A' - L'a');
		} else {
			newData[i] = ch;
		}
	}
	newData[dataLen] = L'\0';
	return newData;
}
// Returns a non-negative hash of the NUL-terminated wide string `data`
// (0 for NULL or the empty string): h = (h * 7) ^ ch for every character.
// The arithmetic is done on unsigned int so that long strings wrap around
// with defined behaviour, and the result is always >= 0 so callers can use
// it directly as `hash % tableSize`.
int coco_string_hash(const wchar_t *data) {
	if (!data) { return 0; }

	unsigned int h = 0;
	for (; *data != 0; ++data) {
		h = (h * 7u) ^ (unsigned int) *data;
	}

	// Fold values with the top bit set back into the non-negative range
	// (the magnitude of the corresponding negative int). The final mask
	// covers the single value whose magnitude does not fit in an int.
	const unsigned int signBit = ~(~0u >> 1);
	if (h & signBit) { h = 0u - h; }
	return (int) (h & ~signBit);
}
fileLen : COCO_MAX_BUFFER_LENGTH; bufStart = INT_MAX; // nothing in the buffer so far } else { fileLen = bufLen = bufStart = 0; } bufCapacity = (bufLen>0) ? bufLen : COCO_MIN_BUFFER_LENGTH; buf = new unsigned char[bufCapacity]; if (fileLen > 0) SetPos(0); // setup buffer to position 0 (start) else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && CanSeek()) Close(); } Buffer::Buffer(Buffer *b) { buf = b->buf; bufCapacity = b->bufCapacity; b->buf = NULL; bufStart = b->bufStart; bufLen = b->bufLen; fileLen = b->fileLen; bufPos = b->bufPos; stream = b->stream; b->stream = NULL; isUserStream = b->isUserStream; } Buffer::Buffer(const unsigned char* buf, int len) { this->buf = new unsigned char[len]; memcpy(this->buf, buf, len*sizeof(unsigned char)); bufStart = 0; bufCapacity = bufLen = len; fileLen = len; bufPos = 0; stream = NULL; } Buffer::~Buffer() { Close(); if (buf != NULL) { delete [] buf; buf = NULL; } } void Buffer::Close() { if (!isUserStream && stream != NULL) { fclose(stream); stream = NULL; } } int Buffer::Read() { if (bufPos < bufLen) { return buf[bufPos++]; } else if (GetPos() < fileLen) { SetPos(GetPos()); // shift buffer start to Pos return buf[bufPos++]; } else if ((stream != NULL) && !CanSeek() && (ReadNextStreamChunk() > 0)) { return buf[bufPos++]; } else { return EoF; } } int Buffer::Peek() { int curPos = GetPos(); int ch = Read(); SetPos(curPos); return ch; } // beg .. begin, zero-based, inclusive, in byte // end .. 
end, zero-based, exclusive, in byte wchar_t* Buffer::GetString(int beg, int end) { int len = 0; wchar_t *buf = new wchar_t[end - beg]; int oldPos = GetPos(); SetPos(beg); while (GetPos() < end) buf[len++] = (wchar_t) Read(); SetPos(oldPos); wchar_t *res = coco_string_create(buf, 0, len); coco_string_delete(buf); return res; } int Buffer::GetPos() { return bufPos + bufStart; } void Buffer::SetPos(int value) { if ((value >= fileLen) && (stream != NULL) && !CanSeek()) { // Wanted position is after buffer and the stream // is not seek-able e.g. network or console, // thus we have to read the stream manually till // the wanted position is in sight. while ((value >= fileLen) && (ReadNextStreamChunk() > 0)); } if ((value < 0) || (value > fileLen)) { wprintf(L"--- buffer out of bounds access, position: %d\n", value); exit(1); } if ((value >= bufStart) && (value < (bufStart + bufLen))) { // already in buffer bufPos = value - bufStart; } else if (stream != NULL) { // must be swapped in fseek(stream, value, SEEK_SET); bufLen = fread(buf, sizeof(unsigned char), bufCapacity, stream); bufStart = value; bufPos = 0; } else { bufPos = fileLen - bufStart; // make Pos return fileLen } } // Read the next chunk of bytes from the stream, increases the buffer // if needed and updates the fields fileLen and bufLen. // Returns the number of bytes read. int Buffer::ReadNextStreamChunk() { int free = bufCapacity - bufLen; if (free == 0) { // in the case of a growing input stream // we can neither seek in the stream, nor can we // foresee the maximum length, thus we must adapt // the buffer size on demand. 
bufCapacity = bufLen * 2; unsigned char *newBuf = new unsigned char[bufCapacity]; memcpy(newBuf, buf, bufLen*sizeof(unsigned char)); delete [] buf; buf = newBuf; free = bufLen; } int read = fread(buf + bufLen, sizeof(unsigned char), free, stream); if (read > 0) { fileLen = bufLen = (bufLen + read); return read; } // end of stream reached return 0; } bool Buffer::CanSeek() { return (stream != NULL) && (ftell(stream) != -1); } int UTF8Buffer::Read() { int ch; do { ch = Buffer::Read(); // until we find a utf8 start (0xxxxxxx or 11xxxxxx) } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EoF)); if (ch < 128 || ch == EoF) { // nothing to do, first 127 chars are the same in ascii and utf8 // 0xxxxxxx or end of file character } else if ((ch & 0xF0) == 0xF0) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx int c1 = ch & 0x07; ch = Buffer::Read(); int c2 = ch & 0x3F; ch = Buffer::Read(); int c3 = ch & 0x3F; ch = Buffer::Read(); int c4 = ch & 0x3F; ch = (((((c1 << 6) | c2) << 6) | c3) << 6) | c4; } else if ((ch & 0xE0) == 0xE0) { // 1110xxxx 10xxxxxx 10xxxxxx int c1 = ch & 0x0F; ch = Buffer::Read(); int c2 = ch & 0x3F; ch = Buffer::Read(); int c3 = ch & 0x3F; ch = (((c1 << 6) | c2) << 6) | c3; } else if ((ch & 0xC0) == 0xC0) { // 110xxxxx 10xxxxxx int c1 = ch & 0x1F; ch = Buffer::Read(); int c2 = ch & 0x3F; ch = (c1 << 6) | c2; } return ch; } Scanner::Scanner(const unsigned char* buf, int len) { buffer = new Buffer(buf, len); Init(); } Scanner::Scanner(const wchar_t* fileName) { FILE* stream; char *chFileName = coco_string_create_char(fileName); if ((stream = fopen(chFileName, "rb")) == NULL) { wprintf(L"--- Cannot open file %ls\n", fileName); exit(1); } coco_string_delete(chFileName); buffer = new Buffer(stream, false); Init(); } Scanner::Scanner(FILE* s) { buffer = new Buffer(s, true); Init(); } Scanner::~Scanner() { char* cur = (char*) firstHeap; while(cur != NULL) { cur = *(char**) (cur + COCO_HEAP_BLOCK_SIZE); free(firstHeap); firstHeap = cur; } delete [] tval; delete 
buffer; } void Scanner::Init() { EOL = '\n'; eofSym = 0; -->declarations tvalLength = 128; tval = new wchar_t[tvalLength]; // text of current token // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block heap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); firstHeap = heap; heapEnd = (void**) (((char*) heap) + COCO_HEAP_BLOCK_SIZE); *heapEnd = 0; heapTop = heap; if (sizeof(Token) > COCO_HEAP_BLOCK_SIZE) { wprintf(L"--- Too small COCO_HEAP_BLOCK_SIZE\n"); exit(1); } pos = -1; line = 1; col = 0; charPos = -1; oldEols = 0; NextCh(); if (ch == 0xEF) { // check optional byte order mark for UTF-8 NextCh(); int ch1 = ch; NextCh(); int ch2 = ch; if (ch1 != 0xBB || ch2 != 0xBF) { wprintf(L"Illegal byte order mark at start of file"); exit(1); } Buffer *oldBuf = buffer; buffer = new UTF8Buffer(buffer); col = 0; charPos = -1; delete oldBuf; oldBuf = NULL; NextCh(); } -->initialization pt = tokens = CreateToken(); // first token is a dummy } void Scanner::NextCh() { if (oldEols > 0) { ch = EOL; oldEols--; } else { pos = buffer->GetPos(); // buffer reads unicode chars, if UTF8 has been detected ch = buffer->Read(); col++; charPos++; // replace isolated '\r' by '\n' in order to make // eol handling uniform across Windows, Unix and Mac if (ch == L'\r' && buffer->Peek() != L'\n') ch = EOL; if (ch == EOL) { line++; col = 0; } } -->casing1 } void Scanner::AddCh() { if (tlen >= tvalLength) { tvalLength *= 2; wchar_t *newBuf = new wchar_t[tvalLength]; memcpy(newBuf, tval, tlen*sizeof(wchar_t)); delete [] tval; tval = newBuf; } if (ch != Buffer::EoF) { -->casing2 NextCh(); } } -->comments void Scanner::CreateHeapBlock() { void* newHeap; char* cur = (char*) firstHeap; while(((char*) tokens < cur) || ((char*) tokens > (cur + COCO_HEAP_BLOCK_SIZE))) { cur = *((char**) (cur + COCO_HEAP_BLOCK_SIZE)); free(firstHeap); firstHeap = cur; } // COCO_HEAP_BLOCK_SIZE byte heap + pointer to next heap block newHeap = malloc(COCO_HEAP_BLOCK_SIZE + sizeof(void*)); *heapEnd = newHeap; heapEnd = 
(void**) (((char*) newHeap) + COCO_HEAP_BLOCK_SIZE); *heapEnd = 0; heap = newHeap; heapTop = heap; } Token* Scanner::CreateToken() { Token *t; if (((char*) heapTop + (int) sizeof(Token)) >= (char*) heapEnd) { CreateHeapBlock(); } t = (Token*) heapTop; heapTop = (void*) ((char*) heapTop + sizeof(Token)); t->val = NULL; t->next = NULL; return t; } void Scanner::AppendVal(Token *t) { int reqMem = (tlen + 1) * sizeof(wchar_t); if (((char*) heapTop + reqMem) >= (char*) heapEnd) { if (reqMem > COCO_HEAP_BLOCK_SIZE) { wprintf(L"--- Too long token value\n"); exit(1); } CreateHeapBlock(); } t->val = (wchar_t*) heapTop; heapTop = (void*) ((char*) heapTop + reqMem); wcsncpy(t->val, tval, tlen); t->val[tlen] = L'\0'; } Token* Scanner::NextToken() { while (ch == ' ' || -->scan1 ) NextCh(); -->scan2 int recKind = noSym; int recEnd = pos; t = CreateToken(); t->pos = pos; t->col = col; t->line = line; t->charPos = charPos; int state = start.state(ch); tlen = 0; AddCh(); switch (state) { case -1: { t->kind = eofSym; break; } // NextCh already done case 0: { case_0: if (recKind != noSym) { tlen = recEnd - t->pos; SetScannerBehindT(); } t->kind = recKind; break; } // NextCh already done -->scan3 } AppendVal(t); return t; } void Scanner::SetScannerBehindT() { buffer->SetPos(t->pos); NextCh(); line = t->line; col = t->col; charPos = t->charPos; for (int i = 0; i < tlen; i++) NextCh(); } // get the next token (possibly a token already seen during peeking) Token* Scanner::Scan() { if (tokens->next == NULL) { return pt = tokens = NextToken(); } else { pt = tokens = tokens->next; return tokens; } } // peek for the next token, ignore pragmas Token* Scanner::Peek() { do { if (pt->next == NULL) { pt->next = NextToken(); } pt = pt->next; } while (pt->kind > maxT); // skip pragmas return pt; } // make sure that peeking starts at the current scan position void Scanner::ResetPeek() { pt = tokens; } -->namespace_close coco-cpp_20120102/BitArray.cpp0000644000175000017500000000740711602643574013573 
0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include #include #include "BitArray.h" namespace Coco { BitArray::BitArray(const int length, const bool defaultValue) { Count = length; Data = new unsigned char[ (length+7)>>3 ]; if (defaultValue) memset(Data, 0xFF, (length+7)>>3); else memset(Data, 0x00, (length+7)>>3); } BitArray::BitArray(const BitArray ©) { Count = copy.Count; Data = new unsigned char[ (copy.Count+7)>>3 ]; memcpy(Data, copy.Data, (copy.Count+7)>>3); } BitArray::~BitArray() { delete [] Data; Data = NULL; } int BitArray::getCount() { return Count; } bool BitArray::Get(const int index) const { return (Data[(index>>3)] & (1<<(index&7))) != 0; } void BitArray::Set(const int index, const bool value) { if (value){ Data[(index>>3)] |= (1 << (index&7)); } else { unsigned char mask = 0xFF; mask ^= (1 << (index&7)); Data[(index>>3)] &= mask; } } void BitArray::SetAll(const bool value) { if (value) memset(Data, 0xFF, (Count+7)>>3); else memset(Data, 0x00, (Count+7)>>3); } void BitArray::Not() { for (int i=0; i<(Count+7)>>3; i++) { Data[i] ^= 0xFF; } } void BitArray::And(const BitArray *value) { for (int i=0; (i<(Count+7)>>3) && (i<(value->Count+7)>>3); i++) { Data[i] = (Data[i] & value->Data[i]); } } void BitArray::Or(const BitArray *value) { for (int i=0; (i<(Count+7)>>3) && (i<(value->Count+7)>>3); i++) { Data[i] = (Data[i] | value->Data[i]); } } void BitArray::Xor(const BitArray *value) { for (int i=0; (i<(Count+7)>>3) && (i<(value->Count+7)>>3); i++) { Data[i] = (Data[i] ^ value->Data[i]); } } BitArray* BitArray::Clone() const { BitArray *newBitArray = new BitArray(Count); newBitArray->Count = Count; memcpy(newBitArray->Data, Data, (Count+7)>>3); return newBitArray; } bool BitArray::Equal(const BitArray *right) const { if (Count != right->Count) { return false; } for(int index = 0; index < Count; index++) { if (Get(index) != right->Get(index)) { return false; } } return true; } bool BitArray::Overlaps(const BitArray *right) 
const { for (int index = 0; index < Count; ++index) { if (Get(index) && right->Get(index)) { return true; } } return false; } const BitArray &BitArray::operator=(const BitArray &right) { if ( &right != this ) { // avoid self assignment delete [] Data; // prevents memory leak Count = right.Count; Data = new unsigned char[ (Count+7)>>3 ]; memcpy(Data, right.Data, (Count+7)>>3); } return *this; // enables cascaded assignments } } // namespace coco-cpp_20120102/BitArray.h0000644000175000017500000000413111602641526013222 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #if !defined(COCO_BITARRAY_H__) #define COCO_BITARRAY_H__ namespace Coco { class BitArray { public: BitArray(int length = 0, bool defaultValue = false) ; BitArray(const BitArray © ); virtual ~BitArray(); int getCount(); bool Get(const int index) const; void Set(const int index, const bool value); void SetAll(const bool value); bool Equal(const BitArray *right ) const; bool Overlaps(const BitArray *right ) const; bool operator[](const int index) const { return Get(index); }; const BitArray &operator=(const BitArray &right); void Not(); void And(const BitArray *value); void Or(const BitArray *value); void Xor(const BitArray *value); BitArray* Clone() const; private: int Count; unsigned char* Data; void setMem(int length, bool value); }; } #endif // !defined(COCO_BITARRAY_H__) coco-cpp_20120102/Melted.cpp0000644000175000017500000000271711602641376013265 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Melted.h" namespace Coco { class BitArray; Melted::Melted(BitArray *set, State *state) { this->set = set; this->state = state; } }; // namespace coco-cpp_20120102/Target.h0000644000175000017500000000312111601423172012723 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/
#if !defined(COCO_TARGET_H__)
#define COCO_TARGET_H__

namespace Coco {

class State;

// One element of a singly linked list of target states.
class Target				// set of states that are reached by an action
{
public:
	Target (State *s);	// stores s; the State is referenced, not copied
	State *state;		// target state
	Target *next;		// next list element
};

}; // namespace

#endif // !defined(COCO_TARGET_H__)
coco-cpp_20120102/Target.cpp0000644000175000017500000000270511601423172013265 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include #include "Target.h" #include "State.h" namespace Coco { Target::Target(State *s) { next = NULL; state = s; } }; // namespace coco-cpp_20120102/Tab.h0000644000175000017500000001717011601423172012214 0ustar mlml/*------------------------------------------------------------------------- Tab -- Symbol Table Management Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/
#if !defined(COCO_TAB_H__)
#define COCO_TAB_H__

#include "ArrayList.h"
#include "HashTable.h"
#include "StringBuilder.h"
#include "SortedList.h"
#include "Scanner.h"
#include "Position.h"
#include "Symbol.h"
#include "Node.h"
#include "Graph.h"
#include "Sets.h"
#include "CharClass.h"

namespace Coco {

class Errors;
class Parser;
class BitArray;

// Symbol table management. Declares the lists of symbols, syntax graph nodes
// and character classes together with the set computations, grammar checks
// and string helpers that operate on them (see the section headings below).
class Tab
{
public:
	Position *semDeclPos;       // position of global semantic declarations
	CharSet *ignored;           // characters ignored by the scanner
	bool ddt[10];               // debug and test switches
	Symbol *gramSy;             // root nonterminal; filled by ATG
	Symbol *eofSy;              // end of file symbol
	Symbol *noSym;              // used in case of an error
	BitArray *allSyncSets;      // union of all synchronisation sets
	HashTable *literals;        // symbols that are used as literals

	wchar_t* srcName;           // name of the atg file (including path)
	wchar_t* srcDir;            // directory path of the atg file
	wchar_t* nsName;            // namespace for generated files
	wchar_t* frameDir;          // directory containing the frame files
	wchar_t* outDir;            // directory for generated files
	bool checkEOF;              // should coco generate a check for EOF at
	                            // the end of Parser.Parse():
	bool emitLines;             // emit line directives in generated parser

	BitArray *visited;          // mark list for graph traversals
	Symbol *curSy;              // current symbol in computation of sets

	Parser *parser;             // other Coco objects
	FILE* trace;

	Errors *errors;

	// symbol lists, one per symbol category
	ArrayList *terminals;
	ArrayList *pragmas;
	ArrayList *nonterminals;

	// syntax graph nodes
	ArrayList *nodes;
	static const char* nTyp[];
	Node *dummyNode;

	// character classes
	ArrayList *classes;
	int dummyName;

	Tab(Parser *parser);

	//---------------------------------------------------------------------
	//  Symbol list management
	//---------------------------------------------------------------------

	static const char* tKind[];

	Symbol* NewSym(int typ, const wchar_t* name, int line);
	Symbol* FindSym(const wchar_t* name);
	int Num(Node *p);
	void PrintSym(Symbol *sym);
	void PrintSymbolTable();
	void PrintSet(BitArray *s, int indent);

	//---------------------------------------------------------------------
	//  Syntax graph management
	//---------------------------------------------------------------------

	Node* NewNode(int typ, Symbol *sym, int line);
	Node* NewNode(int typ, Node* sub);
	Node* NewNode(int typ, int val, int line);
	void MakeFirstAlt(Graph *g);
	void MakeAlternative(Graph *g1, Graph *g2);
	void MakeSequence(Graph *g1, Graph *g2);
	void MakeIteration(Graph *g);
	void MakeOption(Graph *g);
	void Finish(Graph *g);
	void DeleteNodes();
	Graph* StrToGraph(const wchar_t* str);
	void SetContextTrans(Node *p); // set transition code in the graph rooted at p

	//------------ graph deletability check -----------------

	bool DelGraph(Node* p);
	bool DelSubGraph(Node* p);
	bool DelNode(Node* p);

	//----------------- graph printing ----------------------

	int Ptr(Node *p, bool up);
	wchar_t* Pos(Position *pos);
	wchar_t* Name(const wchar_t* name);
	void PrintNodes();

	//---------------------------------------------------------------------
	//  Character class management
	//---------------------------------------------------------------------

	CharClass* NewCharClass(const wchar_t* name, CharSet *s);
	CharClass* FindCharClass(const wchar_t* name);  // look up by name
	CharClass* FindCharClass(CharSet *s);           // look up by set
	CharSet* CharClassSet(int i);

	//----------- character class printing

	wchar_t* Ch(const wchar_t ch);
	void WriteCharSet(CharSet *s);
	void WriteCharClasses ();

	//---------------------------------------------------------------------
	//  Symbol set computations
	//---------------------------------------------------------------------

	/* Computes the first set for the given Node. */
	BitArray* First0(Node *p, BitArray *mark);
	BitArray* First(Node *p);
	void CompFirstSets();
	void CompFollow(Node *p);
	void Complete(Symbol *sym);
	void CompFollowSets();
	Node* LeadingAny(Node *p);
	void FindAS(Node *p); // find ANY sets
	void CompAnySets();
	BitArray* Expected(Node *p, Symbol *curSy);
	// does not look behind resolvers; only called during LL(1) test and in CheckRes
	BitArray* Expected0(Node *p, Symbol *curSy);
	void CompSync(Node *p);
	void CompSyncSets();
	void SetupAnys();
	void CompDeletableSymbols();
	void RenumberPragmas();
	void CompSymbolSets();

	//---------------------------------------------------------------------
	//  String handling
	//---------------------------------------------------------------------

	wchar_t Hex2Char(const wchar_t* s);
	wchar_t* Char2Hex(const wchar_t ch);
	wchar_t* Unescape(const wchar_t* s);
	wchar_t* Escape(const wchar_t* s);

	//---------------------------------------------------------------------
	//  Grammar checks
	//---------------------------------------------------------------------

	bool GrammarOk();

	//--------------- check for circular productions ----------------------

	class CNode {	// node of list for finding circular productions
	public:
		Symbol *left, *right;

		CNode (Symbol *l, Symbol *r) {
			left = l; right = r;
		}
	};

	void GetSingles(Node *p, ArrayList *singles);
	bool NoCircularProductions();

	//--------------- check for LL(1) errors ----------------------

	void LL1Error(int cond, Symbol *sym);
	void CheckOverlap(BitArray *s1, BitArray *s2, int cond);
	void CheckAlts(Node *p);
	void CheckLL1();

	//------------- check if resolvers are legal  --------------------

	void ResErr(Node *p, const wchar_t* msg);
	void CheckRes(Node *p, bool rslvAllowed);
	void CheckResolvers();

	//------------- check if every nts has a production --------------------

	bool NtsComplete();

	//-------------- check if every nts can be reached  -----------------

	void MarkReachedNts(Node *p);
	bool AllNtReached();

	//--------- check if every nts can be derived to terminals  ------------

	bool IsTerm(Node *p, BitArray *mark); // true if graph can be derived to terminals
	bool AllNtToTerm();

	//---------------------------------------------------------------------
	//  Cross reference list
	//---------------------------------------------------------------------

	void XRef();
	void SetDDT(const wchar_t* s);
	void SetOption(const wchar_t* s);

};

}; // namespace

#endif // !defined(COCO_TAB_H__)
coco-cpp_20120102/Symbol.h0000644000175000017500000000551311601423172012751 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/
#if !defined(COCO_SYMBOL_H__)
#define COCO_SYMBOL_H__

#include "Position.h"
#include "Scanner.h"

namespace Coco {

class Node;
class BitArray;

// One grammar symbol. In the field comments below the prefixes "t:", "nt:"
// and "pr:" name the symbol type (see typ) for which the field is meaningful.
class Symbol {
public:
	// token kinds
	static int fixedToken;    // e.g. 'a' ('b' | 'c') (structure of literals)
	static int classToken;    // e.g. digit {digit}   (at least one char class)
	static int litToken;      // e.g. "while"
	static int classLitToken; // e.g. letter {letter} but without literals that have the same structure

	int      n;           // symbol number
	int      typ;         // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */
	wchar_t  *name;       // symbol name
	Node     *graph;      // nt: to first node of syntax graph
	int      tokenKind;   // t:  token kind (fixedToken, classToken, ...)
	bool     deletable;   // nt: true if nonterminal is deletable
	bool     firstReady;  // nt: true if terminal start symbols have already been computed
	BitArray *first;      // nt: terminal start symbols
	BitArray *follow;     // nt: terminal followers
	BitArray *nts;        // nt: nonterminals whose followers have to be added to this sym
	int      line;        // source text line number of item in this node
	Position *attrPos;    // nt: position of attributes in source text (or null)
	Position *semPos;     // pr: pos of semantic action in source text (or null)
	                      // nt: pos of local declarations in source text (or null)

	Symbol(int typ, const wchar_t* name, int line);
	virtual ~Symbol();
};

}; // namespace

#endif // !defined(COCO_SYMBOL_H__)
coco-cpp_20120102/CharClass.cpp0000644000175000017500000000305011601423172013674 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "CharClass.h" #include "Scanner.h" namespace Coco { CharClass::CharClass(const wchar_t* name, CharSet *s) { this->name = coco_string_create(name); this->set = s; } CharClass::~CharClass() { coco_string_delete(name); } }; // namespace coco-cpp_20120102/ArrayList.h0000644000175000017500000000316511601423172013417 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/
#if !defined(COCO_ARRAYLIST_H__)
#define COCO_ARRAYLIST_H__

namespace Coco {

// List of untyped (void*) elements backed by a heap-allocated pointer array.
class ArrayList
{
public:
	ArrayList();
	virtual ~ArrayList();

	void Add(void *value);
	void Remove(void *value);
	void* operator[](int index);

	int Count;      // number of stored elements
	int Capacity;   // number of slots allocated in Data
private:
	void** Data;    // element storage
};

}; // namespace

#endif // !defined(COCO_ARRAYLIST_H__)
coco-cpp_20120102/ArrayList.cpp0000644000175000017500000000414311601423172013747 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/
/* NOTE(review): the first directive below has no header name -- restore it from a clean copy. */
#include #include "ArrayList.h"

namespace Coco {

// Starts empty with room for 10 elements.
ArrayList::ArrayList() {
	Count = 0;
	Capacity = 10;
	Data = new void*[ Capacity ];
}

// Releases the pointer array only; the elements are not deleted here.
ArrayList::~ArrayList() {
	delete [] Data;
}

// Appends value; when the array is full the capacity is doubled first.
// NOTE(review): the text is damaged inside the copy loop below. Everything
// between "i" and "target = NULL" is missing (the rest of ArrayList::Add, any
// further ArrayList members, and the head of what looks like the Action
// constructor). Restore this span from a clean copy; do not edit it by hand.
void ArrayList::Add(void *value) {
	if (Count < Capacity) {
		Data[Count] = value;
		Count++;
	} else {
		Capacity *= 2;
		void** newData = new void*[Capacity];
		for (int i=0; itarget = NULL; this->next = NULL; this->typ = typ; this->sym = sym; this->tc = tc; }

// Inserts t into this action's target list, which is kept in ascending order
// of state->nr. If a target with the same state is already present, the list
// is left unchanged and t is not linked in.
void Action::AddTarget(Target *t) { // add t to the action.targets
	Target *last = NULL;
	Target *p = target;
	while (p != NULL && t->state->nr >= p->state->nr) {
		if (t->state == p->state) return;
		last = p; p = p->next;
	}
	t->next = p;
	if (p == target) target = t; else last->next = t;
}

// Adds a fresh Target for every target of a, and takes over a's
// Node::contextTrans transition code if a has it.
// NOTE(review): when AddTarget() finds the state already present it returns
// without linking t, so the Target allocated here appears to be leaked --
// confirm and free it if so.
void Action::AddTargets(Action *a) {// add copy of a.targets to action.targets
	for (Target *p = a->target; p != NULL; p = p->next) {
		Target *t = new Target(p->state);
		AddTarget(t);
	}
	if (a->tc == Node::contextTrans) tc = Node::contextTrans;
}

// Returns a newly allocated CharSet: a clone of the character class set
// numbered sym when typ is Node::clas, otherwise a set containing only sym.
CharSet* Action::Symbols(Tab *tab) {
	CharSet *s;
	if (typ == Node::clas)
		s = tab->CharClassSet(sym)->Clone();
	else {
		s = new CharSet(); s->Set(sym);
	}
	return s;
}

// Makes this action match exactly the set s: a one-element set becomes a
// Node::chr action on that element; any other set becomes a Node::clas action
// on the matching character class, which is created under the name "#" when
// tab does not have one yet.
void Action::ShiftWith(CharSet *s, Tab *tab) {
	if (s->Elements() == 1) {
		typ = Node::chr; sym = s->First();
	} else {
		CharClass *c = tab->FindCharClass(s);
		if (c == NULL) c = tab->NewCharClass(L"#", s); // class with dummy name
		typ = Node::clas; sym = c->n;
	}
}

}; // namespace
coco-cpp_20120102/Symbol.cpp0000644000175000017500000000352711601423170013305 
0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include "Symbol.h" #include "Scanner.h" namespace Coco { int Symbol::fixedToken = 0; int Symbol::classToken = 1; int Symbol::litToken = 2; int Symbol::classLitToken = 3; Symbol::Symbol(int typ, const wchar_t* name, int line) { n = 0; graph = NULL; tokenKind = 0; deletable = false; firstReady = false; first = NULL; follow = NULL; nts = NULL; attrPos = NULL; semPos = NULL; this->typ = typ; this->name = coco_string_create(name); this->line = line; } Symbol::~Symbol() { coco_string_delete(name); } }; // namespace coco-cpp_20120102/StringBuilder.h0000644000175000017500000000077511601423170014264 0ustar mlml#if !defined(COCO_STRINGBUILDER_H__) #define COCO_STRINGBUILDER_H__ #include namespace Coco { class StringBuilder { public: StringBuilder(int capacity = 32); StringBuilder(const wchar_t *val); virtual ~StringBuilder(); void Append(const wchar_t val); void Append(const wchar_t *val); wchar_t* ToString(); int GetLength() { return length; }; private: void Init(int capacity); wchar_t *data; int capacity; int length; }; }; // namespace #endif // !defined(COCO_STRINGBUILDER_H__) coco-cpp_20120102/StringBuilder.cpp0000644000175000017500000000460311601423170014611 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include #include "StringBuilder.h" #include "Scanner.h" namespace Coco { void StringBuilder::Init(int capacity) { length = 0; this->capacity = capacity; data = new wchar_t[capacity + 1]; data[0] = 0; } StringBuilder::StringBuilder(int capacity) { Init(capacity); } StringBuilder::StringBuilder(const wchar_t *val) { capacity = length = wcslen(val); Init(capacity); wcscpy(data, val); } StringBuilder::~StringBuilder() { if (data != NULL) { delete [] data; data = NULL; length = 0; capacity = 0; } } void StringBuilder::Append(const wchar_t value) { if (length == capacity) { int oldCap = capacity; capacity = capacity * 2; wchar_t *nData = new wchar_t[capacity + 1]; memcpy(nData, data, oldCap * sizeof(int)); delete [] data; data = nData; } data[length] = value; length++; data[length] = '\0'; } void StringBuilder::Append(const wchar_t *value) { if (length + (int)wcslen(value) < capacity) { wcscpy(data + length, value); length += wcslen(value); } } wchar_t* StringBuilder::ToString() { return coco_string_create(data); } }; // namespace coco-cpp_20120102/State.h0000644000175000017500000000350411601423170012560 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #if !defined(COCO_STATE_H__) #define COCO_STATE_H__ #include "Symbol.h" namespace Coco { class Action; class State // state of finite automaton { public: int nr; // state number Action *firstAction; // to first action of this state Symbol *endOf; // recognized token if state is final bool ctx; // true if state is reached via contextTrans State *next; State(); void AddAction(Action *act); void DetachAction(Action *act); void MeltWith(State *s); }; }; // namespace #endif // !defined(COCO_STATE_H__) coco-cpp_20120102/State.cpp0000644000175000017500000000446311601423170013120 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include "Action.h" #include "State.h" namespace Coco { State::State() { this->firstAction = NULL; this->endOf = NULL; this->ctx = false; this->next = NULL; } void State::AddAction(Action *act) { Action *lasta = NULL, *a = firstAction; while (a != NULL && act->typ >= a->typ) {lasta = a; a = a->next;} // collecting classes at the beginning gives better performance act->next = a; if (a==firstAction) { firstAction = act; } else { lasta->next = act; } } void State::DetachAction(Action *act) { Action *lasta = NULL, *a = firstAction; while (a != NULL && a != act) {lasta = a; a = a->next;} if (a != NULL) { if (a == firstAction) { firstAction = a->next; } else { lasta->next = a->next; } } } void State::MeltWith(State *s) { // copy actions of s to state Action *a; for (Action *action = s->firstAction; action != NULL; action = action->next) { a = new Action(action->typ, action->sym, action->tc); a->AddTargets(action); AddAction(a); } } }; // namespace coco-cpp_20120102/SortedList.h0000644000175000017500000000360211601423170013573 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #if !defined(COCO_SORTEDLIST_H__) #define COCO_SORTEDLIST_H__ namespace Coco { class Symbol; class SortedEntry { public: Symbol* Key; void* Value; SortedEntry* next; SortedEntry(Symbol* Key, void* Value); virtual ~SortedEntry(); }; class SortedList { public: SortedList(); virtual ~SortedList(); void Set(Symbol *key, void *value); void* Get( Symbol* key ) const; // Value void* GetKey( int index ) const ;// Key SortedEntry* operator[]( int index ) const; int Count; private: bool Find(Symbol* key); SortedEntry *Data; }; }; // namespace #endif // !defined(COCO_SORTEDLIST_H__) coco-cpp_20120102/SortedList.cpp0000644000175000017500000000711711601423170014133 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include #include "SortedList.h" #include "Tab.h" namespace Coco { int Compare(Symbol *x, Symbol *y) { return coco_string_compareto(x->name, y->name); } SortedEntry::SortedEntry(Symbol* Key, void* Value) { this->Key = Key; this->Value = Value; this->next = NULL; } SortedEntry::~SortedEntry() { }; SortedList::SortedList() { Count = 0; Data = NULL; } SortedList::~SortedList() { } bool SortedList::Find(Symbol* key) { SortedEntry* pSortedEntry = Data; while (pSortedEntry) { if (!Compare(pSortedEntry->Key, key)) return true; pSortedEntry = pSortedEntry->next; } return false; } void SortedList::Set(Symbol *key, void *value) { if (!Find(key)) { // new entry SortedEntry* pSortedEntry = Data; SortedEntry* pSortedEntryPrev = NULL; SortedEntry* newSortedEntry = new SortedEntry(key, value); if (pSortedEntry) { // insert if (Compare(pSortedEntry->Key, key) > 0) { // before the first newSortedEntry->next = Data; Data = newSortedEntry; } else { while (pSortedEntry) { if (Compare(pSortedEntry->Key, key) < 0) { pSortedEntryPrev = pSortedEntry; pSortedEntry = pSortedEntry->next; } else { break; } } pSortedEntryPrev->next = newSortedEntry; newSortedEntry->next = pSortedEntry; } } else { Data = newSortedEntry; // first entry } Count++; } else { // exist entry - overwrite SortedEntry* pSortedEntry = Data; while (Compare(pSortedEntry->Key, key)) { pSortedEntry = pSortedEntry->next; } pSortedEntry->Value = value; } } void* SortedList::Get( Symbol* key ) const // Value { 
SortedEntry* pSortedEntry = Data; while (pSortedEntry) { if (!Compare(pSortedEntry->Key, key)) return pSortedEntry->Value; pSortedEntry = pSortedEntry->next; } return NULL; } void* SortedList::GetKey( int index ) const // Key { if (0 <= index && index < Count) { SortedEntry* pSortedEntry = Data; for (int i=0; inext; } return pSortedEntry->Key; } else { return NULL; } } SortedEntry* SortedList::operator[]( int index ) const { if (0 <= index && index < Count) { SortedEntry* pSortedEntry = Data; for (int i=0; inext; } return pSortedEntry; } else { return NULL; } } }; // namespace coco-cpp_20120102/Sets.h0000644000175000017500000000456711601423170012430 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #if !defined(COCO_SETS_H__) #define COCO_SETS_H__ #include "BitArray.h" namespace Coco { class Sets { public: static int First(BitArray *s) { int max = s->getCount(); for (int i=0; igetCount(); int n = 0; for (int i=0; igetCount(); for (int i=0; i b ? int max = a->getCount(); for (int i=0; igetCount(); for (int i=0; iClone(); c->Not(); a->And(c); delete c; } }; }; // namespace #endif // !defined(COCO_SETS_H__) coco-cpp_20120102/Position.h0000644000175000017500000000343411601423170013306 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #if !defined(COCO_POSITION_H__) #define COCO_POSITION_H__ namespace Coco { class Position { // position of source code stretch (e.g. semantic action, resolver expressions) public: int beg; // start relative to the beginning of the file int end; // end of stretch int col; // column number of start position int line; // line number of beginnnig of source code stretch Position(int beg, int end, int col, int line); }; }; // namespace #endif // !defined(COCO_POSITION_H__) coco-cpp_20120102/Position.cpp0000644000175000017500000000274311601423170013643 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #include "Position.h" namespace Coco { Position::Position(int beg, int end, int col, int line) { this->beg = beg; this->end = end; this->col = col; this->line = line; } }; // namespace coco-cpp_20120102/Parser.frame0000644000175000017500000001667611601423170013615 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ /*---------------------------------------------------------------------- Parser.h Specification -----------------------------------------------------------------------*/ -->begin #if !defined(-->prefixCOCO_PARSER_H__) #define -->prefixCOCO_PARSER_H__ -->headerdef #include "Scanner.h" -->namespace_open class Errors { public: int count; // number of errors detected Errors(); void SynErr(int line, int col, int n); void Error(int line, int col, const wchar_t *s); void Warning(int line, int col, const wchar_t *s); void Warning(const wchar_t *s); void Exception(const wchar_t *s); }; // Errors class Parser { private: -->constantsheader Token *dummyToken; int errDist; int minErrDist; void SynErr(int n); void Get(); void Expect(int n); bool StartOf(int s); void ExpectWeak(int n, int follow); bool WeakSeparator(int n, int syFol, int repFol); public: Scanner *scanner; Errors *errors; Token *t; // last recognized token Token *la; // lookahead token -->declarations Parser(Scanner *scanner); ~Parser(); void SemErr(const wchar_t* msg); -->productionsheader void Parse(); }; // end Parser -->namespace_close #endif -->implementation /*---------------------------------------------------------------------- Parser.cpp Specification -----------------------------------------------------------------------*/ -->begin #include #include "Parser.h" #include "Scanner.h" -->namespace_open void Parser::SynErr(int n) { if (errDist >= minErrDist) errors->SynErr(la->line, la->col, n); errDist = 0; } void Parser::SemErr(const wchar_t* msg) { if (errDist >= minErrDist) errors->Error(t->line, t->col, msg); errDist = 0; } void Parser::Get() { for (;;) { t = la; la = scanner->Scan(); if (la->kind <= maxT) { ++errDist; break; } -->pragmas if (dummyToken != t) { dummyToken->kind = t->kind; dummyToken->pos = t->pos; dummyToken->col = t->col; dummyToken->line = t->line; dummyToken->next = NULL; coco_string_delete(dummyToken->val); 
dummyToken->val = coco_string_create(t->val); t = dummyToken; } la = t; } } void Parser::Expect(int n) { if (la->kind==n) Get(); else { SynErr(n); } } void Parser::ExpectWeak(int n, int follow) { if (la->kind == n) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); } } bool Parser::WeakSeparator(int n, int syFol, int repFol) { if (la->kind == n) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); while (!(StartOf(syFol) || StartOf(repFol) || StartOf(0))) { Get(); } return StartOf(syFol); } } -->productions // If the user declared a method Init and a mehtod Destroy they should // be called in the contructur and the destructor respctively. // // The following templates are used to recognize if the user declared // the methods Init and Destroy. template struct ParserInitExistsRecognizer { template struct ExistsIfInitIsDefinedMarker{}; struct InitIsMissingType { char dummy1; }; struct InitExistsType { char dummy1; char dummy2; }; // exists always template static InitIsMissingType is_here(...); // exist only if ExistsIfInitIsDefinedMarker is defined template static InitExistsType is_here(ExistsIfInitIsDefinedMarker*); enum { InitExists = (sizeof(is_here(NULL)) == sizeof(InitExistsType)) }; }; template struct ParserDestroyExistsRecognizer { template struct ExistsIfDestroyIsDefinedMarker{}; struct DestroyIsMissingType { char dummy1; }; struct DestroyExistsType { char dummy1; char dummy2; }; // exists always template static DestroyIsMissingType is_here(...); // exist only if ExistsIfDestroyIsDefinedMarker is defined template static DestroyExistsType is_here(ExistsIfDestroyIsDefinedMarker*); enum { DestroyExists = (sizeof(is_here(NULL)) == sizeof(DestroyExistsType)) }; }; // The folloing templates are used to call the Init and Destroy methods if they exist. 
// Generic case of the ParserInitCaller, gets used if the Init method is missing template::InitExists> struct ParserInitCaller { static void CallInit(T *t) { // nothing to do } }; // True case of the ParserInitCaller, gets used if the Init method exists template struct ParserInitCaller { static void CallInit(T *t) { t->Init(); } }; // Generic case of the ParserDestroyCaller, gets used if the Destroy method is missing template::DestroyExists> struct ParserDestroyCaller { static void CallDestroy(T *t) { // nothing to do } }; // True case of the ParserDestroyCaller, gets used if the Destroy method exists template struct ParserDestroyCaller { static void CallDestroy(T *t) { t->Destroy(); } }; void Parser::Parse() { t = NULL; la = dummyToken = new Token(); la->val = coco_string_create(L"Dummy Token"); Get(); -->parseRoot } Parser::Parser(Scanner *scanner) { -->constants ParserInitCaller::CallInit(this); dummyToken = NULL; t = la = NULL; minErrDist = 2; errDist = minErrDist; this->scanner = scanner; errors = new Errors(); } bool Parser::StartOf(int s) { const bool T = true; const bool x = false; -->initialization return set[s][la->kind]; } Parser::~Parser() { ParserDestroyCaller::CallDestroy(this); delete errors; delete dummyToken; } Errors::Errors() { count = 0; } void Errors::SynErr(int line, int col, int n) { wchar_t* s; switch (n) { -->errors default: { wchar_t format[20]; coco_swprintf(format, 20, L"error %d", n); s = coco_string_create(format); } break; } wprintf(L"-- line %d col %d: %ls\n", line, col, s); coco_string_delete(s); count++; } void Errors::Error(int line, int col, const wchar_t *s) { wprintf(L"-- line %d col %d: %ls\n", line, col, s); count++; } void Errors::Warning(int line, int col, const wchar_t *s) { wprintf(L"-- line %d col %d: %ls\n", line, col, s); } void Errors::Warning(const wchar_t *s) { wprintf(L"%ls\n", s); } void Errors::Exception(const wchar_t* s) { wprintf(L"%ls", s); exit(1); } -->namespace_close 
coco-cpp_20120102/Node.h0000644000175000017500000000621311601423170012365 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #if !defined(COCO_NODE_H__) #define COCO_NODE_H__ #include #include "Position.h" #include "State.h" #include "Scanner.h" namespace Coco { class Symbol; class BitArray; class Node { public: // constants for node kinds static int t; // terminal symbol static int pr; // pragma static int nt; // nonterminal symbol static int clas; // character class static int chr; // character static int wt; // weak terminal symbol static int any; // static int eps; // empty static int sync; // synchronization symbol static int sem; // semantic action: (. .) 
static int alt; // alternative: | static int iter; // iteration: { } static int opt; // option: [ ] static int rslv; // resolver expr static int normalTrans; // transition codes static int contextTrans; int n; // node number int typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv Node *next; // to successor node Node *down; // alt: to next alternative Node *sub; // alt, iter, opt: to first node of substructure bool up; // true: "next" leads to successor in enclosing structure Symbol *sym; // nt, t, wt: symbol represented by this node int val; // chr: ordinal character value // clas: index of character class int code; // chr, clas: transition code BitArray *set; // any, sync: the set represented by this node Position *pos; // nt, t, wt: pos of actual attributes // sem: pos of semantic action in source text // rslv: pos of resolver in source text int line; // source text line number of item in this node State *state; // DFA state corresponding to this node // (only used in DFA.ConvertToStates) Node(int typ, Symbol *sym, int line); }; }; // namespace #endif // !defined(COCO_NODE_H__) coco-cpp_20120102/Node.cpp0000644000175000017500000000446711601423170012731 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Node.h" namespace Coco { // constants for node kinds int Node::t = 1; // terminal symbol int Node::pr = 2; // pragma int Node::nt = 3; // nonterminal symbol int Node::clas = 4; // character class int Node::chr = 5; // character int Node::wt = 6; // weak terminal symbol int Node::any = 7; // int Node::eps = 8; // empty int Node::sync = 9; // synchronization symbol int Node::sem = 10; // semantic action: (. .) int Node::alt = 11; // alternative: | int Node::iter = 12; // iteration: { } int Node::opt = 13; // option: [ ] int Node::rslv = 14; // resolver expr int Node::normalTrans = 0; // transition codes int Node::contextTrans = 1; Node::Node(int typ, Symbol *sym, int line) { this->n = 0; this->next = NULL; this->down = NULL; this->sub = NULL; this->up = false; this->val = 0; this->code = 0; this->set = NULL; this->pos = NULL; this->state = NULL; this->state = 0; this->typ = typ; this->sym = sym; this->line = line; } }; // namespace coco-cpp_20120102/Melted.h0000644000175000017500000000323611601423170012714 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #if !defined(COCO_MELTED_H__) #define COCO_MELTED_H__ #include "State.h" #include "Scanner.h" namespace Coco { class BitArray; class Melted // info about melted states { public: BitArray *set; // set of old states State *state; // new state Melted *next; Melted(BitArray *set, State *state); }; }; // namespace #endif // !defined(COCO_MELTED_H__) coco-cpp_20120102/HashTable.h0000644000175000017500000000426411601423170013337 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #if !defined(COCO_HASHTABLE_H__) #define COCO_HASHTABLE_H__ #include namespace Coco { class DictionaryEntry { public: wchar_t *key; void *val; }; class Iterator { public: virtual bool HasNext() = 0; virtual DictionaryEntry* Next() = 0; }; class HashTable { public: HashTable(int size = 128); virtual ~HashTable(); virtual void Set(wchar_t *key, void *value); virtual void* Get(wchar_t *key) const; inline void* operator[](wchar_t *key) const { return Get(key); }; virtual Iterator* GetIterator(); private: class Obj : public DictionaryEntry { public: Obj *next; }; class Iter : public Iterator { private: HashTable *ht; int pos; Obj* cur; public: Iter(HashTable *ht); virtual bool HasNext(); virtual DictionaryEntry* Next(); }; Obj* Get0(wchar_t *key) const; Obj **data; int size; }; }; // namespace #endif // !defined(COCO_HASHTABLE_H__) coco-cpp_20120102/HashTable.cpp0000644000175000017500000000541711601423170013673 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include #include #include "HashTable.h" #include "Scanner.h" namespace Coco { HashTable::HashTable(int size) { this->size = size; data = new Obj*[size]; memset(data, 0, size * sizeof(Obj*)); } HashTable::~HashTable() { for (int i = 0; i < size; ++i) { Obj *o = data[i]; while (o != NULL) { Obj *del = o; o = o->next; delete del; } } delete[] data; data = NULL; }; HashTable::Obj* HashTable::Get0(wchar_t *key) const { int k = coco_string_hash(key) % size; HashTable::Obj *o = data[k]; while (o != NULL && !coco_string_equal(key, o->key)) { o = o->next; } return o; } void HashTable::Set(wchar_t *key, void *val) { HashTable::Obj *o = Get0(key); if (o == NULL) { // new entry int k = coco_string_hash(key) % size; o = new Obj(); o->key = key; o->val = val; o->next = data[k]; data[k] = o; } else { // exist entry - overwrite o->val = val; } } void* HashTable::Get(wchar_t *key) const { HashTable::Obj *o = Get0(key); if (o != NULL) { return o->val; } return NULL; } Iterator* HashTable::GetIterator() { return new HashTable::Iter(this); } HashTable::Iter::Iter(HashTable *ht) { this->ht = ht; this->pos = 0; this->cur = NULL; } bool HashTable::Iter::HasNext() { while (cur == NULL && pos < ht->size) { cur = ht->data[pos]; ++pos; } return cur != NULL; } DictionaryEntry* HashTable::Iter::Next() { if (!HasNext()) { return NULL; } Obj *next = cur; cur = cur->next; return next; } }; // namespace coco-cpp_20120102/Graph.h0000644000175000017500000000334411601423170012543 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. 
Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #if !defined(COCO_GRAPH_H__) #define COCO_GRAPH_H__ #include "Node.h" namespace Coco { class Graph { public: Node *l; // left end of graph = head Node *r; // right end of graph = list of nodes to be linked to successor graph Graph() { l = NULL; r = NULL; } Graph(Node *left, Node *right) { l = left; r = right; } Graph(Node *p) { l = p; r = p; } virtual ~Graph() { } }; }; // namespace #endif // !defined(COCO_GRAPH_H__) coco-cpp_20120102/Generator.h0000644000175000017500000000372011601423170013426 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ #if !defined(COCO_GENERATOR_H__) #define COCO_GENERATOR_H__ #include #include "Tab.h" #include "Parser.h" namespace Coco { class Generator { public: Generator(Tab *tab, Errors *errors); FILE* OpenFrame(const wchar_t* frame); FILE* OpenGen(const wchar_t *genName); void GenCopyright(); void GenPrefixFromNamespace(); void SkipFramePart(const wchar_t *stop); void CopyFramePart(const wchar_t *stop); private: FILE* fram; FILE* gen; Tab *tab; wchar_t* frameFile; Errors *errors; void CopyFramePart(const wchar_t* stop, bool generateOutput); }; } // namespace #endif // !defined(COCO_GENERATOR_H__) coco-cpp_20120102/Generator.cpp0000644000175000017500000001307011601423170013760 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-----------------------------------------------------------------------*/ #include "Generator.h" #include "Scanner.h" namespace Coco { Generator::Generator(Tab *tab, Errors *errors) { this->errors = errors; this->tab = tab; fram = NULL; gen = NULL; frameFile = NULL; } FILE* Generator::OpenFrame(const wchar_t* frame) { if (coco_string_length(tab->frameDir) != 0) { frameFile = coco_string_create_append(tab->frameDir, L"/"); coco_string_merge(frameFile, frame); char *chFrameFile = coco_string_create_char(frameFile); fram = fopen(chFrameFile, "r"); delete [] chFrameFile; } if (fram == NULL) { delete [] frameFile; frameFile = coco_string_create_append(tab->srcDir, frame); /* pdt */ char *chFrameFile = coco_string_create_char(frameFile); fram = fopen(chFrameFile, "r"); delete [] chFrameFile; } if (fram == NULL) { wchar_t *message = coco_string_create_append(L"-- Cannot find : ", frame); errors->Exception(message); delete [] message; } return fram; } FILE* Generator::OpenGen(const wchar_t *genName) { /* pdt */ wchar_t *fn = coco_string_create_append(tab->outDir, genName); /* pdt */ char *chFn = coco_string_create_char(fn); if ((gen = fopen(chFn, "r")) != NULL) { fclose(gen); wchar_t *oldName = coco_string_create_append(fn, L".old"); char *chOldName = coco_string_create_char(oldName); remove(chOldName); rename(chFn, chOldName); // copy with overwrite coco_string_delete(chOldName); coco_string_delete(oldName); } if ((gen = fopen(chFn, "w")) == NULL) { wchar_t *message = coco_string_create_append(L"-- Cannot generate : ", genName); errors->Exception(message); delete [] message; } coco_string_delete(chFn); coco_string_delete(fn); return gen; } void Generator::GenCopyright() { FILE *file = NULL; if (coco_string_length(tab->frameDir) != 0) { wchar_t *copyFr = coco_string_create_append(tab->frameDir, L"/Copyright.frame"); char *chCopyFr = coco_string_create_char(copyFr); file = fopen(chCopyFr, "r"); delete [] copyFr; delete [] chCopyFr; } if (file == NULL) { wchar_t *copyFr = 
coco_string_create_append(tab->srcDir, L"Copyright.frame"); char *chCopyFr = coco_string_create_char(copyFr); file = fopen(chCopyFr, "r"); delete [] copyFr; delete [] chCopyFr; } if (file == NULL) { return; } FILE *scannerFram = fram; fram = file; CopyFramePart(NULL); fram = scannerFram; fclose(file); } void Generator::GenPrefixFromNamespace() { const wchar_t *nsName = tab->nsName; if (nsName == NULL || coco_string_length(nsName) == 0) { return; } const int len = coco_string_length(nsName); int startPos = 0; do { int curLen = coco_string_indexof(nsName + startPos, COCO_CPP_NAMESPACE_SEPARATOR); if (curLen == -1) { curLen = len - startPos; } wchar_t *curNs = coco_string_create(nsName, startPos, curLen); fwprintf(gen, L"%ls_", curNs); coco_string_delete(curNs); startPos = startPos + curLen + 1; } while (startPos < len); } void Generator::SkipFramePart(const wchar_t *stop) { CopyFramePart(stop, false); } void Generator::CopyFramePart(const wchar_t *stop) { CopyFramePart(stop, true); } void Generator::CopyFramePart(const wchar_t* stop, bool generateOutput) { wchar_t startCh = 0; int endOfStopString = 0; wchar_t ch = 0; if (stop != NULL) { startCh = stop[0]; endOfStopString = coco_string_length(stop)-1; } fwscanf(fram, L"%lc", &ch); // fram.ReadByte(); while (!feof(fram)) { // ch != EOF if (stop != NULL && ch == startCh) { int i = 0; do { if (i == endOfStopString) return; // stop[0..i] found fwscanf(fram, L"%lc", &ch); i++; } while (ch == stop[i]); // stop[0..i-1] found; continue with last read character if (generateOutput) { wchar_t *subStop = coco_string_create(stop, 0, i); fwprintf(gen, L"%ls", subStop); coco_string_delete(subStop); } } else { if (generateOutput) { fwprintf(gen, L"%lc", ch); } fwscanf(fram, L"%lc", &ch); } } if (stop != NULL) { wchar_t *message = coco_string_create_append(L" -- Incomplete or corrupt frame file: ", frameFile); errors->Exception(message); delete [] message; } } }coco-cpp_20120102/DFA.h0000644000175000017500000001111311601423170012065 
0ustar mlml/*------------------------------------------------------------------------- DFA -- Generation of the Scanner Automaton Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ #if !defined(COCO_DFA_H__) #define COCO_DFA_H__ #include #include "Action.h" #include "Comment.h" #include "State.h" #include "Symbol.h" #include "Melted.h" #include "Node.h" #include "Target.h" namespace Coco { class Parser; class Tab; class BitArray; class DFA { public: int maxStates; int lastStateNr; // highest state number State *firstState; State *lastState; // last allocated state int lastSimState; // last non melted state FILE* fram; // scanner frame input FILE* gen; // generated scanner file Symbol *curSy; // current token to be recognized (in FindTrans) Node *curGraph; // start of graph for current token (in FindTrans) bool ignoreCase; // true if input should be treated case-insensitively bool dirtyDFA; // DFA may become nondeterministic in MatchLiteral bool hasCtxMoves; // DFA has context transitions bool *existLabel; // checking the Labels (in order to avoid the warning messages) Parser *parser; // other Coco objects Tab *tab; Errors *errors; FILE* trace; Melted *firstMelted; // head of melted state list Comment *firstComment; // list of comments //---------- Output primitives wchar_t* Ch(wchar_t ch); wchar_t* ChCond(wchar_t ch); void PutRange(CharSet *s); //---------- State handling State* NewState(); void NewTransition(State *from, State *to, int typ, int sym, int tc); void CombineShifts(); void FindUsedStates(State *state, BitArray *used); void DeleteRedundantStates(); State* TheState(Node *p); void Step(State *from, Node *p, BitArray *stepped); void NumberNodes(Node *p, State *state, bool renumIter); void FindTrans (Node *p, bool start, BitArray *marked); void ConvertToStates(Node *p, Symbol *sym); // match string against current automaton; store it either as a fixedToken or as a litToken void MatchLiteral(wchar_t* s, Symbol *sym); void SplitActions(State *state, Action *a, Action *b); bool Overlap(Action *a, Action *b); bool MakeUnique(State *state); // return true if 
actions were split void MeltStates(State *state); void FindCtxStates(); void MakeDeterministic(); void PrintStates(); void CheckLabels(); //---------------------------- actions -------------------------------- Action* FindAction(State *state, wchar_t ch); void GetTargetStates(Action *a, BitArray* &targets, Symbol* &endOf, bool &ctx); //------------------------- melted states ------------------------------ Melted* NewMelted(BitArray *set, State *state); BitArray* MeltedSet(int nr); Melted* StateWithSet(BitArray *s); //------------------------ comments -------------------------------- wchar_t* CommentStr(Node *p); void NewComment(Node *from, Node *to, bool nested); //------------------------ scanner generation ---------------------- void GenComBody(Comment *com); void GenCommentHeader(Comment *com, int i); void GenComment(Comment *com, int i); void CopyFramePart(const wchar_t* stop); wchar_t* SymName(Symbol *sym); // real name value is stored in Tab.literals void GenLiterals (); int GenNamespaceOpen(const wchar_t* nsName); void GenNamespaceClose(int nrOfNs); void WriteState(State *state); void WriteStartTab(); void OpenGen(const wchar_t* genName, bool backUp); /* pdt */ void WriteScanner(); DFA(Parser *parser); }; }; // namespace #endif // !defined(COCO_DFA_H__) coco-cpp_20120102/cygBuild.bat0000644000175000017500000000002611601423170013555 0ustar mlmlg++ *.cpp -o Coco.exe coco-cpp_20120102/Copyright.frame0000644000175000017500000000250411601423170014312 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ coco-cpp_20120102/Comment.h0000644000175000017500000000321711601423170013103 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #if !defined(COCO_COMMENT_H__) #define COCO_COMMENT_H__ #include namespace Coco { class Comment // info about comment syntax { public: wchar_t* start; wchar_t* stop; bool nested; Comment *next; Comment(wchar_t* start, wchar_t* stop, bool nested); virtual ~Comment(); }; }; // namespace #endif // !defined(COCO_COMMENT_H__) coco-cpp_20120102/Comment.cpp0000644000175000017500000000317011601423170013434 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ #include "Comment.h" #include "Scanner.h" namespace Coco { Comment::Comment(wchar_t* start, wchar_t* stop, bool nested) { this->start = coco_string_create(start); this->stop = coco_string_create(stop); this->nested = nested; } Comment::~Comment() { coco_string_delete(start); coco_string_delete(stop); } }; // namespace coco-cpp_20120102/Coco.atg0000644000175000017500000005650011601423170012713 0ustar mlml/*------------------------------------------------------------------------- Coco.ATG -- Attributed Grammar Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/
/*-------------------------------------------------------------------------
compile with:
   Coco Coco.ATG -namespace Coco
-------------------------------------------------------------------------*/

$namespace=Coco

#include "Tab.h"
#include "DFA.h"
#include "ParserGen.h"

COMPILER Coco

	int id;
	int str;

	FILE* trace;
	// other Coco objects referenced in this ATG
	Tab *tab;
	DFA *dfa;
	ParserGen *pgen;

	bool genScanner;
	wchar_t* tokenString;  // used in declarations of literal tokens
	wchar_t* noString;     // used in declarations of literal tokens

	// This method will be called by the constructor if it exists.
	// This support is specific to the C++ version of Coco/R.
	void Init() {
		tab = NULL;
		dfa = NULL;
		pgen = NULL;
		id  = 0;
		str = 1;
		tokenString = NULL;
		noString = coco_string_create(L"-none-");
	}

	// Uncomment this method if cleanup is necessary,
	// this method will be called by the destructor if it exists.
	// This support is specific to the C++ version of Coco/R.
	// void Destroy() {
		// nothing to do
	// }

/*-------------------------------------------------------------------------*/

CHARACTERS
	letter    = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".
	digit     = "0123456789".
	cr        = '\r'.
	lf        = '\n'.
	tab       = '\t'.
	stringCh  = ANY - '"' - '\\' - cr - lf.
	charCh    = ANY - '\'' - '\\' - cr - lf.
	printable = '\u0020' .. '\u007e'.
	hex       = "0123456789abcdef".

TOKENS
	ident     = letter { letter | digit }.
	number    = digit { digit }.
	string    = '"' { stringCh | '\\' printable } '"'.
	badString = '"' { stringCh | '\\' printable } (cr | lf).
	char      = '\'' ( charCh | '\\' printable { hex } ) '\''.

PRAGMAS
	ddtSym    = '$' { digit | letter }.  (. tab->SetDDT(la->val); .)

	optionSym = '$' letter { letter } '='
	            { digit | letter
	            | '-' | '.' | ':'
	            }.                       (. tab->SetOption(la->val); .)
COMMENTS FROM "/*" TO "*/" NESTED COMMENTS FROM "//" TO lf IGNORE cr + lf + tab /*-------------------------------------------------------------------------*/ PRODUCTIONS Coco (. Symbol *sym; Graph *g, *g1, *g2; wchar_t* gramName = NULL; CharSet *s; .) = (. int beg = la->pos; int line = la->line; .) { // this section can be used // for #include statements ANY } (. if (la->pos != beg) { pgen->usingPos = new Position(beg, t->pos + coco_string_length(t->val), 0, line); } .) "COMPILER" (. genScanner = true; tab->ignored = new CharSet(); .) ident (. gramName = coco_string_create(t->val); beg = la->pos; line = la->line; .) { ANY } (. tab->semDeclPos = new Position(beg, la->pos, 0, line); .) [ "IGNORECASE" (. dfa->ignoreCase = true; .) ] /* pdt */ [ "CHARACTERS" { SetDecl }] [ "TOKENS" { TokenDecl }] [ "PRAGMAS" { TokenDecl }] { "COMMENTS" (. bool nested = false; .) "FROM" TokenExpr "TO" TokenExpr [ "NESTED" (. nested = true; .) ] (. dfa->NewComment(g1->l, g2->l, nested); .) } { "IGNORE" Set (. tab->ignored->Or(s); .) } SYNC "PRODUCTIONS" (. if (genScanner) dfa->MakeDeterministic(); tab->DeleteNodes(); .) { ident (. sym = tab->FindSym(t->val); bool undef = (sym == NULL); if (undef) sym = tab->NewSym(Node::nt, t->val, t->line); else { if (sym->typ == Node::nt) { if (sym->graph != NULL) SemErr(L"name declared twice"); } else SemErr(L"this symbol kind not allowed on left side of production"); sym->line = t->line; } bool noAttrs = (sym->attrPos == NULL); sym->attrPos = NULL; .) [ AttrDecl ] (. if (!undef) if (noAttrs != (sym->attrPos == NULL)) SemErr(L"attribute mismatch between declaration and use of this symbol"); .) [ SemText<.sym->semPos.> ] WEAK '=' Expression (. sym->graph = g->l; tab->Finish(g); .) WEAK '.' } "END" ident (. 
if (!coco_string_equal(gramName, t->val)) SemErr(L"name does not match grammar name"); tab->gramSy = tab->FindSym(gramName); if (tab->gramSy == NULL) SemErr(L"missing production for grammar name"); else { sym = tab->gramSy; if (sym->attrPos != NULL) SemErr(L"grammar symbol must not have attributes"); } tab->noSym = tab->NewSym(Node::t, L"???", 0); // noSym gets highest number tab->SetupAnys(); tab->RenumberPragmas(); if (tab->ddt[2]) tab->PrintNodes(); if (errors->count == 0) { wprintf(L"checking\n"); tab->CompSymbolSets(); if (tab->ddt[7]) tab->XRef(); if (tab->GrammarOk()) { wprintf(L"parser"); pgen->WriteParser(); if (genScanner) { wprintf(L" + scanner"); dfa->WriteScanner(); if (tab->ddt[0]) dfa->PrintStates(); } wprintf(L" generated\n"); if (tab->ddt[8]) pgen->WriteStatistics(); } } if (tab->ddt[6]) tab->PrintSymbolTable(); .) '.' . /*------------------------------------------------------------------------------------*/ SetDecl (. CharSet *s; .) = ident (. wchar_t *name = coco_string_create(t->val); CharClass *c = tab->FindCharClass(name); if (c != NULL) SemErr(L"name declared twice"); .) '=' Set (. if (s->Elements() == 0) SemErr(L"character set must not be empty"); tab->NewCharClass(name, s); .) '.' . /*------------------------------------------------------------------------------------*/ Set (. CharSet *s2; .) = SimSet { '+' SimSet (. s->Or(s2); .) | '-' SimSet (. s->Subtract(s2); .) } . /*------------------------------------------------------------------------------------*/ SimSet (. int n1, n2; .) = (. s = new CharSet(); .) ( ident (. CharClass *c = tab->FindCharClass(t->val); if (c == NULL) SemErr(L"undefined name"); else s->Or(c->set); .) | string (. 
wchar_t *subName2 = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t *name = tab->Unescape(subName2); coco_string_delete(subName2); wchar_t ch; int len = coco_string_length(name); for(int i=0; i < len; i++) { ch = name[i]; if (dfa->ignoreCase) { if ((L'A' <= ch) && (ch <= L'Z')) ch = ch - (L'A' - L'a'); // ch.ToLower() } s->Set(ch); } coco_string_delete(name); .) | Char (. s->Set(n1); .) [ ".." Char (. for (int i = n1; i <= n2; i++) s->Set(i); .) ] | "ANY" (. s = new CharSet(); s->Fill(); .) ) . /*--------------------------------------------------------------------------------------*/ Char = char (. n = 0; wchar_t* subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2); wchar_t* name = tab->Unescape(subName); coco_string_delete(subName); // "<= 1" instead of "== 1" to allow the escape sequence '\0' in c++ if (coco_string_length(name) <= 1) n = name[0]; else SemErr(L"unacceptable character value"); coco_string_delete(name); if (dfa->ignoreCase && (((wchar_t) n) >= 'A') && (((wchar_t) n) <= 'Z')) n += 32; .) . /*------------------------------------------------------------------------------------*/ TokenDecl (. wchar_t* name = NULL; int kind; Symbol *sym; Graph *g; .) = Sym (. sym = tab->FindSym(name); if (sym != NULL) SemErr(L"name declared twice"); else { sym = tab->NewSym(typ, name, t->line); sym->tokenKind = Symbol::fixedToken; } tokenString = NULL; .) SYNC ( '=' TokenExpr '.' (. if (kind == str) SemErr(L"a literal must not be declared with a structure"); tab->Finish(g); if (tokenString == NULL || coco_string_equal(tokenString, noString)) dfa->ConvertToStates(g->l, sym); else { // TokenExpr is a single string if ((*(tab->literals))[tokenString] != NULL) SemErr(L"token string declared twice"); tab->literals->Set(tokenString, sym); dfa->MatchLiteral(tokenString, sym); } .) | (. if (kind == id) genScanner = false; else dfa->MatchLiteral(sym->name, sym); .) ) [ SemText<.sym->semPos.> (. 
if (typ != Node::pr) SemErr(L"semantic action not allowed here"); .) ] . /*------------------------------------------------------------------------------------*/ AttrDecl = '<' (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY | badString (. SemErr(L"bad string in attributes"); .) } '>' (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) | "<." (. int beg = la->pos; int col = la->col; int line = la->line; .) { ANY | badString (. SemErr(L"bad string in attributes"); .) } ".>" (. if (t->pos > beg) sym->attrPos = new Position(beg, t->pos, col, line); .) . /*------------------------------------------------------------------------------------*/ Expression (. Graph *g2; .) = Term (. bool first = true; .) { WEAK '|' Term (. if (first) { tab->MakeFirstAlt(g); first = false; } tab->MakeAlternative(g, g2); .) } . /*------------------------------------------------------------------------------------*/ Term (. Graph *g2; Node *rslv = NULL; g = NULL; .) = ( [ (. rslv = tab->NewNode(Node::rslv, (Symbol*)NULL, la->line); .) Resolver<.rslv->pos.> (. g = new Graph(rslv); .) ] Factor (. if (rslv != NULL) tab->MakeSequence(g, g2); else g = g2; .) { Factor (. tab->MakeSequence(g, g2); .) } | (. g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) ) (. if (g == NULL) // invalid start of Term g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .) . /*------------------------------------------------------------------------------------*/ Factor (. wchar_t* name = NULL; int kind; Position *pos; bool weak = false; g = NULL; .) = ( [ "WEAK" (. weak = true; .) ] Sym (. 
Symbol *sym = tab->FindSym(name); if (sym == NULL && kind == str) sym = (Symbol*)((*(tab->literals))[name]); bool undef = (sym == NULL); if (undef) { if (kind == id) sym = tab->NewSym(Node::nt, name, 0); // forward nt else if (genScanner) { sym = tab->NewSym(Node::t, name, t->line); dfa->MatchLiteral(sym->name, sym); } else { // undefined string in production SemErr(L"undefined string in production"); sym = tab->eofSy; // dummy } } int typ = sym->typ; if (typ != Node::t && typ != Node::nt) SemErr(L"this symbol kind is not allowed in a production"); if (weak) { if (typ == Node::t) typ = Node::wt; else SemErr(L"only terminals may be weak"); } Node *p = tab->NewNode(typ, sym, t->line); g = new Graph(p); .) [ Attribs

(. if (kind != id) SemErr(L"a literal must not have attributes"); .)
  ]                           (. if (undef) sym->attrPos = p->pos; // dummy
                                 else if ((p->pos == NULL) != (sym->attrPos == NULL))
                                   SemErr(L"attribute mismatch between declaration and use of this symbol");
                              .)
| '(' Expression ')'
| '[' Expression ']'          (. tab->MakeOption(g); .)
| '{' Expression '}'          (. tab->MakeIteration(g); .)
| SemText                     (. Node *p = tab->NewNode(Node::sem, (Symbol*)NULL, 0);
                                 p->pos = pos;
                                 g = new Graph(p);
                              .)
| "ANY"                       (. Node *p = tab->NewNode(Node::any, (Symbol*)NULL, 0); // p.set is set in tab->SetupAnys
                                 g = new Graph(p);
                              .)
| "SYNC"                      (. Node *p = tab->NewNode(Node::sync, (Symbol*)NULL, 0);
                                 g = new Graph(p);
                              .)
)                             (. if (g == NULL) // invalid start of Factor
                                   g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0));
                              .)
.

/*------------------------------------------------------------------------------------*/

/* NOTE(review): the productions below use g, name, kind, p and pos without any
   visible declaration, and no production head or call site carries an attribute
   list. The attribute lists look like they were lost from this copy of the
   grammar - confirm against the upstream Coco.atg before regenerating the parser. */

/* Resolver: accepts "IF" "(" followed by Condition. The offset, column and line
   of la are captured right after the "(" and, together with t->pos after
   Condition, stored in a new Position assigned to pos. */
Resolver
=
  "IF" "("                    (. int beg = la->pos; int col = la->col; int line = la->line; .)
  Condition                   (. pos = new Position(beg, t->pos, col, line); .)
.

/*------------------------------------------------------------------------------------*/

/* Condition: consumes arbitrary tokens up to the closing ")". A nested "("
   recurses into Condition, so parentheses inside the condition stay balanced. */
Condition = { "(" Condition | ANY } ")" .

/*------------------------------------------------------------------------------------*/

/* TokenExpr: one TokenTerm followed by any number of '|' TokenTerm alternatives.
   MakeFirstAlt(g) is called once, when the first '|' alternative is seen; every
   alternative is then merged into g with MakeAlternative(g, g2). */
TokenExpr                     (. Graph *g2; .)
=
  TokenTerm                   (. bool first = true; .)
  {                           WEAK
    '|'
    TokenTerm                 (. if (first) { tab->MakeFirstAlt(g); first = false; }
                                 tab->MakeAlternative(g, g2);
                              .)
  }
.

/*------------------------------------------------------------------------------------*/

/* TokenTerm: a sequence of TokenFactors chained with MakeSequence(g, g2).
   An optional trailing "CONTEXT" '(' TokenExpr ')' part is passed to
   SetContextTrans, sets dfa->hasCtxMoves and is appended to the sequence too. */
TokenTerm                     (. Graph *g2; .)
=
  TokenFactor
  { TokenFactor               (. tab->MakeSequence(g, g2); .)
  }
  [ "CONTEXT"
    '(' TokenExpr             (. tab->SetContextTrans(g2->l); dfa->hasCtxMoves = true;
                                 tab->MakeSequence(g, g2); .)
    ')'
  ]
.

/*------------------------------------------------------------------------------------*/

/* TokenFactor: a Sym, or a parenthesized / optional [ ] / iterated { } TokenExpr.
   - Sym of kind id: looked up as a character class; an unknown name reports
     "undefined name" and gets a new class with an empty CharSet so parsing can
     continue. The resulting node is of type Node::clas with val = class number.
   - Sym of kind str: converted with StrToGraph.
   tokenString is set to a copy of name only for a str Sym seen while tokenString
   is still NULL; every other alternative with an action sets it to a copy of
   noString. If no alternative produced a graph, g becomes a single eps node. */
TokenFactor                   (. wchar_t* name = NULL; int kind; .)
=
                              (. g = NULL; .)
( Sym                         (. if (kind == id) {
                                   CharClass *c = tab->FindCharClass(name);
                                   if (c == NULL) {
                                     SemErr(L"undefined name");
                                     c = tab->NewCharClass(name, new CharSet());
                                   }
                                   Node *p = tab->NewNode(Node::clas, (Symbol*)NULL, 0); p->val = c->n;
                                   g = new Graph(p);
                                   tokenString = coco_string_create(noString);
                                 } else { // str
                                   g = tab->StrToGraph(name);
                                   if (tokenString == NULL) tokenString = coco_string_create(name);
                                   else tokenString = coco_string_create(noString);
                                 }
                              .)
| '(' TokenExpr ')'
| '[' TokenExpr ']'           (. tab->MakeOption(g); tokenString = coco_string_create(noString); .)
| '{' TokenExpr '}'           (. tab->MakeIteration(g); tokenString = coco_string_create(noString); .)
)                             (. if (g == NULL) // invalid start of TokenFactor
                                   g = new Graph(tab->NewNode(Node::eps, (Symbol*)NULL, 0)); .)
.

/*------------------------------------------------------------------------------------*/

/* Sym: an ident, a string or a char literal.
   name starts as a copy of L"???" with kind = id; each alternative deletes that
   string and replaces it with a newly created one.
   - ident:  name = copy of t->val, kind = id.
   - string: name = copy of t->val.
   - char:   the first and last character of t->val are dropped and the rest is
             wrapped in double quotes, so a char literal is named like a string.
   For string and char: kind = str, name is lower-cased when dfa->ignoreCase is
   set, and a name containing a blank is reported as an error. */
Sym
=                             (. name = coco_string_create(L"???"); kind = id; .)
( ident                       (. kind = id; coco_string_delete(name); name = coco_string_create(t->val); .)
| (string                     (. coco_string_delete(name); name = coco_string_create(t->val); .)
  | char                      (.
                                 wchar_t *subName = coco_string_create(t->val, 1, coco_string_length(t->val)-2);
                                 coco_string_delete(name);
                                 name = coco_string_create_append(L"\"", subName);
                                 coco_string_delete(subName);
                                 coco_string_merge(name, L"\"");
                              .)
  )                           (. kind = str;
                                 if (dfa->ignoreCase) {
                                   wchar_t *oldName = name;
                                   name = coco_string_create_lower(name);
                                   coco_string_delete(oldName);
                                 }
                                 if (coco_string_indexof(name, ' ') >= 0)
                                   SemErr(L"literal tokens must not contain blanks"); .)
)
.

/*------------------------------------------------------------------------------------*/

/* Attribs: an attribute list in either bracket form, '<' ... '>' or "<." ... ".>".
   Everything between the brackets is skipped with ANY; a badString inside is
   reported. p->pos is only assigned when t->pos > beg at the closing bracket;
   otherwise it is left untouched. */
Attribs
=
  '<'                         (. int beg = la->pos; int col = la->col; int line = la->line; .)
  { ANY
  | badString                 (. SemErr(L"bad string in attributes"); .)
  }
  '>'                         (. if (t->pos > beg)
                                   p->pos = new Position(beg, t->pos, col, line); .)
| "<."                        (. int beg = la->pos; int col = la->col; int line = la->line; .)
  { ANY
  | badString                 (. SemErr(L"bad string in attributes"); .)
  }
  ".>"                        (.
if (t->pos > beg) p->pos = new Position(beg, t->pos, col, line); .)
.

/*------------------------------------------------------------------------------------*/

/* SemText: a semantic action delimited by "(." and ".)".
   Its content is skipped with ANY. A badString, or a second opening bracket
   before the closing one, is reported as an error. Unlike Resolver and Attribs,
   the recorded line comes from t (the opening bracket just read) rather than
   from la; pos is always assigned, even for an empty action. */
SemText
=
  "(."                        (. int beg = la->pos; int col = la->col; int line = t->line; .)
  { ANY
  | badString                 (. SemErr(L"bad string in semantic action"); .)
  | "(."                      (. SemErr(L"missing end of previous semantic action"); .)
  }
  ".)"                        (. pos = new Position(beg, t->pos, col, line); .)
.

/*------------------------------------------------------------------------------------*/

END Coco.
coco-cpp_20120102/coc.sh0000644000175000017500000000005211601423170012422 0ustar mlml#!/bin/sh ./Coco Coco.atg -namespace Coco coco-cpp_20120102/coc.bat0000644000175000017500000000003511601423170012557 0ustar mlmlcoco Coco.ATG -namespace Cocococo-cpp_20120102/CharSet.h0000644000175000017500000000375011601423170013034 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software.
If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ #if !defined(COCO_CHARSET_H__) #define COCO_CHARSET_H__ #include namespace Coco { class CharSet { public: class Range { public: int from; int to; Range *next; Range(int from, int to) { this->from = from; this->to = to; next = NULL; }; }; Range *head; CharSet() { head = NULL; }; virtual ~CharSet(); bool Get(int i) const; void Set(int i); CharSet* Clone() const; bool Equals(CharSet *s) const; int Elements() const; int First() const; void Or(CharSet *s); void And(CharSet *s); void Subtract(CharSet *s); bool Includes(CharSet *s) const; bool Intersects(CharSet *s) const; void Clear(); void Fill(); }; } // namespace #endif // !defined(COCO_CHARSET_H__) coco-cpp_20120102/CharSet.cpp0000644000175000017500000001012511601423170013361 0ustar mlml/*---------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ #include #include #include #include #include "CharSet.h" #include "Scanner.h" namespace Coco { bool CharSet::Get(int i) const { for (CharSet::Range *p = head; p != NULL; p = p->next) if (i < p->from) return false; else if (i <= p->to) return true; // p.from <= i <= p.to return false; } void CharSet::Set(int i) { Range *cur = head, *prev = NULL; while (cur != NULL && i >= cur->from-1) { if (i <= cur->to + 1) { // (cur.from-1) <= i <= (cur.to+1) if (i == cur->from - 1) cur->from--; else if (i == cur->to + 1) { cur->to++; Range *next = cur->next; if (next != NULL && cur->to == next->from - 1) { cur->to = next->to; cur->next = next->next; delete next; }; } return; } prev = cur; cur = cur->next; } Range *n = new Range(i, i); n->next = cur; if (prev == NULL) head = n; else prev->next = n; } CharSet* CharSet::Clone() const { CharSet *s = new CharSet(); Range *prev = NULL; for (Range *cur = head; cur != NULL; cur = cur->next) { Range *r = new Range(cur->from, cur->to); if (prev == NULL) s->head = r; else prev->next = r; prev = r; } return s; } bool CharSet::Equals(CharSet *s) const { Range *p = head, *q = s->head; while (p != NULL && q != NULL) { if (p->from != q->from || p->to != q->to) return false; p = p->next; q = q->next; } return p == q; } int CharSet::Elements() const { int n = 0; for (Range *p = head; p != NULL; p = p->next) n += p->to - p->from + 1; return n; } int CharSet::First() const { if (head != NULL) return head->from; return -1; } void CharSet::Or(CharSet *s) { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) Set(i); } void CharSet::And(CharSet *s) { CharSet *x = new CharSet(); Range *p = head; while 
(p != NULL) { for (int i = p->from; i <= p->to; i++) if (s->Get(i)) x->Set(i); Range *del = p; p = p->next; delete del; } head = x->head; x->head = NULL; delete x; } void CharSet::Subtract(CharSet *s) { CharSet *x = new CharSet(); Range *p = head; while (p != NULL) { for (int i = p->from; i <= p->to; i++) if (!s->Get(i)) x->Set(i); Range *del = p; p = p->next; delete del; } head = x->head; x->head = NULL; delete x; } bool CharSet::Includes(CharSet *s) const { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) if (!Get(i)) return false; return true; } bool CharSet::Intersects(CharSet *s) const { for (Range *p = s->head; p != NULL; p = p->next) for (int i = p->from; i <= p->to; i++) if (Get(i)) return true; return false; } void CharSet::Clear() { while (head != NULL) { Range *del = head; head = head->next; delete del; } } void CharSet::Fill() { Clear(); head = new Range(0, COCO_WCHAR_MAX); } CharSet::~CharSet() { Clear(); } } coco-cpp_20120102/CharClass.h0000644000175000017500000000322511601423170013343 0ustar mlml/*------------------------------------------------------------------------- Compiler Generator Coco/R, Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz ported to C++ by Csaba Balazs, University of Szeged with improvements by Pat Terry, Rhodes University This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/

#if !defined(COCO_CHARCLASS_H__)
#define COCO_CHARCLASS_H__

#include "CharSet.h"

namespace Coco {

// A named character class: a class number, a name and the CharSet holding its
// members.
// NOTE(review): the constructor and destructor are defined outside this header,
// so whether `name` is copied and who deletes `name` and `set` cannot be told
// from here - check the implementation before sharing either pointer.
class CharClass
{
public:
	int n;       	// class number
	wchar_t* name;	// class name
	CharSet *set;	// set representing the class

	// Creates a class called `name` whose members are given by `s`.
	CharClass(const wchar_t* name, CharSet *s);
	virtual ~CharClass();
};

}; // namespace

#endif // !defined(COCO_CHARCLASS_H__)
coco-cpp_20120102/Makefile0000644000175000017500000000034311553316520012773 0ustar mlmlall: g++ *.cpp -o Coco $(CFLAGS) clean: rm -f Coco install: ln -s /usr/lib/coco-cpp/Coco $(DESTDIR)/usr/bin/cococpp install -m 0755 Coco $(DESTDIR)/usr/lib/coco-cpp install -m 0644 *frame $(DESTDIR)/usr/share/coco-cpp coco-cpp_20120102/mingwbuild.bat0000644000175000017500000000004410710077322014160 0ustar mlmlC:\MinGW\bin\g++ *.cpp -o Coco.exe coco-cpp_20120102/0000755000175000017500000000000011706606005011333 5ustar mlml