expat-1.2/0040775000076400007640000000000007167265675010636 5ustar jjcjjcexpat-1.2/expat.html0100664000076400007640000000641707167265121012634 0ustar jjcjjc expat

expat - XML Parser Toolkit

Version 1.2

Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd. Expat is freely available with source under a very liberal license (the MIT license).

This is a production version of expat. Relative to expat 1.1, it adds support for parsing external DTDs and parameter entities. Compiling with -DXML_DTD enables this support. There's a new -p option for xmlwf which will cause it to process external DTDs and parameter entities; this implies the -x option. See the comment above XML_SetParamEntityParsing in xmlparse.h for the API addition that enables this.

Expat is an XML 1.0 parser written in C. It aims to be fully conforming. It is currently not a validating XML processor. The current production version of expat 1.X can be downloaded from ftp://ftp.jclark.com/pub/xml/expat.zip.

Development of expat 2.0 is being handled by a team led by Clark Cooper, hosted by sourceforge.net. See http://expat.sourceforge.net for the latest on expat 2.0.

The directory xmltok contains a low-level library for tokenizing XML. The interface is documented in xmltok/xmltok.h.

The directory xmlparse contains an XML parser library which is built on top of the xmltok library. The interface is documented in xmlparse/xmlparse.h. The directory sample contains a simple example program using this interface; sample/build.bat is a batch file to build the example using Visual C++.

The directory xmlwf contains the xmlwf application, which uses the xmlparse library. The arguments to xmlwf are one or more files which are each to be checked for well-formedness. An option -d dir can be specified; for each well-formed input file the corresponding canonical XML will be written to dir/f, where f is the filename (without any path) of the input file. A -x option will cause references to external general entities to be processed. A -s option will make documents that are not standalone cause an error. A document is considered standalone if either (a) it is intrinsically standalone, because it has no external subset and no references to parameter entities in the internal subset, or (b) it is declared as standalone in the XML declaration.

The bin directory contains Win32 executables. The lib directory contains Win32 import libraries.

Answers to some frequently asked questions about expat can be found in the expat FAQ.

James Clark
expat-1.2/xmltok/0040775000076400007640000000000007167265632012145 5ustar jjcjjcexpat-1.2/xmltok/ascii.h0100664000076400007640000000342707077753353013413 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ /* Named constants for ASCII code points. Each ASCII_x macro expands to the numeric ASCII value of the character it names (for example ASCII_A is 0x41), not to a C character literal. NOTE(review): presumably this keeps keyword tables independent of the compiler's execution character set - confirm against the users of this header. */ /* Upper-case letters A-Z: 0x41-0x5A. */ #define ASCII_A 0x41 #define ASCII_B 0x42 #define ASCII_C 0x43 #define ASCII_D 0x44 #define ASCII_E 0x45 #define ASCII_F 0x46 #define ASCII_G 0x47 #define ASCII_H 0x48 #define ASCII_I 0x49 #define ASCII_J 0x4A #define ASCII_K 0x4B #define ASCII_L 0x4C #define ASCII_M 0x4D #define ASCII_N 0x4E #define ASCII_O 0x4F #define ASCII_P 0x50 #define ASCII_Q 0x51 #define ASCII_R 0x52 #define ASCII_S 0x53 #define ASCII_T 0x54 #define ASCII_U 0x55 #define ASCII_V 0x56 #define ASCII_W 0x57 #define ASCII_X 0x58 #define ASCII_Y 0x59 #define ASCII_Z 0x5A /* Lower-case letters a-z: 0x61-0x7A. */ #define ASCII_a 0x61 #define ASCII_b 0x62 #define ASCII_c 0x63 #define ASCII_d 0x64 #define ASCII_e 0x65 #define ASCII_f 0x66 #define ASCII_g 0x67 #define ASCII_h 0x68 #define ASCII_i 0x69 #define ASCII_j 0x6A #define ASCII_k 0x6B #define ASCII_l 0x6C #define ASCII_m 0x6D #define ASCII_n 0x6E #define ASCII_o 0x6F #define ASCII_p 0x70 #define ASCII_q 0x71 #define ASCII_r 0x72 #define ASCII_s 0x73 #define ASCII_t 0x74 #define ASCII_u 0x75 #define ASCII_v 0x76 #define ASCII_w 0x77 #define ASCII_x 0x78 #define ASCII_y 0x79 #define ASCII_z 0x7A /* Decimal digits 0-9: 0x30-0x39. */ #define ASCII_0 0x30 #define ASCII_1 0x31 #define ASCII_2 0x32 #define ASCII_3 0x33 #define ASCII_4 0x34 #define ASCII_5 0x35 #define ASCII_6 0x36 #define ASCII_7 0x37 #define ASCII_8 0x38 #define ASCII_9 0x39 /* Whitespace and punctuation. */ #define ASCII_TAB 0x09 #define ASCII_SPACE 0x20 #define ASCII_EXCL 0x21 #define ASCII_QUOT 0x22 #define ASCII_AMP 0x26 #define ASCII_APOS 0x27 #define ASCII_MINUS 0x2D #define ASCII_PERIOD 0x2E #define ASCII_COLON 0x3A #define ASCII_SEMI 0x3B #define ASCII_LT 0x3C #define ASCII_EQUALS 0x3D #define ASCII_GT 0x3E #define ASCII_LSQB 0x5B #define ASCII_RSQB 0x5D #define ASCII_UNDERSCORE 0x5F 
expat-1.2/xmltok/asciitab.h0100664000076400007640000000334107077753353014075 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, /* 0x0C */ BT_NONXML, BT_CR, BT_NONXML, BT_NONXML, /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x74 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, expat-1.2/xmltok/dllmain.c0100664000076400007640000000043707077753351013732 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center 
Ltd See the file copying.txt for copying permission. */ #define STRICT 1 #define WIN32_LEAN_AND_MEAN 1 #include BOOL WINAPI DllMain(HANDLE hInst, ULONG ul_reason_for_call, LPVOID lpReserved) { return TRUE; } expat-1.2/xmltok/iasciitab.h0100664000076400007640000000344707077753353014255 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ /* Like asciitab.h, except that 0xD has code BT_S rather than BT_CR */ /* 0x00 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x04 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x08 */ BT_NONXML, BT_S, BT_LF, BT_NONXML, /* 0x0C */ BT_NONXML, BT_S, BT_NONXML, BT_NONXML, /* 0x10 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x14 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x18 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x1C */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0x20 */ BT_S, BT_EXCL, BT_QUOT, BT_NUM, /* 0x24 */ BT_OTHER, BT_PERCNT, BT_AMP, BT_APOS, /* 0x28 */ BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, /* 0x2C */ BT_COMMA, BT_MINUS, BT_NAME, BT_SOL, /* 0x30 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, /* 0x34 */ BT_DIGIT, BT_DIGIT, BT_DIGIT, BT_DIGIT, /* 0x38 */ BT_DIGIT, BT_DIGIT, BT_COLON, BT_SEMI, /* 0x3C */ BT_LT, BT_EQUALS, BT_GT, BT_QUEST, /* 0x40 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, /* 0x44 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, /* 0x48 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x4C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x50 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x54 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x58 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_LSQB, /* 0x5C */ BT_OTHER, BT_RSQB, BT_OTHER, BT_NMSTRT, /* 0x60 */ BT_OTHER, BT_HEX, BT_HEX, BT_HEX, /* 0x64 */ BT_HEX, BT_HEX, BT_HEX, BT_NMSTRT, /* 0x68 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x6C */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x70 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x74 */ BT_NMSTRT, 
BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0x78 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, /* 0x7C */ BT_VERBAR, BT_OTHER, BT_OTHER, BT_OTHER, expat-1.2/xmltok/latin1tab.h0100664000076400007640000000342607077753353014201 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ /* 0x80 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0x84 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0x88 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0x8C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0x90 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0x94 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0x98 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0x9C */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0xA0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0xA4 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0xA8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, /* 0xAC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0xB0 */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0xB4 */ BT_OTHER, BT_NMSTRT, BT_OTHER, BT_NAME, /* 0xB8 */ BT_OTHER, BT_OTHER, BT_NMSTRT, BT_OTHER, /* 0xBC */ BT_OTHER, BT_OTHER, BT_OTHER, BT_OTHER, /* 0xC0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xC4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xC8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xCC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xD0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xD4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, /* 0xD8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xDC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xE0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xE4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xE8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xEC */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xF0 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xF4 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_OTHER, /* 0xF8 */ BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, /* 0xFC */ 
BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, BT_NMSTRT, expat-1.2/xmltok/nametab.h0100664000076400007640000001561207167263450013723 0ustar jjcjjcstatic const unsigned namingBitmap[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF, 0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, 0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF, 0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000, 0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060, 0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, 0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003, 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, 0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001, 0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003, 0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000, 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, 0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003, 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000, 0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF, 0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB, 0x40000000, 0xF580C900, 0x00000007, 0x02010800, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF, 
0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, 0x00000000, 0x00004C40, 0x00000000, 0x00000000, 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, 0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF, 0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000, 0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, 0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, 0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, 0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD, 0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, 0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF, 0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, 0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, 0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, 0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF, 0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, 0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF, 0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0, 0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1, 0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, 0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80, 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, 0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3, 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, 0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000, 0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, 0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1FFF0000, 0x00000002, 0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, 0x661FFFFF, 0xFFFFFFFE, 
0xFFFFFFFF, 0x77FFFFFF, }; static const unsigned char nmstrtPages[] = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, 0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; static const unsigned char namePages[] = { 0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, 0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, 0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; expat-1.2/xmltok/utf8tab.h0100664000076400007640000000334407077753354013677 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ /* 0x80 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0x84 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0x88 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0x8C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0x90 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0x94 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0x98 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0x9C */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xA0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xA4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xA8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xAC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xB0 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xB4 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xB8 */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xBC */ BT_TRAIL, BT_TRAIL, BT_TRAIL, BT_TRAIL, /* 0xC0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xC4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xC8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xCC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xD0 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xD4 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xD8 */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xDC */ BT_LEAD2, BT_LEAD2, BT_LEAD2, BT_LEAD2, /* 0xE0 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, /* 0xE4 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, /* 0xE8 */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, /* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3, /* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4, /* 0xF4 */ BT_LEAD4, BT_NONXML, BT_NONXML, BT_NONXML, /* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML, /* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM, expat-1.2/xmltok/xmldef.h0100664000076400007640000000222007106151013013542 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ /* xmldef.h: per-environment overrides, mainly of the C allocator names. NOTE(review): the three bare #include directives on this line carry no header name in this copy of the file; the operands appear to have been lost and should be restored from the original distribution. */ #include #ifdef XML_WINLIB #define WIN32_LEAN_AND_MEAN #define STRICT #include /* With XML_WINLIB defined, malloc, calloc, free and realloc are redirected to HeapAlloc, HeapFree and HeapReAlloc on the heap returned by GetProcessHeap(), and abort() expands to nothing. NOTE(review): calloc multiplies (x)*(y) without an overflow check, and realloc passes x and y through unparenthesized, unlike the other wrappers. */ #define malloc(x) HeapAlloc(GetProcessHeap(), 0, (x)) #define calloc(x, y) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (x)*(y)) #define free(x) HeapFree(GetProcessHeap(), 0, (x)) #define realloc(x, y) HeapReAlloc(GetProcessHeap(), 0, x, y) #define abort() /* as nothing */ #else /* not XML_WINLIB */ #include #endif /* not XML_WINLIB */ /* This file can be used for any definitions needed in particular environments. */ /* Mozilla specific defines */ /* With MOZILLA_CLIENT defined, the allocator names are redirected to PR_Malloc, PR_Realloc, PR_Calloc and PR_Free from nspr.h, the keyword int is redefined as int32 when PR_BYTES_PER_INT is not 4, and XML_UNICODE and XML_DTD are defined if they were not already. */ #ifdef MOZILLA_CLIENT #include "nspr.h" #define malloc(x) PR_Malloc((size_t)(x)) #define realloc(x, y) PR_Realloc((x), (size_t)(y)) #define calloc(x, y) PR_Calloc((x),(y)) #define free(x) PR_Free(x) #if PR_BYTES_PER_INT != 4 #define int int32 #endif /* Enable Unicode string processing in expat. */ #ifndef XML_UNICODE #define XML_UNICODE #endif /* Enable external parameter entity parsing in expat */ #ifndef XML_DTD #define XML_DTD 1 #endif #endif /* MOZILLA_CLIENT */ expat-1.2/xmltok/xmlrole.c0100664000076400007640000006752307106706745014003 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ #include "xmldef.h" #include "xmlrole.h" #include "ascii.h" /* Doesn't check: that ,| are not mixed in a model group content of literals */ /* Keyword tables. Each KW_* array spells one keyword as a NUL-terminated sequence of ASCII_* constants from ascii.h (KW_ANY is ASCII_A, ASCII_N, ASCII_Y, then the terminator). The state handlers in this file pass several of them to XmlNameMatchesAscii to recognise a name in the input. */ static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' }; static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' }; static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' }; static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' }; static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' }; static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' }; static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' }; static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' }; static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' }; static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' }; static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' }; static const char KW_PCDATA[] = 
{ ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' }; static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' }; static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' }; /* MIN_BYTES_PER_CHAR(enc) reads the minBytesPerChar member of the encoding unless the macro was already defined before this point. */ #ifndef MIN_BYTES_PER_CHAR #define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar) #endif /* setTopLevel(state) assigns state->handler. With XML_DTD it chooses internalSubset when state->documentEntity is nonzero and externalSubset1 otherwise; without XML_DTD it always chooses internalSubset. */ #ifdef XML_DTD #define setTopLevel(state) \ ((state)->handler = ((state)->documentEntity \ ? internalSubset \ : externalSubset1)) #else /* not XML_DTD */ #define setTopLevel(state) ((state)->handler = internalSubset) #endif /* not XML_DTD */ /* Function type shared by every state handler below. A handler receives the prolog state, a token code (XML_TOK_*), the ptr/end pair that is forwarded to XmlNameMatchesAscii (presumably the bounds of the token text - confirm in xmltok.h) and the encoding, and returns an int role code (XML_ROLE_*). Handlers advance the state machine by assigning state->handler. */ typedef int PROLOG_HANDLER(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc); /* Forward declarations of all handlers; the external-subset and conditional-section handlers exist only when XML_DTD is defined. */ static PROLOG_HANDLER prolog0, prolog1, prolog2, doctype0, doctype1, doctype2, doctype3, doctype4, doctype5, internalSubset, entity0, entity1, entity2, entity3, entity4, entity5, entity6, entity7, entity8, entity9, notation0, notation1, notation2, notation3, notation4, attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6, attlist7, attlist8, attlist9, element0, element1, element2, element3, element4, element5, element6, element7, #ifdef XML_DTD externalSubset0, externalSubset1, condSect0, condSect1, condSect2, #endif /* XML_DTD */ declClose, error; /* Fallback that every handler calls for a token it does not accept; defined elsewhere in this file. */ static int common(PROLOG_STATE *state, int tok); /* prolog0: whitespace, an XML declaration (role XML_ROLE_XML_DECL), a processing instruction or a comment all move to prolog1; a BOM returns XML_ROLE_NONE without changing the handler. A declaration whose name, two characters past ptr, matches DOCTYPE moves to doctype0. XML_TOK_INSTANCE_START returns XML_ROLE_INSTANCE_START and installs the error handler. Everything else goes to common(). */ static int prolog0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: state->handler = prolog1; return XML_ROLE_NONE; case XML_TOK_XML_DECL: state->handler = prolog1; return XML_ROLE_XML_DECL; case XML_TOK_PI: state->handler = prolog1; return XML_ROLE_NONE; case XML_TOK_COMMENT: state->handler = prolog1; /* no break: falls through to share the BOM case's return */ case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_DOCTYPE)) break; state->handler = doctype0; 
return XML_ROLE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } return common(state, tok); } /* prolog1: like prolog0 but with no case for XML_TOK_XML_DECL. Whitespace, processing instructions, comments and a BOM return XML_ROLE_NONE and keep this handler; a declaration whose name, two characters past ptr, matches DOCTYPE moves to doctype0; XML_TOK_INSTANCE_START returns XML_ROLE_INSTANCE_START and installs the error handler. */ static int prolog1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: case XML_TOK_COMMENT: case XML_TOK_BOM: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (!XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_DOCTYPE)) break; state->handler = doctype0; return XML_ROLE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } return common(state, tok); } /* prolog2: installed by the doctype handlers when the DOCTYPE declaration closes. Only whitespace, processing instructions and comments are skipped; XML_TOK_INSTANCE_START returns XML_ROLE_INSTANCE_START and installs the error handler. There is no case for a further declaration or a BOM. */ static int prolog2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PI: case XML_TOK_COMMENT: return XML_ROLE_NONE; case XML_TOK_INSTANCE_START: state->handler = error; return XML_ROLE_INSTANCE_START; } return common(state, tok); } /* doctype0: installed after the DOCTYPE keyword. A name or prefixed name is reported as XML_ROLE_DOCTYPE_NAME and moves to doctype1. */ static int doctype0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: case XML_TOK_PREFIXED_NAME: state->handler = doctype1; return XML_ROLE_DOCTYPE_NAME; } return common(state, tok); } /* doctype1: after the doctype name. An open bracket enters internalSubset; a declaration close returns XML_ROLE_DOCTYPE_CLOSE and moves to prolog2; the name SYSTEM moves to doctype3 and the name PUBLIC to doctype2. Any other name breaks out to common(). */ static int doctype1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = doctype3; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = doctype2; return XML_ROLE_NONE; } break; } return common(state, tok); } /* doctype2: after PUBLIC in a DOCTYPE declaration. A literal is reported as XML_ROLE_DOCTYPE_PUBLIC_ID and moves to doctype3. */ static int doctype2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const 
ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = doctype3; return XML_ROLE_DOCTYPE_PUBLIC_ID; } return common(state, tok); } /* doctype3: reached after SYSTEM (from doctype1) or after the public identifier (from doctype2). A literal is reported as XML_ROLE_DOCTYPE_SYSTEM_ID and moves to doctype4. */ static int doctype3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = doctype4; return XML_ROLE_DOCTYPE_SYSTEM_ID; } return common(state, tok); } /* doctype4: after the system identifier. An open bracket enters internalSubset; a declaration close returns XML_ROLE_DOCTYPE_CLOSE and moves to prolog2. */ static int doctype4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_OPEN_BRACKET: state->handler = internalSubset; return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } return common(state, tok); } /* doctype5: installed by internalSubset on a close bracket. Only whitespace and the declaration close (XML_ROLE_DOCTYPE_CLOSE, then prolog2) are accepted. */ static int doctype5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: state->handler = prolog2; return XML_ROLE_DOCTYPE_CLOSE; } return common(state, tok); } /* internalSubset: top level of the internal DTD subset. On a declaration open, the name two characters past ptr selects the next handler: ENTITY -> entity0, ATTLIST -> attlist0, ELEMENT -> element0, NOTATION -> notation0; any other name breaks out to common(). Processing instructions and comments return XML_ROLE_NONE, a parameter entity reference returns XML_ROLE_PARAM_ENTITY_REF, and a close bracket moves to doctype5. */ static int internalSubset(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_OPEN: if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ENTITY)) { state->handler = entity0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ATTLIST)) { state->handler = attlist0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_ELEMENT)) { state->handler = element0; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr + 2 * MIN_BYTES_PER_CHAR(enc), end, KW_NOTATION)) { state->handler = notation0; return XML_ROLE_NONE; } break; case XML_TOK_PI: case XML_TOK_COMMENT: return XML_ROLE_NONE; case XML_TOK_PARAM_ENTITY_REF: return XML_ROLE_PARAM_ENTITY_REF; case 
XML_TOK_CLOSE_BRACKET: state->handler = doctype5; return XML_ROLE_NONE; } return common(state, tok); } #ifdef XML_DTD /* externalSubset0: always switches the handler to externalSubset1. An XML_TOK_XML_DECL token is reported as XML_ROLE_TEXT_DECL; any other token is passed straight to externalSubset1. */ static int externalSubset0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { state->handler = externalSubset1; if (tok == XML_TOK_XML_DECL) return XML_ROLE_TEXT_DECL; return externalSubset1(state, tok, ptr, end, enc); } /* externalSubset1: top level of the external subset. A conditional-section open moves to condSect0. A conditional-section close decrements state->includeLevel, or goes to common() when the level is already 0. A close bracket always goes to common(). XML_TOK_NONE returns XML_ROLE_NONE only when includeLevel is 0, otherwise it goes to common(). Every token not listed is delegated to internalSubset. */ static int externalSubset1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_COND_SECT_OPEN: state->handler = condSect0; return XML_ROLE_NONE; case XML_TOK_COND_SECT_CLOSE: if (state->includeLevel == 0) break; state->includeLevel -= 1; return XML_ROLE_NONE; case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_CLOSE_BRACKET: break; case XML_TOK_NONE: if (state->includeLevel) break; return XML_ROLE_NONE; default: return internalSubset(state, tok, ptr, end, enc); } return common(state, tok); } #endif /* XML_DTD */ /* entity0: installed after the ENTITY keyword. A percent sign moves to entity1; a name is reported as XML_ROLE_GENERAL_ENTITY_NAME and moves to entity2. */ static int entity0(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_PERCENT: state->handler = entity1; return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = entity2; return XML_ROLE_GENERAL_ENTITY_NAME; } return common(state, tok); } /* entity1: after the percent sign. A name is reported as XML_ROLE_PARAM_ENTITY_NAME and moves to entity7. */ static int entity1(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = entity7; return XML_ROLE_PARAM_ENTITY_NAME; } return common(state, tok); } /* entity2: after a general entity name. The name SYSTEM moves to entity4 and PUBLIC to entity3; a literal is reported as XML_ROLE_ENTITY_VALUE and moves to declClose. */ static int entity2(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity4; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity3; return XML_ROLE_NONE; } break; case 
XML_TOK_LITERAL: state->handler = declClose; return XML_ROLE_ENTITY_VALUE; } return common(state, tok); } /* entity3: after PUBLIC in a general entity declaration. A literal is reported as XML_ROLE_ENTITY_PUBLIC_ID and moves to entity4. */ static int entity3(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = entity4; return XML_ROLE_ENTITY_PUBLIC_ID; } return common(state, tok); } /* entity4: reached after SYSTEM (from entity2) or after the public identifier (from entity3). A literal is reported as XML_ROLE_ENTITY_SYSTEM_ID and moves to entity5. */ static int entity4(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_LITERAL: state->handler = entity5; return XML_ROLE_ENTITY_SYSTEM_ID; } return common(state, tok); } /* entity5: after the system identifier of a general entity. A declaration close calls setTopLevel(state) and returns XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION; the name NDATA moves to entity6; any other name breaks out to common(). */ static int entity5(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_DECL_CLOSE: setTopLevel(state); return XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) { state->handler = entity6; return XML_ROLE_NONE; } break; } return common(state, tok); } /* entity6: after NDATA. A name is reported as XML_ROLE_ENTITY_NOTATION_NAME and moves to declClose. */ static int entity6(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: state->handler = declClose; return XML_ROLE_ENTITY_NOTATION_NAME; } return common(state, tok); } /* entity7: after a parameter entity name (installed by entity1). The name SYSTEM moves to entity9 and PUBLIC to entity8; a literal is reported as XML_ROLE_ENTITY_VALUE and moves to declClose. */ static int entity7(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { case XML_TOK_PROLOG_S: return XML_ROLE_NONE; case XML_TOK_NAME: if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) { state->handler = entity9; return XML_ROLE_NONE; } if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) { state->handler = entity8; return XML_ROLE_NONE; } break; case XML_TOK_LITERAL: state->handler = declClose; return XML_ROLE_ENTITY_VALUE; } return common(state, tok); } /* entity8: after PUBLIC in a parameter entity declaration. A literal is reported as XML_ROLE_ENTITY_PUBLIC_ID and moves to entity9. */ static int entity8(PROLOG_STATE *state, int tok, const char *ptr, const char *end, const ENCODING *enc) { switch (tok) { 
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_LITERAL:
    /* (Tail of entity8.)  The literal is the public identifier; entity9
       then expects the system identifier. */
    state->handler = entity9;
    return XML_ROLE_ENTITY_PUBLIC_ID;
  }
  return common(state, tok);
}

/* Reached from entity7 (after SYSTEM) or entity8 (after the public id):
   a literal is reported as XML_ROLE_ENTITY_SYSTEM_ID and declClose takes
   over. */
static
int entity9(PROLOG_STATE *state, int tok, const char *ptr,
            const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_LITERAL:
    state->handler = declClose;
    return XML_ROLE_ENTITY_SYSTEM_ID;
  }
  return common(state, tok);
}

/* First notation-declaration state: a name is reported as
   XML_ROLE_NOTATION_NAME and notation1 becomes the handler. */
static
int notation0(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
    state->handler = notation1;
    return XML_ROLE_NOTATION_NAME;
  }
  return common(state, tok);
}

/* After the notation name: the name SYSTEM leads to notation3, PUBLIC to
   notation2.  Any other name falls out of the switch into common(). */
static
int notation1(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
      state->handler = notation3;
      return XML_ROLE_NONE;
    }
    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
      state->handler = notation2;
      return XML_ROLE_NONE;
    }
    break;
  }
  return common(state, tok);
}

/* After PUBLIC: a literal is reported as XML_ROLE_NOTATION_PUBLIC_ID and
   notation4 becomes the handler. */
static
int notation2(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_LITERAL:
    state->handler = notation4;
    return XML_ROLE_NOTATION_PUBLIC_ID;
  }
  return common(state, tok);
}

/* After SYSTEM: a literal is reported as XML_ROLE_NOTATION_SYSTEM_ID and
   declClose takes over. */
static
int notation3(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_LITERAL:
    state->handler = declClose;
    return XML_ROLE_NOTATION_SYSTEM_ID;
  }
  return common(state, tok);
}

/* After the public identifier: a further literal is the system
   identifier; alternatively the declaration may close right here. */
static
int notation4(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_LITERAL:
    state->handler = declClose;
    return XML_ROLE_NOTATION_SYSTEM_ID;
  case XML_TOK_DECL_CLOSE:
    setTopLevel(state);
    /* (Tail of notation4.)  The declaration closed after the public
       identifier, with no system identifier. */
    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
  }
  return common(state, tok);
}

/* First attribute-list state: a plain or prefixed name is reported as
   XML_ROLE_ATTLIST_ELEMENT_NAME and attlist1 becomes the handler. */
static
int attlist0(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
  case XML_TOK_PREFIXED_NAME:
    state->handler = attlist1;
    return XML_ROLE_ATTLIST_ELEMENT_NAME;
  }
  return common(state, tok);
}

/* Start of each attribute definition: either the declaration closes, or
   a name is reported as XML_ROLE_ATTRIBUTE_NAME and attlist2 reads its
   type.  attlist8 and attlist9 return here after a default, so the state
   repeats once per attribute. */
static
int attlist1(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_DECL_CLOSE:
    setTopLevel(state);
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
  case XML_TOK_PREFIXED_NAME:
    state->handler = attlist2;
    return XML_ROLE_ATTRIBUTE_NAME;
  }
  return common(state, tok);
}

/* Attribute type.  A name matching one of the eight keywords in types[]
   yields XML_ROLE_ATTRIBUTE_TYPE_CDATA + index, so the order of types[]
   must stay in step with the XML_ROLE_ATTRIBUTE_TYPE_* enumerators in
   xmlrole.h.  The name NOTATION leads to attlist5; an opening parenthesis
   starts an enumerated type (attlist3). */
static
int attlist2(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
    {
      static const char *types[] = {
        KW_CDATA,
        KW_ID,
        KW_IDREF,
        KW_IDREFS,
        KW_ENTITY,
        KW_ENTITIES,
        KW_NMTOKEN,
        KW_NMTOKENS,
      };
      int i;
      for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
        if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
          state->handler = attlist8;
          return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
        }
    }
    if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
      state->handler = attlist5;
      return XML_ROLE_NONE;
    }
    break;
  case XML_TOK_OPEN_PAREN:
    state->handler = attlist3;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* Inside an enumerated type: an nmtoken or a (prefixed) name is reported
   as XML_ROLE_ATTRIBUTE_ENUM_VALUE and attlist4 becomes the handler. */
static
int attlist3(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NMTOKEN:
  case XML_TOK_NAME:
  case XML_TOK_PREFIXED_NAME:
    state->handler = attlist4;
    return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
  }
  return common(state, tok);
}

/* After an enumeration value. */
static
int attlist4(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case
XML_TOK_CLOSE_PAREN:
    /* (Tail of attlist4.)  The enumeration is closed; attlist8 reads the
       default. */
    state->handler = attlist8;
    return XML_ROLE_NONE;
  case XML_TOK_OR:
    /* Another enumeration value follows. */
    state->handler = attlist3;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* After the NOTATION keyword: only an opening parenthesis is accepted,
   leading to attlist6. */
static
int attlist5(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_OPEN_PAREN:
    state->handler = attlist6;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* Inside a NOTATION list: a name is reported as
   XML_ROLE_ATTRIBUTE_NOTATION_VALUE.  Unlike attlist3, only XML_TOK_NAME
   is accepted here. */
static
int attlist6(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
    state->handler = attlist7;
    return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
  }
  return common(state, tok);
}

/* After a notation name: a closing parenthesis ends the list (attlist8),
   XML_TOK_OR asks for another name (attlist6). */
static
int attlist7(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_CLOSE_PAREN:
    state->handler = attlist8;
    return XML_ROLE_NONE;
  case XML_TOK_OR:
    state->handler = attlist6;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* default value */
/* Accepts a XML_TOK_POUND_NAME matching IMPLIED or REQUIRED (back to
   attlist1), one matching FIXED (attlist9 then reads the literal), or a
   literal default (back to attlist1).  The keyword comparison starts at
   ptr + MIN_BYTES_PER_CHAR(enc), i.e. one minimum-width character into
   the token -- presumably to step over the leading '#'. */
static
int attlist8(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_POUND_NAME:
    if (XmlNameMatchesAscii(enc,
                            ptr + MIN_BYTES_PER_CHAR(enc),
                            end,
                            KW_IMPLIED)) {
      state->handler = attlist1;
      return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
    }
    if (XmlNameMatchesAscii(enc,
                            ptr + MIN_BYTES_PER_CHAR(enc),
                            end,
                            KW_REQUIRED)) {
      state->handler = attlist1;
      return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
    }
    if (XmlNameMatchesAscii(enc,
                            ptr + MIN_BYTES_PER_CHAR(enc),
                            end,
                            KW_FIXED)) {
      state->handler = attlist9;
      return XML_ROLE_NONE;
    }
    break;
  case XML_TOK_LITERAL:
    state->handler = attlist1;
    return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
  }
  return common(state, tok);
}

/* After the FIXED keyword seen by attlist8. */
static
int attlist9(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return
XML_ROLE_NONE;
  case XML_TOK_LITERAL:
    /* (Tail of attlist9.)  The literal is the fixed value; attlist1 then
       handles the next attribute definition or the end of the
       declaration. */
    state->handler = attlist1;
    return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
  }
  return common(state, tok);
}

/* First element-declaration state: a plain or prefixed name is reported
   as XML_ROLE_ELEMENT_NAME and element1 becomes the handler. */
static
int element0(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
  case XML_TOK_PREFIXED_NAME:
    state->handler = element1;
    return XML_ROLE_ELEMENT_NAME;
  }
  return common(state, tok);
}

/* Content specification: the names EMPTY and ANY are complete content
   specs (-> declClose).  An opening parenthesis starts a group: the
   nesting depth state->level is set to 1 and element2 takes over. */
static
int element1(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
    if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
      state->handler = declClose;
      return XML_ROLE_CONTENT_EMPTY;
    }
    if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
      state->handler = declClose;
      return XML_ROLE_CONTENT_ANY;
    }
    break;
  case XML_TOK_OPEN_PAREN:
    state->handler = element2;
    state->level = 1;
    return XML_ROLE_GROUP_OPEN;
  }
  return common(state, tok);
}

/* First token inside the outermost group.  A XML_TOK_POUND_NAME matching
   PCDATA selects the element3/4/5 states; anything else selects the
   element6/7 states.  A nested parenthesis sets the depth to 2.  The
   PCDATA comparison starts one minimum-width character into the token --
   presumably to step over the leading '#'. */
static
int element2(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_POUND_NAME:
    if (XmlNameMatchesAscii(enc,
                            ptr + MIN_BYTES_PER_CHAR(enc),
                            end,
                            KW_PCDATA)) {
      state->handler = element3;
      return XML_ROLE_CONTENT_PCDATA;
    }
    break;
  case XML_TOK_OPEN_PAREN:
    state->level = 2;
    state->handler = element6;
    return XML_ROLE_GROUP_OPEN;
  case XML_TOK_NAME:
  case XML_TOK_PREFIXED_NAME:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT;
  case XML_TOK_NAME_QUESTION:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT_OPT;
  case XML_TOK_NAME_ASTERISK:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT_REP;
  case XML_TOK_NAME_PLUS:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT_PLUS;
  }
  return common(state, tok);
}

/* After PCDATA in a content group. */
static
int element3(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case
XML_TOK_CLOSE_PAREN:
  case XML_TOK_CLOSE_PAREN_ASTERISK:
    /* (Tail of element3.)  Both closing forms are reported with the same
       role, XML_ROLE_GROUP_CLOSE_REP.
       NOTE(review): a plain ')' is therefore indistinguishable from ')*'
       to the caller here -- confirm that this is intended. */
    state->handler = declClose;
    return XML_ROLE_GROUP_CLOSE_REP;
  case XML_TOK_OR:
    state->handler = element4;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* After XML_TOK_OR in a PCDATA group: a plain or prefixed name is
   reported as XML_ROLE_CONTENT_ELEMENT and element5 becomes the
   handler. */
static
int element4(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
  case XML_TOK_PREFIXED_NAME:
    state->handler = element5;
    return XML_ROLE_CONTENT_ELEMENT;
  }
  return common(state, tok);
}

/* After an element name in a PCDATA group: only XML_TOK_CLOSE_PAREN_ASTERISK
   closes the group here (a plain closing parenthesis goes to common());
   XML_TOK_OR asks for another name (element4). */
static
int element5(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_CLOSE_PAREN_ASTERISK:
    state->handler = declClose;
    return XML_ROLE_GROUP_CLOSE_REP;
  case XML_TOK_OR:
    state->handler = element4;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* Expects a content particle: a nested parenthesis raises state->level
   and stays in this state; a name (optionally with a ?, * or + suffix
   token) is reported with the matching XML_ROLE_CONTENT_ELEMENT* role and
   element7 becomes the handler. */
static
int element6(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_OPEN_PAREN:
    state->level += 1;
    return XML_ROLE_GROUP_OPEN;
  case XML_TOK_NAME:
  case XML_TOK_PREFIXED_NAME:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT;
  case XML_TOK_NAME_QUESTION:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT_OPT;
  case XML_TOK_NAME_ASTERISK:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT_REP;
  case XML_TOK_NAME_PLUS:
    state->handler = element7;
    return XML_ROLE_CONTENT_ELEMENT_PLUS;
  }
  return common(state, tok);
}

/* After a content particle: each closing-parenthesis token lowers
   state->level, and declClose takes over once the depth reaches 0. */
static
int element7(PROLOG_STATE *state, int tok, const char *ptr,
             const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_CLOSE_PAREN:
    state->level -= 1;
    if (state->level == 0)
      state->handler = declClose;
    return XML_ROLE_GROUP_CLOSE;
  case XML_TOK_CLOSE_PAREN_ASTERISK:
    state->level -= 1;
    if (state->level == 0)
      state->handler = declClose;
    return XML_ROLE_GROUP_CLOSE_REP;
  case XML_TOK_CLOSE_PAREN_QUESTION:
    state->level -= 1;
    /* (Tail of element7.)  Same depth bookkeeping as the other closing
       tokens: leaving the outermost group hands over to declClose. */
    if (state->level == 0)
      state->handler = declClose;
    return XML_ROLE_GROUP_CLOSE_OPT;
  case XML_TOK_CLOSE_PAREN_PLUS:
    state->level -= 1;
    if (state->level == 0)
      state->handler = declClose;
    return XML_ROLE_GROUP_CLOSE_PLUS;
  case XML_TOK_COMMA:
    /* A separator: element6 reads the next particle. */
    state->handler = element6;
    return XML_ROLE_GROUP_SEQUENCE;
  case XML_TOK_OR:
    state->handler = element6;
    return XML_ROLE_GROUP_CHOICE;
  }
  return common(state, tok);
}

#ifdef XML_DTD

/* Conditional section keyword: the name INCLUDE leads to condSect1,
   IGNORE to condSect2.  Any other name goes to common(). */
static
int condSect0(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_NAME:
    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
      state->handler = condSect1;
      return XML_ROLE_NONE;
    }
    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
      state->handler = condSect2;
      return XML_ROLE_NONE;
    }
    break;
  }
  return common(state, tok);
}

/* After INCLUDE: the opening bracket returns to externalSubset1 and
   counts one more open include section in state->includeLevel. */
static
int condSect1(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_OPEN_BRACKET:
    state->handler = externalSubset1;
    state->includeLevel += 1;
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

/* After IGNORE: the opening bracket returns to externalSubset1 and is
   reported as XML_ROLE_IGNORE_SECT; includeLevel is left unchanged. */
static
int condSect2(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_OPEN_BRACKET:
    state->handler = externalSubset1;
    return XML_ROLE_IGNORE_SECT;
  }
  return common(state, tok);
}

#endif /* XML_DTD */

/* Shared final state of a declaration: XML_TOK_PROLOG_S is ignored,
   XML_TOK_DECL_CLOSE calls setTopLevel() (defined earlier in this file),
   and anything else goes to common(). */
static
int declClose(PROLOG_STATE *state, int tok, const char *ptr,
              const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_PROLOG_S:
    return XML_ROLE_NONE;
  case XML_TOK_DECL_CLOSE:
    setTopLevel(state);
    return XML_ROLE_NONE;
  }
  return common(state, tok);
}

#if 0

/* Compiled out.  Both switch branches return, so the statement after the
   switch is unreachable. */
static
int ignore(PROLOG_STATE *state, int tok, const char *ptr,
           const char *end, const ENCODING *enc)
{
  switch (tok) {
  case XML_TOK_DECL_CLOSE:
    state->handler = internalSubset;
    return 0;
  default:
    return XML_ROLE_NONE;
  }
  return
common(state, tok);
}
#endif

/* Handler installed by common() after an error: every later token is
   answered with XML_ROLE_NONE and the state never changes again. */
static
int error(PROLOG_STATE *state, int tok, const char *ptr,
          const char *end, const ENCODING *enc)
{
  return XML_ROLE_NONE;
}

/* Fallback for tokens a state does not accept.  With XML_DTD, a parameter
   entity reference outside the document entity is reported as
   XML_ROLE_INNER_PARAM_ENTITY_REF and the current handler is kept.
   Otherwise the error handler is installed and XML_ROLE_ERROR returned. */
static
int common(PROLOG_STATE *state, int tok)
{
#ifdef XML_DTD
  if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
    return XML_ROLE_INNER_PARAM_ENTITY_REF;
#endif
  state->handler = error;
  return XML_ROLE_ERROR;
}

/* Initializes a state for the document entity: the handler starts at
   prolog0.  state->level is not set here; element1/element2 assign it
   before element7 reads it. */
void XmlPrologStateInit(PROLOG_STATE *state)
{
  state->handler = prolog0;
#ifdef XML_DTD
  state->documentEntity = 1;
  state->includeLevel = 0;
#endif /* XML_DTD */
}

#ifdef XML_DTD

/* Initializes a state for an external entity: the handler starts at
   externalSubset0 and documentEntity is cleared, which enables the
   parameter-entity branch in common(). */
void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
{
  state->handler = externalSubset0;
  state->documentEntity = 0;
  state->includeLevel = 0;
}

#endif /* XML_DTD */

/* NOTE(review): the next line is not C source; it looks like the archive
   (tar) member header that introduces xmltok/xmlrole.h. */
expat-1.2/xmltok/xmlrole.h0100664000076400007640000000466707106707067014006 0ustar jjcjjc
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file copying.txt for copying permission.
*/

#ifndef XmlRole_INCLUDED
#define XmlRole_INCLUDED 1

#include "xmltok.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Roles returned by the prolog state handlers.  The eight
   XML_ROLE_ATTRIBUTE_TYPE_* values must stay consecutive and in this
   order: attlist2 computes them as XML_ROLE_ATTRIBUTE_TYPE_CDATA + index. */
enum {
  XML_ROLE_ERROR = -1,
  XML_ROLE_NONE = 0,
  XML_ROLE_XML_DECL,
  XML_ROLE_INSTANCE_START,
  XML_ROLE_DOCTYPE_NAME,
  XML_ROLE_DOCTYPE_SYSTEM_ID,
  XML_ROLE_DOCTYPE_PUBLIC_ID,
  XML_ROLE_DOCTYPE_CLOSE,
  XML_ROLE_GENERAL_ENTITY_NAME,
  XML_ROLE_PARAM_ENTITY_NAME,
  XML_ROLE_ENTITY_VALUE,
  XML_ROLE_ENTITY_SYSTEM_ID,
  XML_ROLE_ENTITY_PUBLIC_ID,
  XML_ROLE_ENTITY_NOTATION_NAME,
  XML_ROLE_NOTATION_NAME,
  XML_ROLE_NOTATION_SYSTEM_ID,
  XML_ROLE_NOTATION_NO_SYSTEM_ID,
  XML_ROLE_NOTATION_PUBLIC_ID,
  XML_ROLE_ATTRIBUTE_NAME,
  XML_ROLE_ATTRIBUTE_TYPE_CDATA,
  XML_ROLE_ATTRIBUTE_TYPE_ID,
  XML_ROLE_ATTRIBUTE_TYPE_IDREF,
  XML_ROLE_ATTRIBUTE_TYPE_IDREFS,
  XML_ROLE_ATTRIBUTE_TYPE_ENTITY,
  XML_ROLE_ATTRIBUTE_TYPE_ENTITIES,
  XML_ROLE_ATTRIBUTE_TYPE_NMTOKEN,
  XML_ROLE_ATTRIBUTE_TYPE_NMTOKENS,
  XML_ROLE_ATTRIBUTE_ENUM_VALUE,
  XML_ROLE_ATTRIBUTE_NOTATION_VALUE,
  XML_ROLE_ATTLIST_ELEMENT_NAME,
  XML_ROLE_IMPLIED_ATTRIBUTE_VALUE,
  XML_ROLE_REQUIRED_ATTRIBUTE_VALUE,
  XML_ROLE_DEFAULT_ATTRIBUTE_VALUE,
  XML_ROLE_FIXED_ATTRIBUTE_VALUE,
  XML_ROLE_ELEMENT_NAME,
  XML_ROLE_CONTENT_ANY,
  XML_ROLE_CONTENT_EMPTY,
  XML_ROLE_CONTENT_PCDATA,
  XML_ROLE_GROUP_OPEN,
  XML_ROLE_GROUP_CLOSE,
  XML_ROLE_GROUP_CLOSE_REP,
  XML_ROLE_GROUP_CLOSE_OPT,
  XML_ROLE_GROUP_CLOSE_PLUS,
  XML_ROLE_GROUP_CHOICE,
  XML_ROLE_GROUP_SEQUENCE,
  XML_ROLE_CONTENT_ELEMENT,
  XML_ROLE_CONTENT_ELEMENT_REP,
  XML_ROLE_CONTENT_ELEMENT_OPT,
  XML_ROLE_CONTENT_ELEMENT_PLUS,
  /* The three roles below exist only with XML_DTD, so the numeric values
     of the two enumerators after them depend on that setting. */
#ifdef XML_DTD
  XML_ROLE_TEXT_DECL,
  XML_ROLE_IGNORE_SECT,
  XML_ROLE_INNER_PARAM_ENTITY_REF,
#endif /* XML_DTD */
  XML_ROLE_PARAM_ENTITY_REF,
  XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION
};

/* State of the prolog role machine.  handler is the function for the
   current state; each call may replace it.  level is the nesting depth
   maintained by the element-declaration handlers in xmlrole.c.  With
   XML_DTD, includeLevel is incremented when an INCLUDE section is opened
   and documentEntity is 1 after XmlPrologStateInit and 0 after
   XmlPrologStateInitExternalEntity. */
typedef struct prolog_state {
  int (*handler)(struct prolog_state *state,
                 int tok,
                 const char *ptr,
                 const char *end,
                 const ENCODING *enc);
  unsigned level;
#ifdef XML_DTD
  unsigned includeLevel;
  int documentEntity;
#endif /* XML_DTD */
} PROLOG_STATE;

void XMLTOKAPI XmlPrologStateInit(PROLOG_STATE *);
#ifdef XML_DTD
void XMLTOKAPI XmlPrologStateInitExternalEntity(PROLOG_STATE *);
#endif /* XML_DTD */

/* Feeds one token to the current state handler and yields the role it
   returns.  The state argument is evaluated twice. */
#define XmlTokenRole(state, tok, ptr, end, enc) \
 (((state)->handler)(state, tok, ptr, end, enc))

#ifdef __cplusplus
}
#endif

#endif /* not XmlRole_INCLUDED */

/* NOTE(review): the next line is not C source; it looks like the archive
   (tar) member header that introduces xmltok/xmltok.c. */
expat-1.2/xmltok/xmltok.c0100664000076400007640000011162007105440770013613 0ustar jjcjjc
/*
Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
See the file copying.txt for copying permission.
*/

#include "xmldef.h"
#include "xmltok.h"
#include "nametab.h"

/* With XML_DTD the first brace group of VTABLE1 gets an extra entry,
   PREFIX(ignoreSectionTok); without it the macro expands to nothing. */
#ifdef XML_DTD
#define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#else
#define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#endif

/* Initializer fragment listing the PREFIX-ed functions of one encoding,
   used in the static encoding tables further down.  It depends on the
   PREFIX macro in effect at the point of use. */
#define VTABLE1 \
  { PREFIX(prologTok), PREFIX(contentTok), \
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE }, \
  { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

/* VTABLE1 followed by the two PREFIX-ed conversion functions. */
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)

/* Bitmap lookup for a 16-bit character given as high and low byte:
   pages[hi] selects a group of eight 32-bit words in namingBitmap, the
   top three bits of lo select the word and the low five bits the bit. */
#define UCS2_GET_NAMING(pages, hi, lo) \
   (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))

/* A 2 byte UTF-8 representation splits the character's 11 bits
between the bottom 5 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask. */
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                      + ((((byte)[0]) & 3) << 1) \
                      + ((((byte)[1]) >> 5) & 1)] \
         & (1 << (((byte)[1]) & 0x1F)))

/* A 3 byte UTF-8 representation splits the character's 16 bits
between the bottom 4, 6 and 6 bits of the bytes.
We need 8 bits to index into pages, 3 bits to add to that index and
5 bits to generate the mask. */
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                             + ((((byte)[1]) >> 2) & 0xF)] \
		       << 3) \
                      + ((((byte)[1]) & 3) << 1) \
                      + ((((byte)[2]) >> 5) & 1)] \
         & (1 << (((byte)[2]) & 0x1F)))

/* Dispatches on the sequence length n; any length other than 2 or 3
   yields 0. */
#define UTF8_GET_NAMING(pages, p, n) \
  ((n) == 2 \
  ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
  : ((n) == 3 \
     ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
     : 0))

/* Non-zero for a 3 byte sequence that starts with 0xED and has bit 0x20
   set in its second byte (this encodes U+D800..U+DFFF), or that is
   EF BF BE / EF BF BF (U+FFFE / U+FFFF).  p is expected to point to
   unsigned char. */
#define UTF8_INVALID3(p) \
  ((*p) == 0xED \
  ? (((p)[1] & 0x20) != 0) \
  : ((*p) == 0xEF \
     ?
((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
     : 0))

/* Non-zero for a 4 byte sequence that starts with 0xF4 and has either of
   the bits 0x30 set in its second byte, i.e. a value above U+10FFFF. */
#define UTF8_INVALID4(p) ((*p) == 0xF4 && ((p)[1] & 0x30) != 0)

/* Predicate that is false for every input; used to fill table slots that
   can never match. */
static
int isNever(const ENCODING *enc, const char *p)
{
  return 0;
}

/* Name-character test for a 2 byte UTF-8 sequence at p (namePages). */
static
int utf8_isName2(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
}

/* Name-character test for a 3 byte UTF-8 sequence at p (namePages). */
static
int utf8_isName3(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
}

/* No 4 byte sequence is accepted as a name character. */
#define utf8_isName4 isNever

/* Name-start test for a 2 byte UTF-8 sequence at p (nmstrtPages). */
static
int utf8_isNmstrt2(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
}

/* Name-start test for a 3 byte UTF-8 sequence at p (nmstrtPages). */
static
int utf8_isNmstrt3(const ENCODING *enc, const char *p)
{
  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
}

/* No 4 byte sequence is accepted as a name-start character. */
#define utf8_isNmstrt4 isNever

/* No 2 byte sequence is rejected by these checks. */
#define utf8_isInvalid2 isNever

/* Applies UTF8_INVALID3 to the 3 byte sequence at p. */
static
int utf8_isInvalid3(const ENCODING *enc, const char *p)
{
  return UTF8_INVALID3((const unsigned char *)p);
}

/* Applies UTF8_INVALID4 to the 4 byte sequence at p. */
static
int utf8_isInvalid4(const ENCODING *enc, const char *p)
{
  return UTF8_INVALID4((const unsigned char *)p);
}

/* An ENCODING extended with a per-byte type table and per-length
   predicates.  enc is the first member, so code in this file casts an
   ENCODING pointer to struct normal_encoding to reach the rest.  The five
   extra function pointers exist only when XML_MIN_SIZE is defined. */
struct normal_encoding {
  ENCODING enc;
  unsigned char type[256];
#ifdef XML_MIN_SIZE
  int (*byteType)(const ENCODING *, const char *);
  int (*isNameMin)(const ENCODING *, const char *);
  int (*isNmstrtMin)(const ENCODING *, const char *);
  int (*byteToAscii)(const ENCODING *, const char *);
  int (*charMatches)(const ENCODING *, const char *, int);
#endif /* XML_MIN_SIZE */
  int (*isName2)(const ENCODING *, const char *);
  int (*isName3)(const ENCODING *, const char *);
  int (*isName4)(const ENCODING *, const char *);
  int (*isNmstrt2)(const ENCODING *, const char *);
  int (*isNmstrt3)(const ENCODING *, const char *);
  int (*isNmstrt4)(const ENCODING *, const char *);
  int (*isInvalid2)(const ENCODING *, const char *);
  int (*isInvalid3)(const ENCODING *, const char *);
  int (*isInvalid4)(const ENCODING *, const char *);
};

/* Initializer fragment for the five XML_MIN_SIZE-only members, in member
   order; note the trailing comma. */
#ifdef XML_MIN_SIZE

#define STANDARD_VTABLE(E) \
 E ## byteType, \
 E ## isNameMin, \
 E ## isNmstrtMin, \
 E ## byteToAscii, \
 E ## charMatches,

#else
#define STANDARD_VTABLE(E) /* as nothing */

#endif

/* Initializer fragment for the nine per-length predicate members of
   struct normal_encoding, in member order; E is the name prefix. */
#define NORMAL_VTABLE(E) \
 E ## isName2, \
 E ## isName3, \
 E ## isName4, \
 E ## isNmstrt2, \
 E ## isNmstrt3, \
 E ## isNmstrt4, \
 E ## isInvalid2, \
 E ## isInvalid3, \
 E ## isInvalid4

static int checkCharRefNumber(int);

#include "xmltok_impl.h"
#include "ascii.h"

/* The macros from here on configure the code brought in by the
   #include "xmltok_impl.c" that follows this group.  With XML_MIN_SIZE
   they go through function pointers stored in struct normal_encoding;
   otherwise they expand inline for a single-byte encoding. */

#ifdef XML_MIN_SIZE
#define sb_isNameMin isNever
#define sb_isNmstrtMin isNever
#endif

#ifdef XML_MIN_SIZE
#define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#define MINBPC(enc) 1
#endif

/* Type of the byte at p, looked up in the encoding's 256-entry table.
   The cast drops the const qualifier of enc; the table is only read. */
#define SB_BYTE_TYPE(enc, p) \
  (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

#ifdef XML_MIN_SIZE
/* Function form of SB_BYTE_TYPE, for the byteType table slot. */
static
int sb_byteType(const ENCODING *enc, const char *p)
{
  return SB_BYTE_TYPE(enc, p);
}
#define BYTE_TYPE(enc, p) \
 (((const struct normal_encoding *)(enc))->byteType(enc, p))
#else
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif

#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
 (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
/* Single-byte form: the byte itself, as a plain char value. */
static
int sb_byteToAscii(const ENCODING *enc, const char *p)
{
  return *p;
}
#else
#define BYTE_TO_ASCII(enc, p) (*(p))
#endif

/* n is pasted into the member name, so it must be the literal 2, 3 or 4. */
#define IS_NAME_CHAR(enc, p, n) \
 (((const struct normal_encoding *)(enc))->isName ## n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
 (((const struct normal_encoding *)(enc))->isNmstrt ## n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
 (((const struct normal_encoding *)(enc))->isInvalid ## n(enc, p))

#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
 (((const struct normal_encoding *)(enc))->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
 (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
#else
/* Always false in the inline single-byte configuration. */
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif

#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
 (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
/* Single-byte form: compares the byte at p with c. */
static
int sb_charMatches(const ENCODING *enc, const char *p, int c)
{
  return *p == c;
}
#else
/* c is an ASCII character */
#define CHAR_MATCHES(enc, p, c) (*(p) == c) #endif #define PREFIX(ident) normal_ ## ident #include "xmltok_impl.c" #undef MINBPC #undef BYTE_TYPE #undef BYTE_TO_ASCII #undef CHAR_MATCHES #undef IS_NAME_CHAR #undef IS_NAME_CHAR_MINBPC #undef IS_NMSTRT_CHAR #undef IS_NMSTRT_CHAR_MINBPC #undef IS_INVALID_CHAR enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval1 = 0x00, UTF8_cval2 = 0xc0, UTF8_cval3 = 0xe0, UTF8_cval4 = 0xf0 }; static void utf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { char *to; const char *from; if (fromLim - *fromP > toLim - *toP) { /* Avoid copying partial characters. */ for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) break; } for (to = *toP, from = *fromP; from != fromLim; from++, to++) *to = *from; *fromP = from; *toP = to; } static void utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { unsigned short *to = *toP; const char *from = *fromP; while (from != fromLim && to != toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f); from += 2; break; case BT_LEAD3: *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f); from += 3; break; case BT_LEAD4: { unsigned long n; if (to + 1 == toLim) break; n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); n -= 0x10000; to[0] = (unsigned short)((n >> 10) | 0xD800); to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); to += 2; from += 4; } break; default: *to++ = *from++; break; } } *fromP = from; *toP = to; } #ifdef XML_NS static const struct normal_encoding utf8_encoding_ns = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { #include "asciitab.h" #include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; 
#endif static const struct normal_encoding utf8_encoding = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; #ifdef XML_NS static const struct normal_encoding internal_utf8_encoding_ns = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { #include "iasciitab.h" #include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; #endif static const struct normal_encoding internal_utf8_encoding = { { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 }, { #define BT_COLON BT_NMSTRT #include "iasciitab.h" #undef BT_COLON #include "utf8tab.h" }, STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; static void latin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { for (;;) { unsigned char c; if (*fromP == fromLim) break; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) break; *(*toP)++ = ((c >> 6) | UTF8_cval2); *(*toP)++ = ((c & 0x3f) | 0x80); (*fromP)++; } else { if (*toP == toLim) break; *(*toP)++ = *(*fromP)++; } } } static void latin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { while (*fromP != fromLim && *toP != toLim) *(*toP)++ = (unsigned char)*(*fromP)++; } #ifdef XML_NS static const struct normal_encoding latin1_encoding_ns = { { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, { #include "asciitab.h" #include "latin1tab.h" }, STANDARD_VTABLE(sb_) }; #endif static const struct normal_encoding latin1_encoding = { { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 }, { #define BT_COLON BT_NMSTRT #include "asciitab.h" #undef BT_COLON #include "latin1tab.h" }, STANDARD_VTABLE(sb_) }; static void ascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { while (*fromP != fromLim && *toP != toLim) *(*toP)++ = *(*fromP)++; } #ifdef XML_NS static 
const struct normal_encoding ascii_encoding_ns = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#include "asciitab.h"
/* BT_NONXML == 0 */
  },
  STANDARD_VTABLE(sb_)
};

#endif

/* ASCII table: only the asciitab.h entries are listed, so the rest of
   type[] is zero-initialized, which per the comment inside is
   BT_NONXML. */
static const struct normal_encoding ascii_encoding = {
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
/* BT_NONXML == 0 */
  },
  STANDARD_VTABLE(sb_)
};

/* Classifies a 16-bit unit given as high and low byte: 0xD8..0xDB is
   BT_LEAD4 (first half of a surrogate pair), 0xDC..0xDF is BT_TRAIL,
   0xFFFE and 0xFFFF are BT_NONXML, everything else is BT_NONASCII.  The
   2 byte BYTE_TYPE macros call this only when the high byte is non-zero. */
static
int unicode_byte_type(char hi, char lo)
{
  switch ((unsigned char)hi) {
  case 0xD8: case 0xD9: case 0xDA: case 0xDB:
    return BT_LEAD4;
  case 0xDC: case 0xDD: case 0xDE: case 0xDF:
    return BT_TRAIL;
  case 0xFF:
    switch ((unsigned char)lo) {
    case 0xFF:
    case 0xFE:
      return BT_NONXML;
    }
    break;
  }
  return BT_NONASCII;
}

/* Defines E##toUtf8, converting 2 byte units to UTF-8.  It relies on the
   GET_LO / GET_HI macros in effect where the macro is expanded, which fix
   the byte order.  Before each character the remaining output space is
   checked; if the character does not fit, *fromP is set to that character
   and the function returns.
   NOTE(review): in the surrogate case the second unit is read after
   `from += 2' with no comparison against fromLim -- confirm that callers
   never pass a lone first surrogate at the end of the input. */
#define DEFINE_UTF16_TO_UTF8(E) \
static \
void E ## toUtf8(const ENCODING *enc, \
		 const char **fromP, const char *fromLim, \
		 char **toP, const char *toLim) \
{ \
  const char *from; \
  for (from = *fromP; from != fromLim; from += 2) { \
    int plane; \
    unsigned char lo2; \
    unsigned char lo = GET_LO(from); \
    unsigned char hi = GET_HI(from); \
    switch (hi) { \
    case 0: \
      if (lo < 0x80) { \
        if (*toP == toLim) { \
          *fromP = from; \
	  return; \
        } \
        *(*toP)++ = lo; \
        break; \
      } \
      /* fall through */ \
    case 0x1: case 0x2: case 0x3: \
    case 0x4: case 0x5: case 0x6: case 0x7: \
      if (toLim -  *toP < 2) { \
        *fromP = from; \
	return; \
      } \
      *(*toP)++ = ((lo >> 6) | (hi << 2) |  UTF8_cval2); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    default: \
      if (toLim -  *toP < 3)  { \
        *fromP = from; \
	return; \
      } \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      *(*toP)++ = ((hi >> 4) | UTF8_cval3); \
      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *(*toP)++ = ((lo & 0x3f) | 0x80); \
      break; \
    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
      if (toLim -  *toP < 4) { \
	*fromP = from; \
	return; \
      } \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *(*toP)++ = ((plane >> 2) | UTF8_cval4); \
      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      from += 2; \
      lo2 = GET_LO(from); \
      *(*toP)++ = (((lo & 0x3) << 4) \
	           | ((GET_HI(from) & 0x3) << 2) \
		   | (lo2 >> 6) \
		   | 0x80); \
      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
      break; \
    } \
  } \
  *fromP = from; \
}

/* Defines E##toUtf16, copying 2 byte units into unsigned short values
   using the GET_LO / GET_HI macros in effect at expansion.  When the
   input is longer than the output space and the unit just before fromLim
   has a high byte of 0xD8..0xDF, fromLim is first moved back by one
   unit. */
#define DEFINE_UTF16_TO_UTF16(E) \
static \
void E ## toUtf16(const ENCODING *enc, \
		  const char **fromP, const char *fromLim, \
		  unsigned short **toP, const unsigned short *toLim) \
{ \
  /* Avoid copying first half only of surrogate */ \
  if (fromLim - *fromP > ((toLim - *toP) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \
    *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \
}

/* Little-endian instantiation: low byte first. */
#define SET2(ptr, ch) \
  (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])

DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)

#undef SET2
#undef GET_LO
#undef GET_HI

/* Big-endian instantiation: high byte first. */
#define SET2(ptr, ch) \
  (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])

DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)

#undef SET2
#undef GET_LO
#undef GET_HI

/* Byte type of a little-endian 2 byte unit: with a zero high byte the
   low byte is looked up in the encoding's table, otherwise
   unicode_byte_type() decides. */
#define LITTLE2_BYTE_TYPE(enc, p) \
 ((p)[1] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))
/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ?
(p)[0] : -1)
/* True when the unit at p has a zero high byte and a low byte equal
   to c. */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
/* Naming-bitmap lookups for the little-endian unit at p. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0])

#ifdef XML_MIN_SIZE

/* Function forms of the LITTLE2_* macros, used to fill the
   STANDARD_VTABLE(little2_) slots. */

static
int little2_byteType(const ENCODING *enc, const char *p)
{
  return LITTLE2_BYTE_TYPE(enc, p);
}

static
int little2_byteToAscii(const ENCODING *enc, const char *p)
{
  return LITTLE2_BYTE_TO_ASCII(enc, p);
}

static
int little2_charMatches(const ENCODING *enc, const char *p, int c)
{
  return LITTLE2_CHAR_MATCHES(enc, p, c);
}

static
int little2_isNameMin(const ENCODING *enc, const char *p)
{
  return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
}

static
int little2_isNmstrtMin(const ENCODING *enc, const char *p)
{
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
}

/* In this configuration the template is not instantiated again; PREFIX
   stays as set for the single-byte instantiation and only the two
   conversion entries of VTABLE change. */
#undef VTABLE
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16

#else /* not XML_MIN_SIZE */

/* Second instantiation of the tokenizer template, for little-endian
   2 byte units. */
#undef PREFIX
#define PREFIX(ident) little2_ ## ident
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available.
*/
#define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
/* The multi-unit predicates are constant false here; name tests go
   through the *_MINBPC macros. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#include "xmltok_impl.c"

#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */

#ifdef XML_NS

/* Little-endian 2 byte table.  The last value of the first brace group
   is 1 only when XML_BYTE_ORDER == 12.
   NOTE(review): the meaning of the three trailing integers comes from the
   ENCODING declaration in xmltok.h, which is not shown here -- verify
   there. */
static const struct normal_encoding little2_encoding_ns = {
  { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 12
    1
#else
    0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

#endif

/* Same table with BT_COLON mapped to BT_NMSTRT. */
static const struct normal_encoding little2_encoding = {
  { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 12
    1
#else
    0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

/* The internal little-endian tables are compiled unless
   XML_BYTE_ORDER is 21. */
#if XML_BYTE_ORDER != 21

#ifdef XML_NS

static const struct normal_encoding internal_little2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

#endif

static const struct normal_encoding internal_little2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(little2_)
};

#endif


/* Byte type of a big-endian 2 byte unit: with a zero high byte (first
   byte) the low byte is looked up in the encoding's table, otherwise
   unicode_byte_type() decides. */
#define BIG2_BYTE_TYPE(enc, p) \
 ((p)[0] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
  : unicode_byte_type((p)[0], (p)[1]))
/* The low byte when the high byte is zero, otherwise -1. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ?
(p)[1] : -1)
/* True when the unit at p has a zero high byte and a low byte equal
   to c. */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
/* Naming-bitmap lookups for the big-endian unit at p. */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1])

#ifdef XML_MIN_SIZE

/* Function forms of the BIG2_* macros, used to fill the
   STANDARD_VTABLE(big2_) slots. */

static
int big2_byteType(const ENCODING *enc, const char *p)
{
  return BIG2_BYTE_TYPE(enc, p);
}

static
int big2_byteToAscii(const ENCODING *enc, const char *p)
{
  return BIG2_BYTE_TO_ASCII(enc, p);
}

static
int big2_charMatches(const ENCODING *enc, const char *p, int c)
{
  return BIG2_CHAR_MATCHES(enc, p, c);
}

static
int big2_isNameMin(const ENCODING *enc, const char *p)
{
  return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
}

static
int big2_isNmstrtMin(const ENCODING *enc, const char *p)
{
  return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
}

/* In this configuration only the two conversion entries of VTABLE
   change for the big-endian tables. */
#undef VTABLE
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16

#else /* not XML_MIN_SIZE */

/* Third instantiation of the tokenizer template, for big-endian 2 byte
   units. */
#undef PREFIX
#define PREFIX(ident) big2_ ## ident
#define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available.
*/
#define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
#define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
/* The multi-unit predicates are constant false here; name tests go
   through the *_MINBPC macros. */
#define IS_NAME_CHAR(enc, p, n) 0
#define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
#define IS_NMSTRT_CHAR(enc, p, n) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)

#include "xmltok_impl.c"

#undef MINBPC
#undef BYTE_TYPE
#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NAME_CHAR_MINBPC
#undef IS_NMSTRT_CHAR
#undef IS_NMSTRT_CHAR_MINBPC
#undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */

#ifdef XML_NS

/* Big-endian 2 byte table.  The last value of the first brace group is
   1 only when XML_BYTE_ORDER == 21. */
static const struct normal_encoding big2_encoding_ns = {
  { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 21
  1
#else
  0
#endif
  },
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};

#endif

/* Same table with BT_COLON mapped to BT_NMSTRT. */
static const struct normal_encoding big2_encoding = {
  { VTABLE, 2, 0,
#if XML_BYTE_ORDER == 21
  1
#else
  0
#endif
  },
  {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};

/* The internal big-endian tables are compiled unless XML_BYTE_ORDER
   is 12. */
#if XML_BYTE_ORDER != 12

#ifdef XML_NS

static const struct normal_encoding internal_big2_encoding_ns = {
  { VTABLE, 2, 0, 1 },
  {
#include "iasciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};

#endif

static const struct normal_encoding internal_big2_encoding = {
  { VTABLE, 2, 0, 1 },
  {
#define BT_COLON BT_NMSTRT
#include "iasciitab.h"
#undef BT_COLON
#include "latin1tab.h"
  },
  STANDARD_VTABLE(big2_)
};

#endif

#undef PREFIX

/* Case-insensitive equality of two NUL-terminated strings.  Only the
   range ASCII_a..ASCII_z is folded to upper case; every other character
   must match exactly.  Returns 1 when equal, 0 otherwise. */
static
int streqci(const char *s1, const char *s2)
{
  for (;;) {
    char c1 = *s1++;
    char c2 = *s2++;
    if (ASCII_a <= c1 && c1 <= ASCII_z)
      c1 += ASCII_A - ASCII_a;
    if (ASCII_a <= c2 && c2 <= ASCII_z)
      c2 += ASCII_A - ASCII_a;
    if (c1 != c2)
      return 0;
    /* Both characters are equal here, so one NUL test covers both
       strings. */
    if (!c1)
      break;
  }
  return 1;
}

/* updatePosition entry that ignores enc and always counts with the UTF-8
   implementation. */
static
void initUpdatePosition(const ENCODING *enc, const char *ptr,
			const char *end, POSITION *pos)
{
  normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
}

/* Converts the character at ptr through XmlUtf8Convert into a one-byte
   buffer; returns -1 when nothing was written, otherwise that byte. */
static
int toAscii(const ENCODING *enc, const
char *ptr, const char *end) { char buf[1]; char *p = buf; XmlUtf8Convert(enc, &ptr, end, &p, p + 1); if (p == buf) return -1; else return buf[0]; } static int isSpace(int c) { switch (c) { case 0x20: case 0xD: case 0xA: case 0x9: return 1; } return 0; } /* Return 1 if there's just optional white space or there's an S followed by name=val. */ static int parsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, const char **namePtr, const char **nameEndPtr, const char **valPtr, const char **nextTokPtr) { int c; char open; if (ptr == end) { *namePtr = 0; return 1; } if (!isSpace(toAscii(enc, ptr, end))) { *nextTokPtr = ptr; return 0; } do { ptr += enc->minBytesPerChar; } while (isSpace(toAscii(enc, ptr, end))); if (ptr == end) { *namePtr = 0; return 1; } *namePtr = ptr; for (;;) { c = toAscii(enc, ptr, end); if (c == -1) { *nextTokPtr = ptr; return 0; } if (c == ASCII_EQUALS) { *nameEndPtr = ptr; break; } if (isSpace(c)) { *nameEndPtr = ptr; do { ptr += enc->minBytesPerChar; } while (isSpace(c = toAscii(enc, ptr, end))); if (c != ASCII_EQUALS) { *nextTokPtr = ptr; return 0; } break; } ptr += enc->minBytesPerChar; } if (ptr == *namePtr) { *nextTokPtr = ptr; return 0; } ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); while (isSpace(c)) { ptr += enc->minBytesPerChar; c = toAscii(enc, ptr, end); } if (c != ASCII_QUOT && c != ASCII_APOS) { *nextTokPtr = ptr; return 0; } open = c; ptr += enc->minBytesPerChar; *valPtr = ptr; for (;; ptr += enc->minBytesPerChar) { c = toAscii(enc, ptr, end); if (c == open) break; if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z) && !(ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { *nextTokPtr = ptr; return 0; } } *nextTokPtr = ptr + enc->minBytesPerChar; return 1; } static const char KW_version[] = { ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0' }; static const char KW_encoding[] = { ASCII_e, ASCII_n, ASCII_c, ASCII_o, 
ASCII_d, ASCII_i, ASCII_n, ASCII_g, '\0' }; static const char KW_standalone[] = { ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0' }; static const char KW_yes[] = { ASCII_y, ASCII_e, ASCII_s, '\0' }; static const char KW_no[] = { ASCII_n, ASCII_o, '\0' }; static int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, const char *), int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone) { const char *val = 0; const char *name = 0; const char *nameEnd = 0; ptr += 5 * enc->minBytesPerChar; end -= 2 * enc->minBytesPerChar; if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) || !name) { *badPtr = ptr; return 0; } if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { if (!isGeneralTextEntity) { *badPtr = name; return 0; } } else { if (versionPtr) *versionPtr = val; if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } if (!name) { if (isGeneralTextEntity) { /* a TextDecl must have an EncodingDecl */ *badPtr = ptr; return 0; } return 1; } } if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { int c = toAscii(enc, val, end); if (!(ASCII_a <= c && c <= ASCII_z) && !(ASCII_A <= c && c <= ASCII_Z)) { *badPtr = val; return 0; } if (encodingName) *encodingName = val; if (encoding) *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); if (!parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { *badPtr = ptr; return 0; } if (!name) return 1; } if (!XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) || isGeneralTextEntity) { *badPtr = name; return 0; } if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { if (standalone) *standalone = 1; } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { if (standalone) *standalone = 
0; } else { *badPtr = val; return 0; } while (isSpace(toAscii(enc, ptr, end))) ptr += enc->minBytesPerChar; if (ptr != end) { *badPtr = ptr; return 0; } return 1; } static int checkCharRefNumber(int result) { switch (result >> 8) { case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: case 0xDF: return -1; case 0: if (latin1_encoding.type[result] == BT_NONXML) return -1; break; case 0xFF: if (result == 0xFFFE || result == 0xFFFF) return -1; break; } return result; } int XmlUtf8Encode(int c, char *buf) { enum { /* minN is minimum legal resulting value for N byte sequence */ min2 = 0x80, min3 = 0x800, min4 = 0x10000 }; if (c < 0) return 0; if (c < min2) { buf[0] = (c | UTF8_cval1); return 1; } if (c < min3) { buf[0] = ((c >> 6) | UTF8_cval2); buf[1] = ((c & 0x3f) | 0x80); return 2; } if (c < min4) { buf[0] = ((c >> 12) | UTF8_cval3); buf[1] = (((c >> 6) & 0x3f) | 0x80); buf[2] = ((c & 0x3f) | 0x80); return 3; } if (c < 0x110000) { buf[0] = ((c >> 18) | UTF8_cval4); buf[1] = (((c >> 12) & 0x3f) | 0x80); buf[2] = (((c >> 6) & 0x3f) | 0x80); buf[3] = ((c & 0x3f) | 0x80); return 4; } return 0; } int XmlUtf16Encode(int charNum, unsigned short *buf) { if (charNum < 0) return 0; if (charNum < 0x10000) { buf[0] = charNum; return 1; } if (charNum < 0x110000) { charNum -= 0x10000; buf[0] = (charNum >> 10) + 0xD800; buf[1] = (charNum & 0x3FF) + 0xDC00; return 2; } return 0; } struct unknown_encoding { struct normal_encoding normal; int (*convert)(void *userData, const char *p); void *userData; unsigned short utf16[256]; char utf8[256][4]; }; int XmlSizeOfUnknownEncoding(void) { return sizeof(struct unknown_encoding); } static int unknown_isName(const ENCODING *enc, const char *p) { int c = ((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); } static int unknown_isNmstrt(const ENCODING *enc, const char *p) { int c = ((const 
struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, p); if (c & ~0xFFFF) return 0; return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); } static int unknown_isInvalid(const ENCODING *enc, const char *p) { int c = ((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, p); return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } static void unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) { char buf[XML_UTF8_ENCODE_MAX]; for (;;) { const char *utf8; int n; if (*fromP == fromLim) break; utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { int c = ((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) break; utf8 = buf; *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2); } else { if (n > toLim - *toP) break; (*fromP)++; } do { *(*toP)++ = *utf8++; } while (--n != 0); } } static void unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { while (*fromP != fromLim && *toP != toLim) { unsigned short c = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP]; if (c == 0) { c = (unsigned short)((const struct unknown_encoding *)enc) ->convert(((const struct unknown_encoding *)enc)->userData, *fromP); *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2); } else (*fromP)++; *(*toP)++ = c; } } ENCODING * XmlInitUnknownEncoding(void *mem, int *table, int (*convert)(void *userData, const char *p), void *userData) { int i; struct unknown_encoding *e = mem; for (i = 0; i < (int)sizeof(struct normal_encoding); i++) ((char *)mem)[i] = ((char *)&latin1_encoding)[i]; for (i = 0; i < 128; i++) if (latin1_encoding.type[i] != 
BT_OTHER && latin1_encoding.type[i] != BT_NONXML && table[i] != i) return 0; for (i = 0; i < 256; i++) { int c = table[i]; if (c == -1) { e->normal.type[i] = BT_MALFORM; /* This shouldn't really get used. */ e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; } else if (c < 0) { if (c < -4) return 0; e->normal.type[i] = BT_LEAD2 - (c + 2); e->utf8[i][0] = 0; e->utf16[i] = 0; } else if (c < 0x80) { if (latin1_encoding.type[c] != BT_OTHER && latin1_encoding.type[c] != BT_NONXML && c != i) return 0; e->normal.type[i] = latin1_encoding.type[c]; e->utf8[i][0] = 1; e->utf8[i][1] = (char)c; e->utf16[i] = c == 0 ? 0xFFFF : c; } else if (checkCharRefNumber(c) < 0) { e->normal.type[i] = BT_NONXML; /* This shouldn't really get used. */ e->utf16[i] = 0xFFFF; e->utf8[i][0] = 1; e->utf8[i][1] = 0; } else { if (c > 0xFFFF) return 0; if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) e->normal.type[i] = BT_NMSTRT; else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) e->normal.type[i] = BT_NAME; else e->normal.type[i] = BT_OTHER; e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); e->utf16[i] = c; } } e->userData = userData; e->convert = convert; if (convert) { e->normal.isName2 = unknown_isName; e->normal.isName3 = unknown_isName; e->normal.isName4 = unknown_isName; e->normal.isNmstrt2 = unknown_isNmstrt; e->normal.isNmstrt3 = unknown_isNmstrt; e->normal.isNmstrt4 = unknown_isNmstrt; e->normal.isInvalid2 = unknown_isInvalid; e->normal.isInvalid3 = unknown_isInvalid; e->normal.isInvalid4 = unknown_isInvalid; } e->normal.enc.utf8Convert = unknown_toUtf8; e->normal.enc.utf16Convert = unknown_toUtf16; return &(e->normal.enc); } /* If this enumeration is changed, getEncodingIndex and encodings must also be changed. 
*/ enum { UNKNOWN_ENC = -1, ISO_8859_1_ENC = 0, US_ASCII_ENC, UTF_8_ENC, UTF_16_ENC, UTF_16BE_ENC, UTF_16LE_ENC, /* must match encodingNames up to here */ NO_ENC }; static const char KW_ISO_8859_1[] = { ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0' }; static const char KW_US_ASCII[] = { ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, ASCII_C, ASCII_I, ASCII_I, '\0' }; static const char KW_UTF_8[] = { ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0' }; static const char KW_UTF_16[] = { ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0' }; static const char KW_UTF_16BE[] = { ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_B, ASCII_E, '\0' }; static const char KW_UTF_16LE[] = { ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, ASCII_L, ASCII_E, '\0' }; static int getEncodingIndex(const char *name) { static const char *encodingNames[] = { KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE, }; int i; if (name == 0) return NO_ENC; for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++) if (streqci(name, encodingNames[i])) return i; return UNKNOWN_ENC; } /* For binary compatibility, we store the index of the encoding specified at initialization in the isUtf16 member. */ #define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) #define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) /* This is what detects the encoding. encodingTable maps from encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding; state is XML_CONTENT_STATE if we're parsing an external text entity, and XML_PROLOG_STATE otherwise. 
*/

/* Returns XML_TOK_NONE for empty input and XML_TOK_PARTIAL when more
   bytes are needed before a decision can be made.  When a byte order mark
   is recognised, returns XML_TOK_BOM with *nextTokPtr pointing just past
   the mark.  Otherwise returns whatever the chosen encoding's scanner for
   `state` returns for the same input.  Whenever an encoding is chosen it
   is stored through enc->encPtr. */

static
int initScan(const ENCODING **encodingTable,
             const INIT_ENCODING *enc,
             int state,
             const char *ptr,
             const char *end,
             const char **nextTokPtr)
{
  const ENCODING **encPtr;

  if (ptr == end)
    return XML_TOK_NONE;
  encPtr = enc->encPtr;
  if (ptr + 1 == end) {
    /* only a single byte available for auto-detection */
#ifndef XML_DTD /* FIXME */
    /* a well-formed document entity must have more than one byte */
    if (state != XML_CONTENT_STATE)
      return XML_TOK_PARTIAL;
#endif
    /* so we're parsing an external text entity... */
    /* if UTF-16 was externally specified, then we need at least 2 bytes */
    switch (INIT_ENC_INDEX(enc)) {
    case UTF_16_ENC:
    case UTF_16LE_ENC:
    case UTF_16BE_ENC:
      return XML_TOK_PARTIAL;
    }
    switch ((unsigned char)*ptr) {
    case 0xFE:
    case 0xFF:
    case 0xEF: /* possibly first byte of UTF-8 BOM */
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
          && state == XML_CONTENT_STATE)
        break;
      /* fall through */
    case 0x00:
    case 0x3C:
      return XML_TOK_PARTIAL;
    }
  }
  else {
    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
    case 0xFEFF:
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
          && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16BE_ENC];
      return XML_TOK_BOM;
    /* 00 3C is handled in the default case */
    case 0x3C00:
      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
          && state == XML_CONTENT_STATE)
        break;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
    case 0xFFFE:
      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
          && state == XML_CONTENT_STATE)
        break;
      *nextTokPtr = ptr + 2;
      *encPtr = encodingTable[UTF_16LE_ENC];
      return XML_TOK_BOM;
    case 0xEFBB:
      /* Maybe a UTF-8 BOM (EF BB BF) */
      /* If there's an explicitly specified (external) encoding
         of ISO-8859-1 or some flavour of UTF-16
         and this is an external text entity,
         don't look for the BOM,
         because it might be legal data. */
      if (state == XML_CONTENT_STATE) {
        int e = INIT_ENC_INDEX(enc);
        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC
            || e == UTF_16LE_ENC || e == UTF_16_ENC)
          break;
      }
      if (ptr + 2 == end)
        return XML_TOK_PARTIAL;
      if ((unsigned char)ptr[2] == 0xBF) {
        /* Report where the 3 byte mark ends, as the UTF-16 BOM cases do
           for their 2 byte marks; otherwise the caller is handed
           XML_TOK_BOM with *nextTokPtr never written. */
        *nextTokPtr = ptr + 3;
        *encPtr = encodingTable[UTF_8_ENC];
        return XML_TOK_BOM;
      }
      break;
    default:
      if (ptr[0] == '\0') {
        /* 0 isn't a legal data character. Furthermore a document entity can
           only start with ASCII characters.  So the only way this can fail
           to be big-endian UTF-16 is if it's an external parsed general
           entity that's labelled as UTF-16LE. */
        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
          break;
        *encPtr = encodingTable[UTF_16BE_ENC];
        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
      }
      else if (ptr[1] == '\0') {
        /* We could recover here in the case:
            - parsing an external entity
            - second byte is 0
            - no externally specified encoding
            - no encoding declaration
           by assuming UTF-16LE.  But we don't, because this would mean when
           presented just with a single byte, we couldn't reliably determine
           whether we needed further bytes.
*/ if (state == XML_CONTENT_STATE) break; *encPtr = encodingTable[UTF_16LE_ENC]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } break; } } *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; return XmlTok(*encPtr, state, ptr, end, nextTokPtr); } #define NS(x) x #define ns(x) x #include "xmltok_ns.c" #undef NS #undef ns #ifdef XML_NS #define NS(x) x ## NS #define ns(x) x ## _ns #include "xmltok_ns.c" #undef NS #undef ns ENCODING * XmlInitUnknownEncodingNS(void *mem, int *table, int (*convert)(void *userData, const char *p), void *userData) { ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); if (enc) ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; return enc; } #endif /* XML_NS */ expat-1.2/xmltok/xmltok.h0100664000076400007640000002347207103723130013617 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #ifndef XmlTok_INCLUDED #define XmlTok_INCLUDED 1 #ifdef __cplusplus extern "C" { #endif #ifndef XMLTOKAPI #define XMLTOKAPI /* as nothing */ #endif /* The following token may be returned by XmlContentTok */ #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be start of illegal ]]> sequence */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ #define XML_TOK_NONE -4 /* The string to be scanned is empty */ #define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan; might be part of CRLF sequence */ #define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */ #define XML_TOK_PARTIAL -1 /* only part of a token */ #define XML_TOK_INVALID 0 /* The following tokens are returned by XmlContentTok; some are also returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok */ #define XML_TOK_START_TAG_WITH_ATTS 1 #define XML_TOK_START_TAG_NO_ATTS 2 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag */ #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 #define XML_TOK_END_TAG 5 #define 
XML_TOK_DATA_CHARS 6 #define XML_TOK_DATA_NEWLINE 7 #define XML_TOK_CDATA_SECT_OPEN 8 #define XML_TOK_ENTITY_REF 9 #define XML_TOK_CHAR_REF 10 /* numeric character reference */ /* The following tokens may be returned by both XmlPrologTok and XmlContentTok */ #define XML_TOK_PI 11 /* processing instruction */ #define XML_TOK_XML_DECL 12 /* XML decl or text decl */ #define XML_TOK_COMMENT 13 #define XML_TOK_BOM 14 /* Byte order mark */ /* The following tokens are returned only by XmlPrologTok */ #define XML_TOK_PROLOG_S 15 #define XML_TOK_DECL_OPEN 16 /* */ #define XML_TOK_NAME 18 #define XML_TOK_NMTOKEN 19 #define XML_TOK_POUND_NAME 20 /* #name */ #define XML_TOK_OR 21 /* | */ #define XML_TOK_PERCENT 22 #define XML_TOK_OPEN_PAREN 23 #define XML_TOK_CLOSE_PAREN 24 #define XML_TOK_OPEN_BRACKET 25 #define XML_TOK_CLOSE_BRACKET 26 #define XML_TOK_LITERAL 27 #define XML_TOK_PARAM_ENTITY_REF 28 #define XML_TOK_INSTANCE_START 29 /* The following occur only in element type declarations */ #define XML_TOK_NAME_QUESTION 30 /* name? */ #define XML_TOK_NAME_ASTERISK 31 /* name* */ #define XML_TOK_NAME_PLUS 32 /* name+ */ #define XML_TOK_COND_SECT_OPEN 33 /* */ #define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */ #define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */ #define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */ #define XML_TOK_COMMA 38 /* The following token is returned only by XmlAttributeValueTok */ #define XML_TOK_ATTRIBUTE_VALUE_S 39 /* The following token is returned only by XmlCdataSectionTok */ #define XML_TOK_CDATA_SECT_CLOSE 40 /* With namespace processing this is returned by XmlPrologTok for a name with a colon. 
*/ #define XML_TOK_PREFIXED_NAME 41 #ifdef XML_DTD #define XML_TOK_IGNORE_SECT 42 #endif /* XML_DTD */ #ifdef XML_DTD #define XML_N_STATES 4 #else /* not XML_DTD */ #define XML_N_STATES 3 #endif /* not XML_DTD */ #define XML_PROLOG_STATE 0 #define XML_CONTENT_STATE 1 #define XML_CDATA_SECTION_STATE 2 #ifdef XML_DTD #define XML_IGNORE_SECTION_STATE 3 #endif /* XML_DTD */ #define XML_N_LITERAL_TYPES 2 #define XML_ATTRIBUTE_VALUE_LITERAL 0 #define XML_ENTITY_VALUE_LITERAL 1 /* The size of the buffer passed to XmlUtf8Encode must be at least this. */ #define XML_UTF8_ENCODE_MAX 4 /* The size of the buffer passed to XmlUtf16Encode must be at least this. */ #define XML_UTF16_ENCODE_MAX 2 typedef struct position { /* first line and first column are 0 not 1 */ unsigned long lineNumber; unsigned long columnNumber; } POSITION; typedef struct { const char *name; const char *valuePtr; const char *valueEnd; char normalized; } ATTRIBUTE; struct encoding; typedef struct encoding ENCODING; struct encoding { int (*scanners[XML_N_STATES])(const ENCODING *, const char *, const char *, const char **); int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *, const char *, const char *, const char **); int (*sameName)(const ENCODING *, const char *, const char *); int (*nameMatchesAscii)(const ENCODING *, const char *, const char *, const char *); int (*nameLength)(const ENCODING *, const char *); const char *(*skipS)(const ENCODING *, const char *); int (*getAtts)(const ENCODING *enc, const char *ptr, int attsMax, ATTRIBUTE *atts); int (*charRefNumber)(const ENCODING *enc, const char *ptr); int (*predefinedEntityName)(const ENCODING *, const char *, const char *); void (*updatePosition)(const ENCODING *, const char *ptr, const char *end, POSITION *); int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr); void (*utf8Convert)(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim); void (*utf16Convert)(const 
ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim); int minBytesPerChar; char isUtf8; char isUtf16; }; /* Scan the string starting at ptr until the end of the next complete token, but do not scan past eptr. Return an integer giving the type of token. Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set. Return XML_TOK_PARTIAL when the string does not contain a complete token; nextTokPtr will not be set. Return XML_TOK_INVALID when the string does not start a valid token; nextTokPtr will be set to point to the character which made the token invalid. Otherwise the string starts with a valid token; nextTokPtr will be set to point to the character following the end of that token. Each data character counts as a single token, but adjacent data characters may be returned together. Similarly for characters in the prolog outside literals, comments and processing instructions. */ #define XmlTok(enc, state, ptr, end, nextTokPtr) \ (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) #define XmlPrologTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) #define XmlContentTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) #ifdef XML_DTD #define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) #endif /* XML_DTD */ /* This is used for performing a 2nd-level tokenization on the content of a literal that has already been returned by XmlTok. 
*/ #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) #define XmlNameLength(enc, ptr) \ (((enc)->nameLength)(enc, ptr)) #define XmlSkipS(enc, ptr) \ (((enc)->skipS)(enc, ptr)) #define XmlGetAttributes(enc, ptr, attsMax, atts) \ (((enc)->getAtts)(enc, ptr, attsMax, atts)) #define XmlCharRefNumber(enc, ptr) \ (((enc)->charRefNumber)(enc, ptr)) #define XmlPredefinedEntityName(enc, ptr, end) \ (((enc)->predefinedEntityName)(enc, ptr, end)) #define XmlUpdatePosition(enc, ptr, end, pos) \ (((enc)->updatePosition)(enc, ptr, end, pos)) #define XmlIsPublicId(enc, ptr, end, badPtr) \ (((enc)->isPublicId)(enc, ptr, end, badPtr)) #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) typedef struct { ENCODING initEnc; const ENCODING **encPtr; } INIT_ENCODING; int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); int XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name); const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncoding(void); const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncoding(void); int XMLTOKAPI XmlUtf8Encode(int charNumber, char *buf); int XMLTOKAPI XmlUtf16Encode(int charNumber, unsigned 
short *buf); int XMLTOKAPI XmlSizeOfUnknownEncoding(void); ENCODING XMLTOKAPI * XmlInitUnknownEncoding(void *mem, int *table, int (*conv)(void *userData, const char *p), void *userData); int XMLTOKAPI XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingNamePtr, const ENCODING **namedEncodingPtr, int *standalonePtr); int XMLTOKAPI XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name); const ENCODING XMLTOKAPI *XmlGetUtf8InternalEncodingNS(void); const ENCODING XMLTOKAPI *XmlGetUtf16InternalEncodingNS(void); ENCODING XMLTOKAPI * XmlInitUnknownEncodingNS(void *mem, int *table, int (*conv)(void *userData, const char *p), void *userData); #ifdef __cplusplus } #endif #endif /* not XmlTok_INCLUDED */ expat-1.2/xmltok/xmltok_impl.c0100664000076400007640000011765307077753352014663 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ #ifndef IS_INVALID_CHAR #define IS_INVALID_CHAR(enc, ptr, n) (0) #endif #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (IS_INVALID_CHAR(enc, ptr, n)) { \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; \ } \ ptr += n; \ break; #define INVALID_CASES(ptr, nextTokPtr) \ INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ case BT_NONXML: \ case BT_MALFORM: \ case BT_TRAIL: \ *(nextTokPtr) = (ptr); \ return XML_TOK_INVALID; #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (!IS_NAME_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ ptr += n; \ break; #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ case BT_NONASCII: \ if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ case BT_NMSTRT: \ case BT_HEX: \ case BT_DIGIT: \ case BT_NAME: \ case BT_MINUS: \ ptr += MINBPC(enc); \ break; \ CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ ptr += n; \ break; #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ case BT_NONASCII: \ if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; \ } \ case BT_NMSTRT: \ case BT_HEX: \ ptr += MINBPC(enc); \ break; \ CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) #ifndef PREFIX #define PREFIX(ident) ident #endif /* ptr points to character following " */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: 
case BT_CR: case BT_LF: case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_INVALID; } /* fall through */ case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return XML_TOK_DECL_OPEN; case BT_NMSTRT: case BT_HEX: ptr += MINBPC(enc); break; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; } static int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) { int upper = 0; *tokPtr = XML_TOK_PI; if (end - ptr != MINBPC(enc)*3) return 1; switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_x: break; case ASCII_X: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_m: break; case ASCII_M: upper = 1; break; default: return 1; } ptr += MINBPC(enc); switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_l: break; case ASCII_L: upper = 1; break; default: return 1; } if (upper) return 0; *tokPtr = XML_TOK_XML_DECL; return 1; } /* ptr points to character following " 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } } switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; 
LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONXML: case BT_MALFORM: case BT_TRAIL: case BT_CR: case BT_LF: case BT_RSQB: *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; } /* ptr points to character following " 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } } switch (BYTE_TYPE(enc, ptr)) { case BT_LT: return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_AMP: return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; case BT_LF: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; } *nextTokPtr = ptr; return XML_TOK_INVALID; INVALID_CASES(ptr, nextTokPtr) default: ptr += MINBPC(enc); break; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ *nextTokPtr = ptr; \ return XML_TOK_DATA_CHARS; \ } \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_RSQB: if (ptr + MINBPC(enc) != end) { if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { ptr += MINBPC(enc); break; } if (ptr + 2*MINBPC(enc) != end) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { ptr += MINBPC(enc); break; } *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_INVALID; } } /* fall through */ case BT_AMP: case BT_LT: case BT_NONXML: case BT_MALFORM: case BT_TRAIL: case BT_CR: case BT_LF: *nextTokPtr = ptr; 
return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; } /* ptr points to character following "%" */ static int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: *nextTokPtr = ptr; return XML_TOK_PERCENT; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_PARAM_ENTITY_REF; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return XML_TOK_PARTIAL; } static int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_CR: case BT_LF: case BT_S: case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: *nextTokPtr = ptr; return XML_TOK_POUND_NAME; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return -XML_TOK_POUND_NAME; } static int PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { while (ptr != end) { int t = BYTE_TYPE(enc, ptr); switch (t) { INVALID_CASES(ptr, nextTokPtr) case BT_QUOT: case BT_APOS: ptr += MINBPC(enc); if (t != open) break; if (ptr == end) return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: case BT_GT: case BT_PERCNT: case BT_LSQB: return XML_TOK_LITERAL; default: return XML_TOK_INVALID; } default: ptr += MINBPC(enc); break; } } return XML_TOK_PARTIAL; } static int 
/* Tokenizer entry point for the prolog: scans one token from [ptr, end).
   Returns XML_TOK_NONE if ptr == end.  When MINBPC(enc) > 1, end is first
   rounded down to a whole number of code units (XML_TOK_PARTIAL if not even
   one unit is available).
   The first character selects the token:
     - BT_QUOT / BT_APOS: delegated to scanLit;
     - BT_LT: scanDecl after BT_EXCL, scanPi after BT_QUEST,
       XML_TOK_INSTANCE_START (with *nextTokPtr back at the BT_LT) before a
       name-start-like character, XML_TOK_INVALID otherwise;
     - BT_CR / BT_S / BT_LF: a run of white space, XML_TOK_PROLOG_S; a CR
       that is the last available character is never split off from a
       possible following LF;
     - BT_PERCNT: scanPercent;  BT_NUM: scanPoundName;
     - punctuation: XML_TOK_COMMA, XML_TOK_OPEN_BRACKET,
       XML_TOK_CLOSE_BRACKET, XML_TOK_COND_SECT_CLOSE, XML_TOK_OPEN_PAREN,
       XML_TOK_CLOSE_PAREN (and its ASTERISK / QUESTION / PLUS variants),
       XML_TOK_OR, XML_TOK_DECL_CLOSE;
     - anything that can start a name or nmtoken: XML_TOK_NAME or
       XML_TOK_NMTOKEN (XML_TOK_PREFIXED_NAME when XML_NS is defined and the
       name contains one colon), or XML_TOK_NAME_PLUS / _ASTERISK /
       _QUESTION when a name is directly followed by that character.
   A negated token value (for example -tok at the end of the function) is
   returned when the input ends at a point where the token could still
   continue; how callers treat it is defined outside this section. */
PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int tok; if (ptr == end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); if (n == 0) return XML_TOK_PARTIAL; end = ptr + n; } } switch (BYTE_TYPE(enc, ptr)) { case BT_QUOT: return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_APOS: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_LT: { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_PARTIAL; switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_QUEST: return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_NMSTRT: case BT_HEX: case BT_NONASCII: case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: *nextTokPtr = ptr - MINBPC(enc); return XML_TOK_INSTANCE_START; } *nextTokPtr = ptr; return XML_TOK_INVALID; } case BT_CR: if (ptr + MINBPC(enc) == end) return -XML_TOK_PROLOG_S; /* fall through */ case BT_S: case BT_LF: for (;;) { ptr += MINBPC(enc); if (ptr == end) break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: break; case BT_CR: /* don't split CR/LF pair */ if (ptr + MINBPC(enc) != end) break; /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_PROLOG_S; } } *nextTokPtr = ptr; return XML_TOK_PROLOG_S; case BT_PERCNT: return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_COMMA: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_COMMA; case BT_LSQB: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); if (ptr == end) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if (ptr + MINBPC(enc) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; } } *nextTokPtr = ptr; return XML_TOK_CLOSE_BRACKET; case BT_LPAR: 
*nextTokPtr = ptr + MINBPC(enc); return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); if (ptr == end) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_ASTERISK; case BT_QUEST: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_QUESTION; case BT_PLUS: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_CLOSE_PAREN_PLUS; case BT_CR: case BT_LF: case BT_S: case BT_GT: case BT_COMMA: case BT_VERBAR: case BT_RPAR: *nextTokPtr = ptr; return XML_TOK_CLOSE_PAREN; } *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_VERBAR: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_OR; case BT_GT: *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DECL_CLOSE; case BT_NUM: return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (end - ptr < n) \ return XML_TOK_PARTIAL_CHAR; \ if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NAME; \ break; \ } \ if (IS_NAME_CHAR(enc, ptr, n)) { \ ptr += n; \ tok = XML_TOK_NMTOKEN; \ break; \ } \ *nextTokPtr = ptr; \ return XML_TOK_INVALID; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NMSTRT: case BT_HEX: tok = XML_TOK_NAME; ptr += MINBPC(enc); break; case BT_DIGIT: case BT_NAME: case BT_MINUS: #ifdef XML_NS case BT_COLON: #endif tok = XML_TOK_NMTOKEN; ptr += MINBPC(enc); break; case BT_NONASCII: if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { ptr += MINBPC(enc); tok = XML_TOK_NAME; break; } if (IS_NAME_CHAR_MINBPC(enc, ptr)) { ptr += MINBPC(enc); tok = XML_TOK_NMTOKEN; break; } /* fall through */ default: *nextTokPtr = ptr; return XML_TOK_INVALID; } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_GT: case BT_RPAR: case BT_COMMA: case BT_VERBAR: case BT_LSQB: case BT_PERCNT: case BT_S: case BT_CR: case BT_LF: *nextTokPtr = ptr; return tok; #ifdef XML_NS case BT_COLON: ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: 
if (ptr == end) return XML_TOK_PARTIAL; tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) default: tok = XML_TOK_NMTOKEN; break; } break; case XML_TOK_PREFIXED_NAME: tok = XML_TOK_NMTOKEN; break; } break; #endif case BT_PLUS: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_PLUS; case BT_AST: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_ASTERISK; case BT_QUEST: if (tok == XML_TOK_NMTOKEN) { *nextTokPtr = ptr; return XML_TOK_INVALID; } *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_NAME_QUESTION; default: *nextTokPtr = ptr; return XML_TOK_INVALID; } } return -tok; }
/* Scans one token of an attribute value from [ptr, end).
   Returns XML_TOK_NONE if ptr == end.  A special character found at the
   very start produces its own token: BT_AMP is handed to scanRef, BT_LF
   gives XML_TOK_DATA_NEWLINE, BT_CR gives XML_TOK_DATA_NEWLINE (consuming
   a directly following LF) or XML_TOK_TRAILING_CR when it is the last
   character available, and BT_S gives XML_TOK_ATTRIBUTE_VALUE_S.
   When one of those characters is met after ordinary data, the data scanned
   so far is returned as XML_TOK_DATA_CHARS with *nextTokPtr left at the
   special character.  BT_LT always yields XML_TOK_INVALID with *nextTokPtr
   at the offending character.  Lead bytes are skipped by their declared
   length without comparing against end. */
static int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LT: /* this is for inside entity references */ *nextTokPtr = ptr; return XML_TOK_INVALID; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_S: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_ATTRIBUTE_VALUE_S; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return 
XML_TOK_DATA_CHARS; }
/* Scans one token of an entity value from [ptr, end).
   Returns XML_TOK_NONE if ptr == end.  A special character at the very
   start produces its own token: BT_AMP is handed to scanRef, BT_PERCNT to
   scanPercent, BT_LF gives XML_TOK_DATA_NEWLINE, and BT_CR gives
   XML_TOK_DATA_NEWLINE (consuming a directly following LF) or
   XML_TOK_TRAILING_CR when it is the last character available.
   When one of those characters is met after ordinary data, the data scanned
   so far is returned as XML_TOK_DATA_CHARS with *nextTokPtr left at the
   special character.  Lead bytes are skipped by their declared length
   without comparing against end. */
static int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; if (ptr == end) return XML_TOK_NONE; start = ptr; while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_AMP: if (ptr == start) return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_PERCNT: if (ptr == start) return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_LF: if (ptr == start) { *nextTokPtr = ptr + MINBPC(enc); return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; case BT_CR: if (ptr == start) { ptr += MINBPC(enc); if (ptr == end) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; return XML_TOK_DATA_NEWLINE; } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; default: ptr += MINBPC(enc); break; } } *nextTokPtr = ptr; return XML_TOK_DATA_CHARS; }
/* Only compiled when XML_DTD is defined.  Scans to the end of an ignored
   conditional section, keeping a nesting count in level: each "<![" seen
   increments it and each "]]>" either ends the scan (level == 0, returning
   XML_TOK_IGNORE_SECT with *nextTokPtr just past the "]]>") or decrements
   it.  Returns XML_TOK_PARTIAL whenever the input runs out first.  When
   MINBPC(enc) > 1, end is rounded down to a whole number of code units
   before scanning. */
#ifdef XML_DTD static int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int level = 0; if (MINBPC(enc) > 1) { size_t n = end - ptr; if (n & (MINBPC(enc) - 1)) { n &= ~(MINBPC(enc) - 1); end = ptr + n; } } while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_LT: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); } } break; case BT_RSQB: if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { if ((ptr += MINBPC(enc)) == end) return XML_TOK_PARTIAL; if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { *nextTokPtr 
= ptr; return XML_TOK_IGNORE_SECT; } --level; } } break; default: ptr += MINBPC(enc); break; } } return XML_TOK_PARTIAL; } #endif /* XML_DTD */
/* Checks that every character of a public identifier is allowed.
   The first and the last code unit of [ptr, end) are skipped before the
   check (presumably the surrounding quotes -- confirm against the caller).
   Returns 1 if all remaining characters are acceptable; otherwise stores
   the position of the first bad character in *badPtr and returns 0.
   Accepted: the punctuation, digit and line-break byte types listed in the
   first case group, BT_S other than a tab, BT_NAME / BT_NMSTRT characters
   whose code is below 0x80, and the two characters '$' and '@'. */
static int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, const char **badPtr) { ptr += MINBPC(enc); end -= MINBPC(enc); for (; ptr != end; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: case BT_MINUS: case BT_APOS: case BT_LPAR: case BT_RPAR: case BT_PLUS: case BT_COMMA: case BT_SOL: case BT_EQUALS: case BT_QUEST: case BT_CR: case BT_LF: case BT_SEMI: case BT_EXCL: case BT_AST: case BT_PERCNT: case BT_NUM: #ifdef XML_NS case BT_COLON: #endif break; case BT_S: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { *badPtr = ptr; return 0; } break; case BT_NAME: case BT_NMSTRT: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) break; /* non-ASCII name characters deliberately fall through to the default check */ default: switch (BYTE_TO_ASCII(enc, ptr)) { case 0x24: /* $ */ case 0x40: /* @ */ break; default: *badPtr = ptr; return 0; } break; } } return 1; }
/* This must only be called for a well-formed start-tag or empty element tag. Returns the number of attributes. Pointers to the first attsMax attributes are stored in atts. 
*/
/* Implementation notes for getAtts:
   - The scan starts one code unit past ptr in state inName, so the element
     name itself is never recorded as an attribute name; START_NAME only
     records a name when the state is "other".
   - An opening quote stores valuePtr just past the quote; the matching
     closing quote stores valueEnd and increments nAtts.  nAtts keeps
     counting past attsMax, but atts[] is only written while
     nAtts < attsMax.
   - normalized starts at 1 for each recorded attribute and is cleared by a
     BT_AMP, by a CR or LF inside the value, and by a BT_S inside the value
     that is at the start of the value, is not a plain space, or is followed
     by another space or by the closing quote.
   - The loop ends at the first BT_GT or BT_SOL found outside a value. */
static int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax, ATTRIBUTE *atts) { enum { other, inName, inValue } state = inName; int nAtts = 0; int open = 0; /* defined when state == inValue; initialization just to shut up compilers */ for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { #define START_NAME \ if (state == other) { \ if (nAtts < attsMax) { \ atts[nAtts].name = ptr; \ atts[nAtts].normalized = 1; \ } \ state = inName; \ } #define LEAD_CASE(n) \ case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: case BT_HEX: START_NAME break; #undef START_NAME case BT_QUOT: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_QUOT; } else if (open == BT_QUOT) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_APOS: if (state != inValue) { if (nAtts < attsMax) atts[nAtts].valuePtr = ptr + MINBPC(enc); state = inValue; open = BT_APOS; } else if (open == BT_APOS) { state = other; if (nAtts < attsMax) atts[nAtts].valueEnd = ptr; nAtts++; } break; case BT_AMP: if (nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_S: if (state == inName) state = other; else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized && (ptr == atts[nAtts].valuePtr || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) atts[nAtts].normalized = 0; break; case BT_CR: case BT_LF: /* This case ensures that the first attribute name is counted Apart from that we could just change state on the quote. 
*/ if (state == inName) state = other; else if (state == inValue && nAtts < attsMax) atts[nAtts].normalized = 0; break; case BT_GT: case BT_SOL: if (state != inValue) return nAtts; break; default: break; } } /* not reached */ }
/* Converts a numeric character reference to its code point.
   ptr points at the start of the reference; the first two code units are
   skipped.  If the next character is 'x' the digits up to the ';' are read
   as hexadecimal, otherwise as decimal.  Returns -1 as soon as the
   accumulated value reaches 0x110000; otherwise returns whatever
   checkCharRefNumber (defined outside this section) returns for the value.
   NOTE(review): neither loop has an end bound, the decimal loop does not
   check that each character is a digit, and the hex switch silently ignores
   non-hex characters -- presumably the reference has already been validated
   by the tokenizer; confirm against the caller. */
static int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) { int result = 0; /* skip &# */ ptr += 2*MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_x)) { for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); switch (c) { case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: result <<= 4; result |= (c - ASCII_0); break; case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F: result <<= 4; result += 10 + (c - ASCII_A); break; case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f: result <<= 4; result += 10 + (c - ASCII_a); break; } if (result >= 0x110000) return -1; } } else { for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { int c = BYTE_TO_ASCII(enc, ptr); result *= 10; result += (c - ASCII_0); if (result >= 0x110000) return -1; } } return checkCharRefNumber(result); }
/* Recognises the five predefined entity names in [ptr, end).
   Dispatches on the length in code units: "lt" and "gt" (2), "amp" (3),
   "quot" and "apos" (4).  Returns the matching ASCII_LT, ASCII_GT,
   ASCII_AMP, ASCII_QUOT or ASCII_APOS constant, or 0 if the name is not
   one of them. */
static int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) { switch ((end - ptr)/MINBPC(enc)) { case 2: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_l: return ASCII_LT; case ASCII_g: return ASCII_GT; } } break; case 3: if (CHAR_MATCHES(enc, ptr, ASCII_a)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_m)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_p)) return ASCII_AMP; } } break; case 4: switch (BYTE_TO_ASCII(enc, ptr)) { case ASCII_q: ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_u)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_t)) return ASCII_QUOT; } } break; case 
ASCII_a: ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_p)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_o)) { ptr += MINBPC(enc); if (CHAR_MATCHES(enc, ptr, ASCII_s)) return ASCII_APOS; } } break; } } return 0; }
/* Compares the names starting at ptr1 and ptr2; neither has an explicit
   end, the byte type at ptr1 decides when the name is over.
   While ptr1 is on a name character the two inputs are compared byte by
   byte (the LEAD_CASE labels are ordered 4, 3, 2 and fall through, so an
   n-byte lead compares n bytes); any difference returns 0.
   When ptr1 reaches a non-name character: with MINBPC(enc) == 1 and equal
   bytes at both positions the result is 1; otherwise the result is 0 if
   ptr2 is still on a name character and 1 if it is not. */
static int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) { for (;;) { switch (BYTE_TYPE(enc, ptr1)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ if (*ptr1++ != *ptr2++) \ return 0; LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) #undef LEAD_CASE /* fall through */ if (*ptr1++ != *ptr2++) return 0; break; case BT_NONASCII: case BT_NMSTRT: #ifdef XML_NS case BT_COLON: #endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 1) { if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 2) { if (*ptr2++ != *ptr1++) return 0; if (MINBPC(enc) > 3) { if (*ptr2++ != *ptr1++) return 0; } } } break; default: if (MINBPC(enc) == 1 && *ptr1 == *ptr2) return 1; switch (BYTE_TYPE(enc, ptr2)) { case BT_LEAD2: case BT_LEAD3: case BT_LEAD4: case BT_NONASCII: case BT_NMSTRT: #ifdef XML_NS case BT_COLON: #endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: return 0; default: return 1; } } } /* not reached */ }
/* Compares the encoded name [ptr1, end1) with the NUL-terminated string
   ptr2, one code unit of ptr1 per char of ptr2 (via CHAR_MATCHES).
   Returns nonzero only if every character matches and both end together. */
static int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { if (ptr1 == end1) return 0; if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; } return ptr1 == end1; }
/* Returns the length in bytes (ptr - start) of the name beginning at ptr,
   i.e. the distance to the first character whose byte type is not one of
   the name byte types listed in the switch.  There is no end bound. */
static int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) { const char *start = ptr; for (;;) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_NONASCII: case BT_NMSTRT: #ifdef XML_NS case BT_COLON: #endif case BT_HEX: case BT_DIGIT: case BT_NAME: case BT_MINUS: ptr += MINBPC(enc); break; default: return ptr - start; } } }
/* Skips characters of type BT_LF, BT_CR and BT_S starting at ptr and
   returns a pointer to the first character of any other type.  There is
   no end bound. */
static const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) { for (;;) 
{ switch (BYTE_TYPE(enc, ptr)) { case BT_LF: case BT_CR: case BT_S: ptr += MINBPC(enc); break; default: return ptr; } } }
/* Advances the line/column counters in *pos over the text [ptr, end).
   Every character adds one to columnNumber regardless of how many bytes it
   occupies.  A line break (LF, CR, or CR directly followed by LF, which
   counts once) increments lineNumber and sets columnNumber to (unsigned)-1
   so that the increment at the bottom of the loop leaves it at 0.
   Lead bytes are skipped by their declared length without comparing
   against end. */
static void PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end, POSITION *pos) { while (ptr != end) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ ptr += n; \ break; LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_LF: pos->columnNumber = (unsigned)-1; pos->lineNumber++; ptr += MINBPC(enc); break; case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (unsigned)-1; break; default: ptr += MINBPC(enc); break; } pos->columnNumber++; } }
#undef DO_LEAD_CASE #undef MULTIBYTE_CASES #undef INVALID_CASES #undef CHECK_NAME_CASE #undef CHECK_NAME_CASES #undef CHECK_NMSTRT_CASE #undef CHECK_NMSTRT_CASES expat-1.2/xmltok/xmltok_impl.h0100664000076400007640000000123107077753354014652 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */
/* Byte-type codes: the values the tokenizer functions compare against the
   result of BYTE_TYPE(enc, ptr) to classify the character at ptr. */
enum { BT_NONXML, BT_MALFORM, BT_LT, BT_AMP, BT_RSQB, BT_LEAD2, BT_LEAD3, BT_LEAD4, BT_TRAIL, BT_CR, BT_LF, BT_GT, BT_QUOT, BT_APOS, BT_EQUALS, BT_QUEST, BT_EXCL, BT_SOL, BT_SEMI, BT_NUM, BT_LSQB, BT_S, BT_NMSTRT, BT_COLON, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS, BT_OTHER, /* known not to be a name or name start character */ BT_NONASCII, /* might be a name or name start character */ BT_PERCNT, BT_LPAR, BT_RPAR, BT_AST, BT_PLUS, BT_COMMA, BT_VERBAR }; #include expat-1.2/xmltok/xmltok_ns.c0100664000076400007640000000456307077752072014333 0ustar jjcjjcconst ENCODING *NS(XmlGetUtf8InternalEncoding)(void) { /* Returns the address of the internal UTF-8 encoding object selected by the ns() macro. */ return &ns(internal_utf8_encoding).enc; }
/* Returns the internal UTF-16 encoding that matches the host byte order:
   chosen at compile time when XML_BYTE_ORDER is 12 (little2) or 21 (big2),
   otherwise at run time by inspecting the first byte of a short holding
   the value 1. */
const ENCODING *NS(XmlGetUtf16InternalEncoding)(void) { #if XML_BYTE_ORDER == 12 return &ns(internal_little2_encoding).enc; #elif XML_BYTE_ORDER == 21 return &ns(internal_big2_encoding).enc; #else const short n = 1; return *(const char *)&n ? 
&ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc; #endif }
/* Table of the built-in encodings.  findEncoding indexes it with the value
   returned by getEncodingIndex, and it is also passed to initScan; the
   entry order presumably has to match the encoding index constants defined
   outside this section -- confirm there.  Note that big2_encoding appears
   twice and that the last slot (NO_ENC) holds the UTF-8 encoding. */
static const ENCODING *NS(encodings)[] = { &ns(latin1_encoding).enc, &ns(ascii_encoding).enc, &ns(utf8_encoding).enc, &ns(big2_encoding).enc, &ns(big2_encoding).enc, &ns(little2_encoding).enc, &ns(utf8_encoding).enc /* NO_ENC */ };
/* Prolog scanner installed in an INIT_ENCODING: forwards to initScan with
   XML_PROLOG_STATE.  enc is cast to the INIT_ENCODING it belongs to. */
static int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr); }
/* Content scanner installed in an INIT_ENCODING: forwards to initScan with
   XML_CONTENT_STATE.  enc is cast to the INIT_ENCODING it belongs to. */
static int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr); }
/* Initialises *p as an initial encoding for the encoding called name.
   Returns 0, without touching p or encPtr, if getEncodingIndex reports
   UNKNOWN_ENC.  Otherwise records the index in p, installs the two initScan
   wrappers and initUpdatePosition, stores encPtr in p->encPtr, points
   *encPtr at p->initEnc and returns 1. */
int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name) { int i = getEncodingIndex(name); if (i == UNKNOWN_ENC) return 0; SET_INIT_ENC_INDEX(p, i); p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog); p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent); p->initEnc.updatePosition = initUpdatePosition; p->encPtr = encPtr; *encPtr = &(p->initEnc); return 1; }
/* Maps the encoding name held in [ptr, end) (encoded in enc) to a built-in
   encoding.  The name is converted with XmlUtf8Convert into a local buffer
   of ENCODING_MAX (128) bytes, one of which is kept for the terminating
   NUL; if the whole name does not fit, 0 is returned.
   If the name equals KW_UTF_16 according to streqci and enc uses two bytes
   per character, enc itself is returned.  Otherwise the result is the
   encodings[] entry selected by getEncodingIndex, or 0 for UNKNOWN_ENC. */
static const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { #define ENCODING_MAX 128 char buf[ENCODING_MAX]; char *p = buf; int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); if (ptr != end) return 0; *p = 0; if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC) return 0; return NS(encodings)[i]; }
/* Thin wrapper: forwards all arguments to doParseXmlDecl, supplying
   NS(findEncoding) as the encoding lookup function, and returns its
   result. */
int NS(XmlParseXmlDecl)(int isGeneralTextEntity, const ENCODING *enc, const char *ptr, const char *end, const char **badPtr, const char **versionPtr, const char **encodingName, const ENCODING **encoding, int *standalone) { return doParseXmlDecl(NS(findEncoding), isGeneralTextEntity, enc, ptr, end, badPtr, versionPtr, encodingName, encoding, standalone); } expat-1.2/xmltok/xmltok.dsp0100664000076400007640000001342507147127562014172 
0ustar jjcjjc# Microsoft Developer Studio Project File - Name="xmltok" - Package Owner=<4> # Microsoft Developer Studio Generated Build File, Format Version 6.00 # ** DO NOT EDIT ** # TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 CFG=xmltok - Win32 Release !MESSAGE This is not a valid makefile. To build this project using NMAKE, !MESSAGE use the Export Makefile command and run !MESSAGE !MESSAGE NMAKE /f "xmltok.mak". !MESSAGE !MESSAGE You can specify a configuration when running NMAKE !MESSAGE by defining the macro CFG on the command line. For example: !MESSAGE !MESSAGE NMAKE /f "xmltok.mak" CFG="xmltok - Win32 Release" !MESSAGE !MESSAGE Possible choices for configuration are: !MESSAGE !MESSAGE "xmltok - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") !MESSAGE "xmltok - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") !MESSAGE # Begin Project # PROP AllowPerConfigDependencies 0 # PROP Scc_ProjName "" # PROP Scc_LocalPath "" CPP=cl.exe MTL=midl.exe RSC=rc.exe !IF "$(CFG)" == "xmltok - Win32 Release" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 0 # PROP BASE Output_Dir ".\Release" # PROP BASE Intermediate_Dir ".\Release" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 0 # PROP Output_Dir ".\Release" # PROP Intermediate_Dir ".\Release" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "." 
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c # ADD CPP /nologo /MT /W3 /GX /O2 /D "NDEBUG" /D "XML_NS" /D XMLTOKAPI=__declspec(dllexport) /D "WIN32" /D "_WINDOWS" /D "XML_DTD" /YX /FD /c # ADD BASE MTL /nologo /D "NDEBUG" /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0x809 /d "NDEBUG" # ADD RSC /l 0x809 /d "NDEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /entry:"DllMain" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmltok.dll" /link50compat # SUBTRACT LINK32 /pdb:none !ELSEIF "$(CFG)" == "xmltok - Win32 Debug" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 1 # PROP BASE Output_Dir ".\Debug" # PROP BASE Intermediate_Dir ".\Debug" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 1 # PROP Output_Dir ".\Debug" # PROP Intermediate_Dir ".\Debug" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "." 
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c # ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "_DEBUG" /D XMLTOKAPI=__declspec(dllexport) /D "WIN32" /D "_WINDOWS" /D "XML_DTD" /D "XML_NS" /YX /FD /c # ADD BASE MTL /nologo /D "_DEBUG" /win32 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0x809 /d "_DEBUG" # ADD RSC /l 0x809 /d "_DEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 /out:"..\dbgbin\xmltok.dll" !ENDIF # Begin Target # Name "xmltok - Win32 Release" # Name "xmltok - Win32 Debug" # Begin Group "Source Files" # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat;for;f90" # Begin Source File SOURCE=.\dllmain.c # End Source File # Begin Source File SOURCE=..\gennmtab\gennmtab.c !IF "$(CFG)" == "xmltok - Win32 Release" # PROP Ignore_Default_Tool 1 # Begin Custom Build - Creating nametab.h InputDir=\home\work\xmls\gennmtab OutDir=.\Release ProjDir=. InputPath=..\gennmtab\gennmtab.c "$(ProjDir)\nametab.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" $(InputDir)\$(OutDir)\gennmtab >$(ProjDir)\nametab.h # End Custom Build !ELSEIF "$(CFG)" == "xmltok - Win32 Debug" # PROP Ignore_Default_Tool 1 # Begin Custom Build - Creating nametab.h InputDir=\home\work\xmls\gennmtab OutDir=.\Debug ProjDir=. 
InputPath=..\gennmtab\gennmtab.c "$(ProjDir)\nametab.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)" $(InputDir)\$(OutDir)\gennmtab >$(ProjDir)\nametab.h # End Custom Build !ENDIF # End Source File # Begin Source File SOURCE=.\xmlrole.c # End Source File # Begin Source File SOURCE=.\xmltok.c # End Source File # End Group # Begin Group "Header Files" # PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd" # Begin Source File SOURCE=.\asciitab.h # End Source File # Begin Source File SOURCE=.\iasciitab.h # End Source File # Begin Source File SOURCE=.\latin1tab.h # End Source File # Begin Source File SOURCE=.\nametab.h # End Source File # Begin Source File SOURCE=.\utf8tab.h # End Source File # Begin Source File SOURCE=.\xmldef.h # End Source File # Begin Source File SOURCE=.\xmlrole.h # End Source File # Begin Source File SOURCE=.\xmltok.h # End Source File # Begin Source File SOURCE=.\xmltok_impl.c # PROP BASE Exclude_From_Build 1 # PROP Exclude_From_Build 1 # End Source File # Begin Source File SOURCE=.\xmltok_impl.h # End Source File # Begin Source File SOURCE=.\xmltok_ns.c # PROP Exclude_From_Build 1 # End Source File # End Group # Begin Group "Resource Files" # PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe" # End Group # End Target # End Project expat-1.2/gennmtab/0040775000076400007640000000000007167265632012422 5ustar jjcjjcexpat-1.2/gennmtab/gennmtab.c0100664000076400007640000002072607077753351014365 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ #include #include #include struct range { int start; int end; }; struct range nmstrt[] = { { '_' }, { ':' }, /* BaseChar */ { 0x0041, 0x005a }, { 0x0061, 0x007a }, { 0x00c0, 0x00d6 }, { 0x00d8, 0x00f6 }, { 0x00f8, 0x00ff }, { 0x0100, 0x0131 }, { 0x0134, 0x013e }, { 0x0141, 0x0148 }, { 0x014a, 0x017e }, { 0x0180, 0x01c3 }, { 0x01cd, 0x01f0 }, { 0x01f4, 0x01f5 }, { 0x01fa, 0x0217 }, { 0x0250, 0x02a8 }, { 0x02bb, 0x02c1 }, { 0x0386 }, { 0x0388, 0x038a }, { 0x038c }, { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 }, { 0x03da }, { 0x03dc }, { 0x03de }, { 0x03e0 }, { 0x03e2, 0x03f3 }, { 0x0401, 0x040c }, { 0x040e, 0x044f }, { 0x0451, 0x045c }, { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 }, { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 }, { 0x04f8, 0x04f9 }, { 0x0531, 0x0556 }, { 0x0559 }, { 0x0561, 0x0586 }, { 0x05d0, 0x05ea }, { 0x05f0, 0x05f2 }, { 0x0621, 0x063a }, { 0x0641, 0x064a }, { 0x0671, 0x06b7 }, { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06d0, 0x06d3 }, { 0x06d5 }, { 0x06e5, 0x06e6 }, { 0x0905, 0x0939 }, { 0x093d }, { 0x0958, 0x0961 }, { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 }, { 0x09aa, 0x09b0 }, { 0x09b2 }, { 0x09b6, 0x09b9 }, { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 }, { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 }, { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 }, { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e }, { 0x0a72, 0x0a74 }, { 0x0a85, 0x0a8b }, { 0x0a8d }, { 0x0a8f, 0x0a91 }, { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 }, { 0x0ab5, 0x0ab9 }, { 0x0abd }, { 0x0ae0 }, { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 }, { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 }, { 0x0b3d }, { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 }, { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 }, { 0x0b99, 0x0b9a }, { 0x0b9c }, { 0x0b9e, 0x0b9f }, { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 }, { 0x0bb7, 0x0bb9 }, { 0x0c05, 0x0c0c }, 
{ 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 }, { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 }, { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 }, { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0cde }, { 0x0ce0, 0x0ce1 }, { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 }, { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 }, { 0x0e01, 0x0e2e }, { 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e45 }, { 0x0e81, 0x0e82 }, { 0x0e84 }, { 0x0e87, 0x0e88 }, { 0x0e8a }, { 0x0e8d }, { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 }, { 0x0ea5 }, { 0x0ea7 }, { 0x0eaa, 0x0eab }, { 0x0ead, 0x0eae }, { 0x0eb0 }, { 0x0eb2, 0x0eb3 }, { 0x0ebd }, { 0x0ec0, 0x0ec4 }, { 0x0f40, 0x0f47 }, { 0x0f49, 0x0f69 }, { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 }, { 0x1100 }, { 0x1102, 0x1103 }, { 0x1105, 0x1107 }, { 0x1109 }, { 0x110b, 0x110c }, { 0x110e, 0x1112 }, { 0x113c }, { 0x113e }, { 0x1140 }, { 0x114c }, { 0x114e }, { 0x1150 }, { 0x1154, 0x1155 }, { 0x1159 }, { 0x115f, 0x1161 }, { 0x1163 }, { 0x1165 }, { 0x1167 }, { 0x1169 }, { 0x116d, 0x116e }, { 0x1172, 0x1173 }, { 0x1175 }, { 0x119e }, { 0x11a8 }, { 0x11ab }, { 0x11ae, 0x11af }, { 0x11b7, 0x11b8 }, { 0x11ba }, { 0x11bc, 0x11c2 }, { 0x11eb }, { 0x11f0 }, { 0x11f9 }, { 0x1e00, 0x1e9b }, { 0x1ea0, 0x1ef9 }, { 0x1f00, 0x1f15 }, { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 }, { 0x1f48, 0x1f4d }, { 0x1f50, 0x1f57 }, { 0x1f59 }, { 0x1f5b }, { 0x1f5d }, { 0x1f5f, 0x1f7d }, { 0x1f80, 0x1fb4 }, { 0x1fb6, 0x1fbc }, { 0x1fbe }, { 0x1fc2, 0x1fc4 }, { 0x1fc6, 0x1fcc }, { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb }, { 0x1fe0, 0x1fec }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc }, { 0x2126 }, { 0x212a, 0x212b }, { 0x212e }, { 0x2180, 0x2182 }, { 0x3041, 0x3094 }, { 0x30a1, 0x30fa }, { 0x3105, 0x312c }, { 0xac00, 0xd7a3 }, /* Ideographic */ { 0x4e00, 0x9fa5 }, { 0x3007 }, { 0x3021, 0x3029 }, }; /* name chars that are not name start chars */ struct range name[] = { { '.' 
}, { '-' }, /* CombiningChar */ { 0x0300, 0x0345 }, { 0x0360, 0x0361 }, { 0x0483, 0x0486 }, { 0x0591, 0x05a1 }, { 0x05a3, 0x05b9 }, { 0x05bb, 0x05bd }, { 0x05bf }, { 0x05c1, 0x05c2 }, { 0x05c4 }, { 0x064b, 0x0652 }, { 0x0670 }, { 0x06d6, 0x06dc }, { 0x06dd, 0x06df }, { 0x06e0, 0x06e4 }, { 0x06e7, 0x06e8 }, { 0x06ea, 0x06ed }, { 0x0901, 0x0903 }, { 0x093c }, { 0x093e, 0x094c }, { 0x094d }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0983 }, { 0x09bc }, { 0x09be }, { 0x09bf }, { 0x09c0, 0x09c4 }, { 0x09c7, 0x09c8 }, { 0x09cb, 0x09cd }, { 0x09d7 }, { 0x09e2, 0x09e3 }, { 0x0a02 }, { 0x0a3c }, { 0x0a3e }, { 0x0a3f }, { 0x0a40, 0x0a42 }, { 0x0a47, 0x0a48 }, { 0x0a4b, 0x0a4d }, { 0x0a70, 0x0a71 }, { 0x0a81, 0x0a83 }, { 0x0abc }, { 0x0abe, 0x0ac5 }, { 0x0ac7, 0x0ac9 }, { 0x0acb, 0x0acd }, { 0x0b01, 0x0b03 }, { 0x0b3c }, { 0x0b3e, 0x0b43 }, { 0x0b47, 0x0b48 }, { 0x0b4b, 0x0b4d }, { 0x0b56, 0x0b57 }, { 0x0b82, 0x0b83 }, { 0x0bbe, 0x0bc2 }, { 0x0bc6, 0x0bc8 }, { 0x0bca, 0x0bcd }, { 0x0bd7 }, { 0x0c01, 0x0c03 }, { 0x0c3e, 0x0c44 }, { 0x0c46, 0x0c48 }, { 0x0c4a, 0x0c4d }, { 0x0c55, 0x0c56 }, { 0x0c82, 0x0c83 }, { 0x0cbe, 0x0cc4 }, { 0x0cc6, 0x0cc8 }, { 0x0cca, 0x0ccd }, { 0x0cd5, 0x0cd6 }, { 0x0d02, 0x0d03 }, { 0x0d3e, 0x0d43 }, { 0x0d46, 0x0d48 }, { 0x0d4a, 0x0d4d }, { 0x0d57 }, { 0x0e31 }, { 0x0e34, 0x0e3a }, { 0x0e47, 0x0e4e }, { 0x0eb1 }, { 0x0eb4, 0x0eb9 }, { 0x0ebb, 0x0ebc }, { 0x0ec8, 0x0ecd }, { 0x0f18, 0x0f19 }, { 0x0f35 }, { 0x0f37 }, { 0x0f39 }, { 0x0f3e }, { 0x0f3f }, { 0x0f71, 0x0f84 }, { 0x0f86, 0x0f8b }, { 0x0f90, 0x0f95 }, { 0x0f97 }, { 0x0f99, 0x0fad }, { 0x0fb1, 0x0fb7 }, { 0x0fb9 }, { 0x20d0, 0x20dc }, { 0x20e1 }, { 0x302a, 0x302f }, { 0x3099 }, { 0x309a }, /* Digit */ { 0x0030, 0x0039 }, { 0x0660, 0x0669 }, { 0x06f0, 0x06f9 }, { 0x0966, 0x096f }, { 0x09e6, 0x09ef }, { 0x0a66, 0x0a6f }, { 0x0ae6, 0x0aef }, { 0x0b66, 0x0b6f }, { 0x0be7, 0x0bef }, { 0x0c66, 0x0c6f }, { 0x0ce6, 0x0cef }, { 0x0d66, 0x0d6f }, { 0x0e50, 0x0e59 }, { 0x0ed0, 0x0ed9 }, { 
0x0f20, 0x0f29 }, /* Extender */ { 0xb7 }, { 0x02d0 }, { 0x02d1 }, { 0x0387 }, { 0x0640 }, { 0x0e46 }, { 0x0ec6 }, { 0x3005 }, { 0x3031, 0x3035 }, { 0x309d, 0x309e }, { 0x30fc, 0x30fe }, }; void setTab(char *tab, struct range *ranges, size_t nRanges) { size_t i; int j; for (i = 0; i < nRanges; i++) { if (ranges[i].end) { for (j = ranges[i].start; j <= ranges[i].end; j++) tab[j] = 1; } else tab[ranges[i].start] = 1; } } void printTabs(char *tab) { int nBitmaps = 2; int i, j, k; unsigned char pageIndex[512]; printf( "static const unsigned namingBitmap[] = {\n\ 0x00000000, 0x00000000, 0x00000000, 0x00000000,\n\ 0x00000000, 0x00000000, 0x00000000, 0x00000000,\n\ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n\ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n"); for (i = 0; i < 512; i++) { int kind = tab[i*256]; for (j = 1; j < 256; j++) if (tab[i*256 +j] != kind) { kind = -1; break; } if (i >= 256 && memcmp(tab + (i - 256)*256, tab + i*256, 256) == 0) pageIndex[i] = pageIndex[i - 256]; else if (kind == -1) { pageIndex[i] = nBitmaps++; for (j = 0; j < 8; j++) { unsigned val = 0; for (k = 0; k < 32; k++) { if (tab[i*256 + j*32 +k]) val |= (1 << k); } printf("0x%08X,", val); putchar((((j + 1) & 3) == 0) ? '\n' : ' '); } } else pageIndex[i] = kind; } printf("};\n"); printf("static const unsigned char nmstrtPages[] = {\n"); for (i = 0; i < 512; i++) { if (i == 256) printf("};\nstatic const unsigned char namePages[] = {\n"); printf("0x%02X,", pageIndex[i]); putchar((((i + 1) & 7) == 0) ? 
'\n' : ' '); } printf("};\n"); } int main() { char tab[2*65536]; memset(tab, 0, 65536); setTab(tab, nmstrt, sizeof(nmstrt)/sizeof(nmstrt[0])); memcpy(tab + 65536, tab, 65536); setTab(tab + 65536, name, sizeof(name)/sizeof(name[0])); printTabs(tab); return 0; } expat-1.2/gennmtab/gennmtab.dsp0100664000076400007640000000720606613542536014723 0ustar jjcjjc# Microsoft Developer Studio Project File - Name="gennmtab" - Package Owner=<4> # Microsoft Developer Studio Generated Build File, Format Version 6.00 # ** DO NOT EDIT ** # TARGTYPE "Win32 (x86) Console Application" 0x0103 CFG=gennmtab - Win32 Release !MESSAGE This is not a valid makefile. To build this project using NMAKE, !MESSAGE use the Export Makefile command and run !MESSAGE !MESSAGE NMAKE /f "gennmtab.mak". !MESSAGE !MESSAGE You can specify a configuration when running NMAKE !MESSAGE by defining the macro CFG on the command line. For example: !MESSAGE !MESSAGE NMAKE /f "gennmtab.mak" CFG="gennmtab - Win32 Release" !MESSAGE !MESSAGE Possible choices for configuration are: !MESSAGE !MESSAGE "gennmtab - Win32 Release" (based on "Win32 (x86) Console Application") !MESSAGE "gennmtab - Win32 Debug" (based on "Win32 (x86) Console Application") !MESSAGE # Begin Project # PROP AllowPerConfigDependencies 0 # PROP Scc_ProjName "" # PROP Scc_LocalPath "" CPP=cl.exe RSC=rc.exe !IF "$(CFG)" == "gennmtab - Win32 Release" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 0 # PROP BASE Output_Dir ".\Release" # PROP BASE Intermediate_Dir ".\Release" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 0 # PROP Output_Dir ".\Release" # PROP Intermediate_Dir ".\Release" # PROP Target_Dir "." 
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c # ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /FD /c # ADD BASE RSC /l 0x809 /d "NDEBUG" # ADD RSC /l 0x809 /d "NDEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 !ELSEIF "$(CFG)" == "gennmtab - Win32 Debug" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 1 # PROP BASE Output_Dir ".\Debug" # PROP BASE Intermediate_Dir ".\Debug" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 1 # PROP Output_Dir ".\Debug" # PROP Intermediate_Dir ".\Debug" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "." 
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c # ADD CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /FD /c # ADD BASE RSC /l 0x809 /d "_DEBUG" # ADD RSC /l 0x809 /d "_DEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 !ENDIF # Begin Target # Name "gennmtab - Win32 Release" # Name "gennmtab - Win32 Debug" # Begin Group "Source Files" # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat;for;f90" # Begin Source File SOURCE=.\gennmtab.c # End Source File # End Group # Begin Group "Header Files" # PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd" # End Group # Begin Group "Resource Files" # PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe" # End Group # End Target # End Project expat-1.2/Makefile0100664000076400007640000000224607106152724012256 0ustar jjcjjcCC=gcc # If you know what your system's byte order is, define XML_BYTE_ORDER: # use -DXML_BYTE_ORDER=12 for little-endian byte order; # use -DXML_BYTE_ORDER=21 for big-endian (network) byte order. # -DXML_NS adds support for checking of lexical aspects of XML namespaces spec # -DXML_MIN_SIZE makes a smaller but slower parser # -DXML_DTD adds full support for parsing DTDs CFLAGS=-Wall -O2 -Ixmltok -Ixmlparse -DXML_NS -DXML_DTD AR=ar # Use one of the next two lines; unixfilemap is better if it works. 
FILEMAP_OBJ=xmlwf/unixfilemap.o #FILEMAP_OBJ=xmlwf/readfilemap.o LIBOBJS=xmltok/xmltok.o \ xmltok/xmlrole.o \ xmlparse/xmlparse.o OBJS=xmlwf/xmlwf.o \ xmlwf/xmlfile.o \ xmlwf/codepage.o \ $(FILEMAP_OBJ) LIB=xmlparse/libexpat.a EXE= XMLWF=xmlwf/xmlwf$(EXE) all: $(XMLWF) $(XMLWF): $(OBJS) $(LIB) $(CC) $(CFLAGS) -o $@ $(OBJS) $(LIB) $(LIB): $(LIBOBJS) $(AR) rc $(LIB) $(LIBOBJS) clean: rm -f $(OBJS) $(LIBOBJS) $(LIB) $(XMLWF) xmltok/nametab.h: gennmtab/gennmtab$(EXE) rm -f $@ gennmtab/gennmtab$(EXE) >$@ gennmtab/gennmtab$(EXE): gennmtab/gennmtab.c $(CC) $(CFLAGS) -o $@ gennmtab/gennmtab.c xmltok/xmltok.o: xmltok/nametab.h .c.o: $(CC) $(CFLAGS) -c -o $@ $< expat-1.2/xmlparse/0040775000076400007640000000000007167265632012462 5ustar jjcjjcexpat-1.2/xmlparse/xmlparse.c0100664000076400007640000032216707130606726014462 0ustar jjcjjc/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #include "xmldef.h" #include "xmlparse.h" #include #ifdef XML_UNICODE #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX #define XmlConvert XmlUtf16Convert #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS #define XmlEncode XmlUtf16Encode #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) typedef unsigned short ICHAR; #else #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX #define XmlConvert XmlUtf8Convert #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS #define XmlEncode XmlUtf8Encode #define MUST_CONVERT(enc, s) (!(enc)->isUtf8) typedef char ICHAR; #endif #ifndef XML_NS #define XmlInitEncodingNS XmlInitEncoding #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding #undef XmlGetInternalEncodingNS #define XmlGetInternalEncodingNS XmlGetInternalEncoding #define XmlParseXmlDeclNS XmlParseXmlDecl #endif #ifdef XML_UNICODE_WCHAR_T #define XML_T(x) L ## x #else #define XML_T(x) x 
#endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) #include "xmltok.h" #include "xmlrole.h" typedef const XML_Char *KEY; typedef struct { KEY name; } NAMED; typedef struct { NAMED **v; size_t size; size_t used; size_t usedLim; } HASH_TABLE; typedef struct { NAMED **p; NAMED **end; } HASH_TABLE_ITER; #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 #define INIT_ATTS_SIZE 16 #define INIT_BLOCK_SIZE 1024 #define INIT_BUFFER_SIZE 1024 #define EXPAND_SPARE 24 typedef struct binding { struct prefix *prefix; struct binding *nextTagBinding; struct binding *prevPrefixBinding; const struct attribute_id *attId; XML_Char *uri; int uriLen; int uriAlloc; } BINDING; typedef struct prefix { const XML_Char *name; BINDING *binding; } PREFIX; typedef struct { const XML_Char *str; const XML_Char *localPart; int uriLen; } TAG_NAME; typedef struct tag { struct tag *parent; const char *rawName; int rawNameLength; TAG_NAME name; char *buf; char *bufEnd; BINDING *bindings; } TAG; typedef struct { const XML_Char *name; const XML_Char *textPtr; int textLen; const XML_Char *systemId; const XML_Char *base; const XML_Char *publicId; const XML_Char *notation; char open; } ENTITY; typedef struct block { struct block *next; int size; XML_Char s[1]; } BLOCK; typedef struct { BLOCK *blocks; BLOCK *freeBlocks; const XML_Char *end; XML_Char *ptr; XML_Char *start; } STRING_POOL; /* The XML_Char before the name is used to determine whether an attribute has been specified. 
*/ typedef struct attribute_id { XML_Char *name; PREFIX *prefix; char maybeTokenized; char xmlns; } ATTRIBUTE_ID; typedef struct { const ATTRIBUTE_ID *id; char isCdata; const XML_Char *value; } DEFAULT_ATTRIBUTE; typedef struct { const XML_Char *name; PREFIX *prefix; const ATTRIBUTE_ID *idAtt; int nDefaultAtts; int allocDefaultAtts; DEFAULT_ATTRIBUTE *defaultAtts; } ELEMENT_TYPE; typedef struct { HASH_TABLE generalEntities; HASH_TABLE elementTypes; HASH_TABLE attributeIds; HASH_TABLE prefixes; STRING_POOL pool; int complete; int standalone; #ifdef XML_DTD HASH_TABLE paramEntities; #endif /* XML_DTD */ PREFIX defaultPrefix; } DTD; typedef struct open_internal_entity { const char *internalEventPtr; const char *internalEventEndPtr; struct open_internal_entity *next; ENTITY *entity; } OPEN_INTERNAL_ENTITY; typedef enum XML_Error Processor(XML_Parser parser, const char *start, const char *end, const char **endPtr); static Processor prologProcessor; static Processor prologInitProcessor; static Processor contentProcessor; static Processor cdataSectionProcessor; #ifdef XML_DTD static Processor ignoreSectionProcessor; #endif /* XML_DTD */ static Processor epilogProcessor; static Processor errorProcessor; static Processor externalEntityInitProcessor; static Processor externalEntityInitProcessor2; static Processor externalEntityInitProcessor3; static Processor externalEntityContentProcessor; static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *); static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr); static enum XML_Error processInternalParamEntity(XML_Parser parser, ENTITY *entity); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, 
const char *start, const char *end, const char **endPtr); static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); #ifdef XML_DTD static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr); #endif /* XML_DTD */ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, TAG_NAME *tagNamePtr, BINDING **bindingsPtr); static int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, int isId, const XML_Char *dfltValue); static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, STRING_POOL *); static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *, STRING_POOL *); static ATTRIBUTE_ID * getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *); static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static int reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static void reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end); static const XML_Char *getContext(XML_Parser parser); static int setContext(XML_Parser parser, const XML_Char *context); static void normalizePublicId(XML_Char *s); static int dtdInit(DTD *); static void dtdDestroy(DTD *); static int dtdCopy(DTD *newDtd, const DTD *oldDtd); static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); #ifdef 
XML_DTD static void dtdSwap(DTD *, DTD *); #endif /* XML_DTD */ static NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize); static void hashTableInit(HASH_TABLE *); static void hashTableDestroy(HASH_TABLE *); static void hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *); static NAMED *hashTableIterNext(HASH_TABLE_ITER *); static void poolInit(STRING_POOL *); static void poolClear(STRING_POOL *); static void poolDestroy(STRING_POOL *); static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end); static int poolGrow(STRING_POOL *pool); static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s); static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); #define poolStart(pool) ((pool)->start) #define poolEnd(pool) ((pool)->ptr) #define poolLength(pool) ((pool)->ptr - (pool)->start) #define poolChop(pool) ((void)--(pool->ptr)) #define poolLastChar(pool) (((pool)->ptr)[-1]) #define poolDiscard(pool) ((pool)->ptr = (pool)->start) #define poolFinish(pool) ((pool)->start = (pool)->ptr) #define poolAppendChar(pool, c) \ (((pool)->ptr == (pool)->end && !poolGrow(pool)) \ ? 0 \ : ((*((pool)->ptr)++ = c), 1)) typedef struct { /* The first member must be userData so that the XML_GetUserData macro works. 
*/ void *m_userData; void *m_handlerArg; char *m_buffer; /* first character to be parsed */ const char *m_bufferPtr; /* past last character to be parsed */ char *m_bufferEnd; /* allocated end of buffer */ const char *m_bufferLim; long m_parseEndByteIndex; const char *m_parseEndPtr; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; XML_StartElementHandler m_startElementHandler; XML_EndElementHandler m_endElementHandler; XML_CharacterDataHandler m_characterDataHandler; XML_ProcessingInstructionHandler m_processingInstructionHandler; XML_CommentHandler m_commentHandler; XML_StartCdataSectionHandler m_startCdataSectionHandler; XML_EndCdataSectionHandler m_endCdataSectionHandler; XML_DefaultHandler m_defaultHandler; XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler; XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler; XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler; XML_NotationDeclHandler m_notationDeclHandler; XML_ExternalParsedEntityDeclHandler m_externalParsedEntityDeclHandler; XML_InternalParsedEntityDeclHandler m_internalParsedEntityDeclHandler; XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler; XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler; XML_NotStandaloneHandler m_notStandaloneHandler; XML_ExternalEntityRefHandler m_externalEntityRefHandler; void *m_externalEntityRefHandlerArg; XML_UnknownEncodingHandler m_unknownEncodingHandler; const ENCODING *m_encoding; INIT_ENCODING m_initEncoding; const ENCODING *m_internalEncoding; const XML_Char *m_protocolEncodingName; int m_ns; void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; void (*m_unknownEncodingRelease)(void *); PROLOG_STATE m_prologState; Processor *m_processor; enum XML_Error m_errorCode; const char *m_eventPtr; const char *m_eventEndPtr; const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; int m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; const XML_Char *m_declNotationName; const XML_Char 
*m_declNotationPublicId; ELEMENT_TYPE *m_declElementType; ATTRIBUTE_ID *m_declAttributeId; char m_declAttributeIsCdata; char m_declAttributeIsId; DTD m_dtd; const XML_Char *m_curBase; TAG *m_tagStack; TAG *m_freeTagList; BINDING *m_inheritedBindings; BINDING *m_freeBindingList; int m_attsSize; int m_nSpecifiedAtts; int m_idAttIndex; ATTRIBUTE *m_atts; POSITION m_position; STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; char *m_groupConnector; unsigned m_groupSize; int m_hadExternalDoctype; XML_Char m_namespaceSeparator; #ifdef XML_DTD enum XML_ParamEntityParsing m_paramEntityParsing; XML_Parser m_parentParser; #endif } Parser; #define userData (((Parser *)parser)->m_userData) #define handlerArg (((Parser *)parser)->m_handlerArg) #define startElementHandler (((Parser *)parser)->m_startElementHandler) #define endElementHandler (((Parser *)parser)->m_endElementHandler) #define characterDataHandler (((Parser *)parser)->m_characterDataHandler) #define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler) #define commentHandler (((Parser *)parser)->m_commentHandler) #define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler) #define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler) #define defaultHandler (((Parser *)parser)->m_defaultHandler) #define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler) #define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler) #define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler) #define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler) #define externalParsedEntityDeclHandler (((Parser *)parser)->m_externalParsedEntityDeclHandler) #define internalParsedEntityDeclHandler (((Parser *)parser)->m_internalParsedEntityDeclHandler) #define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler) #define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler) 
#define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler) #define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler) #define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg) #define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler) #define encoding (((Parser *)parser)->m_encoding) #define initEncoding (((Parser *)parser)->m_initEncoding) #define internalEncoding (((Parser *)parser)->m_internalEncoding) #define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem) #define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData) #define unknownEncodingHandlerData \ (((Parser *)parser)->m_unknownEncodingHandlerData) #define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease) #define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName) #define ns (((Parser *)parser)->m_ns) #define prologState (((Parser *)parser)->m_prologState) #define processor (((Parser *)parser)->m_processor) #define errorCode (((Parser *)parser)->m_errorCode) #define eventPtr (((Parser *)parser)->m_eventPtr) #define eventEndPtr (((Parser *)parser)->m_eventEndPtr) #define positionPtr (((Parser *)parser)->m_positionPtr) #define position (((Parser *)parser)->m_position) #define openInternalEntities (((Parser *)parser)->m_openInternalEntities) #define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities) #define tagLevel (((Parser *)parser)->m_tagLevel) #define buffer (((Parser *)parser)->m_buffer) #define bufferPtr (((Parser *)parser)->m_bufferPtr) #define bufferEnd (((Parser *)parser)->m_bufferEnd) #define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex) #define parseEndPtr (((Parser *)parser)->m_parseEndPtr) #define bufferLim (((Parser *)parser)->m_bufferLim) #define dataBuf (((Parser *)parser)->m_dataBuf) #define dataBufEnd (((Parser *)parser)->m_dataBufEnd) #define dtd (((Parser *)parser)->m_dtd) #define curBase (((Parser 
*)parser)->m_curBase) #define declEntity (((Parser *)parser)->m_declEntity) #define declNotationName (((Parser *)parser)->m_declNotationName) #define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId) #define declElementType (((Parser *)parser)->m_declElementType) #define declAttributeId (((Parser *)parser)->m_declAttributeId) #define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata) #define declAttributeIsId (((Parser *)parser)->m_declAttributeIsId) #define freeTagList (((Parser *)parser)->m_freeTagList) #define freeBindingList (((Parser *)parser)->m_freeBindingList) #define inheritedBindings (((Parser *)parser)->m_inheritedBindings) #define tagStack (((Parser *)parser)->m_tagStack) #define atts (((Parser *)parser)->m_atts) #define attsSize (((Parser *)parser)->m_attsSize) #define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts) #define idAttIndex (((Parser *)parser)->m_idAttIndex) #define tempPool (((Parser *)parser)->m_tempPool) #define temp2Pool (((Parser *)parser)->m_temp2Pool) #define groupConnector (((Parser *)parser)->m_groupConnector) #define groupSize (((Parser *)parser)->m_groupSize) #define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype) #define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator) #ifdef XML_DTD #define parentParser (((Parser *)parser)->m_parentParser) #define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing) #endif /* XML_DTD */ #ifdef _MSC_VER #ifdef _DEBUG Parser *asParser(XML_Parser parser) { return parser; } #endif #endif XML_Parser XML_ParserCreate(const XML_Char *encodingName) { XML_Parser parser = malloc(sizeof(Parser)); if (!parser) return parser; processor = prologInitProcessor; XmlPrologStateInit(&prologState); userData = 0; handlerArg = 0; startElementHandler = 0; endElementHandler = 0; characterDataHandler = 0; processingInstructionHandler = 0; commentHandler = 0; startCdataSectionHandler = 0; endCdataSectionHandler = 0; defaultHandler = 0; 
startDoctypeDeclHandler = 0; endDoctypeDeclHandler = 0; unparsedEntityDeclHandler = 0; notationDeclHandler = 0; externalParsedEntityDeclHandler = 0; internalParsedEntityDeclHandler = 0; startNamespaceDeclHandler = 0; endNamespaceDeclHandler = 0; notStandaloneHandler = 0; externalEntityRefHandler = 0; externalEntityRefHandlerArg = parser; unknownEncodingHandler = 0; buffer = 0; bufferPtr = 0; bufferEnd = 0; parseEndByteIndex = 0; parseEndPtr = 0; bufferLim = 0; declElementType = 0; declAttributeId = 0; declEntity = 0; declNotationName = 0; declNotationPublicId = 0; memset(&position, 0, sizeof(POSITION)); errorCode = XML_ERROR_NONE; eventPtr = 0; eventEndPtr = 0; positionPtr = 0; openInternalEntities = 0; tagLevel = 0; tagStack = 0; freeTagList = 0; freeBindingList = 0; inheritedBindings = 0; attsSize = INIT_ATTS_SIZE; atts = malloc(attsSize * sizeof(ATTRIBUTE)); nSpecifiedAtts = 0; dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); groupSize = 0; groupConnector = 0; hadExternalDoctype = 0; unknownEncodingMem = 0; unknownEncodingRelease = 0; unknownEncodingData = 0; unknownEncodingHandlerData = 0; namespaceSeparator = '!'; #ifdef XML_DTD parentParser = 0; paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif ns = 0; poolInit(&tempPool); poolInit(&temp2Pool); protocolEncodingName = encodingName ? 
poolCopyString(&tempPool, encodingName) : 0; curBase = 0; if (!dtdInit(&dtd) || !atts || !dataBuf || (encodingName && !protocolEncodingName)) { XML_ParserFree(parser); return 0; } dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE; XmlInitEncoding(&initEncoding, &encoding, 0); internalEncoding = XmlGetInternalEncoding(); return parser; } XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { static const XML_Char implicitContext[] = { XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='), XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'), XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'), XML_T('.'), XML_T('w'), XML_T('3'), XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'), XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'), XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'), XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'), XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'), XML_T('\0') }; XML_Parser parser = XML_ParserCreate(encodingName); if (parser) { XmlInitEncodingNS(&initEncoding, &encoding, 0); ns = 1; internalEncoding = XmlGetInternalEncodingNS(); namespaceSeparator = nsSep; } if (!setContext(parser, implicitContext)) { XML_ParserFree(parser); return 0; } return parser; } int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { if (!encodingName) protocolEncodingName = 0; else { protocolEncodingName = poolCopyString(&tempPool, encodingName); if (!protocolEncodingName) return 0; } return 1; } XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, const XML_Char *encodingName) { XML_Parser parser = oldParser; DTD *oldDtd = &dtd; XML_StartElementHandler oldStartElementHandler = startElementHandler; XML_EndElementHandler oldEndElementHandler = endElementHandler; XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler; XML_CommentHandler oldCommentHandler = 
commentHandler; XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler; XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler; XML_DefaultHandler oldDefaultHandler = defaultHandler; XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler; XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; XML_ExternalParsedEntityDeclHandler oldExternalParsedEntityDeclHandler = externalParsedEntityDeclHandler; XML_InternalParsedEntityDeclHandler oldInternalParsedEntityDeclHandler = internalParsedEntityDeclHandler; XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler; XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler; XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler; void *oldUserData = userData; void *oldHandlerArg = handlerArg; int oldDefaultExpandInternalEntities = defaultExpandInternalEntities; void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; #ifdef XML_DTD int oldParamEntityParsing = paramEntityParsing; #endif parser = (ns ? 
XML_ParserCreateNS(encodingName, namespaceSeparator) : XML_ParserCreate(encodingName)); if (!parser) return 0; startElementHandler = oldStartElementHandler; endElementHandler = oldEndElementHandler; characterDataHandler = oldCharacterDataHandler; processingInstructionHandler = oldProcessingInstructionHandler; commentHandler = oldCommentHandler; startCdataSectionHandler = oldStartCdataSectionHandler; endCdataSectionHandler = oldEndCdataSectionHandler; defaultHandler = oldDefaultHandler; unparsedEntityDeclHandler = oldUnparsedEntityDeclHandler; notationDeclHandler = oldNotationDeclHandler; externalParsedEntityDeclHandler = oldExternalParsedEntityDeclHandler; internalParsedEntityDeclHandler = oldInternalParsedEntityDeclHandler; startNamespaceDeclHandler = oldStartNamespaceDeclHandler; endNamespaceDeclHandler = oldEndNamespaceDeclHandler; notStandaloneHandler = oldNotStandaloneHandler; externalEntityRefHandler = oldExternalEntityRefHandler; unknownEncodingHandler = oldUnknownEncodingHandler; userData = oldUserData; if (oldUserData == oldHandlerArg) handlerArg = userData; else handlerArg = parser; if (oldExternalEntityRefHandlerArg != oldParser) externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; #ifdef XML_DTD paramEntityParsing = oldParamEntityParsing; if (context) { #endif /* XML_DTD */ if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) { XML_ParserFree(parser); return 0; } processor = externalEntityInitProcessor; #ifdef XML_DTD } else { dtdSwap(&dtd, oldDtd); parentParser = oldParser; XmlPrologStateInitExternalEntity(&prologState); dtd.complete = 1; hadExternalDoctype = 1; } #endif /* XML_DTD */ return parser; } static void destroyBindings(BINDING *bindings) { for (;;) { BINDING *b = bindings; if (!b) break; bindings = b->nextTagBinding; free(b->uri); free(b); } } void XML_ParserFree(XML_Parser parser) { for (;;) { TAG *p; if (tagStack == 0) { if (freeTagList == 0) break; tagStack = 
freeTagList; freeTagList = 0; } p = tagStack; tagStack = tagStack->parent; free(p->buf); destroyBindings(p->bindings); free(p); } destroyBindings(freeBindingList); destroyBindings(inheritedBindings); poolDestroy(&tempPool); poolDestroy(&temp2Pool); #ifdef XML_DTD if (parentParser) { if (hadExternalDoctype) dtd.complete = 0; dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd); } #endif /* XML_DTD */ dtdDestroy(&dtd); free((void *)atts); free(groupConnector); free(buffer); free(dataBuf); free(unknownEncodingMem); if (unknownEncodingRelease) unknownEncodingRelease(unknownEncodingData); free(parser); } void XML_UseParserAsHandlerArg(XML_Parser parser) { handlerArg = parser; } void XML_SetUserData(XML_Parser parser, void *p) { if (handlerArg == userData) handlerArg = userData = p; else userData = p; } int XML_SetBase(XML_Parser parser, const XML_Char *p) { if (p) { p = poolCopyString(&dtd.pool, p); if (!p) return 0; curBase = p; } else curBase = 0; return 1; } const XML_Char *XML_GetBase(XML_Parser parser) { return curBase; } int XML_GetSpecifiedAttributeCount(XML_Parser parser) { return nSpecifiedAtts; } int XML_GetIdAttributeIndex(XML_Parser parser) { return idAttIndex; } void XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end) { startElementHandler = start; endElementHandler = end; } void XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler) { characterDataHandler = handler; } void XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler) { processingInstructionHandler = handler; } void XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { commentHandler = handler; } void XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end) { startCdataSectionHandler = start; endCdataSectionHandler = end; } void XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { defaultHandler = 
handler; defaultExpandInternalEntities = 0; } void XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { defaultHandler = handler; defaultExpandInternalEntities = 1; } void XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end) { startDoctypeDeclHandler = start; endDoctypeDeclHandler = end; } void XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { unparsedEntityDeclHandler = handler; } void XML_SetExternalParsedEntityDeclHandler(XML_Parser parser, XML_ExternalParsedEntityDeclHandler handler) { externalParsedEntityDeclHandler = handler; } void XML_SetInternalParsedEntityDeclHandler(XML_Parser parser, XML_InternalParsedEntityDeclHandler handler) { internalParsedEntityDeclHandler = handler; } void XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { notationDeclHandler = handler; } void XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end) { startNamespaceDeclHandler = start; endNamespaceDeclHandler = end; } void XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler) { notStandaloneHandler = handler; } void XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler) { externalEntityRefHandler = handler; } void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { if (arg) externalEntityRefHandlerArg = arg; else externalEntityRefHandlerArg = parser; } void XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *data) { unknownEncodingHandler = handler; unknownEncodingHandlerData = data; } int XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing) { #ifdef XML_DTD paramEntityParsing = parsing; return 1; #else return parsing == XML_PARAM_ENTITY_PARSING_NEVER; #endif } int XML_Parse(XML_Parser parser, const char *s, int len, int 
isFinal)
{
  /* XML_Parse: feed len bytes starting at s to the current processor.
     A non-zero isFinal marks the last piece of input.  Returns 1 on
     success and 0 on failure, in which case errorCode holds the reason.
     Input that the processor could not consume yet is copied into the
     parser's own buffer so that the caller may reuse s. */
  if (len == 0) {
    if (!isFinal)
      return 1;
    /* Final call without new data: flush whatever is still buffered. */
    positionPtr = bufferPtr;
    errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
    if (errorCode == XML_ERROR_NONE)
      return 1;
    eventEndPtr = eventPtr;
    processor = errorProcessor;
    return 0;
  }
  else if (bufferPtr == bufferEnd) {
    /* Nothing buffered: parse directly out of the caller's memory. */
    const char *end;
    int nLeftOver;
    parseEndByteIndex += len;
    positionPtr = s;
    if (isFinal) {
      errorCode = processor(parser, s, parseEndPtr = s + len, 0);
      if (errorCode == XML_ERROR_NONE)
        return 1;
      eventEndPtr = eventPtr;
      processor = errorProcessor;
      return 0;
    }
    errorCode = processor(parser, s, parseEndPtr = s + len, &end);
    if (errorCode != XML_ERROR_NONE) {
      eventEndPtr = eventPtr;
      processor = errorProcessor;
      return 0;
    }
    XmlUpdatePosition(encoding, positionPtr, end, &position);
    nLeftOver = s + len - end;
    if (nLeftOver) {
      if (buffer == 0 || nLeftOver > bufferLim - buffer) {
        /* Do the doubling in size_t so that it cannot overflow a signed
           int (size_t is assumed to be at least as wide as unsigned int). */
        size_t newSize = (size_t)len * 2;
        /* Keep the old block reachable until the new one is known to be
           valid; realloc leaves it allocated on failure. */
        char *newBuf = buffer == 0 ? malloc(newSize) : realloc(buffer, newSize);
        if (!newBuf) {
          errorCode = XML_ERROR_NO_MEMORY;
          eventPtr = eventEndPtr = 0;
          processor = errorProcessor;
          return 0;
        }
        buffer = newBuf;
        bufferLim = buffer + newSize;
      }
      memcpy(buffer, end, nLeftOver);
      bufferPtr = buffer;
      bufferEnd = buffer + nLeftOver;
    }
    return 1;
  }
  else {
    /* Data is already buffered: append the new bytes behind it. */
    void *dest = XML_GetBuffer(parser, len);
    if (!dest)
      return 0; /* XML_GetBuffer has set errorCode */
    memcpy(dest, s, len);
    return XML_ParseBuffer(parser, len, isFinal);
  }
}

/* Parse len bytes that the caller has already placed at the address
   returned by XML_GetBuffer.  Returns 1 on success and 0 on failure
   (errorCode is set and the processor becomes errorProcessor). */
int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
{
  const char *start = bufferPtr;
  positionPtr = start;
  bufferEnd += len;
  parseEndByteIndex += len;
  errorCode = processor(parser, start, parseEndPtr = bufferEnd,
                        isFinal ? (const char **)0 : &bufferPtr);
  if (errorCode == XML_ERROR_NONE) {
    if (!isFinal)
      XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
    return 1;
  }
  else {
    eventEndPtr = eventPtr;
    processor = errorProcessor;
    return 0;
  }
}

/* Return a pointer (bufferEnd) at which at least len bytes may be stored.
   Unconsumed data is moved to the front of the existing buffer when that
   makes enough room; otherwise a larger buffer is allocated.  Returns 0
   and sets errorCode to XML_ERROR_NO_MEMORY if allocation fails. */
void *XML_GetBuffer(XML_Parser parser, int len)
{
  if (len > bufferLim - bufferEnd) {
    /* FIXME avoid integer overflow */
    int neededSize = len + (bufferEnd - bufferPtr);
    if (neededSize <= bufferLim - buffer) {
      /* Enough total capacity: slide the pending bytes to the front. */
      memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
      bufferEnd = buffer + (bufferEnd - bufferPtr);
      bufferPtr = buffer;
    }
    else {
      char *newBuf;
      int bufferSize = bufferLim - bufferPtr;
      if (bufferSize == 0)
        bufferSize = INIT_BUFFER_SIZE;
      do {
        bufferSize *= 2;
      } while (bufferSize < neededSize);
      newBuf = malloc(bufferSize);
      if (newBuf == 0) {
        errorCode = XML_ERROR_NO_MEMORY;
        return 0;
      }
      bufferLim = newBuf + bufferSize;
      if (bufferPtr) {
        memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
        free(buffer);
      }
      bufferEnd = newBuf + (bufferEnd - bufferPtr);
      bufferPtr = buffer = newBuf;
    }
  }
  return bufferEnd;
}

/* Return the code of the last error recorded in the parser. */
enum XML_Error XML_GetErrorCode(XML_Parser parser)
{
  return errorCode;
}

/* Return the byte index of the current event, or -1 when eventPtr is
   not set. */
long XML_GetCurrentByteIndex(XML_Parser parser)
{
  if (eventPtr)
    return parseEndByteIndex - (parseEndPtr - eventPtr);
  return -1;
}

/* Return the number of bytes in the current event, or 0 when its start
   or end is not known. */
int XML_GetCurrentByteCount(XML_Parser parser)
{
  if (eventEndPtr && eventPtr)
    return eventEndPtr - eventPtr;
  return 0;
}

/* Return the line number of the current event (position.lineNumber + 1),
   bringing the cached position up to eventPtr first. */
int XML_GetCurrentLineNumber(XML_Parser parser)
{
  if (eventPtr) {
    XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
    positionPtr = eventPtr;
  }
  return position.lineNumber + 1;
}

/* Return the column number of the current event, bringing the cached
   position up to eventPtr first. */
int XML_GetCurrentColumnNumber(XML_Parser parser)
{
  if (eventPtr) {
    XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
    positionPtr = eventPtr;
  }
  return position.columnNumber;
}

/* Pass the current event to the default handler, if one is installed.
   While an internal entity is open, that entity's event pointers and
   internalEncoding are used. */
void XML_DefaultCurrent(XML_Parser parser)
{
  if (defaultHandler) {
    if (openInternalEntities)
      reportDefault(parser,
                    internalEncoding,
                    openInternalEntities->internalEventPtr,
                    openInternalEntities->internalEventEndPtr);
    else
      reportDefault(parser, encoding,
eventPtr, eventEndPtr); } } const XML_LChar *XML_ErrorString(int code) { static const XML_LChar *message[] = { 0, XML_T("out of memory"), XML_T("syntax error"), XML_T("no element found"), XML_T("not well-formed"), XML_T("unclosed token"), XML_T("unclosed token"), XML_T("mismatched tag"), XML_T("duplicate attribute"), XML_T("junk after document element"), XML_T("illegal parameter entity reference"), XML_T("undefined entity"), XML_T("recursive entity reference"), XML_T("asynchronous entity"), XML_T("reference to invalid character number"), XML_T("reference to binary entity"), XML_T("reference to external entity in attribute"), XML_T("xml processing instruction not at start of external entity"), XML_T("unknown encoding"), XML_T("encoding specified in XML declaration is incorrect"), XML_T("unclosed CDATA section"), XML_T("error in processing external entity reference"), XML_T("document is not standalone") }; if (code > 0 && code < sizeof(message)/sizeof(message[0])) return message[code]; return 0; } static enum XML_Error contentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { return doContent(parser, 0, encoding, start, end, endPtr); } static enum XML_Error externalEntityInitProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; processor = externalEntityInitProcessor2; return externalEntityInitProcessor2(parser, start, end, endPtr); } static enum XML_Error externalEntityInitProcessor2(XML_Parser parser, const char *start, const char *end, const char **endPtr) { const char *next; int tok = XmlContentTok(encoding, start, end, &next); switch (tok) { case XML_TOK_BOM: start = next; break; case XML_TOK_PARTIAL: if (endPtr) { *endPtr = start; return XML_ERROR_NONE; } eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (endPtr) { *endPtr = start; return XML_ERROR_NONE; } eventPtr 
= start; return XML_ERROR_PARTIAL_CHAR; } processor = externalEntityInitProcessor3; return externalEntityInitProcessor3(parser, start, end, endPtr); } static enum XML_Error externalEntityInitProcessor3(XML_Parser parser, const char *start, const char *end, const char **endPtr) { const char *next; int tok = XmlContentTok(encoding, start, end, &next); switch (tok) { case XML_TOK_XML_DECL: { enum XML_Error result = processXmlDecl(parser, 1, start, next); if (result != XML_ERROR_NONE) return result; start = next; } break; case XML_TOK_PARTIAL: if (endPtr) { *endPtr = start; return XML_ERROR_NONE; } eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (endPtr) { *endPtr = start; return XML_ERROR_NONE; } eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } processor = externalEntityContentProcessor; tagLevel = 1; return doContent(parser, 1, encoding, start, end, endPtr); } static enum XML_Error externalEntityContentProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { return doContent(parser, 1, encoding, start, end, endPtr); } static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, const char *s, const char *end, const char **nextPtr) { const char **eventPP; const char **eventEndPP; if (enc == encoding) { eventPP = &eventPtr; eventEndPP = &eventEndPtr; } else { eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } *eventEndPP = end; if (characterDataHandler) { XML_Char c = 0xA; characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) reportDefault(parser, enc, s, end); if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; if (tagLevel != startTagLevel) 
return XML_ERROR_ASYNC_ENTITY; return XML_ERROR_NONE; case XML_TOK_NONE: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } if (startTagLevel > 0) { if (tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; return XML_ERROR_NONE; } return XML_ERROR_NO_ELEMENTS; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_ENTITY_REF: { const XML_Char *name; ENTITY *entity; XML_Char ch = XmlPredefinedEntityName(enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { if (characterDataHandler) characterDataHandler(handlerArg, &ch, 1); else if (defaultHandler) reportDefault(parser, enc, s, next); break; } name = poolStoreString(&dtd.pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); poolDiscard(&dtd.pool); if (!entity) { if (dtd.complete || dtd.standalone) return XML_ERROR_UNDEFINED_ENTITY; if (defaultHandler) reportDefault(parser, enc, s, next); break; } if (entity->open) return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->notation) return XML_ERROR_BINARY_ENTITY_REF; if (entity) { if (entity->textPtr) { enum XML_Error result; OPEN_INTERNAL_ENTITY openEntity; if (defaultHandler && !defaultExpandInternalEntities) { reportDefault(parser, enc, s, next); break; } entity->open = 1; openEntity.next = openInternalEntities; openInternalEntities = &openEntity; openEntity.entity = entity; openEntity.internalEventPtr = 0; openEntity.internalEventEndPtr = 0; result = doContent(parser, tagLevel, internalEncoding, (char *)entity->textPtr, (char *)(entity->textPtr + entity->textLen), 0); entity->open = 0; openInternalEntities = openEntity.next; if (result) return result; } else if (externalEntityRefHandler) { 
const XML_Char *context; entity->open = 1; context = getContext(parser); entity->open = 0; if (!context) return XML_ERROR_NO_MEMORY; if (!externalEntityRefHandler(externalEntityRefHandlerArg, context, entity->base, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; poolDiscard(&tempPool); } else if (defaultHandler) reportDefault(parser, enc, s, next); } break; } case XML_TOK_START_TAG_WITH_ATTS: if (!startElementHandler) { enum XML_Error result = storeAtts(parser, enc, s, 0, 0); if (result) return result; } /* fall through */ case XML_TOK_START_TAG_NO_ATTS: { TAG *tag; if (freeTagList) { tag = freeTagList; freeTagList = freeTagList->parent; } else { tag = malloc(sizeof(TAG)); if (!tag) return XML_ERROR_NO_MEMORY; tag->buf = malloc(INIT_TAG_BUF_SIZE); if (!tag->buf) return XML_ERROR_NO_MEMORY; tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; } tag->bindings = 0; tag->parent = tagStack; tagStack = tag; tag->name.localPart = 0; tag->rawName = s + enc->minBytesPerChar; tag->rawNameLength = XmlNameLength(enc, tag->rawName); if (nextPtr) { /* Need to guarantee that: tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */ if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) { int bufSize = tag->rawNameLength * 4; bufSize = ROUND_UP(bufSize, sizeof(XML_Char)); tag->buf = realloc(tag->buf, bufSize); if (!tag->buf) return XML_ERROR_NO_MEMORY; tag->bufEnd = tag->buf + bufSize; } memcpy(tag->buf, tag->rawName, tag->rawNameLength); tag->rawName = tag->buf; } ++tagLevel; if (startElementHandler) { enum XML_Error result; XML_Char *toPtr; for (;;) { const char *rawNameEnd = tag->rawName + tag->rawNameLength; const char *fromPtr = tag->rawName; int bufSize; if (nextPtr) toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char))); else toPtr = (XML_Char *)tag->buf; tag->name.str = toPtr; XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR 
*)tag->bufEnd - 1); if (fromPtr == rawNameEnd) break; bufSize = (tag->bufEnd - tag->buf) << 1; tag->buf = realloc(tag->buf, bufSize); if (!tag->buf) return XML_ERROR_NO_MEMORY; tag->bufEnd = tag->buf + bufSize; if (nextPtr) tag->rawName = tag->buf; } *toPtr = XML_T('\0'); result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings)); if (result) return result; startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts); poolClear(&tempPool); } else { tag->name.str = 0; if (defaultHandler) reportDefault(parser, enc, s, next); } break; } case XML_TOK_EMPTY_ELEMENT_WITH_ATTS: if (!startElementHandler) { enum XML_Error result = storeAtts(parser, enc, s, 0, 0); if (result) return result; } /* fall through */ case XML_TOK_EMPTY_ELEMENT_NO_ATTS: if (startElementHandler || endElementHandler) { const char *rawName = s + enc->minBytesPerChar; enum XML_Error result; BINDING *bindings = 0; TAG_NAME name; name.str = poolStoreString(&tempPool, enc, rawName, rawName + XmlNameLength(enc, rawName)); if (!name.str) return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); result = storeAtts(parser, enc, s, &name, &bindings); if (result) return result; poolFinish(&tempPool); if (startElementHandler) startElementHandler(handlerArg, name.str, (const XML_Char **)atts); if (endElementHandler) { if (startElementHandler) *eventPP = *eventEndPP; endElementHandler(handlerArg, name.str); } poolClear(&tempPool); while (bindings) { BINDING *b = bindings; if (endNamespaceDeclHandler) endNamespaceDeclHandler(handlerArg, b->prefix->name); bindings = bindings->nextTagBinding; b->nextTagBinding = freeBindingList; freeBindingList = b; b->prefix->binding = b->prevPrefixBinding; } } else if (defaultHandler) reportDefault(parser, enc, s, next); if (tagLevel == 0) return epilogProcessor(parser, next, end, nextPtr); break; case XML_TOK_END_TAG: if (tagLevel == startTagLevel) return XML_ERROR_ASYNC_ENTITY; else { int len; const char *rawName; TAG *tag = tagStack; tagStack = tag->parent; 
tag->parent = freeTagList; freeTagList = tag; rawName = s + enc->minBytesPerChar*2; len = XmlNameLength(enc, rawName); if (len != tag->rawNameLength || memcmp(tag->rawName, rawName, len) != 0) { *eventPP = rawName; return XML_ERROR_TAG_MISMATCH; } --tagLevel; if (endElementHandler && tag->name.str) { if (tag->name.localPart) { XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen; const XML_Char *from = tag->name.localPart; while ((*to++ = *from++) != 0) ; } endElementHandler(handlerArg, tag->name.str); } else if (defaultHandler) reportDefault(parser, enc, s, next); while (tag->bindings) { BINDING *b = tag->bindings; if (endNamespaceDeclHandler) endNamespaceDeclHandler(handlerArg, b->prefix->name); tag->bindings = tag->bindings->nextTagBinding; b->nextTagBinding = freeBindingList; freeBindingList = b; b->prefix->binding = b->prevPrefixBinding; } if (tagLevel == 0) return epilogProcessor(parser, next, end, nextPtr); } break; case XML_TOK_CHAR_REF: { int n = XmlCharRefNumber(enc, s); if (n < 0) return XML_ERROR_BAD_CHAR_REF; if (characterDataHandler) { XML_Char buf[XML_ENCODE_MAX]; characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf)); } else if (defaultHandler) reportDefault(parser, enc, s, next); } break; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; case XML_TOK_DATA_NEWLINE: if (characterDataHandler) { XML_Char c = 0xA; characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) reportDefault(parser, enc, s, next); break; case XML_TOK_CDATA_SECT_OPEN: { enum XML_Error result; if (startCdataSectionHandler) startCdataSectionHandler(handlerArg); #if 0 /* Suppose you doing a transformation on a document that involves changing only the character data. You set up a defaultHandler and a characterDataHandler. The defaultHandler simply copies characters through. The characterDataHandler does the transformation and writes the characters out escaping them as necessary. 
This case will fail to work if we leave out the following two lines (because & and < inside CDATA sections will be incorrectly escaped). However, now we have a start/endCdataSectionHandler, so it seems easier to let the user deal with this. */ else if (characterDataHandler) characterDataHandler(handlerArg, dataBuf, 0); #endif else if (defaultHandler) reportDefault(parser, enc, s, next); result = doCdataSection(parser, enc, &next, end, nextPtr); if (!next) { processor = cdataSectionProcessor; return result; } } break; case XML_TOK_TRAILING_RSQB: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } if (characterDataHandler) { if (MUST_CONVERT(enc, s)) { ICHAR *dataPtr = (ICHAR *)dataBuf; XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); } else characterDataHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); } else if (defaultHandler) reportDefault(parser, enc, s, end); if (startTagLevel == 0) { *eventPP = end; return XML_ERROR_NO_ELEMENTS; } if (tagLevel != startTagLevel) { *eventPP = end; return XML_ERROR_ASYNC_ENTITY; } return XML_ERROR_NONE; case XML_TOK_DATA_CHARS: if (characterDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)dataBuf; XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); if (s == next) break; *eventPP = s; } } else characterDataHandler(handlerArg, (XML_Char *)s, (XML_Char *)next - (XML_Char *)s); } else if (defaultHandler) reportDefault(parser, enc, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (!reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; default: if (defaultHandler) reportDefault(parser, enc, s, next); break; } *eventPP = s = next; } /* not reached */ } /* If tagNamePtr is non-null, build a real list 
of attributes, otherwise just check the attributes for well-formedness. */ static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, TAG_NAME *tagNamePtr, BINDING **bindingsPtr) { ELEMENT_TYPE *elementType = 0; int nDefaultAtts = 0; const XML_Char **appAtts; /* the attribute list to pass to the application */ int attIndex = 0; int i; int n; int nPrefixes = 0; BINDING *binding; const XML_Char *localPart; /* lookup the element type name */ if (tagNamePtr) { elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0); if (!elementType) { tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str); if (!tagNamePtr->str) return XML_ERROR_NO_MEMORY; elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE)); if (!elementType) return XML_ERROR_NO_MEMORY; if (ns && !setElementTypePrefix(parser, elementType)) return XML_ERROR_NO_MEMORY; } nDefaultAtts = elementType->nDefaultAtts; } /* get the attributes from the tokenizer */ n = XmlGetAttributes(enc, attStr, attsSize, atts); if (n + nDefaultAtts > attsSize) { int oldAttsSize = attsSize; attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE)); if (!atts) return XML_ERROR_NO_MEMORY; if (n > oldAttsSize) XmlGetAttributes(enc, attStr, n, atts); } appAtts = (const XML_Char **)atts; for (i = 0; i < n; i++) { /* add the name and value to the attribute list */ ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, atts[i].name + XmlNameLength(enc, atts[i].name)); if (!attId) return XML_ERROR_NO_MEMORY; /* detect duplicate attributes */ if ((attId->name)[-1]) { if (enc == encoding) eventPtr = atts[i].name; return XML_ERROR_DUPLICATE_ATTRIBUTE; } (attId->name)[-1] = 1; appAtts[attIndex++] = attId->name; if (!atts[i].normalized) { enum XML_Error result; int isCdata = 1; /* figure out whether declared as other than CDATA */ if (attId->maybeTokenized) { int j; for (j = 0; j < nDefaultAtts; j++) 
{ if (attId == elementType->defaultAtts[j].id) { isCdata = elementType->defaultAtts[j].isCdata; break; } } } /* normalize the attribute value */ result = storeAttributeValue(parser, enc, isCdata, atts[i].valuePtr, atts[i].valueEnd, &tempPool); if (result) return result; if (tagNamePtr) { appAtts[attIndex] = poolStart(&tempPool); poolFinish(&tempPool); } else poolDiscard(&tempPool); } else if (tagNamePtr) { /* the value did not need normalizing */ appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd); if (appAtts[attIndex] == 0) return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); } /* handle prefixed attribute names */ if (attId->prefix && tagNamePtr) { if (attId->xmlns) { /* deal with namespace declarations here */ if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr)) return XML_ERROR_NO_MEMORY; --attIndex; } else { /* deal with other prefixed names later */ attIndex++; nPrefixes++; (attId->name)[-1] = 2; } } else attIndex++; } if (tagNamePtr) { int j; nSpecifiedAtts = attIndex; if (elementType->idAtt && (elementType->idAtt->name)[-1]) { for (i = 0; i < attIndex; i += 2) if (appAtts[i] == elementType->idAtt->name) { idAttIndex = i; break; } } else idAttIndex = -1; /* do attribute defaulting */ for (j = 0; j < nDefaultAtts; j++) { const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j; if (!(da->id->name)[-1] && da->value) { if (da->id->prefix) { if (da->id->xmlns) { if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr)) return XML_ERROR_NO_MEMORY; } else { (da->id->name)[-1] = 2; nPrefixes++; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; } } else { (da->id->name)[-1] = 1; appAtts[attIndex++] = da->id->name; appAtts[attIndex++] = da->value; } } } appAtts[attIndex] = 0; } i = 0; if (nPrefixes) { /* expand prefixed attribute names */ for (; i < attIndex; i += 2) { if (appAtts[i][-1] == 2) { ATTRIBUTE_ID *id; ((XML_Char *)(appAtts[i]))[-1] = 0; id = (ATTRIBUTE_ID 
*)lookup(&dtd.attributeIds, appAtts[i], 0); if (id->prefix->binding) { int j; const BINDING *b = id->prefix->binding; const XML_Char *s = appAtts[i]; for (j = 0; j < b->uriLen; j++) { if (!poolAppendChar(&tempPool, b->uri[j])) return XML_ERROR_NO_MEMORY; } while (*s++ != ':') ; do { if (!poolAppendChar(&tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); appAtts[i] = poolStart(&tempPool); poolFinish(&tempPool); } if (!--nPrefixes) break; } else ((XML_Char *)(appAtts[i]))[-1] = 0; } } /* clear the flags that say whether attributes were specified */ for (; i < attIndex; i += 2) ((XML_Char *)(appAtts[i]))[-1] = 0; if (!tagNamePtr) return XML_ERROR_NONE; for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; if (!binding) return XML_ERROR_NONE; localPart = tagNamePtr->str; while (*localPart++ != XML_T(':')) ; } else if (dtd.defaultPrefix.binding) { binding = dtd.defaultPrefix.binding; localPart = tagNamePtr->str; } else return XML_ERROR_NONE; tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; for (i = 0; localPart[i++];) ; n = i + binding->uriLen; if (n > binding->uriAlloc) { TAG *p; XML_Char *uri = malloc((n + EXPAND_SPARE) * sizeof(XML_Char)); if (!uri) return XML_ERROR_NO_MEMORY; binding->uriAlloc = n + EXPAND_SPARE; memcpy(uri, binding->uri, binding->uriLen * sizeof(XML_Char)); for (p = tagStack; p; p = p->parent) if (p->name.str == binding->uri) p->name.str = uri; free(binding->uri); binding->uri = uri; } memcpy(binding->uri + binding->uriLen, localPart, i * sizeof(XML_Char)); tagNamePtr->str = binding->uri; return XML_ERROR_NONE; } static int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) { BINDING *b; int len; for (len = 0; uri[len]; len++) ; if (namespaceSeparator) len++; if (freeBindingList) { b = freeBindingList; 
if (len > b->uriAlloc) { b->uri = realloc(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE)); if (!b->uri) return 0; b->uriAlloc = len + EXPAND_SPARE; } freeBindingList = b->nextTagBinding; } else { b = malloc(sizeof(BINDING)); if (!b) return 0; b->uri = malloc(sizeof(XML_Char) * (len + EXPAND_SPARE)); if (!b->uri) { free(b); return 0; } b->uriAlloc = len + EXPAND_SPARE; } b->uriLen = len; memcpy(b->uri, uri, len * sizeof(XML_Char)); if (namespaceSeparator) b->uri[len - 1] = namespaceSeparator; b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix) prefix->binding = 0; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; if (startNamespaceDeclHandler) startNamespaceDeclHandler(handlerArg, prefix->name, prefix->binding ? uri : 0); return 1; } /* The idea here is to avoid using stack for each CDATA section when the whole file is parsed with one call. */ static enum XML_Error cdataSectionProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr); if (start) { processor = contentProcessor; return contentProcessor(parser, start, end, endPtr); } return result; } /* startPtr gets set to non-null is the section is closed, and to null if the section is not yet closed. 
*/ static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr) { const char *s = *startPtr; const char **eventPP; const char **eventEndPP; if (enc == encoding) { eventPP = &eventPtr; *eventPP = s; eventEndPP = &eventEndPtr; } else { eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; *startPtr = 0; for (;;) { const char *next; int tok = XmlCdataSectionTok(enc, s, end, &next); *eventEndPP = next; switch (tok) { case XML_TOK_CDATA_SECT_CLOSE: if (endCdataSectionHandler) endCdataSectionHandler(handlerArg); #if 0 /* see comment under XML_TOK_CDATA_SECT_OPEN */ else if (characterDataHandler) characterDataHandler(handlerArg, dataBuf, 0); #endif else if (defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: if (characterDataHandler) { XML_Char c = 0xA; characterDataHandler(handlerArg, &c, 1); } else if (defaultHandler) reportDefault(parser, enc, s, next); break; case XML_TOK_DATA_CHARS: if (characterDataHandler) { if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)dataBuf; XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = next; characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); if (s == next) break; *eventPP = s; } } else characterDataHandler(handlerArg, (XML_Char *)s, (XML_Char *)next - (XML_Char *)s); } else if (defaultHandler) reportDefault(parser, enc, s, next); break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_CDATA_SECTION; default: abort(); } *eventPP = s = next; } /* not reached */ } #ifdef XML_DTD /* The idea here is to avoid using 
stack for each IGNORE section when the whole file is parsed with one call. */ static enum XML_Error ignoreSectionProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr); if (start) { processor = prologProcessor; return prologProcessor(parser, start, end, endPtr); } return result; } /* startPtr gets set to non-null is the section is closed, and to null if the section is not yet closed. */ static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, const char **nextPtr) { const char *next; int tok; const char *s = *startPtr; const char **eventPP; const char **eventEndPP; if (enc == encoding) { eventPP = &eventPtr; *eventPP = s; eventEndPP = &eventEndPtr; } else { eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; *startPtr = 0; tok = XmlIgnoreSectionTok(enc, s, end, &next); *eventEndPP = next; switch (tok) { case XML_TOK_IGNORE_SECT: if (defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; return XML_ERROR_NONE; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */ default: abort(); } /* not reached */ } #endif /* XML_DTD */ static enum XML_Error initializeEncoding(XML_Parser parser) { const char *s; #ifdef XML_UNICODE char encodingBuf[128]; if (!protocolEncodingName) s = 0; else { int i; for (i = 0; protocolEncodingName[i]; i++) { if (i == sizeof(encodingBuf) - 1 || (protocolEncodingName[i] & ~0x7f) != 0) { encodingBuf[0] = '\0'; break; } encodingBuf[i] = (char)protocolEncodingName[i]; } encodingBuf[i] = '\0'; s = 
encodingBuf; } #else s = protocolEncodingName; #endif if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s)) return XML_ERROR_NONE; return handleUnknownEncoding(parser, protocolEncodingName); } static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s, const char *next) { const char *encodingName = 0; const ENCODING *newEncoding = 0; const char *version; int standalone = -1; if (!(ns ? XmlParseXmlDeclNS : XmlParseXmlDecl)(isGeneralTextEntity, encoding, s, next, &eventPtr, &version, &encodingName, &newEncoding, &standalone)) return XML_ERROR_SYNTAX; if (!isGeneralTextEntity && standalone == 1) { dtd.standalone = 1; #ifdef XML_DTD if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE) paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif /* XML_DTD */ } if (defaultHandler) reportDefault(parser, encoding, s, next); if (!protocolEncodingName) { if (newEncoding) { if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) { eventPtr = encodingName; return XML_ERROR_INCORRECT_ENCODING; } encoding = newEncoding; } else if (encodingName) { enum XML_Error result; const XML_Char *s = poolStoreString(&tempPool, encoding, encodingName, encodingName + XmlNameLength(encoding, encodingName)); if (!s) return XML_ERROR_NO_MEMORY; result = handleUnknownEncoding(parser, s); poolDiscard(&tempPool); if (result == XML_ERROR_UNKNOWN_ENCODING) eventPtr = encodingName; return result; } } return XML_ERROR_NONE; } static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName) { if (unknownEncodingHandler) { XML_Encoding info; int i; for (i = 0; i < 256; i++) info.map[i] = -1; info.convert = 0; info.data = 0; info.release = 0; if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) { ENCODING *enc; unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding()); if (!unknownEncodingMem) { if (info.release) info.release(info.data); return XML_ERROR_NO_MEMORY; } enc = 
(ns ? XmlInitUnknownEncodingNS : XmlInitUnknownEncoding)(unknownEncodingMem, info.map, info.convert, info.data); if (enc) { unknownEncodingData = info.data; unknownEncodingRelease = info.release; encoding = enc; return XML_ERROR_NONE; } } if (info.release) info.release(info.data); } return XML_ERROR_UNKNOWN_ENCODING; } static enum XML_Error prologInitProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { enum XML_Error result = initializeEncoding(parser); if (result != XML_ERROR_NONE) return result; processor = prologProcessor; return prologProcessor(parser, s, end, nextPtr); } static enum XML_Error prologProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { const char *next; int tok = XmlPrologTok(encoding, s, end, &next); return doProlog(parser, encoding, s, end, tok, next, nextPtr); } static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, int tok, const char *next, const char **nextPtr) { #ifdef XML_DTD static const XML_Char externalSubsetName[] = { '#' , '\0' }; #endif /* XML_DTD */ const char **eventPP; const char **eventEndPP; if (enc == encoding) { eventPP = &eventPtr; eventEndPP = &eventEndPtr; } else { eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); } for (;;) { int role; *eventPP = s; *eventEndPP = next; if (tok <= 0) { if (nextPtr != 0 && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; case XML_TOK_NONE: #ifdef XML_DTD if (enc != encoding) return XML_ERROR_NONE; if (parentParser) { if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) == XML_ROLE_ERROR) return XML_ERROR_SYNTAX; hadExternalDoctype = 0; return XML_ERROR_NONE; } #endif /* XML_DTD */ return 
XML_ERROR_NO_ELEMENTS;
      default:
        /* Any other negative token: negate it and treat it as extending
           to the end of the input. */
        tok = -tok;
        next = end;
        break;
      }
    }
    role = XmlTokenRole(&prologState, tok, s, next, enc);
    switch (role) {
    case XML_ROLE_XML_DECL:
      {
        enum XML_Error result = processXmlDecl(parser, 0, s, next);
        if (result != XML_ERROR_NONE)
          return result;
        /* processXmlDecl may have switched the document encoding. */
        enc = encoding;
      }
      break;
    case XML_ROLE_DOCTYPE_NAME:
      if (startDoctypeDeclHandler) {
        const XML_Char *name = poolStoreString(&tempPool, enc, s, next);
        if (!name)
          return XML_ERROR_NO_MEMORY;
        startDoctypeDeclHandler(handlerArg, name);
        poolClear(&tempPool);
      }
      break;
#ifdef XML_DTD
    case XML_ROLE_TEXT_DECL:
      {
        enum XML_Error result = processXmlDecl(parser, 1, s, next);
        if (result != XML_ERROR_NONE)
          return result;
        enc = encoding;
      }
      break;
#endif /* XML_DTD */
    case XML_ROLE_DOCTYPE_PUBLIC_ID:
#ifdef XML_DTD
      /* The external subset is recorded as the parameter entity named by
         externalSubsetName. */
      declEntity = (ENTITY *)lookup(&dtd.paramEntities,
                                    externalSubsetName,
                                    sizeof(ENTITY));
      if (!declEntity)
        return XML_ERROR_NO_MEMORY;
#endif /* XML_DTD */
      /* fall through */
    case XML_ROLE_ENTITY_PUBLIC_ID:
      if (!XmlIsPublicId(enc, s, next, eventPP))
        return XML_ERROR_SYNTAX;
      if (declEntity) {
        /* Store the literal without its first and last character (the
           quotes) and normalize its white space. */
        XML_Char *tem = poolStoreString(&dtd.pool,
                                        enc,
                                        s + enc->minBytesPerChar,
                                        next - enc->minBytesPerChar);
        if (!tem)
          return XML_ERROR_NO_MEMORY;
        normalizePublicId(tem);
        declEntity->publicId = tem;
        poolFinish(&dtd.pool);
      }
      break;
    case XML_ROLE_DOCTYPE_CLOSE:
      if (dtd.complete && hadExternalDoctype) {
        /* Marked incomplete before the handler is called; the later test
           of dtd.complete sees whatever value is in place once the
           handler returns. */
        dtd.complete = 0;
#ifdef XML_DTD
        if (paramEntityParsing && externalEntityRefHandler) {
          ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
                                            externalSubsetName,
                                            0);
          if (!externalEntityRefHandler(externalEntityRefHandlerArg,
                                        0,
                                        entity->base,
                                        entity->systemId,
                                        entity->publicId))
           return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
        }
#endif /* XML_DTD */
        if (!dtd.complete
            && !dtd.standalone
            && notStandaloneHandler
            && !notStandaloneHandler(handlerArg))
          return XML_ERROR_NOT_STANDALONE;
      }
      if (endDoctypeDeclHandler)
        endDoctypeDeclHandler(handlerArg);
      break;
    case XML_ROLE_INSTANCE_START:
      /* The prolog is over: switch processors and let content parsing
         continue from this token. */
      processor = contentProcessor;
      return contentProcessor(parser, s,
end, nextPtr);
    case XML_ROLE_ATTLIST_ELEMENT_NAME:
      {
        const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
        if (!name)
          return XML_ERROR_NO_MEMORY;
        declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
        if (!declElementType)
          return XML_ERROR_NO_MEMORY;
        /* lookup keeps the key pointer only for a new entry; for an
           existing element type the freshly stored copy is dropped. */
        if (declElementType->name != name)
          poolDiscard(&dtd.pool);
        else {
          poolFinish(&dtd.pool);
          if (!setElementTypePrefix(parser, declElementType))
            return XML_ERROR_NO_MEMORY;
        }
        break;
      }
    case XML_ROLE_ATTRIBUTE_NAME:
      declAttributeId = getAttributeId(parser, enc, s, next);
      if (!declAttributeId)
        return XML_ERROR_NO_MEMORY;
      /* Reset the per-attribute type flags; the type roles below set
         them. */
      declAttributeIsCdata = 0;
      declAttributeIsId = 0;
      break;
    case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
      declAttributeIsCdata = 1;
      break;
    case XML_ROLE_ATTRIBUTE_TYPE_ID:
      declAttributeIsId = 1;
      break;
    case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
    case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
      /* No default value: recorded with a null value. */
      if (dtd.complete
          && !defineAttribute(declElementType, declAttributeId,
                              declAttributeIsCdata, declAttributeIsId, 0))
        return XML_ERROR_NO_MEMORY;
      break;
    case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
    case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
      {
        const XML_Char *attVal;
        /* The value is stored without its first and last character (the
           quotes). */
        enum XML_Error result
          = storeAttributeValue(parser, enc, declAttributeIsCdata,
                                s + enc->minBytesPerChar,
                                next - enc->minBytesPerChar,
                                &dtd.pool);
        if (result)
          return result;
        attVal = poolStart(&dtd.pool);
        poolFinish(&dtd.pool);
        if (dtd.complete
            /* ID attributes aren't allowed to have a default */
            && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0, attVal))
          return XML_ERROR_NO_MEMORY;
        break;
      }
    case XML_ROLE_ENTITY_VALUE:
      {
        /* The result is only returned after the pool has been finished
           or discarded below, so dtd.pool is never left with an open
           string. */
        enum XML_Error result = storeEntityValue(parser, enc,
                                                 s + enc->minBytesPerChar,
                                                 next - enc->minBytesPerChar);
        if (declEntity) {
          declEntity->textPtr = poolStart(&dtd.pool);
          declEntity->textLen = poolLength(&dtd.pool);
          poolFinish(&dtd.pool);
          if (internalParsedEntityDeclHandler
              /* Check it's not a parameter entity */
              && ((ENTITY *)lookup(&dtd.generalEntities, declEntity->name, 0)
                  == declEntity)) {
            *eventEndPP = s;
internalParsedEntityDeclHandler(handlerArg,
                                            declEntity->name,
                                            declEntity->textPtr,
                                            declEntity->textLen);
          }
        }
        else
          poolDiscard(&dtd.pool);
        if (result != XML_ERROR_NONE)
          return result;
      }
      break;
    case XML_ROLE_DOCTYPE_SYSTEM_ID:
      /* With XML_DTD the not-standalone handler is only consulted here
         when parameter entity parsing is off. */
      if (!dtd.standalone
#ifdef XML_DTD
          && !paramEntityParsing
#endif /* XML_DTD */
          && notStandaloneHandler
          && !notStandaloneHandler(handlerArg))
        return XML_ERROR_NOT_STANDALONE;
      hadExternalDoctype = 1;
#ifndef XML_DTD
      break;
#else /* XML_DTD */
      /* No public id was seen, so the entry for the external subset has
         not been created yet. */
      if (!declEntity) {
        declEntity = (ENTITY *)lookup(&dtd.paramEntities,
                                      externalSubsetName,
                                      sizeof(ENTITY));
        if (!declEntity)
          return XML_ERROR_NO_MEMORY;
      }
      /* fall through */
#endif /* XML_DTD */
    case XML_ROLE_ENTITY_SYSTEM_ID:
      if (declEntity) {
        /* Stored without its first and last character (the quotes). */
        declEntity->systemId = poolStoreString(&dtd.pool, enc,
                                               s + enc->minBytesPerChar,
                                               next - enc->minBytesPerChar);
        if (!declEntity->systemId)
          return XML_ERROR_NO_MEMORY;
        declEntity->base = curBase;
        poolFinish(&dtd.pool);
      }
      break;
    case XML_ROLE_ENTITY_NOTATION_NAME:
      if (declEntity) {
        declEntity->notation = poolStoreString(&dtd.pool, enc, s, next);
        if (!declEntity->notation)
          return XML_ERROR_NO_MEMORY;
        poolFinish(&dtd.pool);
        if (unparsedEntityDeclHandler) {
          *eventEndPP = s;
          unparsedEntityDeclHandler(handlerArg,
                                    declEntity->name,
                                    declEntity->base,
                                    declEntity->systemId,
                                    declEntity->publicId,
                                    declEntity->notation);
        }

      }
      break;
    case XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION:
      if (declEntity && externalParsedEntityDeclHandler) {
        *eventEndPP = s;
        externalParsedEntityDeclHandler(handlerArg,
                                        declEntity->name,
                                        declEntity->base,
                                        declEntity->systemId,
                                        declEntity->publicId);
      }
      break;
    case XML_ROLE_GENERAL_ENTITY_NAME:
      {
        const XML_Char *name;
        /* Declarations of the predefined entities are ignored. */
        if (XmlPredefinedEntityName(enc, s, next)) {
          declEntity = 0;
          break;
        }
        name = poolStoreString(&dtd.pool, enc, s, next);
        if (!name)
          return XML_ERROR_NO_MEMORY;
        if (dtd.complete) {
          declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
          if (!declEntity)
            return XML_ERROR_NO_MEMORY;
          /* An entry that already existed keeps its own name pointer:
             the entity was declared before and this declaration is
             ignored. */
          if (declEntity->name != name) {
poolDiscard(&dtd.pool); declEntity = 0; } else poolFinish(&dtd.pool); } else { poolDiscard(&dtd.pool); declEntity = 0; } } break; case XML_ROLE_PARAM_ENTITY_NAME: #ifdef XML_DTD if (dtd.complete) { const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY)); if (!declEntity) return XML_ERROR_NO_MEMORY; if (declEntity->name != name) { poolDiscard(&dtd.pool); declEntity = 0; } else poolFinish(&dtd.pool); } #else /* not XML_DTD */ declEntity = 0; #endif /* not XML_DTD */ break; case XML_ROLE_NOTATION_NAME: declNotationPublicId = 0; declNotationName = 0; if (notationDeclHandler) { declNotationName = poolStoreString(&tempPool, enc, s, next); if (!declNotationName) return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); } break; case XML_ROLE_NOTATION_PUBLIC_ID: if (!XmlIsPublicId(enc, s, next, eventPP)) return XML_ERROR_SYNTAX; if (declNotationName) { XML_Char *tem = poolStoreString(&tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (!tem) return XML_ERROR_NO_MEMORY; normalizePublicId(tem); declNotationPublicId = tem; poolFinish(&tempPool); } break; case XML_ROLE_NOTATION_SYSTEM_ID: if (declNotationName && notationDeclHandler) { const XML_Char *systemId = poolStoreString(&tempPool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (!systemId) return XML_ERROR_NO_MEMORY; *eventEndPP = s; notationDeclHandler(handlerArg, declNotationName, curBase, systemId, declNotationPublicId); } poolClear(&tempPool); break; case XML_ROLE_NOTATION_NO_SYSTEM_ID: if (declNotationPublicId && notationDeclHandler) { *eventEndPP = s; notationDeclHandler(handlerArg, declNotationName, curBase, 0, declNotationPublicId); } poolClear(&tempPool); break; case XML_ROLE_ERROR: switch (tok) { case XML_TOK_PARAM_ENTITY_REF: return XML_ERROR_PARAM_ENTITY_REF; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; default: return XML_ERROR_SYNTAX; } #ifdef 
XML_DTD case XML_ROLE_IGNORE_SECT: { enum XML_Error result; if (defaultHandler) reportDefault(parser, enc, s, next); result = doIgnoreSection(parser, enc, &next, end, nextPtr); if (!next) { processor = ignoreSectionProcessor; return result; } } break; #endif /* XML_DTD */ case XML_ROLE_GROUP_OPEN: if (prologState.level >= groupSize) { if (groupSize) groupConnector = realloc(groupConnector, groupSize *= 2); else groupConnector = malloc(groupSize = 32); if (!groupConnector) return XML_ERROR_NO_MEMORY; } groupConnector[prologState.level] = 0; break; case XML_ROLE_GROUP_SEQUENCE: if (groupConnector[prologState.level] == '|') return XML_ERROR_SYNTAX; groupConnector[prologState.level] = ','; break; case XML_ROLE_GROUP_CHOICE: if (groupConnector[prologState.level] == ',') return XML_ERROR_SYNTAX; groupConnector[prologState.level] = '|'; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD case XML_ROLE_INNER_PARAM_ENTITY_REF: if (paramEntityParsing && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) { const XML_Char *name; ENTITY *entity; name = poolStoreString(&dtd.pool, enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); poolDiscard(&dtd.pool); if (!entity) { /* FIXME what to do if !dtd.complete? 
*/ return XML_ERROR_UNDEFINED_ENTITY; } if (entity->open) return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; result = processInternalParamEntity(parser, entity); if (result != XML_ERROR_NONE) return result; break; } if (role == XML_ROLE_INNER_PARAM_ENTITY_REF) return XML_ERROR_PARAM_ENTITY_REF; if (externalEntityRefHandler) { dtd.complete = 0; entity->open = 1; if (!externalEntityRefHandler(externalEntityRefHandlerArg, 0, entity->base, entity->systemId, entity->publicId)) { entity->open = 0; return XML_ERROR_EXTERNAL_ENTITY_HANDLING; } entity->open = 0; if (dtd.complete) break; } } #endif /* XML_DTD */ if (!dtd.standalone && notStandaloneHandler && !notStandaloneHandler(handlerArg)) return XML_ERROR_NOT_STANDALONE; dtd.complete = 0; if (defaultHandler) reportDefault(parser, enc, s, next); break; case XML_ROLE_NONE: switch (tok) { case XML_TOK_PI: if (!reportProcessingInstruction(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (!reportComment(parser, enc, s, next)) return XML_ERROR_NO_MEMORY; break; } break; } if (defaultHandler) { switch (tok) { case XML_TOK_PI: case XML_TOK_COMMENT: case XML_TOK_BOM: case XML_TOK_XML_DECL: #ifdef XML_DTD case XML_TOK_IGNORE_SECT: #endif /* XML_DTD */ case XML_TOK_PARAM_ENTITY_REF: break; default: #ifdef XML_DTD if (role != XML_ROLE_IGNORE_SECT) #endif /* XML_DTD */ reportDefault(parser, enc, s, next); } } s = next; tok = XmlPrologTok(enc, s, end, &next); } /* not reached */ } static enum XML_Error epilogProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { processor = epilogProcessor; eventPtr = s; for (;;) { const char *next; int tok = XmlPrologTok(encoding, s, end, &next); eventEndPtr = next; switch (tok) { case -XML_TOK_PROLOG_S: if (defaultHandler) { eventEndPtr = end; reportDefault(parser, encoding, s, end); } /* fall through */ case XML_TOK_NONE: if (nextPtr) *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: if 
(defaultHandler) reportDefault(parser, encoding, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_COMMENT: if (!reportComment(parser, encoding, s, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_INVALID: eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: if (nextPtr) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; default: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } eventPtr = s = next; } } #ifdef XML_DTD static enum XML_Error processInternalParamEntity(XML_Parser parser, ENTITY *entity) { const char *s, *end, *next; int tok; enum XML_Error result; OPEN_INTERNAL_ENTITY openEntity; entity->open = 1; openEntity.next = openInternalEntities; openInternalEntities = &openEntity; openEntity.entity = entity; openEntity.internalEventPtr = 0; openEntity.internalEventEndPtr = 0; s = (char *)entity->textPtr; end = (char *)(entity->textPtr + entity->textLen); tok = XmlPrologTok(internalEncoding, s, end, &next); result = doProlog(parser, internalEncoding, s, end, tok, next, 0); entity->open = 0; openInternalEntities = openEntity.next; return result; } #endif /* XML_DTD */ static enum XML_Error errorProcessor(XML_Parser parser, const char *s, const char *end, const char **nextPtr) { return errorCode; } static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, const char *ptr, const char *end, STRING_POOL *pool) { enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool); if (result) return result; if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20) poolChop(pool); if (!poolAppendChar(pool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; return XML_ERROR_NONE; } static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata, 
const char *ptr, const char *end, STRING_POOL *pool) { for (;;) { const char *next; int tok = XmlAttributeValueTok(enc, ptr, end, &next); switch (tok) { case XML_TOK_NONE: return XML_ERROR_NONE; case XML_TOK_INVALID: if (enc == encoding) eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: if (enc == encoding) eventPtr = ptr; return XML_ERROR_INVALID_TOKEN; case XML_TOK_CHAR_REF: { XML_Char buf[XML_ENCODE_MAX]; int i; int n = XmlCharRefNumber(enc, ptr); if (n < 0) { if (enc == encoding) eventPtr = ptr; return XML_ERROR_BAD_CHAR_REF; } if (!isCdata && n == 0x20 /* space */ && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; n = XmlEncode(n, (ICHAR *)buf); if (!n) { if (enc == encoding) eventPtr = ptr; return XML_ERROR_BAD_CHAR_REF; } for (i = 0; i < n; i++) { if (!poolAppendChar(pool, buf[i])) return XML_ERROR_NO_MEMORY; } } break; case XML_TOK_DATA_CHARS: if (!poolAppend(pool, enc, ptr, next)) return XML_ERROR_NO_MEMORY; break; break; case XML_TOK_TRAILING_CR: next = ptr + enc->minBytesPerChar; /* fall through */ case XML_TOK_ATTRIBUTE_VALUE_S: case XML_TOK_DATA_NEWLINE: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20)) break; if (!poolAppendChar(pool, 0x20)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_ENTITY_REF: { const XML_Char *name; ENTITY *entity; XML_Char ch = XmlPredefinedEntityName(enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (ch) { if (!poolAppendChar(pool, ch)) return XML_ERROR_NO_MEMORY; break; } name = poolStoreString(&temp2Pool, enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0); poolDiscard(&temp2Pool); if (!entity) { if (dtd.complete) { if (enc == encoding) eventPtr = ptr; return XML_ERROR_UNDEFINED_ENTITY; } } else if (entity->open) { if (enc == encoding) eventPtr = ptr; return XML_ERROR_RECURSIVE_ENTITY_REF; } else if (entity->notation) { if (enc == encoding) 
eventPtr = ptr; return XML_ERROR_BINARY_ENTITY_REF; } else if (!entity->textPtr) { if (enc == encoding) eventPtr = ptr; return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; } else { enum XML_Error result; const XML_Char *textEnd = entity->textPtr + entity->textLen; entity->open = 1; result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool); entity->open = 0; if (result) return result; } } break; default: abort(); } ptr = next; } /* not reached */ } static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *entityTextPtr, const char *entityTextEnd) { STRING_POOL *pool = &(dtd.pool); for (;;) { const char *next; int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next); switch (tok) { case XML_TOK_PARAM_ENTITY_REF: #ifdef XML_DTD if (parentParser || enc != encoding) { enum XML_Error result; const XML_Char *name; ENTITY *entity; name = poolStoreString(&tempPool, enc, entityTextPtr + enc->minBytesPerChar, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0); poolDiscard(&tempPool); if (!entity) { if (enc == encoding) eventPtr = entityTextPtr; return XML_ERROR_UNDEFINED_ENTITY; } if (entity->open) { if (enc == encoding) eventPtr = entityTextPtr; return XML_ERROR_RECURSIVE_ENTITY_REF; } if (entity->systemId) { if (enc == encoding) eventPtr = entityTextPtr; return XML_ERROR_PARAM_ENTITY_REF; } entity->open = 1; result = storeEntityValue(parser, internalEncoding, (char *)entity->textPtr, (char *)(entity->textPtr + entity->textLen)); entity->open = 0; if (result) return result; break; } #endif /* XML_DTD */ eventPtr = entityTextPtr; return XML_ERROR_SYNTAX; case XML_TOK_NONE: return XML_ERROR_NONE; case XML_TOK_ENTITY_REF: case XML_TOK_DATA_CHARS: if (!poolAppend(pool, enc, entityTextPtr, next)) return XML_ERROR_NO_MEMORY; break; case XML_TOK_TRAILING_CR: next = entityTextPtr + enc->minBytesPerChar; /* fall 
through */ case XML_TOK_DATA_NEWLINE: if (pool->end == pool->ptr && !poolGrow(pool)) return XML_ERROR_NO_MEMORY; *(pool->ptr)++ = 0xA; break; case XML_TOK_CHAR_REF: { XML_Char buf[XML_ENCODE_MAX]; int i; int n = XmlCharRefNumber(enc, entityTextPtr); if (n < 0) { if (enc == encoding) eventPtr = entityTextPtr; return XML_ERROR_BAD_CHAR_REF; } n = XmlEncode(n, (ICHAR *)buf); if (!n) { if (enc == encoding) eventPtr = entityTextPtr; return XML_ERROR_BAD_CHAR_REF; } for (i = 0; i < n; i++) { if (pool->end == pool->ptr && !poolGrow(pool)) return XML_ERROR_NO_MEMORY; *(pool->ptr)++ = buf[i]; } } break; case XML_TOK_PARTIAL: if (enc == encoding) eventPtr = entityTextPtr; return XML_ERROR_INVALID_TOKEN; case XML_TOK_INVALID: if (enc == encoding) eventPtr = next; return XML_ERROR_INVALID_TOKEN; default: abort(); } entityTextPtr = next; } /* not reached */ } static void normalizeLines(XML_Char *s) { XML_Char *p; for (;; s++) { if (*s == XML_T('\0')) return; if (*s == 0xD) break; } p = s; do { if (*s == 0xD) { *p++ = 0xA; if (*++s == 0xA) s++; } else *p++ = *s++; } while (*s); *p = XML_T('\0'); } static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) { const XML_Char *target; XML_Char *data; const char *tem; if (!processingInstructionHandler) { if (defaultHandler) reportDefault(parser, enc, start, end); return 1; } start += enc->minBytesPerChar * 2; tem = start + XmlNameLength(enc, start); target = poolStoreString(&tempPool, enc, start, tem); if (!target) return 0; poolFinish(&tempPool); data = poolStoreString(&tempPool, enc, XmlSkipS(enc, tem), end - enc->minBytesPerChar*2); if (!data) return 0; normalizeLines(data); processingInstructionHandler(handlerArg, target, data); poolClear(&tempPool); return 1; } static int reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) { XML_Char *data; if (!commentHandler) { if (defaultHandler) reportDefault(parser, enc, start, end); return 1; } 
data = poolStoreString(&tempPool, enc, start + enc->minBytesPerChar * 4, end - enc->minBytesPerChar * 3); if (!data) return 0; normalizeLines(data); commentHandler(handlerArg, data); poolClear(&tempPool); return 1; } static void reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) { if (MUST_CONVERT(enc, s)) { const char **eventPP; const char **eventEndPP; if (enc == encoding) { eventPP = &eventPtr; eventEndPP = &eventEndPtr; } else { eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); } do { ICHAR *dataPtr = (ICHAR *)dataBuf; XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); *eventPP = s; } while (s != end); } else defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); } static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, int isId, const XML_Char *value) { DEFAULT_ATTRIBUTE *att; if (value || isId) { /* The handling of default attributes gets messed up if we have a default which duplicates a non-default. 
*/ int i; for (i = 0; i < type->nDefaultAtts; i++) if (attId == type->defaultAtts[i].id) return 1; if (isId && !type->idAtt && !attId->xmlns) type->idAtt = attId; } if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); } else { type->allocDefaultAtts *= 2; type->defaultAtts = realloc(type->defaultAtts, type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE)); } if (!type->defaultAtts) return 0; } att = type->defaultAtts + type->nDefaultAtts; att->id = attId; att->value = value; att->isCdata = isCdata; if (!isCdata) attId->maybeTokenized = 1; type->nDefaultAtts += 1; return 1; } static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) { const XML_Char *name; for (name = elementType->name; *name; name++) { if (*name == XML_T(':')) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { if (!poolAppendChar(&dtd.pool, *s)) return 0; } if (!poolAppendChar(&dtd.pool, XML_T('\0'))) return 0; prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); if (!prefix) return 0; if (prefix->name == poolStart(&dtd.pool)) poolFinish(&dtd.pool); else poolDiscard(&dtd.pool); elementType->prefix = prefix; } } return 1; } static ATTRIBUTE_ID * getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end) { ATTRIBUTE_ID *id; const XML_Char *name; if (!poolAppendChar(&dtd.pool, XML_T('\0'))) return 0; name = poolStoreString(&dtd.pool, enc, start, end); if (!name) return 0; ++name; id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID)); if (!id) return 0; if (id->name != name) poolDiscard(&dtd.pool); else { poolFinish(&dtd.pool); if (!ns) ; else if (name[0] == 'x' && name[1] == 'm' && name[2] == 'l' && name[3] == 'n' && name[4] == 's' && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { if (name[5] == '\0') id->prefix = &dtd.defaultPrefix; 
else id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX)); id->xmlns = 1; } else { int i; for (i = 0; name[i]; i++) { if (name[i] == XML_T(':')) { int j; for (j = 0; j < i; j++) { if (!poolAppendChar(&dtd.pool, name[j])) return 0; } if (!poolAppendChar(&dtd.pool, XML_T('\0'))) return 0; id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX)); if (id->prefix->name == poolStart(&dtd.pool)) poolFinish(&dtd.pool); else poolDiscard(&dtd.pool); break; } } } } return id; } #define CONTEXT_SEP XML_T('\f') static const XML_Char *getContext(XML_Parser parser) { HASH_TABLE_ITER iter; int needSep = 0; if (dtd.defaultPrefix.binding) { int i; int len; if (!poolAppendChar(&tempPool, XML_T('='))) return 0; len = dtd.defaultPrefix.binding->uriLen; if (namespaceSeparator != XML_T('\0')) len--; for (i = 0; i < len; i++) if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i])) return 0; needSep = 1; } hashTableIterInit(&iter, &(dtd.prefixes)); for (;;) { int i; int len; const XML_Char *s; PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter); if (!prefix) break; if (!prefix->binding) continue; if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) return 0; for (s = prefix->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) return 0; if (!poolAppendChar(&tempPool, XML_T('='))) return 0; len = prefix->binding->uriLen; if (namespaceSeparator != XML_T('\0')) len--; for (i = 0; i < len; i++) if (!poolAppendChar(&tempPool, prefix->binding->uri[i])) return 0; needSep = 1; } hashTableIterInit(&iter, &(dtd.generalEntities)); for (;;) { const XML_Char *s; ENTITY *e = (ENTITY *)hashTableIterNext(&iter); if (!e) break; if (!e->open) continue; if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP)) return 0; for (s = e->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) return 0; needSep = 1; } if (!poolAppendChar(&tempPool, XML_T('\0'))) return 0; return tempPool.start; } static int setContext(XML_Parser parser, const XML_Char *context) { 
const XML_Char *s = context; while (*context != XML_T('\0')) { if (*s == CONTEXT_SEP || *s == XML_T('\0')) { ENTITY *e; if (!poolAppendChar(&tempPool, XML_T('\0'))) return 0; e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0); if (e) e->open = 1; if (*s != XML_T('\0')) s++; context = s; poolDiscard(&tempPool); } else if (*s == '=') { PREFIX *prefix; if (poolLength(&tempPool) == 0) prefix = &dtd.defaultPrefix; else { if (!poolAppendChar(&tempPool, XML_T('\0'))) return 0; prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX)); if (!prefix) return 0; if (prefix->name == poolStart(&tempPool)) { prefix->name = poolCopyString(&dtd.pool, prefix->name); if (!prefix->name) return 0; } poolDiscard(&tempPool); } for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++) if (!poolAppendChar(&tempPool, *context)) return 0; if (!poolAppendChar(&tempPool, XML_T('\0'))) return 0; if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings)) return 0; poolDiscard(&tempPool); if (*context != XML_T('\0')) ++context; s = context; } else { if (!poolAppendChar(&tempPool, *s)) return 0; s++; } } return 1; } static void normalizePublicId(XML_Char *publicId) { XML_Char *p = publicId; XML_Char *s; for (s = publicId; *s; s++) { switch (*s) { case 0x20: case 0xD: case 0xA: if (p != publicId && p[-1] != 0x20) *p++ = 0x20; break; default: *p++ = *s; } } if (p != publicId && p[-1] == 0x20) --p; *p = XML_T('\0'); } static int dtdInit(DTD *p) { poolInit(&(p->pool)); hashTableInit(&(p->generalEntities)); hashTableInit(&(p->elementTypes)); hashTableInit(&(p->attributeIds)); hashTableInit(&(p->prefixes)); p->complete = 1; p->standalone = 0; #ifdef XML_DTD hashTableInit(&(p->paramEntities)); #endif /* XML_DTD */ p->defaultPrefix.name = 0; p->defaultPrefix.binding = 0; return 1; } #ifdef XML_DTD static void dtdSwap(DTD *p1, DTD *p2) { DTD tem; memcpy(&tem, p1, sizeof(DTD)); memcpy(p1, p2, sizeof(DTD)); memcpy(p2, &tem, 
sizeof(DTD)); } #endif /* XML_DTD */ static void dtdDestroy(DTD *p) { HASH_TABLE_ITER iter; hashTableIterInit(&iter, &(p->elementTypes)); for (;;) { ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter); if (!e) break; if (e->allocDefaultAtts != 0) free(e->defaultAtts); } hashTableDestroy(&(p->generalEntities)); #ifdef XML_DTD hashTableDestroy(&(p->paramEntities)); #endif /* XML_DTD */ hashTableDestroy(&(p->elementTypes)); hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); } /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise. The new DTD has already been initialized. */ static int dtdCopy(DTD *newDtd, const DTD *oldDtd) { HASH_TABLE_ITER iter; /* Copy the prefix table. */ hashTableIterInit(&iter, &(oldDtd->prefixes)); for (;;) { const XML_Char *name; const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter); if (!oldP) break; name = poolCopyString(&(newDtd->pool), oldP->name); if (!name) return 0; if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } hashTableIterInit(&iter, &(oldDtd->attributeIds)); /* Copy the attribute id table. */ for (;;) { ATTRIBUTE_ID *newA; const XML_Char *name; const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter); if (!oldA) break; /* Remember to allocate the scratch byte before the name. */ if (!poolAppendChar(&(newDtd->pool), XML_T('\0'))) return 0; name = poolCopyString(&(newDtd->pool), oldA->name); if (!name) return 0; ++name; newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); if (!newA) return 0; newA->maybeTokenized = oldA->maybeTokenized; if (oldA->prefix) { newA->xmlns = oldA->xmlns; if (oldA->prefix == &oldDtd->defaultPrefix) newA->prefix = &newDtd->defaultPrefix; else newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0); } } /* Copy the element type table. 
*/ hashTableIterInit(&iter, &(oldDtd->elementTypes)); for (;;) { int i; ELEMENT_TYPE *newE; const XML_Char *name; const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter); if (!oldE) break; name = poolCopyString(&(newDtd->pool), oldE->name); if (!name) return 0; newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); if (!newE) return 0; if (oldE->nDefaultAtts) { newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (!newE->defaultAtts) return 0; } if (oldE->idAtt) newE->idAtt = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value); if (!newE->defaultAtts[i].value) return 0; } else newE->defaultAtts[i].value = 0; } } /* Copy the entity tables. 
*/ if (!copyEntityTable(&(newDtd->generalEntities), &(newDtd->pool), &(oldDtd->generalEntities))) return 0; #ifdef XML_DTD if (!copyEntityTable(&(newDtd->paramEntities), &(newDtd->pool), &(oldDtd->paramEntities))) return 0; #endif /* XML_DTD */ newDtd->complete = oldDtd->complete; newDtd->standalone = oldDtd->standalone; return 1; } static int copyEntityTable(HASH_TABLE *newTable, STRING_POOL *newPool, const HASH_TABLE *oldTable) { HASH_TABLE_ITER iter; const XML_Char *cachedOldBase = 0; const XML_Char *cachedNewBase = 0; hashTableIterInit(&iter, oldTable); for (;;) { ENTITY *newE; const XML_Char *name; const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter); if (!oldE) break; name = poolCopyString(newPool, oldE->name); if (!name) return 0; newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); if (!newE) return 0; if (oldE->systemId) { const XML_Char *tem = poolCopyString(newPool, oldE->systemId); if (!tem) return 0; newE->systemId = tem; if (oldE->base) { if (oldE->base == cachedOldBase) newE->base = cachedNewBase; else { cachedOldBase = oldE->base; tem = poolCopyString(newPool, cachedOldBase); if (!tem) return 0; cachedNewBase = newE->base = tem; } } } else { const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen); if (!tem) return 0; newE->textPtr = tem; newE->textLen = oldE->textLen; } if (oldE->notation) { const XML_Char *tem = poolCopyString(newPool, oldE->notation); if (!tem) return 0; newE->notation = tem; } } return 1; } #define INIT_SIZE 64 static int keyeq(KEY s1, KEY s2) { for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) return 1; return 0; } static unsigned long hash(KEY s) { unsigned long h = 0; while (*s) h = (h << 5) + h + (unsigned char)*s++; return h; } static NAMED *lookup(HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { if (!createSize) return 0; table->v = calloc(INIT_SIZE, sizeof(NAMED *)); if (!table->v) return 0; table->size = INIT_SIZE; table->usedLim = INIT_SIZE / 2; i = hash(name) & 
(table->size - 1); } else { unsigned long h = hash(name); for (i = h & (table->size - 1); table->v[i]; i == 0 ? i = table->size - 1 : --i) { if (keyeq(name, table->v[i]->name)) return table->v[i]; } if (!createSize) return 0; if (table->used == table->usedLim) { /* check for overflow */ size_t newSize = table->size * 2; NAMED **newV = calloc(newSize, sizeof(NAMED *)); if (!newV) return 0; for (i = 0; i < table->size; i++) if (table->v[i]) { size_t j; for (j = hash(table->v[i]->name) & (newSize - 1); newV[j]; j == 0 ? j = newSize - 1 : --j) ; newV[j] = table->v[i]; } free(table->v); table->v = newV; table->size = newSize; table->usedLim = newSize/2; for (i = h & (table->size - 1); table->v[i]; i == 0 ? i = table->size - 1 : --i) ; } } table->v[i] = calloc(1, createSize); if (!table->v[i]) return 0; table->v[i]->name = name; (table->used)++; return table->v[i]; } static void hashTableDestroy(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { NAMED *p = table->v[i]; if (p) free(p); } if (table->v) free(table->v); } static void hashTableInit(HASH_TABLE *p) { p->size = 0; p->usedLim = 0; p->used = 0; p->v = 0; } static void hashTableIterInit(HASH_TABLE_ITER *iter, const HASH_TABLE *table) { iter->p = table->v; iter->end = iter->p + table->size; } static NAMED *hashTableIterNext(HASH_TABLE_ITER *iter) { while (iter->p != iter->end) { NAMED *tem = *(iter->p)++; if (tem) return tem; } return 0; } static void poolInit(STRING_POOL *pool) { pool->blocks = 0; pool->freeBlocks = 0; pool->start = 0; pool->ptr = 0; pool->end = 0; } static void poolClear(STRING_POOL *pool) { if (!pool->freeBlocks) pool->freeBlocks = pool->blocks; else { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; p->next = pool->freeBlocks; pool->freeBlocks = p; p = tem; } } pool->blocks = 0; pool->start = 0; pool->ptr = 0; pool->end = 0; } static void poolDestroy(STRING_POOL *pool) { BLOCK *p = pool->blocks; while (p) { BLOCK *tem = p->next; free(p); p = tem; } pool->blocks = 0; p = 
pool->freeBlocks; while (p) { BLOCK *tem = p->next; free(p); p = tem; } pool->freeBlocks = 0; pool->ptr = 0; pool->start = 0; pool->end = 0; } static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { if (!pool->ptr && !poolGrow(pool)) return 0; for (;;) { XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); if (ptr == end) break; if (!poolGrow(pool)) return 0; } return pool->start; } static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s) { do { if (!poolAppendChar(pool, *s)) return 0; } while (*s++); s = pool->start; poolFinish(pool); return s; } static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { if (!pool->ptr && !poolGrow(pool)) return 0; for (; n > 0; --n, s++) { if (!poolAppendChar(pool, *s)) return 0; } s = pool->start; poolFinish(pool); return s; } static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, const char *ptr, const char *end) { if (!poolAppend(pool, enc, ptr, end)) return 0; if (pool->ptr == pool->end && !poolGrow(pool)) return 0; *(pool->ptr)++ = 0; return pool->start; } static int poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { if (pool->start == 0) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; pool->blocks->next = 0; pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; pool->ptr = pool->start; return 1; } if (pool->end - pool->start < pool->freeBlocks->size) { BLOCK *tem = pool->freeBlocks->next; pool->freeBlocks->next = pool->blocks; pool->blocks = pool->freeBlocks; pool->freeBlocks = tem; memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char)); pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; pool->end = pool->start + pool->blocks->size; return 1; } } if (pool->blocks && pool->start == pool->blocks->s) { int blockSize = (pool->end - pool->start)*2; pool->blocks = 
realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); if (!pool->blocks) return 0; pool->blocks->size = blockSize; pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; pool->end = pool->start + blockSize; } else { BLOCK *tem; int blockSize = pool->end - pool->start; if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; else blockSize *= 2; tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char)); if (!tem) return 0; tem->size = blockSize; tem->next = pool->blocks; pool->blocks = tem; if (pool->ptr != pool->start) memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char)); pool->ptr = tem->s + (pool->ptr - pool->start); pool->start = tem->s; pool->end = tem->s + blockSize; } return 1; } expat-1.2/xmlparse/xmlparse.h0100664000076400007640000004704407106705773014472 0ustar jjcjjc/* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #ifndef XmlParse_INCLUDED #define XmlParse_INCLUDED 1 #ifdef __cplusplus extern "C" { #endif #ifndef XMLPARSEAPI #define XMLPARSEAPI /* as nothing */ #endif typedef void *XML_Parser; #ifdef XML_UNICODE_WCHAR_T /* XML_UNICODE_WCHAR_T will work only if sizeof(wchar_t) == 2 and wchar_t uses Unicode. */ /* Information is UTF-16 encoded as wchar_ts */ #ifndef XML_UNICODE #define XML_UNICODE #endif #include typedef wchar_t XML_Char; typedef wchar_t XML_LChar; #else /* not XML_UNICODE_WCHAR_T */ #ifdef XML_UNICODE /* Information is UTF-16 encoded as unsigned shorts */ typedef unsigned short XML_Char; typedef char XML_LChar; #else /* not XML_UNICODE */ /* Information is UTF-8 encoded. */ typedef char XML_Char; typedef char XML_LChar; #endif /* not XML_UNICODE */ #endif /* not XML_UNICODE_WCHAR_T */ /* Constructs a new parser; encoding is the encoding specified by the external protocol or null if there is none specified. 
*/ XML_Parser XMLPARSEAPI XML_ParserCreate(const XML_Char *encoding); /* Constructs a new parser and namespace processor. Element type names and attribute names that belong to a namespace will be expanded; unprefixed attribute names are never expanded; unprefixed element type names are expanded only if there is a default namespace. The expanded name is the concatenation of the namespace URI, the namespace separator character, and the local part of the name. If the namespace separator is '\0' then the namespace URI and the local part will be concatenated without any separator. When a namespace is not declared, the name and prefix will be passed through without expansion. */ XML_Parser XMLPARSEAPI XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); /* atts is array of name/value pairs, terminated by 0; names and values are 0 terminated. */ typedef void (*XML_StartElementHandler)(void *userData, const XML_Char *name, const XML_Char **atts); typedef void (*XML_EndElementHandler)(void *userData, const XML_Char *name); /* s is not 0 terminated. */ typedef void (*XML_CharacterDataHandler)(void *userData, const XML_Char *s, int len); /* target and data are 0 terminated */ typedef void (*XML_ProcessingInstructionHandler)(void *userData, const XML_Char *target, const XML_Char *data); /* data is 0 terminated */ typedef void (*XML_CommentHandler)(void *userData, const XML_Char *data); typedef void (*XML_StartCdataSectionHandler)(void *userData); typedef void (*XML_EndCdataSectionHandler)(void *userData); /* This is called for any characters in the XML document for which there is no applicable handler. This includes both characters that are part of markup which is of a kind that is not reported (comments, markup declarations), or characters that are part of a construct which could be reported but for which no handler has been supplied. The characters are passed exactly as they were in the XML document except that they will be encoded in UTF-8. 
Line boundaries are not normalized. Note that a byte order mark character is not passed to the default handler. There are no guarantees about how characters are divided between calls to the default handler: for example, a comment might be split between multiple calls. */ typedef void (*XML_DefaultHandler)(void *userData, const XML_Char *s, int len); /* This is called for the start of the DOCTYPE declaration when the name of the DOCTYPE is encountered. */ typedef void (*XML_StartDoctypeDeclHandler)(void *userData, const XML_Char *doctypeName); /* This is called for the end of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset. */ typedef void (*XML_EndDoctypeDeclHandler)(void *userData); /* This is called for a declaration of an unparsed (NDATA) entity. The base argument is whatever was set by XML_SetBase. The entityName, systemId and notationName arguments will never be null. The other arguments may be. */ typedef void (*XML_UnparsedEntityDeclHandler)(void *userData, const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); /* This is called for a declaration of notation. The base argument is whatever was set by XML_SetBase. The notationName will never be null. The other arguments can be. */ typedef void (*XML_NotationDeclHandler)(void *userData, const XML_Char *notationName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); typedef void (*XML_ExternalParsedEntityDeclHandler)(void *userData, const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); typedef void (*XML_InternalParsedEntityDeclHandler)(void *userData, const XML_Char *entityName, const XML_Char *replacementText, int replacementTextLength); /* When namespace processing is enabled, these are called once for each namespace declaration. 
The call to the start and end element handlers occur between the calls to the start and end namespace declaration handlers. For an xmlns attribute, prefix will be null. For an xmlns="" attribute, uri will be null. */ typedef void (*XML_StartNamespaceDeclHandler)(void *userData, const XML_Char *prefix, const XML_Char *uri); typedef void (*XML_EndNamespaceDeclHandler)(void *userData, const XML_Char *prefix); /* This is called if the document is not standalone (it has an external subset or a reference to a parameter entity, but does not have standalone="yes"). If this handler returns 0, then processing will not continue, and the parser will return a XML_ERROR_NOT_STANDALONE error. */ typedef int (*XML_NotStandaloneHandler)(void *userData); /* This is called for a reference to an external parsed general entity. The referenced entity is not automatically parsed. The application can parse it immediately or later using XML_ExternalEntityParserCreate. The parser argument is the parser parsing the entity containing the reference; it can be passed as the parser argument to XML_ExternalEntityParserCreate. The systemId argument is the system identifier as specified in the entity declaration; it will not be null. The base argument is the system identifier that should be used as the base for resolving systemId if systemId was relative; this is set by XML_SetBase; it may be null. The publicId argument is the public identifier as specified in the entity declaration, or null if none was specified; the whitespace in the public identifier will have been normalized as required by the XML spec. The context argument specifies the parsing context in the format expected by the context argument to XML_ExternalEntityParserCreate; context is valid only until the handler returns, so if the referenced entity is to be parsed later, it must be copied. The handler should return 0 if processing should not continue because of a fatal error in the handling of the external entity. 
In this case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING error. Note that unlike other handlers the first argument is the parser, not userData. */ typedef int (*XML_ExternalEntityRefHandler)(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); /* This structure is filled in by the XML_UnknownEncodingHandler to provide information to the parser about encodings that are unknown to the parser. The map[b] member gives information about byte sequences whose first byte is b. If map[b] is c where c is >= 0, then b by itself encodes the Unicode scalar value c. If map[b] is -1, then the byte sequence is malformed. If map[b] is -n, where n >= 2, then b is the first byte of an n-byte sequence that encodes a single Unicode scalar value. The data member will be passed as the first argument to the convert function. The convert function is used to convert multibyte sequences; s will point to a n-byte sequence where map[(unsigned char)*s] == -n. The convert function must return the Unicode scalar value represented by this byte sequence or -1 if the byte sequence is malformed. The convert function may be null if the encoding is a single-byte encoding, that is if map[b] >= -1 for all bytes b. When the parser is finished with the encoding, then if release is not null, it will call release passing it the data member; once release has been called, the convert function will not be called again. Expat places certain restrictions on the encodings that are supported using this mechanism. 1. Every ASCII character that can appear in a well-formed XML document, other than the characters $@\^`{}~ must be represented by a single byte, and that byte must be the same byte that represents that character in ASCII. 2. No character may require more than 4 bytes to encode. 3. 
All characters encoded must have Unicode scalar values <= 0xFFFF, (ie characters that would be encoded by surrogates in UTF-16 are not allowed). Note that this restriction doesn't apply to the built-in support for UTF-8 and UTF-16. 4. No Unicode character may be encoded by more than one distinct sequence of bytes. */ typedef struct { int map[256]; void *data; int (*convert)(void *data, const char *s); void (*release)(void *data); } XML_Encoding; /* This is called for an encoding that is unknown to the parser. The encodingHandlerData argument is that which was passed as the second argument to XML_SetUnknownEncodingHandler. The name argument gives the name of the encoding as specified in the encoding declaration. If the callback can provide information about the encoding, it must fill in the XML_Encoding structure, and return 1. Otherwise it must return 0. If info does not describe a suitable encoding, then the parser will return an XML_UNKNOWN_ENCODING error. */ typedef int (*XML_UnknownEncodingHandler)(void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); void XMLPARSEAPI XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end); void XMLPARSEAPI XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler); void XMLPARSEAPI XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler); void XMLPARSEAPI XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler); void XMLPARSEAPI XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end); /* This sets the default handler and also inhibits expansion of internal entities. The entity reference will be passed to the default handler. */ void XMLPARSEAPI XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler); /* This sets the default handler but does not inhibit expansion of internal entities. 
The entity reference will not be passed to the default handler. */ void XMLPARSEAPI XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler); void XMLPARSEAPI XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end); void XMLPARSEAPI XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler); void XMLPARSEAPI XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler); void XMLPARSEAPI XML_SetExternalParsedEntityDeclHandler(XML_Parser parser, XML_ExternalParsedEntityDeclHandler handler); void XMLPARSEAPI XML_SetInternalParsedEntityDeclHandler(XML_Parser parser, XML_InternalParsedEntityDeclHandler handler); void XMLPARSEAPI XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end); void XMLPARSEAPI XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler); void XMLPARSEAPI XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler); /* If a non-null value for arg is specified here, then it will be passed as the first argument to the external entity ref handler instead of the parser object. */ void XMLPARSEAPI XML_SetExternalEntityRefHandlerArg(XML_Parser, void *arg); void XMLPARSEAPI XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *encodingHandlerData); /* This can be called within a handler for a start element, end element, processing instruction or character data. It causes the corresponding markup to be passed to the default handler. */ void XMLPARSEAPI XML_DefaultCurrent(XML_Parser parser); /* This value is passed as the userData argument to callbacks. */ void XMLPARSEAPI XML_SetUserData(XML_Parser parser, void *userData); /* Returns the last value set by XML_SetUserData or null. 
*/ #define XML_GetUserData(parser) (*(void **)(parser)) /* This is equivalent to supplying an encoding argument to XML_ParserCreate. It must not be called after XML_Parse or XML_ParseBuffer. */ int XMLPARSEAPI XML_SetEncoding(XML_Parser parser, const XML_Char *encoding); /* If this function is called, then the parser will be passed as the first argument to callbacks instead of userData. The userData will still be accessible using XML_GetUserData. */ void XMLPARSEAPI XML_UseParserAsHandlerArg(XML_Parser parser); /* Sets the base to be used for resolving relative URIs in system identifiers in declarations. Resolving relative identifiers is left to the application: this value will be passed through as the base argument to the XML_ExternalEntityRefHandler, XML_NotationDeclHandler and XML_UnparsedEntityDeclHandler. The base argument will be copied. Returns zero if out of memory, non-zero otherwise. */ int XMLPARSEAPI XML_SetBase(XML_Parser parser, const XML_Char *base); const XML_Char XMLPARSEAPI * XML_GetBase(XML_Parser parser); /* Returns the number of the attribute/value pairs passed in the last call to the XML_StartElementHandler that were specified in the start-tag rather than defaulted. Each attribute/value pair counts as 2; thus this corresponds to an index into the atts array passed to the XML_StartElementHandler. */ int XMLPARSEAPI XML_GetSpecifiedAttributeCount(XML_Parser parser); /* Returns the index of the ID attribute passed in the last call to XML_StartElementHandler, or -1 if there is no ID attribute. Each attribute/value pair counts as 2; thus this corresponds to an index into the atts array passed to the XML_StartElementHandler. */ int XMLPARSEAPI XML_GetIdAttributeIndex(XML_Parser parser); /* Parses some input. Returns 0 if a fatal error is detected. The last call to XML_Parse must have isFinal true; len may be zero for this call (or any other). 
*/ int XMLPARSEAPI XML_Parse(XML_Parser parser, const char *s, int len, int isFinal); void XMLPARSEAPI * XML_GetBuffer(XML_Parser parser, int len); int XMLPARSEAPI XML_ParseBuffer(XML_Parser parser, int len, int isFinal); /* Creates an XML_Parser object that can parse an external general entity; context is a '\0'-terminated string specifying the parse context; encoding is a '\0'-terminated string giving the name of the externally specified encoding, or null if there is no externally specified encoding. The context string consists of a sequence of tokens separated by formfeeds (\f); a token consisting of a name specifies that the general entity of the name is open; a token of the form prefix=uri specifies the namespace for a particular prefix; a token of the form =uri specifies the default namespace. This can be called at any point after the first call to an ExternalEntityRefHandler so long as the parser has not yet been freed. The new parser is completely independent and may safely be used in a separate thread. The handlers and userData are initialized from the parser argument. Returns 0 if out of memory. Otherwise returns a new XML_Parser object. */ XML_Parser XMLPARSEAPI XML_ExternalEntityParserCreate(XML_Parser parser, const XML_Char *context, const XML_Char *encoding); enum XML_ParamEntityParsing { XML_PARAM_ENTITY_PARSING_NEVER, XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE, XML_PARAM_ENTITY_PARSING_ALWAYS }; /* Controls parsing of parameter entities (including the external DTD subset). If parsing of parameter entities is enabled, then references to external parameter entities (including the external DTD subset) will be passed to the handler set with XML_SetExternalEntityRefHandler. The context passed will be 0. Unlike external general entities, external parameter entities can only be parsed synchronously. 
If the external parameter entity is to be parsed, it must be parsed during the call to the external entity ref handler: the complete sequence of XML_ExternalEntityParserCreate, XML_Parse/XML_ParseBuffer and XML_ParserFree calls must be made during this call. After XML_ExternalEntityParserCreate has been called to create the parser for the external parameter entity (context must be 0 for this call), it is illegal to make any calls on the old parser until XML_ParserFree has been called on the newly created parser. If the library has been compiled without support for parameter entity parsing (ie without XML_DTD being defined), then XML_SetParamEntityParsing will return 0 if parsing of parameter entities is requested; otherwise it will return non-zero. */ int XMLPARSEAPI XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing parsing); enum XML_Error { XML_ERROR_NONE, XML_ERROR_NO_MEMORY, XML_ERROR_SYNTAX, XML_ERROR_NO_ELEMENTS, XML_ERROR_INVALID_TOKEN, XML_ERROR_UNCLOSED_TOKEN, XML_ERROR_PARTIAL_CHAR, XML_ERROR_TAG_MISMATCH, XML_ERROR_DUPLICATE_ATTRIBUTE, XML_ERROR_JUNK_AFTER_DOC_ELEMENT, XML_ERROR_PARAM_ENTITY_REF, XML_ERROR_UNDEFINED_ENTITY, XML_ERROR_RECURSIVE_ENTITY_REF, XML_ERROR_ASYNC_ENTITY, XML_ERROR_BAD_CHAR_REF, XML_ERROR_BINARY_ENTITY_REF, XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF, XML_ERROR_MISPLACED_XML_PI, XML_ERROR_UNKNOWN_ENCODING, XML_ERROR_INCORRECT_ENCODING, XML_ERROR_UNCLOSED_CDATA_SECTION, XML_ERROR_EXTERNAL_ENTITY_HANDLING, XML_ERROR_NOT_STANDALONE }; /* If XML_Parse or XML_ParseBuffer have returned 0, then XML_GetErrorCode returns information about the error. */ enum XML_Error XMLPARSEAPI XML_GetErrorCode(XML_Parser parser); /* These functions return information about the current parse location. They may be called when XML_Parse or XML_ParseBuffer return 0; in this case the location is the location of the character at which the error was detected. 
They may also be called from any other callback called to report some parse event; in this case the location is the location of the first of the sequence of characters that generated the event. */ int XMLPARSEAPI XML_GetCurrentLineNumber(XML_Parser parser); int XMLPARSEAPI XML_GetCurrentColumnNumber(XML_Parser parser); long XMLPARSEAPI XML_GetCurrentByteIndex(XML_Parser parser); /* Return the number of bytes in the current event. Returns 0 if the event is in an internal entity. */ int XMLPARSEAPI XML_GetCurrentByteCount(XML_Parser parser); /* For backwards compatibility with previous versions. */ #define XML_GetErrorLineNumber XML_GetCurrentLineNumber #define XML_GetErrorColumnNumber XML_GetCurrentColumnNumber #define XML_GetErrorByteIndex XML_GetCurrentByteIndex /* Frees memory used by the parser. */ void XMLPARSEAPI XML_ParserFree(XML_Parser parser); /* Returns a string describing the error. */ const XML_LChar XMLPARSEAPI *XML_ErrorString(int code); #ifdef __cplusplus } #endif #endif /* not XmlParse_INCLUDED */ expat-1.2/xmlparse/xmlparse.dsp0100664000076400007640000001562007106716451015017 0ustar jjcjjc# Microsoft Developer Studio Project File - Name="xmlparse" - Package Owner=<4> # Microsoft Developer Studio Generated Build File, Format Version 6.00 # ** DO NOT EDIT ** # TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102 CFG=xmlparse - Win32 Release !MESSAGE This is not a valid makefile. To build this project using NMAKE, !MESSAGE use the Export Makefile command and run !MESSAGE !MESSAGE NMAKE /f "xmlparse.mak". !MESSAGE !MESSAGE You can specify a configuration when running NMAKE !MESSAGE by defining the macro CFG on the command line. 
For example: !MESSAGE !MESSAGE NMAKE /f "xmlparse.mak" CFG="xmlparse - Win32 Release" !MESSAGE !MESSAGE Possible choices for configuration are: !MESSAGE !MESSAGE "xmlparse - Win32 Release" (based on "Win32 (x86) Dynamic-Link Library") !MESSAGE "xmlparse - Win32 Debug" (based on "Win32 (x86) Dynamic-Link Library") !MESSAGE "xmlparse - Win32 MinSize" (based on "Win32 (x86) Dynamic-Link Library") !MESSAGE # Begin Project # PROP AllowPerConfigDependencies 0 # PROP Scc_ProjName "" # PROP Scc_LocalPath "" CPP=cl.exe MTL=midl.exe RSC=rc.exe !IF "$(CFG)" == "xmlparse - Win32 Release" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 0 # PROP BASE Output_Dir ".\Release" # PROP BASE Intermediate_Dir ".\Release" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 0 # PROP Output_Dir ".\Release" # PROP Intermediate_Dir ".\Release" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "." # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c # ADD CPP /nologo /W3 /GX /O2 /I "..\xmltok" /I "..\xmlwf" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllexport) /D "NDEBUG" /D "XML_NS" /D "WIN32" /D "_WINDOWS" /D "XML_DTD" /YX /FD /c # ADD BASE MTL /nologo /D "NDEBUG" /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0x809 /d "NDEBUG" # ADD RSC /l 0x809 /d "NDEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /machine:I386 # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmlparse.dll" /link50compat # SUBTRACT LINK32 /pdb:none !ELSEIF "$(CFG)" == "xmlparse - Win32 Debug" # PROP BASE Use_MFC 0 # PROP BASE 
Use_Debug_Libraries 1 # PROP BASE Output_Dir ".\Debug" # PROP BASE Intermediate_Dir ".\Debug" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 1 # PROP Output_Dir ".\Debug" # PROP Intermediate_Dir ".\Debug" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "." # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c # ADD CPP /nologo /MDd /W3 /Gm /GX /ZI /Od /I "..\xmltok" /I "..\xmlwf" /D "_DEBUG" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllexport) /D "WIN32" /D "_WINDOWS" /D "XML_DTD" /YX /FD /c # ADD BASE MTL /nologo /D "_DEBUG" /win32 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0x809 /d "_DEBUG" # ADD RSC /l 0x809 /d "_DEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:windows /dll /debug /machine:I386 # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /subsystem:windows /dll /debug /machine:I386 /out:"..\dbgbin\xmlparse.dll" !ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 0 # PROP BASE Output_Dir "MinSize" # PROP BASE Intermediate_Dir "MinSize" # PROP BASE Ignore_Export_Lib 0 # PROP BASE Target_Dir "" # PROP Use_MFC 0 # PROP Use_Debug_Libraries 0 # PROP Output_Dir "MinSize" # PROP Intermediate_Dir "MinSize" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MD /W3 /GX /O2 /I "..\xmltok" /I "..\xmlwf" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllexport) /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "XML_NS" /YX /FD /c # ADD CPP /nologo /W3 /GX /O1 /I "..\xmltok" /I "..\xmlwf" /D "XML_MIN_SIZE" /D "XML_WINLIB" /D XMLPARSEAPI=__declspec(dllexport) 
/D "NDEBUG" /D "WIN32" /D "_WINDOWS" /YX /FD /c # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD BASE RSC /l 0x809 /d "NDEBUG" # ADD RSC /l 0x809 /d "NDEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmlparse.dll" # SUBTRACT BASE LINK32 /profile # ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /base:"0x20000000" /entry:"DllMain" /subsystem:windows /dll /machine:I386 /out:"..\bin\xmlparse.dll" # SUBTRACT LINK32 /profile /nodefaultlib !ENDIF # Begin Target # Name "xmlparse - Win32 Release" # Name "xmlparse - Win32 Debug" # Name "xmlparse - Win32 MinSize" # Begin Group "Source Files" # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat;for;f90" # Begin Source File SOURCE=..\xmltok\dllmain.c !IF "$(CFG)" == "xmlparse - Win32 Release" # PROP Exclude_From_Build 1 !ELSEIF "$(CFG)" == "xmlparse - Win32 Debug" # PROP Exclude_From_Build 1 !ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize" !ENDIF # End Source File # Begin Source File SOURCE=.\xmlparse.c # End Source File # Begin Source File SOURCE=..\xmltok\xmlrole.c !IF "$(CFG)" == "xmlparse - Win32 Release" # PROP Exclude_From_Build 1 !ELSEIF "$(CFG)" == "xmlparse - Win32 Debug" # PROP Exclude_From_Build 1 !ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize" !ENDIF # End Source File # Begin Source File SOURCE=..\xmltok\xmltok.c !IF "$(CFG)" == "xmlparse - Win32 Release" # PROP Exclude_From_Build 1 !ELSEIF "$(CFG)" == "xmlparse - Win32 Debug" # PROP Exclude_From_Build 1 !ELSEIF "$(CFG)" == "xmlparse - Win32 MinSize" !ENDIF # End Source File # End Group # Begin Group "Header Files" # PROP 
Default_Filter "h;hpp;hxx;hm;inl;fi;fd" # Begin Source File SOURCE=.\xmlparse.h # End Source File # End Group # Begin Group "Resource Files" # PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe" # End Group # End Target # End Project expat-1.2/xmlwf/0040775000076400007640000000000007167265632011764 5ustar jjcjjcexpat-1.2/xmlwf/codepage.c0100664000076400007640000000235407077753352013701 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #include "codepage.h" #ifdef WIN32 #define STRICT 1 #define WIN32_LEAN_AND_MEAN 1 #include

/* Fills map[0..255] for Windows code page cp: -2 for lead bytes of
two-byte characters, the converted UTF-16 code unit for bytes that
MultiByteToWideChar converts on their own, and -1 for all other bytes.
Returns 1 on success, or 0 if the code page information cannot be obtained
or its characters may need more than 2 bytes. */
int codepageMap(int cp, int *map)
{
  int i;
  CPINFO info;
  if (!GetCPInfo(cp, &info) || info.MaxCharSize > 2)
    return 0;
  for (i = 0; i < 256; i++)
    map[i] = -1;
  if (info.MaxCharSize > 1) {
    /* LeadByte is a list of (first, last) pairs terminated by a (0, 0)
       pair; both ends of each range are inclusive.  Step by whole pairs
       and never read past the end of the array. */
    for (i = 0; i + 1 < MAX_LEADBYTES; i += 2) {
      int j, lim;
      if (info.LeadByte[i] == 0 && info.LeadByte[i + 1] == 0)
        break;
      lim = info.LeadByte[i + 1];
      for (j = info.LeadByte[i]; j <= lim; j++)
        map[j] = -2;
    }
  }
  for (i = 0; i < 256; i++) {
    if (map[i] == -1) {
      char c = i;
      unsigned short n;
      if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
                              &c, 1, &n, 1) == 1)
        map[i] = n;
    }
  }
  return 1;
}

/* Converts the two-byte sequence at p in code page cp.  Returns the
resulting UTF-16 code unit, or -1 if the conversion does not yield exactly
one code unit. */
int codepageConvert(int cp, const char *p)
{
  unsigned short c;
  if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
                          p, 2, &c, 1) == 1)
    return c;
  return -1;
}

#else /* not WIN32 */

/* Code pages are not supported without WIN32: always reports failure. */
int codepageMap(int cp, int *map)
{
  return 0;
}

/* Code pages are not supported without WIN32: always reports a malformed
sequence. */
int codepageConvert(int cp, const char *p)
{
  return -1;
}

#endif /* not WIN32 */
expat-1.2/xmlwf/codepage.h0100664000076400007640000000030507077753354013702 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ int codepageMap(int cp, int *map); int codepageConvert(int cp, const char *p); expat-1.2/xmlwf/filemap.h0100664000076400007640000000063607077753354013557 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #include #ifdef XML_UNICODE int filemap(const wchar_t *name, void (*processor)(const void *, size_t, const wchar_t *, void *arg), void *arg); #else int filemap(const char *name, void (*processor)(const void *, size_t, const char *, void *arg), void *arg); #endif expat-1.2/xmlwf/readfilemap.c0100664000076400007640000000252007077753352014376 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #include #include #include #include #include #ifndef S_ISREG #ifndef S_IFREG #define S_IFREG _S_IFREG #endif #ifndef S_IFMT #define S_IFMT _S_IFMT #endif #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #endif /* not S_ISREG */ #ifndef O_BINARY #ifdef _O_BINARY #define O_BINARY _O_BINARY #else #define O_BINARY 0 #endif #endif

/* Reads the whole regular file called name into memory and calls
processor(data, size, name, arg) on it.  Returns 1 if the processor was
called, or 0 after printing a message to stderr if the file could not be
opened, examined or read, or is not a regular file.  The descriptor and the
buffer are released on every path. */
int filemap(const char *name,
            void (*processor)(const void *, size_t, const char *, void *arg),
            void *arg)
{
  size_t nbytes;
  int fd;
  int n;
  struct stat sb;
  void *p;

  fd = open(name, O_RDONLY|O_BINARY);
  if (fd < 0) {
    perror(name);
    return 0;
  }
  if (fstat(fd, &sb) < 0) {
    perror(name);
    close(fd);
    return 0;
  }
  if (!S_ISREG(sb.st_mode)) {
    fprintf(stderr, "%s: not a regular file\n", name);
    close(fd);
    return 0;
  }
  nbytes = sb.st_size;
  /* malloc(0) may return a null pointer, which must not be reported as
     out of memory: hand an empty file to the processor directly */
  if (nbytes == 0) {
    static const char c = '\0';
    processor(&c, 0, name, arg);
    close(fd);
    return 1;
  }
  p = malloc(nbytes);
  if (!p) {
    fprintf(stderr, "%s: out of memory\n", name);
    close(fd);
    return 0;
  }
  n = read(fd, p, nbytes);
  if (n < 0) {
    perror(name);  /* report before free/close can disturb errno */
    free(p);
    close(fd);
    return 0;
  }
  if ((size_t)n != nbytes) {
    fprintf(stderr, "%s: read unexpected number of bytes\n", name);
    free(p);
    close(fd);
    return 0;
  }
  processor(p, nbytes, name, arg);
  free(p);
  close(fd);
  return 1;
}
expat-1.2/xmlwf/unixfilemap.c0100664000076400007640000000206507077753352014452 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file 
copying.txt for copying permission. */ #include #include #include #include #include #include #include #ifndef MAP_FILE #define MAP_FILE 0 #endif #include "filemap.h" int filemap(const char *name, void (*processor)(const void *, size_t, const char *, void *arg), void *arg) { int fd; size_t nbytes; struct stat sb; void *p; fd = open(name, O_RDONLY); if (fd < 0) { perror(name); return 0; } if (fstat(fd, &sb) < 0) { perror(name); close(fd); return 0; } if (!S_ISREG(sb.st_mode)) { close(fd); fprintf(stderr, "%s: not a regular file\n", name); return 0; } nbytes = sb.st_size; p = (void *)mmap((caddr_t)0, (size_t)nbytes, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, (off_t)0); if (p == (void *)-1) { perror(name); close(fd); return 0; } processor(p, nbytes, name, arg); munmap((caddr_t)p, nbytes); close(fd); return 1; } expat-1.2/xmlwf/win32filemap.c0100664000076400007640000000373507077753353014437 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ #define STRICT 1 #define WIN32_LEAN_AND_MEAN 1 #ifdef XML_UNICODE_WCHAR_T #ifndef XML_UNICODE #define XML_UNICODE #endif #endif #ifdef XML_UNICODE #define UNICODE #define _UNICODE #endif /* XML_UNICODE */ #include #include #include #include "filemap.h" static void win32perror(const TCHAR *); int filemap(const TCHAR *name, void (*processor)(const void *, size_t, const TCHAR *, void *arg), void *arg) { HANDLE f; HANDLE m; DWORD size; DWORD sizeHi; void *p; f = CreateFile(name, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL); if (f == INVALID_HANDLE_VALUE) { win32perror(name); return 0; } size = GetFileSize(f, &sizeHi); if (size == (DWORD)-1) { win32perror(name); return 0; } if (sizeHi) { _ftprintf(stderr, _T("%s: bigger than 2Gb\n"), name); return 0; } /* CreateFileMapping barfs on zero length files */ if (size == 0) { static const char c = '\0'; processor(&c, 0, name, arg); CloseHandle(f); return 1; } m = CreateFileMapping(f, NULL, PAGE_READONLY, 0, 0, NULL); if (m == NULL) { win32perror(name); CloseHandle(f); return 0; } p = MapViewOfFile(m, FILE_MAP_READ, 0, 0, 0); if (p == NULL) { win32perror(name); CloseHandle(m); CloseHandle(f); return 0; } processor(p, size, name, arg); UnmapViewOfFile(p); CloseHandle(m); CloseHandle(f); return 1; } static void win32perror(const TCHAR *s) { LPVOID buf; if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPTSTR) &buf, 0, NULL)) { _ftprintf(stderr, _T("%s: %s"), s, buf); fflush(stderr); LocalFree(buf); } else _ftprintf(stderr, _T("%s: unknown Windows error\n"), s); } expat-1.2/xmlwf/xmlfile.c0100664000076400007640000001111507106152064013547 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. 
*/ #include #include #include #include #include #include "xmlparse.h" #include "xmlfile.h" #include "xmltchar.h" #include "filemap.h" #ifdef _MSC_VER #include #endif #ifdef _POSIX_SOURCE #include #endif #ifndef O_BINARY #ifdef _O_BINARY #define O_BINARY _O_BINARY #else #define O_BINARY 0 #endif #endif #ifdef _DEBUG #define READ_SIZE 16 #else #define READ_SIZE (1024*8) #endif typedef struct { XML_Parser parser; int *retPtr; } PROCESS_ARGS; static void reportError(XML_Parser parser, const XML_Char *filename) { int code = XML_GetErrorCode(parser); const XML_Char *message = XML_ErrorString(code); if (message) ftprintf(stdout, T("%s:%d:%d: %s\n"), filename, XML_GetErrorLineNumber(parser), XML_GetErrorColumnNumber(parser), message); else ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code); } static void processFile(const void *data, size_t size, const XML_Char *filename, void *args) { XML_Parser parser = ((PROCESS_ARGS *)args)->parser; int *retPtr = ((PROCESS_ARGS *)args)->retPtr; if (!XML_Parse(parser, data, size, 1)) { reportError(parser, filename); *retPtr = 0; } else *retPtr = 1; } #ifdef WIN32 static int isAsciiLetter(XML_Char c) { return (T('a') <= c && c <= T('z')) || (T('A') <= c && c <= T('Z')); } #endif /* WIN32 */ static const XML_Char *resolveSystemId(const XML_Char *base, const XML_Char *systemId, XML_Char **toFree) { XML_Char *s; *toFree = 0; if (!base || *systemId == T('/') #ifdef WIN32 || *systemId == T('\\') || (isAsciiLetter(systemId[0]) && systemId[1] == T(':')) #endif ) return systemId; *toFree = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)*sizeof(XML_Char)); if (!*toFree) return systemId; tcscpy(*toFree, base); s = *toFree; if (tcsrchr(s, T('/'))) s = tcsrchr(s, T('/')) + 1; #ifdef WIN32 if (tcsrchr(s, T('\\'))) s = tcsrchr(s, T('\\')) + 1; #endif tcscpy(s, systemId); return *toFree; } static int externalEntityRefFilemap(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const 
XML_Char *publicId) { int result; XML_Char *s; const XML_Char *filename; XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); PROCESS_ARGS args; args.retPtr = &result; args.parser = entParser; filename = resolveSystemId(base, systemId, &s); XML_SetBase(entParser, filename); if (!filemap(filename, processFile, &args)) result = 0; free(s); XML_ParserFree(entParser); return result; } static int processStream(const XML_Char *filename, XML_Parser parser) { int fd = topen(filename, O_BINARY|O_RDONLY); if (fd < 0) { tperror(filename); return 0; } for (;;) { int nread; char *buf = XML_GetBuffer(parser, READ_SIZE); if (!buf) { close(fd); ftprintf(stderr, T("%s: out of memory\n"), filename); return 0; } nread = read(fd, buf, READ_SIZE); if (nread < 0) { tperror(filename); close(fd); return 0; } if (!XML_ParseBuffer(parser, nread, nread == 0)) { reportError(parser, filename); close(fd); return 0; } if (nread == 0) { close(fd); break;; } } return 1; } static int externalEntityRefStream(XML_Parser parser, const XML_Char *context, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { XML_Char *s; const XML_Char *filename; int ret; XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0); filename = resolveSystemId(base, systemId, &s); XML_SetBase(entParser, filename); ret = processStream(filename, entParser); free(s); XML_ParserFree(entParser); return ret; } int XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags) { int result; if (!XML_SetBase(parser, filename)) { ftprintf(stderr, T("%s: out of memory"), filename); exit(1); } if (flags & XML_EXTERNAL_ENTITIES) XML_SetExternalEntityRefHandler(parser, (flags & XML_MAP_FILE) ? 
externalEntityRefFilemap : externalEntityRefStream); if (flags & XML_MAP_FILE) { PROCESS_ARGS args; args.retPtr = &result; args.parser = parser; if (!filemap(filename, processFile, &args)) result = 0; } else result = processStream(filename, parser); return result; } expat-1.2/xmlwf/xmlfile.h0100664000076400007640000000042507077753354013576 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #define XML_MAP_FILE 01 #define XML_EXTERNAL_ENTITIES 02 extern int XML_ProcessFile(XML_Parser parser, const XML_Char *filename, unsigned flags); expat-1.2/xmlwf/xmltchar.h0100664000076400007640000000147107077752126013756 0ustar jjcjjc#ifdef XML_UNICODE #ifndef XML_UNICODE_WCHAR_T #error xmlwf requires a 16-bit Unicode-compatible wchar_t #endif #define T(x) L ## x #define ftprintf fwprintf #define tfopen _wfopen #define fputts fputws #define puttc putwc #define tcscmp wcscmp #define tcscpy wcscpy #define tcscat wcscat #define tcschr wcschr #define tcsrchr wcsrchr #define tcslen wcslen #define tperror _wperror #define topen _wopen #define tmain wmain #define tremove _wremove #else /* not XML_UNICODE */ #define T(x) x #define ftprintf fprintf #define tfopen fopen #define fputts fputs #define puttc putc #define tcscmp strcmp #define tcscpy strcpy #define tcscat strcat #define tcschr strchr #define tcsrchr strrchr #define tcslen strlen #define tperror perror #define topen open #define tmain main #define tremove remove #endif /* not XML_UNICODE */ expat-1.2/xmlwf/xmlwf.c0100664000076400007640000004275407106710203013254 0ustar jjcjjc/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd See the file copying.txt for copying permission. */ #include #include #include #include #include "xmlparse.h" #include "codepage.h" #include "xmlfile.h" #include "xmltchar.h" #ifdef _MSC_VER #include #endif /* This ensures proper sorting. 
*/ #define NSSEP T('\001') static void characterData(void *userData, const XML_Char *s, int len) { FILE *fp = userData; for (; len > 0; --len, ++s) { switch (*s) { case T('&'): fputts(T("&"), fp); break; case T('<'): fputts(T("<"), fp); break; case T('>'): fputts(T(">"), fp); break; #ifdef W3C14N case 13: fputts(T(" "), fp); break; #else case T('"'): fputts(T("""), fp); break; case 9: case 10: case 13: ftprintf(fp, T("&#%d;"), *s); break; #endif default: puttc(*s, fp); break; } } } static void attributeValue(FILE *fp, const XML_Char *s) { puttc(T('='), fp); puttc(T('"'), fp); for (;;) { switch (*s) { case 0: case NSSEP: puttc(T('"'), fp); return; case T('&'): fputts(T("&"), fp); break; case T('<'): fputts(T("<"), fp); break; case T('"'): fputts(T("""), fp); break; #ifdef W3C14N case 9: fputts(T(" "), fp); break; case 10: fputts(T(" "), fp); break; case 13: fputts(T(" "), fp); break; #else case T('>'): fputts(T(">"), fp); break; case 9: case 10: case 13: ftprintf(fp, T("&#%d;"), *s); break; #endif default: puttc(*s, fp); break; } s++; } } /* Lexicographically comparing UTF-8 encoded attribute values, is equivalent to lexicographically comparing based on the character number. 
*/ static int attcmp(const void *att1, const void *att2) { return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2); } static void startElement(void *userData, const XML_Char *name, const XML_Char **atts) { int nAtts; const XML_Char **p; FILE *fp = userData; puttc(T('<'), fp); fputts(name, fp); p = atts; while (*p) ++p; nAtts = (p - atts) >> 1; if (nAtts > 1) qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp); while (*atts) { puttc(T(' '), fp); fputts(*atts++, fp); attributeValue(fp, *atts); atts++; } puttc(T('>'), fp); } static void endElement(void *userData, const XML_Char *name) { FILE *fp = userData; puttc(T('<'), fp); puttc(T('/'), fp); fputts(name, fp); puttc(T('>'), fp); } static int nsattcmp(const void *p1, const void *p2) { const XML_Char *att1 = *(const XML_Char **)p1; const XML_Char *att2 = *(const XML_Char **)p2; int sep1 = (tcsrchr(att1, NSSEP) != 0); int sep2 = (tcsrchr(att1, NSSEP) != 0); if (sep1 != sep2) return sep1 - sep2; return tcscmp(att1, att2); } static void startElementNS(void *userData, const XML_Char *name, const XML_Char **atts) { int nAtts; int nsi; const XML_Char **p; FILE *fp = userData; const XML_Char *sep; puttc(T('<'), fp); sep = tcsrchr(name, NSSEP); if (sep) { fputts(T("n1:"), fp); fputts(sep + 1, fp); fputts(T(" xmlns:n1"), fp); attributeValue(fp, name); nsi = 2; } else { fputts(name, fp); nsi = 1; } p = atts; while (*p) ++p; nAtts = (p - atts) >> 1; if (nAtts > 1) qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, nsattcmp); while (*atts) { name = *atts++; sep = tcsrchr(name, NSSEP); puttc(T(' '), fp); if (sep) { ftprintf(fp, T("n%d:"), nsi); fputts(sep + 1, fp); } else fputts(name, fp); attributeValue(fp, *atts); if (sep) { ftprintf(fp, T(" xmlns:n%d"), nsi++); attributeValue(fp, name); } atts++; } puttc(T('>'), fp); } static void endElementNS(void *userData, const XML_Char *name) { FILE *fp = userData; const XML_Char *sep; puttc(T('<'), fp); puttc(T('/'), fp); sep = tcsrchr(name, NSSEP); if (sep) { 
fputts(T("n1:"), fp); fputts(sep + 1, fp); } else fputts(name, fp); puttc(T('>'), fp); } #ifndef W3C14N static void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data) { FILE *fp = userData; puttc(T('<'), fp); puttc(T('?'), fp); fputts(target, fp); puttc(T(' '), fp); fputts(data, fp); puttc(T('?'), fp); puttc(T('>'), fp); } #endif /* not W3C14N */ static void defaultCharacterData(XML_Parser parser, const XML_Char *s, int len) { XML_DefaultCurrent(parser); } static void defaultStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts) { XML_DefaultCurrent(parser); } static void defaultEndElement(XML_Parser parser, const XML_Char *name) { XML_DefaultCurrent(parser); } static void defaultProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) { XML_DefaultCurrent(parser); } static void nopCharacterData(XML_Parser parser, const XML_Char *s, int len) { } static void nopStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts) { } static void nopEndElement(XML_Parser parser, const XML_Char *name) { } static void nopProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) { } static void markup(XML_Parser parser, const XML_Char *s, int len) { FILE *fp = XML_GetUserData(parser); for (; len > 0; --len, ++s) puttc(*s, fp); } static void metaLocation(XML_Parser parser) { const XML_Char *uri = XML_GetBase(parser); if (uri) ftprintf(XML_GetUserData(parser), T(" uri=\"%s\""), uri); ftprintf(XML_GetUserData(parser), T(" byte=\"%ld\" nbytes=\"%d\" line=\"%d\" col=\"%d\""), XML_GetCurrentByteIndex(parser), XML_GetCurrentByteCount(parser), XML_GetCurrentLineNumber(parser), XML_GetCurrentColumnNumber(parser)); } static void metaStartDocument(XML_Parser parser) { fputts(T("\n"), XML_GetUserData(parser)); } static void metaEndDocument(XML_Parser parser) { fputts(T("\n"), XML_GetUserData(parser)); } static void metaStartElement(XML_Parser parser, const 
XML_Char *name, const XML_Char **atts) { FILE *fp = XML_GetUserData(parser); const XML_Char **specifiedAttsEnd = atts + XML_GetSpecifiedAttributeCount(parser); const XML_Char **idAttPtr; int idAttIndex = XML_GetIdAttributeIndex(parser); if (idAttIndex < 0) idAttPtr = 0; else idAttPtr = atts + idAttIndex; ftprintf(fp, T("\n"), fp); do { ftprintf(fp, T("= specifiedAttsEnd) fputts(T("\" defaulted=\"yes\"/>\n"), fp); else if (atts == idAttPtr) fputts(T("\" id=\"yes\"/>\n"), fp); else fputts(T("\"/>\n"), fp); } while (*(atts += 2)); fputts(T("\n"), fp); } else fputts(T("/>\n"), fp); } static void metaEndElement(XML_Parser parser, const XML_Char *name) { FILE *fp = XML_GetUserData(parser); ftprintf(fp, T("\n"), fp); } static void metaProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data) { FILE *fp = XML_GetUserData(parser); ftprintf(fp, T("\n"), fp); } static void metaComment(XML_Parser parser, const XML_Char *data) { FILE *fp = XML_GetUserData(parser); fputts(T("\n"), fp); } static void metaStartCdataSection(XML_Parser parser) { FILE *fp = XML_GetUserData(parser); fputts(T("\n"), fp); } static void metaEndCdataSection(XML_Parser parser) { FILE *fp = XML_GetUserData(parser); fputts(T("\n"), fp); } static void metaCharacterData(XML_Parser parser, const XML_Char *s, int len) { FILE *fp = XML_GetUserData(parser); fputts(T("\n"), fp); } static void metaStartDoctypeDecl(XML_Parser parser, const XML_Char *doctypeName) { FILE *fp = XML_GetUserData(parser); ftprintf(fp, T("\n"), fp); } static void metaEndDoctypeDecl(XML_Parser parser) { FILE *fp = XML_GetUserData(parser); fputts(T("\n"), fp); } static void metaUnparsedEntityDecl(XML_Parser parser, const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName) { FILE *fp = XML_GetUserData(parser); ftprintf(fp, T("\n"), fp); } static void metaNotationDecl(XML_Parser parser, const XML_Char *notationName, const XML_Char *base, const 
XML_Char *systemId, const XML_Char *publicId) { FILE *fp = XML_GetUserData(parser); ftprintf(fp, T("\n"), fp); } static void metaExternalParsedEntityDecl(XML_Parser parser, const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId) { FILE *fp = XML_GetUserData(parser); ftprintf(fp, T("\n"), fp); } static void metaInternalParsedEntityDecl(XML_Parser parser, const XML_Char *entityName, const XML_Char *text, int textLen) { FILE *fp = XML_GetUserData(parser); ftprintf(fp, T("'), fp); characterData(fp, text, textLen); fputts(T("\n"), fp); } static void metaStartNamespaceDecl(XML_Parser parser, const XML_Char *prefix, const XML_Char *uri) { FILE *fp = XML_GetUserData(parser); fputts(T("\n"), fp); } else fputts(T("/>\n"), fp); } static void metaEndNamespaceDecl(XML_Parser parser, const XML_Char *prefix) { FILE *fp = XML_GetUserData(parser); if (!prefix) fputts(T("\n"), fp); else ftprintf(fp, T("\n"), prefix); } static int unknownEncodingConvert(void *data, const char *p) { return codepageConvert(*(int *)data, p); } static int unknownEncoding(void *userData, const XML_Char *name, XML_Encoding *info) { int cp; static const XML_Char prefixL[] = T("windows-"); static const XML_Char prefixU[] = T("WINDOWS-"); int i; for (i = 0; prefixU[i]; i++) if (name[i] != prefixU[i] && name[i] != prefixL[i]) return 0; cp = 0; for (; name[i]; i++) { static const XML_Char digits[] = T("0123456789"); const XML_Char *s = tcschr(digits, name[i]); if (!s) return 0; cp *= 10; cp += s - digits; if (cp >= 0x10000) return 0; } if (!codepageMap(cp, info->map)) return 0; info->convert = unknownEncodingConvert; /* We could just cast the code page integer to a void *, and avoid the use of release. 
*/ info->release = free; info->data = malloc(sizeof(int)); if (!info->data) return 0; *(int *)info->data = cp; return 1; } static int notStandalone(void *userData) { return 0; } static void usage(const XML_Char *prog) { ftprintf(stderr, T("usage: %s [-n] [-p] [-r] [-s] [-w] [-x] [-d output-dir] [-e encoding] file ...\n"), prog); exit(1); } int tmain(int argc, XML_Char **argv) { int i, j; const XML_Char *outputDir = 0; const XML_Char *encoding = 0; unsigned processFlags = XML_MAP_FILE; int windowsCodePages = 0; int outputType = 0; int useNamespaces = 0; int requireStandalone = 0; int paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #ifdef _MSC_VER _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF); #endif i = 1; j = 0; while (i < argc) { if (j == 0) { if (argv[i][0] != T('-')) break; if (argv[i][1] == T('-') && argv[i][2] == T('\0')) { i++; break; } j++; } switch (argv[i][j]) { case T('r'): processFlags &= ~XML_MAP_FILE; j++; break; case T('s'): requireStandalone = 1; j++; break; case T('n'): useNamespaces = 1; j++; break; case T('p'): paramEntityParsing = XML_PARAM_ENTITY_PARSING_ALWAYS; /* fall through */ case T('x'): processFlags |= XML_EXTERNAL_ENTITIES; j++; break; case T('w'): windowsCodePages = 1; j++; break; case T('m'): outputType = 'm'; j++; break; case T('c'): outputType = 'c'; useNamespaces = 0; j++; break; case T('t'): outputType = 't'; j++; break; case T('d'): if (argv[i][j + 1] == T('\0')) { if (++i == argc) usage(argv[0]); outputDir = argv[i]; } else outputDir = argv[i] + j + 1; i++; j = 0; break; case T('e'): if (argv[i][j + 1] == T('\0')) { if (++i == argc) usage(argv[0]); encoding = argv[i]; } else encoding = argv[i] + j + 1; i++; j = 0; break; case T('\0'): if (j > 1) { i++; j = 0; break; } /* fall through */ default: usage(argv[0]); } } if (i == argc) usage(argv[0]); for (; i < argc; i++) { FILE *fp = 0; XML_Char *outName = 0; int result; XML_Parser parser; if (useNamespaces) parser = XML_ParserCreateNS(encoding, NSSEP); else parser 
= XML_ParserCreate(encoding); if (requireStandalone) XML_SetNotStandaloneHandler(parser, notStandalone); XML_SetParamEntityParsing(parser, paramEntityParsing); if (outputType == 't') { /* This is for doing timings; this gives a more realistic estimate of the parsing time. */ outputDir = 0; XML_SetElementHandler(parser, nopStartElement, nopEndElement); XML_SetCharacterDataHandler(parser, nopCharacterData); XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction); } else if (outputDir) { const XML_Char *file = argv[i]; if (tcsrchr(file, T('/'))) file = tcsrchr(file, T('/')) + 1; #ifdef WIN32 if (tcsrchr(file, T('\\'))) file = tcsrchr(file, T('\\')) + 1; #endif outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char)); tcscpy(outName, outputDir); tcscat(outName, T("/")); tcscat(outName, file); fp = tfopen(outName, T("wb")); if (!fp) { tperror(outName); exit(1); } setvbuf(fp, NULL, _IOFBF, 16384); #ifdef XML_UNICODE puttc(0xFEFF, fp); #endif XML_SetUserData(parser, fp); switch (outputType) { case 'm': XML_UseParserAsHandlerArg(parser); XML_SetElementHandler(parser, metaStartElement, metaEndElement); XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction); XML_SetCommentHandler(parser, metaComment); XML_SetCdataSectionHandler(parser, metaStartCdataSection, metaEndCdataSection); XML_SetCharacterDataHandler(parser, metaCharacterData); XML_SetDoctypeDeclHandler(parser, metaStartDoctypeDecl, metaEndDoctypeDecl); XML_SetUnparsedEntityDeclHandler(parser, metaUnparsedEntityDecl); XML_SetNotationDeclHandler(parser, metaNotationDecl); XML_SetExternalParsedEntityDeclHandler(parser, metaExternalParsedEntityDecl); XML_SetInternalParsedEntityDeclHandler(parser, metaInternalParsedEntityDecl); XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl, metaEndNamespaceDecl); metaStartDocument(parser); break; case 'c': XML_UseParserAsHandlerArg(parser); XML_SetDefaultHandler(parser, markup); XML_SetElementHandler(parser, 
defaultStartElement, defaultEndElement); XML_SetCharacterDataHandler(parser, defaultCharacterData); XML_SetProcessingInstructionHandler(parser, defaultProcessingInstruction); break; default: if (useNamespaces) XML_SetElementHandler(parser, startElementNS, endElementNS); else XML_SetElementHandler(parser, startElement, endElement); XML_SetCharacterDataHandler(parser, characterData); #ifndef W3C14N XML_SetProcessingInstructionHandler(parser, processingInstruction); #endif /* not W3C14N */ break; } } if (windowsCodePages) XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0); result = XML_ProcessFile(parser, argv[i], processFlags); if (outputDir) { if (outputType == 'm') metaEndDocument(parser); fclose(fp); if (!result) tremove(outName); free(outName); } XML_ParserFree(parser); } return 0; } expat-1.2/xmlwf/xmlwf.dsp0100664000076400007640000001071207106726516013623 0ustar jjcjjc# Microsoft Developer Studio Project File - Name="xmlwf" - Package Owner=<4> # Microsoft Developer Studio Generated Build File, Format Version 6.00 # ** DO NOT EDIT ** # TARGTYPE "Win32 (x86) Console Application" 0x0103 CFG=xmlwf - Win32 Release !MESSAGE This is not a valid makefile. To build this project using NMAKE, !MESSAGE use the Export Makefile command and run !MESSAGE !MESSAGE NMAKE /f "xmlwf.mak". !MESSAGE !MESSAGE You can specify a configuration when running NMAKE !MESSAGE by defining the macro CFG on the command line. 
For example: !MESSAGE !MESSAGE NMAKE /f "xmlwf.mak" CFG="xmlwf - Win32 Release" !MESSAGE !MESSAGE Possible choices for configuration are: !MESSAGE !MESSAGE "xmlwf - Win32 Release" (based on "Win32 (x86) Console Application") !MESSAGE "xmlwf - Win32 Debug" (based on "Win32 (x86) Console Application") !MESSAGE # Begin Project # PROP AllowPerConfigDependencies 0 # PROP Scc_ProjName "" # PROP Scc_LocalPath "" CPP=cl.exe RSC=rc.exe !IF "$(CFG)" == "xmlwf - Win32 Release" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 0 # PROP BASE Output_Dir ".\Release" # PROP BASE Intermediate_Dir ".\Release" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 0 # PROP Output_Dir ".\Release" # PROP Intermediate_Dir ".\Release" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "." # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c # ADD CPP /nologo /W3 /GX /O2 /I "..\xmlparse" /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllimport) /YX /FD /c # ADD BASE RSC /l 0x809 /d "NDEBUG" # ADD RSC /l 0x809 /d "NDEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 # ADD LINK32 setargv.obj kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /out:"..\bin\xmlwf.exe" !ELSEIF "$(CFG)" == "xmlwf - Win32 Debug" # PROP BASE Use_MFC 0 # PROP BASE Use_Debug_Libraries 1 # PROP BASE Output_Dir ".\Debug" # PROP BASE Intermediate_Dir ".\Debug" # PROP BASE Target_Dir "." # PROP Use_MFC 0 # PROP Use_Debug_Libraries 1 # PROP Output_Dir ".\Debug" # PROP Intermediate_Dir ".\Debug" # PROP Ignore_Export_Lib 0 # PROP Target_Dir "." 
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c # ADD CPP /nologo /MDd /W3 /Gm /GX /ZI /Od /I ".\xmlparse" /I "..\xmlparse" /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /D XMLPARSEAPI=__declspec(dllimport) /YX /FD /c # ADD BASE RSC /l 0x809 /d "_DEBUG" # ADD RSC /l 0x809 /d "_DEBUG" BSC32=bscmake.exe # ADD BASE BSC32 /nologo # ADD BSC32 /nologo LINK32=link.exe # ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 # ADD LINK32 setargv.obj kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /out:"..\dbgbin\xmlwf.exe" !ENDIF # Begin Target # Name "xmlwf - Win32 Release" # Name "xmlwf - Win32 Debug" # Begin Group "Source Files" # PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat;for;f90" # Begin Source File SOURCE=.\codepage.c # End Source File # Begin Source File SOURCE=.\readfilemap.c # PROP Exclude_From_Build 1 # End Source File # Begin Source File SOURCE=.\unixfilemap.c # PROP Exclude_From_Build 1 # End Source File # Begin Source File SOURCE=.\win32filemap.c # End Source File # Begin Source File SOURCE=.\xmlfile.c # End Source File # Begin Source File SOURCE=.\xmlwf.c # End Source File # End Group # Begin Group "Header Files" # PROP Default_Filter "h;hpp;hxx;hm;inl;fi;fd" # Begin Source File SOURCE=.\codepage.h # End Source File # Begin Source File SOURCE=.\xmlfile.h # End Source File # Begin Source File SOURCE=.\xmltchar.h # End Source File # End Group # Begin Group "Resource Files" # PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe" # End Group # End Target # End Project expat-1.2/sample/0040775000076400007640000000000007167265632012110 5ustar 
jjcjjcexpat-1.2/sample/elements.c0100664000076400007640000000217707077752062014072 0ustar jjcjjc/* This is simple demonstration of how to use expat. This program reads an XML document from standard input and writes a line with the name of each element to standard output indenting child elements by one tab stop more than their parent element. */ #include #include "xmlparse.h" void startElement(void *userData, const char *name, const char **atts) { int i; int *depthPtr = userData; for (i = 0; i < *depthPtr; i++) putchar('\t'); puts(name); *depthPtr += 1; } void endElement(void *userData, const char *name) { int *depthPtr = userData; *depthPtr -= 1; } int main() { char buf[BUFSIZ]; XML_Parser parser = XML_ParserCreate(NULL); int done; int depth = 0; XML_SetUserData(parser, &depth); XML_SetElementHandler(parser, startElement, endElement); do { size_t len = fread(buf, 1, sizeof(buf), stdin); done = len < sizeof(buf); if (!XML_Parse(parser, buf, len, done)) { fprintf(stderr, "%s at line %d\n", XML_ErrorString(XML_GetErrorCode(parser)), XML_GetCurrentLineNumber(parser)); return 1; } } while (!done); XML_ParserFree(parser); return 0; } expat-1.2/sample/build.bat0100664000076400007640000000036506543143736013675 0ustar jjcjjc@echo off set LIB=..\xmlparse\Release;..\lib;%LIB% cl /nologo /DXMLTOKAPI=__declspec(dllimport) /DXMLPARSEAPI=__declspec(dllimport) /I..\xmlparse /Fe..\bin\elements elements.c xmlparse.lib @echo Run it using: ..\bin\elements ^<..\expat.html expat-1.2/expat.dsw0100664000076400007640000000256607147127412012464 0ustar jjcjjcMicrosoft Developer Studio Workspace File, Format Version 6.00 # WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE! 
############################################################################### Project: "gennmtab"=.\gennmtab\gennmtab.dsp - Package Owner=<4> Package=<5> {{{ }}} Package=<4> {{{ }}} ############################################################################### Project: "xmlparse"=.\xmlparse\xmlparse.dsp - Package Owner=<4> Package=<5> {{{ }}} Package=<4> {{{ Begin Project Dependency Project_Dep_Name xmltok End Project Dependency }}} ############################################################################### Project: "xmltok"=.\xmltok\xmltok.dsp - Package Owner=<4> Package=<5> {{{ }}} Package=<4> {{{ Begin Project Dependency Project_Dep_Name gennmtab End Project Dependency }}} ############################################################################### Project: "xmlwf"=.\xmlwf\xmlwf.dsp - Package Owner=<4> Package=<5> {{{ }}} Package=<4> {{{ Begin Project Dependency Project_Dep_Name xmlparse End Project Dependency }}} ############################################################################### Global: Package=<5> {{{ }}} Package=<3> {{{ }}} ###############################################################################