XML-Parser-2.46/0000755000000000000000000000000013542324531012037 5ustar rootrootXML-Parser-2.46/Parser/0000755000000000000000000000000013542324531013273 5ustar rootrootXML-Parser-2.46/Parser/Encodings/0000755000000000000000000000000013542324531015204 5ustar rootrootXML-Parser-2.46/Parser/Encodings/iso-8859-2.enc0000644000000000000000000000206013542305435017237 0ustar rootrootISO-8859-2  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~A=Z`^dy}{B>[a_ez~|T9 CGPXnpbU: DHQYoqcXML-Parser-2.46/Parser/Encodings/iso-8859-7.enc0000644000000000000000000000206013542305435017244 0ustar rootrootISO-8859-7  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~   z XML-Parser-2.46/Parser/Encodings/iso-8859-5.enc0000644000000000000000000000206013542305435017242 0ustar rootrootISO-8859-5  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~     !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNO!QRSTUVWXYZ[\^_XML-Parser-2.46/Parser/Encodings/iso-8859-15.enc0000644000000000000000000000206013542305435017323 0ustar rootrootISO-8859-15  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ `a}~RSxXML-Parser-2.46/Parser/Encodings/x-sjis-unicode.enc0000644000000000000000000004343213542305435020544 0ustar rootrootx-sjis-unicode(%  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|} >abcdefghijklmnopqrstuvwxyz{|}~\@@OL@@^@c@ @@@W@@@@ K@ @ @ @ ?@ @ @v@3@@@j@'@@@^@@@@R@@@@F@@e > abcdefghijklmnopqrstuvwxyz{|}~ !"#$%&'000 000@>?00000N0000  \0 \ & %     00;=[]00 0 0 0 0 0000 ""`"f"g""4&B&@ 2 3! 
&&%%%%%%%%%%% ;0!!!!0"" """""*")"'"(!!""" "#"""a"R"j"k""=""5"+",!+ 0&o&m&j !%!"#$%&'()*+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ0A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 !"#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNO%%% %%%%%,%$%4%<%%%%%%%#%3%+%;%K% %/%(%7%?%%0%%%8%BNUZ?Tac(Y"uzP`cn%efhW'ebq[YІ{}b}b|[^c fhHǗgONO OMOPIVY7YZ\ `aapfipuOupy}}ÄcUzS;NNW߀xNXn8z2(/QASpTTVY_m-bpT S[p–So\zNxn&VUk;YSmftܕVBNKO SU[0_qf fhl8lm)t[vzN4[`muvʙ`iSQWX0YD[^`(cclopqqYqs?~vх`[XielZu%QY.Ye__bej*k'ksV,\l{Q\KahvraNYOSx`in)zON SNOUO=OOsRSV YZ[[yfggkLlpksyyz<{ۃwӇfV)NO\brYu;傽řNOVXJX^_`*``babbe9AffhmwppuLv}uQRYT[]ahimxˈWrmlWgΒRVT^bdhkSlWo"ooEtuvw z{|!}6f̌Qeә(N8T+\]svLw<\ TXOOSqUVhWYG[ [\^ ^~_cg:eeghhj_^0kll}uyH[cz}_w̏Z/__`hjtZxwN^NO|OPPQIQlRRRSSTTUWQWY}[T[][]]]^x^^^_`RaLbbce;ffCfgmh!hil_m*min/nu2vxlz?|}}}^}T*RLaʑuqx?M؝;R[RSTXboj_QKR;TJVz@w`sDo pu_`ښrۏkdNVWdXZZ`haffh9hmu}:nBNOPSU]o]]gltsxPWP^c+PPQgTX^Y[_ibMch=ksnp}rxx&yme}0܈ RdW(gPjQWB*X:iT]WxO\RJTd>f(ggz{V}"/h\{9SQR7[bddg-kvcLvfRN PS\q`dech_qsu#{~ۑxefkNNO:OR:SSUVXYYY[P\M^^+_`ce/[\eeegkbk{lsEyIy|}}+󉖊^ifnjܖ̘koNOtuxy: 3ꄔlP_X+z[NSWY1Z[`nou[{Prg\aJ~Q\chfeqny>}ʐnǐPR\:gSp|r5Lȓ+[_1`N;S[bKg1krsz.kRQSTj[cj9}VSTh[\1]Oabm2yy}B~MҁFrt/1KlƑNOOQESA_bglAn sc~&͒SY[my]~.|X~qQSO\f%wzQ_eiokmnodv}]uQRb@ffn^}rfRSYs^_`UdPQRS SGSTUFU1VYhYZ<[\\\\^^^_pbbbccwff f-fvg~hjj5lmn nXq d_xRbcdBb-z{}v INQHSCS`[\\]b&bGdhh4lmEmgo\qNq}ez{}~Jz9n΍xwRMUo8q6Qhy~U|VLXQ\cffiZruuyyVy|} }D4;a PRuSSP UXYOr=[\dS``c\cc?cdef]iioqNuvz|}}aIXlōpmPXaӅ5 OPtRGSs`ocIg_n,O\^e}SRQvc[X[k\ d gQ\NYY*lpQU>XY`bSg5iU@Ě(OSX[\^/_` aKb4flnހ΁Ԉ.ۛNSY'{,Lnp'SSUD[bXbblot"8o8QSSOFTYj1]zꏿhڌ7rHj=N9SXVWfbcekNmn[pwz{}=Ɔˊ[VX_>efjku7P$wW0_`efzl`uznE{u\z{Qyz6Zw@N-N[_bft6x4ZFuO^bceWgovrL̀)MP WZhisqdrXjyw)O/ReSZbglv}{|6fo r~Q{xr{{Hj^auQu`QkbnvzOpb{OVzXY䖼O4R$SJSS^d,egl>lNrHrsuT~A,錩{đqic=fiujvxЅCS*SQT&Y^_|`bIbybekluvxy}w^j |8P\>_gkt5w 
;gzS9u_f_<_ub{Feg llpx2~+ނ *JҘlNONPRVWJY^=__b?fgghQ}!~2T ,SPS\Xdg4rgwfzFRlkX^LYTg,QvdixTWYf'gkTi^UggRh]NOSbg+lO~mNabno+Tsg*E]{\[ƇnJzY|lw RY"q!r_wۗ'ai ZZQT T}fvߏYr]nQMh}}bdxj!Y[_ksv}Q2g(vgbR\$b;|~UO`} SN_QYr:6_%wS_y}3Vg󅮔Sa alvR8U/OQQ*RS[^}`acg gngms6s7u1yPՊJćYNOYN?P^|Y[^ccdfiJim nqu(zIɉ! e} a~bk2lmtmge}o#,TBojp2RZA^_gi|imjorbr{~KQmy2P-Tqkjā`gNNkhin~xU_ NNN*N1N6NzYUYPYNYZYXYbY`YgYlYiYxYYO^OYYYYYYYYZ%ZZZZ ZZ@ZlZIZ5Z6ZbZjZZZZZZZZZZZZZ[ [ [[2Z[*[6[>[C[E[@[Q[U[Z[[[e[i[p[s[u[xe[z[[[[[[[[[[[[[[[[[[\\\\ \\ \"\(\8\9\A\F\N\S\P\O[q\l\nNb\v\y\\\Y\\\\\\\\\\\\\]\] ]]]\]]]]]"]]]]L]R]N]K]l]s]v]]]]]]]]]]]]]]]]]]]]^ ^^^^^6^7^D^C^@^N^W^T^_^b^d^G^u^v^z^^^^^^^^^^^^^^^^^^^^^^^__ _]_\_ ___)_-_8_A_H_L_N_/_Q_V_W_Y_a_m_s_w____________________`_`!`````)``1```+`&``:`Z`A`j`w`_`J`F`M`c`C`d`B`l`k`Y`````````````````_````aMaa``a``aa!``a aaGa>a(a'aJa?acMdcOcccccvcccccckcicccccccccdd4ddd&d6edd(ddgdodvdNe*ddddddddddddddd ddbdde,ddddedeeee$e#e+e4e5e7e6e8uKeHeVeUeMeXe^e]erexeeeeeeeeeeeeeeeeegrf fegsf5f6f4ffOfDfIfAf^f]fdfgfhf_fbfpffffffffffffffffff?fffffgggg&g'8g.g?g6gAg8g7gFg^g`gYgcgdggpgg|gjgggggggggggggggggggggjhhFh)h@hMh2hNhh+hYhchwhhhhhhhhjhhthhhihh~ihihi"i&hi hhhhi6iihhi%hhhi(i*ii#i!hiyiwi\ixikiTi~ini9iti=iYi0iai^i]iijiiiiiiii[iiiiij.iiiiiiijjik iiijijijij jjj#jjDj jrj6jxjGjbjYjfjHj8j"jjjjjjjjjjjjjjjjjjjjkjkk1kk8k7vk9kGkCkIkPkYkTk[k_kakxkykkkkkkkkkkkkkkkkkkkkkkkkklllll$l#l^lUlbljllllll~lhlslllllllllllllllllmMm6m+m=m8mm5m3mm mcmmdmZmymYmmommnn mmmmmmmmmmmmmmmmmn-nnn.nnrn_n>n#nkn+nvnMnnCn:nNn$nnn8nnnnnnnnnnnnnnnnoAopLnnno?no1no2no>onoozoxooooo[oomoo|oXoooofooooooooooooooop p opppoppotpppp0p>p2pQpcppppppppppppq pqqqeqUqqfqbqLqVqlqqqqqqqqqqqqqqqqqqqqr rrr(r-r,r0r2r;rsNsOsWsjshspsxsus{szsssssssstttot%st2t:tUt?t_tYtAt\titptctjtvt~ttttttsttttttttttuuuu uu uuuu&u,uz7zCzWzIzazbzizpzyz}zzzzzzzzzzzzzzzzzzzzzzzzzz{{{ {{3{{{{5{({6{P{z{{M{ {L{E{u{e{t{g{p{q{l{n{{{{{{{{{{]{{{{{{{{{||{{|`|||{{|| {|#|'|*||7|+|=|L|C|T|O|@|P|X|_|d|V|e|l|u|||||||||||||||||||||;|||||}}}}} 
}E}K}.}2}?}5}F}s}V}N}r}h}n}O}c}}}[}}}}}}}}}}}~=}}}}}}}}}}}}}~~ ~#~!~~1~~ ~ ~"~F~f~;~5~9~C~7~2~:~g~]~V~^~Y~Z~y~j~i~|~{~}~}~~~~~~~~~~~~8:ELMNPQUTX_`higxq܀ !(?;JFRXZ_bhsrpvy}Qۀـ݀Āڀց )#/KF>SQqneft_Ɂ́сف؁ȁځ߁ )+83@YX]Z_dbhjk.qwx~߂҂ރ܃ ك5421@9PE/+#|su΃؄  " 8m*(ALONIV[Zk_lot}:A?HLNPUblxz|bȌڌ  N͍gmqsύڍ֍̍ۍˍߍ B504JGILPHYd`*cUvr|ƎŎȎˎێ  &3;9EB>LIFNW\bcdڏ!  '659OPQRI>VX^hovr}bHۑ20JVXceisrɑˑБ֑ߑۑ,^WEIdH?KPZϒD."#:5;\`|nV֓דؓÓݓГȓ6+5!:ARD[`b^j)puw}Z|~oÕ͕̕Օԕ֕ܕ!(./BLOKw\^]_frlΖ˖ɖ͉Mܗ Ֆ$*09=>DFHBI\`dfhRҗkqy|z×Ɨȗ˗ܗOzߗ 8$!7=FOKkopqtsĘØƘ !$ ,.=>BIEPKQRLUߙۙݙؙљ+7EB@C>UM[W_bedikjϚњӚԚޚߚ"#%'()*./2DCOMNQXtʛƛϛћқԛ:   .%$!0G2F>Z`gvx *&#DA?>FH]^dQPYrozĝƝϝٝӝuy}a̞ΞϞОԞܞޞݞv!,>JRTc_`afgljwrvX/iǐYtdQqXML-Parser-2.46/Parser/Encodings/windows-1250.enc0000644000000000000000000000206013542305435017752 0ustar rootrootwindows-1250  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~   & ! 0` 9Zd}y     "  !"a :[e~zA^{B_=>|T9 CGPXnpbU: DHQYoqcXML-Parser-2.46/Parser/Encodings/x-euc-jp-unicode.enc0000644000000000000000000011200213542305435020745 0ustar rootrootX-EUC-JP-Unicode4A  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~g?gL?^^PCKSVL8Q +^K^^^e^^!^^^;^^^U^^^o^^ +^ ^ ^ E^ ^ ^ _^ ^ ^ y^ ^ 5^ ^ ^O3^^>^^^X^^^r^^.^^^H^^^b^^^|^^8^^^R^^^l^^(^^^B^^^\^~Ca=}0VWWA^^^T^^^n^^ *^ ^ ^!D^!^"^"^^"^#^#x^#^$4^$^$^%N^%^& ^&h^&^'$^'^'^(>^(^(^)X^)^*^*r^*^+.^+^+^,H^,^-^-b^-^.^.|^.^/8^/^/^0R^0^1^1l^1^2(^2^2^3B^3C3  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOabcdefghijklmnopqrstuvwxyz{|}~PQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~000 000@>?00000N0000  \0 \ & %     00;=[]00 0 0 0 0 0000 ""`"f"g""4&B&@ 2 3! 
&&%%%%%%%%%%% ;0!!!!0"" """""*")"'"(!!""" "#"""a"R"j"k""=""5"+",!+ 0&o&m&j !%!"#$%&'()*+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ0A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 !"#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNO%%% %%%%%,%$%4%<%%%%%%%#%3%+%;%K% %/%(%7%?%%0%%%8%BNUZ?Tac(Y"uzP`cn%efhW'ebq[YІ{}b}b|[^c fhHǗgONO OMOPIVY7YZ\ `aapfipuOupy}}ÄcUzS;NNW߀xNXn8z2(/QASpTTVY_m-bpT S[p–So\zNxn&VUk;YSmftܕVBNKO SU[0_qf fhl8lm)t[vzN4[`muvʙ`iSQWX0YD[^`(cclopqqYqs?~vх`[XielZu%QY.Ye__bej*k'ksV,\l{Q\KahvraNYOSx`in)zON SNOUO=OOsRSV YZ[[yfggkLlpksyyz<{ۃwӇfV)NO\brYu;傽řNOVXJX^_`*``babbe9AffhmwppuLv}uQRYT[]ahimxˈWrmlWgΒRVT^bdhkSlWo"ooEtuvw z{|!}6f̌Qeә(N8T+\]svLw<\ TXOOSqUVhWYG[ [\^ ^~_cg:eeghhj_^0kll}uyH[cz}_w̏Z/__`hjtZxwN^NO|OPPQIQlRRRSSTTUWQWY}[T[][]]]^x^^^_`RaLbbce;ffCfgmh!hil_m*min/nu2vxlz?|}}}^}T*RLaʑuqx?M؝;R[RSTXboj_QKR;TJVz@w`sDo pu_`ښrۏkdNVWdXZZ`haffh9hmu}:nBNOPSU]o]]gltsxPWP^c+PPQgTX^Y[_ibMch=ksnp}rxx&yme}0܈ RdW(gPjQWB*X:iT]WxO\RJTd>f(ggz{V}"/h\{9SQR7[bddg-kvcLvfRN PS\q`dech_qsu#{~ۑxefkNNO:OR:SSUVXYYY[P\M^^+_`ce/[\eeegkbk{lsEyIy|}}+󉖊^ifnjܖ̘koNOtuxy: 3ꄔlP_X+z[NSWY1Z[`nou[{Prg\aJ~Q\chfeqny>}ʐnǐPR\:gSp|r5Lȓ+[_1`N;S[bKg1krsz.kRQSTj[cj9}VSTh[\1]Oabm2yy}B~MҁFrt/1KlƑNOOQESA_bglAn sc~&͒SY[my]~.|X~qQSO\f%wzQ_eiokmnodv}]uQRb@ffn^}rfRSYs^_`UdPQRS SGSTUFU1VYhYZ<[\\\\^^^_pbbbccwff f-fvg~hjj5lmn nXq d_xRbcdBb-z{}v INQHSCS`[\\]b&bGdhh4lmEmgo\qNq}ez{}~Jz9n΍xwRMUo8q6Qhy~U|VLXQ\cffiZruuyyVy|} }D4;a PRuSSP UXYOr=[\dS``c\cc?cdef]iioqNuvz|}}aIXlōpmPXaӅ5 OPtRGSs`ocIg_n,O\^e}SRQvc[X[k\ d gQ\NYY*lpQU>XY`bSg5iU@Ě(OSX[\^/_` aKb4flnހ΁Ԉ.ۛNSY'{,Lnp'SSUD[bXbblot"8o8QSSOFTYj1]zꏿhڌ7rHj=N9SXVWfbcekNmn[pwz{}=Ɔˊ[VX_>efjku7P$wW0_`efzl`uznE{u\z{QĐyz6Zw@N-N[_bft6x4ZFuO^bceWgovrL̀)MP WZhisqdrXjyw)O/ReSZbglv}{|6fo r~Q{xr{{Hj^auQu`QkbnvzOpb{OVzXY䖼O4R$SJSS^d,egl>lNrHrsuT~A,錩{đqic=fiujvxЅCS*SQT&Y^_|`bIbybekluvxy}w^j |8P\>_gkt5w 
;gzS9u_f_<_ub{Feg llpx2~+ނ *JҘlNONPRVWJY^=__b?fgghQ}!~2T ,SPS\Xdg4rgwfzFRlkX^LYTg,QvdixTWYf'gkTi^UggRh]NOSbg+lO~mNabno+Tsg*E]{\[ƇnJzY|lw RY"q!r_wۗ'ai ZZQT T}fvߏYr]nQMh}}bdxj!Y[_ksv}Q2g(vgbR\$b;|~UO`} SN_QYr:6_%wS_y}3Vg󅮔Sa alvR8U/OQQ*RS[^}`acg gngms6s7u1yPՊJćYNOYN?P^|Y[^ccdfiJim nqu(zIɉ! e} a~bk2lmtmge}o#,TBojp2RZA^_gi|imjorbr{~KQmy2P-Tqkjā`gNNkhin~xU_ NNN*N1N6NzYUYPYNYZYXYbY`YgYlYiYxYYO^OYYYYYYYYZ%ZZZZ ZZ@ZlZIZ5Z6ZbZjZZZZZZZZZZZZZ[ [ [[2Z[*[6[>[C[E[@[Q[U[Z[[[e[i[p[s[u[xe[z[[[[[[[[[[[[[[[[[[\\\\ \\ \"\(\8\9\A\F\N\S\P\O[q\l\nNb\v\y\\\Y\\\\\\\\\\\\\]\] ]]]\]]]]]"]]]]L]R]N]K]l]s]v]]]]]]]]]]]]]]]]]]]]^ ^^^^^6^7^D^C^@^N^W^T^_^b^d^G^u^v^z^^^^^^^^^^^^^^^^^^^^^^^__ _]_\_ ___)_-_8_A_H_L_N_/_Q_V_W_Y_a_m_s_w____________________`_`!`````)``1```+`&``:`Z`A`j`w`_`J`F`M`c`C`d`B`l`k`Y`````````````````_````aMaa``a``aa!``a aaGa>a(a'aJa?acMdcOcccccvcccccckcicccccccccdd4ddd&d6edd(ddgdodvdNe*ddddddddddddddd ddbdde,ddddedeeee$e#e+e4e5e7e6e8uKeHeVeUeMeXe^e]erexeeeeeeeeeeeeeeeeegrf fegsf5f6f4ffOfDfIfAf^f]fdfgfhf_fbfpffffffffffffffffff?fffffgggg&g'8g.g?g6gAg8g7gFg^g`gYgcgdggpgg|gjgggggggggggggggggggggjhhFh)h@hMh2hNhh+hYhchwhhhhhhhhjhhthhhihh~ihihi"i&hi hhhhi6iihhi%hhhi(i*ii#i!hiyiwi\ixikiTi~ini9iti=iYi0iai^i]iijiiiiiiii[iiiiij.iiiiiiijjik iiijijijij jjj#jjDj jrj6jxjGjbjYjfjHj8j"jjjjjjjjjjjjjjjjjjjjkjkk1kk8k7vk9kGkCkIkPkYkTk[k_kakxkykkkkkkkkkkkkkkkkkkkkkkkkklllll$l#l^lUlbljllllll~lhlslllllllllllllllllmMm6m+m=m8mm5m3mm mcmmdmZmymYmmommnn mmmmmmmmmmmmmmmmmn-nnn.nnrn_n>n#nkn+nvnMnnCn:nNn$nnn8nnnnnnnnnnnnnnnnoAopLnnno?no1no2no>onoozoxooooo[oomoo|oXoooofooooooooooooooop p opppoppotpppp0p>p2pQpcppppppppppppq pqqqeqUqqfqbqLqVqlqqqqqqqqqqqqqqqqqqqqr rrr(r-r,r0r2r;rsNsOsWsjshspsxsus{szsssssssstttot%st2t:tUt?t_tYtAt\titptctjtvt~ttttttsttttttttttuuuu uu uuuu&u,uz7zCzWzIzazbzizpzyz}zzzzzzzzzzzzzzzzzzzzzzzzzz{{{ {{3{{{{5{({6{P{z{{M{ {L{E{u{e{t{g{p{q{l{n{{{{{{{{{{]{{{{{{{{{||{{|`|||{{|| {|#|'|*||7|+|=|L|C|T|O|@|P|X|_|d|V|e|l|u|||||||||||||||||||||;|||||}}}}} 
}E}K}.}2}?}5}F}s}V}N}r}h}n}O}c}}}[}}}}}}}}}}}~=}}}}}}}}}}}}}~~ ~#~!~~1~~ ~ ~"~F~f~;~5~9~C~7~2~:~g~]~V~^~Y~Z~y~j~i~|~{~}~}~~~~~~~~~~~~8:ELMNPQUTX_`higxq܀ !(?;JFRXZ_bhsrpvy}Qۀـ݀Āڀց )#/KF>SQqneft_Ɂ́сف؁ȁځ߁ )+83@YX]Z_dbhjk.qwx~߂҂ރ܃ ك5421@9PE/+#|su΃؄  " 8m*(ALONIV[Zk_lot}:A?HLNPUblxz|bȌڌ  N͍gmqsύڍ֍̍ۍˍߍ B504JGILPHYd`*cUvr|ƎŎȎˎێ  &3;9EB>LIFNW\bcdڏ!  '659OPQRI>VX^hovr}bHۑ20JVXceisrɑˑБ֑ߑۑ,^WEIdH?KPZϒD."#:5;\`|nV֓דؓÓݓГȓ6+5!:ARD[`b^j)puw}Z|~oÕ͕̕Օԕ֕ܕ!(./BLOKw\^]_frlΖ˖ɖ͉Mܗ Ֆ$*09=>DFHBI\`dfhRҗkqy|z×Ɨȗ˗ܗOzߗ 8$!7=FOKkopqtsĘØƘ !$ ,.=>BIEPKQRLUߙۙݙؙљ+7EB@C>UM[W_bedikjϚњӚԚޚߚ"#%'()*./2DCOMNQXtʛƛϛћқԛ:   .%$!0G2F>Z`gvx *&#DA?>FH]^dQPYrozĝƝϝٝӝuy}a̞ΞϞОԞܞޞݞv!,>JRTc_`afgljwrvX/iǐYtdQq~!"!    RSTUVWXYZ[\^_&2A?JRf'138B@IKSg  " $0*.(469=;CGEPLTXVZ\`^dblpjrnhtxvy}{   !%+/)57:><DHFQMUYW[]a_ecmqksoiuwz~|NNNN NNN#N$N(N+N.N/N0N5N@NANDNGNQNZN\NcNhNiNtNuNyNNNNNNNNNNNNNNNNNNNNNNOOOOO O OOOOOO.O1O`O3O5O7O9O;O>O@OBOHOIOKOLOROTOVOXO_OcOjOlOnOqOwOxOyOzO}O~OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPP P PPPPPPPPPP"P'P.P0P2P3P5P@PAPBPEPFPJPLPNPQPRPSPWPYP_P`PbPcPfPgPjPmPpPqP;PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQ Q Q QPQQQQQQQQ#Q'Q(Q,Q-Q/Q1Q3Q4Q5Q8Q9QBQJQOQSQUQWQXQ_QdQfQ~QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRR"R(R1R2R5RSBSLSKSYS[SaScSeSlSmSrSyS~SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTT!T'T(T*T/T1T4T5TCTDTGTMTOT^TbTdTfTgTiTkTmTnTtTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUU U U UUU*U+U2U5U6U;UW?WEWFWLWMWRWbWeWgWhWkWmWnWoWpWqWsWtWuWwWyWzW{W|W~WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXX WX X XXXX X&X'X-X2X9X?XIXLXMXOXPXUX_XaXdXgXhXxX|XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYY Y YYYAYY!Y#Y$Y(Y/Y0Y3Y5Y6Y?YCYFYRYSYYY[Y]Y^Y_YaYcYkYmYoYrYuYvYyY{Y|YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZ Z ZZZZZ#Z$Z'Z(Z*Z-Z0ZDZEZGZHZLZPZUZ^ZcZeZgZmZwZzZ{Z~ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[4[[[[![%[-[8[A[K[L[R[V[^[h[n[o[|[}[~[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\ \\\\#\&\)\+\,\.\0\2\5\6\Y\Z\\\b\c\g\h\i\m\p\t\u\z\{\|\}\\\\\\\\\\\\\\\\\\\\\\\\\]]] 
]]+]#]$]&]']1]4]9]=]?]B]C]F]H]U]Q]Y]J]_]`]a]b]d]j]m]p]y]z]~]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^ ^^^^^^ ^.^(^2^5^>^K^P^I^Q^V^X^[^\^^^h^j^k^l^m^n^p^^^^^^^^^^^^^^^^^^^^^^^^^^_________!_"_#_$_(_+_,_._0_4_6_;_=_?_@_D_E_G_M_P_T_X_[_`_c_d_g_o_r_t_u_x_z_}_~________________________________________`` ` ```````$`-`3`5`@`G`H`I`L`Q`T`V`W`]`a`g`q`~``````````````````````````````````````````aaa a aaaaaaaaaaa"a*a+a0a1a5a6a7a9aAaEaFaIa^a`alaraxa{a|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbb b"b#b'b)b+b9b=bBbCbDbFbLbPbQbRbTbVbZb\bdbmbobsbzb}bbbbbbbbbbbbbbbbbbbbbbccc c c ccccc)c*c-c5c6c9cgEgGgHgLgTgUg]gfglgngtgvg{gggggggggggggggggggggggggggggggggghRhhhhh(h'h,h-h/h0h1h3h;h?hDhEhJhLhUhWhXh[hkhnhohphqhrhuhyhzh{h|hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhii i iiiiii1i3i5i8i;iBiEiIiNiWi[icidieifihiiilipiqirizi{iiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjj j$j(j0j2j4j7j;j>j?jEjFjIjJjNjPjQjRjUjVj[jdjgjjjqjsj~jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkk kkkkkkk$k(k+k,k/k5k6k;k?kFkJkMkRkVkXk]k`kgkkknkpkuk}k~kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllll l llllll&l'l(l,l.l3l5l6l:l;l?lJlKlMlOlRlTlYl[l\lklmloltlvlxlyl{llllllllllllllllllllllllllllllllmmm mmmmmm&m'm(lgm.m/m1m9mu?u@uCuGuHuNuPuRuWu^u_uauouquyuzu{u|u}u~uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvv v vvvvvvvvvvv#v%v&v)v-v2v3v5v8v9v:vwBwEwFwJwMwNwOwRwVwWw\w^w_w`wbwdwgwjwlwpwrwswtwzw}wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxx x xxxx!x"x#x-x.x0x5x7xCxDxGxHxLxNxRx\x^x`xaxcxdxhxjxnxzx~xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyxxxxxyy yyyyyy y%y'y)y-y1y4y5y;y=y?yDyEyFyJyKyOyQyTyXy[y\ygyiykyryyy{y|y~yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzz z z zzzzz!z'z+z-z/z0z4z5z8z9z:zDzEzGzHzLzUzVzYz\z]z_z`zezgzjzmzuzxz~zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz{{{{#{'{){*{+{-{.{/{0{1{4{={?{@{A{G{N{U{`{d{f{i{j{m{o{r{s{w{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{|||||| | | ||||| |%|&|(|,|1|3|4|6|9|:|F|J|U|Q|R|S|Y|Z|[|\|]|^|a|c|g|i|m|n|p|r|y|||}|||||||||||||||||||||||||||||||}}}} }}}}}}}}#}&}*}-}1}<}=}>}@}A}G}H}M}Q}S}W}Y}Z}\}]}e}g}j}p}x}z}{}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}~}}}}}}}}}}}~~~~~~~~ 
~'~(~,~-~/~3~6~?~D~E~G~N~P~R~X~_~a~b~e~k~n~o~s~x~~~~~~~~~~~~~<;=>?CDGORS[\]acdefmq}~  $&,.04579:<>@D`dfmquȀ̀πҀԀՀ׀؀  $',05:@CEMX]adeoqr{ȇɇʇ·Շևهڇ܇߇ (-.025:@BEFIORWZ[\abcknpsuz{|}ԉՉ։׉؉ "$&+,/57=>@CEGIMNSVWX\]aeguvwyz{~ÊƊȊɊʊъӊԊՊ׊݊ߊ  -07EGIKOQSTWX[]Ycdfhimsuv{~ŌƌɌˌό֌Ռٌ݌ eilnōƍǍȍʍ΍эԍՍ׍ٍ  !"#&'136789=@AKMNOT[\]^abilmopqyz{ÎĎǎώюԎ܎ !#%'(,-.4567:@ACGOQRSTUX]^eƏʏˏ͏ЏҏӏՏ ()/*,-347?CDL[]bfglpty̐ÐĐŐǐȐՐאِؐܐݐߐҐ  %"#')./14679:<=CGHOSWYZ[adgmtyz{‘Ñőӑԑבّڑޑ #$%&(./035689:<>@BCFGJMNOQXY\]`aeghinopuvwxy{|}’ÒŒƒǒȒ˒̒͒ΒВӒՒגْؒܒݒߒ !$%')*3467GHIPQRUWXZ^degijmopqstvz}ēœƓǓɓʓ˓͓̓ӓٓܓޓߓ ./1234;?=CEHJLUY\_achkmnoqrxy~ƕȕɕ˕Еѕҕӕٕڕݕޕߕ"$%&,13789:<=ARTVWXant{|~ʖ]ؖږݖޖߖ !"#(13ACJNOUWXZ[cgjnsvwx{}ėŗǗɗʗ̗͗ΗЗїԗחؗٗݗޗۗ   #&+./0235%>DGJQRSVWYZbcefjl˜ŘȘ̘"&'+123459:;<@AFGHMNTXY[\^_`Ùəәԙٙڙܙޙ  "#$'-.3568GADJKLNQTV]ÚƚȚΚКҚ՚֚ךۚܚ  &+-34579:=HKLUVW[^acefhjklmnsuwxyǛțΛЛכ؛ݛߛ "#&'()*1567=ACDEIJNOPSTVX[]^_cij\khnpruw{ /0234:?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~0^1_XML-Parser-2.46/Parser/Encodings/windows-1251.enc0000644000000000000000000000206013542305435017753 0ustar rootrootwindows-1251  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ S  & ! 0 9   R     "  !"Y :Z\[_^VQ!TXUW !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOXML-Parser-2.46/Parser/Encodings/iso-8859-8.enc0000644000000000000000000000206013542305435017245 0ustar rootrootISO-8859-8  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ > XML-Parser-2.46/Parser/Encodings/windows-1255.enc0000644000000000000000000000206013542305435017757 0ustar rootrootwindows-1255  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~   & ! 0 9     "  !" :   XML-Parser-2.46/Parser/Encodings/iso-8859-4.enc0000644000000000000000000000206013542305435017241 0ustar rootrootISO-8859-4  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~8V(;`"f}W)<a#gJ~K. 
*EL6rhj/ +FM7sikXML-Parser-2.46/Parser/Encodings/x-euc-jp-jisx0221.enc0000644000000000000000000011200213542305435020601 0ustar rootrootX-EUC-JP-JISX02214A  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~g?gL?^^PCKSVL8Q +^K^^^e^^!^^^;^^^U^^^o^^ +^ ^ ^ E^ ^ ^ _^ ^ ^ y^ ^ 5^ ^ ^O3^^>^^^X^^^r^^.^^^H^^^b^^^|^^8^^^R^^^l^^(^^^B^^^\^~Ca=}0VWWA^^^T^^^n^^ *^ ^ ^!D^!^"^"^^"^#^#x^#^$4^$^$^%N^%^& ^&h^&^'$^'^'^(>^(^(^)X^)^*^*r^*^+.^+^+^,H^,^-^-b^-^.^.|^.^/8^/^/^0R^0^1^1l^1^2(^2^2^3B^3C3  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOabcdefghijklmnopqrstuvwxyz{|}~PQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~000 000@>?00000N0000  \0 \ & %     00;=[]00 0 0 0 0 0000 ""`"f"g""4&B&@ 2 3! &&%%%%%%%%%%% ;0!!!!0"" """""*")"'"(!!""" "#"""a"R"j"k""=""5"+",!+ 0&o&m&j !%!"#$%&'()*+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ0A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 !"#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNO%%% %%%%%,%$%4%<%%%%%%%#%3%+%;%K% %/%(%7%?%%0%%%8%BNUZ?Tac(Y"uzP`cn%efhW'ebq[YІ{}b}b|[^c fhHǗgONO OMOPIVY7YZ\ `aapfipuOupy}}ÄcUzS;NNW߀xNXn8z2(/QASpTTVY_m-bpT S[p–So\zNxn&VUk;YSmftܕVBNKO SU[0_qf fhl8lm)t[vzN4[`muvʙ`iSQWX0YD[^`(cclopqqYqs?~vх`[XielZu%QY.Ye__bej*k'ksV,\l{Q\KahvraNYOSx`in)zON SNOUO=OOsRSV YZ[[yfggkLlpksyyz<{ۃwӇfV)NO\brYu;傽řNOVXJX^_`*``babbe9AffhmwppuLv}uQRYT[]ahimxˈWrmlWgΒRVT^bdhkSlWo"ooEtuvw z{|!}6f̌Qeә(N8T+\]svLw<\ TXOOSqUVhWYG[ [\^ ^~_cg:eeghhj_^0kll}uyH[cz}_w̏Z/__`hjtZxwN^NO|OPPQIQlRRRSSTTUWQWY}[T[][]]]^x^^^_`RaLbbce;ffCfgmh!hil_m*min/nu2vxlz?|}}}^}T*RLaʑuqx?M؝;R[RSTXboj_QKR;TJVz@w`sDo pu_`ښrۏkdNVWdXZZ`haffh9hmu}:nBNOPSU]o]]gltsxPWP^c+PPQgTX^Y[_ibMch=ksnp}rxx&yme}0܈ RdW(gPjQWB*X:iT]WxO\RJTd>f(ggz{V}"/h\{9SQR7[bddg-kvcLvfRN PS\q`dech_qsu#{~ۑxefkNNO:OR:SSUVXYYY[P\M^^+_`ce/[\eeegkbk{lsEyIy|}}+󉖊^ifnjܖ̘koNOtuxy: 
3ꄔlP_X+z[NSWY1Z[`nou[{Prg\aJ~Q\chfeqny>}ʐnǐPR\:gSp|r5Lȓ+[_1`N;S[bKg1krsz.kRQSTj[cj9}VSTh[\1]Oabm2yy}B~MҁFrt/1KlƑNOOQESA_bglAn sc~&͒SY[my]~.|X~qQSO\f%wzQ_eiokmnodv}]uQRb@ffn^}rfRSYs^_`UdPQRS SGSTUFU1VYhYZ<[\\\\^^^_pbbbccwff f-fvg~hjj5lmn nXq d_xRbcdBb-z{}v INQHSCS`[\\]b&bGdhh4lmEmgo\qNq}ez{}~Jz9n΍xwRMUo8q6Qhy~U|VLXQ\cffiZruuyyVy|} }D4;a PRuSSP UXYOr=[\dS``c\cc?cdef]iioqNuvz|}}aIXlōpmPXaӅ5 OPtRGSs`ocIg_n,O\^e}SRQvc[X[k\ d gQ\NYY*lpQU>XY`bSg5iU@Ě(OSX[\^/_` aKb4flnހ΁Ԉ.ۛNSY'{,Lnp'SSUD[bXbblot"8o8QSSOFTYj1]zꏿhڌ7rHj=N9SXVWfbcekNmn[pwz{}=Ɔˊ[VX_>efjku7P$wW0_`efzl`uznE{u\z{QĐyz6Zw@N-N[_bft6x4ZFuO^bceWgovrL̀)MP WZhisqdrXjyw)O/ReSZbglv}{|6fo r~Q{xr{{Hj^auQu`QkbnvzOpb{OVzXY䖼O4R$SJSS^d,egl>lNrHrsuT~A,錩{đqic=fiujvxЅCS*SQT&Y^_|`bIbybekluvxy}w^j |8P\>_gkt5w ;gzS9u_f_<_ub{Feg llpx2~+ނ *JҘlNONPRVWJY^=__b?fgghQ}!~2T ,SPS\Xdg4rgwfzFRlkX^LYTg,QvdixTWYf'gkTi^UggRh]NOSbg+lO~mNabno+Tsg*E]{\[ƇnJzY|lw RY"q!r_wۗ'ai ZZQT T}fvߏYr]nQMh}}bdxj!Y[_ksv}Q2g(vgbR\$b;|~UO`} SN_QYr:6_%wS_y}3Vg󅮔Sa alvR8U/OQQ*RS[^}`acg gngms6s7u1yPՊJćYNOYN?P^|Y[^ccdfiJim nqu(zIɉ! 
e} a~bk2lmtmge}o#,TBojp2RZA^_gi|imjorbr{~KQmy2P-Tqkjā`gNNkhin~xU_ NNN*N1N6NzYUYPYNYZYXYbY`YgYlYiYxYYO^OYYYYYYYYZ%ZZZZ ZZ@ZlZIZ5Z6ZbZjZZZZZZZZZZZZZ[ [ [[2Z[*[6[>[C[E[@[Q[U[Z[[[e[i[p[s[u[xe[z[[[[[[[[[[[[[[[[[[\\\\ \\ \"\(\8\9\A\F\N\S\P\O[q\l\nNb\v\y\\\Y\\\\\\\\\\\\\]\] ]]]\]]]]]"]]]]L]R]N]K]l]s]v]]]]]]]]]]]]]]]]]]]]^ ^^^^^6^7^D^C^@^N^W^T^_^b^d^G^u^v^z^^^^^^^^^^^^^^^^^^^^^^^__ _]_\_ ___)_-_8_A_H_L_N_/_Q_V_W_Y_a_m_s_w____________________`_`!`````)``1```+`&``:`Z`A`j`w`_`J`F`M`c`C`d`B`l`k`Y`````````````````_````aMaa``a``aa!``a aaGa>a(a'aJa?acMdcOcccccvcccccckcicccccccccdd4ddd&d6edd(ddgdodvdNe*ddddddddddddddd ddbdde,ddddedeeee$e#e+e4e5e7e6e8uKeHeVeUeMeXe^e]erexeeeeeeeeeeeeeeeeegrf fegsf5f6f4ffOfDfIfAf^f]fdfgfhf_fbfpffffffffffffffffff?fffffgggg&g'8g.g?g6gAg8g7gFg^g`gYgcgdggpgg|gjgggggggggggggggggggggjhhFh)h@hMh2hNhh+hYhchwhhhhhhhhjhhthhhihh~ihihi"i&hi hhhhi6iihhi%hhhi(i*ii#i!hiyiwi\ixikiTi~ini9iti=iYi0iai^i]iijiiiiiiii[iiiiij.iiiiiiijjik iiijijijij jjj#jjDj jrj6jxjGjbjYjfjHj8j"jjjjjjjjjjjjjjjjjjjjkjkk1kk8k7vk9kGkCkIkPkYkTk[k_kakxkykkkkkkkkkkkkkkkkkkkkkkkkklllll$l#l^lUlbljllllll~lhlslllllllllllllllllmMm6m+m=m8mm5m3mm mcmmdmZmymYmmommnn mmmmmmmmmmmmmmmmmn-nnn.nnrn_n>n#nkn+nvnMnnCn:nNn$nnn8nnnnnnnnnnnnnnnnoAopLnnno?no1no2no>onoozoxooooo[oomoo|oXoooofooooooooooooooop p opppoppotpppp0p>p2pQpcppppppppppppq pqqqeqUqqfqbqLqVqlqqqqqqqqqqqqqqqqqqqqr rrr(r-r,r0r2r;rsNsOsWsjshspsxsus{szsssssssstttot%st2t:tUt?t_tYtAt\titptctjtvt~ttttttsttttttttttuuuu uu uuuu&u,uz7zCzWzIzazbzizpzyz}zzzzzzzzzzzzzzzzzzzzzzzzzz{{{ {{3{{{{5{({6{P{z{{M{ {L{E{u{e{t{g{p{q{l{n{{{{{{{{{{]{{{{{{{{{||{{|`|||{{|| {|#|'|*||7|+|=|L|C|T|O|@|P|X|_|d|V|e|l|u|||||||||||||||||||||;|||||}}}}} }E}K}.}2}?}5}F}s}V}N}r}h}n}O}c}}}[}}}}}}}}}}}~=}}}}}}}}}}}}}~~ ~#~!~~1~~ ~ ~"~F~f~;~5~9~C~7~2~:~g~]~V~^~Y~Z~y~j~i~|~{~}~}~~~~~~~~~~~~8:ELMNPQUTX_`higxq܀ !(?;JFRXZ_bhsrpvy}Qۀـ݀Āڀց )#/KF>SQqneft_Ɂ́сف؁ȁځ߁ )+83@YX]Z_dbhjk.qwx~߂҂ރ܃ ك5421@9PE/+#|su΃؄  " 8m*(ALONIV[Zk_lot}:A?HLNPUblxz|bȌڌ  N͍gmqsύڍ֍̍ۍˍߍ B504JGILPHYd`*cUvr|ƎŎȎˎێ  
&3;9EB>LIFNW\bcdڏ!  '659OPQRI>VX^hovr}bHۑ20JVXceisrɑˑБ֑ߑۑ,^WEIdH?KPZϒD."#:5;\`|nV֓דؓÓݓГȓ6+5!:ARD[`b^j)puw}Z|~oÕ͕̕Օԕ֕ܕ!(./BLOKw\^]_frlΖ˖ɖ͉Mܗ Ֆ$*09=>DFHBI\`dfhRҗkqy|z×Ɨȗ˗ܗOzߗ 8$!7=FOKkopqtsĘØƘ !$ ,.=>BIEPKQRLUߙۙݙؙљ+7EB@C>UM[W_bedikjϚњӚԚޚߚ"#%'()*./2DCOMNQXtʛƛϛћқԛ:   .%$!0G2F>Z`gvx *&#DA?>FH]^dQPYrozĝƝϝٝӝuy}a̞ΞϞОԞܞޞݞv!,>JRTc_`afgljwrvX/iǐYtdQq~!"!    RSTUVWXYZ[\^_&2A?JRf'138B@IKSg  " $0*.(469=;CGEPLTXVZ\`^dblpjrnhtxvy}{   !%+/)57:><DHFQMUYW[]a_ecmqksoiuwz~|NNNN NNN#N$N(N+N.N/N0N5N@NANDNGNQNZN\NcNhNiNtNuNyNNNNNNNNNNNNNNNNNNNNNNOOOOO O OOOOOO.O1O`O3O5O7O9O;O>O@OBOHOIOKOLOROTOVOXO_OcOjOlOnOqOwOxOyOzO}O~OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOPPPPP P PPPPPPPPPP"P'P.P0P2P3P5P@PAPBPEPFPJPLPNPQPRPSPWPYP_P`PbPcPfPgPjPmPpPqP;PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPQQQQQ Q Q QPQQQQQQQQ#Q'Q(Q,Q-Q/Q1Q3Q4Q5Q8Q9QBQJQOQSQUQWQXQ_QdQfQ~QQQQQQQQQQQQQQQQQQQQQQQQQQQQQQRRRRRRRRR"R(R1R2R5RSBSLSKSYS[SaScSeSlSmSrSyS~SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTT!T'T(T*T/T1T4T5TCTDTGTMTOT^TbTdTfTgTiTkTmTnTtTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUU U U UUU*U+U2U5U6U;UW?WEWFWLWMWRWbWeWgWhWkWmWnWoWpWqWsWtWuWwWyWzW{W|W~WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXX WX X XXXX X&X'X-X2X9X?XIXLXMXOXPXUX_XaXdXgXhXxX|XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYY Y YYYAYY!Y#Y$Y(Y/Y0Y3Y5Y6Y?YCYFYRYSYYY[Y]Y^Y_YaYcYkYmYoYrYuYvYyY{Y|YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZ Z ZZZZZ#Z$Z'Z(Z*Z-Z0ZDZEZGZHZLZPZUZ^ZcZeZgZmZwZzZ{Z~ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[4[[[[![%[-[8[A[K[L[R[V[^[h[n[o[|[}[~[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\ \\\\#\&\)\+\,\.\0\2\5\6\Y\Z\\\b\c\g\h\i\m\p\t\u\z\{\|\}\\\\\\\\\\\\\\\\\\\\\\\\\]]] ]]+]#]$]&]']1]4]9]=]?]B]C]F]H]U]Q]Y]J]_]`]a]b]d]j]m]p]y]z]~]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^ ^^^^^^ ^.^(^2^5^>^K^P^I^Q^V^X^[^\^^^h^j^k^l^m^n^p^^^^^^^^^^^^^^^^^^^^^^^^^^_________!_"_#_$_(_+_,_._0_4_6_;_=_?_@_D_E_G_M_P_T_X_[_`_c_d_g_o_r_t_u_x_z_}_~________________________________________`` ` ```````$`-`3`5`@`G`H`I`L`Q`T`V`W`]`a`g`q`~``````````````````````````````````````````aaa a 
aaaaaaaaaaa"a*a+a0a1a5a6a7a9aAaEaFaIa^a`alaraxa{a|aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbb b"b#b'b)b+b9b=bBbCbDbFbLbPbQbRbTbVbZb\bdbmbobsbzb}bbbbbbbbbbbbbbbbbbbbbbccc c c ccccc)c*c-c5c6c9cgEgGgHgLgTgUg]gfglgngtgvg{gggggggggggggggggggggggggggggggggghRhhhhh(h'h,h-h/h0h1h3h;h?hDhEhJhLhUhWhXh[hkhnhohphqhrhuhyhzh{h|hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhii i iiiiii1i3i5i8i;iBiEiIiNiWi[icidieifihiiilipiqirizi{iiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjj j$j(j0j2j4j7j;j>j?jEjFjIjJjNjPjQjRjUjVj[jdjgjjjqjsj~jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkk kkkkkkk$k(k+k,k/k5k6k;k?kFkJkMkRkVkXk]k`kgkkknkpkuk}k~kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllll l llllll&l'l(l,l.l3l5l6l:l;l?lJlKlMlOlRlTlYl[l\lklmloltlvlxlyl{llllllllllllllllllllllllllllllllmmm mmmmmm&m'm(lgm.m/m1m9mu?u@uCuGuHuNuPuRuWu^u_uauouquyuzu{u|u}u~uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvv v vvvvvvvvvvv#v%v&v)v-v2v3v5v8v9v:vwBwEwFwJwMwNwOwRwVwWw\w^w_w`wbwdwgwjwlwpwrwswtwzw}wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxx x xxxx!x"x#x-x.x0x5x7xCxDxGxHxLxNxRx\x^x`xaxcxdxhxjxnxzx~xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyxxxxxyy yyyyyy y%y'y)y-y1y4y5y;y=y?yDyEyFyJyKyOyQyTyXy[y\ygyiykyryyy{y|y~yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzz z z zzzzz!z'z+z-z/z0z4z5z8z9z:zDzEzGzHzLzUzVzYz\z]z_z`zezgzjzmzuzxz~zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz{{{{#{'{){*{+{-{.{/{0{1{4{={?{@{A{G{N{U{`{d{f{i{j{m{o{r{s{w{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{|||||| | | ||||| |%|&|(|,|1|3|4|6|9|:|F|J|U|Q|R|S|Y|Z|[|\|]|^|a|c|g|i|m|n|p|r|y|||}|||||||||||||||||||||||||||||||}}}} }}}}}}}}#}&}*}-}1}<}=}>}@}A}G}H}M}Q}S}W}Y}Z}\}]}e}g}j}p}x}z}{}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}~}}}}}}}}}}}~~~~~~~~ ~'~(~,~-~/~3~6~?~D~E~G~N~P~R~X~_~a~b~e~k~n~o~s~x~~~~~~~~~~~~~<;=>?CDGORS[\]acdefmq}~  $&,.04579:<>@D`dfmquȀ̀πҀԀՀ׀؀  $',05:@CEMX]adeoqr{ȇɇʇ·Շևهڇ܇߇ (-.025:@BEFIORWZ[\abcknpsuz{|}ԉՉ։׉؉ "$&+,/57=>@CEGIMNSVWX\]aeguvwyz{~ÊƊȊɊʊъӊԊՊ׊݊ߊ  -07EGIKOQSTWX[]Ycdfhimsuv{~ŌƌɌˌό֌Ռٌ݌ eilnōƍǍȍʍ΍эԍՍ׍ٍ  !"#&'136789=@AKMNOT[\]^abilmopqyz{ÎĎǎώюԎ܎ !#%'(,-.4567:@ACGOQRSTUX]^eƏʏˏ͏ЏҏӏՏ 
()/*,-347?CDL[]bfglpty̐ÐĐŐǐȐՐאِؐܐݐߐҐ  %"#')./14679:<=CGHOSWYZ[adgmtyz{‘Ñőӑԑבّڑޑ #$%&(./035689:<>@BCFGJMNOQXY\]`aeghinopuvwxy{|}’ÒŒƒǒȒ˒̒͒ΒВӒՒגْؒܒݒߒ !$%')*3467GHIPQRUWXZ^degijmopqstvz}ēœƓǓɓʓ˓͓̓ӓٓܓޓߓ ./1234;?=CEHJLUY\_achkmnoqrxy~ƕȕɕ˕Еѕҕӕٕڕݕޕߕ"$%&,13789:<=ARTVWXant{|~ʖ]ؖږݖޖߖ !"#(13ACJNOUWXZ[cgjnsvwx{}ėŗǗɗʗ̗͗ΗЗїԗחؗٗݗޗۗ   #&+./0235%>DGJQRSVWYZbcefjl˜ŘȘ̘"&'+123459:;<@AFGHMNTXY[\^_`Ùəәԙٙڙܙޙ  "#$'-.3568GADJKLNQTV]ÚƚȚΚКҚ՚֚ךۚܚ  &+-34579:=HKLUVW[^acefhjklmnsuwxyǛțΛЛכ؛ݛߛ "#&'()*1567=ACDEIJNOPSTVX[]^_cij\khnpruw{ /0234:?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~%%% %%%%%$%,%4%<%%%%%%%%# %"""H"d"e#!%P%Q%RQ%S%T%U%V%W%X%Y%Z%[%\%]%^%_%`%a%b%c%d%e%f%g%h%i%j%k%lN01F45D3E89:;<=>?O@ABC62LK7HMIGJ.&$%/ !"#,+(-)'*XML-Parser-2.46/Parser/Encodings/iso-8859-3.enc0000644000000000000000000000206013542305435017240 0ustar rootrootISO-8859-3  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~&$0^4{'%1_5|  l\  !m]XML-Parser-2.46/Parser/Encodings/Japanese_Encodings.msg0000644000000000000000000001132513542305435021437 0ustar rootrootMapping files for Japanese encodings 1998 12/25 Fuji Xerox Information Systems MURATA Makoto 1. Overview This version of XML::Parser and XML::Encoding does not come with map files for the charset "Shift_JIS" and the charset "euc-jp". Unfortunately, each of these charsets has more than one mapping. None of these mappings are considered as authoritative. Therefore, we have come to believe that it is dangerous to provide map files for these charsets. Rather, we introduce several private charsets and map files for these private charsets. If IANA, Unicode Consoritum, and JIS eventually reach a consensus, we will be able to provide map files for "Shift_JIS" and "euc-jp". 2. 
Different mappings from existing charsets to Unicode 1) Different mappings in JIS X0221 and Unicode The mapping between JIS X0208:1990 and Unicode 1.1 and the mapping between JIS X0212:1990 and Unicode 1.1 are published from Unicode consortium. They are available at ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0208.TXT and ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/JIS0212.TXT, respectively.) These mapping files have a note as below: # The kanji mappings are a normative part of ISO/IEC 10646. The # non-kanji mappings are provisional, pending definition of # official mappings by Japanese standards bodies. Unfortunately, the non-kanji mappings in the Japanese standard for ISO 10646/1, namely JIS X 0221:1995, is different from the Unicode Consortium mapping since 0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather than U+2015 (horizontal bar). Furthermore, JIS X 0221 clearly says that the mapping is informational and non-normative. As a result, some companies (e.g., Microsoft and Apple) have introduced slightly different mappings. Therefore, neither the Unicode consortium mapping nor the JIS X 0221 mapping are considered as authoritative. 2) Shift-JIS This charset is especially problematic, since its definition has been unclear since its inception. The current registration of the charset "Shift_JIS" is as below: >Name: Shift_JIS (preferred MIME name) >MIBenum: 17 >Source: A Microsoft code that extends csHalfWidthKatakana to include > kanji by adding a second byte when the value of the first > byte is in the ranges 81-9F or E0-EF. >Alias: MS_Kanji >Alias: csShiftJIS First, this does not reference to the mapping "Shift-JIS to Unicode" published by the Unicode consortium (available at ftp://ftp.unicode.org/Public/MAPPINGS/EASTASIA/JIS/SHIFTJIS.TXT). Second, "kanji" in this registration can be interepreted in different ways. Does this "kanji" reference to JIS X0208:1978, JIS X0208:1983, or JIS X0208:1990(== JIS X0208:1997)? 
These three standards are *incompatible* with each other. Moreover, we can even argue that "kanji" refers to JIS X0212 or ideographic characters in other countries. Third, each company has extended Shift JIS. For example, Microsoft introduced OEM extensions (NEC extensionsand IBM extensions). Forth, Shift JIS uses JIS X0201, which is almost upper-compatible with US-ASCII but is not quite. 5C and 7E of JIS X 0201 are different from backslash and tilde, respectively. However, many programming languages (e.g., Java) ignore this difference and assumes that 5C and 7E of Shift JIS are backslash and tilde. 3. Proposed charsets and mappings As a tentative solution, we introduce two private charsets for EUC-JP and four priviate charsets for Shift JIS. 1) EUC-JP We have two charsets, namely "x-eucjp-unicode" and "x-eucjp-jisx0221". Their difference is only one code point. The mapping for the former is based on the Unicode Consortium mapping, while the latter is based on the JIS X0221 mapping. 2) Shift JIS We have four charsets, namely x-sjis-unicode, x-sjis-jisx0221, x-sjis-jdk117, and x-sjis-cp932. The mapping for the charset x-sjis-unicode is the one published by the Unicode consortium. The mapping for x-sjis-jisx0221 is almost equivalent to x-sjis-unicode, but 0x213D of JIS X 0208 is mapped to U+2014 (em dash) rather than U+2015. The charset x-sjis-jdk117 is again almost equivalent to x-sjis-unicode, but 0x5C and 0x7E of JIS X0201 are mapped to backslash and tilde. The charset x-sjis-cp932 is used by Microsoft Windows, and its mapping is published from the Unicode Consortium (available at: ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.txt). The coded character set for this charset includes NEC-extensions and IBM-extensions. 0x5C and 0x7E of JIS X0201 are mapped to backslash and tilde; 0x213D is mapped to U+2015; and 0x2140, 0x2141, 0x2142, and 0x215E of JIS X 0208 are mapped to compatibility characters. 
Makoto Fuji Xerox Information Systems Tel: +81-44-812-7230 Fax: +81-44-812-7231 E-mail: murata@apsdc.ksp.fujixerox.co.jp XML-Parser-2.46/Parser/Encodings/ibm866.enc0000644000000000000000000000206013542305435016706 0ustar rootrootibm866  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ !"#$%&'()*+,-./0123456789:;<=>?%%%%%$%a%b%V%U%c%Q%W%]%\%[%%%4%,%%%<%^%_%Z%T%i%f%`%P%l%g%h%d%e%Y%X%R%S%k%j%% %%%%%@ABCDEFGHIJKLMNOQTW^""!%XML-Parser-2.46/Parser/Encodings/windows-1252.enc0000644000000000000000000000206013542305435017754 0ustar rootrootwindows-1252  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~   & ! 0` 9R}     "  !"a :S~xXML-Parser-2.46/Parser/Encodings/euc-kr.enc0000644000000000000000000013135213542305435017070 0ustar rootrootEUC-KR}F  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~}?A}A;AAAuA3AAAmA+AAA eA #A A A ]A A AAUAAAAMA AAAEAAAA=A?AA^AAAxA'_AAAVA A` A`!A`!}A!A"A#YA$A$A%A&QA'A'A(A)IA*A*A+A,AA,A-A.{A/9A/A0A1sA21^2^3M^3^4 ^4g^4^5#^5^5^6=^6^6^7W^7^8^8q^8^9-^9^9^:G^:^;^;a^;^<^<{^<^=7^=^=^>Q^>^? 
^?k^?^@'^@^@^AA^A^A^B[^B^C^Cu^C^D1^D^D^EK^E^F^Fe  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{| !"#%&'()*+.234567:;=>?ABCDEFGHIJLNOPQRSUVWYZ[]^_`abcdefghijklmnorsuvy{|}~¬ìŬƬǬɬʬˬͬάϬЬѬҬӬԬ֬ج٬ڬ۬ܬݬެ߬ !"#$%&'(*+./0123679:;=>?@ABCFHJKLMNOQRSUVWYZ[\]^_`bdefghijknoqrwxyz~­íŭƭǭɭʭ˭̭ͭέϭҭԭխ֭׭ح٭ڭۭݭޭ߭  !"#$%&'()*+,-./23569;<=>?BDGHIKOQRSUWXYZ[^bcdfgjkmnoqrstuvwz~®îŮƮǮȮɮʮˮήҮӮԮծ֮׮ڮۮݮޮ߮  !"#$%&'()*+./1356789:;>@DEFGJKLMNOQRSTUVWXYZ[^_`abcfghijklmnopqrstuvwxz{|}~¯ïįůƯʯ̯ϯЯѯүӯկ֯ׯدٯگۯݯޯ߯  !"#$%&')*+,-./0123456789:;<=>?@ABCFGIKMOPQRVXZ[\^_`abcdefghijklmnopqrstuvwxyz{~°ðưʰ˰̰ͰΰϰӰհְװٰڰ۰ܰݰް߰  !"&')*+-./01236:;<=>?BCEFGIJKLMNORSVWYZ[]^_abcdefghijklmnopqrstuvwz{}~±ñıűƱDZȱɱʱ˱ͱαϱѱұӱֱױرٱڱ۱ޱ !"#$%&'()*+,-./012356789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWYZ[]^_abcdefgjklmnopqrsvwxyz{}~²òIJŲƲDzʲ˲ͲβϲѲӲԲղֲײڲܲ޲߲  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSWYZ]`abcfhjlmorsuvwyz{|}~³óƳdzɳʳͳϳѳҳӳֳسڳܳ޳߳ !"#$%&'*,-./012356789:;<=>?@ABCDEFGHIJKLMNORSUVWYZ[\]^_bdfghijkmnopqrstuvwxyz{|}~´ôŴƴǴɴʴ˴̴ʹδϴѴҴӴԴִ״شٴڴ۴޴ߴ  !"#&+,-./235679:;<=>?BFGHIJNOQRSUVWXYZ[^bcdefghijklmnopqrstuvwxyz{|}~µõŵƵǵȵɵʵ˵εҵӵԵյֵ׵ٵڵ۵ܵݵ޵ߵ  !"#$&'()*+-./012356789:;<=>?@ABCDEFGIJKLMNOPQRSTUVWXYZ[\]^_`abcefgijklmnopqrstuvwxyz{|}~¶öĶŶƶǶȶɶʶ˶̶Ͷζ϶жѶҶӶնֶ׶ضٶڶ۶ܶ޶߶  !"#$%&'*+-.1234567:<=>?@ABCEFGIJKMNOPQRSVWXYZ[\]^_abcefgijklmnortvwxyz{~·÷ķŷƷȷʷ˷̷ͷηϷзѷҷӷԷշַ׷طٷڷ۷ܷ޷߷  !"#&')*+-./01236:;<=>?ABCEFGHIJKLMNOPRTUVWXYZ[^_abcefghijknprstuvwyz{}~¸ĸƸǸȸɸʸ˸͸θϸѸҸӸոָ׸ظٸڸ۸ܸ޸ !"#$%&'()*+,-./0123456789:;>?ABCEFGHIJKMNPRSTUVWZ[]^_abcdefgjlnopqrsvwyz{}~¹ùĹŹƹǹʹ˹͹ӹԹչֹ׹ڹܹ߹  !"#$%&'()*+,-./01234567:;=>?ACDEFGJLOPQRVWYZ[]^_`abcfjklmnorsuvwyz{|}~ºúźƺǺɺʺ˺̺ͺκϺкѺҺӺԺպֺ׺ںۺܺݺ޺ߺ !"#$%&'(*,-./012379:?@ABCFHJKLNQRSUVWYZ[\]^_`bdefghijkmnopqrstuvwxyz{|}~»ûŻƻǻɻʻ˻̻ͻλϻѻһԻջֻ׻ػٻڻۻܻݻ޻߻  !"#&(*+,./235679:;<=>?BFGHJKNOQRSTUVWXYZ[\^_`abcdefghijklmnopqrstuvwxyz{|}~¼üżƼǼȼɼʼ˼̼μҼӼԼּ׼ټڼۼݼ޼߼  !"#%&'()*+-./0123456789:;<=>?ABCDEFGJKMNOQRSTUVWZ[\]^_`abcefgijklmnopqrstuvwxyz{|}~½ýĽŽƽǽȽɽʽ˽̽ͽνϽнѽҽӽֽ׽ٽڽ۽ݽ޽߽  
!"#$%&'()*+,-./0123456789:;<=>?@ABCFGIJKMOPQRSVX\]^_bcefgiklmnorvwxyz~¾þľžƾǾȾɾʾ˾̾;ξϾҾվ־پھ۾ܾݾ޾߾  !"#$%&'()*+,-./0123456789:;<=>?BCEFGIJKLMNORSTVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¿ÿĿƿǿȿɿʿ˿οϿѿҿӿտֿ׿ؿٿڿۿݿ޿  !"#$%&'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPRSTUVWYZ[]^_abcdefgjklmnopqrstuvwxyz{|}~ !"%()*+.23457:;=>?ABCDEFGJNOPQRSVWYZ[]^_`abcfjklmnoqrsuvwyz{|}~ !"#$%&'*,.0356789:;<=>?@ABCDEFGIJKLMNORSUVWYZ[\]^_abcdfghijknoqrsuvwxyz{~€‚ƒ„…†‡Š‹ŒŽ‘’“”•–—™šœžŸ ¡¢£¦§©ª«®¯°±²³¶¸º»¼½¾¿  !"#&'*+,-./0123456789:;<=>?@ABCDFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgjkmnoqstuvwz{~ÀÁÂÃÅÆÇÉÊËÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ  !"#%&'()*+-./12356789:;>?@ABCDEFGIJKLMNOPQRSTUVWXYZ[\]^_`abcfgijkmnopqrsvwxz{|}~āĂ㥹ĆćĈĉĊċČčĎďĐđĒēĕĖėĘęĚěĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĹĺĻĽľĿ  !"#$%&'*+-./1234567:<>?@ABCFGKOPQRVZ[\_bcefgijklmnorvwxyz{~ŁłŃŅņňʼnŊŋŎŐŒœŔŖřŚśŝŞşšŢţŤťŦŧŨŪūŬŭŮůŰűŲųŶŷźſ  !"#&')*+/1268:<=>?BCEFGIJKLMNORVWXYZ[^_abcdefghijkmnprstuvwz{}~ƁƂƃƄƅƆƇƊƌƎƏƐƑƒƓƖƗƙƚƛƝƞƟƠơƢƣƦƨƪƫƬƭƮƯƲƳƵƶƷƻƼƽƾƿ "#%&')*+,-./24689:;>?ABCEFGHIKNPYZ[]^_abcdefgijlmnopqrsvwyz{ǀǁǂdžNjnjǍǏǒǓǕǙǛǜǝǞǟǢǧǨǩǪǫǮǯDZDzdzǵǶǷǸǹǺǻǾ !"#%&'()*+.02345679:;=>?ABCDEFGJKNOPQRSUVWXYZ[\]^_`abcdefghijklmnorsuvwy{|}~ȂȄȈȉȊȎȏȐȑȒȓȕȖȗȘșȚțȜȞȠȢȣȤȥȦȧȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȾȿ 000 % &0 "%<"<    0000 0 0 0 0 0000"`"d"e""4 2 3!!+&B&@" "#"""a"R ;&&%%%%%%%%%%%!!!!!0"j"k""=""5"+","" """""*")"'"( !"#$%&'()*+-./012356789:;<=>?@ABCDEFGHIJKLMNORSUVWYZ[\]^_bdefghijkmno!!""^".""! 
0%%%%&d&`&a&e&g&c"%%%%%%%%%%%&h&&&& !!!!!!&m&i&j&l22!3!"33!!qrsuvwxyz{}~ɀɁɂɃɄɅɆɇɊɋɍɎɏɑɒɓɔɕɖɗɚɜɞɟɠɡɢɣɤɥɦɧɨɩɪɫɬɭɮɯɰɱɲɳɴɵɶɷɸɹɺɻɼɽɾɿ  !"#$%&'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]  !"#$%&'(*+,-./0123456789:;<=>?@ABCDEF1112131415161718191:1;1<1=1>1?1@1A1B1C1D1E1F1G1H1I1J1K1L1M1N1O1P1Q1R1S1T1U1V1W1X1Y1Z1[1\1]1^1_1`1a1b1c1d1e1f1g1h1i1j1k1l1m1n1o1p1q1r1s1t1u1v1w1x1y1z1{1|1}1~1111111111111111GHIJKNOQRSUVWXYZ[^bcdefgijklmnopqrstuvwxyz{|~ʀʁʂʃʅʆʇʈʉʊʋʌʍʎʏʐʑʒʓʔʕʖʗʙʚʛʜʝʞʟʠʡʢʣʤʥʦʧ!p!q!r!s!t!u!v!w!x!y!`!a!b!c!d!e!f!g!h!iʨʩʪʫʬʭʮʯʰʱʲʳʴʵʶʷʸʹʺʻʾʿ %%% %%%%%,%$%4%<%%%%%%%#%3%+%;%K% %/%(%7%?%%0%%%8%B%%%%%%%% %%%!%"%&%'%)%*%-%.%1%2%5%6%9%:%=%>%@%A%C%D%E%F%G%H%I%J "#$%&'()*+,-./0123456789:;<=>?@BCDEFGJKMNOQRSTUVWZ[\^_`abcefghijkl333!3333333333333333333333333333333333333333333333333333!&3333333333333333333333mnopqrstuvwz{|}~ˀˁ˂˃˄˅ˆˇˈˉˊˋˌˍˎˏːˑ˒˓˔˕˖˗˘˙˚˛˝˞˟ˠˡˢˣˤ˥˦˧˨˩˪˫ˬ˭ˮ˯˰˱˲˳˴˵˶˷˹˺˻˼˽˾˿&2?ARfJ2`2a2b2c2d2e2f2g2h2i2j2k2l2m2n2o2p2q2r2s2t2u2v2w2x2y2z2{$$$$$$$$$$$$$$$$$$$$$$$$$$$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n!S!T![!\!]!^  #$'138@BSgKI2222222222 2 2 2 2 22222222222222$$$$$$$$$$$$$$$$$$$$$$$$$$$t$u$v$w$x$y$z${$|$}$~$$$$ t  %&*+-/1234567:?@ABCFGIJKMNOPQRSVZ[\]^_abcegijklmnoqrstvwxyz{|}~̀́̂̃̄̅̆̇̈̉̊̋̌̍̎̏̐̑̒̓0A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~000000000000000000000̶̷̡̢̧̛̖̗̝̞̟̣̤̥̦̪̮̯̰̱̲̳̹̺̻̔̽̾̿̕̚00000000000000000000000000000000000000000000000000000000000000000000000000000000000000  !"#%&')*+-./012345678:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_ !"#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNOabcefghijknprstuvwyz{|}~͇͉͍͎̀́͂̓̈́͆͊͋͌ͅ͏͓͖͙͚͐͑͒͗͛ͣͦͨͪͫͬͭͮͯ͟͢͝͞͠͡ͱͲͳʹ͵Ͷͷ͸͹ͺͻͼͽ;Ϳ "#%&')*+,-./246789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWZ[]^bcdefgjlnopqrsvwyz{}~΀΁΂΃ΆΈΊ΋Ό΍ΎΏΒΓΕΖΗΙΚΛΜΝΞΟ΢ΦΧΨΩΪΫήίΰαβγδεζηθικλμνξο  $,-/0189<@KMTX\pqtwxzĬȬ̬լ׬ !"#%&'()*+.2345679:;<=>?@ABCDEFGHIJKLMNOPQRSVWYZ[]^_`abcfhjkl  ),-458?@ABCFHJKLMNOQRSUVWYZ[\]^_abcdefghijknoqrsuvwxyz{~ЀЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГД߱  
4?BFGHIJKNOQRSUVWXYZ[^`bcdefgijkm@ACDEKLMPT\]_`aĵ̵͵ϵеѵص%,4HdhԶ(),/089;DHLTU`dhpqsu|}nopqrstuvwxyz{}~рстухцчщъыьэюяѐёђѓєѕіїјљњћќѝўџѢѣѥѦѧѩѪѫѬѭѮѯѲѴѶѷѸѹѻѽѾѿϷǷɷ $%(,45789@DQS\]`dlmoqx|øŸ̸иԸݸ߸ <=@DLOQXY\`hi  !"#$%&'()kmtux|ȹɹ̹ιϹйѹҹعٹ۹ݹ޹89<@BHIKMNSTUX\deghipqtxĺȺغٺ*+./12356789:;>@BCDEFGIJKLMNOPQRSTUVWXYZ[]^_`abcefghijklmnopqrstuvwxyz{|}~҂҃҅҆҇҉ҊҋҌ  )+4568;<=>DEGIMOPTXaclĻȻлӻ $%')-0148@ACDEILMP]ҍҎҏҒғҔҖҗҘҙҚқҝҞҟҡҢңҥҦҧҨҩҪҫҭҮүҰҲҳҴҵҶҷҺһҽҾļͼϼмѼռؼܼ $,@HILPXYdhԽսؽܽ DEHLNTUWYZ[`ad "#$&'*+-./1234567:>?@ABCFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghihjpqstu{|}оѾԾ׾ؾ @ADHPQUſ̿ͿпԿܿ߿?ABCEFGHIJKLMNOPQRSTUVWXYZ[]^_abcefghijklnpqrstuvwz{}~ԁԃԄԅԆԇԊԌԎԏԐԑԒԓԕԖԗԘԙԚԛԜԝ4<=HdehltuyĀĔĜĸļ (),089;=DEHIJLMNSTUWXY]^`adhpqstu|}ŀńŇŌōŏőŕŗŘŜŠũŴŵŸŹŻżŽžԞԟԠԡԢԣԤԥԦԧԨԪԫԬԭԮԯ԰ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿ $%(,-.034579;@ADHPQSTU\]`loqxy|ƀƈƉƋƍƔƕƘƜƤƥƧƩưƱƴƸƹƺ  !"#$%&'()*+,-./0123456789:;>?ABCEFGHIJKNPRSTUVWZ[]^_abc  !$(01357<=@DJLMOQRSTUVWX\`hktux|}~ǃDŽDžLJLjljNJǎǐǑǔǖǗǘǚǠǡǣǤǥǦǬǭǰǴǼǽǿ dfgjlnopqrsvwyz{}~ՀՁՂՃՆՊՋՌՍՎՏՑՒՓՔՕՖ՗՘ՙ՚՛՜՝՞՟ՠաբգդզէըթժիլխծկհձղճմյնշոչպջռսվտ $,-/18<@HILMTpqtxzȀȁȃȅȆȇȋȌȍȔȝȟȡȨȼȽ ,4PQTX`aclpt|ɈɉɌɐɘəɛɝ !"#%&'()*+,./01234567:; )LMPT\]_`ah}ʄʘʼʽ !AHILPXY]dxy˜˸ !"'(),.089;=>?ABCDFGJLNOPRSVWYZ[]^_`abcdefhjklmnorsuvwxyz{|}~րցւքֆևֈ։֊֋֎֏֑֖֛֢֣֤֥֦֧֪֚֒֓֕֗֘֙֜֞֠֩<=>DEHLTUWXY`dfhpų̴̵̸̘̙̜̠̩̫̬̭̼ $(,9\`dlmoqx͈͔͕ͤͥͧͩ͘͜Ͱ  !$(0135ֱֲֳִֵֶַָֺּֽ֭֮֫֯־ֿ XY\_`ahikmtux|΄΅·ΉΐΑΔΘΠΡΣΤΥάέ $,-/018TUX\degipqtxπυόϡϨϰ-458<!"#$%&'*,./0123679:;=>?@ABCEFHJKLMNORSUZ[\]^_bdfghjkmnoqrsuvwxyz{~׀ׂ׃ׅׄ׆ׇ׊׋DEGIPTX`lmpt|}ЁФХШЬдезй 0148:@ACDELMPT\]_ahl|фшѠѡѤѨѰѱѳѵѺѼ ,-04<=?AH\׍׎׏בגדהוזחךלמןנסעףdҀҁ҄҈ҐґҕҜҠҤҬұҸҹҼҿ  !%(),089;<=DE|}ӀӄӌӍӏӐӑӘәӜӠӨөӫӭӴӸӼ@D\`dmoxy|ԀԂԈԉԋԍԔԩ <=@DLMOQXY\`ehikmtux|ՄՅՇՈՉՐե $-89<@EHIKMQTUX\gipqtփօ֌֍֐ְֹֻ֔֝֟֡֨֬ ()+-458ahe9iouvv{U[WQ|P(S\E]bcnddn py[ݎ}EN~NPe]^aiWqTGu+N^Pgph@Q RRjwR`/PHacdhpLt/t{Pʼnܙ(R.`]bOQIS!X^fm8prs{P[SfckNVPXJX`*a'biЛA[}_NPTU[ ]]e*eNh!jKrvw}^NߏNʙUNNEN]NOQwRS@SSTVWuW[]^abeQggikPkklBnpxrstwwzv}  ߈b3dҚEןW \@ʗTzَ͐X\Hcz[_zyzP&R8RSwWbcrk mw7SsWhvg:jopm̙Kfwkx<SW-YNcisxEzz|us5RWGuG{`̒jXQKRKRbhiuPRRaeh9it~{K뉲9љI NYdfjt4yy~_ &O 
S`%bqlr}}fNQbw܀OOQvQUVhW;WWYYGY[\]]^~_beeggg^hhj_k:l#l}lmst&t*ttuxuxxyAyGyHyz{}}-OHw!$Qe}OvT bhTUQ: Za bbfqV cOczSW!gi`ns"u7#$%} &'rVZ()*+,NC-QgYHg.Ys^tdy_`lbc{[[R/Yt_)`012tY3456789:;<=>?@ABCoDE`FGfHI\?JKLMNOPQZ%g{}RSTUVWXY\r-RL]Pde,ko|C~͉dbɁ؈^gmjrttoO] _ QcueNPQiQhj|||oҏOQ7RTB^anb>ejoy*܈#bjRfkwpy+bBabe#o#qIt}o&#JQRRm pȈ^eko|>suNO6V_\]`s{-΀F4HaOoyR`ddjo^prv\2ouxy}ɓX_g'p't|`~Q!p(rbxʌŒڌNP[^eqvBwJ|'XZA\bjmov;}/~78KRegimAnpt t`uYv$xk,^Qmb.xOP+]m}*_aDhaRҀQQi^z}uOR)STU\e`gNhmlrrttbu|lyψ̑ЖTo~qtWgmt3x,z{ |ditjuxx虬T[^Uo NSMZ)]_Nabc=fifno+pcw,;E;Ubg+l jzNY__g}T+WYZ[f'ghkqdu㐁EL@[_lsvv߄ QQMQRhlww }}bnŅQT T}ffi'nvw‡iOQRY^=aUdxdyfgj!kkr_ratAw8wۀ(g(lrgvwfzFklY"g&SoXY^cf4gsn:s+zׂד(R]aab bdeiYkfkq!su]~Fj'aXPRT;UOelv} } ^RlriTsZ\>]K_L_g*hicneef ginx}!+*2P OcW_bcgonCqvÀ̀ڈ)MjO/Op^gh"v}v~D^aj qiqujd~ACO{OpQ^hl>lNlr{ălt:PRXdjtvVx9eS^_R%wINPQu\[^wff:ghpuuyzݏ' OX!X1[fnkemnzo}su+܉\OPSS\[_g yy/9;g,NvOYI\\\cghpqt+~+"ҜN NOPRVRoT&TWY+Zf[Z[u[^fbvewemnnr6{&|?6PQ@tܖDٜSRT)VtXYTYn_abnfl~qv||}gO[__b])g hx|~ClNPSS*SQYZb^`abIbyegikkkklht5uxxyy|}ဥ>ƒ芹l^۟;V[*_lejkm\opr]sӘ;al7XNMNNNO:Oy@y`y{}}r цLjߊP^܍fߞRJigjPR*\qeclUsu#u{x0Nwdkq^N kgIhnkcoN PPQUFUV[@\\^8^^^`hQjanXr=r@rvye{ԈsaޗX^tUlza}"rrruu%m{XX]^^_`UbceMfffhhrt^{n}n}r͟ YYm^-`ffsglPmo_wxƑ˓+NPQHU[ [bGe~en2q}ttDttvly}~Uz9ux%MSh\QiTlm)n+ ;-gRafk~ ]emqnWY[`'`bff_s)svw{lVreNRkrmz{9}0oSV/XQ[\\]b@cdf-hlmnppqu&uuv{{|+} }9,m4 a7O\lg_m|~[k]d \s[`g~mފ R7ppQxpOSUVWXZ[\\^%ab bKcde6exj9kl4mo1qrsxttv&wayzWz|}}~a)1څꈖ8Bl֗ӛSX~Y[p[moZqt!t]__`BehiojSkm5msvw{M}#@cbđb Se]]']it_hob҉6rNNXPRSGbf~i^OSV6YZ\8\N\M^_`Cef/fBggswy:ń͉fiUzW[_`obi kn\q{҇UXߘO8OOT{Z [aT3UXbXYgZ[`aeVefdhlZopqsR{}2\KlsDs:ntevzi~ Q@Xdtuvp͙Tn&tzzهxZI[[himcstt,x}UL.f_egljsP-ZkjwY]l]s%uOPQX/Y-YY[]bdddfjHqtdzz~G~^p‡ YÐRa~k2mt~%OPQRWX[^aBimngnqtbu(u,s8Ɏ NOQPvQ*SSS[[\$aaer[st@vyPyy}Ն^GꖅR_gef1h/q\z6 NjRkoqSK1NqQCSTWWWZZ[`(a?clm9nrnr0s?tWшE`ƖbXg^OMPIPSqW YZ\ apfn-r2tK}Äf?_[U˛ONsOQQjU/U[z[^|^}^``aa ce8g ggiaiblm'n8os6s7t\u1vR}8Պۊ0BJ>zIɓnX kӀQAYk\9odsגۀbp}hW`iaGkNYTm-pcl㐑QaɁOPQ[aadikuwdcpNNO Y7Y]__[`!r>supuy 3Q茽 
p7vNNRSpTVY[__nn}j5mwNOZO~Xen8NXYY`AzOQeSDNRi[UNR:TYY[P[W[\`caHnpqnstux}+(ɊǖO\RVef(p|pr5}Lr[qhkozv\fo[{|*6NNS X4XXYl\^3^_5cfgVjjk o?rFsPtz|x߁烊l#ψݍwQTW([bMgPh=hn=np}~! KNr-{͓OGONQ2TY^bguinjlnrs*u{}5W[Ο_RT Z[dXeunrvzM{|M~>߃{+ʍd_iOCOzPQhQxRMRjXaX|Y`\\U^`b0hkloqNt u0u8uQvr{L{{{~n>I?"+ZkRb*bmYvdz{}vS`\^o8p|cdzvNNNP\PuTHY[^@^^_`c:e?etefvfxgihjkcl@mmnn^ppssu:w[xyz z}|}Gꌞ-Jؒf̓ V\R6RU|X$^_`choym{,ͅDd=LJOQFQRV2__kcdefAffghhioonqgqr*tw:yVyZyz z||}D~pT m;՜e|[X\ SRbsP'[_`akhmt.z.}B}~1k*5~OPWP]^c+jN;OOOPZY݀TjThUYO[]^f]g1gh*lm2nJopsu|L}},}ۊ;p31NRDz|OQQW[\fYj=mZnoq uoz"!u˙N-NFS}jiklAzXafbpuu~RINKSTW0W@_ccdoe/efzggkbl`lo,wx%yIyW}󂝂rvzz7~TwUUXuc/d"fIfKhmikm%nsthtu[uvwwy~ ~/:ю돰2csOSYZ^hNtuyz̏egWoW}ݏ/_aoNOPSU]o]k!kdx{IʐncId>w@z/jdoqttz|~|~ }LR9[dg-}.PSXyaXaYaezًP P!RuU1Z<^_pa4e^f f6fino2sv!z9YքPW[[_icx&}܅!ǑQg{VQY`UPRT\:a}bbdenv `_NSCUY)]dlmszw!QTU_do}M5P\lmuw|=|dyXY^cwrRuwk܌^ftm}˗QRCfmn}.^RRTabbhiiZj5pq&x]yyyz xՃIIbOVqwׇ[_gQSXZ[`ad`~=p%dP]gXbciixjnkvy˂)ύKۚ6 Nu\y]z{Q{~.ĎYtf%i?tCQg.QE_l]w`ST9V4Z6\1pZ퍣_PtNS`n,\dOP$U\^_`ehlmquuvazzI}}n􆩏əRRGRŘ퉪NgoO[glmxtx'ݓ|yz1_NTU>XY`bSbg6iU5@P,SSUDW|bXdfkgoot"t88TQVWf_HakNpXp}Yj+cw=XTd-i[^noiQLSY*` aKklpl{΂ԍƐdodeQNTWa_hvu{R}qXí*9PxYWYb*a]ryWaZF]bddgwlm>r,t6x4wۘR$WBgrHt㌩*QkScLOiU`eWlmrLrz_mopaOPObArG{}MWj^sg UT [^c^_ e=[OHSS STTW^`bbcUlmfux2ށ/ބa ^EffprOR}_jaSgSjotyhhyǘĚCTziSJ_|buvBS9_<_lsubu{FNO\c>myrϘ0NQDRW_blnppPpqstiJaQn_W`agfYJNNNT|XXY}\_'b6bHf fgkmimnVnooop]rt%tZtvy\|~ႦkN_twje`bwZZfmn>t?B_`{T_l^lm*p}y ;ST[j:pkuuyyqAt de+xxzkN8UYP[^{`ckafehSnqet}i%m;ns>AQ^L_`M`a0aLfCfDiln_nobqLtv{|'RWQS/V^_`b`affgjmoppsj~j4ԊRsr[jkTV[]eHefhmmr;uMOPSTT that are variations on the encoding commonly called Shift_JIS: x-sjis-cp932.enc x-sjis-jdk117.enc x-sjis-jisx0221.enc x-sjis-unicode.enc (This is the same encoding as the shift_jis.enc that was distributed with this module in version 2.17) Please read his message (Japanese_Encodings.msg) about why these are here and why I've removed the shift_jis.enc encoding. 
We also have two contributed encodings that are variations of the EUC-JP encoding from Yoshida Masato : x-euc-jp-jisx0221.enc x-euc-jp-unicode.enc The comments that MURATA Makoto made in his message apply to these encodings too. KangChan Lee supplied the euc-kr encoding. Clark Cooper December 26, 1998 XML-Parser-2.46/Parser/Encodings/big5.enc0000644000000000000000000011740213542305435016530 0ustar rootrootBIG5YA  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~Y@Y@@@W@@@@S@@@@O@ @ @ @ K@ @ @ @G@@@@C@@@@?@@@|@;@@@x@7@@@t@1@@@n@ -@ @!@"j@#)@#@$@%f@&%@&@'@(b@)!@)@*@+^@,@,@-@.Z@/@/@0@1V@2@2@3@4R@5@5@6@7N@8 @8@9@:J@; @;@<@=F@>@>@?@@B@A?  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX0 00 "0 & %PdRTUVW\ 1 34O 56[]78009:00;<0 0 =>00 ?@0 0 AB00CDYZ[\]^    00 5 2 ;0%%%%%&&%%%%%%2! >?IJMNKL_`a ""f"g"`""R"abcdef"<")"*"" ""33"+"."5"4&@&B&A& !!!!!!!!"%"#<0 !! ijk333333333QYQ[Q^Q]QaQcUt|%%%%%%%%%%%%%%%%<%4%,%$%%%%%% %%%%m%n%p%o%P%^%j%a%%%%%q%r%s!`!a!b!c!d!e!f!g!h!i0!0"0#0$0%0&0'0(0)SD!"#$%&'()*+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ11111 1 1 1 1 1111111111111111111 1!1"1#1$1%1&1'1(1)NNYNNNCN]NNNQ?QeQkQRRRSSAS\SN N NN N+N8QNENHN_N^NNQ@RRSCSSWXYY'Ys[P[Q[S[\\"\8\q]]]]]]^r^_ _bMNNN N-N0N9NK\9NNNNNNNNNNNNNNNQCQAQgQmQnQlQQRRRRRRSS9SHSGSES^SSSSXY)Y+Y*Y-[T\\$\:\o]^{^___bb6bKbNe/eeeeefgg(k kbkykkkll4pkr*r6r;rGrYr[rsNNNNNN;NMNONNNNNNNNNNNNQEQDQQQQQQR RRSSSSNSJSISaS`SoSnSSSSSSSSSSSSSSSSSSSSVVYY.Y1YtYv[U[\<]]]^^^s^|____b bSbTbRbQeeg.g,g*g+g-kcklll8lAl@l>rssttuuu(u)u0u1u2u3uv}vvvwwwy:yztzNNNRNSNiNNNNNO OO OO OOONNNNNNOO QIQGQFQHQhQqQQRRRRRRSS!S SpSqT TT T TTT TTT TTTTTVVVW3W0W(W-W,W/W)YYY7Y8YYxYY}YyYY[W[X[[[[[\\y]^^v^t____bb b bbcb[bXe6eeeeffg g=g4g1g5k!kdk{ll]lWlYl_l`lPlUlal[lMlNppr_r]v~z|s|6 3 nr~k@Lc!N2NOMOOOGOWO^O4O[OUO0OPOQO=O:O8OCOTOT&TNT'TFTCT3THTBTT)TJT9T;T8T.T5T6T TWPWOW;XY>YYYYYYYYYYYY[][\[Z[[[[[\,\@\A\?\>\\\\]^ 
^^^^___d_b_w_y________bbbbbbbvbbmbb|b~bybsbbobbnbbbbe9e;e8efg_gNgOgPgQg\gVg^gIgFg`gSgWkeklBl^llllllljlzllpllhlll}llrl~ltllvllllpvp|p}pxrbrar`rrsu,u+u7u8vvwyyyzv|UoҊ7FUdpʏƏŏ]ᐑIƑ̖2.1*,N&NVNsNNNNNOoOOOsOOlOOOOOpOuOOiO{OO~OOOzQTQRQUQiQwQvQxQQR;R8R7R:R0R.R6RARRSRSTSSSQSfSwSxSySSSTsTuTTxTTT{TwTTTT|TTqTvTTTbThTT}TVWWwWjWiWaWfWdW|YYIYGYHYDYTYYYYYYYYYYYYYYYY[_[d[c[[[[[[\\H\E\F\\\\\\\^^^^^^^x^^^^^^_&_'_)____|_____```/`5``*``!`'`)`+`bbb?b>b@bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbe>eeefff fffffff fg g gmgggqggsgwggggogpggg~gguggg|gjgrk#kfkgkllllllllllllllllllllllllllllllllllpppppr,r-r8rHrgrirrrrrsssssu=uuuvvvvwwy>y@yAyyzzzyz|T NqRhˏΏԏѐǑѕw@?;DBR^NNNOOOOOOOOOOOOOOOOOOOOOOQWQQQRNRCRJRMRLRKRGRRRRS SWS{SSTTTTTTTTTTTTTTTTTTTTTTVWWWWWWWWYUYQYOYNYPYYYYYZYYYYZY[i[[[[[\\N\O\M\K\\]^^%^^}^^^__-_e_______``` `%``(`M`p`h`b`F`C`l`k`j`dbAbcc bbcbbcbbbbbbcce?eEeeef%f-f f'f/ff(f1f$fgggggggggggggggggggggggggkjkkkkkllm m2m*mAm%m m1mmm;m=m>m6mlm9m'm8m)m.m5mm+pppppppppr0rrrortrrrsssssssuu-uOuLuNuKuuuuuvxvvvvvvvwvvw w vvwwxxx x yFyIyHyGyyyyyzzzz|}}}}} }}}8 6րڀÀĀ̀ۀ΀ހ݁"ۂ ҂ׂ܂Ԃтނӂ߂Py{zMkԊts͎̏ʐΐÑKJ͕PKLMbi˗ۘߙNXNP P P#OP&P%OP)PPPc/cUcBcFcOcIc:cPc=c*c+c(cMcLeHeIeeefBfIfOfCfRfLfEfAfgggh!h8hHhFhSh9hBhTh)hhhLhQh=ghPh@hSfUFUjUfUDU^UaUCUJU1UVUOUUU/UdU8U.U\U,UcU3UAUWWW W WXX XWWWXX5WWY YbZ6ZAZIZfZjZ@ZwUUUUUU~UUUW X/X*X4X$X0X1X!XX XXY`ZwZZZZZ[s[q[[[[\ \ \1]L]P]4]G]^E^=^@^C^~^^^^_<_m___`````aa#`a```ah`a`a aabbIccccccccccccccccvcccdRcce^efebeceeefnfpftfvfoffzf~fwffgghhhhhihhhhhhhhhhi iihhinhk>k:k=kkkkl.l/l,n/n8nTn!n2ngnJn n%n#nn[nXn$nVnnn-n&non4nMn:n,nCnn>nnnnNncnDnrnin_qqq&q0q!q6qnqrLrrs6s%s4s)t:t*t3t"t%t5t6t4t/tt&t(u%u&ukujuuuuuuuv{v|vvvvwOwx]xlxoz zz zzzzzzz{I{V{F{P{R{T{M{K{O{Q||}^}P}h}U}+}n}r}a}f}b}p}sUՀ RUTKQN9F>LSt ń W ̃ʄ8܄ԃ߆[߆نԆۆІވWˆ;`U^a4a'a aa7b!b"dd>dd*d-d=d,dddd d6dddeleeffffffffgiimiZiwi`iTiui0iiJihiki^iSiyii]ici[kGkrkkkknnnnnnnnnnnnnnnnnnnnnnqNqYqiqdqIqgq\qlqfqLqeq^qFqhqVr:rRs7sEs?s>totZtUt_t^tAt?tYt[t\uvuxvuvuuuuuuvvw[wkwfw^wcwywjwlw\wewhwbwxxxxxxx|xxxyzyy,yzzz 
zzzzz{w{{`{n{g|||}}y}}}}[nijrVXqpxenskyzfGw=1ufkIl[<5acimF^\_ Y߈Ԉو܈؈݈ʈՈ҉krsfip|cqmbnly{>hbʌnjȌČ̌ōߍ掲 KJSBTAljiɒ7W8=@>[KdQ4IME9?Z͖˖ɖʖVtv  鞂 PPPPPPPPPPPPQbQRRS1SUVVVUVVV V VUVVVVUWWXuX~XXXXyXX}XY%Y"Y$YjYiZZZZZZZ[u[[[[[[[[[\ \b]]^[^c^U^W^T^^_ _F_p_aGa?aKawabaca_aZaXaub*ddXdTddxd_dzdQdgd4dmd{ereeefffiiiiiiiiiiiiiiiiiiikIkLl3o3onono)o>o o,ooo"nnoo1o8o2o#oo+o/oo*nonnnqqq}qqqr>rrsDsPtdtctjtptmuuv'v v v vvvww}wwaxxxxxxyyyz.z1zzzz{{{{u{{{{{{{|||}}}}}}}}}}}}}}}}}}}p߀^ZPKɄƄĄ˄ӄфʇ?;"%4U7)jӌьҍk`X\cY^b][uxwtx{|̖җ| AB󞼟;JQQPPPQQQ QQRRRRRRSV.V;V9V2V?V4V)VSVNVWVtV6V/V0XXXXXXXXYm[ Z[ Z[ [[[[[\d\e]]^b^_^a^^^^^^_H_q__avagana]aUaa|apaka~aaaaaaaaab.didodyddddddddddddddddddeuewexffffj#jijjjij!jj ijjijkPkNkkko?o|ooQofoToomo[oxonoozopodooXnooo`o_qqqqrVrsNsWtittt~tuv v)vv$v&v!v"vvvwwwwwxxxxxxxxz?z~F~7~2~C~+~=~1~E~A~4~9~H~5~?~/DqrposƁÁɁ q~gч҇Ƈȇˉ;6D8= A?sIKHJD>BE?}9M(uJeK~l[pZTʕ˕̕ȕƖ֗ӘF5;?Ϟޞܞݞ۟>KSVVXX[8_]ab3dddedddefg&jjjjjjjjk_kxkp p opoppqqqqswsuttuvVvXvRwwwwyyzazbz`zz|+|'|*||#|!|~T~U~^~Z~a~R~YHwv́ς υͅЅɅ(9,+PYcfd_UIMГԕ֕ЕՖܖٖۖޗ$MOLNS>?=.ONMʛɛțQ]`,Q3VXXX[^aaaaeeffjjjjppp(pppprr rXrsxsztttuuv_vawyyzkzi|>|?|8|=|7|@~k~m~y~i~j~s؅݅Յ `_V^A\XIZNOFY |rvlztTNѓߓÓȓܓݓ͓֓ؓדܖ*'aܗ^X[EI ֛۝arjlRVVVVVX[@[C[}[]aaeeefg'jp>p0p2rs{tvbvey&y*y,y+zz|L|C|M||~}~|~Lڂf  dplfo_k ˔0ęRQ+075 y/_caQ7Q8VVVY\l]aaeeefjkjkpLrrttviw|P~~-#"!jltw}_.35:82+892geWEC@>ϛTQ-%\fgQ×kUUMқI1>;ӝן4ljV]be#e+e*fktz|d|c|e~~~8?1c`dho\Z[WӚԚќTWV垟VXe,p^vqvrwP69bwjBHDƘp_"X_|}wr^kpc|l|n;rpq^֛#pdwڋwɚbe~Ŕ}~|wxTr(j1|r00000A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNO$`$a$b$c$d$e$f$g$h$i$t$u$v$w$x$y$z${$|$}NBN\QSSNN NGNV \n_sNQNN.NNNNQRSlSW YY,\]ekklr?N1Ng9g8g;g:g?gOgORO_OAOXO-O3O?OaQQRRR!RRS ScSrSST0T7T*TTTETTT%TT=TOTAT(T$TGVVVWAWEWLWIWKWRYY@YYYYYYYYYY[[\(\*\\\\\\\\\\]^ 
^^^^^^___x_v______________`_b:bbbbbbbqb{bzbpbbbwb}brbte7eeeeegEgGgYgUgLgHg]gMgZgKklllxlglkllllqlolillmllllflslel{lptpzrcrrrrrrrsssssu:u9uuvy=4xɏ0(/-N3OO|OO}OOOvOtOOOwOLOOjOOyOOxOOOOOOOOkOnQQQR5R2R3RFR1RS S SuuvvvvvwwwwwwyBy?yzxz{z|u|5 ‚ÂpomnVҏˏӏ͏֏Տא9=<:COOOOOOOOOOOOOOOOOOORDRIRRS=S|SSSSTTTTTTà TTTTTTTTpTTTTrTTWWWWWWWWWWWWWWWXY YSYYYZYYYYYYYYYYYYYYYYY[\L\\\\\\\\\\\\\\\\\\\\]^!^"^#^ ^$^^^^^^__._V_`7`9`T`r`^`E`S`G`I`[`L`@`B`_`$`D`X`f`nbBbCbc c bccbbcc bbcccbcbbeAeCeef6f!f2f5ff&f"f3f+f:ff4f9f.ggggggggggggggggggggggggggggggggggggggggggk(kkkkkkl l!m(m4m-mm9/%3-DQ%V?A&"BN*ZMZ9ZLZpZiZGZQZVZBZ\[r[n[[\Y]] ]]] ] ](] ]&]%]]0]]#]].^>^4^^^^^_6_8___`````````````````````c2ceccc}ccccccccocccnccuccmcc|cc;ccxcccccpeSefefaf[fYf\fbghyhhhhmhnhhiVhohhhhuhthhhwhh|hkhrhhhqh~hhhhhhhxh{hhhh}k6k3k7k8kkkkkl*mmmmntmmmmmmnmmmmmmmmmmmmmmmmmmmmmmmmmmmpq pqpq pqpqppqqqppq qqr~r{r|rssssss srssssssttsttssst t studucuuuuuuuvvvw9w/w-w1w2w4w3w=w%w;w5xHxRxIxMxJxLx&xExPydygyiyjycykyayyyyyzzz{5{G{4{%{0{"{${3{{*{{1{+{-{/{2{8{{#||||}5}=}8}6}:}E},})}A}G}>}?}J};}(cɀGCH%-,!'"83:42tzstu}~vYV†ņȆ̆ÆR։ىՊ0',9;\]}}{y؎ގݎ܎׎$  !ԐVXZSUz|mkqoj噗PPPPPPPPPPPhPPPPQ_QSSSSUUUUUwVEUUUUUUUUUU}UUUUUUW X)X7XXX'X#X(WXHX%XXX3X?X6X.X9X8X-X,X;YaZZZZzZZZxZZ|ZZZZZ7ZZZZZZZ{Z}ZZZZZ[[[[[[[\ \0]7]C]k]A]K]?]5]Q]N]U]3]:]R]=]1]Y]B]9]I]8]<]2]6]@]E^D^A_X___``````a`a aa`a````aaaa`aa bJccccccccdcccccccdaccccccccccccccce2egejede\eheeeeeeeef|flf{ffqfyfjfrgi hihi*hhhihhhhhiihhihiiphihhihhhhhi i ihhhhhhhihhii%hk9k;k?k?ef׀QOPԁCJROG=M:<=?u;σ#ƃȃヿ݃؃˃΃փɄ ބƒՃǃуÃă׃ۃ؆ӆچ݆܆׆шHVU׈ɉ݉ډۊNM9Y@WXDERHQJLO_؍Ӎ͍Ǎ֍܍ύՍٍȍ׍Ŏ-4/,ad_b` %& '${~–ȖÖlpnNNNPPPPPPPPPPPPPPPPPQRzRxR{R|UUUUUUUUUUUUUUUUUUUUUUUWXSXhXdXOXMXIXoXUXNX]XYXeX[X=XcXqXZZZZZZZZZZZZZZZZZZZZ[[[\\3]q]c]J]e]r]l]^]h]g]b]^O^N^J^M^K^^^^^_@__`aIaJa+aEa6a2a.aFa/aOa)a@b hb#b%b$cccddd d d$d3dCdddd9d7d"d#d d&d0d(dAd5d/d dd@d%d'd cdd.d!deoeefffffffffxg ifi_i8iNibiqi?iEiji9iBiWiYiziHiIi5ili3i=iehixi4iii@ioiDiviXiAitiLi;iKi7i\iOiQi2iRi/i{iF 2*-<:15B&'8$0뉝芫=hiՌό׍   # "$!zrysvzy}~-X}z~{Ηٙ͘ǚ>?`a_PQQ0PQQPPQ Q PQ 
RRRRVHVBVLV5VAVJVIVFVXVZV@V3V=V,V>V8V*V:WXXXXXXXXXXZZZZZ[Z[Z[[[[\g]]]]]]]]]]]]^i^]^`^\}^^^_I_aaayaaaaaaaaaaaaaafab-dndpddddddddddddhddevezeye{eefffffffjjjiijiij iiijjij'iijiij@jiij iij jjj%jij&jijkQkkkkllkloAo&o~oooooooboOooZoovolooUoroRoPoWooo]ooaoko}ogooSooiooocowojo{qqqqqqqqqqqqqqqqqrrsXsRs^s_s`s]s[sasZsYsbtttttt}ttt|tyuuu~v%vvvvv#vv(vvvvvwwwwxxxxxxxxxxxyyyyyykvz9zzz{{{{{{{{{||||}}}}}}~}}}}}}}vdgOSRPNQ$;)  '+ *(.1& 0 /bVcdwsXT[RaZQ^mjPN_]olzn\eO{ubgiZ   ϊƊӊъԊՊ׊Ŋ؊Êٌ>Mߌٌڌ݌獠 #%$.&',$ #spogk/+)*2&.ВÒĒْϒߒؒגݒ̒’ʒȒΒ͒Ւɒޒђƒ|ӗZЗϘ&)( 'ܙ͙ϙәԙΙəؙ֙˙י̚FCgtqfvuphdlӟQQQQQQS4SVpV`VnVsVfVcVmVrV^VwWWXXXXXXXX[[[[![[[[[([[ [[]]]]]]]]]]]]]^g^h^f^o^^^^^_K_aaaaaaaaaaaaddddddddddde3ee|effffffffffg#j4jfjIjgj2jhj>j]jmjvj[jQj(jZj;j?jAjjjdjPjOjTjojij`j[qNnuUg`f]Telcedy&0-.'1")#/,݋ߊȊފln3>8@E6<=A0?6.52974vy{356'z8<#F- ˓%4$)95* ͕ԗ 5/2$')皹3|~{z}% )"՞֞=Q&Q%Q"Q$Q Q)RVVVVVVV~VVVXXXX[-[%[2[#[,['[&[/[.[{[[]^l^j__aaaaaaaaaddddddeeeefjjjjjjjjjjjjjjjjjjjjjjjk[kl ooooooooooooooqqqqqqqsssnsotttttttttuuuuuvCvHvIvGvvwwwwwwwwwxxyxxxyxxyyyz\z[zVzXzTzZzzz||{|{{|{| {|| ||{{|{{|| |~-~<~B~3H~8~*~I~@~G~)~L~0~;~6~D~:E~},ā́ʁŁǁ[Z\{w|zxWyvhŇɇLJ̇ćʇއ53<>AR7B " OprqooNMSPLGC@~8dVG|X\vIPQ`mLjyWUROqw{a^cgNYǕɕÕŕ ՗ԗADJIEC%+,*32/-10H3Ag6./180EBC>7@=-Ȟڞ$#"TQ1Q-Q.VVVVVVYp[<\i\j]^m^naaaaaaaaaadeddeedeefffjjjjjjjjjjjk^kl pp p ppppopop&oop rqqrqsvttttttttuuv\vdvYvPvSvWvZvvvwwxy yyy yyyyyz_||)|| ||-||&|(|"|%|0~\~P~V~c~X~b~_~Q~`~W~SuсЂ_^ƅŅDžą˅΅ȅŅ҆$iۇ߇ԇ܇Ӈ؇㇤ׇو݉SKOLFPQI*'#305G/<>1%7&6.$;=:Bu\b`WV^eg[Za]iTFGHK(:;>ҕӕіז]–ߖؖݗ#"%חٗ֗ؗPQRA<:  ܛ)5JLKǛƛÛӛě\SOJ[KYVLWRT_XZߟ%+*)(LUQ4Q5RRSVVVVVVXXXY[=[>[?]^p_aeee e e eeeefjjjjjjjjjjjjjk`kl pp'p pp+p!p"p#p)pp$pp*r r rrrrrrrrttttuv`wwwwyyy!yyyyzgzh|3|<|9|,|;||~v~u~x~p~w~o~z~r~t~hKJxׁՂdacمڅׅ؅߅܅хޅ  bZ[Wa\X]YPHJ@SVTKUQBRWCwv mxsjo{RQOPS@?ޓǓϓ“ړГ̓ٓʓԓՓēΓғ}ڕۖ)+,(&ݗޗߘ\Y]WHGC%$" '#›  7ޛԛכܛٛ՛ڝwqx}ktupis{oyh-@AMVWXS7VVVX[E]]^^__aeeeeefffjjjjjjjjp02.3vtsEdcbU]W^ėŘVY RXPJMKUYLNžО876COqpnoVV[N\me-ffkp_pap]p`r#ttwy8yy|j~mC875K”khiFCGǗ^՛Ycgfb^` FtuvVe.ekkkkpbr&rwwy9|i|k|~~~~FGHyz|{nmoqsIr_hnm  Gx{zyWpf|o<Ôtxvu`tsqu 
phpe|j>=?ɗKst̙adfg$Hbkr'Lih.r)Kyvukzpipj~IXML-Parser-2.46/Parser/Encodings/x-sjis-jisx0221.enc0000644000000000000000000004343213542305435020400 0ustar rootrootx-sjis-jisx0221(%  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|} >abcdefghijklmnopqrstuvwxyz{|}~\@@OL@@^@c@ @@@W@@@@ K@ @ @ @ ?@ @ @v@3@@@j@'@@@^@@@@R@@@@F@@e > abcdefghijklmnopqrstuvwxyz{|}~ !"#$%&'000 000@>?00000N0000  \0 \ & %     00;=[]00 0 0 0 0 0000 ""`"f"g""4&B&@ 2 3! &&%%%%%%%%%%% ;0!!!!0"" """""*")"'"(!!""" "#"""a"R"j"k""=""5"+",!+ 0&o&m&j !%!"#$%&'()*+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ0A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 !"#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNO%%% %%%%%,%$%4%<%%%%%%%#%3%+%;%K% %/%(%7%?%%0%%%8%BNUZ?Tac(Y"uzP`cn%efhW'ebq[YІ{}b}b|[^c fhHǗgONO OMOPIVY7YZ\ `aapfipuOupy}}ÄcUzS;NNW߀xNXn8z2(/QASpTTVY_m-bpT S[p–So\zNxn&VUk;YSmftܕVBNKO SU[0_qf fhl8lm)t[vzN4[`muvʙ`iSQWX0YD[^`(cclopqqYqs?~vх`[XielZu%QY.Ye__bej*k'ksV,\l{Q\KahvraNYOSx`in)zON SNOUO=OOsRSV YZ[[yfggkLlpksyyz<{ۃwӇfV)NO\brYu;傽řNOVXJX^_`*``babbe9AffhmwppuLv}uQRYT[]ahimxˈWrmlWgΒRVT^bdhkSlWo"ooEtuvw z{|!}6f̌Qeә(N8T+\]svLw<\ TXOOSqUVhWYG[ [\^ ^~_cg:eeghhj_^0kll}uyH[cz}_w̏Z/__`hjtZxwN^NO|OPPQIQlRRRSSTTUWQWY}[T[][]]]^x^^^_`RaLbbce;ffCfgmh!hil_m*min/nu2vxlz?|}}}^}T*RLaʑuqx?M؝;R[RSTXboj_QKR;TJVz@w`sDo pu_`ښrۏkdNVWdXZZ`haffh9hmu}:nBNOPSU]o]]gltsxPWP^c+PPQgTX^Y[_ibMch=ksnp}rxx&yme}0܈ RdW(gPjQWB*X:iT]WxO\RJTd>f(ggz{V}"/h\{9SQR7[bddg-kvcLvfRN PS\q`dech_qsu#{~ۑxefkNNO:OR:SSUVXYYY[P\M^^+_`ce/[\eeegkbk{lsEyIy|}}+󉖊^ifnjܖ̘koNOtuxy: 3ꄔlP_X+z[NSWY1Z[`nou[{Prg\aJ~Q\chfeqny>}ʐnǐPR\:gSp|r5Lȓ+[_1`N;S[bKg1krsz.kRQSTj[cj9}VSTh[\1]Oabm2yy}B~MҁFrt/1KlƑNOOQESA_bglAn sc~&͒SY[my]~.|X~qQSO\f%wzQ_eiokmnodv}]uQRb@ffn^}rfRSYs^_`UdPQRS SGSTUFU1VYhYZ<[\\\\^^^_pbbbccwff f-fvg~hjj5lmn nXq d_xRbcdBb-z{}v 
INQHSCS`[\\]b&bGdhh4lmEmgo\qNq}ez{}~Jz9n΍xwRMUo8q6Qhy~U|VLXQ\cffiZruuyyVy|} }D4;a PRuSSP UXYOr=[\dS``c\cc?cdef]iioqNuvz|}}aIXlōpmPXaӅ5 OPtRGSs`ocIg_n,O\^e}SRQvc[X[k\ d gQ\NYY*lpQU>XY`bSg5iU@Ě(OSX[\^/_` aKb4flnހ΁Ԉ.ۛNSY'{,Lnp'SSUD[bXbblot"8o8QSSOFTYj1]zꏿhڌ7rHj=N9SXVWfbcekNmn[pwz{}=Ɔˊ[VX_>efjku7P$wW0_`efzl`uznE{u\z{Qyz6Zw@N-N[_bft6x4ZFuO^bceWgovrL̀)MP WZhisqdrXjyw)O/ReSZbglv}{|6fo r~Q{xr{{Hj^auQu`QkbnvzOpb{OVzXY䖼O4R$SJSS^d,egl>lNrHrsuT~A,錩{đqic=fiujvxЅCS*SQT&Y^_|`bIbybekluvxy}w^j |8P\>_gkt5w ;gzS9u_f_<_ub{Feg llpx2~+ނ *JҘlNONPRVWJY^=__b?fgghQ}!~2T ,SPS\Xdg4rgwfzFRlkX^LYTg,QvdixTWYf'gkTi^UggRh]NOSbg+lO~mNabno+Tsg*E]{\[ƇnJzY|lw RY"q!r_wۗ'ai ZZQT T}fvߏYr]nQMh}}bdxj!Y[_ksv}Q2g(vgbR\$b;|~UO`} SN_QYr:6_%wS_y}3Vg󅮔Sa alvR8U/OQQ*RS[^}`acg gngms6s7u1yPՊJćYNOYN?P^|Y[^ccdfiJim nqu(zIɉ! e} a~bk2lmtmge}o#,TBojp2RZA^_gi|imjorbr{~KQmy2P-Tqkjā`gNNkhin~xU_ NNN*N1N6NzYUYPYNYZYXYbY`YgYlYiYxYYO^OYYYYYYYYZ%ZZZZ ZZ@ZlZIZ5Z6ZbZjZZZZZZZZZZZZZ[ [ [[2Z[*[6[>[C[E[@[Q[U[Z[[[e[i[p[s[u[xe[z[[[[[[[[[[[[[[[[[[\\\\ \\ \"\(\8\9\A\F\N\S\P\O[q\l\nNb\v\y\\\Y\\\\\\\\\\\\\]\] ]]]\]]]]]"]]]]L]R]N]K]l]s]v]]]]]]]]]]]]]]]]]]]]^ ^^^^^6^7^D^C^@^N^W^T^_^b^d^G^u^v^z^^^^^^^^^^^^^^^^^^^^^^^__ _]_\_ ___)_-_8_A_H_L_N_/_Q_V_W_Y_a_m_s_w____________________`_`!`````)``1```+`&``:`Z`A`j`w`_`J`F`M`c`C`d`B`l`k`Y`````````````````_````aMaa``a``aa!``a aaGa>a(a'aJa?acMdcOcccccvcccccckcicccccccccdd4ddd&d6edd(ddgdodvdNe*ddddddddddddddd ddbdde,ddddedeeee$e#e+e4e5e7e6e8uKeHeVeUeMeXe^e]erexeeeeeeeeeeeeeeeeegrf fegsf5f6f4ffOfDfIfAf^f]fdfgfhf_fbfpffffffffffffffffff?fffffgggg&g'8g.g?g6gAg8g7gFg^g`gYgcgdggpgg|gjgggggggggggggggggggggjhhFh)h@hMh2hNhh+hYhchwhhhhhhhhjhhthhhihh~ihihi"i&hi hhhhi6iihhi%hhhi(i*ii#i!hiyiwi\ixikiTi~ini9iti=iYi0iai^i]iijiiiiiiii[iiiiij.iiiiiiijjik iiijijijij jjj#jjDj jrj6jxjGjbjYjfjHj8j"jjjjjjjjjjjjjjjjjjjjkjkk1kk8k7vk9kGkCkIkPkYkTk[k_kakxkykkkkkkkkkkkkkkkkkkkkkkkkklllll$l#l^lUlbljllllll~lhlslllllllllllllllllmMm6m+m=m8mm5m3mm mcmmdmZmymYmmommnn 
mmmmmmmmmmmmmmmmmn-nnn.nnrn_n>n#nkn+nvnMnnCn:nNn$nnn8nnnnnnnnnnnnnnnnoAopLnnno?no1no2no>onoozoxooooo[oomoo|oXoooofooooooooooooooop p opppoppotpppp0p>p2pQpcppppppppppppq pqqqeqUqqfqbqLqVqlqqqqqqqqqqqqqqqqqqqqr rrr(r-r,r0r2r;rsNsOsWsjshspsxsus{szsssssssstttot%st2t:tUt?t_tYtAt\titptctjtvt~ttttttsttttttttttuuuu uu uuuu&u,uz7zCzWzIzazbzizpzyz}zzzzzzzzzzzzzzzzzzzzzzzzzz{{{ {{3{{{{5{({6{P{z{{M{ {L{E{u{e{t{g{p{q{l{n{{{{{{{{{{]{{{{{{{{{||{{|`|||{{|| {|#|'|*||7|+|=|L|C|T|O|@|P|X|_|d|V|e|l|u|||||||||||||||||||||;|||||}}}}} }E}K}.}2}?}5}F}s}V}N}r}h}n}O}c}}}[}}}}}}}}}}}~=}}}}}}}}}}}}}~~ ~#~!~~1~~ ~ ~"~F~f~;~5~9~C~7~2~:~g~]~V~^~Y~Z~y~j~i~|~{~}~}~~~~~~~~~~~~8:ELMNPQUTX_`higxq܀ !(?;JFRXZ_bhsrpvy}Qۀـ݀Āڀց )#/KF>SQqneft_Ɂ́сف؁ȁځ߁ )+83@YX]Z_dbhjk.qwx~߂҂ރ܃ ك5421@9PE/+#|su΃؄  " 8m*(ALONIV[Zk_lot}:A?HLNPUblxz|bȌڌ  N͍gmqsύڍ֍̍ۍˍߍ B504JGILPHYd`*cUvr|ƎŎȎˎێ  &3;9EB>LIFNW\bcdڏ!  '659OPQRI>VX^hovr}bHۑ20JVXceisrɑˑБ֑ߑۑ,^WEIdH?KPZϒD."#:5;\`|nV֓דؓÓݓГȓ6+5!:ARD[`b^j)puw}Z|~oÕ͕̕Օԕ֕ܕ!(./BLOKw\^]_frlΖ˖ɖ͉Mܗ Ֆ$*09=>DFHBI\`dfhRҗkqy|z×Ɨȗ˗ܗOzߗ 8$!7=FOKkopqtsĘØƘ !$ ,.=>BIEPKQRLUߙۙݙؙљ+7EB@C>UM[W_bedikjϚњӚԚޚߚ"#%'()*./2DCOMNQXtʛƛϛћқԛ:   .%$!0G2F>Z`gvx *&#DA?>FH]^dQPYrozĝƝϝٝӝuy}a̞ΞϞОԞܞޞݞv!,>JRTc_`afgljwrvX/iǐYtdQqXML-Parser-2.46/Parser/Encodings/x-sjis-jdk117.enc0000644000000000000000000004343213542305435020117 0ustar rootrootx-sjis-jdk117(%  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~abcdefghijklmnopqrstuvwxyz{|}~\@@OL@@^@c@ @@@W@@@@ K@ @ @ @ ?@ @ @v@3@@@j@'@@@^@@@@R@@@@F@@e\~ abcdefghijklmnopqrstuvwxyz{|}~ !"#$%&'000 000@>?00000N0000  \0 \ & %     00;=[]00 0 0 0 0 0000 ""`"f"g""4&B&@ 2 3! 
&&%%%%%%%%%%% ;0!!!!0"" """""*")"'"(!!""" "#"""a"R"j"k""=""5"+",!+ 0&o&m&j !%!"#$%&'()*+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ0A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 !"#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNO%%% %%%%%,%$%4%<%%%%%%%#%3%+%;%K% %/%(%7%?%%0%%%8%BNUZ?Tac(Y"uzP`cn%efhW'ebq[YІ{}b}b|[^c fhHǗgONO OMOPIVY7YZ\ `aapfipuOupy}}ÄcUzS;NNW߀xNXn8z2(/QASpTTVY_m-bpT S[p–So\zNxn&VUk;YSmftܕVBNKO SU[0_qf fhl8lm)t[vzN4[`muvʙ`iSQWX0YD[^`(cclopqqYqs?~vх`[XielZu%QY.Ye__bej*k'ksV,\l{Q\KahvraNYOSx`in)zON SNOUO=OOsRSV YZ[[yfggkLlpksyyz<{ۃwӇfV)NO\brYu;傽řNOVXJX^_`*``babbe9AffhmwppuLv}uQRYT[]ahimxˈWrmlWgΒRVT^bdhkSlWo"ooEtuvw z{|!}6f̌Qeә(N8T+\]svLw<\ TXOOSqUVhWYG[ [\^ ^~_cg:eeghhj_^0kll}uyH[cz}_w̏Z/__`hjtZxwN^NO|OPPQIQlRRRSSTTUWQWY}[T[][]]]^x^^^_`RaLbbce;ffCfgmh!hil_m*min/nu2vxlz?|}}}^}T*RLaʑuqx?M؝;R[RSTXboj_QKR;TJVz@w`sDo pu_`ښrۏkdNVWdXZZ`haffh9hmu}:nBNOPSU]o]]gltsxPWP^c+PPQgTX^Y[_ibMch=ksnp}rxx&yme}0܈ RdW(gPjQWB*X:iT]WxO\RJTd>f(ggz{V}"/h\{9SQR7[bddg-kvcLvfRN PS\q`dech_qsu#{~ۑxefkNNO:OR:SSUVXYYY[P\M^^+_`ce/[\eeegkbk{lsEyIy|}}+󉖊^ifnjܖ̘koNOtuxy: 3ꄔlP_X+z[NSWY1Z[`nou[{Prg\aJ~Q\chfeqny>}ʐnǐPR\:gSp|r5Lȓ+[_1`N;S[bKg1krsz.kRQSTj[cj9}VSTh[\1]Oabm2yy}B~MҁFrt/1KlƑNOOQESA_bglAn sc~&͒SY[my]~.|X~qQSO\f%wzQ_eiokmnodv}]uQRb@ffn^}rfRSYs^_`UdPQRS SGSTUFU1VYhYZ<[\\\\^^^_pbbbccwff f-fvg~hjj5lmn nXq d_xRbcdBb-z{}v INQHSCS`[\\]b&bGdhh4lmEmgo\qNq}ez{}~Jz9n΍xwRMUo8q6Qhy~U|VLXQ\cffiZruuyyVy|} }D4;a PRuSSP UXYOr=[\dS``c\cc?cdef]iioqNuvz|}}aIXlōpmPXaӅ5 OPtRGSs`ocIg_n,O\^e}SRQvc[X[k\ d gQ\NYY*lpQU>XY`bSg5iU@Ě(OSX[\^/_` aKb4flnހ΁Ԉ.ۛNSY'{,Lnp'SSUD[bXbblot"8o8QSSOFTYj1]zꏿhڌ7rHj=N9SXVWfbcekNmn[pwz{}=Ɔˊ[VX_>efjku7P$wW0_`efzl`uznE{u\z{Qyz6Zw@N-N[_bft6x4ZFuO^bceWgovrL̀)MP WZhisqdrXjyw)O/ReSZbglv}{|6fo r~Q{xr{{Hj^auQu`QkbnvzOpb{OVzXY䖼O4R$SJSS^d,egl>lNrHrsuT~A,錩{đqic=fiujvxЅCS*SQT&Y^_|`bIbybekluvxy}w^j |8P\>_gkt5w 
;gzS9u_f_<_ub{Feg llpx2~+ނ *JҘlNONPRVWJY^=__b?fgghQ}!~2T ,SPS\Xdg4rgwfzFRlkX^LYTg,QvdixTWYf'gkTi^UggRh]NOSbg+lO~mNabno+Tsg*E]{\[ƇnJzY|lw RY"q!r_wۗ'ai ZZQT T}fvߏYr]nQMh}}bdxj!Y[_ksv}Q2g(vgbR\$b;|~UO`} SN_QYr:6_%wS_y}3Vg󅮔Sa alvR8U/OQQ*RS[^}`acg gngms6s7u1yPՊJćYNOYN?P^|Y[^ccdfiJim nqu(zIɉ! e} a~bk2lmtmge}o#,TBojp2RZA^_gi|imjorbr{~KQmy2P-Tqkjā`gNNkhin~xU_ NNN*N1N6NzYUYPYNYZYXYbY`YgYlYiYxYYO^OYYYYYYYYZ%ZZZZ ZZ@ZlZIZ5Z6ZbZjZZZZZZZZZZZZZ[ [ [[2Z[*[6[>[C[E[@[Q[U[Z[[[e[i[p[s[u[xe[z[[[[[[[[[[[[[[[[[[\\\\ \\ \"\(\8\9\A\F\N\S\P\O[q\l\nNb\v\y\\\Y\\\\\\\\\\\\\]\] ]]]\]]]]]"]]]]L]R]N]K]l]s]v]]]]]]]]]]]]]]]]]]]]^ ^^^^^6^7^D^C^@^N^W^T^_^b^d^G^u^v^z^^^^^^^^^^^^^^^^^^^^^^^__ _]_\_ ___)_-_8_A_H_L_N_/_Q_V_W_Y_a_m_s_w____________________`_`!`````)``1```+`&``:`Z`A`j`w`_`J`F`M`c`C`d`B`l`k`Y`````````````````_````aMaa``a``aa!``a aaGa>a(a'aJa?acMdcOcccccvcccccckcicccccccccdd4ddd&d6edd(ddgdodvdNe*ddddddddddddddd ddbdde,ddddedeeee$e#e+e4e5e7e6e8uKeHeVeUeMeXe^e]erexeeeeeeeeeeeeeeeeegrf fegsf5f6f4ffOfDfIfAf^f]fdfgfhf_fbfpffffffffffffffffff?fffffgggg&g'8g.g?g6gAg8g7gFg^g`gYgcgdggpgg|gjgggggggggggggggggggggjhhFh)h@hMh2hNhh+hYhchwhhhhhhhhjhhthhhihh~ihihi"i&hi hhhhi6iihhi%hhhi(i*ii#i!hiyiwi\ixikiTi~ini9iti=iYi0iai^i]iijiiiiiiii[iiiiij.iiiiiiijjik iiijijijij jjj#jjDj jrj6jxjGjbjYjfjHj8j"jjjjjjjjjjjjjjjjjjjjkjkk1kk8k7vk9kGkCkIkPkYkTk[k_kakxkykkkkkkkkkkkkkkkkkkkkkkkkklllll$l#l^lUlbljllllll~lhlslllllllllllllllllmMm6m+m=m8mm5m3mm mcmmdmZmymYmmommnn mmmmmmmmmmmmmmmmmn-nnn.nnrn_n>n#nkn+nvnMnnCn:nNn$nnn8nnnnnnnnnnnnnnnnoAopLnnno?no1no2no>onoozoxooooo[oomoo|oXoooofooooooooooooooop p opppoppotpppp0p>p2pQpcppppppppppppq pqqqeqUqqfqbqLqVqlqqqqqqqqqqqqqqqqqqqqr rrr(r-r,r0r2r;rsNsOsWsjshspsxsus{szsssssssstttot%st2t:tUt?t_tYtAt\titptctjtvt~ttttttsttttttttttuuuu uu uuuu&u,uz7zCzWzIzazbzizpzyz}zzzzzzzzzzzzzzzzzzzzzzzzzz{{{ {{3{{{{5{({6{P{z{{M{ {L{E{u{e{t{g{p{q{l{n{{{{{{{{{{]{{{{{{{{{||{{|`|||{{|| {|#|'|*||7|+|=|L|C|T|O|@|P|X|_|d|V|e|l|u|||||||||||||||||||||;|||||}}}}} 
}E}K}.}2}?}5}F}s}V}N}r}h}n}O}c}}}[}}}}}}}}}}}~=}}}}}}}}}}}}}~~ ~#~!~~1~~ ~ ~"~F~f~;~5~9~C~7~2~:~g~]~V~^~Y~Z~y~j~i~|~{~}~}~~~~~~~~~~~~8:ELMNPQUTX_`higxq܀ !(?;JFRXZ_bhsrpvy}Qۀـ݀Āڀց )#/KF>SQqneft_Ɂ́сف؁ȁځ߁ )+83@YX]Z_dbhjk.qwx~߂҂ރ܃ ك5421@9PE/+#|su΃؄  " 8m*(ALONIV[Zk_lot}:A?HLNPUblxz|bȌڌ  N͍gmqsύڍ֍̍ۍˍߍ B504JGILPHYd`*cUvr|ƎŎȎˎێ  &3;9EB>LIFNW\bcdڏ!  '659OPQRI>VX^hovr}bHۑ20JVXceisrɑˑБ֑ߑۑ,^WEIdH?KPZϒD."#:5;\`|nV֓דؓÓݓГȓ6+5!:ARD[`b^j)puw}Z|~oÕ͕̕Օԕ֕ܕ!(./BLOKw\^]_frlΖ˖ɖ͉Mܗ Ֆ$*09=>DFHBI\`dfhRҗkqy|z×Ɨȗ˗ܗOzߗ 8$!7=FOKkopqtsĘØƘ !$ ,.=>BIEPKQRLUߙۙݙؙљ+7EB@C>UM[W_bedikjϚњӚԚޚߚ"#%'()*./2DCOMNQXtʛƛϛћқԛ:   .%$!0G2F>Z`gvx *&#DA?>FH]^dQPYrozĝƝϝٝӝuy}a̞ΞϞОԞܞޞݞv!,>JRTc_`afgljwrvX/iǐYtdQqXML-Parser-2.46/Parser/Encodings/x-sjis-cp932.enc0000644000000000000000000004762013542305435017761 0ustar rootrootx-sjis-cp932.  !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~abcdefghijklmnopqrstuvwxyz{|}~\g@@O^@@@]?@^t@@@L@ @@@@@@ @ w@ 4@ @ @ k@(@@@_@@@@S@@@@G@@@~@;@@@r@e/@@Q@@@ \~ abcdefghijklmnopqrstuvwxyz{|}~ !"#$%&'()*+,-000 000@>?00000N0000  <^"%\ & %     00;=[]00 0 0 0 0 0000 "`"f"g""4&B&@ 2 3! 
&&%%%%%%%%%%% ;0!!!!0"" """""*")"'"(!!""" "#"""a"R"j"k""=""5"+",!+ 0&o&m&j !%!"#$%&'()*+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ0A0B0C0D0E0F0G0H0I0J0K0L0M0N0O0P0Q0R0S0T0U0V0W0X0Y0Z0[0\0]0^0_0`0a0b0c0d0e0f0g0h0i0j0k0l0m0n0o0p0q0r0s0t0u0v0w0x0y0z0{0|0}0~00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 !"#$%&'()*+,-./012345Q6789:;<=>?@ABCDEFGHIJKLMNO%%% %%%%%,%$%4%<%%%%%%%#%3%+%;%K% %/%(%7%?%%0%%%8%B$`$a$b$c$d$e$f$g$h$i$j$k$l$m$n$o$p$q$r$s!`!a!b!c!d!e!f!g!h!i3I33"3M33'3363Q3W3 3&3#3+3J3;33333333{00!3!!222222122293~3}3|"R"a"+"."""" """5")"*NUZ?Tac(Y"uzP`cn%efhW'ebq[YІ{}b}b|[^c fhHǗgONO OMOPIVY7YZ\ `aapfipuOupy}}ÄcUzS;NNW߀xNXn8z2(/QASpTTVY_m-bpT S[p–So\zNxn&VUk;YSmftܕVBNKO SU[0_qf fhl8lm)t[vzN4[`muvʙ`iSQWX0YD[^`(cclopqqYqs?~vх`[XielZu%QY.Ye__bej*k'ksV,\l{Q\KahvraNYOSx`in)zON SNOUO=OOsRSV YZ[[yfggkLlpksyyz<{ۃwӇfV)NO\brYu;傽řNOVXJX^_`*``babbe9AffhmwppuLv}uQRYT[]ahimxˈWrmlWgΒRVT^bdhkSlWo"ooEtuvw z{|!}6f̌Qeә(N8T+\]svLw<\ TXOOSqUVhWYG[ [\^ ^~_cg:eeghhj_^0kll}uyH[cz}_w̏Z/__`hjtZxwN^NO|OPPQIQlRRRSSTTUWQWY}[T[][]]]^x^^^_`RaLbbce;ffCfgmh!hil_m*min/nu2vxlz?|}}}^}T*RLaʑuqx?M؝;R[RSTXboj_QKR;TJVz@w`sDo pu_`ښrۏkdNVWdXZZ`haffh9hmu}:nBNOPSU]o]]gltsxPWP^c+PPQgTX^Y[_ibMch=ksnp}rxx&yme}0܈ RdW(gPjQWB*X:iT]WxO\RJTd>f(ggz{V}"/h\{9SQR7[bddg-kvcLvfRN PS\q`dech_qsu#{~ۑxefkNNO:OR:SSUVXYYY[P\M^^+_`ce/[\eeegkbk{lsEyIy|}}+󉖊^ifnjܖ̘koNOtuxy: 3ꄔlP_X+z[NSWY1Z[`nou[{Prg\aJ~Q\chfeqny>}ʐnǐPR\:gSp|r5Lȓ+[_1`N;S[bKg1krsz.kRQSTj[cj9}VSTh[\1]Oabm2yy}B~MҁFrt/1KlƑNOOQESA_bglAn sc~&͒SY[my]~.|X~qQSO\f%wzQ_eiokmnodv}]uQRb@ffn^}rfRSYs^_`UdPQRS SGSTUFU1VYhYZ<[\\\\^^^_pbbbccwff f-fvg~hjj5lmn nXq d_xRbcdBb-z{}v INQHSCS`[\\]b&bGdhh4lmEmgo\qNq}ez{}~Jz9n΍xwRMUo8q6Qhy~U|VLXQ\cffiZruuyyVy|} }D4;a PRuSSP UXYOr=[\dS``c\cc?cdef]iioqNuvz|}}aIXlōpmPXaӅ5 OPtRGSs`ocIg_n,O\^e}SRQvc[X[k\ d gQ\NYY*lpQU>XY`bSg5iU@Ě(OSX[\^/_` aKb4flnހ΁Ԉ.ۛNSY'{,Lnp'SSUD[bXbblot"8o8QSSOFTYj1]zꏿhڌ7rHj=N9SXVWfbcekNmn[pwz{}=Ɔˊ[VX_>efjku7P$wW0_`efzl`uznE{u\z{Qyz6Zw@N-N[_bft6x4ZFuO^bceWgovrL̀)MP 
WZhisqdrXjyw)O/ReSZbglv}{|6fo r~Q{xr{{Hj^auQu`QkbnvzOpb{OVzXY䖼O4R$SJSS^d,egl>lNrHrsuT~A,錩{đqic=fiujvxЅCS*SQT&Y^_|`bIbybekluvxy}w^j |8P\>_gkt5w ;gzS9u_f_<_ub{Feg llpx2~+ނ *JҘlNONPRVWJY^=__b?fgghQ}!~2T ,SPS\Xdg4rgwfzFRlkX^LYTg,QvdixTWYf'gkTi^UggRh]NOSbg+lO~mNabno+Tsg*E]{\[ƇnJzY|lw RY"q!r_wۗ'ai ZZQT T}fvߏYr]nQMh}}bdxj!Y[_ksv}Q2g(vgbR\$b;|~UO`} SN_QYr:6_%wS_y}3Vg󅮔Sa alvR8U/OQQ*RS[^}`acg gngms6s7u1yPՊJćYNOYN?P^|Y[^ccdfiJim nqu(zIɉ! e} a~bk2lmtmge}o#,TBojp2RZA^_gi|imjorbr{~KQmy2P-Tqkjā`gNNkhin~xU_ NNN*N1N6NzYUYPYNYZYXYbY`YgYlYiYxYYO^OYYYYYYYYZ%ZZZZ ZZ@ZlZIZ5Z6ZbZjZZZZZZZZZZZZZ[ [ [[2Z[*[6[>[C[E[@[Q[U[Z[[[e[i[p[s[u[xe[z[[[[[[[[[[[[[[[[[[\\\\ \\ \"\(\8\9\A\F\N\S\P\O[q\l\nNb\v\y\\\Y\\\\\\\\\\\\\]\] ]]]\]]]]]"]]]]L]R]N]K]l]s]v]]]]]]]]]]]]]]]]]]]]^ ^^^^^6^7^D^C^@^N^W^T^_^b^d^G^u^v^z^^^^^^^^^^^^^^^^^^^^^^^__ _]_\_ ___)_-_8_A_H_L_N_/_Q_V_W_Y_a_m_s_w____________________`_`!`````)``1```+`&``:`Z`A`j`w`_`J`F`M`c`C`d`B`l`k`Y`````````````````_````aMaa``a``aa!``a aaGa>a(a'aJa?acMdcOcccccvcccccckcicccccccccdd4ddd&d6edd(ddgdodvdNe*ddddddddddddddd ddbdde,ddddedeeee$e#e+e4e5e7e6e8uKeHeVeUeMeXe^e]erexeeeeeeeeeeeeeeeeegrf fegsf5f6f4ffOfDfIfAf^f]fdfgfhf_fbfpffffffffffffffffff?fffffgggg&g'8g.g?g6gAg8g7gFg^g`gYgcgdggpgg|gjgggggggggggggggggggggjhhFh)h@hMh2hNhh+hYhchwhhhhhhhhjhhthhhihh~ihihi"i&hi hhhhi6iihhi%hhhi(i*ii#i!hiyiwi\ixikiTi~ini9iti=iYi0iai^i]iijiiiiiiii[iiiiij.iiiiiiijjik iiijijijij jjj#jjDj jrj6jxjGjbjYjfjHj8j"jjjjjjjjjjjjjjjjjjjjkjkk1kk8k7vk9kGkCkIkPkYkTk[k_kakxkykkkkkkkkkkkkkkkkkkkkkkkkklllll$l#l^lUlbljllllll~lhlslllllllllllllllllmMm6m+m=m8mm5m3mm mcmmdmZmymYmmommnn mmmmmmmmmmmmmmmmmn-nnn.nnrn_n>n#nkn+nvnMnnCn:nNn$nnn8nnnnnnnnnnnnnnnnoAopLnnno?no1no2no>onoozoxooooo[oomoo|oXoooofooooooooooooooop p opppoppotpppp0p>p2pQpcppppppppppppq pqqqeqUqqfqbqLqVqlqqqqqqqqqqqqqqqqqqqqr rrr(r-r,r0r2r;rsNsOsWsjshspsxsus{szsssssssstttot%st2t:tUt?t_tYtAt\titptctjtvt~ttttttsttttttttttuuuu uu uuuu&u,uz7zCzWzIzazbzizpzyz}zzzzzzzzzzzzzzzzzzzzzzzzzz{{{ {{3{{{{5{({6{P{z{{M{ 
{L{E{u{e{t{g{p{q{l{n{{{{{{{{{{]{{{{{{{{{||{{|`|||{{|| {|#|'|*||7|+|=|L|C|T|O|@|P|X|_|d|V|e|l|u|||||||||||||||||||||;|||||}}}}} }E}K}.}2}?}5}F}s}V}N}r}h}n}O}c}}}[}}}}}}}}}}}~=}}}}}}}}}}}}}~~ ~#~!~~1~~ ~ ~"~F~f~;~5~9~C~7~2~:~g~]~V~^~Y~Z~y~j~i~|~{~}~}~~~~~~~~~~~~8:ELMNPQUTX_`higxq܀ !(?;JFRXZ_bhsrpvy}Qۀـ݀Āڀց )#/KF>SQqneft_Ɂ́сف؁ȁځ߁ )+83@YX]Z_dbhjk.qwx~߂҂ރ܃ ك5421@9PE/+#|su΃؄  " 8m*(ALONIV[Zk_lot}:A?HLNPUblxz|bȌڌ  N͍gmqsύڍ֍̍ۍˍߍ B504JGILPHYd`*cUvr|ƎŎȎˎێ  &3;9EB>LIFNW\bcdڏ!  '659OPQRI>VX^hovr}bHۑ20JVXceisrɑˑБ֑ߑۑ,^WEIdH?KPZϒD."#:5;\`|nV֓דؓÓݓГȓ6+5!:ARD[`b^j)puw}Z|~oÕ͕̕Օԕ֕ܕ!(./BLOKw\^]_frlΖ˖ɖ͉Mܗ Ֆ$*09=>DFHBI\`dfhRҗkqy|z×Ɨȗ˗ܗOzߗ 8$!7=FOKkopqtsĘØƘ !$ ,.=>BIEPKQRLUߙۙݙؙљ+7EB@C>UM[W_bedikjϚњӚԚޚߚ"#%'()*./2DCOMNQXtʛƛϛћқԛ:   .%$!0G2F>Z`gvx *&#DA?>FH]^dQPYrozĝƝϝٝӝuy}a̞ΞϞОԞܞޞݞv!,>JRTc_`afgljwrvX/iǐYtdQq~HOpf1hȒf_EN(NNOOO9OVOOOOOP@P"OPPFPpPBPPPQJQdQQQRRRRRSSS$SrSSSTTTTUWYWeWWWXXY YSY[Y]YcYY[V[u/[[\\\\]']S]B]m]]]_!_4_g__`]````a `aa7a0abbcd`ddeNfff;f f.ff$fefWfYfsfffffg)gfghRghhDhihiij0jkjFjsj~jjkl?l\llolmmmommmmmmn9n\n'n{Lists} = []; $expat->{Curlist} = $expat->{Tree} = []; } sub Start { my $expat = shift; my $tag = shift; my $newlist = []; my $class = "${$expat}{Pkg}::$tag"; my $newobj = bless { @_, Kids => $newlist }, $class; push @{ $expat->{Lists} }, $expat->{Curlist}; push @{ $expat->{Curlist} }, $newobj; $expat->{Curlist} = $newlist; } sub End { my $expat = shift; my $tag = shift; $expat->{Curlist} = pop @{ $expat->{Lists} }; } sub Char { my $expat = shift; my $text = shift; my $class = "${$expat}{Pkg}::Characters"; my $clist = $expat->{Curlist}; my $pos = $#$clist; if ( $pos >= 0 and ref( $clist->[$pos] ) eq $class ) { $clist->[$pos]->{Text} .= $text; } else { push @$clist, bless { Text => $text }, $class; } } sub Final { my $expat = shift; delete $expat->{Curlist}; delete $expat->{Lists}; $expat->{Tree}; } 1; __END__ =head1 NAME XML::Parser::Style::Objects - Objects styler parser =head1 SYNOPSIS use XML::Parser; my $p = XML::Parser->new(Style => 'Objects', Pkg => 'MyNode'); my 
$tree = $p->parsefile('foo.xml');

=head1 DESCRIPTION

This module implements XML::Parser's Objects style parser.

This is similar to the Tree style, except that a hash object is created for
each element. The corresponding object will be in the class whose name
is created by appending "::" and the element name to the package set with
the Pkg option. Non-markup text will be in the ::Characters class. The
contents of the corresponding object will be in an anonymous array that
is the value of the Kids property for that object.

=head1 SEE ALSO

L<XML::Parser::Style::Tree>

=cut
XML-Parser-2.46/Parser/Style/Stream.pm0000644000000000000000000000705513542305435016175 0ustar rootroot
# $Id: Stream.pm,v 1.1 2003-07-27 16:07:49 matt Exp $

package XML::Parser::Style::Stream;
use strict;

# This style invented by Tim Bray

# Every handler below looks up an optional callback by name in the package
# given by $expat->{Pkg}.  When the callback is not defined the handler
# falls back to printing $_ (nothing is printed for Init/Final).

# Init: reset the text accumulator and call Pkg::StartDocument if defined.
sub Init {
    no strict 'refs';
    my $expat = shift;
    $expat->{Text} = '';
    my $sub = $expat->{Pkg} . "::StartDocument";
    &$sub($expat)
      if defined(&$sub);
}

# Start: flush pending text, rebuild the start tag in $_ and the attribute
# hash in %_, then call Pkg::StartTag($expat, $type) or print the tag.
sub Start {
    no strict 'refs';
    my $expat = shift;
    my $type  = shift;

    doText($expat);
    $_ = "<$type";

    # Remaining arguments are attribute name/value pairs.
    %_ = @_;
    while (@_) {
        $_ .= ' ' . shift() . '="' . shift() . '"';
    }
    $_ .= '>';

    my $sub = $expat->{Pkg} . "::StartTag";
    if ( defined(&$sub) ) {
        &$sub( $expat, $type );
    }
    else {
        print;
    }
}

# End: flush pending text (with the closing element temporarily pushed back
# on the context), put the end tag in $_, then call Pkg::EndTag or print it.
sub End {
    no strict 'refs';
    my $expat = shift;
    my $type  = shift;

    # Set right context for Text handler
    push( @{ $expat->{Context} }, $type );
    doText($expat);
    pop( @{ $expat->{Context} } );

    $_ = "</$type>";

    my $sub = $expat->{Pkg} . "::EndTag";
    if ( defined(&$sub) ) {
        &$sub( $expat, $type );
    }
    else {
        print;
    }
}

# Char: accumulate character data; it is delivered later by doText.
sub Char {
    my $expat = shift;
    $expat->{Text} .= shift;
}

# Proc: flush pending text, put the processing instruction in $_, then call
# Pkg::PI($expat, $target, $text) or print it.
sub Proc {
    no strict 'refs';
    my $expat  = shift;
    my $target = shift;
    my $text   = shift;

    doText($expat);

    $_ = "<?$target $text?>";

    my $sub = $expat->{Pkg} . "::PI";
    if ( defined(&$sub) ) {
        &$sub( $expat, $target, $text );
    }
    else {
        print;
    }
}

# Final: call Pkg::EndDocument if defined.
sub Final {
    no strict 'refs';
    my $expat = shift;
    my $sub   = $expat->{Pkg} . "::EndDocument";
    &$sub($expat)
      if defined(&$sub);
}

# doText: if any text has accumulated, place it in $_, hand it to Pkg::Text
# (or print it), and clear the accumulator.
sub doText {
    no strict 'refs';
    my $expat = shift;
    $_ = $expat->{Text};

    if ( length($_) ) {
        my $sub = $expat->{Pkg} . "::Text";
        if ( defined(&$sub) ) {
            &$sub($expat);
        }
        else {
            print;
        }

        $expat->{Text} = '';
    }
}

1;
__END__

=head1 NAME

XML::Parser::Style::Stream - Stream style for XML::Parser

=head1 SYNOPSIS

  use XML::Parser;
  my $p = XML::Parser->new(Style => 'Stream', Pkg => 'MySubs');
  $p->parsefile('foo.xml');

  {
    package MySubs;

    sub StartTag {
      my ($e, $name) = @_;
      # do something with start tags
    }

    sub EndTag {
      my ($e, $name) = @_;
      # do something with end tags
    }

    sub Text {
      my ($e) = @_;
      # do something with text nodes; the text is in $_
    }
  }

=head1 DESCRIPTION

This style uses the Pkg option to find subs in a given package to call for each event.
If none of the subs that this
style looks for is there, then the effect of parsing with this style is
to print a canonical copy of the document without comments or declarations.
All the subs receive as their 1st parameter the Expat instance for the
document they're parsing.

It looks for the following routines:

=over 4

=item * StartDocument

Called at the start of the parse.

=item * StartTag

Called for every start tag with a second parameter of the element type. The $_
variable will contain a copy of the tag and the %_ variable will contain
attribute values supplied for that element.

=item * EndTag

Called for every end tag with a second parameter of the element type. The $_
variable will contain a copy of the end tag.

=item * Text

Called just before start or end tags with accumulated non-markup text in
the $_ variable.

=item * PI

Called for processing instructions. The $_ variable will contain a copy of
the PI and the target and data are sent as 2nd and 3rd parameters
respectively.

=item * EndDocument

Called at conclusion of the parse.

=back

=cut
XML-Parser-2.46/Parser/Style/Debug.pm0000644000000000000000000000200413542305435015755 0ustar rootroot
# $Id: Debug.pm,v 1.1 2003-07-27 16:07:49 matt Exp $

package XML::Parser::Style::Debug;
use strict;

# Start: print the current element context, a "\\" marker and the
# attribute name/value list to STDERR.
sub Start {
    my $expat = shift;
    my $tag   = shift;
    print STDERR "@{$expat->{Context}} \\\\ (@_)\n";
}

# End: print the current element context followed by "//" to STDERR.
sub End {
    my $expat = shift;
    my $tag   = shift;
    print STDERR "@{$expat->{Context}} //\n";
}

# Char: print character data to STDERR after rewriting bytes 0x80-0xFF as
# "#xHH;" and tab/newline as "#N;" so each chunk stays on one line.
sub Char {
    my $expat = shift;
    my $text  = shift;
    $text =~ s/([\x80-\xff])/sprintf "#x%X;", ord $1/eg;
    $text =~ s/([\t\n])/sprintf "#%d;", ord $1/eg;
    print STDERR "@{$expat->{Context}} || $text\n";
}

# Proc: print the context and "target(data)" for a processing instruction.
sub Proc {
    my $expat  = shift;
    my $target = shift;
    my $text   = shift;
    my @foo    = @{ $expat->{Context} };
    print STDERR "@foo $target($text)\n";
}

1;
__END__

=head1 NAME

XML::Parser::Style::Debug - Debug style for XML::Parser

=head1 SYNOPSIS

  use XML::Parser;
  my $p = XML::Parser->new(Style => 'Debug');
  $p->parsefile('foo.xml');

=head1 DESCRIPTION

This just prints out the document in outline form to STDERR. Nothing special is
returned by parse.

=cut
XML-Parser-2.46/Parser/Style/Subs.pm0000644000000000000000000000217713542305435015656 0ustar rootroot
# $Id: Subs.pm,v 1.1 2003-07-27 16:07:49 matt Exp $

package XML::Parser::Style::Subs;
use strict;

# Start: call the sub named after the element in package $expat->{Pkg},
# passing the Start handler arguments.  The call is wrapped in eval, so a
# missing sub (or any error raised inside it) is silently ignored.
sub Start {
    no strict 'refs';
    my $expat = shift;
    my $tag   = shift;
    my $sub   = $expat->{Pkg} . "::$tag";
    eval { &$sub( $expat, $tag, @_ ) };
}

# End: same as Start, but the sub name is the element name followed by "_".
sub End {
    no strict 'refs';
    my $expat = shift;
    my $tag   = shift;
    my $sub   = $expat->{Pkg} . "::${tag}_";
    eval { &$sub( $expat, $tag ) };
}

1;
__END__

=head1 NAME

XML::Parser::Style::Subs - glue for handling element callbacks

=head1 SYNOPSIS

  use XML::Parser;
  my $p = XML::Parser->new(Style => 'Subs', Pkg => 'MySubs');
  $p->parsefile('foo.xml');

  {
    package MySubs;

    sub foo {
      # start of foo tag
    }

    sub foo_ {
      # end of foo tag
    }
  }

=head1 DESCRIPTION

Each time an element starts, a sub by that name in the package specified
by the Pkg option is called with the same parameters that the Start
handler gets called with.

Each time an element ends, a sub with that name appended with an underscore
("_"), is called with the same parameters that the End handler gets called
with.

Nothing special is returned by parse.

=cut
XML-Parser-2.46/Parser/Style/Tree.pm0000644000000000000000000000435213542305435015636 0ustar rootroot
# $Id: Tree.pm,v 1.2 2003-07-31 07:54:51 matt Exp $

package XML::Parser::Style::Tree;
use strict;

$XML::Parser::Built_In_Styles{Tree} = 1;

# Init: start with an empty stack of enclosing lists and an empty root list,
# which is both the tree being built and the current insertion list.
sub Init {
    my $expat = shift;
    $expat->{Lists}   = [];
    $expat->{Curlist} = $expat->{Tree} = [];
}

# Start: append "tag => [ {attributes} ]" to the current list, remember the
# current list on the stack, and make the new content list current.
sub Start {
    my $expat   = shift;
    my $tag     = shift;
    my $newlist = [ {@_} ];
    push @{ $expat->{Lists} },   $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $newlist;
    $expat->{Curlist} = $newlist;
}

# End: return to the enclosing element's content list.
sub End {
    my $expat = shift;
    my $tag   = shift;
    $expat->{Curlist} = pop @{ $expat->{Lists} };
}

# Char: append text to the current list as a "0 => text" pair, merging it
# into the previous pair when that pair is also text.
sub Char {
    my $expat = shift;
    my $text  = shift;
    my $clist = $expat->{Curlist};
    my $pos   = $#$clist;

    # $pos > 0 skips the attribute hash that sits alone at index 0.
    if ( $pos > 0 and $clist->[ $pos - 1 ] eq '0' ) {
        $clist->[$pos] .= $text;
    }
    else {
        push @$clist, 0 => $text;
    }
}

# Final: drop the bookkeeping entries and return the finished tree.
sub Final {
    my $expat = shift;
    delete $expat->{Curlist};
    delete $expat->{Lists};
    $expat->{Tree};
}

1;
__END__

=head1 NAME

XML::Parser::Style::Tree - Tree style parser

=head1 SYNOPSIS

  use XML::Parser;
  my $p = XML::Parser->new(Style => 'Tree');
  my $tree = $p->parsefile('foo.xml');

=head1 DESCRIPTION

This module implements XML::Parser's Tree style parser.

When parsing a document, C<parse()> will return a parse tree for the
document. Each node in the tree
takes the form of a tag, content pair. Text nodes are represented with
a pseudo-tag of "0" and the string that is their content. For elements,
the content is an array reference. The first item in the array is a
(possibly empty) hash reference containing attributes. The remainder of
the array is a sequence of tag-content pairs representing the content
of the element.

So for example the result of parsing:

  <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>

would be:

             Tag   Content
  ==================================================================
  [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]],
              bar, [         {}, 0, "Howdy",  ref, [{}]],
                0, "do"
        ]
  ]

The root document "foo", has 3 children: a "head" element, a "bar"
element and the text "do". After the empty attribute hash, these are
represented in its contents by 3 tag-content pairs.

=cut
XML-Parser-2.46/Parser/LWPExternEnt.pl0000644000000000000000000000305613542305435016135 0ustar rootroot
# LWPExternEnt.pl
#
# Copyright (c) 2000 Clark Cooper
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

package XML::Parser;

use URI;
use URI::file;
use LWP::UserAgent;

##
## Note that this external entity handler reads the entire entity into
## memory, so it will choke on huge ones. It would be really nice if
## LWP::UserAgent optionally returned us an IO::Handle.
##

# lwp_ext_ent_handler($xp, $base, $sys)
#
# Resolve the system id $sys against $base (or against the current working
# directory when no usable base/scheme is present), fetch it with a
# LWP::UserAgent cached on the parser object, and return the body.
# On an HTTP error the status line is appended to $xp->{ErrorMessage} and
# undef is returned.  On success the previous base is pushed on
# $xp->{_BaseStack} and the parser base is set to the fetched URI.
sub lwp_ext_ent_handler {
    my ($xp, $base, $sys) = @_;    # We don't use public id

    my $uri;

    if (defined $base) {

        # Base may have been set by parsefile, which is agnostic about
        # whether its a file or URI.
        my $base_uri = URI->new($base);
        unless (defined $base_uri->scheme) {
            $base_uri = URI->new_abs($base_uri, URI::file->cwd);
        }

        $uri = URI->new_abs($sys, $base_uri);
    }
    else {
        $uri = URI->new($sys);
        unless (defined $uri->scheme) {
            $uri = URI->new_abs($uri, URI::file->cwd);
        }
    }

    # Create the user agent lazily and keep it for later entities.
    my $ua = $xp->{_lwpagent};
    unless (defined $ua) {
        $ua = $xp->{_lwpagent} = LWP::UserAgent->new();
        $ua->env_proxy();
    }

    my $req = HTTP::Request->new('GET', $uri);

    my $res = $ua->request($req);
    if ($res->is_error) {
        $xp->{ErrorMessage} .= "\n" . $res->status_line . " $uri";
        return undef;
    }

    $xp->{_BaseStack} ||= [];
    push(@{$xp->{_BaseStack}}, $base);

    $xp->base($uri);

    return $res->content;
}    # End lwp_ext_ent_handler

# lwp_ext_ent_cleanup($xp): restore the base that lwp_ext_ent_handler saved.
sub lwp_ext_ent_cleanup {
    my ($xp) = @_;

    $xp->base(pop(@{$xp->{_BaseStack}}));
}    # End lwp_ext_ent_cleanup

1;
XML-Parser-2.46/Expat/0000755000000000000000000000000013542324531013120 5ustar rootroot
XML-Parser-2.46/Expat/typemap0000644000000000000000000000103213542305435014520 0ustar rootroot
#
##### XML::Parser::Expat typemap
#

XML_Parser		T_PTR
Encinfo *		T_ENCOBJ

################################################################
INPUT
T_ENCOBJ
	if (sv_derived_from($arg, \"XML::Parser::Encinfo\")) {
	    IV tmp = SvIV((SV*)SvRV($arg));
	    $var = ($type) tmp;
	}
	else
	    croak(\"$var is not of type XML::Parser::Encinfo\")
################################################################
OUTPUT
T_ENCOBJ
	if ($var) {
	    sv_setref_pv($arg, \"XML::Parser::Encinfo\", (void*)$var);
	}
	else
	    $arg = &PL_sv_undef;
XML-Parser-2.46/Expat/encoding.h0000644000000000000000000000616013542305435015064 0ustar rootroot
/*****************************************************************
** encoding.h
**
** Copyright 1998 Clark Cooper
** All rights reserved.
**
** This program is free software; you can redistribute it and/or
** modify it under the same terms as Perl itself.
*/ #ifndef ENCODING_H #define ENCODING_H 1 #define ENCMAP_MAGIC 0xfeebface typedef struct prefixmap { unsigned char min; unsigned char len; /* 0 => 256 */ unsigned short bmap_start; unsigned char ispfx[32]; unsigned char ischar[32]; } PrefixMap; typedef struct encinf { unsigned short prefixes_size; unsigned short bytemap_size; int firstmap[256]; PrefixMap *prefixes; unsigned short *bytemap; } Encinfo; typedef struct encmaphdr { unsigned int magic; char name[40]; unsigned short pfsize; unsigned short bmsize; int map[256]; } Encmap_Header; /*================================================================ ** Structure of Encoding map binary encoding ** ** Note that all shorts and ints are in network order, ** so when packing or unpacking with perl, use 'n' and 'N' respectively. ** In C, use the htonl family of functions. ** ** The basic structure is: ** ** _______________________ ** |Header (including map expat needs for 1st byte) ** |PrefixMap * pfsize ** | This section isn't included for single-byte encodings. ** | For multiple byte encodings, when a byte represents a prefix ** | then it indexes into this vector instead of mapping to a ** | Unicode character. The PrefixMap type is declared above. The ** | ispfx and ischar fields are bitvectors indicating whether ** | the byte being mapped is a prefix or character respectively. ** | If neither is set, then the character is not mapped to Unicode. ** | ** | The min field is the 1st byte mapped for this prefix; the ** | len field is the number of bytes mapped; and bmap_start is ** | the starting index of the map for this prefix in the overall ** | map (next section). ** |unsigned short * bmsize ** | This section also is omitted for single-byte encodings. ** | Each short is either a Unicode scalar or an index into the ** | PrefixMap vector. ** ** The header for these files is declared above as the Encmap_Header type. ** The magic field is a magic number which should match the ENCMAP_MAGIC ** macro above. 
The next 40 bytes stores IANA registered name for the
** encoding. The pfsize field holds the number of PrefixMaps, which should
** be zero for single byte encodings. The bmsize field holds the number of
** shorts used for the overall map.
**
** The map field contains either the Unicode scalar encoded by the 1st byte
** or -n where n is the number of bytes that such a 1st byte implies (Expat
** requires that the number of bytes to encode a character is indicated by
** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
**
** If the encoding is a multiple byte encoding, then there will be PrefixMap
** and character map sections. The 1st PrefixMap (index 0), covers a range
** of bytes that includes all 1st byte prefixes.
**
** Look at convert_to_unicode in Expat.xs to see how this data structure
** is used.
*/

#endif /* ndef ENCODING_H */
XML-Parser-2.46/Expat/Expat.pm0000644000000000000000000010452013542323734014545 0ustar rootroot
package XML::Parser::Expat;

use strict;

#use warnings; No warnings numeric??

use XSLoader;
use Carp;

our $VERSION = '2.46';

our ( %Encoding_Table, @Encoding_Path, $have_File_Spec );

use File::Spec ();

# File::Spec is loaded unconditionally just above, so the portable
# path-building branches (here and in load_encoding) can always be used.
# Previously the flag was never assigned and those branches were dead code.
$have_File_Spec = 1;

%Encoding_Table = ();
if ($have_File_Spec) {
    @Encoding_Path = (
        grep( -d $_, map( File::Spec->catdir( $_, qw(XML Parser Encodings) ), @INC ) ),
        File::Spec->curdir
    );
}
else {
    @Encoding_Path = ( grep( -d $_, map( $_ . '/XML/Parser/Encodings', @INC ) ), '.' );
}

XSLoader::load( 'XML::Parser::Expat', $VERSION );

# Maps each handler type accepted by setHandlers to the XS routine that
# installs it.
our %Handler_Setters = (
    Start        => \&SetStartElementHandler,
    End          => \&SetEndElementHandler,
    Char         => \&SetCharacterDataHandler,
    Proc         => \&SetProcessingInstructionHandler,
    Comment      => \&SetCommentHandler,
    CdataStart   => \&SetStartCdataHandler,
    CdataEnd     => \&SetEndCdataHandler,
    Default      => \&SetDefaultHandler,
    Unparsed     => \&SetUnparsedEntityDeclHandler,
    Notation     => \&SetNotationDeclHandler,
    ExternEnt    => \&SetExternalEntityRefHandler,
    ExternEntFin => \&SetExtEntFinishHandler,
    Entity       => \&SetEntityDeclHandler,
    Element      => \&SetElementDeclHandler,
    Attlist      => \&SetAttListDeclHandler,
    Doctype      => \&SetDoctypeHandler,
    DoctypeFin   => \&SetEndDoctypeHandler,
    XMLDecl      => \&SetXMLDeclHandler
);

# new(%args): the option hash itself is blessed and becomes the object.
# Parser state, context stack and (when Namespaces is true) the namespace
# tables are initialised, then the underlying parser is created.
sub new {
    my ( $class, %args ) = @_;
    my $self = bless \%args, $_[0];
    $args{_State_}      = 0;
    $args{Context}      = [];
    $args{Namespaces}   ||= 0;
    $args{ErrorMessage} ||= '';
    if ( $args{Namespaces} ) {
        $args{Namespace_Table} = {};
        $args{Namespace_List}  = [undef];
        $args{Prefix_Table}    = {};
        $args{New_Prefixes}    = [];
    }
    $args{_Setters} = \%Handler_Setters;
    $args{Parser}   = ParserCreate( $self, $args{ProtocolEncoding}, $args{Namespaces} );
    $self;
}

# load_encoding($file): lower-case the last path component, add ".enc" if
# missing, search @Encoding_Path for relative names, read the whole file
# and hand it to LoadEncoding.  Returns the name LoadEncoding reports;
# croaks if the file cannot be opened, read, or is not an encmap.
sub load_encoding {
    my ($file) = @_;

    $file =~ s!([^/]+)$!\L$1\E!;
    $file .= '.enc' unless $file =~ /\.enc$/;
    unless ( $file =~ m!^/! ) {
        foreach (@Encoding_Path) {
            my $tmp = (
                $have_File_Spec
                ? File::Spec->catfile( $_, $file )
                : "$_/$file"
            );
            if ( -e $tmp ) {
                $file = $tmp;
                last;
            }
        }
    }

    open( my $fh, '<', $file ) or croak("Couldn't open encmap $file:\n$!\n");
    binmode($fh);
    my $data;
    my $br = sysread( $fh, $data, -s $file );
    croak("Trouble reading $file:\n$!\n")
      unless defined($br);
    close($fh);

    my $name = LoadEncoding( $data, $br );
    croak("$file isn't an encmap file")
      unless defined($name);

    $name;
}    # End load_encoding

# setHandlers(TYPE => HANDLER, ...): install each handler through the
# matching setter and return a list of (TYPE, previous handler) pairs.
# Croaks on an odd argument count, a true handler that is not a code ref,
# or an unknown type.
sub setHandlers {
    my ( $self, @handler_pairs ) = @_;

    croak("Uneven number of arguments to setHandlers method")
      if ( int(@handler_pairs) & 1 );

    my @ret;

    while (@handler_pairs) {
        my $type    = shift @handler_pairs;
        my $handler = shift @handler_pairs;

        # Double quotes so that the offending type is named in the message.
        croak "Handler for $type not a Code ref"
          unless ( !defined($handler) or !$handler or ref($handler) eq 'CODE' );

        my $hndl = $self->{_Setters}->{$type};

        unless ( defined($hndl) ) {
            my @types = sort keys %{ $self->{_Setters} };
            croak("Unknown Expat handler type: $type\n Valid types: @types");
        }

        my $old = &$hndl( $self->{Parser}, $handler );
        push( @ret, $type, $old );
    }

    return @ret;
}

# xpcroak($message): croak with the current line number appended and, when
# the ErrorContext option is defined, the surrounding document text.
sub xpcroak {
    my ( $self, $message ) = @_;

    my $eclines = $self->{ErrorContext};
    my $line    = GetCurrentLineNumber( $_[0]->{Parser} );
    $message .= " at line $line";
    $message .= ":\n" . $self->position_in_context($eclines)
      if defined($eclines);
    croak $message;
}

# xpcarp($message): same as xpcroak, but warns with carp instead of dying.
sub xpcarp {
    my ( $self, $message ) = @_;

    my $eclines = $self->{ErrorContext};
    my $line    = GetCurrentLineNumber( $_[0]->{Parser} );

    # Double quotes so that the line number is interpolated, as in xpcroak.
    $message .= " at line $line";
    $message .= ":\n" . $self->position_in_context($eclines)
      if defined($eclines);
    carp $message;
}

# The accessors below only call into the parser while a parse is in
# progress ($self->{_State_} == 1).

sub default_current {
    my $self = shift;
    if ( $self->{_State_} == 1 ) {
        return DefaultCurrent( $self->{Parser} );
    }
}

sub recognized_string {
    my $self = shift;
    if ( $self->{_State_} == 1 ) {
        return RecognizedString( $self->{Parser} );
    }
}

sub original_string {
    my $self = shift;
    if ( $self->{_State_} == 1 ) {
        return OriginalString( $self->{Parser} );
    }
}

sub current_line {
    my $self = shift;
    if ( $self->{_State_} == 1 ) {
        return GetCurrentLineNumber( $self->{Parser} );
    }
}

sub current_column {
    my $self = shift;
    if ( $self->{_State_} == 1 ) {
        return GetCurrentColumnNumber( $self->{Parser} );
    }
}

sub current_byte {
    my $self = shift;
    if ( $self->{_State_} == 1 ) {
        return GetCurrentByteIndex( $self->{Parser} );
    }
}

# base([$newbase]): return the current base; set a new one when an argument
# (even undef) is supplied.
sub base {
    my ( $self, $newbase ) = @_;
    my $p       = $self->{Parser};
    my $oldbase = GetBase($p);
    SetBase( $p, $newbase ) if @_ > 1;
    return $oldbase;
}

# context(): the list of currently open element names.
sub context {
    my $ctx = $_[0]->{Context};
    @$ctx;
}

# current_element(): innermost open element, or undef outside any element.
sub current_element {
    my ($self) = @_;
    @{ $self->{Context} } ? $self->{Context}->[-1] : undef;
}

# in_element($name): true if the innermost open element equals $name.
sub in_element {
    my ( $self, $element ) = @_;
    @{ $self->{Context} }
      ? $self->eq_name( $self->{Context}->[-1], $element )
      : undef;
}

# within_element($name): how many open elements equal $name.
sub within_element {
    my ( $self, $element ) = @_;
    my $cnt = 0;
    foreach ( @{ $self->{Context} } ) {
        $cnt++ if $self->eq_name( $_, $element );
    }
    return $cnt;
}

# depth(): number of currently open elements.
sub depth {
    my ($self) = @_;
    int( @{ $self->{Context} } );
}

sub element_index {
    my ($self) = @_;

    if ( $self->{_State_} == 1 ) {
        return ElementIndex( $self->{Parser} );
    }
}

################
# Namespace methods

# namespace($name): look up the namespace by the numeric value of $name.
sub namespace {
    my ( $self, $name ) = @_;
    local ($^W) = 0;
    $self->{Namespace_List}->[ int($name) ];
}

# eq_name($nm1, $nm2): names are equal when both their numeric and string
# values match.
sub eq_name {
    my ( $self, $nm1, $nm2 ) = @_;
    local ($^W) = 0;

    int($nm1) == int($nm2) and $nm1 eq $nm2;
}

# generate_ns_name($name, $namespace): build a namespace-qualified name, or
# return $name unchanged when no namespace is given.
sub generate_ns_name {
    my ( $self, $name, $namespace ) = @_;

    $namespace
      ? GenerateNSName( $name, $namespace, $self->{Namespace_Table}, $self->{Namespace_List} )
      : $name;
}

# new_ns_prefixes(): prefixes recorded in New_Prefixes (empty list unless
# namespace processing is on).
sub new_ns_prefixes {
    my ($self) = @_;
    if ( $self->{Namespaces} ) {
        return @{ $self->{New_Prefixes} };
    }
    return ();
}

# expand_ns_prefix($prefix): innermost URI bound to $prefix, or undef.
sub expand_ns_prefix {
    my ( $self, $prefix ) = @_;

    if ( $self->{Namespaces} ) {
        my $stack = $self->{Prefix_Table}->{$prefix};
        return ( defined($stack) and @$stack ) ? $stack->[-1] : undef;
    }

    return undef;
}

# current_ns_prefixes(): all prefixes currently bound; '#default' is left
# out when its innermost binding is undef.
sub current_ns_prefixes {
    my ($self) = @_;

    if ( $self->{Namespaces} ) {
        my %set = %{ $self->{Prefix_Table} };

        if ( exists $set{'#default'} and not defined( $set{'#default'}->[-1] ) ) {
            delete $set{'#default'};
        }

        return keys %set;
    }

    return ();
}

################################################################
# Namespace declaration handlers
#

# NamespaceStart($prefix, $uri): push $uri on the binding stack for $prefix
# ('#default' when $prefix is undef) and record the prefix as new.
sub NamespaceStart {
    my ( $self, $prefix, $uri ) = @_;

    $prefix = '#default' unless defined $prefix;
    my $stack = $self->{Prefix_Table}->{$prefix};

    if ( defined $stack ) {
        push( @$stack, $uri );
    }
    else {
        $self->{Prefix_Table}->{$prefix} = [$uri];
    }

    # The New_Prefixes list gets emptied at end of startElement function
    # in Expat.xs

    push( @{ $self->{New_Prefixes} }, $prefix );
}

# NamespaceEnd($prefix): pop the innermost binding; remove the table entry
# when it was the last one.
sub NamespaceEnd {
    my ( $self, $prefix ) = @_;

    $prefix = '#default' unless defined $prefix;

    my $stack = $self->{Prefix_Table}->{$prefix};
    if ( @$stack > 1 ) {
        pop(@$stack);
    }
    else {
        delete $self->{Prefix_Table}->{$prefix};
    }
}

################

sub specified_attr {
    my $self = shift;

    if ( $self->{_State_} == 1 ) {
        return GetSpecifiedAttributeCount( $self->{Parser} );
    }
}

# finish(): during a parse, remove every handler from the parser.
sub finish {
    my ($self) = @_;
    if ( $self->{_State_} == 1 ) {
        my $parser = $self->{Parser};
        UnsetAllHandlers($parser);
    }
}

# position_in_context($lines): during a parse, return the text around the
# current position with a "====^" pointer line inserted under the current
# column.  Returns '' when no context string is available.
sub position_in_context {
    my ( $self, $lines ) = @_;
    if ( $self->{_State_} == 1 ) {
        my $parser = $self->{Parser};
        my ( $string, $linepos ) = PositionContext( $parser, $lines );

        return '' unless defined($string);

        my $col = GetCurrentColumnNumber($parser);
        my $ptr = ( '=' x ( $col - 1 ) ) . '^' . "\n";
        my $ret;
        my $dosplit = $linepos < length($string);

        $string .= "\n" unless $string =~ /\n$/;

        if ($dosplit) {
            $ret = substr( $string, 0, $linepos ) . $ptr . substr( $string, $linepos );
        }
        else {
            $ret = $string . $ptr;
        }

        return $ret;
    }
}

# xml_escape($text, @chars): return $text with every "&" and "<" replaced
# by "&amp;" and "&lt;".  Each additional argument must be a single
# character that is escaped as well: ">" as "&gt;", '"' as "&quot;",
# "'" as "&apos;", anything else as a hexadecimal character reference.
# All occurrences are replaced (the quote and apostrophe substitutions
# previously lacked /g and only replaced the first one).
sub xml_escape {
    my $self = shift;
    my $text = shift;

    study $text;
    $text =~ s/\&/\&amp;/g;
    $text =~ s/</\&lt;/g;
    foreach (@_) {
        croak "xml_escape: '$_' isn't a single character" if length($_) > 1;

        if ( $_ eq '>' ) {
            $text =~ s/>/\&gt;/g;
        }
        elsif ( $_ eq '"' ) {
            $text =~ s/\"/\&quot;/g;
        }
        elsif ( $_ eq "'" ) {
            $text =~ s/\'/\&apos;/g;
        }
        else {
            my $rep = '&#' . sprintf( 'x%X', ord($_) ) . ';';
            if (/\W/) {

                # Non-word characters are backslash-escaped for the regex.
                my $ptrn = "\\$_";
                $text =~ s/$ptrn/$rep/g;
            }
            else {
                $text =~ s/$_/$rep/g;
            }
        }
    }
    $text;
}

sub skip_until {
    my $self = shift;
    if ( $self->{_State_} <= 1 ) {
        SkipUntil( $self->{Parser}, $_[0] );
    }
}

sub release {
    my $self = shift;
    ParserRelease( $self->{Parser} );
}

sub DESTROY {
    my $self = shift;
    ParserFree( $self->{Parser} );
}

# parse($arg): parse a string or an IO source.  A parser instance can be
# used once; a second call croaks.  When $arg resolves to an IO handle it
# is parsed as a stream (honouring Stream_Delimiter), otherwise it is
# parsed as a string.  Croaks with ErrorMessage when the parse fails.
sub parse {
    my $self = shift;
    my $arg  = shift;
    croak 'Parse already in progress (Expat)' if $self->{_State_};
    $self->{_State_} = 1;
    my $parser = $self->{Parser};
    my $ioref;
    my $result = 0;

    if ( defined $arg ) {
        local *@;
        if ( ref($arg) and UNIVERSAL::isa( $arg, 'IO::Handle' ) ) {
            $ioref = $arg;
        }
        elsif ( $] < 5.008 and defined tied($arg) ) {
            require IO::Handle;
            $ioref = $arg;
        }
        else {
            require IO::Handle;
            eval {
                no strict 'refs';
                $ioref = *{$arg}{IO} if defined *{$arg};
            };
            if ( ref($ioref) eq 'FileHandle' ) {

                #for perl 5.10.x and possibly earlier, see t/file_open_scalar.t
                require FileHandle;
            }
        }
    }

    if ( defined($ioref) ) {
        my $delim = $self->{Stream_Delimiter};
        my $prev_rs;
        my $ioclass = ref $ioref;
        $ioclass = 'IO::Handle' if !length $ioclass;

        $prev_rs = $ioclass->input_record_separator("\n$delim\n")
          if defined($delim);

        $result = ParseStream( $parser, $ioref, $delim );

        $ioclass->input_record_separator($prev_rs)
          if defined($delim);
    }
    else {
        $result = ParseString( $parser, $arg );
    }

    $self->{_State_} = 2;
    $result or croak $self->{ErrorMessage};
}

sub parsestring {
    my $self = shift;
    $self->parse(@_);
}

# parsefile($path): open the file in binary mode and parse it as a stream.
sub parsefile {
    my $self = shift;
    croak 'Parser has already been used' if $self->{_State_};

    open( my $fh, '<', $_[0] ) or croak "Couldn't open $_[0]:\n$!";
    binmode($fh);
    my $ret = $self->parse($fh);
    close($fh);
    $ret;
}

################################################################
package    #hide from PAUSE
  XML::Parser::ContentModel;
use overload '""' => \&asString, 'eq' => \&thiseq;

# Values of the Type field.
sub EMPTY ()  { 1 }
sub ANY ()    { 2 }
sub MIXED ()  { 3 }
sub NAME ()   { 4 }
sub CHOICE () { 5 }
sub SEQ ()    { 6 }

sub isempty {
    return $_[0]->{Type} == EMPTY;
}

sub isany {
    return $_[0]->{Type} == ANY;
}

sub ismixed {
    return $_[0]->{Type} == MIXED;
}

sub isname {
    return $_[0]->{Type} == NAME;
}

sub name {
    return $_[0]->{Tag};
}

sub ischoice {
    return $_[0]->{Type} == CHOICE;
}

sub isseq {
    return $_[0]->{Type} == SEQ;
}

sub quant {
    return $_[0]->{Quant};
}

# children(): the child models as a list, or undef when there are none.
sub children {
    my $children = $_[0]->{Children};
    if ( defined $children ) {
        return @$children;
    }
    return undef;
}

# asString(): render the model in DTD content-model syntax; the quantifier
# is appended to everything except EMPTY and ANY.
sub asString {
    my ($self) = @_;
    my $ret;

    if ( $self->{Type} == NAME ) {
        $ret = $self->{Tag};
    }
    elsif ( $self->{Type} == EMPTY ) {
        return 'EMPTY';
    }
    elsif ( $self->{Type} == ANY ) {
        return 'ANY';
    }
    elsif ( $self->{Type} == MIXED ) {
        $ret = '(#PCDATA';
        foreach ( @{ $self->{Children} } ) {
            $ret .= '|' . $_;
        }
        $ret .= ')';
    }
    else {
        my $sep = $self->{Type} == CHOICE ? '|' : ',';
        $ret = '(' . join( $sep, map { $_->asString } @{ $self->{Children} } ) . ')';
    }

    $ret .= $self->{Quant} if $self->{Quant};
    return $ret;
}

# thiseq($other): overloaded "eq"; compares the string form with $other.
sub thiseq {
    my $self = shift;

    return $self->asString eq $_[0];
}

################################################################
package    #hide from PAUSE
  XML::Parser::ExpatNB;

use Carp;

our @ISA = qw(XML::Parser::Expat);

# The blocking parse entry points are not available on this class.

sub parse {
    my $self  = shift;
    my $class = ref($self);
    croak "parse method not supported in $class";
}

sub parsestring {
    my $self  = shift;
    my $class = ref($self);
    croak "parsestring method not supported in $class";
}

sub parsefile {
    my $self  = shift;
    my $class = ref($self);
    croak "parsefile method not supported in $class";
}

# parse_more($data): feed another piece of the document; croaks with
# ErrorMessage when the parser reports failure.
sub parse_more {
    my ( $self, $data ) = @_;

    $self->{_State_} = 1;
    my $ret = XML::Parser::Expat::ParsePartial( $self->{Parser}, $data );

    croak $self->{ErrorMessage} unless $ret;
}

# parse_done(): finish the parse, run FinalHandler (if any) in the caller's
# context, release the parser and return the handler's result (or the
# ParseDone result when no handler is set and a scalar is wanted).
sub parse_done {
    my $self = shift;

    my $ret = XML::Parser::Expat::ParseDone( $self->{Parser} );
    unless ($ret) {
        my $msg = $self->{ErrorMessage};
        $self->release;
        croak $msg;
    }

    $self->{_State_} = 2;

    my $result = $ret;
    my @result = ();
    my $final  = $self->{FinalHandler};
    if ( defined $final ) {
        if (wantarray) {
            @result = &$final($self);
        }
        else {
            $result = &$final($self);
        }
    }

    $self->release;

    return unless defined wantarray;
    return wantarray ? @result : $result;
}

################################################################

package    #hide from PAUSE
  XML::Parser::Encinfo;

sub DESTROY {
    my $self = shift;
    XML::Parser::Expat::FreeEncoding($self);
}

1;

__END__

=head1 NAME

XML::Parser::Expat - Lowlevel access to James Clark's expat XML parser

=head1 SYNOPSIS

 use XML::Parser::Expat;

 $parser = XML::Parser::Expat->new;
 $parser->setHandlers('Start' => \&sh,
                      'End'   => \&eh,
                      'Char'  => \&ch);
 open(my $fh, '<', 'info.xml') or die "Couldn't open";
 $parser->parse($fh);
 close($fh);
 # $parser->parse('<foo id="me"> here <em>we</em> go </foo>');

 sub sh
 {
   my ($p, $el, %atts) = @_;
   $p->setHandlers('Char' => \&spec)
     if ($el eq 'special');
   ...
}

 sub eh
 {
   my ($p, $el) = @_;
   $p->setHandlers('Char' => \&ch)  # Special elements won't contain
     if ($el eq 'special');         # other special elements
   ...
 }

=head1 DESCRIPTION

This module provides an interface to James Clark's XML parser, expat. As in
expat, a single instance of the parser can only parse one document. Calls
to parsestring after the first for a given instance will die.

Expat (and XML::Parser::Expat) are event based. As the parser recognizes
parts of the document (say the start or end of an XML element), then any
handlers registered for that type of an event are called with suitable
parameters.

=head1 METHODS

=over 4

=item new

This is a class method, the constructor for XML::Parser::Expat. Options are
passed as keyword value pairs. The recognized options are:

=over 4

=item * ProtocolEncoding

The protocol encoding name. The default is none. The expat built-in
encodings are: C<UTF-8>, C<ISO-8859-1>, C<UTF-16>, and C<US-ASCII>.
Other encodings may be used if they have encoding maps in one of the
directories in the @Encoding_Path list. Setting the protocol encoding
overrides any encoding in the XML declaration.

=item * Namespaces

When this option is given with a true value, then the parser does namespace
processing. By default, namespace processing is turned off. When it is
turned on, the parser consumes I<xmlns> attributes and strips off prefixes
from element and attributes names where those prefixes have a defined
namespace. A name's namespace can be found using the L<"namespace"> method
and two names can be checked for absolute equality with the L<"eq_name">
method.

=item * NoExpand

Normally, the parser will try to expand references to entities defined in
the internal subset. If this option is set to a true value, and a default
handler is also set, then the default handler will be called when an
entity reference is seen in text. This has no effect if a default handler
has not been registered, and it has no effect on the expansion of entity
references inside attribute values.

=item * Stream_Delimiter

This option takes a string value. When this string is found alone on a line
while parsing from a stream, then the parse is ended as if it saw an end of
file. The intended use is with a stream of xml documents in a MIME multipart
format. The string should not contain a trailing newline.

=item * ErrorContext

When this option is defined, errors are reported in context. The value
of ErrorContext should be the number of lines to show on either side of
the line in which the error occurred.

=item * ParseParamEnt

Unless standalone is set to "yes" in the XML declaration, setting this to
a true value allows the external DTD to be read, and parameter entities
to be parsed and expanded.

=item * Base

The base to use for relative pathnames or URLs. This can also be done by
using the base method.

=back

=item setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])

This method registers handlers for the various events. If no handlers are
registered, then a call to parsestring or parsefile will only determine if
the corresponding XML document is well formed (by returning without error.)
This may be called from within a handler, after the parse has started.

Setting a handler to something that evaluates to false unsets that
handler.

This method returns a list of type, handler pairs corresponding to the
input. The handlers returned are the ones that were in effect before the
call to setHandlers.

The recognized events and the parameters passed to the corresponding
handlers are:

=over 4

=item * Start (Parser, Element [, Attr, Val [,...]])

This event is generated when an XML start tag is recognized. Parser is
an XML::Parser::Expat instance. Element is the name of the XML element that
is opened with the start tag. The Attr & Val pairs are generated for each
attribute in the start tag.

=item * End (Parser, Element)

This event is generated when an XML end tag is recognized. Note that
an XML empty tag (<foo/>) generates both a start and an end event.

There is always a lower level start and end handler installed that wrap the corresponding callbacks. This is to handle the context mechanism. A consequence of this is that the default handler (see below) will not see a start tag or end tag unless the default_current method is called. =item * Char (Parser, String) This event is generated when non-markup is recognized. The non-markup sequence of characters is in String. A single non-markup sequence of characters may generate multiple calls to this handler. Whatever the encoding of the string in the original document, this is given to the handler in UTF-8. =item * Proc (Parser, Target, Data) This event is generated when a processing instruction is recognized. =item * Comment (Parser, String) This event is generated when a comment is recognized. =item * CdataStart (Parser) This is called at the start of a CDATA section. =item * CdataEnd (Parser) This is called at the end of a CDATA section. =item * Default (Parser, String) This is called for any characters that don't have a registered handler. This includes both characters that are part of markup for which no events are generated (markup declarations) and characters that could generate events, but for which no handler has been registered. Whatever the encoding in the original document, the string is returned to the handler in UTF-8. =item * Unparsed (Parser, Entity, Base, Sysid, Pubid, Notation) This is called for a declaration of an unparsed entity. Entity is the name of the entity. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Notation is the notation name. Base and Pubid may be undefined. =item * Notation (Parser, Notation, Base, Sysid, Pubid) This is called for a declaration of notation. Notation is the notation name. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. 
=item * ExternEnt (Parser, Base, Sysid, Pubid) This is called when an external entity is referenced. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Base, and Pubid may be undefined. This handler should either return a string, which represents the contents of the external entity, or return an open filehandle that can be read to obtain the contents of the external entity, or return undef, which indicates the external entity couldn't be found and will generate a parse error. If an open filehandle is returned, it must be returned as either a glob (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). =item * ExternEntFin (Parser) This is called after an external entity has been parsed. It allows applications to perform cleanup on actions performed in the above ExternEnt handler. =item * Entity (Parser, Name, Val, Sysid, Pubid, Ndata, IsParam) This is called when an entity is declared. For internal entities, the Val parameter will contain the value and the remaining three parameters will be undefined. For external entities, the Val parameter will be undefined, the Sysid parameter will have the system id, the Pubid parameter will have the public id if it was provided (it will be undefined otherwise), the Ndata parameter will contain the notation for unparsed entities. If this is a parameter entity declaration, then the IsParam parameter is true. Note that this handler and the Unparsed handler above overlap. If both are set, then this handler will not be called for unparsed entities. =item * Element (Parser, Name, Model) The element handler is called when an element declaration is found. Name is the element name, and Model is the content model as an XML::Parser::ContentModel object. See L<"XML::Parser::ContentModel Methods"> for methods available for this class. =item * Attlist (Parser, Elname, Attname, Type, Default, Fixed) This handler is called for each attribute in an ATTLIST declaration. 
So an ATTLIST declaration that has multiple attributes will generate multiple calls to this handler. The Elname parameter is the name of the element with which the attribute is being associated. The Attname parameter is the name of the attribute. Type is the attribute type, given as a string. Default is the default value, which will either be "#REQUIRED", "#IMPLIED" or a quoted string (i.e. the returned string will begin and end with a quote character). If Fixed is true, then this is a fixed attribute. =item * Doctype (Parser, Name, Sysid, Pubid, Internal) This handler is called for DOCTYPE declarations. Name is the document type name. Sysid is the system id of the document type, if it was provided, otherwise it's undefined. Pubid is the public id of the document type, which will be undefined if no public id was given. Internal will be true or false, indicating whether or not the doctype declaration contains an internal subset. =item * DoctypeFin (Parser) This handler is called after parsing of the DOCTYPE declaration has finished, including any internal or external DTD declarations. =item * XMLDecl (Parser, Version, Encoding, Standalone) This handler is called for XML declarations. Version is a string containing the version. Encoding is either undefined or contains an encoding string. Standalone is either undefined, or true or false. Undefined indicates that no standalone parameter was given in the XML declaration. True or false indicates "yes" or "no" respectively. =back =item namespace(name) Return the URI of the namespace that the name belongs to. If the name doesn't belong to any namespace, an undef is returned. This is only valid on names received through the Start or End handlers from a single document, or through a call to the generate_ns_name method. In other words, don't use names generated from one instance of XML::Parser::Expat with other instances. =item eq_name(name1, name2) Return true if name1 and name2 are identical (i.e. 
same name and from the same namespace.) This is only meaningful if both names were obtained through the Start or End handlers from a single document, or through a call to the generate_ns_name method. =item generate_ns_name(name, namespace) Return a name, associated with a given namespace, good for using with the above 2 methods. The namespace argument should be the namespace URI, not a prefix. =item new_ns_prefixes When called from a start tag handler, returns namespace prefixes declared with this start tag. If called elsewhere (or if there were no namespace prefixes declared), it returns an empty list. Setting of the default namespace is indicated with '#default' as a prefix. =item expand_ns_prefix(prefix) Return the URI to which the given prefix is currently bound. Returns undef if the prefix isn't currently bound. Use '#default' to find the current binding of the default namespace (if any). =item current_ns_prefixes Return a list of currently bound namespace prefixes. The order of the prefixes in the list has no meaning. If the default namespace is currently bound, '#default' appears in the list. =item recognized_string Returns the string from the document that was recognized in order to call the current handler. For instance, when called from a start handler, it will give us the start-tag string. The string is encoded in UTF-8. This method doesn't return a meaningful string inside declaration handlers. =item original_string Returns the verbatim string from the document that was recognized in order to call the current handler. The string is in the original document encoding. This method doesn't return a meaningful string inside declaration handlers. =item default_current When called from a handler, causes the sequence of characters that generated the corresponding event to be sent to the default handler (if one is registered). Use of this method is deprecated in favor of the recognized_string method, which you can use without installing a default handler.
This method doesn't deliver a meaningful string to the default handler when called from inside declaration handlers. =item xpcroak(message) Concatenate onto the given message the current line number within the XML document plus the message implied by ErrorContext. Then croak with the formed message. =item xpcarp(message) Concatenate onto the given message the current line number within the XML document plus the message implied by ErrorContext. Then carp with the formed message. =item current_line Returns the line number of the current position of the parse. =item current_column Returns the column number of the current position of the parse. =item current_byte Returns the current position of the parse. =item base([NEWBASE]); Returns the current value of the base for resolving relative URIs. If NEWBASE is supplied, changes the base to that value. =item context Returns a list of element names that represent open elements, with the last one being the innermost. Inside start and end tag handlers, this will be the tag of the parent element. =item current_element Returns the name of the innermost currently opened element. Inside start or end handlers, returns the parent of the element associated with those tags. =item in_element(NAME) Returns true if NAME is equal to the name of the innermost currently opened element. If namespace processing is being used and you want to check against a name that may be in a namespace, then use the generate_ns_name method to create the NAME argument. =item within_element(NAME) Returns the number of times the given name appears in the context list. If namespace processing is being used and you want to check against a name that may be in a namespace, then use the generate_ns_name method to create the NAME argument. =item depth Returns the size of the context list. =item element_index Returns an integer that is the depth-first visit order of the current element. This will be zero outside of the root element. 
For example, this will return 1 when called from the start handler for the root element start tag. =item skip_until(INDEX) INDEX is an integer that represents an element index. When this method is called, all handlers are suspended until the start tag for an element that has an index number equal to INDEX is seen. If a start handler has been set, then this is the first tag that the start handler will see after skip_until has been called. =item position_in_context(LINES) Returns a string that shows the current parse position. LINES should be an integer >= 0 that represents the number of lines on either side of the current parse line to place into the returned string. =item xml_escape(TEXT [, CHAR [, CHAR ...]]) Returns TEXT with markup characters turned into character entities. Any additional characters provided as arguments are also turned into character references where found in TEXT. =item parse (SOURCE) The SOURCE parameter should either be a string containing the whole XML document, or it should be an open IO::Handle. Only a single document may be parsed for a given instance of XML::Parser::Expat, so this will croak if it's been called previously for this instance. =item parsestring(XML_DOC_STRING) Parses the given string as an XML document. Only a single document may be parsed for a given instance of XML::Parser::Expat, so this will die if either parsestring or parsefile has been called for this instance previously. This method is deprecated in favor of the parse method. =item parsefile(FILENAME) Parses the XML document in the given file. Will die if parsestring or parsefile has been called previously for this instance. =item is_defaulted(ATTNAME) NO LONGER WORKS. To find out if an attribute is defaulted please use the specified_attr method. =item specified_attr When the start handler receives lists of attributes and values, the non-defaulted (i.e. explicitly specified) attributes occur in the list first. 
This method returns the number of specified items in the list. So if this number is equal to the length of the list, there were no defaulted values. Otherwise the number points to the index of the first defaulted attribute name. =item finish Unsets all handlers (including internal ones that set context), but expat continues parsing to the end of the document or until it finds an error. It should finish up a lot faster than with the handlers set. =item release There are data structures used by XML::Parser::Expat that have circular references. This means that these structures will never be garbage collected unless these references are explicitly broken. Calling this method breaks those references (and makes the instance unusable.) Normally, higher level calls handle this for you, but if you are using XML::Parser::Expat directly, then it's your responsibility to call it. =back =head2 XML::Parser::ContentModel Methods The element declaration handlers are passed objects of this class as the content model of the element declaration. They also represent content particles, components of a content model. When referred to as a string, these objects are automagically converted to a string representation of the model (or content particle). =over 4 =item isempty This method returns true if the object is "EMPTY", false otherwise. =item isany This method returns true if the object is "ANY", false otherwise. =item ismixed This method returns true if the object is "(#PCDATA)" or "(#PCDATA|...)*", false otherwise. =item isname This method returns true if the object is an element name. =item ischoice This method returns true if the object is a choice of content particles. =item isseq This method returns true if the object is a sequence of content particles. =item quant This method returns undef or a string representing the quantifier ('?', '*', '+') associated with the model or particle.
=item children This method returns undef or (for mixed, choice, and sequence types) an array of component content particles. There will always be at least one component for choices and sequences, but for a mixed content model of pure PCDATA, "(#PCDATA)", then an undef is returned. =back =head2 XML::Parser::ExpatNB Methods The class XML::Parser::ExpatNB is a subclass of XML::Parser::Expat used for non-blocking access to the expat library. It does not support the parse, parsestring, or parsefile methods, but it does have these additional methods: =over 4 =item parse_more(DATA) Feed expat more text to munch on. =item parse_done Tell expat that it's gotten the whole document. =back =head1 FUNCTIONS =over 4 =item XML::Parser::Expat::load_encoding(ENCODING) Load an external encoding. ENCODING is either the name of an encoding or the name of a file. The basename is converted to lowercase and a '.enc' extension is appended unless there's one already there. Then, unless it's an absolute pathname (i.e. begins with '/'), the first file by that name discovered in the @Encoding_Path path list is used. The encoding in the file is loaded and kept in the %Encoding_Table table. Earlier encodings of the same name are replaced. This function is automatically called by expat when it encounters an encoding it doesn't know about. Expat shouldn't call this twice for the same encoding name. The only reason users should use this function is to explicitly load an encoding not contained in the @Encoding_Path list. =back =head1 AUTHORS Larry Wall > wrote version 1.0. Clark Cooper > picked up support, changed the API for this version (2.x), provided documentation, and added some standard package features. =cut XML-Parser-2.46/Expat/Expat.xs0000644000000000000000000013146713542306327014574 0ustar rootroot/***************************************************************** ** Expat.xs ** ** Copyright 1998 Larry Wall and Clark Cooper ** All rights reserved. 
** ** This program is free software; you can redistribute it and/or ** modify it under the same terms as Perl itself. ** */ #include #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #undef convert #include "patchlevel.h" #include "encoding.h" /* Version 5.005_5x (Development version for 5.006) doesn't like sv_... anymore, but 5.004 doesn't know about PL_sv.. Don't want to push up required version just for this. */ #if PATCHLEVEL < 5 #define PL_sv_undef sv_undef #define PL_sv_no sv_no #define PL_sv_yes sv_yes #define PL_na na #endif #define BUFSIZE 32768 #define NSDELIM '|' /* Macro to update handler fields. Used in the various handler setting XSUBS */ #define XMLP_UPD(fld) \ RETVAL = cbv->fld ? newSVsv(cbv->fld) : &PL_sv_undef;\ if (cbv->fld) {\ if (cbv->fld != fld)\ sv_setsv(cbv->fld, fld);\ }\ else\ cbv->fld = newSVsv(fld) /* Macro to push old handler value onto return stack. This is done here to get around a bug in 5.004 sv_2mortal function. */ #define PUSHRET \ ST(0) = RETVAL;\ if (RETVAL != &PL_sv_undef && SvREFCNT(RETVAL)) sv_2mortal(RETVAL) typedef struct { SV* self_sv; XML_Parser p; AV* context; AV* new_prefix_list; HV *nstab; AV *nslst; unsigned int st_serial; unsigned int st_serial_stackptr; unsigned int st_serial_stacksize; unsigned int * st_serial_stack; unsigned int skip_until; SV *recstring; char * delim; STRLEN delimlen; unsigned ns:1; unsigned no_expand:1; unsigned parseparam:1; /* Callback handlers */ SV* start_sv; SV* end_sv; SV* char_sv; SV* proc_sv; SV* cmnt_sv; SV* dflt_sv; SV* entdcl_sv; SV* eledcl_sv; SV* attdcl_sv; SV* doctyp_sv; SV* doctypfin_sv; SV* xmldec_sv; SV* unprsd_sv; SV* notation_sv; SV* extent_sv; SV* extfin_sv; SV* startcd_sv; SV* endcd_sv; } CallbackVector; static HV* EncodingTable = NULL; static XML_Char nsdelim[] = {NSDELIM, '\0'}; static char *QuantChar[] = {"", "?", "*", "+"}; /* Forward declarations */ static void suspend_callbacks(CallbackVector *); static void resume_callbacks(CallbackVector *); #if PATCHLEVEL < 5 
&& SUBVERSION < 5 /* ================================================================ ** This is needed where the length is explicitly given. The expat ** library may sometimes give us zero-length strings. Perl's newSVpv ** interprets a zero length as a directive to do a strlen. This ** function is used when we want to force length to mean length, even ** if zero. */ static SV * newSVpvn(char *s, STRLEN len) { register SV *sv; sv = newSV(0); sv_setpvn(sv, s, len); return sv; } /* End newSVpvn */ #define ERRSV GvSV(errgv) #endif #ifdef SvUTF8_on static SV * newUTF8SVpv(char *s, STRLEN len) { register SV *sv; sv = newSVpv(s, len); SvUTF8_on(sv); return sv; } /* End new UTF8SVpv */ static SV * newUTF8SVpvn(char *s, STRLEN len) { register SV *sv; sv = newSV(0); sv_setpvn(sv, s, len); SvUTF8_on(sv); return sv; } #else /* SvUTF8_on not defined */ #define newUTF8SVpv newSVpv #define newUTF8SVpvn newSVpvn #endif static void* mymalloc(size_t size) { #ifndef LEAKTEST return safemalloc(size); #else return safexmalloc(328,size); #endif } static void* myrealloc(void *p, size_t s) { #ifndef LEAKTEST return saferealloc(p, s); #else return safexrealloc(p, s); #endif } static void myfree(void *p) { Safefree(p); } static XML_Memory_Handling_Suite ms = {mymalloc, myrealloc, myfree}; static void append_error(XML_Parser parser, char * err) { dSP; CallbackVector * cbv; SV ** errstr; cbv = (CallbackVector*) XML_GetUserData(parser); errstr = hv_fetch((HV*)SvRV(cbv->self_sv), "ErrorMessage", 12, 0); if (errstr && SvPOK(*errstr)) { SV ** errctx = hv_fetch((HV*) SvRV(cbv->self_sv), "ErrorContext", 12, 0); int dopos = !err && errctx && SvOK(*errctx); if (! err) err = (char *) XML_ErrorString(XML_GetErrorCode(parser)); sv_catpvf(*errstr, "\n%s at line %ld, column %ld, byte %ld%s", err, (long)XML_GetCurrentLineNumber(parser), (long)XML_GetCurrentColumnNumber(parser), (long)XML_GetCurrentByteIndex(parser), dopos ? 
":\n" : ""); /* See https://rt.cpan.org/Ticket/Display.html?id=92030 It explains why type conversion is used. */ if (dopos) { int count; ENTER ; SAVETMPS ; PUSHMARK(sp); XPUSHs(cbv->self_sv); XPUSHs(*errctx); PUTBACK ; count = perl_call_method("position_in_context", G_SCALAR); SPAGAIN ; if (count >= 1) { sv_catsv(*errstr, POPs); } PUTBACK ; FREETMPS ; LEAVE ; } } } /* End append_error */ static SV * generate_model(XML_Content *model) { HV * hash = newHV(); SV * obj = newRV_noinc((SV *) hash); sv_bless(obj, gv_stashpv("XML::Parser::ContentModel", 1)); hv_store(hash, "Type", 4, newSViv(model->type), 0); if (model->quant != XML_CQUANT_NONE) { hv_store(hash, "Quant", 5, newSVpv(QuantChar[model->quant], 1), 0); } switch(model->type) { case XML_CTYPE_NAME: hv_store(hash, "Tag", 3, newUTF8SVpv((char *)model->name, 0), 0); break; case XML_CTYPE_MIXED: case XML_CTYPE_CHOICE: case XML_CTYPE_SEQ: if (model->children && model->numchildren) { AV * children = newAV(); int i; for (i = 0; i < model->numchildren; i++) { av_push(children, generate_model(&model->children[i])); } hv_store(hash, "Children", 8, newRV_noinc((SV *) children), 0); } break; } return obj; } /* End generate_model */ static int parse_stream(XML_Parser parser, SV * ioref) { dSP; SV * tbuff; SV * tsiz; char * linebuff; STRLEN lblen; STRLEN br = 0; int buffsize; int done = 0; int ret = 1; char * msg = NULL; CallbackVector * cbv; char *buff = (char *) 0; cbv = (CallbackVector*) XML_GetUserData(parser); ENTER; SAVETMPS; if (cbv->delim) { int cnt; SV * tline; PUSHMARK(SP); XPUSHs(ioref); PUTBACK ; cnt = perl_call_method("getline", G_SCALAR); SPAGAIN; if (cnt != 1) croak("getline method call failed"); tline = POPs; if (! 
SvOK(tline)) { lblen = 0; } else { char * chk; linebuff = SvPV(tline, lblen); chk = &linebuff[lblen - cbv->delimlen - 1]; if (lblen > cbv->delimlen + 1 && *chk == *cbv->delim && chk[cbv->delimlen] == '\n' && strnEQ(++chk, cbv->delim + 1, cbv->delimlen - 1)) lblen -= cbv->delimlen + 1; } PUTBACK ; buffsize = lblen; done = lblen == 0; } else { tbuff = newSV(0); tsiz = newSViv(BUFSIZE); /* in UTF-8 characters */ buffsize = BUFSIZE * 6; /* in bytes that encode an UTF-8 string */ } while (! done) { char *buffer = XML_GetBuffer(parser, buffsize); if (! buffer) croak("Ran out of memory for input buffer"); SAVETMPS; if (cbv->delim) { Copy(linebuff, buffer, lblen, char); br = lblen; done = 1; } else { int cnt; SV * rdres; char * tb; PUSHMARK(SP); EXTEND(SP, 3); PUSHs(ioref); PUSHs(tbuff); PUSHs(tsiz); PUTBACK ; cnt = perl_call_method("read", G_SCALAR); SPAGAIN ; if (cnt != 1) croak("read method call failed"); rdres = POPs; if (! SvOK(rdres)) croak("read error"); tb = SvPV(tbuff, br); if (br > 0) { if (br > buffsize) croak("The input buffer is not large enough for read UTF-8 decoded string"); Copy(tb, buffer, br, char); } else done = 1; PUTBACK ; } ret = XML_ParseBuffer(parser, br, done); SPAGAIN; /* resync local SP in case callbacks changed global stack */ if (! ret) break; FREETMPS; } if (! ret) append_error(parser, msg); if (! 
cbv->delim) { SvREFCNT_dec(tsiz); SvREFCNT_dec(tbuff); } FREETMPS; LEAVE; return ret; } /* End parse_stream */ static SV * gen_ns_name(const char * name, HV * ns_table, AV * ns_list) { char *pos = strchr(name, NSDELIM); SV * ret; if (pos && pos > name) { SV ** name_ent = hv_fetch(ns_table, (char *) name, pos - name, TRUE); ret = newUTF8SVpv(&pos[1], 0); if (name_ent) { int index; if (SvOK(*name_ent)) { index = SvIV(*name_ent); } else { av_push(ns_list, newUTF8SVpv((char *) name, pos - name)); index = av_len(ns_list); sv_setiv(*name_ent, (IV) index); } sv_setiv(ret, (IV) index); SvPOK_on(ret); } } else ret = newUTF8SVpv((char *) name, 0); return ret; } /* End gen_ns_name */ static void characterData(void *userData, const char *s, int len) { dSP; CallbackVector* cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 2); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpvn((char*)s,len))); PUTBACK; perl_call_sv(cbv->char_sv, G_DISCARD); FREETMPS; LEAVE; } /* End characterData */ static void startElement(void *userData, const char *name, const char **atts) { dSP; CallbackVector* cbv = (CallbackVector*) userData; SV ** pcontext; unsigned do_ns = cbv->ns; unsigned skipping = 0; SV ** pnstab; SV ** pnslst; SV * elname; cbv->st_serial++; if (cbv->skip_until) { skipping = cbv->st_serial < cbv->skip_until; if (! skipping) { resume_callbacks(cbv); cbv->skip_until = 0; } } if (cbv->st_serial_stackptr >= cbv->st_serial_stacksize) { unsigned int newsize = cbv->st_serial_stacksize + 512; Renew(cbv->st_serial_stack, newsize, unsigned int); cbv->st_serial_stacksize = newsize; } cbv->st_serial_stack[++cbv->st_serial_stackptr] = cbv->st_serial; if (do_ns) elname = gen_ns_name(name, cbv->nstab, cbv->nslst); else elname = newUTF8SVpv((char *)name, 0); if (! 
skipping && SvTRUE(cbv->start_sv)) { const char **attlim = atts; while (*attlim) attlim++; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, attlim - atts + 2); PUSHs(cbv->self_sv); PUSHs(elname); while (*atts) { SV * attname; attname = (do_ns ? gen_ns_name(*atts, cbv->nstab, cbv->nslst) : newUTF8SVpv((char *) *atts, 0)); atts++; PUSHs(sv_2mortal(attname)); if (*atts) PUSHs(sv_2mortal(newUTF8SVpv((char*)*atts++,0))); } PUTBACK; perl_call_sv(cbv->start_sv, G_DISCARD); FREETMPS; LEAVE; } av_push(cbv->context, elname); if (cbv->ns) { av_clear(cbv->new_prefix_list); } } /* End startElement */ static void endElement(void *userData, const char *name) { dSP; CallbackVector* cbv = (CallbackVector*) userData; SV *elname; elname = av_pop(cbv->context); if (! cbv->st_serial_stackptr) { croak("endElement: Start tag serial number stack underflow"); } if (! cbv->skip_until && SvTRUE(cbv->end_sv)) { ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 2); PUSHs(cbv->self_sv); PUSHs(elname); PUTBACK; perl_call_sv(cbv->end_sv, G_DISCARD); FREETMPS; LEAVE; } cbv->st_serial_stackptr--; SvREFCNT_dec(elname); } /* End endElement */ static void processingInstruction(void *userData, const char *target, const char *data) { dSP; CallbackVector* cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 3); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpv((char*)target,0))); PUSHs(sv_2mortal(newUTF8SVpv((char*)data,0))); PUTBACK; perl_call_sv(cbv->proc_sv, G_DISCARD); FREETMPS; LEAVE; } /* End processingInstruction */ static void commenthandle(void *userData, const char *string) { dSP; CallbackVector * cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 2); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpv((char*) string, 0))); PUTBACK; perl_call_sv(cbv->cmnt_sv, G_DISCARD); FREETMPS; LEAVE; } /* End commenthandler */ static void startCdata(void *userData) { dSP; CallbackVector* cbv = (CallbackVector*) userData; if (cbv->startcd_sv) { ENTER; SAVETMPS; PUSHMARK(sp); 
XPUSHs(cbv->self_sv); PUTBACK; perl_call_sv(cbv->startcd_sv, G_DISCARD); FREETMPS; LEAVE; } } /* End startCdata */ static void endCdata(void *userData) { dSP; CallbackVector* cbv = (CallbackVector*) userData; if (cbv->endcd_sv) { ENTER; SAVETMPS; PUSHMARK(sp); XPUSHs(cbv->self_sv); PUTBACK; perl_call_sv(cbv->endcd_sv, G_DISCARD); FREETMPS; LEAVE; } } /* End endCdata */ static void nsStart(void *userdata, const XML_Char *prefix, const XML_Char *uri){ dSP; CallbackVector* cbv = (CallbackVector*) userdata; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 3); PUSHs(cbv->self_sv); PUSHs(prefix ? sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef); PUSHs(uri ? sv_2mortal(newUTF8SVpv((char *)uri, 0)) : &PL_sv_undef); PUTBACK; perl_call_method("NamespaceStart", G_DISCARD); FREETMPS; LEAVE; } /* End nsStart */ static void nsEnd(void *userdata, const XML_Char *prefix) { dSP; CallbackVector* cbv = (CallbackVector*) userdata; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 2); PUSHs(cbv->self_sv); PUSHs(prefix ? 
sv_2mortal(newUTF8SVpv((char *)prefix, 0)) : &PL_sv_undef); PUTBACK; perl_call_method("NamespaceEnd", G_DISCARD); FREETMPS; LEAVE; } /* End nsEnd */ static void defaulthandle(void *userData, const char *string, int len) { dSP; CallbackVector* cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 2); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpvn((char*)string, len))); PUTBACK; perl_call_sv(cbv->dflt_sv, G_DISCARD); FREETMPS; LEAVE; } /* End defaulthandle */ static void elementDecl(void *data, const char *name, XML_Content *model) { dSP; CallbackVector *cbv = (CallbackVector*) data; SV *cmod; ENTER; SAVETMPS; cmod = generate_model(model); Safefree(model); PUSHMARK(sp); EXTEND(sp, 3); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpv((char *)name, 0))); PUSHs(sv_2mortal(cmod)); PUTBACK; perl_call_sv(cbv->eledcl_sv, G_DISCARD); FREETMPS; LEAVE; } /* End elementDecl */ static void attributeDecl(void *data, const char * elname, const char * attname, const char * att_type, const char * dflt, int reqorfix) { dSP; CallbackVector *cbv = (CallbackVector*) data; SV * dfltsv; if (dflt) { dfltsv = newUTF8SVpv("'", 1); sv_catpv(dfltsv, (char *) dflt); sv_catpv(dfltsv, "'"); } else { dfltsv = newUTF8SVpv(reqorfix ? 
"#REQUIRED" : "#IMPLIED", 0); } ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 5); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpv((char *)elname, 0))); PUSHs(sv_2mortal(newUTF8SVpv((char *)attname, 0))); PUSHs(sv_2mortal(newUTF8SVpv((char *)att_type, 0))); PUSHs(sv_2mortal(dfltsv)); if (dflt && reqorfix) XPUSHs(&PL_sv_yes); PUTBACK; perl_call_sv(cbv->attdcl_sv, G_DISCARD); FREETMPS; LEAVE; } /* End attributeDecl */ static void entityDecl(void *data, const char *name, int isparam, const char *value, int vlen, const char *base, const char *sysid, const char *pubid, const char *notation) { dSP; CallbackVector *cbv = (CallbackVector*) data; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 6); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0))); PUSHs(value ? sv_2mortal(newUTF8SVpvn((char*)value, vlen)) : &PL_sv_undef); PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char *)sysid, 0)) : &PL_sv_undef); PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char *)pubid, 0)) : &PL_sv_undef); PUSHs(notation ? sv_2mortal(newUTF8SVpv((char *)notation, 0)) : &PL_sv_undef); if (isparam) XPUSHs(&PL_sv_yes); PUTBACK; perl_call_sv(cbv->entdcl_sv, G_DISCARD); FREETMPS; LEAVE; } /* End entityDecl */ static void doctypeStart(void *userData, const char* name, const char* sysid, const char* pubid, int hasinternal) { dSP; CallbackVector *cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 5); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpv((char*)name, 0))); PUSHs(sysid ? sv_2mortal(newUTF8SVpv((char*)sysid, 0)) : &PL_sv_undef); PUSHs(pubid ? sv_2mortal(newUTF8SVpv((char*)pubid, 0)) : &PL_sv_undef); PUSHs(hasinternal ? 
&PL_sv_yes : &PL_sv_no); PUTBACK; perl_call_sv(cbv->doctyp_sv, G_DISCARD); FREETMPS; LEAVE; } /* End doctypeStart */ static void doctypeEnd(void *userData) { dSP; CallbackVector *cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 1); PUSHs(cbv->self_sv); PUTBACK; perl_call_sv(cbv->doctypfin_sv, G_DISCARD); FREETMPS; LEAVE; } /* End doctypeEnd */ static void xmlDecl(void *userData, const char *version, const char *encoding, int standalone) { dSP; CallbackVector *cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 4); PUSHs(cbv->self_sv); PUSHs(version ? sv_2mortal(newUTF8SVpv((char *)version, 0)) : &PL_sv_undef); PUSHs(encoding ? sv_2mortal(newUTF8SVpv((char *)encoding, 0)) : &PL_sv_undef); PUSHs(standalone == -1 ? &PL_sv_undef : (standalone ? &PL_sv_yes : &PL_sv_no)); PUTBACK; perl_call_sv(cbv->xmldec_sv, G_DISCARD); FREETMPS; LEAVE; } /* End xmlDecl */ static void unparsedEntityDecl(void *userData, const char* entity, const char* base, const char* sysid, const char* pubid, const char* notation) { dSP; CallbackVector* cbv = (CallbackVector*) userData; ENTER; SAVETMPS; PUSHMARK(sp); EXTEND(sp, 6); PUSHs(cbv->self_sv); PUSHs(sv_2mortal(newUTF8SVpv((char*) entity, 0))); PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef); PUSHs(sv_2mortal(newUTF8SVpv((char*) sysid, 0))); PUSHs(pubid ? 
sv_2mortal(newUTF8SVpv((char*) pubid, 0)) : &PL_sv_undef); PUSHs(sv_2mortal(newUTF8SVpv((char*) notation, 0))); PUTBACK; perl_call_sv(cbv->unprsd_sv, G_DISCARD); FREETMPS; LEAVE; } /* End unparsedEntityDecl */ static void notationDecl(void *userData, const char *name, const char *base, const char *sysid, const char *pubid) { dSP; CallbackVector* cbv = (CallbackVector*) userData; PUSHMARK(sp); XPUSHs(cbv->self_sv); XPUSHs(sv_2mortal(newUTF8SVpv((char*) name, 0))); if (base) { XPUSHs(sv_2mortal(newUTF8SVpv((char *) base, 0))); } else if (sysid || pubid) { XPUSHs(&PL_sv_undef); } if (sysid) { XPUSHs(sv_2mortal(newUTF8SVpv((char *) sysid, 0))); } else if (pubid) { XPUSHs(&PL_sv_undef); } if (pubid) XPUSHs(sv_2mortal(newUTF8SVpv((char *) pubid, 0))); PUTBACK; perl_call_sv(cbv->notation_sv, G_DISCARD); } /* End notationDecl */ static int externalEntityRef(XML_Parser parser, const char* open, const char* base, const char* sysid, const char* pubid) { dSP; #if defined(USE_THREADS) && PATCHLEVEL==6 dTHX; #endif int count; int ret = 0; int parse_done = 0; CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser); if (! cbv->extent_sv) return 0; ENTER ; SAVETMPS ; PUSHMARK(sp); EXTEND(sp, pubid ? 4 : 3); PUSHs(cbv->self_sv); PUSHs(base ? sv_2mortal(newUTF8SVpv((char*) base, 0)) : &PL_sv_undef); PUSHs(sv_2mortal(newSVpv((char*) sysid, 0))); if (pubid) PUSHs(sv_2mortal(newUTF8SVpv((char*) pubid, 0))); PUTBACK ; count = perl_call_sv(cbv->extent_sv, G_SCALAR); SPAGAIN ; if (count >= 1) { SV * result = POPs; int type; if (result && (type = SvTYPE(result)) > 0) { SV **pval = hv_fetch((HV*) SvRV(cbv->self_sv), "Parser", 6, 0); if (! pval || ! 
SvIOK(*pval)) append_error(parser, "Can't find parser entry in XML::Parser object"); else { XML_Parser entpar; char *errmsg = (char *) 0; entpar = XML_ExternalEntityParserCreate(parser, open, 0); XML_SetBase(entpar, XML_GetBase(parser)); sv_setiv(*pval, (IV) entpar); cbv->p = entpar; PUSHMARK(sp); EXTEND(sp, 2); PUSHs(*pval); PUSHs(result); PUTBACK; count = perl_call_pv("XML::Parser::Expat::Do_External_Parse", G_SCALAR | G_EVAL); SPAGAIN; if (SvTRUE(ERRSV)) { char *hold; STRLEN len; POPs; hold = SvPV(ERRSV, len); New(326, errmsg, len + 1, char); if (len) Copy(hold, errmsg, len, char); goto Extparse_Cleanup; } if (count > 0) ret = POPi; parse_done = 1; Extparse_Cleanup: cbv->p = parser; sv_setiv(*pval, (IV) parser); XML_ParserFree(entpar); if (cbv->extfin_sv) { PUSHMARK(sp); PUSHs(cbv->self_sv); PUTBACK; perl_call_sv(cbv->extfin_sv, G_DISCARD); SPAGAIN; } if (SvTRUE(ERRSV)) append_error(parser, SvPV_nolen(ERRSV)); } } } if (! ret && ! parse_done) append_error(parser, "Handler couldn't resolve external entity"); PUTBACK ; FREETMPS ; LEAVE ; return ret; } /* End externalEntityRef */ /*================================================================ ** This is the function that expat calls to convert multi-byte sequences ** for external encodings. Each byte in the sequence is used to index ** into the current map to either set the next map or, in the case of ** the final byte, to get the corresponding Unicode scalar, which is ** returned. 
*/

/*
 * data - the Encinfo installed by unknownEncoding()
 * seq  - the multi-byte sequence to convert (at most 4 bytes are read)
 *
 * Returns the mapped value from the byte map, or -1 when the sequence is
 * not valid for this encoding.  Every index taken from the map is
 * range-checked against bytemap_size / prefixes_size, so a damaged or
 * hostile encoding map yields -1 instead of an out-of-bounds read.
 */
static int
convert_to_unicode(void *data, const char *seq) {
  Encinfo *enc = (Encinfo *) data;
  PrefixMap *curpfx;
  int count;
  int index = 0;

  for (count = 0; count < 4; count++) {
    unsigned char byte = (unsigned char) seq[count];
    unsigned char bndx;
    unsigned char bmsk;
    unsigned int slot;
    int offset;

    curpfx = &enc->prefixes[index];
    offset = ((int) byte) - curpfx->min;
    if (offset < 0)
      break;
    /* A stored length of 0 means "no upper limit" for this prefix. */
    if (offset >= curpfx->len && curpfx->len != 0)
      break;

    slot = (unsigned int) curpfx->bmap_start + (unsigned int) offset;
    if (slot >= (unsigned int) enc->bytemap_size)
      break;

    bndx = byte >> 3;
    bmsk = 1 << (byte & 0x7);

    if (curpfx->ispfx[bndx] & bmsk) {
      /* Lead byte: the map entry selects the next prefix table. */
      index = enc->bytemap[slot];
      if ((unsigned int) index >= (unsigned int) enc->prefixes_size)
	break;
    }
    else if (curpfx->ischar[bndx] & bmsk) {
      /* Final byte: the map entry is the result. */
      return enc->bytemap[slot];
    }
    else
      break;
  }

  return -1;
} /* End convert_to_unicode */

static int
unknownEncoding(void *unused, const char *name, XML_Encoding *info) {
  SV ** encinfptr;
  Encinfo *enc;
  int namelen;
  int i;
  char buff[42];

  namelen = strlen(name);
  if (namelen > 40)
    return 0;

  /* Make uppercase */
  for (i = 0; i < namelen; i++) {
    char c = name[i];
    if (c >= 'a' && c <= 'z')
      c -= 'a' - 'A';
    buff[i] = c;
  }

  if (! EncodingTable) {
    EncodingTable = perl_get_hv("XML::Parser::Expat::Encoding_Table", FALSE);
    if (! EncodingTable)
      croak("Can't find XML::Parser::Expat::Encoding_Table");
  }

  encinfptr = hv_fetch(EncodingTable, buff, namelen, 0);

  if (! encinfptr || ! SvOK(*encinfptr)) {
    /* Not found, so try to autoload */
    dSP;
    int count;

    ENTER;
    SAVETMPS;
    PUSHMARK(sp);
    XPUSHs(sv_2mortal(newSVpvn(buff,namelen)));
    PUTBACK;
    perl_call_pv("XML::Parser::Expat::load_encoding", G_DISCARD);

    encinfptr = hv_fetch(EncodingTable, buff, namelen, 0);
    FREETMPS;
    LEAVE;

    if (! encinfptr || ! SvOK(*encinfptr))
      return 0;
  }

  if (!
sv_derived_from(*encinfptr, "XML::Parser::Encinfo"))
    croak("Entry in XML::Parser::Expat::Encoding_Table not an Encinfo object");

  /* The blessed scalar holds the Encinfo pointer as an integer. */
  enc = (Encinfo *) SvIV((SV*)SvRV(*encinfptr));
  Copy(enc->firstmap, info->map, 256, int);
  info->release = NULL;
  /* Only encodings with prefix tables need the multi-byte converter. */
  if (enc->prefixes_size) {
    info->data = (void *) enc;
    info->convert = convert_to_unicode;
  }
  else {
    info->data = NULL;
    info->convert = NULL;
  }

  return 1;
} /* End unknownEncoding */

/*
 * Default-handler replacement used to capture text: appends the
 * `len` bytes at `string` to cbv->recstring, creating that SV on first
 * use.
 */
static void
recString(void *userData, const char *string, int len) {
  CallbackVector *cbv = (CallbackVector*) userData;

  if (cbv->recstring) {
    sv_catpvn(cbv->recstring, (char *) string, len);
  }
  else {
    cbv->recstring = newUTF8SVpvn((char *) string, len);
  }
} /* End recString */

/*
 * Detach from expat (on cbv->p) every handler in the list below whose
 * Perl callback is currently set (true).  The Perl callbacks themselves
 * are left untouched so resume_callbacks() can re-attach them.
 */
static void
suspend_callbacks(CallbackVector *cbv) {
  if (SvTRUE(cbv->char_sv)) {
    XML_SetCharacterDataHandler(cbv->p,
				(XML_CharacterDataHandler) 0);
  }

  if (SvTRUE(cbv->proc_sv)) {
    XML_SetProcessingInstructionHandler(cbv->p,
					(XML_ProcessingInstructionHandler) 0);
  }

  if (SvTRUE(cbv->cmnt_sv)) {
    XML_SetCommentHandler(cbv->p,
			  (XML_CommentHandler) 0);
  }

  /* expat sets the CDATA start/end handlers as a pair. */
  if (SvTRUE(cbv->startcd_sv)
      || SvTRUE(cbv->endcd_sv)) {
    XML_SetCdataSectionHandler(cbv->p,
			       (XML_StartCdataSectionHandler) 0,
			       (XML_EndCdataSectionHandler) 0);
  }

  if (SvTRUE(cbv->unprsd_sv)) {
    XML_SetUnparsedEntityDeclHandler(cbv->p,
				     (XML_UnparsedEntityDeclHandler) 0);
  }

  if (SvTRUE(cbv->notation_sv)) {
    XML_SetNotationDeclHandler(cbv->p,
			       (XML_NotationDeclHandler) 0);
  }

  if (SvTRUE(cbv->extent_sv)) {
    XML_SetExternalEntityRefHandler(cbv->p,
				    (XML_ExternalEntityRefHandler) 0);
  }

} /* End suspend_callbacks */

/*
 * Counterpart of suspend_callbacks(): re-install the C trampolines on
 * cbv->p for every handler whose Perl callback is set.
 */
static void
resume_callbacks(CallbackVector *cbv) {
  if (SvTRUE(cbv->char_sv)) {
    XML_SetCharacterDataHandler(cbv->p, characterData);
  }

  if (SvTRUE(cbv->proc_sv)) {
    XML_SetProcessingInstructionHandler(cbv->p, processingInstruction);
  }

  if (SvTRUE(cbv->cmnt_sv)) {
    XML_SetCommentHandler(cbv->p, commenthandle);
  }

  if (SvTRUE(cbv->startcd_sv)
      || SvTRUE(cbv->endcd_sv)) {
    XML_SetCdataSectionHandler(cbv->p, startCdata, endCdata);
  }

  if (SvTRUE(cbv->unprsd_sv))
{
    XML_SetUnparsedEntityDeclHandler(cbv->p, unparsedEntityDecl);
  }

  if (SvTRUE(cbv->notation_sv)) {
    XML_SetNotationDeclHandler(cbv->p, notationDecl);
  }

  if (SvTRUE(cbv->extent_sv)) {
    XML_SetExternalEntityRefHandler(cbv->p, externalEntityRef);
  }

} /* End resume_callbacks */

MODULE = XML::Parser::Expat PACKAGE = XML::Parser::Expat	PREFIX = XML_

XML_Parser
XML_ParserCreate(self_sv, enc_sv, namespaces)
        SV *				self_sv
	SV *				enc_sv
	int				namespaces
    CODE:
      {
	/* Create an expat parser bound to the XML::Parser::Expat object
	   self_sv (a hash reference).  enc_sv, when true, names the
	   protocol encoding; a non-zero `namespaces` enables namespace
	   processing.

	   All required entries of the object are looked up and checked
	   BEFORE anything is allocated, so the croak() paths cannot leak
	   the callback vector, its serial stack or a reference on
	   self_sv. */
	CallbackVector *cbv;
	enum XML_ParamEntityParsing pep = XML_PARAM_ENTITY_PARSING_NEVER;
	char *enc = (char *) (SvTRUE(enc_sv) ? SvPV_nolen(enc_sv) : 0);
	HV *self_hv = (HV*) SvRV(self_sv);
	AV *context;
	AV *new_prefix_list = (AV *) 0;
	HV *nstab = (HV *) 0;
	AV *nslst = (AV *) 0;
	SV ** spp;

	spp = hv_fetch(self_hv, "Context", 7, 0);
	if (! spp || ! *spp || !SvROK(*spp))
	  croak("XML::Parser instance missing Context");
	context = (AV*) SvRV(*spp);

	if (namespaces) {
	  spp = hv_fetch(self_hv, "New_Prefixes", 12, 0);
	  if (! spp || ! *spp || !SvROK(*spp))
	    croak("XML::Parser instance missing New_Prefixes");
	  new_prefix_list = (AV *) SvRV(*spp);

	  spp = hv_fetch(self_hv, "Namespace_Table", 15, FALSE);
	  if (! spp || ! *spp || !SvROK(*spp))
	    croak("XML::Parser instance missing Namespace_Table");
	  nstab = (HV *) SvRV(*spp);

	  spp = hv_fetch(self_hv, "Namespace_List", 14, FALSE);
	  if (! spp || ! *spp || !SvROK(*spp))
	    croak("XML::Parser instance missing Namespace_List");
	  nslst = (AV *) SvRV(*spp);
	}

	Newz(320, cbv, 1, CallbackVector);
	cbv->self_sv = SvREFCNT_inc(self_sv);
	Newz(325, cbv->st_serial_stack, 1024, unsigned int);

	spp = hv_fetch(self_hv, "NoExpand", 8, 0);
	if (spp && SvTRUE(*spp))
	  cbv->no_expand = 1;

	cbv->context = context;
	cbv->ns = (unsigned) namespaces;

	if (namespaces) {
	  cbv->new_prefix_list = new_prefix_list;
	  cbv->nstab = nstab;
	  cbv->nslst = nslst;
	  RETVAL = XML_ParserCreate_MM(enc, &ms, nsdelim);
	}
	else {
	  RETVAL = XML_ParserCreate_MM(enc, &ms, NULL);
	}

	if (! RETVAL) {
	  /* expat could not allocate the parser: undo our own setup. */
	  SvREFCNT_dec(cbv->self_sv);
	  Safefree(cbv->st_serial_stack);
	  Safefree(cbv);
	  croak("XML::Parser: unable to create expat parser");
	}

	if (namespaces)
	  XML_SetNamespaceDeclHandler(RETVAL,nsStart, nsEnd);

	cbv->p = RETVAL;
	XML_SetUserData(RETVAL, (void *) cbv);
	XML_SetElementHandler(RETVAL, startElement, endElement);
	XML_SetUnknownEncodingHandler(RETVAL, unknownEncoding, 0);

	spp = hv_fetch(self_hv, "ParseParamEnt", 13, FALSE);

	if (spp && SvTRUE(*spp)) {
	  pep = XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE;
	  cbv->parseparam = 1;
	}

	XML_SetParamEntityParsing(RETVAL, pep);
      }
    OUTPUT:
	RETVAL

void
XML_ParserRelease(parser)
        XML_Parser parser
    CODE:
      {
        CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);

	SvREFCNT_dec(cbv->self_sv);
      }

void
XML_ParserFree(parser)
	XML_Parser parser
    CODE:
      {
	CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);

	Safefree(cbv->st_serial_stack);

	/* Clean up any SVs that we have */
	/* (Note that self_sv must already be taken care of
	   or we couldn't be here */

	if (cbv->recstring)
	  SvREFCNT_dec(cbv->recstring);

	if (cbv->start_sv)
	  SvREFCNT_dec(cbv->start_sv);

	if (cbv->end_sv)
	  SvREFCNT_dec(cbv->end_sv);

	if (cbv->char_sv)
	  SvREFCNT_dec(cbv->char_sv);

	if (cbv->proc_sv)
	  SvREFCNT_dec(cbv->proc_sv);

	if (cbv->cmnt_sv)
	  SvREFCNT_dec(cbv->cmnt_sv);

	if (cbv->dflt_sv)
	  SvREFCNT_dec(cbv->dflt_sv);

	if (cbv->entdcl_sv)
	  SvREFCNT_dec(cbv->entdcl_sv);

	if (cbv->eledcl_sv)
	  SvREFCNT_dec(cbv->eledcl_sv);

	if (cbv->attdcl_sv)
	  SvREFCNT_dec(cbv->attdcl_sv);

	if (cbv->doctyp_sv)
	  SvREFCNT_dec(cbv->doctyp_sv);

	if (cbv->doctypfin_sv)
	  SvREFCNT_dec(cbv->doctypfin_sv);

	if (cbv->xmldec_sv)
	  SvREFCNT_dec(cbv->xmldec_sv);

	if (cbv->unprsd_sv)
	  SvREFCNT_dec(cbv->unprsd_sv);

	if (cbv->notation_sv)
	  SvREFCNT_dec(cbv->notation_sv);

	if (cbv->extent_sv)
	  SvREFCNT_dec(cbv->extent_sv);

	if (cbv->extfin_sv)
	  SvREFCNT_dec(cbv->extfin_sv);

	if (cbv->startcd_sv)
	  SvREFCNT_dec(cbv->startcd_sv);

	if (cbv->endcd_sv)
SvREFCNT_dec(cbv->endcd_sv);

	/* ================ */

	/* Release the callback vector itself, then the expat parser. */
	Safefree(cbv);
	XML_ParserFree(parser);
      }

int
XML_ParseString(parser, sv)
        XML_Parser			parser
	SV *				sv
    CODE:
        {
	  /* Parse the complete document held in sv in a single call
	     (last argument 1 = final chunk).  On failure the expat error
	     is recorded through append_error(parser, NULL). */
	  CallbackVector * cbv;
	  STRLEN len;
	  char *s = SvPV(sv, len);

	  cbv = (CallbackVector *) XML_GetUserData(parser);

	  RETVAL = XML_Parse(parser, s, len, 1);
	  SPAGAIN; /* XML_Parse might have changed stack pointer */
	  if (! RETVAL)
	    append_error(parser, NULL);
	}

    OUTPUT:
	RETVAL

int
XML_ParseStream(parser, ioref, delim)
	XML_Parser			parser
	SV *				ioref
	SV *				delim
    CODE:
	{
	  /* Parse from the handle in ioref via parse_stream() (defined
	     outside this section).  A defined `delim` is stored, with its
	     length, in cbv->delim / cbv->delimlen; otherwise cbv->delim
	     is cleared. */
	  SV **delimsv;
	  CallbackVector * cbv;

	  cbv = (CallbackVector *) XML_GetUserData(parser);
	  if (SvOK(delim)) {
	    cbv->delim = SvPV(delim, cbv->delimlen);
	  }
	  else {
	    cbv->delim = (char *) 0;
	  }

	  RETVAL = parse_stream(parser, ioref);
	  SPAGAIN; /* parse_stream might have changed stack pointer */
	}

    OUTPUT:
	RETVAL

int
XML_ParsePartial(parser, sv)
	XML_Parser			parser
	SV *				sv
    CODE:
	{
	  /* Feed one non-final chunk (last argument 0) to the parser;
	     errors are recorded through append_error(). */
	  STRLEN len;
	  char *s = SvPV(sv, len);
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);

	  RETVAL = XML_Parse(parser, s, len, 0);
	  if (! RETVAL)
	    append_error(parser, NULL);
	}

    OUTPUT:
	RETVAL

int
XML_ParseDone(parser)
	XML_Parser			parser
    CODE:
	{
	  /* Finish an incremental parse: empty buffer, final flag set. */
	  RETVAL = XML_Parse(parser, "", 0, 1);
	  if (!
RETVAL)
	    append_error(parser, NULL);
	}

    OUTPUT:
	RETVAL

SV *
XML_SetStartElementHandler(parser, start_sv)
	XML_Parser			parser
	SV *				start_sv
    CODE:
	{
	  /* Record the new Perl start-element callback.  XMLP_UPD and
	     PUSHRET are macros defined outside this section; presumably
	     they store the callback in cbv and hand back the previous
	     one -- confirm against their definitions.  No expat call is
	     made here; the element trampolines are installed once in
	     XML_ParserCreate. */
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
	  XMLP_UPD(start_sv);
	  PUSHRET;
	}

SV *
XML_SetEndElementHandler(parser, end_sv)
	XML_Parser			parser
	SV *				end_sv
    CODE:
	{
	  /* Same as above for the end-element callback. */
	  CallbackVector *cbv = (CallbackVector*) XML_GetUserData(parser);
	  XMLP_UPD(end_sv);
	  PUSHRET;
	}

SV *
XML_SetCharacterDataHandler(parser, char_sv)
	XML_Parser			parser
	SV *				char_sv
    CODE:
	{
	  /* Update the Perl callback, then install the characterData
	     trampoline in expat when char_sv is true, or clear the expat
	     handler (null pointer) otherwise. */
	  XML_CharacterDataHandler charhndl = (XML_CharacterDataHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(char_sv);
	  if (SvTRUE(char_sv))
	    charhndl = characterData;

	  XML_SetCharacterDataHandler(parser, charhndl);
	  PUSHRET;
	}

SV *
XML_SetProcessingInstructionHandler(parser, proc_sv)
	XML_Parser			parser
	SV *				proc_sv
    CODE:
	{
	  /* As above, for processing instructions. */
	  XML_ProcessingInstructionHandler prochndl =
	    (XML_ProcessingInstructionHandler) 0;
	  CallbackVector* cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(proc_sv);
	  if (SvTRUE(proc_sv))
	    prochndl = processingInstruction;

	  XML_SetProcessingInstructionHandler(parser, prochndl);
	  PUSHRET;
	}

SV *
XML_SetCommentHandler(parser, cmnt_sv)
	XML_Parser			parser
	SV *				cmnt_sv
    CODE:
	{
	  /* As above, for comments. */
	  XML_CommentHandler cmnthndl = (XML_CommentHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(cmnt_sv);
	  if (SvTRUE(cmnt_sv))
	    cmnthndl = commenthandle;

	  XML_SetCommentHandler(parser, cmnthndl);
	  PUSHRET;
	}

SV *
XML_SetDefaultHandler(parser, dflt_sv)
	XML_Parser			parser
	SV *				dflt_sv
    CODE:
	{
	  /* As above, for the default handler.  The expat entry point
	     used depends on cbv->no_expand: XML_SetDefaultHandler when it
	     is set, XML_SetDefaultHandlerExpand otherwise. */
	  XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(dflt_sv);
	  if (SvTRUE(dflt_sv))
	    dflthndl = defaulthandle;

	  if (cbv->no_expand)
	    XML_SetDefaultHandler(parser, dflthndl);
	  else
	    XML_SetDefaultHandlerExpand(parser, dflthndl);

	  PUSHRET;
	}

SV *
XML_SetUnparsedEntityDeclHandler(parser, unprsd_sv)
	XML_Parser			parser
	SV *				unprsd_sv
    CODE:
	{
	  XML_UnparsedEntityDeclHandler unprsdhndl
= (XML_UnparsedEntityDeclHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  /* Update the Perl callback, then attach or detach the
	     unparsedEntityDecl trampoline depending on its truth. */
	  XMLP_UPD(unprsd_sv);
	  if (SvTRUE(unprsd_sv))
	    unprsdhndl = unparsedEntityDecl;

	  XML_SetUnparsedEntityDeclHandler(parser, unprsdhndl);
	  PUSHRET;
	}

SV *
XML_SetNotationDeclHandler(parser, notation_sv)
	XML_Parser			parser
	SV *				notation_sv
    CODE:
	{
	  /* Update the Perl notation callback and attach the notationDecl
	     trampoline when it is true, detach it otherwise. */
	  XML_NotationDeclHandler nothndlr = (XML_NotationDeclHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(notation_sv);
	  if (SvTRUE(notation_sv))
	    nothndlr = notationDecl;

	  XML_SetNotationDeclHandler(parser, nothndlr);
	  PUSHRET;
	}

SV *
XML_SetExternalEntityRefHandler(parser, extent_sv)
	XML_Parser			parser
	SV *				extent_sv
    CODE:
	{
	  /* Update the Perl external-entity callback and attach the
	     externalEntityRef trampoline when it is true, detach it
	     otherwise. */
	  XML_ExternalEntityRefHandler exthndlr =
	    (XML_ExternalEntityRefHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(extent_sv);
	  if (SvTRUE(extent_sv))
	    exthndlr = externalEntityRef;

	  XML_SetExternalEntityRefHandler(parser, exthndlr);
	  PUSHRET;
	}

SV *
XML_SetExtEntFinishHandler(parser, extfin_sv)
	XML_Parser			parser
	SV *				extfin_sv
    CODE:
	{
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);

	  /* There is no corresponding handler for this in expat. This is
	     called from the externalEntityRef function above after parsing
	     the external entity.
*/

	  XMLP_UPD(extfin_sv);
	  PUSHRET;
	}

SV *
XML_SetEntityDeclHandler(parser, entdcl_sv)
	XML_Parser			parser
	SV *				entdcl_sv
    CODE:
	{
	  /* Update the Perl entity-declaration callback and attach the
	     entityDecl trampoline when it is true, detach it otherwise.
	     (XMLP_UPD / PUSHRET are macros defined outside this
	     section.) */
	  XML_EntityDeclHandler enthndlr =
	    (XML_EntityDeclHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(entdcl_sv);
	  if (SvTRUE(entdcl_sv))
	    enthndlr = entityDecl;

	  XML_SetEntityDeclHandler(parser, enthndlr);
	  PUSHRET;
	}

SV *
XML_SetElementDeclHandler(parser, eledcl_sv)
	XML_Parser			parser
	SV *				eledcl_sv
    CODE:
	{
	  /* Same pattern for element declarations (elementDecl). */
	  XML_ElementDeclHandler eldeclhndlr =
	    (XML_ElementDeclHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(eledcl_sv);
	  if (SvTRUE(eledcl_sv))
	    eldeclhndlr = elementDecl;

	  XML_SetElementDeclHandler(parser, eldeclhndlr);
	  PUSHRET;
	}

SV *
XML_SetAttListDeclHandler(parser, attdcl_sv)
	XML_Parser			parser
	SV *				attdcl_sv
    CODE:
	{
	  /* Same pattern for attribute-list declarations
	     (attributeDecl). */
	  XML_AttlistDeclHandler attdeclhndlr =
	    (XML_AttlistDeclHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(attdcl_sv);
	  if (SvTRUE(attdcl_sv))
	    attdeclhndlr = attributeDecl;

	  XML_SetAttlistDeclHandler(parser, attdeclhndlr);
	  PUSHRET;
	}

SV *
XML_SetDoctypeHandler(parser, doctyp_sv)
	XML_Parser			parser
	SV *				doctyp_sv
    CODE:
	{
	  /* Same pattern for the start of a DOCTYPE declaration
	     (doctypeStart).  NOTE(review): `set` is never read. */
	  XML_StartDoctypeDeclHandler dtsthndlr =
	    (XML_StartDoctypeDeclHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);
	  int set = 0;

	  XMLP_UPD(doctyp_sv);
	  if (SvTRUE(doctyp_sv))
	    dtsthndlr = doctypeStart;

	  XML_SetStartDoctypeDeclHandler(parser, dtsthndlr);
	  PUSHRET;
	}

SV *
XML_SetEndDoctypeHandler(parser, doctypfin_sv)
	XML_Parser			parser
	SV *				doctypfin_sv
    CODE:
	{
	  /* Same pattern for the end of a DOCTYPE declaration
	     (doctypeEnd). */
	  XML_EndDoctypeDeclHandler dtendhndlr =
	    (XML_EndDoctypeDeclHandler) 0;
	  CallbackVector * cbv = (CallbackVector*) XML_GetUserData(parser);

	  XMLP_UPD(doctypfin_sv);
	  if (SvTRUE(doctypfin_sv))
	    dtendhndlr = doctypeEnd;

	  XML_SetEndDoctypeDeclHandler(parser, dtendhndlr);
	  PUSHRET;
	}

SV *
XML_SetXMLDeclHandler(parser, xmldec_sv)
	XML_Parser			parser
	SV *				xmldec_sv
    CODE:
	{
	  XML_XmlDeclHandler xmldechndlr =
	    (XML_XmlDeclHandler) 0;
	  CallbackVector * cbv =
(CallbackVector *) XML_GetUserData(parser);

	  XMLP_UPD(xmldec_sv);
	  if (SvTRUE(xmldec_sv))
	    xmldechndlr = xmlDecl;

	  XML_SetXmlDeclHandler(parser, xmldechndlr);
	  PUSHRET;
	}

void
XML_SetBase(parser, base)
	XML_Parser			parser
	SV *				base
    CODE:
	{
	  /* Set the base used for resolving relative identifiers; an
	     undefined `base` clears it. */
	  char * b;

	  if (! SvOK(base)) {
	    b = (char *) 0;
	  }
	  else {
	    b = SvPV_nolen(base);
	  }

	  XML_SetBase(parser, b);
	}

SV *
XML_GetBase(parser)
	XML_Parser			parser
    CODE:
	{
	  /* Return the current base as a new mortal string, or undef
	     when none is set. */
	  const char *ret = XML_GetBase(parser);
	  if (ret) {
	    ST(0) = sv_newmortal();
	    sv_setpv(ST(0), ret);
	  }
	  else {
	    ST(0) = &PL_sv_undef;
	  }
	}

void
XML_PositionContext(parser, lines)
	XML_Parser			parser
	int				lines
    PREINIT:
	int parsepos;
	int size;
	const char *pos = XML_GetInputContext(parser, &parsepos, &size);
	int begidx;
	int endidx;
	int length, relpos;
	int cnt;

    PPCODE:
	  /* Returns (text, relpos): `text` is the current input line plus
	     up to `lines` lines of context on each side, and `relpos` is
	     the offset within `text` just past the end of the current
	     line (or the length of `text` when no newline follows).
	     Returns an empty list when expat has no input context.

	     All scanning is done with indices clamped to [0, size] so
	     that nothing outside expat's buffer is read, even when the
	     parse position sits at the very end of the buffer. */
	  if (! pos)
	    XSRETURN_EMPTY;	/* a bare return would hand the args back */

	  /* Walk backwards to the newline that starts the context. */
	  cnt = 0;
	  for (begidx = (parsepos < size ? parsepos : size - 1);
	       begidx >= 0;
	       begidx--)
	    {
	      if (pos[begidx] == '\n')
		{
		  cnt++;
		  if (cnt > lines)
		    break;
		}
	    }

	  begidx++;

	  /* Walk forwards to the newline that ends the context. */
	  relpos = 0;
	  cnt = 0;
	  for (endidx = parsepos + 1; endidx < size; endidx++)
	    {
	      if (pos[endidx] == '\n')
		{
		  if (cnt == 0)
		    relpos = (endidx - begidx) + 1;
		  cnt++;
		  if (cnt > lines)
		    {
		      endidx++;
		      break;
		    }
		}
	    }

	  if (endidx > size)
	    endidx = size;

	  length = endidx - begidx;
	  if (relpos == 0)
	    relpos = length;

	  EXTEND(sp, 2);
	  PUSHs(sv_2mortal(newSVpvn((char *) &pos[begidx], length)));
	  PUSHs(sv_2mortal(newSViv(relpos)));

SV *
GenerateNSName(name, xml_namespace, table, list)
	SV *				name
	SV *				xml_namespace
	SV *				table
	SV *				list
    CODE:
	{
	  /* Build "<namespace><NSDELIM><name>" in a scratch buffer and
	     let gen_ns_name() (defined outside this section) turn it into
	     the returned SV, using the referenced table and list. */
	  STRLEN	nmlen, nslen;
	  char *	nmstr;
	  char *	nsstr;
	  char *	buff;
	  char *	bp;
	  char *	blim;

	  nmstr = SvPV(name, nmlen);
	  nsstr = SvPV(xml_namespace, nslen);

	  /* Form a namespace-name string that looks like expat's */
	  New(321, buff, nmlen + nslen + 2, char);
	  bp = buff;
	  blim = bp + nslen;
	  while (bp < blim)
	    *bp++ = *nsstr++;
	  *bp++ = NSDELIM;
	  blim = bp + nmlen;
	  while (bp < blim)
	    *bp++ = *nmstr++;
	  *bp = '\0';

	  RETVAL = gen_ns_name(buff, (HV *) SvRV(table), (AV *) SvRV(list));
	  Safefree(buff);
	}
    OUTPUT:
	RETVAL

void
XML_DefaultCurrent(parser)
	XML_Parser			parser
CODE:
	{
	  /* Pass the current markup to the default handler. */
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);

	  XML_DefaultCurrent(parser);
	}

SV *
XML_RecognizedString(parser)
	XML_Parser			parser
    CODE:
	{
	  /* Return a copy of the text of the current event.  recString is
	     installed temporarily as the default handler,
	     XML_DefaultCurrent is called so the text is collected in
	     cbv->recstring, and the previous default handler
	     (defaulthandle if a Perl default callback is stored, none
	     otherwise) is put back.
	     NOTE(review): cbv->recstring stays NULL until recString has
	     run at least once; confirm newSVsv() is safe in that case. */
	  XML_DefaultHandler dflthndl = (XML_DefaultHandler) 0;
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);

	  if (cbv->dflt_sv) {
	    dflthndl = defaulthandle;
	  }

	  /* Start from an empty buffer when one already exists. */
	  if (cbv->recstring) {
	    sv_setpvn(cbv->recstring, "", 0);
	  }

	  if (cbv->no_expand)
	    XML_SetDefaultHandler(parser, recString);
	  else
	    XML_SetDefaultHandlerExpand(parser, recString);

	  XML_DefaultCurrent(parser);

	  if (cbv->no_expand)
	    XML_SetDefaultHandler(parser, dflthndl);
	  else
	    XML_SetDefaultHandlerExpand(parser, dflthndl);

	  RETVAL = newSVsv(cbv->recstring);
	}
    OUTPUT:
	RETVAL

int
XML_GetErrorCode(parser)
	XML_Parser			parser

int
XML_GetCurrentLineNumber(parser)
	XML_Parser			parser

int
XML_GetCurrentColumnNumber(parser)
	XML_Parser			parser

long
XML_GetCurrentByteIndex(parser)
	XML_Parser			parser

int
XML_GetSpecifiedAttributeCount(parser)
	XML_Parser			parser

char *
XML_ErrorString(code)
	int				code
    CODE:
	/* Return expat's message for `code` as a new mortal string. */
	const char *ret = XML_ErrorString(code);
	ST(0) = sv_newmortal();
	sv_setpv((SV*)ST(0), ret);

SV *
XML_LoadEncoding(data, size)
	char *				data
	int				size
    CODE:
	{
	  /* Turn a binary encoding map (`size` bytes at `data`) into an
	     Encinfo stored in %XML::Parser::Expat::Encoding_Table under
	     the upper-cased encoding name, which is also the return
	     value.  undef is returned when the header is too short, the
	     magic number does not match, or `size` disagrees with the
	     sizes announced in the header.  Multi-byte header fields are
	     converted with ntohl/ntohs. */
	  Encmap_Header *emh = (Encmap_Header *) data;
	  unsigned pfxsize, bmsize;

	  if (size < sizeof(Encmap_Header)
	      || ntohl(emh->magic) != ENCMAP_MAGIC) {
	    RETVAL = &PL_sv_undef;
	  }
	  else {
	    Encinfo	*entry;
	    SV		*sv;
	    PrefixMap	*pfx;
	    unsigned short *bm;
	    int namelen;
	    int i;

	    pfxsize = ntohs(emh->pfsize);
	    bmsize  = ntohs(emh->bmsize);

	    /* The buffer must be exactly header + prefix maps + byte map. */
	    if (size != (sizeof(Encmap_Header)
			 + pfxsize * sizeof(PrefixMap)
			 + bmsize * sizeof(unsigned short))) {
	      RETVAL = &PL_sv_undef;
	    }
	    else {
	      /* Convert to uppercase and get name length */
	      /* (the name is upper-cased in place, inside `data`) */

	      for (i = 0; i < sizeof(emh->name); i++) {
		char c = emh->name[i];

		  if (c == (char) 0)
		    break;

		if (c >= 'a' && c <= 'z')
		  emh->name[i] -= 'a' - 'A';
	      }
	      namelen = i;

	      RETVAL = newSVpvn(emh->name, namelen);

	      New(322, entry, 1, Encinfo);
	      entry->prefixes_size = pfxsize;
	      entry->bytemap_size = bmsize;
	      for (i = 0; i < 256; i++) {
entry->firstmap[i] = ntohl(emh->map[i]);
	      }

	      /* The prefix maps follow the header; the byte map follows
		 the prefix maps. */
	      pfx = (PrefixMap *) &data[sizeof(Encmap_Header)];
	      bm = (unsigned short *) (((char *) pfx)
				       + sizeof(PrefixMap) * pfxsize);

	      New(323, entry->prefixes, pfxsize, PrefixMap);
	      New(324, entry->bytemap, bmsize, unsigned short);

	      for (i = 0; i < pfxsize; i++, pfx++) {
		PrefixMap *dest = &entry->prefixes[i];

		dest->min = pfx->min;
		dest->len = pfx->len;
		dest->bmap_start = ntohs(pfx->bmap_start);
		/* One copy covers both bit arrays; this assumes ischar
		   directly follows ispfx in PrefixMap -- TODO confirm
		   against the struct definition. */
		Copy(pfx->ispfx, dest->ispfx,
		     sizeof(pfx->ispfx) + sizeof(pfx->ischar), unsigned char);
	      }

	      for (i = 0; i < bmsize; i++)
		entry->bytemap[i] = ntohs(bm[i]);

	      /* Wrap the Encinfo pointer in a blessed scalar reference. */
	      sv = newSViv(0);
	      sv_setref_pv(sv, "XML::Parser::Encinfo", (void *) entry);

	      if (! EncodingTable) {
		EncodingTable
		  = perl_get_hv("XML::Parser::Expat::Encoding_Table",
				FALSE);
		if (! EncodingTable)
		  croak("Can't find XML::Parser::Expat::Encoding_Table");
	      }

	      hv_store(EncodingTable, emh->name, namelen, sv, 0);
	    }
	  }
	}
    OUTPUT:
	RETVAL

void
XML_FreeEncoding(enc)
	Encinfo *			enc
    CODE:
	/* Release the two tables, then the Encinfo itself. */
	Safefree(enc->bytemap);
	Safefree(enc->prefixes);
	Safefree(enc);

SV *
XML_OriginalString(parser)
	XML_Parser			parser
    CODE:
	{
	  /* Return the raw input bytes of the current event (taken from
	     expat's input context at the current parse position), or an
	     empty string when no input context is available. */
	  int parsepos, size;
	  const char *buff = XML_GetInputContext(parser, &parsepos, &size);
	  if (buff) {
	    RETVAL = newSVpvn((char *) &buff[parsepos],
			      XML_GetCurrentByteCount(parser));
	  }
	  else {
	    RETVAL = newSVpv("", 0);
	  }
	}
    OUTPUT:
	RETVAL

SV *
XML_SetStartCdataHandler(parser, startcd_sv)
	XML_Parser			parser
	SV *				startcd_sv
    CODE:
	{
	  /* Update the Perl CDATA-start callback and attach the startCdata
	     trampoline when it is true, detach it otherwise.  (XMLP_UPD /
	     PUSHRET are macros defined outside this section.) */
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
	  XML_StartCdataSectionHandler scdhndl =
	    (XML_StartCdataSectionHandler) 0;

	  XMLP_UPD(startcd_sv);
	  if (SvTRUE(startcd_sv))
	    scdhndl = startCdata;

	  XML_SetStartCdataSectionHandler(parser, scdhndl);
	  PUSHRET;
	}

SV *
XML_SetEndCdataHandler(parser, endcd_sv)
	XML_Parser			parser
	SV *				endcd_sv
    CODE:
	{
	  /* Same pattern for the CDATA-end callback (endCdata). */
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
	  XML_EndCdataSectionHandler ecdhndl =
	    (XML_EndCdataSectionHandler) 0;

	  XMLP_UPD(endcd_sv);
	  if (SvTRUE(endcd_sv))
	    ecdhndl = endCdata;

	  XML_SetEndCdataSectionHandler(parser, ecdhndl);
PUSHRET;
	}

void
XML_UnsetAllHandlers(parser)
	XML_Parser			parser
    CODE:
	{
	  /* Detach every expat handler this module may have installed:
	     the optional ones via suspend_callbacks(), then the namespace
	     (only when namespace processing is on), element and
	     unknown-encoding handlers. */
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);

	  suspend_callbacks(cbv);
	  if (cbv->ns) {
	    XML_SetNamespaceDeclHandler(cbv->p,
					(XML_StartNamespaceDeclHandler) 0,
					(XML_EndNamespaceDeclHandler) 0);
	  }

	  XML_SetElementHandler(parser,
				(XML_StartElementHandler) 0,
				(XML_EndElementHandler) 0);

	  XML_SetUnknownEncodingHandler(parser,
					(XML_UnknownEncodingHandler) 0,
					(void *) 0);
	}

int
XML_ElementIndex(parser)
	XML_Parser			parser
    CODE:
	{
	  /* Serial number stored at the top of the element serial stack. */
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
	  RETVAL = cbv->st_serial_stack[cbv->st_serial_stackptr];
	}
    OUTPUT:
	RETVAL

void
XML_SkipUntil(parser, index)
	XML_Parser			parser
	unsigned int			index
    CODE:
	{
	  /* Suspend the optional callbacks until the element with serial
	     number `index` is reached; a no-op when that serial number
	     has already been passed. */
	  CallbackVector * cbv = (CallbackVector *) XML_GetUserData(parser);
	  if (index <= cbv->st_serial)
	    return;
	  cbv->skip_until = index;
	  suspend_callbacks(cbv);
	}

int
XML_Do_External_Parse(parser, result)
	XML_Parser			parser
	SV *				result
    CODE:
	{
	  /* Parse the value returned by an ExternEnt handler with the
	     given (external entity) parser:
	       - a reference to an object: treated as a stream
	       - a glob:                   its IO slot is used as a stream
	       - a string:                 parsed directly as final chunk
	     Any other kind of value cannot be parsed; RETVAL then stays 0
	     (failure) instead of being left uninitialised. */
	  RETVAL = 0;

	  if (SvROK(result) && SvOBJECT(SvRV(result))) {
	    RETVAL = parse_stream(parser, result);
	  }
	  else if (isGV(result)) {
	    RETVAL = parse_stream(parser,
				  sv_2mortal(newRV((SV*) GvIOp(result))));
	  }
	  else if (SvPOK(result)) {
	    STRLEN  eslen;
	    char *entstr = SvPV(result, eslen);

	    RETVAL = XML_Parse(parser, entstr, eslen, 1);
	  }
	}
    OUTPUT:
	RETVAL

XML-Parser-2.46/Expat/Makefile.PL0000644000000000000000000000131013542305435015069 0ustar rootrootuse ExtUtils::MakeMaker;
use Config;
use English;
my $libs = "-lexpat";
my @extras = ();
push(@extras, INC => "-I$expat_incpath") if $expat_incpath;
$libs = "-L$expat_libpath $libs" if $expat_libpath;
push(@extras, CAPI => 'TRUE') if (($PERL_VERSION >= 5.005) and ($OSNAME eq 'MSWin32') and ($Config{archname} =~ /-object\b/i));
push(@extras, ABSTRACT => "Lowlevel access to James Clark's expat XML parser", AUTHOR => 'Matt Sergeant (matt@sergeant.org)') if ($ExtUtils::MakeMaker::VERSION >= 5.4301);
WriteMakefile( NAME
=> 'XML::Parser::Expat', C => ['Expat.c'], LIBS => $libs, XSPROTOARG => '-noprototypes', VERSION_FROM => 'Expat.pm', @extras ); XML-Parser-2.46/t/0000755000000000000000000000000013542324531012302 5ustar rootrootXML-Parser-2.46/t/skip.t0000644000000000000000000000171012703346371013440 0ustar rootrootBEGIN { print "1..4\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; my $cmnt_count = 0; my $pi_count = 0; my $between_count = 0; my $authseen = 0; sub init { my $xp = shift; $xp->skip_until(1); # Skip through prolog } sub proc { $pi_count++; } sub cmnt { $cmnt_count++; } sub start { my ( $xp, $el ) = @_; my $ndx = $xp->element_index; if ( !$authseen and $el eq 'authlist' ) { $authseen = 1; $xp->skip_until(2000); } elsif ( $authseen and $ndx < 2000 ) { $between_count++; } } my $p = new XML::Parser( Handlers => { Init => \&init, Start => \&start, Comment => \&cmnt, Proc => \&proc } ); $p->parsefile('samples/REC-xml-19980210.xml'); print "not " if $between_count; print "ok 2\n"; print "not " if $pi_count; print "ok 3\n"; print "not " unless $cmnt_count == 5; print "ok 4\n"; XML-Parser-2.46/t/namespaces.t0000644000000000000000000000604012703346371014612 0ustar rootrootBEGIN { print "1..16\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; ################################################################ # Check namespaces $docstring = <<'End_of_doc;'; End_of_doc; my $gname; sub init { my $xp = shift; $gname = $xp->generate_ns_name( 'alpha', 'urn:young-frankenstein' ); } sub start { my $xp = shift; my $el = shift; if ( $el eq 'foo' ) { print "not " unless $xp->namespace($el) eq 'urn:blazing-saddles'; print "ok 2\n"; print "not " unless $xp->new_ns_prefixes == 2; print "ok 3\n"; while (@_) { my $att = shift; my $val = shift; if ( $att eq 'alpha' ) { print "not " unless $xp->eq_name( $gname, $att ); print "ok 4\n"; last; } } } elsif ( $el eq 'zebra' ) { print "not " unless 
$xp->new_ns_prefixes == 0; print "ok 5\n"; print "not " unless $xp->namespace($el) eq 'urn:blazing-saddles'; print "ok 6\n"; } elsif ( $el eq 'tango' ) { print "not " if $xp->namespace( $_[0] ); print "ok 8\n"; print "not " unless $_[0] eq $_[2]; print "ok 9\n"; print "not " if $xp->eq_name( $_[0], $_[2] ); print "ok 10\n"; my $cnt = 0; foreach ( $xp->new_ns_prefixes ) { $cnt++ if $_ eq '#default'; $cnt++ if $_ eq 'zoo'; } print "not " unless $cnt == 2; print "ok 11\n"; } } sub end { my $xp = shift; my $el = shift; if ( $el eq 'zebra' ) { print "not " unless $xp->expand_ns_prefix('#default') eq 'urn:blazing-saddles'; print "ok 7\n"; } elsif ( $el eq 'everywhere' ) { print "not " unless $xp->namespace($el) eq 'urn:blazing-saddles'; print "ok 16\n"; } } sub proc { my $xp = shift; my $target = shift; if ( $target eq 'nscheck' ) { print "not " if $xp->new_ns_prefixes > 0; print "ok 12\n"; my $cnt = 0; foreach ( $xp->current_ns_prefixes ) { $cnt++ if $_ eq 'zoo'; $cnt++ if $_ eq 'bar'; } print "not " unless $cnt == 2; print "ok 13\n"; print "not " unless $xp->expand_ns_prefix('bar') eq 'urn:young-frankenstein'; print "ok 14\n"; print "not " unless $xp->expand_ns_prefix('zoo') eq 'urn:high-anxiety'; print "ok 15\n"; } } my $parser = new XML::Parser( ErrorContext => 2, Namespaces => 1, Handlers => { Start => \&start, End => \&end, Proc => \&proc, Init => \&init } ); $parser->parse($docstring); XML-Parser-2.46/t/stream.t0000644000000000000000000000165013542323743013770 0ustar rootrootBEGIN { print "1..3\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; my $delim = '------------123453As23lkjlklz877'; my $file = 'samples/REC-xml-19980210.xml'; my $tmpfile = 'stream.tmp'; my $cnt = 0; open( my $out_fh, '>', $tmpfile ) or die "Couldn't open $tmpfile for output"; open( my $in_fh, '<', $file ) or die "Couldn't open $file for input"; while (<$in_fh>) { print $out_fh $_; } close($in_fh); print $out_fh "$delim\n"; open( $in_fh, $file ); 
while (<$in_fh>) { print $out_fh $_; } close($in_fh); close($out_fh); my $parser = new XML::Parser( Stream_Delimiter => $delim, Handlers => { Comment => sub { $cnt++; } } ); open( my $fh, $tmpfile ); $parser->parse($fh); print "not " if ( $cnt != 37 ); print "ok 2\n"; $cnt = 0; $parser->parse($fh); print "not " if ( $cnt != 37 ); print "ok 3\n"; close($fh); unlink($tmpfile); XML-Parser-2.46/t/partial.t0000644000000000000000000000127313542303426014126 0ustar rootrootBEGIN { print "1..3\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; my $cnt = 0; my $str; sub tmpchar { my ( $xp, $data ) = @_; if ( $xp->current_element eq 'day' ) { $str = $xp->original_string; $xp->setHandlers( Char => 0 ); } } my $p = new XML::Parser( Handlers => { Comment => sub { $cnt++; }, Char => \&tmpchar } ); my $xpnb = $p->parse_start; open( my $rec, '<', 'samples/REC-xml-19980210.xml' ); while (<$rec>) { $xpnb->parse_more($_); } close($rec); $xpnb->parse_done; print "not " unless $cnt == 37; print "ok 2\n"; print "not " unless $str eq '&draft.day;'; print "ok 3\n"; XML-Parser-2.46/t/file_open_scalar.t0000644000000000000000000000105512703346371015761 0ustar rootroot use if $] < 5.006, Test::More => skip_all => 'syntax requires perl 5.6'; #tests behaviour on perls 5.10? .. 
5.10.1 package Some::Fake::Packege; sub fake_sub { require FileHandle; } package main; use Test::More tests => 1; use XML::Parser; use strict; my $count = 0; my $parser = XML::Parser->new( ErrorContext => 2 ); $parser->setHandlers( Comment => sub { $count++; } ); open my $fh, '<', 'samples/REC-xml-19980210.xml' or die; #on 5.10 $fh would be a FileHandle object without a real FileHandle class $parser->parse($fh); is( $count, 37 ); XML-Parser-2.46/t/decl.t0000644000000000000000000001212713542323743013405 0ustar rootroot#!/usr/bin/perl use strict; use warnings; use Test::More tests => 40; use XML::Parser; ok("loaded"); my $bigval = <<'End_of_bigval;'; This is a large string value to test whether the declaration parser still works when the entity or attribute default value may be broken into multiple calls to the default handler. 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 01234567890123456789012345678901234567890123456789012345678901234567890123456789 End_of_bigval; $bigval =~ s/\n/ /g; my $docstr = <<"End_of_Doc;"; ]> End_of_Doc; my $entcnt = 0; my %ents; 
sub enth1 { my ( $p, $name, $val, $sys, $pub, $notation ) = @_; is( $val, 'a' ) if ( $name eq 'alpha' ); is( $val, 'stinky animal' ) if ( $name eq 'skunk' ); if ( $name eq 'logo' ) { ok( !defined($val) ); is( $sys, 'logo.gif' ); is( $pub, '//Widgets Corp/Logo' ); is( $notation, 'gif' ); } } my $parser = new XML::Parser( ErrorContext => 2, NoLWP => 1, ParseParamEnt => 1, Handlers => { Entity => \&enth1 } ); eval { $parser->parse($docstr) }; sub eleh { my ( $p, $name, $model ) = @_; if ( $name eq 'junk' ) { is( $model, '((bar|foo|xyz+),zebra*)' ); ok $model->isseq; my @parts = $model->children; ok( $parts[0]->ischoice ); my @cparts = $parts[0]->children; is( $cparts[0], 'bar' ); is( $cparts[1], 'foo' ); is( $cparts[2], 'xyz+' ); is( $cparts[2]->name, 'xyz' ); is( $parts[1]->name, 'zebra' ); is( $parts[1]->quant, '*' ); } if ( $name eq 'xyz' ) { ok( $model->ismixed ); ok( !defined( $model->children ) ); } if ( $name eq 'zebra' ) { ok( $model->ismixed ); is( ( $model->children )[1], 'strong' ); } if ( $name eq 'bar' ) { ok( $model->isany ); } } sub enth2 { my ( $p, $name, $val, $sys, $pub, $notation ) = @_; is( $val, 'a' ) if ( $name eq 'alpha' ); is( $val, 'stinky animal' ) if ( $name eq 'skunk' ); is( $val, $bigval ) if ( $name eq 'big' ); ok( !defined($val) and $sys eq 'logo.gif' and $pub eq '//Widgets Corp/Logo' and $notation eq 'gif' ) if ( $name eq 'logo' ); } sub doc { my ( $p, $name, $sys, $pub, $intdecl ) = @_; is( $name, 'foo' ); is( $sys, 't/foo.dtd' ); ok($intdecl); } sub att { my ( $p, $elname, $attname, $type, $default, $fixed ) = @_; if ( $elname eq 'junk' ) { if ( $attname eq 'id' and $type eq 'ID' ) { is( $default, '#REQUIRED' ); ok( !$fixed ); } elsif ( $attname eq 'version' and $type eq 'CDATA' ) { is( $default, "'1.0'" ); ok($fixed); } elsif ( $attname eq 'color' and $type eq '(red|green|blue)' ) { is( $default, "'green'" ); } elsif ( $attname eq 'foo' and $type eq 'NOTATION(x|y|z)' ) { is( $default, '#IMPLIED' ); } } elsif ( $elname eq 'bar' ) { 
is( $attname, 'big' ); is( $default, "'$bigval'" ); } } sub xd { my ( $p, $version, $enc, $stand ) = @_; if ( defined($version) ) { is( $version, '1.0' ); is( $enc, 'ISO-8859-1' ); ok( !defined($stand) ); } else { is( $enc, 'x-sjis-unicode' ); } } $parser->setHandlers( Entity => \&enth2, Element => \&eleh, Attlist => \&att, Doctype => \&doc, XMLDecl => \&xd ); $| = 1; $parser->parse($docstr); XML-Parser-2.46/t/ext.ent0000644000000000000000000000004112703346340013605 0ustar rootroot XML-Parser-2.46/t/foo.dtd0000644000000000000000000000046012703346340013562 0ustar rootroot %ext; ]]> ]]> XML-Parser-2.46/t/finish.t0000644000000000000000000000104612703346371013754 0ustar rootrootBEGIN { print "1..3\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; my $stcount = 0; my $encount = 0; sub st { my ( $exp, $el ) = @_; $stcount++; $exp->finish if $el eq 'loc'; } sub end { $encount++; } $parser = new XML::Parser( Handlers => { Start => \&st, End => \&end }, ErrorContext => 2 ); $parser->parsefile('samples/REC-xml-19980210.xml'); print "not " unless $stcount == 12; print "ok 2\n"; print "not " unless $encount == 8; print "ok 3\n"; XML-Parser-2.46/t/cdata.t0000644000000000000000000000125212703346370013546 0ustar rootrootBEGIN { print "1..2\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; my $count = 0; my $cdata_part = "<<< & > '' << &&&>&&&&;<"; my $doc = " hello there"; my $acc = ''; sub ch { my ( $xp, $data ) = @_; $acc .= $data; } sub stcd { my $xp = shift; $xp->setHandlers( Char => \&ch ); } sub ecd { my $xp = shift; $xp->setHandlers( Char => 0 ); } $parser = new XML::Parser( ErrorContext => 2, Handlers => { CdataStart => \&stcd, CdataEnd => \&ecd } ); $parser->parse($doc); print "not " unless ( $acc eq $cdata_part ); print "ok 2\n"; XML-Parser-2.46/t/styles.t0000644000000000000000000000244213542303724014015 0ustar rootrootuse Test; BEGIN { plan tests => 13 } use XML::Parser; use 
IO::File; my $xmlstr = 'bar'; { # Debug style my $parser = XML::Parser->new( Style => 'Debug' ); ok($parser); my $tmpfile = IO::File->new_tmpfile(); open( OLDERR, ">&STDERR" ); open( STDERR, ">&" . $tmpfile->fileno ) || die "Cannot re-open STDERR : $!"; $parser->parse($xmlstr); close(STDERR); open( STDERR, ">&OLDERR" ); close(OLDERR); seek( $tmpfile, 0, 0 ); my $warn = 0; $warn++ while (<$tmpfile>); ok( $warn, 3, "Check we got three warnings out" ); } { # Object style my $parser = XML::Parser->new( Style => 'Objects' ); ok($parser); my $tree = $parser->parse($xmlstr); ok($tree); } { # Stream style my $parser = XML::Parser->new( Style => 'Stream' ); ok($parser); } { # Subs style my $parser = XML::Parser->new( Style => 'Subs' ); ok($parser); } { # Tree style my $parser = XML::Parser->new( Style => 'Tree' ); ok($parser); my $tree = $parser->parse($xmlstr); ok( ref($tree), 'ARRAY' ); ok( $tree->[0], 'foo' ); ok( ref( $tree->[1] ), 'ARRAY' ); ok( ref( $tree->[1]->[0] ), 'HASH' ); ok( $tree->[1][1], '0' ); ok( $tree->[1][2], 'bar' ); } XML-Parser-2.46/t/external_ent.t0000644000000000000000000000231713542304443015162 0ustar rootroot#!/usr/bin/perl use strict; use warnings; use Test::More tests => 4; use XML::Parser; ################################################################ # Check default external entity handler my $txt = ''; sub txt { my ( $xp, $data ) = @_; $txt .= $data; } my $docstring = <<'End_of_XML;'; ]> a = "&a;" b = "&b;" And here they are again in reverse order: b = "&b;" a = "&a;" End_of_XML; my $ent_fh; open( $ent_fh, '>', 'a.ent' ) or die "Couldn't open a.ent for writing"; print $ent_fh "This ('&c;') is a quote of c"; close($ent_fh); open( $ent_fh, '>', 'b.ent' ) or die "Couldn't open b.ent for writing"; print $ent_fh "Hello, I'm B"; close($ent_fh); open( $ent_fh, '>', 'c.ent' ) or die "Couldn't open c.ent for writing"; print $ent_fh "Hurrah for C"; close($ent_fh); my $p = new XML::Parser( Handlers => { Char => \&txt } ); $p->parse($docstring); my 
%check = ( a => "This ('Hurrah for C') is a quote of c", b => "Hello, I'm B" ); while ( $txt =~ /([ab]) = "(.*)"/g ) { my ( $k, $v ) = ( $1, $2 ); is($check{$k}, $v); } unlink('a.ent'); unlink('b.ent'); unlink('c.ent'); XML-Parser-2.46/t/parament.t0000644000000000000000000000364712703346371014314 0ustar rootroot#!/usr/bin/perl use strict; use warnings; use Test::More tests => 13; use XML::Parser; my $internal_subset = <<'End_of_internal;'; [ ] End_of_internal; my $doc = <<"End_of_doc;"; Happy, happy &joy;, &joy; &more; End_of_doc; my $bartxt = ''; my $internal_exists = 0; sub start { my ( $xp, $el, %atts ) = @_; if ( $el eq 'foo' ) { ok( !defined $atts{top} ); ok( defined $atts{zz} ); } elsif ( $el eq 'bar' ) { is( $atts{xyz}, 'b' ); } elsif ( $el eq 'ext' ) { is( $atts{type}, 'flag' ); } elsif ( $el eq 'more' ) { pass("got 'more'"); } } sub char { my ( $xp, $text ) = @_; $bartxt .= $text if $xp->current_element eq 'bar'; } sub attl { my ( $xp, $el, $att, $type, $dflt, $fixed ) = @_; ok( ( $att eq 'xyz' and $dflt eq "'b'" ), 'when el eq bar' ) if ( $el eq 'bar' ); ok( !( $att eq 'top' and $dflt eq '"hello"' ), 'when el eq foo' ) if ( $el eq 'foo' ); } sub dtd { my ( $xp, $name, $sysid, $pubid, $internal ) = @_; pass("doctype called"); $internal_exists = $internal; } my $p = new XML::Parser( ParseParamEnt => 1, ErrorContext => 2, Handlers => { Start => \&start, Char => \&char, Attlist => \&attl, Doctype => \&dtd } ); eval { $p->parse($doc) }; if ( $] < 5.006 ) { is( $bartxt, "\xe5\x83\x96, \xe5\x83\x96" ); } else { is( $bartxt, chr(0x50d6) . ", " . 
chr(0x50d6) ); } ok( $internal_exists, 'internal exists' ); $doc =~ s/[\s\n]+\[[^]]*\][\s\n]+//m; $p->setHandlers( Start => sub { my ( $xp, $el, %atts ) = @_; if ( $el eq 'foo' ) { ok( defined( $atts{zz} ) ); } } ); $p->parse($doc); XML-Parser-2.46/t/ext2.ent0000644000000000000000000000001012703346340013663 0ustar rootroot XML-Parser-2.46/t/file.t0000644000000000000000000000035512703346371013415 0ustar rootroot use Test::More tests => 1; use XML::Parser; my $count = 0; $parser = XML::Parser->new( ErrorContext => 2 ); $parser->setHandlers( Comment => sub { $count++; } ); $parser->parsefile('samples/REC-xml-19980210.xml'); is( $count, 37 ); XML-Parser-2.46/t/astress.t0000644000000000000000000001164513542305074014163 0ustar rootroot# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { print "1..27\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; use FileHandle; # Make 5.10.0 happy. $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): # Test 2 my $parser = new XML::Parser( ProtocolEncoding => 'ISO-8859-1' ); if ($parser) { print "ok 2\n"; } else { print "not ok 2\n"; exit; } my @ndxstack; my $indexok = 1; # Need this external entity open( ZOE, '>zoe.ent' ); print ZOE "'cute'"; close(ZOE); # XML string for tests my $xmlstring = <<"End_of_XML;"; ]> First line in foo Fran is &fran; and Zoe is &zoe; 1st line in bar 2nd line in bar 3rd line in bar This, '\240', would be a bad character in UTF-8. 
End_of_XML; # Handlers my @tests; my $pos = ''; sub ch { my ( $p, $str ) = @_; $tests[4]++; $tests[5]++ if ( $str =~ /2nd line/ and $p->in_element('blah') ); if ( $p->in_element('boom') ) { $tests[17]++ if $str =~ /pretty/; $tests[18]++ if $str =~ /cute/; } } sub st { my ( $p, $el, %atts ) = @_; $ndxstack[ $p->depth ] = $p->element_index; $tests[6]++ if ( $el eq 'bar' and $atts{stomp} eq 'jill' ); if ( $el eq 'zap' and $atts{'ref'} eq 'zing' ) { $tests[7]++; $p->default_current; } elsif ( $el eq 'bar' ) { $tests[22]++ if $p->recognized_string eq ''; } } sub eh { my ( $p, $el ) = @_; $indexok = 0 unless $p->element_index == $ndxstack[ $p->depth ]; if ( $el eq 'zap' ) { $tests[8]++; my @old = $p->setHandlers( 'Char', \&newch ); $tests[19]++ if $p->current_line == 17; $tests[20]++ if $p->current_column == 20; $tests[23]++ if ( $old[0] eq 'Char' and $old[1] == \&ch ); } if ( $el eq 'boom' ) { $p->setHandlers( 'Default', \&dh ); } } sub dh { my ( $p, $str ) = @_; if ( $str =~ /doozy/ ) { $tests[9]++; $pos = $p->position_in_context(1); } $tests[10]++ if $str =~ /^setHandlers( 'Char' => \&ch, 'Start' => \&st, 'End' => \&eh, 'Proc' => \&pi, 'Notation' => \¬e, 'Unparsed' => \&unp, 'ExternEnt' => \&extent, 'ExternEntFin' => sub { close(FOO); } ); }; if ($@) { print "not ok 3\n"; exit; } print "ok 3\n"; # Test 4..20 eval { $parser->parsestring($xmlstring); }; if ($@) { print "Parse error:\n$@"; } else { $tests[21]++; } unlink('zoe.ent') if ( -f 'zoe.ent' ); for ( 4 .. 23 ) { print "not " unless $tests[$_]; print "ok $_\n"; } $cmpstr = << 'End_of_Cmp;'; 2nd line in bar 3rd line in bar ===================^ End_of_Cmp; if ( $cmpstr ne $pos ) { print "not "; } print "ok 24\n"; print "not " unless $indexok; print "ok 25\n"; # Test that memory leak through autovivifying symbol table entries is fixed. 
my $count = 0; $parser = new XML::Parser( Handlers => { Start => sub { $count++ } } ); $xmlstring = 'Sea'; eval { $parser->parsestring($xmlstring); }; if ( $count != 2 ) { print "not "; } print "ok 26\n"; if ( defined( *{$xmlstring} ) ) { print "not "; } print "ok 27\n"; XML-Parser-2.46/t/encoding.t0000644000000000000000000000425212703346371014264 0ustar rootrootBEGIN { print "1..6\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; ################################################################ # Check encoding my $xmldec = "\n"; my $docstring = <<"End_of_doc;"; <\x8e\x83>\x90\x46\x81\x41\x98\x61\x81\x41\x99\x44 End_of_doc; my $doc = $xmldec . $docstring; my @bytes; my $lastel; sub text { my ( $xp, $data ) = @_; push( @bytes, unpack( 'U0C*', $data ) ); # was fixed 5.10 } sub start { my ( $xp, $el ) = @_; $lastel = $el; } my $p = XML::Parser->new( Handlers => { Start => \&start, Char => \&text } ); $p->parse($doc); my $exptag = ( $] < 5.006 ) ? 
"\xe7\xa5\x89" # U+7949 blessings 0x8e83 : chr(0x7949); my @expected = ( 0xe8, 0x89, 0xb2, # U+8272 beauty 0x9046 0xe3, 0x80, 0x81, # U+3001 comma 0x8141 0xe5, 0x92, 0x8c, # U+548C peace 0x9861 0xe3, 0x80, 0x81, # U+3001 comma 0x8141 0xe5, 0x83, 0x96, # U+50D6 joy 0x9944 0x0a ); if ( $lastel eq $exptag ) { print "ok 2\n"; } else { print "not ok 2\n"; } if ( @bytes != @expected ) { print "not ok 3\n"; } else { my $i; for ( $i = 0; $i < @expected; $i++ ) { if ( $bytes[$i] != $expected[$i] ) { print "not ok 3\n"; exit; } } print "ok 3\n"; } $lastel = ''; $p->parse( $docstring, ProtocolEncoding => 'X-SJIS-UNICODE' ); if ( $lastel eq $exptag ) { print "ok 4\n"; } else { print "not ok 4\n"; } # Test the CP-1252 Win-Latin-1 mapping $docstring = qq( ); my %attr; sub get_attr { my ( $xp, $el, @list ) = @_; %attr = @list; } $p = XML::Parser->new( Handlers => { Start => \&get_attr } ); eval { $p->parse($docstring) }; if ($@) { print "not "; # couldn't load the map } print "ok 5\n"; if ( $attr{euro} ne ( $] < 5.006 ? "\xE2\x82\xAC" : chr(0x20AC) ) or $attr{lsq} ne ( $] < 5.006 ? "\xE2\x80\x98" : chr(0x2018) ) or $attr{rdq} ne ( $] < 5.006 ? 
"\xE2\x80\x9D" : chr(0x201D) ) ) { print "not "; } print "ok 6\n"; XML-Parser-2.46/t/defaulted.t0000644000000000000000000000165012703346371014432 0ustar rootrootBEGIN { print "1..4\n"; } END { print "not ok 1\n" unless $loaded; } use XML::Parser; $loaded = 1; print "ok 1\n"; $doc = <<'End_of_Doc;'; ]> End_of_Doc; sub st { my $xp = shift; my $el = shift; if ( $el eq 'bar' ) { my %atts = @_; my %isdflt; my $specified = $xp->specified_attr; for ( my $i = $specified; $i < @_; $i += 2 ) { $isdflt{ $_[$i] } = 1; } if ( defined $atts{xx} ) { print 'not ' if $isdflt{'xx'}; print "ok 2\n"; print 'not ' unless $isdflt{'zz'}; print "ok 3\n"; } else { print 'not ' if $isdflt{'zz'}; print "ok 4\n"; } } } $p = new XML::Parser( Handlers => { Start => \&st } ); $p->parse($doc); XML-Parser-2.46/Makefile.PL0000644000000000000000000001106113542323665014017 0ustar rootrootuse 5.004005; #Devel::CheckLib use ExtUtils::MakeMaker; use lib qw(inc); use Devel::CheckLib; use Config; $expat_libpath = $ENV{EXPATLIBPATH} || ''; $expat_incpath = $ENV{EXPATINCPATH} || ''; my @replacement_args; foreach (@ARGV) { if (/^EXPAT(LIB|INC)PATH=(.+)/) { if ( $1 eq 'LIB' ) { $expat_libpath = $2; } else { $expat_incpath = $2; } #push(@replacement_args, "$1=$2"); } else { push( @replacement_args, $_ ); } } @ARGV = @replacement_args; unless ( check_lib( # fill in what you prompted the user for here lib => [qw(expat)], header => ['expat.h'], incpath => $expat_incpath, ( $expat_libpath ? ( libpath => $expat_libpath ) : () ), ) ) { warn <<'Expat_Not_Installed;'; Expat must be installed prior to building XML::Parser and I can't find it in the standard library directories. Install 'expat-devel' (or 'libexpat1-dev') package with your OS package manager. See 'README'. Or you can download expat from: http://sourceforge.net/projects/expat/ If expat is installed, but in a non-standard directory, then use the following options to Makefile.PL: EXPATLIBPATH=... 
To set the directory in which to find libexpat EXPATINCPATH=... To set the directory in which to find expat.h For example: perl Makefile.PL EXPATLIBPATH=/home/me/lib EXPATINCPATH=/home/me/include Note that if you build against a shareable library in a non-standard location you may (on some platforms) also have to set your LD_LIBRARY_PATH environment variable at run time for perl to find the library. Expat_Not_Installed; # exiting before Makefile generation silences CPANTesters reports # when expat is not available. exit 0; } if ( not $expat_libpath and $] >= 5.006001 and $^O ne 'MSWin32' ) { require ExtUtils::Liblist; # Buggy before this ($expat_libpath) = ExtUtils::Liblist->ext('-lexpat'); } # Don't try to descend into Expat directory for testing sub MY::test { my $self = shift; my $hold = delete $self->{DIR}; my $ret = $self->MM::test(@_); $self->{DIR} = $hold if defined($hold); $ret; } my @extras = (); push( @extras, CAPI => 'TRUE' ) if ( $PERL_VERSION >= 5.005 and $OSNAME eq 'MSWin32' and $Config{archname} =~ /-object\b/i ); WriteMakefile1( ABSTRACT_FROM => 'Parser.pm', AUTHOR => 'Clark Cooper (coopercc@netheaven.com)', LICENSE => 'perl', MIN_PERL_VERSION => '5.00405', META_MERGE => { resources => { bugtracker => 'https://github.com/toddr/XML-Parser/issues', repository => 'http://github.com/toddr/XML-Parser', }, }, TEST_REQUIRES => { 'Test::More' => 0, 'warnings' => 0, }, NAME => 'XML::Parser', DIR => [qw(Expat)], dist => { COMPRESS => 'gzip', SUFFIX => '.gz' }, VERSION_FROM => 'Parser.pm', PREREQ_PM => { 'LWP::UserAgent' => 0, #for tests }, $^O =~ /win/i ? ( dist => { TAR => 'ptar', TARFLAGS => '-c -C -f', }, ) : (), @extras ); sub WriteMakefile1 { #Compatibility code for old versions of EU::MM. Written by Alexandr Ciornii, version 0.23. Added by eumm-upgrade. 
my %params = @_; my $eumm_version = $ExtUtils::MakeMaker::VERSION; $eumm_version = eval $eumm_version; die "EXTRA_META is deprecated" if exists $params{EXTRA_META}; die "License not specified" if not exists $params{LICENSE}; if ( $params{AUTHOR} and ref( $params{AUTHOR} ) eq 'ARRAY' and $eumm_version < 6.5705 ) { $params{META_ADD}->{author} = $params{AUTHOR}; $params{AUTHOR} = join( ', ', @{ $params{AUTHOR} } ); } if ( $params{TEST_REQUIRES} and $eumm_version < 6.64 ) { $params{BUILD_REQUIRES} = { %{ $params{BUILD_REQUIRES} || {} }, %{ $params{TEST_REQUIRES} } }; delete $params{TEST_REQUIRES}; } if ( $params{BUILD_REQUIRES} and $eumm_version < 6.5503 ) { #EUMM 6.5502 has problems with BUILD_REQUIRES $params{PREREQ_PM} = { %{ $params{PREREQ_PM} || {} }, %{ $params{BUILD_REQUIRES} } }; delete $params{BUILD_REQUIRES}; } delete $params{CONFIGURE_REQUIRES} if $eumm_version < 6.52; delete $params{MIN_PERL_VERSION} if $eumm_version < 6.48; delete $params{META_MERGE} if $eumm_version < 6.46; delete $params{META_ADD} if $eumm_version < 6.46; delete $params{LICENSE} if $eumm_version < 6.31; WriteMakefile(%params); } XML-Parser-2.46/samples/0000755000000000000000000000000013542324531013503 5ustar rootrootXML-Parser-2.46/samples/REC-xml-19980210.xml0000644000000000000000000046717512703346340016373 0ustar rootroot "> '"> amp, lt, gt, apos, quot"> ]>
Extensible Markup Language (XML) 1.0 REC-xml-&iso6.doc.date; W3C Recommendation &draft.day;&draft.month;&draft.year; http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date; http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.xml http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.html http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.pdf http://www.w3.org/TR/1998/REC-xml-&iso6.doc.date;.ps http://www.w3.org/TR/REC-xml http://www.w3.org/TR/PR-xml-971208 Tim Bray Textuality and Netscape tbray@textuality.com Jean Paoli Microsoft jeanpa@microsoft.com C. M. Sperberg-McQueen University of Illinois at Chicago cmsmcq@uic.edu

The Extensible Markup Language (XML) is a subset of SGML that is completely described in this document. Its goal is to enable generic SGML to be served, received, and processed on the Web in the way that is now possible with HTML. XML has been designed for ease of implementation and for interoperability with both SGML and HTML.

This document has been reviewed by W3C Members and other interested parties and has been endorsed by the Director as a W3C Recommendation. It is a stable document and may be used as reference material or cited as a normative reference from another document. W3C's role in making the Recommendation is to draw attention to the specification and to promote its widespread deployment. This enhances the functionality and interoperability of the Web.

This document specifies a syntax created by subsetting an existing, widely used international text processing standard (Standard Generalized Markup Language, ISO 8879:1986(E) as amended and corrected) for use on the World Wide Web. It is a product of the W3C XML Activity, details of which can be found at http://www.w3.org/XML. A list of current W3C Recommendations and other technical documents can be found at http://www.w3.org/TR.

This specification uses the term URI, which is defined by , a work in progress expected to update and .

The list of known errors in this specification is available at http://www.w3.org/XML/xml-19980210-errata.

Please report errors in this document to xml-editor@w3.org.

Chicago, Vancouver, Mountain View, et al.: World-Wide Web Consortium, XML Working Group, 1996, 1997.

Created in electronic form.

English Extended Backus-Naur Form (formal grammar) 1997-12-03 : CMSMcQ : yet further changes 1997-12-02 : TB : further changes (see TB to XML WG, 2 December 1997) 1997-12-02 : CMSMcQ : deal with as many corrections and comments from the proofreaders as possible: entify hard-coded document date in pubdate element, change expansion of entity WebSGML, update status description as per Dan Connolly (am not sure about refernece to Berners-Lee et al.), add 'The' to abstract as per WG decision, move Relationship to Existing Standards to back matter and combine with References, re-order back matter so normative appendices come first, re-tag back matter so informative appendices are tagged informdiv1, remove XXX XXX from list of 'normative' specs in prose, move some references from Other References to Normative References, add RFC 1738, 1808, and 2141 to Other References (they are not normative since we do not require the processor to enforce any rules based on them), add reference to 'Fielding draft' (Berners-Lee et al.), move notation section to end of body, drop URIchar non-terminal and use SkipLit instead, lose stray reference to defunct nonterminal 'markupdecls', move reference to Aho et al. into appendix (Tim's right), add prose note saying that hash marks and fragment identifiers are NOT part of the URI formally speaking, and are NOT legal in system identifiers (processor 'may' signal an error). Work through: Tim Bray reacting to James Clark, Tim Bray on his own, Eve Maler, NOT DONE YET: change binary / text to unparsed / parsed. 
handle James's suggestion about < in attriubte values uppercase hex characters, namechar list, 1997-12-01 : JB : add some column-width parameters 1997-12-01 : CMSMcQ : begin round of changes to incorporate recent WG decisions and other corrections: binding sources of character encoding info (27 Aug / 3 Sept), correct wording of Faust quotation (restore dropped line), drop SDD from EncodingDecl, change text at version number 1.0, drop misleading (wrong!) sentence about ignorables and extenders, modify definition of PCData to make bar on msc grammatical, change grammar's handling of internal subset (drop non-terminal markupdecls), change definition of includeSect to allow conditional sections, add integral-declaration constraint on internal subset, drop misleading / dangerous sentence about relationship of entities with system storage objects, change table body tag to htbody as per EM change to DTD, add rule about space normalization in public identifiers, add description of how to generate our name-space rules from Unicode character database (needs further work!). 1997-10-08 : TB : Removed %-constructs again, new rules for PE appearance. 1997-10-01 : TB : Case-sensitive markup; cleaned up element-type defs, lotsa little edits for style 1997-09-25 : TB : Change to elm's new DTD, with substantial detail cleanup as a side-effect 1997-07-24 : CMSMcQ : correct error (lost *) in definition of ignoreSectContents (thanks to Makoto Murata) Allow all empty elements to have end-tags, consistent with SGML TC (as per JJC). 1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections: introduce the term 'empty-element tag', note that all empty elements may use it, and elements declared EMPTY must use it. Add WFC requiring encoding decl to come first in an entity. Redefine notations to point to PIs as well as binary entities. Change autodetection table by removing bytes 3 and 4 from examples with Byte Order Mark. 
Add content model as a term and clarify that it applies to both mixed and element content. 1997-06-30 : CMSMcQ : change date, some cosmetic changes, changes to productions for choice, seq, Mixed, NotationType, Enumeration. Follow James Clark's suggestion and prohibit conditional sections in internal subset. TO DO: simplify production for ignored sections as a result, since we don't need to worry about parsers which don't expand PErefs finding a conditional section. 1997-06-29 : TB : various edits 1997-06-29 : CMSMcQ : further changes: Suppress old FINAL EDIT comments and some dead material. Revise occurrences of % in grammar to exploit Henry Thompson's pun, especially markupdecl and attdef. Remove RMD requirement relating to element content (?). 1997-06-28 : CMSMcQ : Various changes for 1 July draft: Add text for draconian error handling (introduce the term Fatal Error). RE deleta est (changing wording from original announcement to restrict the requirement to validating parsers). Tag definition of validating processor and link to it. Add colon as name character. Change def of %operator. Change standard definitions of lt, gt, amp. Strip leading zeros from #x00nn forms. 1997-04-02 : CMSMcQ : final corrections of editorial errors found in last night's proofreading. Reverse course once more on well-formed: Webster's Second hyphenates it, and that's enough for me. 1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self 1997-03-31 : Tim Bray : many changes 1997-03-29 : CMSMcQ : some Henry Thompson (on entity handling), some Charles Goldfarb, some ERB decisions (PE handling in miscellaneous declarations. Changed Ident element to accept def attribute. Allow normalization of Unicode characters. move def of systemliteral into section on literals. 1997-03-28 : CMSMcQ : make as many corrections as possible, from Terry Allen, Norbert Mikula, James Clark, Jon Bosak, Henry Thompson, Paul Grosso, and self. 
Among other things: give in on "well formed" (Terry is right), tentatively rename QuotedCData as AttValue and Literal as EntityValue to be more informative, since attribute values are the only place QuotedCData was used, and vice versa for entity text and Literal. (I'd call it Entity Text, but 8879 uses that name for both internal and external entities.) 1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply my changes dated 03-20 and 03-21. Normalize old 'may not' to 'must not' except in the one case where it meant 'may or may not'. 1997-03-21 : TB : massive changes on plane flight from Chicago to Vancouver 1997-03-21 : CMSMcQ : correct as many reported errors as possible. 1997-03-20 : CMSMcQ : correct typos listed in CMSMcQ hand copy of spec. 1997-03-20 : CMSMcQ : cosmetic changes preparatory to revision for WWW conference April 1997: restore some of the internal entity references (e.g. to docdate, etc.), change character xA0 to &nbsp; and define nbsp as &#160;, and refill a lot of paragraphs for legibility. 1996-11-12 : CMSMcQ : revise using Tim's edits: Add list type of NUMBERED and change most lists either to BULLETS or to NUMBERED. Suppress QuotedNames, Names (not used). Correct trivial-grammar doc type decl. Rename 'marked section' as 'CDATA section' passim. Also edits from James Clark: Define the set of characters from which [^abc] subtracts. Charref should use just [0-9] not Digit. Location info needs cleaner treatment: remove? (ERB question). One example of a PI has wrong pic. Clarify discussion of encoding names. Encoding failure should lead to unspecified results; don't prescribe error recovery. Don't require exposure of entity boundaries. Ignore white space in element content. Reserve entity names of the form u-NNNN. Clarify relative URLs. And some of my own: Correct productions for content model: model cannot consist of a name, so "elements ::= cp" is no good. 1996-11-11 : CMSMcQ : revise for style. 
Add new rhs to entity declaration, for parameter entities. 1996-11-10 : CMSMcQ : revise for style. Fix / complete section on names, characters. Add sections on parameter entities, conditional sections. Still to do: Add compatibility note on deterministic content models. Finish stylistic revision. 1996-10-31 : TB : Add Entity Handling section 1996-10-30 : TB : Clean up term & termdef. Slip in ERB decision re EMPTY. 1996-10-28 : TB : Change DTD. Implement some of Michael's suggestions. Change comments back to //. Introduce language for XML namespace reservation. Add section on white-space handling. Lots more cleanup. 1996-10-24 : CMSMcQ : quick tweaks, implement some ERB decisions. Characters are not integers. Comments are /* */ not //. Add bibliographic refs to 10646, HyTime, Unicode. Rename old Cdata as MsData since it's only seen in marked sections. Call them attribute-value pairs not name-value pairs, except once. Internal subset is optional, needs '?'. Implied attributes should be signaled to the app, not have values supplied by processor. 1996-10-16 : TB : track down & excise all DSD references; introduce some EBNF for entity declarations. 1996-10-?? : TB : consistency check, fix up scraps so they all parse, get formatter working, correct a few productions. 1996-10-10/11 : CMSMcQ : various maintenance, stylistic, and organizational changes: Replace a few literals with xmlpio and pic entities, to make them consistent and ensure we can change pic reliably when the ERB votes. Drop paragraph on recognizers from notation section. Add match, exact match to terminology. Move old 2.2 XML Processors and Apps into intro. Mention comments, PIs, and marked sections in discussion of delimiter escaping. Streamline discussion of doctype decl syntax. Drop old section of 'PI syntax' for doctype decl, and add section on partial-DTD summary PIs to end of Logical Structures section. Revise DSD syntax section to use Tim's subset-in-a-PI mechanism. 
1996-10-10 : TB : eliminate name recognizers (and more?) 1996-10-09 : CMSMcQ : revise for style, consistency through 2.3 (Characters) 1996-10-09 : CMSMcQ : re-unite everything for convenience, at least temporarily, and revise quickly 1996-10-08 : TB : first major homogenization pass 1996-10-08 : TB : turn "current" attribute on div type into CDATA 1996-10-02 : TB : remould into skeleton + entities 1996-09-30 : CMSMcQ : add a few more sections prior to exchange with Tim. 1996-09-20 : CMSMcQ : finish transcribing notes. 1996-09-19 : CMSMcQ : begin transcribing notes for draft. 1996-09-13 : CMSMcQ : made outline from notes of 09-06, do some housekeeping
Introduction

Extensible Markup Language, abbreviated XML, describes a class of data objects called XML documents and partially describes the behavior of computer programs which process them. XML is an application profile or restricted form of SGML, the Standard Generalized Markup Language . By construction, XML documents are conforming SGML documents.

XML documents are made up of storage units called entities, which contain either parsed or unparsed data. Parsed data is made up of characters, some of which form character data, and some of which form markup. Markup encodes a description of the document's storage layout and logical structure. XML provides a mechanism to impose constraints on the storage layout and logical structure.

A software module called an XML processor is used to read XML documents and provide access to their content and structure. It is assumed that an XML processor is doing its work on behalf of another module, called the application. This specification describes the required behavior of an XML processor in terms of how it must read XML data and the information it must provide to the application.

Origin and Goals

XML was developed by an XML Working Group (originally known as the SGML Editorial Review Board) formed under the auspices of the World Wide Web Consortium (W3C) in 1996. It was chaired by Jon Bosak of Sun Microsystems with the active participation of an XML Special Interest Group (previously known as the SGML Working Group) also organized by the W3C. The membership of the XML Working Group is given in an appendix. Dan Connolly served as the WG's contact with the W3C.

The design goals for XML are:

XML shall be straightforwardly usable over the Internet.

XML shall support a wide variety of applications.

XML shall be compatible with SGML.

It shall be easy to write programs which process XML documents.

The number of optional features in XML is to be kept to the absolute minimum, ideally zero.

XML documents should be human-legible and reasonably clear.

The XML design should be prepared quickly.

The design of XML shall be formal and concise.

XML documents shall be easy to create.

Terseness in XML markup is of minimal importance.

This specification, together with associated standards (Unicode and ISO/IEC 10646 for characters, Internet RFC 1766 for language identification tags, ISO 639 for language name codes, and ISO 3166 for country name codes), provides all the information necessary to understand XML Version &XML.version; and construct computer programs to process it.

This version of the XML specification &doc.distribution;.

Terminology

The terminology used to describe XML documents is defined in the body of this specification. The terms defined in the following list are used in building those definitions and in describing the actions of an XML processor:

Conforming documents and XML processors are permitted to but need not behave as described.

Conforming documents and XML processors are required to behave as described; otherwise they are in error.

A violation of the rules of this specification; results are undefined. Conforming software may detect and report an error and may recover from it.

An error which a conforming XML processor must detect and report to the application. After encountering a fatal error, the processor may continue processing the data to search for further errors and may report such errors to the application. In order to support correction of errors, the processor may make unprocessed data from the document (with intermingled character data and markup) available to the application. Once a fatal error is detected, however, the processor must not continue normal processing (i.e., it must not continue to pass character data and information about the document's logical structure to the application in the normal way).

at user option: Conforming software may or must (depending on the modal verb in the sentence) behave as described; if it does, it must provide users a means to enable or disable the behavior described.

validity constraint: A rule which applies to all valid XML documents. Violations of validity constraints are errors; they must, at user option, be reported by validating XML processors.

well-formedness constraint: A rule which applies to all well-formed XML documents. Violations of well-formedness constraints are fatal errors.

match: (Of strings or names:) Two strings or names being compared must be identical. Characters with multiple possible representations in ISO/IEC 10646 (e.g. characters with both precomposed and base+diacritic forms) match only if they have the same representation in both strings. At user option, processors may normalize such characters to some canonical form. No case folding is performed. (Of strings and rules in the grammar:) A string matches a grammatical production if it belongs to the language generated by that production. (Of content and content models:) An element matches its declaration when it conforms in the fashion described in the validity constraint "Element Valid".

for compatibility: A feature of XML included solely to ensure that XML remains compatible with SGML.

for interoperability: A non-binding recommendation included to increase the chances that XML documents can be processed by the existing installed base of SGML processors which predate the WebSGML Adaptations Annex to ISO 8879.

Documents

A data object is an XML document if it is well-formed, as defined in this specification. A well-formed XML document may in addition be valid if it meets certain further constraints.

Each XML document has both a logical and a physical structure. Physically, the document is composed of units called entities. An entity may refer to other entities to cause their inclusion in the document. A document begins in a "root" or document entity. Logically, the document is composed of declarations, elements, comments, character references, and processing instructions, all of which are indicated in the document by explicit markup. The logical and physical structures must nest properly, as described in section 4.3.2, Well-Formed Parsed Entities.

Well-Formed XML Documents

A textual object is a well-formed XML document if:

Taken as a whole, it matches the production labeled document.

It meets all the well-formedness constraints given in this specification.

Each of the parsed entities which is referenced directly or indirectly within the document is well-formed.

Document: document ::= prolog element Misc*

Matching the document production implies that:

It contains one or more elements.

There is exactly one element, called the root, or document element, no part of which appears in the content of any other element. For all other elements, if the start-tag is in the content of another element, the end-tag is in the content of the same element. More simply stated, the elements, delimited by start- and end-tags, nest properly within each other.

As a consequence of this, for each non-root element C in the document, there is one other element P in the document such that C is in the content of P, but is not in the content of any other element that is in the content of P. P is referred to as the parent of C, and C as a child of P.

Characters

A parsed entity contains text, a sequence of characters, which may represent markup or character data. A character is an atomic unit of text as specified by ISO/IEC 10646. Legal characters are tab, carriage return, line feed, and the legal graphic characters of Unicode and ISO/IEC 10646. The use of "compatibility characters", as defined in section 6.8 of the Unicode Standard, is discouraged. Character Range: Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */

The mechanism for encoding character code points into bit patterns may vary from entity to entity. All XML processors must accept the UTF-8 and UTF-16 encodings of 10646; the mechanisms for signaling which of the two is in use, or for bringing other encodings into play, are discussed later, in section 4.3.3, Character Encoding in Entities.

Common Syntactic Constructs

This section defines some symbols used widely in the grammar.

S (white space) consists of one or more space (#x20) characters, carriage returns, line feeds, or tabs. White Space: S ::= (#x20 | #x9 | #xD | #xA)+

Characters are classified for convenience as letters, digits, or other characters. Letters consist of an alphabetic or syllabic base character possibly followed by one or more combining characters, or of an ideographic character. Full definitions of the specific characters in each class are given in Appendix B, Character Classes.

A Name is a token beginning with a letter or one of a few punctuation characters, and continuing with letters, digits, hyphens, underscores, colons, or full stops, together known as name characters. Names beginning with the string "xml", or any string which would match (('X'|'x') ('M'|'m') ('L'|'l')), are reserved for standardization in this or future versions of this specification.

The colon character within XML names is reserved for experimentation with name spaces. Its meaning is expected to be standardized at some future point, at which point those documents using the colon for experimental purposes may need to be updated. (There is no guarantee that any name-space mechanism adopted for XML will in fact use the colon as a name-space delimiter.) In practice, this means that authors should not use the colon in XML names except as part of name-space experiments, but that XML processors should accept the colon as a name character.

An Nmtoken (name token) is any mixture of name characters. Names and Tokens: NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender; Name ::= (Letter | '_' | ':') (NameChar)*; Names ::= Name (S Name)*; Nmtoken ::= (NameChar)+; Nmtokens ::= Nmtoken (S Nmtoken)*

Literal data is any quoted string not containing the quotation mark used as a delimiter for that string. Literals are used for specifying the content of internal entities (EntityValue), the values of attributes (AttValue), and external identifiers (SystemLiteral). Note that a SystemLiteral can be parsed without scanning for markup. Literals EntityValue '"' ([^%&"] | PEReference | Reference)* '"' |  "'" ([^%&'] | PEReference | Reference)* "'" AttValue '"' ([^<&"] | Reference)* '"' |  "'" ([^<&'] | Reference)* "'" SystemLiteral ('"' [^"]* '"') | ("'" [^']* "'") PubidLiteral '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" PubidChar #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

Character Data and Markup

Text consists of intermingled character data and markup. Markup takes the form of start-tags, end-tags, empty-element tags, entity references, character references, comments, CDATA section delimiters, document type declarations, and processing instructions.

All text that is not markup constitutes the character data of the document.

The ampersand character (&) and the left angle bracket (<) may appear in their literal form only when used as markup delimiters, or within a comment, a processing instruction, or a CDATA section. They are also legal within the literal entity value of an internal entity declaration; see section 4.3.2, Well-Formed Parsed Entities. If they are needed elsewhere, they must be escaped using either numeric character references or the strings "&amp;" and "&lt;" respectively. The right angle bracket (>) may be represented using the string "&gt;", and must, for compatibility, be escaped using "&gt;" or a character reference when it appears in the string "]]>" in content, when that string is not marking the end of a CDATA section.

In the content of elements, character data is any string of characters which does not contain the start-delimiter of any markup. In a CDATA section, character data is any string of characters not including the CDATA-section-close delimiter, "]]>".

To allow attribute values to contain both single and double quotes, the apostrophe or single-quote character (') may be represented as "&apos;", and the double-quote character (") as "&quot;". Character Data CharData [^<&]* - ([^<&]* ']]>' [^<&]*)

Comments

Comments may appear anywhere in a document outside other markup; in addition, they may appear within the document type declaration at places allowed by the grammar. They are not part of the document's character data; an XML processor may, but need not, make it possible for an application to retrieve the text of comments. For compatibility, the string "--" (double-hyphen) must not occur within comments. Comments Comment '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'

An example of a comment: <!-- declarations for <head> & <body> -->

Processing Instructions

Processing instructions (PIs) allow documents to contain instructions for applications. Processing Instructions: PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')). PIs are not part of the document's character data, but must be passed through to the application. The PI begins with a target (PITarget) used to identify the application to which the instruction is directed. The target names "XML", "xml", and so on are reserved for standardization in this or future versions of this specification. The XML Notation mechanism may be used for formal declaration of PI targets.

CDATA Sections

CDATA sections may occur anywhere character data may occur; they are used to escape blocks of text containing characters which would otherwise be recognized as markup. CDATA sections begin with the string "<![CDATA[" and end with the string "]]>": CDATA Sections CDSect CDStart CData CDEnd CDStart '<![CDATA[' CData (Char* - (Char* ']]>' Char*)) CDEnd ']]>' Within a CDATA section, only the CDEnd string is recognized as markup, so that left angle brackets and ampersands may occur in their literal form; they need not (and cannot) be escaped using "&lt;" and "&amp;". CDATA sections cannot nest.

An example of a CDATA section, in which "<greeting>" and "</greeting>" are recognized as character data, not markup: <![CDATA[<greeting>Hello, world!</greeting>]]>

Prolog and Document Type Declaration

XML documents may, and should, begin with an XML declaration which specifies the version of XML being used. For example, the following is a complete XML document, well-formed but not valid: <?xml version="1.0"?> <greeting>Hello, world!</greeting> and so is this: <greeting>Hello, world!</greeting>

The version number "1.0" should be used to indicate conformance to this version of this specification; it is an error for a document to use the value "1.0" if it does not conform to this version of this specification. It is the intent of the XML working group to give later versions of this specification numbers other than "1.0", but this intent does not indicate a commitment to produce any future versions of XML, nor if any are produced, to use any particular numbering scheme. Since future versions are not ruled out, this construct is provided as a means to allow the possibility of automatic version recognition, should it become necessary. Processors may signal an error if they receive documents labeled with versions they do not support.

The function of the markup in an XML document is to describe its storage and logical structure and to associate attribute-value pairs with its logical structures. XML provides a mechanism, the document type declaration, to define constraints on the logical structure and to support the use of predefined storage units. An XML document is valid if it has an associated document type declaration and if the document complies with the constraints expressed in it.

The document type declaration must appear before the first element in the document. Prolog: prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'; VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum "); Eq ::= S? '=' S?; VersionNum ::= ([a-zA-Z0-9_.:] | '-')+; Misc ::= Comment | PI | S

The XML document type declaration contains or points to markup declarations that provide a grammar for a class of documents. This grammar is known as a document type definition, or DTD. The document type declaration can point to an external subset (a special kind of external entity) containing markup declarations, or can contain the markup declarations directly in an internal subset, or can do both. The DTD for a document consists of both subsets taken together.

A markup declaration is an element type declaration, an attribute-list declaration, an entity declaration, or a notation declaration. These declarations may be contained in whole or in part within parameter entities, as described in the well-formedness and validity constraints below. For fuller information, see section 4, Physical Structures.

Document Type Definition doctypedecl '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' markupdecl elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment

The markup declarations may be made up in whole or in part of the replacement text of parameter entities. The productions later in this specification for individual nonterminals (elementdecl, AttlistDecl, and so on) describe the declarations after all the parameter entities have been included.

Root Element Type

The Name in the document type declaration must match the element type of the root element.

Proper Declaration/PE Nesting

Parameter-entity replacement text must be properly nested with markup declarations. That is to say, if either the first character or the last character of a markup declaration (markupdecl above) is contained in the replacement text for a parameter-entity reference, both must be contained in the same replacement text.

PEs in Internal Subset

In the internal DTD subset, parameter-entity references can occur only where markup declarations can occur, not within markup declarations. (This does not apply to references that occur in external parameter entities or to the external subset.)

Like the internal subset, the external subset and any external parameter entities referred to in the DTD must consist of a series of complete markup declarations of the types allowed by the non-terminal symbol markupdecl, interspersed with white space or parameter-entity references. However, portions of the contents of the external subset or of external parameter entities may conditionally be ignored by using the conditional section construct; this is not allowed in the internal subset. External Subset extSubset TextDecl? extSubsetDecl extSubsetDecl ( markupdecl | conditionalSect | PEReference | S )*

The external subset and external parameter entities also differ from the internal subset in that in them, parameter-entity references are permitted within markup declarations, not only between markup declarations.

An example of an XML document with a document type declaration: <?xml version="1.0"?> <!DOCTYPE greeting SYSTEM "hello.dtd"> <greeting>Hello, world!</greeting> The system identifier "hello.dtd" gives the URI of a DTD for the document.

The declarations can also be given locally, as in this example: <?xml version="1.0" encoding="UTF-8" ?> <!DOCTYPE greeting [ <!ELEMENT greeting (#PCDATA)> ]> <greeting>Hello, world!</greeting> If both the external and internal subsets are used, the internal subset is considered to occur before the external subset. This has the effect that entity and attribute-list declarations in the internal subset take precedence over those in the external subset.

Standalone Document Declaration

Markup declarations can affect the content of the document, as passed from an XML processor to an application; examples are attribute defaults and entity declarations. The standalone document declaration, which may appear as a component of the XML declaration, signals whether or not there are such declarations which appear external to the document entity. Standalone Document Declaration SDDecl S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))

In a standalone document declaration, the value "yes" indicates that there are no markup declarations external to the document entity (either in the DTD external subset, or in an external parameter entity referenced from the internal subset) which affect the information passed from the XML processor to the application. The value "no" indicates that there are or may be such external markup declarations. Note that the standalone document declaration only denotes the presence of external declarations; the presence, in a document, of references to external entities, when those entities are internally declared, does not change its standalone status.

If there are no external markup declarations, the standalone document declaration has no meaning. If there are external markup declarations but there is no standalone document declaration, the value "no" is assumed.

Any XML document for which standalone="no" holds can be converted algorithmically to a standalone document, which may be desirable for some network delivery applications.

Standalone Document Declaration

The standalone document declaration must have the value "no" if any external markup declarations contain declarations of:

attributes with default values, if elements to which these attributes apply appear in the document without specifications of values for these attributes, or

entities (other than amp, lt, gt, apos, quot), if references to those entities appear in the document, or

attributes with values subject to normalization, where the attribute appears in the document with a value which will change as a result of normalization, or

element types with element content, if white space occurs directly within any instance of those types.

An example XML declaration with a standalone document declaration: <?xml version="1.0" standalone='yes'?>

White Space Handling

In editing XML documents, it is often convenient to use "white space" (spaces, tabs, and blank lines, denoted by the nonterminal S in this specification) to set apart the markup for greater readability. Such white space is typically not intended for inclusion in the delivered version of the document. On the other hand, "significant" white space that should be preserved in the delivered version is common, for example in poetry and source code.

An XML processor must always pass all characters in a document that are not markup through to the application. A validating XML processor must also inform the application which of these characters constitute white space appearing in element content.

A special attribute named xml:space may be attached to an element to signal an intention that in that element, white space should be preserved by applications. In valid documents, this attribute, like any other, must be declared if it is used. When declared, it must be given as an enumerated type whose only possible values are "default" and "preserve". For example: <!ATTLIST poem xml:space (default|preserve) 'preserve'>

The value "default" signals that applications' default white-space processing modes are acceptable for this element; the value "preserve" indicates the intent that applications preserve all the white space. This declared intent is considered to apply to all elements within the content of the element where it is specified, unless overridden with another instance of the xml:space attribute.

The root element of any document is considered to have signaled no intentions as regards application space handling, unless it provides a value for this attribute or the attribute is declared with a default value.

End-of-Line Handling

XML parsed entities are often stored in computer files which, for editing convenience, are organized into lines. These lines are typically separated by some combination of the characters carriage-return (#xD) and line-feed (#xA).

To simplify the tasks of applications, wherever an external parsed entity or the literal entity value of an internal parsed entity contains either the literal two-character sequence "#xD#xA" or a standalone literal #xD, an XML processor must pass to the application the single character #xA. (This behavior can conveniently be produced by normalizing all line breaks to #xA on input, before parsing.)

Language Identification

In document processing, it is often useful to identify the natural or formal language in which the content is written. A special attribute named xml:lang may be inserted in documents to specify the language used in the contents and attribute values of any element in an XML document. In valid documents, this attribute, like any other, must be declared if it is used. The values of the attribute are language identifiers as defined by IETF RFC 1766, "Tags for the Identification of Languages": Language Identification: LanguageID ::= Langcode ('-' Subcode)*; Langcode ::= ISO639Code | IanaCode | UserCode; ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]); IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+; UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+; Subcode ::= ([a-z] | [A-Z])+. The Langcode may be any of the following:

a two-letter language code as defined by ISO 639, "Codes for the representation of names of languages"

a language identifier registered with the Internet Assigned Numbers Authority (IANA); these begin with the prefix "i-" (or "I-")

a language identifier assigned by the user, or agreed on between parties in private use; these must begin with the prefix "x-" or "X-" in order to ensure that they do not conflict with names later standardized or registered with IANA

There may be any number of Subcode segments; if the first subcode segment exists and the Subcode consists of two letters, then it must be a country code from ISO 3166, "Codes for the representation of names of countries." If the first subcode consists of more than two letters, it must be a subcode for the language in question registered with IANA, unless the Langcode begins with the prefix "x-" or "X-".

It is customary to give the language code in lower case, and the country code (if any) in upper case. Note that these values, unlike other names in XML documents, are case insensitive.

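For example: <p xml:lang="en">The quick brown fox jumps over the lazy dog.</p>

<p xml:lang="en-GB">What colour is it?</p>

<p xml:lang="en-US">What color is it?</p>

<sp who="Faust" desc='leise' xml:lang="de"> <l>Habe nun, ach! Philosophie,</l> <l>Juristerei, und Medizin</l> <l>und leider auch Theologie</l> <l>durchaus studiert mit heißem Bemüh'n.</l> </sp>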

The intent declared with xml:lang is considered to apply to all attributes and content of the element where it is specified, unless overridden with an instance of xml:lang on another element within that content.

A simple declaration for xml:lang might take the form xml:lang NMTOKEN #IMPLIED but specific default values may also be given, if appropriate. In a collection of French poems for English students, with glosses and notes in English, the xml:lang attribute might be declared this way: <!ATTLIST poem xml:lang NMTOKEN 'fr'> <!ATTLIST gloss xml:lang NMTOKEN 'en'> <!ATTLIST note xml:lang NMTOKEN 'en'>

Logical Structures

Each XML document contains one or more elements, the boundaries of which are either delimited by start-tags and end-tags, or, for empty elements, by an empty-element tag. Each element has a type, identified by name, sometimes called its "generic identifier" (GI), and may have a set of attribute specifications. Each attribute specification has a name and a value.

Element: element ::= EmptyElemTag | STag content ETag

This specification does not constrain the semantics, use, or (beyond syntax) names of the element types and attributes, except that names beginning with a match to (('X'|'x')('M'|'m')('L'|'l')) are reserved for standardization in this or future versions of this specification.

Element Type Match

The Name in an element's end-tag must match the element type in the start-tag.

Element Valid

An element is valid if there is a declaration matching elementdecl where the Name matches the element type, and one of the following holds:

The declaration matches EMPTY and the element has no content.

The declaration matches children and the sequence of child elements belongs to the language generated by the regular expression in the content model, with optional white space (characters matching the nonterminal S) between each pair of child elements.

The declaration matches Mixed and the content consists of character data and child elements whose types match names in the content model.

The declaration matches ANY, and the types of any child elements have been declared.

Start-Tags, End-Tags, and Empty-Element Tags

The beginning of every non-empty XML element is marked by a start-tag. Start-tag STag '<' Name (S Attribute)* S? '>' Attribute Name Eq AttValue The Name in the start- and end-tags gives the element's type. The Name-AttValue pairs are referred to as the attribute specifications of the element, with the Name in each pair referred to as the attribute name and the content of the AttValue (the text between the ' or " delimiters) as the attribute value.

Unique Att Spec

No attribute name may appear more than once in the same start-tag or empty-element tag.

Attribute Value Type

The attribute must have been declared; the value must be of the type declared for it. (For attribute types, see section 3.3, Attribute-List Declarations.)

No External Entity References

Attribute values cannot contain direct or indirect entity references to external entities.

No < in Attribute Values

The replacement text of any entity referred to directly or indirectly in an attribute value (other than "&lt;") must not contain a <.

An example of a start-tag: <termdef id="dt-dog" term="dog">

The end of every element that begins with a start-tag must be marked by an end-tag containing a name that echoes the element's type as given in the start-tag: End-tag ETag '</' Name S? '>'

An example of an end-tag:</termdef>

The text between the start-tag and end-tag is called the element's content: Content of Elements content (element | CharData | Reference | CDSect | PI | Comment)*

If an element is empty, it must be represented either by a start-tag immediately followed by an end-tag or by an empty-element tag. An empty-element tag takes a special form: Tags for Empty Elements EmptyElemTag '<' Name (S Attribute)* S? '/>'

Empty-element tags may be used for any element which has no content, whether or not it is declared using the keyword EMPTY. For interoperability, the empty-element tag must be used, and can only be used, for elements which are declared EMPTY.

Examples of empty elements: <IMG align="left" src="http://www.w3.org/Icons/WWW/w3c_home" /> <br></br> <br/>

Element Type Declarations

The element structure of an XML document may, for validation purposes, be constrained using element type and attribute-list declarations. An element type declaration constrains the element's content.

Element type declarations often constrain which element types can appear as children of the element. At user option, an XML processor may issue a warning when a declaration mentions an element type for which no declaration is provided, but this is not an error.

An element type declaration takes the form: Element Type Declaration elementdecl '<!ELEMENT' S Name S contentspec S? '>' contentspec 'EMPTY' | 'ANY' | Mixed | children where the Name gives the element type being declared.

Unique Element Type Declaration

No element type may be declared more than once.

Examples of element type declarations: <!ELEMENT br EMPTY> <!ELEMENT p (#PCDATA|emph)* > <!ELEMENT %name.para; %content.para; > <!ELEMENT container ANY>

Element Content

An element type has element content when elements of that type must contain only child elements (no character data), optionally separated by white space (characters matching the nonterminal S). In this case, the constraint includes a content model, a simple grammar governing the allowed types of the child elements and the order in which they are allowed to appear. The grammar is built on content particles (cps), which consist of names, choice lists of content particles, or sequence lists of content particles: Element-content Models children (choice | seq) ('?' | '*' | '+')? cp (Name | choice | seq) ('?' | '*' | '+')? choice '(' S? cp ( S? '|' S? cp )* S? ')' seq '(' S? cp ( S? ',' S? cp )* S? ')' where each Name is the type of an element which may appear as a child. Any content particle in a choice list may appear in the element content at the location where the choice list appears in the grammar; content particles occurring in a sequence list must each appear in the element content in the order given in the list. The optional character following a name or list governs whether the element or the content particles in the list may occur one or more (+), zero or more (*), or zero or one times (?). The absence of such an operator means that the element or content particle must appear exactly once. This syntax and meaning are identical to those used in the productions in this specification.

The content of an element matches a content model if and only if it is possible to trace out a path through the content model, obeying the sequence, choice, and repetition operators and matching each element in the content against an element type in the content model. For compatibility, it is an error if an element in the document can match more than one occurrence of an element type in the content model. For more information, see Appendix E, Deterministic Content Models.

Proper Group/PE Nesting

Parameter-entity replacement text must be properly nested with parenthesized groups. That is to say, if either of the opening or closing parentheses in a choice, seq, or Mixed construct is contained in the replacement text for a parameter entity, both must be contained in the same replacement text.

For interoperability, if a parameter-entity reference appears in a choice, seq, or Mixed construct, its replacement text should not be empty, and neither the first nor last non-blank character of the replacement text should be a connector (| or ,).

Examples of element-content models: <!ELEMENT spec (front, body, back?)> <!ELEMENT div1 (head, (p | list | note)*, div2*)> <!ELEMENT dictionary-body (%div.mix; | %dict.mix;)*>

Mixed Content

An element type has mixed content when elements of that type may contain character data, optionally interspersed with child elements. In this case, the types of the child elements may be constrained, but not their order or their number of occurrences: Mixed-content Declaration Mixed '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' where the Names give the types of elements that may appear as children.

No Duplicate Types

The same name must not appear more than once in a single mixed-content declaration.

Examples of mixed content declarations: <!ELEMENT p (#PCDATA|a|ul|b|i|em)*> <!ELEMENT p (#PCDATA | %font; | %phrase; | %special; | %form;)* > <!ELEMENT b (#PCDATA)>

Attribute-List Declarations

Attributes are used to associate name-value pairs with elements. Attribute specifications may appear only within start-tags and empty-element tags; thus, the productions used to recognize them appear in section 3.1, Start-Tags, End-Tags, and Empty-Element Tags. Attribute-list declarations may be used:

To define the set of attributes pertaining to a given element type.

To establish type constraints for these attributes.

To provide default values for attributes.

Attribute-list declarations specify the name, data type, and default value (if any) of each attribute associated with a given element type: Attribute-list Declaration AttlistDecl '<!ATTLIST' S Name AttDef* S? '>' AttDef S Name S AttType S DefaultDecl The Name in the AttlistDecl rule is the type of an element. At user option, an XML processor may issue a warning if attributes are declared for an element type not itself declared, but this is not an error. The Name in the AttDef rule is the name of the attribute.

When more than one AttlistDecl is provided for a given element type, the contents of all those provided are merged. When more than one definition is provided for the same attribute of a given element type, the first declaration is binding and later declarations are ignored. For interoperability, writers of DTDs may choose to provide at most one attribute-list declaration for a given element type, at most one attribute definition for a given attribute name, and at least one attribute definition in each attribute-list declaration. For interoperability, an XML processor may at user option issue a warning when more than one attribute-list declaration is provided for a given element type, or more than one attribute definition is provided for a given attribute, but this is not an error.

Attribute Types

XML attribute types are of three kinds: a string type, a set of tokenized types, and enumerated types. The string type may take any literal string as a value; the tokenized types have varying lexical and semantic constraints, as noted: Attribute Types AttType StringType | TokenizedType | EnumeratedType StringType 'CDATA' TokenizedType 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'

ID

Values of type ID must match the Name production. A name must not appear more than once in an XML document as a value of this type; i.e., ID values must uniquely identify the elements which bear them.

One ID per Element Type

No element type may have more than one ID attribute specified.

ID Attribute Default

An ID attribute must have a declared default of #IMPLIED or #REQUIRED.

IDREF

Values of type IDREF must match the Name production, and values of type IDREFS must match Names; each Name must match the value of an ID attribute on some element in the XML document; i.e. IDREF values must match the value of some ID attribute.

Entity Name

Values of type ENTITY must match the Name production, values of type ENTITIES must match Names; each Name must match the name of an unparsed entity declared in the DTD.

Name Token

Values of type NMTOKEN must match the Nmtoken production; values of type NMTOKENS must match Nmtokens.

Enumerated attributes can take one of a list of values provided in the declaration. There are two kinds of enumerated types: Enumerated Attribute Types EnumeratedType NotationType | Enumeration NotationType 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' Enumeration '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' A NOTATION attribute identifies a notation, declared in the DTD with associated system and/or public identifiers, to be used in interpreting the element to which the attribute is attached.

Notation Attributes

Values of this type must match one of the notation names included in the declaration; all notation names in the declaration must be declared.

Enumeration

Values of this type must match one of the Nmtoken tokens in the declaration.

For interoperability, the same Nmtoken should not occur more than once in the enumerated attribute types of a single element type.

Attribute Defaults

An attribute declaration provides information on whether the attribute's presence is required, and if not, how an XML processor should react if a declared attribute is absent in a document. Attribute Defaults DefaultDecl '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)

In an attribute declaration, #REQUIRED means that the attribute must always be provided, #IMPLIED that no default value is provided. If the declaration is neither #REQUIRED nor #IMPLIED, then the AttValue value contains the declared default value; the #FIXED keyword states that the attribute must always have the default value. If a default value is declared, when an XML processor encounters an omitted attribute, it is to behave as though the attribute were present with the declared default value.

Required Attribute

If the default declaration is the keyword #REQUIRED, then the attribute must be specified for all elements of the type in the attribute-list declaration.

Attribute Default Legal

The declared default value must meet the lexical constraints of the declared attribute type.

Fixed Attribute Default

If an attribute has a default value declared with the #FIXED keyword, instances of that attribute must match the default value.

Examples of attribute-list declarations: <!ATTLIST termdef id ID #REQUIRED name CDATA #IMPLIED> <!ATTLIST list type (bullets|ordered|glossary) "ordered"> <!ATTLIST form method CDATA #FIXED "POST">

Attribute-Value Normalization

Before the value of an attribute is passed to the application or checked for validity, the XML processor must normalize it as follows:

a character reference is processed by appending the referenced character to the attribute value

an entity reference is processed by recursively processing the replacement text of the entity

a whitespace character (#x20, #xD, #xA, #x9) is processed by appending #x20 to the normalized value, except that only a single #x20 is appended for a "#xD#xA" sequence that is part of an external parsed entity or the literal entity value of an internal parsed entity

other characters are processed by appending them to the normalized value

If the declared value is not CDATA, then the XML processor must further process the normalized attribute value by discarding any leading and trailing space (#x20) characters, and by replacing sequences of space (#x20) characters by a single space (#x20) character.

All attributes for which no declaration has been read should be treated by a non-validating parser as if declared CDATA.

Conditional Sections

Conditional sections are portions of the document type declaration external subset which are included in, or excluded from, the logical structure of the DTD based on the keyword which governs them. Conditional Section conditionalSect includeSect | ignoreSect includeSect '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' ignoreSect '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' ignoreSectContents Ignore ('<![' ignoreSectContents ']]>' Ignore)* Ignore Char* - (Char* ('<![' | ']]>') Char*)

Like the internal and external DTD subsets, a conditional section may contain one or more complete declarations, comments, processing instructions, or nested conditional sections, intermingled with white space.

If the keyword of the conditional section is INCLUDE, then the contents of the conditional section are part of the DTD. If the keyword of the conditional section is IGNORE, then the contents of the conditional section are not logically part of the DTD. Note that for reliable parsing, the contents of even ignored conditional sections must be read in order to detect nested conditional sections and ensure that the end of the outermost (ignored) conditional section is properly detected. If a conditional section with a keyword of INCLUDE occurs within a larger conditional section with a keyword of IGNORE, both the outer and the inner conditional sections are ignored.

If the keyword of the conditional section is a parameter-entity reference, the parameter entity must be replaced by its content before the processor decides whether to include or ignore the conditional section.

An example: <!ENTITY % draft 'INCLUDE' > <!ENTITY % final 'IGNORE' > <![%draft;[ <!ELEMENT book (comments*, title, body, supplements?)> ]]> <![%final;[ <!ELEMENT book (title, body, supplements?)> ]]>

Physical Structures

An XML document may consist of one or many storage units. These are called entities; they all have content and are all (except for the document entity, see below, and the external DTD subset) identified by name. Each XML document has one entity called the document entity, which serves as the starting point for the XML processor and may contain the whole document.

Entities may be either parsed or unparsed. A parsed entity's contents are referred to as its replacement text; this text is considered an integral part of the document.

An unparsed entity is a resource whose contents may or may not be text, and if text, may not be XML. Each unparsed entity has an associated notation, identified by name. Beyond a requirement that an XML processor make the identifiers for the entity and notation available to the application, XML places no constraints on the contents of unparsed entities.

Parsed entities are invoked by name using entity references; unparsed entities by name, given in the value of ENTITY or ENTITIES attributes.

General entities are entities for use within the document content. In this specification, general entities are sometimes referred to with the unqualified term entity when this leads to no ambiguity. Parameter entities are parsed entities for use within the DTD. These two types of entities use different forms of reference and are recognized in different contexts. Furthermore, they occupy different namespaces; a parameter entity and a general entity with the same name are two distinct entities.

Character and Entity References

A character reference refers to a specific character in the ISO/IEC 10646 character set, for example one not directly accessible from available input devices. Character Reference CharRef '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' Legal Character

Characters referred to using character references must match the production for Char.

If the character reference begins with "&#x", the digits and letters up to the terminating ; provide a hexadecimal representation of the character's code point in ISO/IEC 10646. If it begins just with "&#", the digits up to the terminating ; provide a decimal representation of the character's code point.

An entity reference refers to the content of a named entity. References to parsed general entities use ampersand (&) and semicolon (;) as delimiters. Parameter-entity references use percent-sign (%) and semicolon (;) as delimiters.

Entity Reference Reference EntityRef | CharRef EntityRef '&' Name ';' PEReference '%' Name ';' Entity Declared

In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references, or a document with "standalone='yes'", the Name given in the entity reference must match that in an entity declaration, except that well-formed documents need not declare any of the following entities: amp, lt, gt, apos, quot. The declaration of a parameter entity must precede any reference to it. Similarly, the declaration of a general entity must precede any reference to it which appears in a default value in an attribute-list declaration.

Note that if entities are declared in the external subset or in external parameter entities, a non-validating processor is not obligated to read and process their declarations; for such documents, the rule that an entity must be declared is a well-formedness constraint only if standalone='yes'.

Entity Declared

In a document with an external subset or external parameter entities with "standalone='no'", the Name given in the entity reference must match that in an entity declaration. For interoperability, valid documents should declare the entities amp, lt, gt, apos, quot, in the form specified in the section on predefined entities. The declaration of a parameter entity must precede any reference to it. Similarly, the declaration of a general entity must precede any reference to it which appears in a default value in an attribute-list declaration.

Parsed Entity

An entity reference must not contain the name of an unparsed entity. Unparsed entities may be referred to only in attribute values declared to be of type ENTITY or ENTITIES.

No Recursion

A parsed entity must not contain a recursive reference to itself, either directly or indirectly.

In DTD

Parameter-entity references may only appear in the DTD.

Examples of character and entity references: Type <key>less-than</key> (&#x3C;) to save options. This document was prepared on &docdate; and is classified &security-level;.

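Example of a parameter-entity reference: <!-- declare the parameter entity "ISOLat2"... --> <!ENTITY % ISOLat2 SYSTEM "http://www.xml.com/iso/isolat2-xml.entities" > <!-- ... now reference it. --> %ISOLat2;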

Entity Declarations

Entities are declared thus: Entity Declaration EntityDecl GEDecl | PEDecl GEDecl '<!ENTITY' S Name S EntityDef S? '>' PEDecl '<!ENTITY' S '%' S Name S PEDef S? '>' EntityDef EntityValue | (ExternalID NDataDecl?) PEDef EntityValue | ExternalID The Name identifies the entity in an entity reference or, in the case of an unparsed entity, in the value of an ENTITY or ENTITIES attribute. If the same entity is declared more than once, the first declaration encountered is binding; at user option, an XML processor may issue a warning if entities are declared multiple times.

Internal Entities

If the entity definition is an EntityValue, the defined entity is called an internal entity. There is no separate physical storage object, and the content of the entity is given in the declaration. Note that some processing of entity and character references in the literal entity value may be required to produce the correct replacement text: see the section on construction of internal entity replacement text.

An internal entity is a parsed entity.

Example of an internal entity declaration: <!ENTITY Pub-Status "This is a pre-release of the specification.">

External Entities

If the entity is not internal, it is an external entity, declared as follows: External Entity Declaration ExternalID 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral NDataDecl S 'NDATA' S Name If the NDataDecl is present, this is a general unparsed entity; otherwise it is a parsed entity.

Notation Declared

The Name must match the declared name of a notation.

The SystemLiteral is called the entity's system identifier. It is a URI, which may be used to retrieve the entity. Note that the hash mark (#) and fragment identifier frequently used with URIs are not, formally, part of the URI itself; an XML processor may signal an error if a fragment identifier is given as part of a system identifier. Unless otherwise provided by information outside the scope of this specification (e.g. a special XML element type defined by a particular DTD, or a processing instruction defined by a particular application specification), relative URIs are relative to the location of the resource within which the entity declaration occurs. A URI might thus be relative to the document entity, to the entity containing the external DTD subset, or to some other external parameter entity.

An XML processor should handle a non-ASCII character in a URI by representing the character in UTF-8 as one or more bytes, and then escaping these bytes with the URI escaping mechanism (i.e., by converting each byte to %HH, where HH is the hexadecimal notation of the byte value).

In addition to a system identifier, an external identifier may include a public identifier. An XML processor attempting to retrieve the entity's content may use the public identifier to try to generate an alternative URI. If the processor is unable to do so, it must use the URI specified in the system literal. Before a match is attempted, all strings of white space in the public identifier must be normalized to single space characters (#x20), and leading and trailing white space must be removed.

Examples of external entity declarations: <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml"> <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml"> <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif >

Parsed Entities The Text Declaration

External parsed entities may each begin with a text declaration. Text Declaration TextDecl '<?xml' VersionInfo? EncodingDecl S? '?>'

The text declaration must be provided literally, not by reference to a parsed entity. No text declaration may appear at any position other than the beginning of an external parsed entity.

Well-Formed Parsed Entities

The document entity is well-formed if it matches the production labeled document. An external general parsed entity is well-formed if it matches the production labeled extParsedEnt. An external parameter entity is well-formed if it matches the production labeled extPE. Well-Formed External Parsed Entity extParsedEnt TextDecl? content extPE TextDecl? extSubsetDecl An internal general parsed entity is well-formed if its replacement text matches the production labeled content. All internal parameter entities are well-formed by definition.

A consequence of well-formedness in entities is that the logical and physical structures in an XML document are properly nested; no start-tag, end-tag, empty-element tag, element, comment, processing instruction, character reference, or entity reference can begin in one entity and end in another.

Character Encoding in Entities

Each external parsed entity in an XML document may use a different encoding for its characters. All XML processors must be able to read entities in either UTF-8 or UTF-16.

Entities encoded in UTF-16 must begin with the Byte Order Mark described by ISO/IEC 10646 Annex E and Unicode Appendix B (the ZERO WIDTH NO-BREAK SPACE character, #xFEFF). This is an encoding signature, not part of either the markup or the character data of the XML document. XML processors must be able to use this character to differentiate between UTF-8 and UTF-16 encoded documents.

Although an XML processor is required to read only entities in the UTF-8 and UTF-16 encodings, it is recognized that other encodings are used around the world, and it may be desired for XML processors to read entities that use them. Parsed entities which are stored in an encoding other than UTF-8 or UTF-16 must begin with a text declaration containing an encoding declaration: Encoding Declaration EncodingDecl S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) EncName [A-Za-z] ([A-Za-z0-9._] | '-')* Encoding name contains only Latin characters In the document entity, the encoding declaration is part of the XML declaration. The EncName is the name of the encoding used.

In an encoding declaration, the values "UTF-8", "UTF-16", "ISO-10646-UCS-2", and "ISO-10646-UCS-4" should be used for the various encodings and transformations of Unicode / ISO/IEC 10646, the values "ISO-8859-1", "ISO-8859-2", ... "ISO-8859-9" should be used for the parts of ISO 8859, and the values "ISO-2022-JP", "Shift_JIS", and "EUC-JP" should be used for the various encoded forms of JIS X-0208-1997. XML processors may recognize other encodings; it is recommended that character encodings registered (as charsets) with the Internet Assigned Numbers Authority, other than those just listed, should be referred to using their registered names. Note that these registered names are defined to be case-insensitive, so processors wishing to match against them should do so in a case-insensitive way.

In the absence of information provided by an external transport protocol (e.g. HTTP or MIME), it is an error for an entity including an encoding declaration to be presented to the XML processor in an encoding other than that named in the declaration, for an encoding declaration to occur other than at the beginning of an external entity, or for an entity which begins with neither a Byte Order Mark nor an encoding declaration to use an encoding other than UTF-8. Note that since ASCII is a subset of UTF-8, ordinary ASCII entities do not strictly need an encoding declaration.

It is a fatal error when an XML processor encounters an entity with an encoding that it is unable to process.

Examples of encoding declarations: <?xml encoding='UTF-8'?> <?xml encoding='EUC-JP'?>

XML Processor Treatment of Entities and References

The table below summarizes the contexts in which character references, entity references, and invocations of unparsed entities might appear and the required behavior of an XML processor in each case. The labels in the leftmost column describe the recognition context:

as a reference anywhere after the start-tag and before the end-tag of an element; corresponds to the nonterminal content.

as a reference within either the value of an attribute in a start-tag, or a default value in an attribute declaration; corresponds to the nonterminal AttValue.

as a Name, not a reference, appearing either as the value of an attribute which has been declared as type ENTITY, or as one of the space-separated tokens in the value of an attribute which has been declared as type ENTITIES.

as a reference within a parameter or internal entity's literal entity value in the entity's declaration; corresponds to the nonterminal EntityValue.

as a reference within either the internal or external subsets of the DTD, but outside of an EntityValue or AttValue.

Entity Type: Parameter | Internal General | External Parsed General | Unparsed | Character
Reference in Content: Not recognized | Included | Included if validating | Forbidden | Included
Reference in Attribute Value: Not recognized | Included in literal | Forbidden | Forbidden | Included
Occurs as Attribute Value: Not recognized | Forbidden | Forbidden | Notify | Not recognized
Reference in EntityValue: Included in literal | Bypassed | Bypassed | Forbidden | Included
Reference in DTD: Included as PE | Forbidden | Forbidden | Forbidden | Forbidden

Not Recognized

Outside the DTD, the % character has no special significance; thus, what would be parameter entity references in the DTD are not recognized as markup in content. Similarly, the names of unparsed entities are not recognized except when they appear in the value of an appropriately declared attribute.

Included

An entity is included when its replacement text is retrieved and processed, in place of the reference itself, as though it were part of the document at the location the reference was recognized. The replacement text may contain both character data and (except for parameter entities) markup, which must be recognized in the usual way, except that the replacement text of entities used to escape markup delimiters (the entities amp, lt, gt, apos, quot) is always treated as data. (The string "AT&amp;T;" expands to "AT&T;" and the remaining ampersand is not recognized as an entity-reference delimiter.) A character reference is included when the indicated character is processed in place of the reference itself.

Included If Validating

When an XML processor recognizes a reference to a parsed entity, in order to validate the document, the processor must include its replacement text. If the entity is external, and the processor is not attempting to validate the XML document, the processor may, but need not, include the entity's replacement text. If a non-validating parser does not include the replacement text, it must inform the application that it recognized, but did not read, the entity.

This rule is based on the recognition that the automatic inclusion provided by the SGML and XML entity mechanism, primarily designed to support modularity in authoring, is not necessarily appropriate for other applications, in particular document browsing. Browsers, for example, when encountering an external parsed entity reference, might choose to provide a visual indication of the entity's presence and retrieve it for display only on demand.

Forbidden

The following are forbidden, and constitute fatal errors:

the appearance of a reference to an unparsed entity.

the appearance of any character or general-entity reference in the DTD except within an EntityValue or AttValue.

a reference to an external entity in an attribute value.

Included in Literal

When an entity reference appears in an attribute value, or a parameter entity reference appears in a literal entity value, its replacement text is processed in place of the reference itself as though it were part of the document at the location the reference was recognized, except that a single or double quote character in the replacement text is always treated as a normal data character and will not terminate the literal. For example, this is well-formed: <!ENTITY % YN '"Yes"' > <!ENTITY WhatHeSaid "He said %YN;" > while this is not: <!ENTITY EndAttr "27'" > <element attribute='a-&EndAttr;>

Notify

When the name of an unparsed entity appears as a token in the value of an attribute of declared type ENTITY or ENTITIES, a validating processor must inform the application of the system and public (if any) identifiers for both the entity and its associated notation.

Bypassed

When a general entity reference appears in the EntityValue in an entity declaration, it is bypassed and left as is.

Included as PE

Just as with external parsed entities, parameter entities need only be included if validating. When a parameter-entity reference is recognized in the DTD and included, its replacement text is enlarged by the attachment of one leading and one following space (#x20) character; the intent is to constrain the replacement text of parameter entities to contain an integral number of grammatical tokens in the DTD.

Construction of Internal Entity Replacement Text

In discussing the treatment of internal entities, it is useful to distinguish two forms of the entity's value. The literal entity value is the quoted string actually present in the entity declaration, corresponding to the non-terminal EntityValue. The replacement text is the content of the entity, after replacement of character references and parameter-entity references.

The literal entity value as given in an internal entity declaration (EntityValue) may contain character, parameter-entity, and general-entity references. Such references must be contained entirely within the literal entity value. The actual replacement text that is included as described above must contain the replacement text of any parameter entities referred to, and must contain the character referred to, in place of any character references in the literal entity value; however, general-entity references must be left as-is, unexpanded. For example, given the following declarations: <!ENTITY % pub "&#xc9;ditions Gallimard" > <!ENTITY rights "All rights reserved" > <!ENTITY book "La Peste: Albert Camus, &#xA9; 1947 %pub;. &rights;" > then the replacement text for the entity "book" is: La Peste: Albert Camus, © 1947 Éditions Gallimard. &rights; The general-entity reference "&rights;" would be expanded should the reference "&book;" appear in the document's content or an attribute value.

These simple rules may have complex interactions; for a detailed discussion of a difficult example, see the appendix on expansion of entity and character references.

Predefined Entities

Entity and character references can both be used to escape the left angle bracket, ampersand, and other delimiters. A set of general entities (amp, lt, gt, apos, quot) is specified for this purpose. Numeric character references may also be used; they are expanded immediately when recognized and must be treated as character data, so the numeric character references "&#60;" and "&#38;" may be used to escape < and & when they occur in character data.

All XML processors must recognize these entities whether they are declared or not. For interoperability, valid XML documents should declare these entities, like any others, before using them. If the entities in question are declared, they must be declared as internal entities whose replacement text is the single character being escaped or a character reference to that character, as shown below. <!ENTITY lt "&#38;#60;"> <!ENTITY gt "&#62;"> <!ENTITY amp "&#38;#38;"> <!ENTITY apos "&#39;"> <!ENTITY quot "&#34;"> Note that the < and & characters in the declarations of "lt" and "amp" are doubly escaped to meet the requirement that entity replacement be well-formed.

Notation Declarations

Notations identify by name the format of unparsed entities, the format of elements which bear a notation attribute, or the application to which a processing instruction is addressed.

Notation declarations provide a name for the notation, for use in entity and attribute-list declarations and in attribute specifications, and an external identifier for the notation which may allow an XML processor or its client application to locate a helper application capable of processing data in the given notation. Notation Declarations NotationDecl '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' PublicID 'PUBLIC' S PubidLiteral

XML processors must provide applications with the name and external identifier(s) of any notation declared and referred to in an attribute value, attribute definition, or entity declaration. They may additionally resolve the external identifier into the system identifier, file name, or other information needed to allow the application to call a processor for data in the notation described. (It is not an error, however, for XML documents to declare and refer to notations for which notation-specific applications are not available on the system where the XML processor or application is running.)

Document Entity

The document entity serves as the root of the entity tree and a starting-point for an XML processor. This specification does not specify how the document entity is to be located by an XML processor; unlike other entities, the document entity has no name and might well appear on a processor input stream without any identification at all.

Conformance Validating and Non-Validating Processors

Conforming XML processors fall into two classes: validating and non-validating.

Validating and non-validating processors alike must report violations of this specification's well-formedness constraints in the content of the document entity and any other parsed entities that they read.

Validating processors must report violations of the constraints expressed by the declarations in the DTD, and failures to fulfill the validity constraints given in this specification. To accomplish this, validating XML processors must read and process the entire DTD and all external parsed entities referenced in the document.

Non-validating processors are required to check only the document entity, including the entire internal DTD subset, for well-formedness. While they are not required to check the document for validity, they are required to process all the declarations they read in the internal DTD subset and in any parameter entity that they read, up to the first reference to a parameter entity that they do not read; that is to say, they must use the information in those declarations to normalize attribute values, include the replacement text of internal entities, and supply default attribute values. They must not process entity declarations or attribute-list declarations encountered after a reference to a parameter entity that is not read, since the entity may have contained overriding declarations.

Using XML Processors

The behavior of a validating XML processor is highly predictable; it must read every piece of a document and report all well-formedness and validity violations. Less is required of a non-validating processor; it need not read any part of the document other than the document entity. This has two effects that may be important to users of XML processors:

Certain well-formedness errors, specifically those that require reading external entities, may not be detected by a non-validating processor. Examples include the constraints entitled Entity Declared, Parsed Entity, and No Recursion, as well as some of the cases described as forbidden in the section on XML processor treatment of entities and references.

The information passed from the processor to the application may vary, depending on whether the processor reads parameter and external entities. For example, a non-validating processor may not normalize attribute values, include the replacement text of internal entities, or supply default attribute values, where doing so depends on having read declarations in external or parameter entities.

For maximum reliability in interoperating between different XML processors, applications which use non-validating processors should not rely on any behaviors not required of such processors. Applications which require facilities such as the use of default attributes or internal entities which are declared in external entities should use validating XML processors.

Notation

The formal grammar of XML is given in this specification using a simple Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines one symbol, in the form symbol ::= expression

Symbols are written with an initial capital letter if they are defined by a regular expression, or with an initial lower case letter otherwise. Literal strings are quoted.

Within the expression on the right-hand side of a rule, the following expressions are used to match strings of one or more characters:

#xN: where N is a hexadecimal integer, the expression matches the character in ISO/IEC 10646 whose canonical (UCS-4) code value, when interpreted as an unsigned binary number, has the value indicated. The number of leading zeros in the #xN form is insignificant; the number of leading zeros in the corresponding code value is governed by the character encoding in use and is not significant for XML.

[a-zA-Z], [#xN-#xN]: matches any character with a value in the range(s) indicated (inclusive).

[^a-z], [^#xN-#xN]: matches any character with a value outside the range indicated.

[^abc], [^#xN#xN#xN]: matches any character with a value not among the characters given.

"string": matches a literal string matching that given inside the double quotes.

matches a literal string matching that given inside the single quotes.

These symbols may be combined to match more complex patterns as follows, where A and B represent simple expressions:

(expression): expression is treated as a unit and may be combined as described in this list.

A?: matches A or nothing; optional A.

A B: matches A followed by B.

A | B: matches A or B but not both.

A - B: matches any string that matches A but does not match B.

A+: matches one or more occurrences of A.

A*: matches zero or more occurrences of A.

Other notations used in the productions are:

/* ... */: comment.

[ wfc: ... ]: well-formedness constraint; this identifies by name a constraint on well-formed documents associated with a production.

[ vc: ... ]: validity constraint; this identifies by name a constraint on valid documents associated with a production.

References Normative References IANA (Internet Assigned Numbers Authority). Official Names for Character Sets, ed. Keld Simonsen et al. See ftp://ftp.isi.edu/in-notes/iana/assignments/character-sets. IETF (Internet Engineering Task Force). RFC 1766: Tags for the Identification of Languages, ed. H. Alvestrand. 1995. ISO (International Organization for Standardization). ISO 639:1988 (E). Code for the representation of names of languages. [Geneva]: International Organization for Standardization, 1988. ISO (International Organization for Standardization). ISO 3166-1:1997 (E). Codes for the representation of names of countries and their subdivisions — Part 1: Country codes [Geneva]: International Organization for Standardization, 1997. ISO (International Organization for Standardization). ISO/IEC 10646-1993 (E). Information technology — Universal Multiple-Octet Coded Character Set (UCS) — Part 1: Architecture and Basic Multilingual Plane. [Geneva]: International Organization for Standardization, 1993 (plus amendments AM 1 through AM 7). The Unicode Consortium. The Unicode Standard, Version 2.0. Reading, Mass.: Addison-Wesley Developers Press, 1996. Other References Aho, Alfred V., Ravi Sethi, and Jeffrey D. Ullman. Compilers: Principles, Techniques, and Tools. Reading: Addison-Wesley, 1986, rpt. corr. 1988. Berners-Lee, T., R. Fielding, and L. Masinter. Uniform Resource Identifiers (URI): Generic Syntax and Semantics. 1997. (Work in progress; see updates to RFC1738.) Brüggemann-Klein, Anne. Regular Expressions into Finite Automata. Extended abstract in I. Simon, Hrsg., LATIN 1992, S. 97-98. Springer-Verlag, Berlin 1992. Full Version in Theoretical Computer Science 120: 197-213, 1993. Brüggemann-Klein, Anne, and Derick Wood. Deterministic Regular Languages. Universität Freiburg, Institut für Informatik, Bericht 38, Oktober 1991. James Clark. Comparison of SGML and XML. See http://www.w3.org/TR/NOTE-sgml-xml-971215. IETF (Internet Engineering Task Force). 
RFC 1738: Uniform Resource Locators (URL), ed. T. Berners-Lee, L. Masinter, M. McCahill. 1994. IETF (Internet Engineering Task Force). RFC 1808: Relative Uniform Resource Locators, ed. R. Fielding. 1995. IETF (Internet Engineering Task Force). RFC 2141: URN Syntax, ed. R. Moats. 1997. ISO (International Organization for Standardization). ISO 8879:1986(E). Information processing — Text and Office Systems — Standard Generalized Markup Language (SGML). First edition — 1986-10-15. [Geneva]: International Organization for Standardization, 1986. ISO (International Organization for Standardization). ISO/IEC 10744-1992 (E). Information technology — Hypermedia/Time-based Structuring Language (HyTime). [Geneva]: International Organization for Standardization, 1992. Extended Facilities Annexe. [Geneva]: International Organization for Standardization, 1996. Character Classes

Following the characteristics defined in the Unicode standard, characters are classed as base characters (among others, these contain the alphabetic characters of the Latin alphabet, without diacritics), ideographic characters, and combining characters (among others, this class contains most diacritics); these classes combine to form the class of letters. Digits and extenders are also distinguished. Characters Letter BaseChar | Ideographic BaseChar [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C 
| [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3] Ideographic [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] CombiningChar [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | 
#x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A Digit [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29] Extender #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]

The character classes defined here can be derived from the Unicode character database as follows:

Name start characters must have one of the categories Ll, Lu, Lo, Lt, Nl.

Name characters other than Name-start characters must have one of the categories Mc, Me, Mn, Lm, or Nd.

Characters in the compatibility area (i.e. with character code greater than #xF900 and less than #xFFFE) are not allowed in XML names.

Characters which have a font or compatibility decomposition (i.e. those with a "compatibility formatting tag" in field 5 of the database -- marked by field 5 beginning with a "<") are not allowed.

The following characters are treated as name-start characters rather than name characters, because the property file classifies them as Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.

Characters #x20DD-#x20E0 are excluded (in accordance with Unicode, section 5.14).

Character #x00B7 is classified as an extender, because the property list so identifies it.

Character #x0387 is added as a name character, because #x00B7 is its canonical equivalent.

Characters ':' and '_' are allowed as name-start characters.

Characters '-' and '.' are allowed as name characters.

XML and SGML

XML is designed to be a subset of SGML, in that every valid XML document should also be a conformant SGML document. For a detailed comparison of the additional restrictions that XML places on documents beyond those of SGML, see James Clark's "Comparison of SGML and XML" (http://www.w3.org/TR/NOTE-sgml-xml-971215).

Expansion of Entity and Character References

This appendix contains some examples illustrating the sequence of entity- and character-reference recognition and expansion, as specified in section 4.4, "XML Processor Treatment of Entities and References".

If the DTD contains the declaration

<!ENTITY example "<p>An ampersand (&#38;#38;) may be escaped numerically (&#38;#38;#38;) or with a general entity (&amp;amp;).</p>" >

then the XML processor will recognize the character references when it parses the entity declaration, and resolve them before storing the following string as the value of the entity "example":

<p>An ampersand (&#38;) may be escaped numerically (&#38;#38;) or with a general entity (&amp;amp;).</p>

A reference in the document to "&example;" will cause the text to be reparsed, at which time the start- and end-tags of the "p" element will be recognized and the three references will be recognized and expanded, resulting in a "p" element with the following content (all data, no delimiters or markup):

An ampersand (&) may be escaped numerically (&#38;) or with a general entity (&amp;).

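A more complex example will illustrate the rules and their effects fully. In the following example, the line numbers are solely for reference.

1 <?xml version='1.0'?>
2 <!DOCTYPE test [
3 <!ELEMENT test (#PCDATA) >
4 <!ENTITY % xx '&#37;zz;'>
5 <!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >' >
6 %xx;
7 ]>
8 <test>This sample shows a &tricky; method.</test>

This produces the following: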

in line 4, the reference to character 37 is expanded immediately, and the parameter entity "xx" is stored in the symbol table with the value "%zz;". Since the replacement text is not rescanned, the reference to parameter entity "zz" is not recognized. (And it would be an error if it were, since "zz" is not yet declared.)

in line 5, the character reference "&#60;" is expanded immediately and the parameter entity "zz" is stored with the replacement text "<!ENTITY tricky "error-prone" >", which is a well-formed entity declaration.

in line 6, the reference to "xx" is recognized, and the replacement text of "xx" (namely "%zz;") is parsed. The reference to "zz" is recognized in its turn, and its replacement text ("<!ENTITY tricky "error-prone" >") is parsed. The general entity "tricky" has now been declared, with the replacement text "error-prone".

in line 8, the reference to the general entity "tricky" is recognized, and it is expanded, so the full content of the "test" element is the self-describing (and ungrammatical) string This sample shows a error-prone method.

Deterministic Content Models

For compatibility, it is required that content models in element type declarations be deterministic.

SGML requires deterministic content models (it calls them "unambiguous"); XML processors built using SGML systems may flag non-deterministic content models as errors.

For example, the content model ((b, c) | (b, d)) is non-deterministic, because given an initial b the parser cannot know which b in the model is being matched without looking ahead to see which element follows the b. In this case, the two references to b can be collapsed into a single reference, making the model read (b, (c | d)). An initial b now clearly matches only a single name in the content model. The parser doesn't need to look ahead to see what follows; either c or d would be accepted.

More formally: a finite state automaton may be constructed from the content model using the standard algorithms, e.g. algorithm 3.5 in section 3.9 of Aho, Sethi, and Ullman (Compilers: Principles, Techniques, and Tools). In many such algorithms, a follow set is constructed for each position in the regular expression (i.e., each leaf node in the syntax tree for the regular expression); if any position has a follow set in which more than one following position is labeled with the same element type name, then the content model is in error and may be reported as an error.

Algorithms exist which allow many but not all non-deterministic content models to be reduced automatically to equivalent deterministic models; see Brüggemann-Klein 1991.

Autodetection of Character Encodings

The XML encoding declaration functions as an internal label on each entity, indicating which character encoding is in use. Before an XML processor can read the internal label, however, it apparently has to know what character encoding is in use—which is what the internal label is trying to indicate. In the general case, this is a hopeless situation. It is not entirely hopeless in XML, however, because XML limits the general case in two ways: each implementation is assumed to support only a finite set of character encodings, and the XML encoding declaration is restricted in position and content in order to make it feasible to autodetect the character encoding in use in each entity in normal cases. Also, in many cases other sources of information are available in addition to the XML data stream itself. Two cases may be distinguished, depending on whether the XML entity is presented to the processor without, or with, any accompanying (external) information. We consider the first case first.

Because each XML entity not in UTF-8 or UTF-16 format must begin with an XML encoding declaration, in which the first characters must be '<?xml', any conforming processor can detect, after two to four octets of input, which of the following cases apply. In reading this list, it may help to know that in UCS-4, '<' is "#x0000003C" and '?' is "#x0000003F", and the Byte Order Mark required of UTF-16 data streams is "#xFEFF".

00 00 00 3C: UCS-4, big-endian machine (1234 order)

3C 00 00 00: UCS-4, little-endian machine (4321 order)

00 00 3C 00: UCS-4, unusual octet order (2143)

00 3C 00 00: UCS-4, unusual octet order (3412)

FE FF: UTF-16, big-endian

FF FE: UTF-16, little-endian

00 3C 00 3F: UTF-16, big-endian, no Byte Order Mark (and thus, strictly speaking, in error)

3C 00 3F 00: UTF-16, little-endian, no Byte Order Mark (and thus, strictly speaking, in error)

3C 3F 78 6D: UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns for the ASCII characters, the encoding declaration itself may be read reliably

4C 6F A7 94: EBCDIC (in some flavor; the full encoding declaration must be read to tell which code page is in use)

other: UTF-8 without an encoding declaration, or else the data stream is corrupt, fragmentary, or enclosed in a wrapper of some kind

This level of autodetection is enough to read the XML encoding declaration and parse the character-encoding identifier, which is still necessary to distinguish the individual members of each family of encodings (e.g. to tell UTF-8 from 8859, and the parts of 8859 from each other, or to distinguish the specific EBCDIC code page in use, and so on).

Because the contents of the encoding declaration are restricted to ASCII characters, a processor can reliably read the entire encoding declaration as soon as it has detected which family of encodings is in use. Since in practice, all widely used character encodings fall into one of the categories above, the XML encoding declaration allows reasonably reliable in-band labeling of character encodings, even when external sources of information at the operating-system or transport-protocol level are unreliable.

Once the processor has detected the character encoding in use, it can act appropriately, whether by invoking a separate input routine for each case, or by calling the proper conversion function on each character of input.

Like any self-labeling system, the XML encoding declaration will not work if any software changes the entity's character set or encoding without updating the encoding declaration. Implementors of character-encoding routines should be careful to ensure the accuracy of the internal and external information used to label the entity.

The second possible case occurs when the XML entity is accompanied by encoding information, as in some file systems and some network protocols. When multiple sources of information are available, their relative priority and the preferred method of handling conflict should be specified as part of the higher-level protocol used to deliver XML. Rules for the relative priority of the internal label and the MIME-type label in an external header, for example, should be part of the RFC document defining the text/xml and application/xml MIME types. In the interests of interoperability, however, the following rules are recommended.

If an XML entity is in a file, the Byte-Order Mark and encoding-declaration PI are used (if present) to determine the character encoding. All other heuristics and sources of information are solely for error recovery.

If an XML entity is delivered with a MIME type of text/xml, then the charset parameter on the MIME type determines the character encoding method; all other heuristics and sources of information are solely for error recovery.

If an XML entity is delivered with a MIME type of application/xml, then the Byte-Order Mark and encoding-declaration PI are used (if present) to determine the character encoding. All other heuristics and sources of information are solely for error recovery.

These rules apply only in the absence of protocol-level documentation; in particular, when the MIME types text/xml and application/xml are defined, the recommendations of the relevant RFC will supersede these rules.

W3C XML Working Group

This specification was prepared and approved for publication by the W3C XML Working Group (WG). WG approval of this specification does not necessarily imply that all WG members voted for its approval. The current and former members of the XML WG are:

Jon Bosak, Sun (Chair); James Clark (Technical Lead); Tim Bray, Textuality and Netscape (XML Co-editor); Jean Paoli, Microsoft (XML Co-editor); C. M. Sperberg-McQueen, U. of Ill. (XML Co-editor); Dan Connolly, W3C (W3C Liaison); Paula Angerstein, Texcel; Steve DeRose, INSO; Dave Hollander, HP; Eliot Kimber, ISOGEN; Eve Maler, ArborText; Tom Magliery, NCSA; Murray Maloney, Muzmo and Grif; Makoto Murata, Fuji Xerox Information Systems; Joel Nava, Adobe; Conleth O'Connell, Vignette; Peter Sharpe, SoftQuad; John Tigue, DataChannel
XML-Parser-2.46/samples/xmlfilter0000755000000000000000000001441312703346700015442 0ustar rootroot#!/usr/local/bin/perl -w
#
# $Revision: 1.1.1.1 $
#
# $Date: 2003-07-27 11:07:11 $
#
# xmlfilter - copy an XML document to standard output while dropping or
# keeping elements selected by type name, type-name pattern, or attribute.
#
# The filter is a toggle: $pass says whether output is currently being
# emitted.  A start tag that matches the "opposite" rule set flips $pass,
# and the matching end tag (tracked by depth in @togglestack) flips it back.

use strict;
use warnings;

use XML::Parser;

my $Usage = <<'End_of_Usage;';
Usage is:
    xmlfilter [-h] [-nl] [{-+}root] [{-+}el=elname] [{-+}el:elnamepat]
              [{-+}att:attname] [{-+}att:attname:attvalpat] xmlfile

Prints on standard output the result of filtering the given xmlfile
for elements according to the switches. A '-' option will drop the
element from the output; a '+' will keep it. The output should also
be a well-formed XML document.

  -h            Print this message

  -nl           Emit a newline prior to every start tag.

  [-+]root      Drop (or keep) the root element. Defaults to keep.
                If the root element were named "foo", then -root
                would be equivalent to -el=foo. Note that even if
                you're dropping the root element, it's start and
                end tag are kept in order that the output remains
                a well-formed XML document.

  [-+]el=elname
                Drop (or keep) elements of type elname.

  [-+]el:elnamepat
                Drop (or keep) element whose type name matches elnamepat.

  [-+]att:attname
                Drop (or keep) elements which have an attribute = attname.

  [-+]att:attname:attvalpat
                Drop (or keep) elements which have an attribute = attname
                and for which the attribute value matches attvalpat.
End_of_Usage;

my $pass       = 1;    # true while output is being emitted
my $do_newline = 0;    # -nl: newline before every start tag
my $attcheck   = 0;    # true once any att: option was given

my %drop_el;           # exact element names to drop
my @drop_elpat;        # element-name patterns to drop
my %keep_el;           # exact element names to keep
my @keep_elpat;        # element-name patterns to keep

my %drop_att;          # attribute name => value predicate (drop)
my %keep_att;          # attribute name => value predicate (keep)

my $always_true = sub { 1; };

my $root_element = '';
my $in_cdata     = 0;

# Process options
while ( defined( $ARGV[0] ) and $ARGV[0] =~ /^[-+]/ ) {
    my $opt = shift;

    if ( $opt eq '-root' ) {
        $pass = 0;
    }
    elsif ( $opt eq '+root' ) {
        $pass = 1;
    }
    elsif ( $opt eq '-h' ) {
        print $Usage;
        exit;
    }
    elsif ( $opt eq '-nl' ) {
        $do_newline = 1;
    }
    elsif ( $opt =~ /^([-+])el([:=])(\S*)/ ) {
        my ( $disp, $kind, $pattern ) = ( $1, $2, $3 );
        my ( $hashref, $aref ) =
            ( $disp eq '-' )
          ? ( \%drop_el, \@drop_elpat )
          : ( \%keep_el, \@keep_elpat );

        if ( $kind eq '=' ) {
            $hashref->{$pattern} = 1;
        }
        else {
            push( @$aref, $pattern );
        }
    }
    elsif ( $opt =~ /^([-+])att:(\w+)(?::(\S*))?/ ) {
        my ( $disp, $id, $pattern ) = ( $1, $2, $3 );
        my $ref = ( $disp eq '-' ) ? \%drop_att : \%keep_att;

        $ref->{$id} =
          defined($pattern) ? pattern_matcher($pattern) : $always_true;
        $attcheck = 1;
    }
    else {
        die "Unknown option: $opt\n$Usage";
    }
}

# Element-name patterns of one disposition are alternated into one regex.
my $drop_el_pattern = join( '|', @drop_elpat );
my $keep_el_pattern = join( '|', @keep_elpat );

my $drop_sub =
  length($drop_el_pattern) ? pattern_matcher($drop_el_pattern) : sub { 0 };
my $keep_sub =
  length($keep_el_pattern) ? pattern_matcher($keep_el_pattern) : sub { 0 };

my $doc = shift;

die "No file specified\n$Usage" unless defined($doc);

my @togglestack = ();    # depths at which $pass was flipped

my $p = XML::Parser->new(
    ErrorContext => 2,
    Handlers     => {
        Start => \&start_handler,
        End   => \&end_handler
    }
);

set_text_handlers( $p, 1 ) if $pass;

$p->parsefile($doc);

# With -root the root's own tags are still written so the output stays
# well-formed; start_handler wrote the start tag, this writes the end tag.
print "</$root_element>\n" unless $pass;

################
## End of main
################

# Return a predicate that tests its first argument against the given
# user-supplied pattern.  The pattern is compiled with qr// rather than
# string eval, so it cannot run arbitrary code and a malformed pattern is
# reported immediately instead of leaving an undefined sub behind.
sub pattern_matcher {
    my ($pattern) = @_;

    my $re = eval { qr/$pattern/ };
    die "Invalid pattern '$pattern': $@" unless defined $re;

    return sub { $_[0] =~ $re };
}    # End pattern_matcher

# Install (or remove) the character-data and CDATA handlers, so that text
# is only processed while output is being emitted.
sub set_text_handlers {
    my ( $xp, $enabled ) = @_;

    if ($enabled) {
        $xp->setHandlers(
            Char       => \&char_handler,
            CdataStart => \&cdata_start,
            CdataEnd   => \&cdata_end
        );
    }
    else {
        $xp->setHandlers(
            Char       => 0,
            CdataStart => 0,
            CdataEnd   => 0
        );
    }
    return;
}    # End set_text_handlers

# Start-tag handler: decide whether this element flips the pass state,
# then echo the tag with escaped attribute values if passing.
sub start_handler {
    my ( $xp, $el, @atts ) = @_;

    unless ($root_element) {
        $root_element = $el;
        print "<$el>\n" unless $pass;
    }

    # While passing we look for reasons to drop; while dropping, to keep.
    my ( $elref, $attref, $sub ) =
        $pass
      ? ( \%drop_el, \%drop_att, $drop_sub )
      : ( \%keep_el, \%keep_att, $keep_sub );

    if (   defined( $elref->{$el} )
        or $sub->($el)
        or check_atts( $attref, @atts ) )
    {
        $pass = !$pass;
        set_text_handlers( $xp, $pass );
        push( @togglestack, $xp->depth );
    }

    if ($pass) {
        print "\n" if $do_newline;
        print "<$el";
        while (@atts) {
            my $id  = shift @atts;
            my $val = shift @atts;

            $val = $xp->xml_escape( $val, "'" );
            print " $id='$val'";
        }
        print ">";
    }
}    # End start_handler

# End-tag handler: echo the end tag if passing, then undo a toggle that
# was made by the matching start tag (same depth).
sub end_handler {
    my ( $xp, $el ) = @_;

    print "</$el>" if $pass;

    if ( @togglestack and $togglestack[-1] == $xp->depth ) {
        $pass = !$pass;
        set_text_handlers( $xp, $pass );
        pop(@togglestack);
    }
}    # End end_handler

# Character-data handler: text inside a CDATA section is written raw,
# everything else is escaped.
sub char_handler {
    my ( $xp, $text ) = @_;

    if ( length($text) ) {
        $text = $xp->xml_escape( $text, '>' )
          unless $in_cdata;
        print $text;
    }
}    # End char_handler

# Open a CDATA section in the output and stop escaping text.
sub cdata_start {
    my $xp = shift;

    print '<![CDATA[';
    $in_cdata = 1;
}

# Close the CDATA section in the output and resume escaping text.
sub cdata_end {
    my $xp = shift;

    print ']]>';
    $in_cdata = 0;
}

# Return a true value if any (name, value) attribute pair satisfies the
# predicate registered for that name in the given table; 0 otherwise.
sub check_atts {
    return $attcheck unless $attcheck;

    my $ref = shift;

    while (@_) {
        my $id  = shift;
        my $val = shift;

        if ( defined( $ref->{$id} ) ) {
            my $ret = $ref->{$id}->($val);
            return $ret if $ret;
        }
    }

    return 0;
}    # End check_atts

# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End:
XML-Parser-2.46/samples/canontst.xml0000644000000000000000000000077612703346340016070 0ustar rootroot ] > Here is a PI: . Like it? 
XML-Parser-2.46/samples/xmlcomments0000755000000000000000000000126212703346700016000 0ustar rootroot#!/usr/local/bin/perl -w
#
# $Revision: 1.1.1.1 $
#
# $Date: 2003-07-27 11:07:11 $
#
# xmlcomments - print every comment found in an XML file, prefixed by the
# line number the parser reports for it, followed by a total count.

use strict;
use warnings;

use XML::Parser;

my $file = shift;

die "Usage: xmlcomments xmlfile\n" unless defined $file;
die "Can't find file \"$file\"" unless -f $file;

my $count = 0;

my $parser = XML::Parser->new(
    ErrorContext  => 2,
    ParseParamEnt => 0
);

$parser->setHandlers( Comment => \&comments );

$parser->parsefile($file);

print "Found $count comments.\n";

################
## End of main
################

# Comment handler: echo the comment in XML comment syntax, indenting
# continuation lines with a tab, and count it.
sub comments {
    my ( $p, $data ) = @_;

    my $line = $p->current_line;
    $data =~ s/\n/\n\t/g;
    print "$line:\t<!--$data-->\n";
    $count++;
}    # End comments

# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End:
XML-Parser-2.46/samples/ctest.dtd0000644000000000000000000000010712703346340015320 0ustar rootroot 
XML-Parser-2.46/samples/canonical0000755000000000000000000000543712703346700015371 0ustar rootroot#!/usr/local/bin/perl -w
#
# Copyright 1999 Clark Cooper
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Revision: 1.1.1.1 $
#
# $Date: 2003-07-27 11:07:11 $
#
# This program take an XML document (either on standard input or
# from a filename supplied as an argument) and generates corresponding
# canonical XML document on the standard output. The definition of
# "Canonical XML" that I'm using is taken from the working draft
# published by W3C on 19-Jan-2000:
#
#    http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html
#
# The latest version of this document is at:
#
#    http://www.w3.org/TR/xml-c14n
#

use strict;
use warnings;

use XML::Parser;

my $indoctype = 0;    # true between the Doctype and DoctypeFin events
my $inroot    = 0;    # true from the first start tag to the root end tag

my $p = XML::Parser->new(
    ErrorContext  => 2,
    Namespaces    => 1,
    ParseParamEnt => 1,
    Handlers      => {
        Start      => \&sthndl,
        End        => \&endhndl,
        Char       => \&chrhndl,
        Proc       => \&proc,
        Doctype    => sub { $indoctype = 1 },
        DoctypeFin => sub { $indoctype = 0 }
    }
);

my $file = shift;

if ( defined $file ) {
    $p->parsefile($file);
}
else {
    $p->parse(*STDIN);
}

################
## End main
################

# Start-tag handler: write the element with generated namespace prefixes
# (n1 for the element itself, n2.. for namespaced attributes) and with the
# attributes sorted by (namespace, name).
sub sthndl {
    my ( $xp, $el, @attlist ) = @_;

    $inroot = 1 unless $inroot;
    my $ns_index = 1;

    my $elns = $xp->namespace($el);
    if ( defined $elns ) {
        my $pfx = 'n' . $ns_index++;
        print "<$pfx:$el xmlns:$pfx=\"$elns\"";
    }
    else {
        print "<$el";
    }

    if (@attlist) {

        # Key each attribute as "namespace\01name" so that a plain string
        # sort orders by namespace first.  This works on a copy of the
        # argument list instead of writing through @_.
        for ( my $i = 0 ; $i < @attlist ; $i += 2 ) {
            my $nm = $attlist[$i];
            my $ns = $xp->namespace($nm);
            $attlist[$i] = defined($ns) ? "$ns\01$nm" : "\01$nm";
        }

        my %atts = @attlist;
        my @ids  = sort keys %atts;
        foreach my $id (@ids) {
            my ( $ns, $nm ) = split( /\01/, $id );
            my $val = $xp->xml_escape( $atts{$id}, '"', "\x9", "\xA", "\xD" );
            if ( length($ns) ) {
                my $pfx = 'n' . $ns_index++;
                print " $pfx:$nm=\"$val\" xmlns:$pfx=\"$ns\"";
            }
            else {
                print " $nm=\"$val\"";
            }
        }
    }

    print '>';
}    # End sthndl

# End-tag handler: write the end tag (prefixed n1 when the element is in a
# namespace, matching sthndl) and a final newline after the root element.
sub endhndl {
    my ( $xp, $el ) = @_;

    my $nm = $xp->namespace($el) ? "n1:$el" : $el;
    print "</$nm>";
    if ( $xp->depth == 0 ) {
        $inroot = 0;
        print "\n";
    }
}    # End endhndl

# Character-data handler: write the text with '>' and carriage return
# passed to xml_escape as extra characters to escape.
sub chrhndl {
    my ( $xp, $data ) = @_;

    print $xp->xml_escape( $data, '>', "\xD" );
}    # End chrhndl

# Processing-instruction handler: PIs inside the doctype are dropped;
# PIs outside the root element are each followed by a newline.
sub proc {
    my ( $xp, $target, $data ) = @_;

    unless ($indoctype) {
        print "<?$target $data?>";
        print "\n" unless $inroot;
    }
}

# Tell emacs that this is really a perl script
#Local Variables:
#mode: perl
#End:
XML-Parser-2.46/samples/xmlstats0000755000000000000000000000657212703346700015322 0ustar rootroot#!/usr/local/bin/perl -w
#
# $Revision: 1.1.1.1 $
#
# $Date: 2003-07-27 11:07:11 $
#
# xmlstats - report, for every element type in an XML file, how often it
# occurs, how much character data it holds, and its parents, children and
# attributes.

use strict;
use warnings;

# Per-element-type statistics record.
package Elinfo;

sub new {
    my ($class) = @_;
    return bless {
        COUNT  => 0,        # number of occurrences
        MINLEV => undef,    # minimum nesting level, set by set_minlev
        SEEN   => 0,        # order of first appearance
        CHARS  => 0,        # length of non-blank character data
        EMPTY  => 1,        # cleared on first child or character data
        PTAB   => {},       # parent type  => count
        KTAB   => {},       # child type   => count
        ATAB   => {}        # attribute    => count
    }, $class;
}

package main;

use English;
use XML::Parser;

my %elements;
my $seen = 0;
my $root;

my $file = shift;

my $subform = ' @<<<<<<<<<<<<<<< @>>>>';
die "Usage: xmlstats xmlfile\n" unless defined $file;
die "Can't find file \"$file\"" unless -f $file;

my $parser = XML::Parser->new( ErrorContext => 2 );
$parser->setHandlers(
    Start => \&start_handler,
    Char  => \&char_handler
);

$parser->parsefile($file);

set_minlev( $root, 0 );

foreach my $el ( sort bystruct keys %elements ) {
    my $ref = $elements{$el};
    print "\n================\n$el: ", $ref->{COUNT}, "\n";
    print "Had ", $ref->{CHARS}, " bytes of character data\n"
      if $ref->{CHARS};
    print "Always empty\n"
      if $ref->{EMPTY};

    showtab( 'Parents',    $ref->{PTAB}, 0 );
    showtab( 'Children',   $ref->{KTAB}, 1 );
    showtab( 'Attributes', $ref->{ATAB}, 0 );
}

################
## End of main
################

# Start-tag handler: count the element, link it to its parent in both
# directions, and count each attribute name.
sub start_handler {
    my ( $p, $el, @atts ) = @_;

    my $elinf = $elements{$el};

    if ( not defined($elinf) ) {
        $elements{$el} = $elinf = Elinfo->new;
        $elinf->{SEEN} = $seen++;
    }

    $elinf->{COUNT}++;

    my $partab = $elinf->{PTAB};

    my $parent = $p->current_element;
    if ( defined($parent) ) {
        $partab->{$parent}++;
        my $pinf = $elements{$parent};

        # Increment our slot in parent's child table
        $pinf->{KTAB}->{$el}++;
        $pinf->{EMPTY} = 0;
    }
    else {
        $root = $el;
    }

    # Deal with attributes

    my $atab = $elinf->{ATAB};

    while (@atts) {
        my $att = shift @atts;

        $atab->{$att}++;
        shift @atts;    # Throw away value
    }

}    # End start_handler

# Character-data handler: any text marks the element non-empty; only text
# containing a non-whitespace character adds to its CHARS total.
sub char_handler {
    my ( $p, $data ) = @_;

    my $inf = $elements{ $p->current_element };

    $inf->{EMPTY} = 0;
    if ( $data =~ /\S/ ) {
        $inf->{CHARS} += length($data);
    }
}    # End char_handler

# Record the minimum level at which an element type occurs, recursing
# into its child types only when the level improves (so cycles stop).
sub set_minlev {
    my ( $el, $lev ) = @_;

    my $elinfo = $elements{$el};
    if ( !defined( $elinfo->{MINLEV} ) or $elinfo->{MINLEV} > $lev ) {
        my $newlev = $lev + 1;

        $elinfo->{MINLEV} = $lev;
        foreach ( keys %{ $elinfo->{KTAB} } ) {
            set_minlev( $_, $newlev );
        }
    }
}    # End set_minlev

# Sort comparator: by minimum level, then by order of first appearance.
sub bystruct {
    my $refa = $elements{$a};
    my $refb = $elements{$b};

    $refa->{MINLEV} <=> $refb->{MINLEV}
      or $refa->{SEEN} <=> $refb->{SEEN};
}    # End bystruct

# Print one name => count table under a title, using the $subform picture;
# with $dosum set and more than one row, also print the total.
sub showtab {
    my ( $title, $table, $dosum ) = @_;

    my @list = sort keys %{$table};

    if (@list) {
        print "\n $title:\n";

        my $sum = 0;
        foreach my $item (@list) {
            my $cnt = $table->{$item};
            $sum += $cnt;
            formline( $subform, $item, $cnt );
            print $ACCUMULATOR, "\n";
            $ACCUMULATOR = '';
        }

        if ( $dosum and @list > 1 ) {
            print " =====\n";
            formline( $subform, '', $sum );
            print $ACCUMULATOR, "\n";
            $ACCUMULATOR = '';
        }
    }

}    # End showtab

# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End:
XML-Parser-2.46/MANIFEST0000644000000000000000000000744413542324532013202 0ustar rootrootinc/Devel/CheckLib.pm Changes Change log Expat/Expat.pm XML::Parser::Expat module Expat/Expat.xs Extension library Expat/Makefile.PL perl MakeMaker script for XML::Parser::Expat Expat/encoding.h Header file; describes *.enc structure Expat/typemap XS typemap MANIFEST This file Makefile.PL perl MakeMaker script for XML::Parser Parser.pm XML::Parser module Parser/LWPExternEnt.pl LWP based external entity handler Parser/Encodings/Japanese_Encodings.msg Message about Japanese encodings. 
Parser/Encodings/README Info about encoding maps Parser/Encodings/big5.enc Big5 binary encoding map Parser/Encodings/euc-kr.enc EUC-KR binary encoding map Parser/Encodings/iso-8859-2.enc ISO-8859-2 binary encoding map Parser/Encodings/iso-8859-3.enc ISO-8859-3 binary encoding map Parser/Encodings/iso-8859-4.enc ISO-8859-4 binary encoding map Parser/Encodings/iso-8859-5.enc ISO-8859-5 binary encoding map Parser/Encodings/iso-8859-7.enc ISO-8859-7 binary encoding map Parser/Encodings/iso-8859-8.enc ISO-8859-8 binary encoding map Parser/Encodings/iso-8859-9.enc ISO-8859-9 binary encoding map Parser/Encodings/iso-8859-15.enc ISO-8859-15 binary encoding map Parser/Encodings/windows-1250.enc cp1250-WinLatin2 binary encoding map Parser/Encodings/windows-1251.enc cp1251-Russian binary encoding map Parser/Encodings/windows-1252.enc cp1252-WinLatin1 binary encoding map Parser/Encodings/windows-1255.enc hebrew Parser/Encodings/x-euc-jp-jisx0221.enc X-euc-jp-jisx0221 encoding map Parser/Encodings/x-euc-jp-unicode.enc X-euc-jp-unicde encoding map Parser/Encodings/x-sjis-cp932.enc x-sjis-cp932 encoding map Parser/Encodings/x-sjis-jdk117.enc x-sjis-jdk117 encoding map Parser/Encodings/x-sjis-jisx0221.enc x-sjis-jisx0221 encoding map Parser/Encodings/x-sjis-unicode.enc x-sjis-unicode encoding map Parser/Encodings/ibm866.enc Parser/Encodings/koi8-r.enc Parser/Style/Debug.pm Debug style parser Parser/Style/Objects.pm Objects style parser Parser/Style/Stream.pm Stream style parser Parser/Style/Subs.pm Subs style parser Parser/Style/Tree.pm Tree style parser README Short explanation samples/canonical A utility to generate canonical XML samples/canontst.xml An xml document to demonstrate canonical samples/ctest.dtd An external DTD used by canontst.xml samples/REC-xml-19980210.xml The XML spec in xml form samples/xmlcomments A utility to extract comments samples/xmlfilter A utility to filter elements samples/xmlstats A utility to report on element statistics t/astress.t Test script 
t/cdata.t Test script t/decl.t Test script t/defaulted.t Test script t/encoding.t Test script t/external_ent.t Test script t/file.t Test script t/file_open_scalar.t Test script t/finish.t Test script t/ext.ent External entity for parament.t test t/ext2.ent External entity for parament.t test t/foo.dtd External DTD for parament.t test t/namespaces.t Test script t/parament.t Test script t/partial.t Test script t/skip.t Test script t/stream.t Test script t/styles.t Test script META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) XML-Parser-2.46/inc/0000755000000000000000000000000013542324531012610 5ustar rootrootXML-Parser-2.46/inc/Devel/0000755000000000000000000000000013542324531013647 5ustar rootrootXML-Parser-2.46/inc/Devel/CheckLib.pm0000644000000000000000000003571512703346470015670 0ustar rootroot# $Id: CheckLib.pm,v 1.25 2008/10/27 12:16:23 drhyde Exp $ package Devel::CheckLib; use 5.00405; #postfix foreach use strict; use vars qw($VERSION @ISA @EXPORT); $VERSION = '0.99'; use Config qw(%Config); use Text::ParseWords 'quotewords'; use File::Spec; use File::Temp; require Exporter; @ISA = qw(Exporter); @EXPORT = qw(assert_lib check_lib_or_exit check_lib); # localising prevents the warningness leaking out of this module local $^W = 1; # use warnings is a 5.6-ism _findcc(); # bomb out early if there's no compiler =head1 NAME Devel::CheckLib - check that a library is available =head1 DESCRIPTION Devel::CheckLib is a perl module that checks whether a particular C library and its headers are available. 
=head1 SYNOPSIS

    use Devel::CheckLib;

    check_lib_or_exit( lib => 'jpeg', header => 'jpeglib.h' );
    check_lib_or_exit( lib => [ 'iconv', 'jpeg' ] );

    # or prompt for path to library and then do this:
    check_lib_or_exit( lib => 'jpeg', libpath => $additional_path );

=head1 USING IT IN Makefile.PL or Build.PL

If you want to use this from Makefile.PL or Build.PL, do not simply copy the
module into your distribution as this may cause problems when PAUSE and
search.cpan.org index the distro. Instead, use the use-devel-checklib script.

=head1 HOW IT WORKS

You pass named parameters to a function, describing to it how to build and
link to the libraries.

It works by trying to compile some code - which defaults to this:

    int main(void) { return 0; }

and linking it to the specified libraries. If something pops out the end
which looks executable, it gets executed, and if main() returns 0 we know
that it worked. That tiny program is built once for each library that you
specify, and (without linking) once for each header file.

If you want to check for the presence of particular functions in a library,
or even that those functions return particular results, then you can pass
your own function body for main() thus:

    check_lib_or_exit(
        function => 'foo();if(libversion() > 5) return 0; else return 1;',
        incpath  => ...
        libpath  => ...
        lib      => ...
        header   => ...
    );

In that case, it will fail to build if either foo() or libversion() don't
exist, and main() will return the wrong value if libversion()'s return value
isn't what you want.

=head1 FUNCTIONS

All of these take the same named parameters and are exported by default.
To avoid exporting them, C<use Devel::CheckLib ()>.

=head2 assert_lib

This takes several named parameters, all of which are optional, and dies
with an error message if any of the libraries listed can not be found.

B<NOTE>: dying in a Makefile.PL or Build.PL may provoke a 'FAIL' report from
CPAN Testers' automated smoke testers. Use C<check_lib_or_exit> instead.

The named parameters are:

=over

=item lib

Must be either a string with the name of a single
library or a reference to an array of strings of library names.  Depending
on the compiler found, library names will be fed to the compiler either as
C<-l> arguments or as C<.lib> file names.  (E.g. C<-ljpeg> or C<jpeg.lib>)

=item libpath

a string or an array of strings
representing additional paths to search for libraries.

=item LIBS

a C<ExtUtils::MakeMaker>-style space-separated list of
libraries (each preceded by '-l') and directories (preceded by '-L').

This can also be supplied on the command-line.

=item debug

If true - emit information during processing that can be used for
debugging.

=back

And libraries are no use without header files, so ...

=over

=item header

Must be either a string with the name of a single
header file or a reference to an array of strings of header file names.

=item incpath

a string or an array of strings
representing additional paths to search for headers.

=item INC

a C<ExtUtils::MakeMaker>-style space-separated list of
incpaths, each preceded by '-I'.

This can also be supplied on the command-line.

=back

=head2 check_lib_or_exit

This behaves exactly the same as C<assert_lib()> except that instead of
dying, it warns (with exactly the same error message) and exits.
This is intended for use in Makefile.PL / Build.PL
when you might want to prompt the user for various paths and
things before checking that what they've told you is sane.

If any library or header is missing, it exits with an exit value of 0 to avoid
causing a CPAN Testers 'FAIL' report.  CPAN Testers should ignore this
result -- which is what you want if an external library dependency is not
available.

=head2 check_lib

This behaves exactly the same as C<assert_lib()> except that it is silent,
returning false instead of dying, or true otherwise.

=cut

# Run assert_lib() with the caller's arguments; on failure print its error
# message as a warning and exit with status 0 (see the POD above for why).
sub check_lib_or_exit {
    my $ok = eval { assert_lib(@_); 1 };
    if ( !$ok ) {
        warn $@;
        exit;
    }
    return;
}

# Run assert_lib() with the caller's arguments and report success as 1/0
# instead of dying.
sub check_lib {
    my $ok = eval { assert_lib(@_); 1 };
    return $ok ? 1 : 0;
}

# Expand a named parameter that may be a plain string or an array reference
# into a flat list.  A false value (missing, undef, '', 0) yields no elements.
sub _listify {
    my ($value) = @_;
    return () unless $value;
    return ref($value) ? @{$value} : ($value);
}

# Echo the command when debugging, then run it (noisily when debugging,
# silently otherwise) and return the system() status.  An empty command
# (no compiler recipe for this platform) is reported as a failure (-1)
# without trying to spawn anything.
sub _run_cmd {
    my ( $debug, @cmd ) = @_;
    warn "# @cmd\n" if $debug;
    return -1 unless @cmd;
    return $debug ? system(@cmd) : _quiet_system(@cmd);
}

# Die unless every requested header can be compiled against and every
# requested library can be linked and the resulting program exits with 0.
#
# Named parameters: lib, libpath, header, incpath (string or array ref),
# LIBS and INC (MakeMaker-style strings, also picked up from @ARGV),
# function (C source used as the body of main()), debug.
sub assert_lib {
    my %args = @_;

    my @libs     = _listify( $args{lib} );
    my @libpaths = _listify( $args{libpath} );
    my @headers  = _listify( $args{header} );
    my @incpaths = _listify( $args{incpath} );

    # work-a-like for Makefile.PL's LIBS and INC arguments
    # if given as command-line argument, append to %args
    for my $arg (@ARGV) {
        for my $mm_attr_key (qw(LIBS INC)) {
            if ( my ($mm_attr_value) = $arg =~ /\A $mm_attr_key = (.*)/x ) {

                # it is tempting to put some \s* into the expression, but the
                # MM command-line parser only accepts LIBS etc. followed by =,
                # so we should not be any more lenient with whitespace than that
                $args{$mm_attr_key} .= " $mm_attr_value";
            }
        }
    }

    # using special form of split to trim whitespace
    if ( defined( $args{LIBS} ) ) {
        foreach my $arg ( split( ' ', $args{LIBS} ) ) {
            die("LIBS argument badly-formed: $arg\n") unless ( $arg =~ /^-[lLR]/ );

            # -lfoo names a library; -L and -R both name search directories
            push @{ $arg =~ /^-l/ ? \@libs : \@libpaths }, substr( $arg, 2 );
        }
    }
    if ( defined( $args{INC} ) ) {
        foreach my $arg ( split( ' ', $args{INC} ) ) {
            die("INC argument badly-formed: $arg\n") unless ( $arg =~ /^-I/ );
            push @incpaths, substr( $arg, 2 );
        }
    }

    my ( $cc, $ld ) = _findcc();
    my @missing;
    my @wrongresult;
    my @use_headers;

    # first figure out which headers we can't find ...
    for my $header (@headers) {

        # headers accumulate: each test program includes every header
        # seen so far, in the order given
        push @use_headers, $header;
        my ( $ch, $cfile ) = File::Temp::tempfile( 'assertlibXXXXXXXX', SUFFIX => '.c' );
        my $ofile = $cfile;
        $ofile =~ s/\.c$/$Config{_o}/;
        print $ch qq{#include <$_>\n} for @use_headers;
        print $ch qq{int main(void) { return 0; }\n};
        close($ch);
        my $exefile = File::Temp::mktemp('assertlibXXXXXXXX') . $Config{_exe};
        my @sys_cmd;

        # FIXME: re-factor - almost identical code later when linking
        if ( $Config{cc} eq 'cl' ) {    # Microsoft compiler
            require Win32;
            @sys_cmd = (
                @$cc,
                $cfile,
                "/Fe$exefile",
                ( map { '/I' . Win32::GetShortPathName($_) } @incpaths ),
                "/link",
                @$ld
            );
        }
        elsif ( $Config{cc} =~ /bcc32(\.exe)?/ ) {    # Borland
            @sys_cmd = (
                @$cc,
                @$ld,
                ( map { "-I$_" } @incpaths ),
                "-o$exefile",
                $cfile
            );
        }
        else {    # Unix-ish: gcc, Sun, AIX (gcc, cc), ...
            @sys_cmd = (
                @$cc,
                @$ld,
                $cfile,
                ( map { "-I$_" } @incpaths ),
                "-o", "$exefile"
            );
        }
        my $rv = _run_cmd( $args{debug}, @sys_cmd );
        push @missing, $header if $rv != 0 || !-x $exefile;
        _cleanup_exe($exefile);
        unlink $ofile if -e $ofile;
        unlink $cfile;
    }

    # now do each library in turn with headers
    my ( $ch, $cfile ) = File::Temp::tempfile( 'assertlibXXXXXXXX', SUFFIX => '.c' );
    my $ofile = $cfile;
    $ofile =~ s/\.c$/$Config{_o}/;
    print $ch qq{#include <$_>\n} foreach (@headers);
    print $ch "int main(void) { " . ( $args{function} || 'return 0;' ) . " }\n";
    close($ch);
    for my $lib (@libs) {
        my $exefile = File::Temp::mktemp('assertlibXXXXXXXX') . $Config{_exe};
        my @sys_cmd;
        if ( $Config{cc} eq 'cl' ) {    # Microsoft compiler
            require Win32;

            # this is horribly sensitive to the order of arguments
            @sys_cmd = (
                @$cc,
                $cfile,
                "${lib}.lib",
                "/Fe$exefile",
                ( map { '/I' . Win32::GetShortPathName($_) } @incpaths ),
                "/link",
                @$ld,
                ( map { '/libpath:' . Win32::GetShortPathName($_) } @libpaths ),
            );
        }
        elsif ( $Config{cc} eq 'CC/DECC' ) {    # VMS
            # no recipe here: @sys_cmd stays empty, so the library is
            # reported as missing below
        }
        elsif ( $Config{cc} =~ /bcc32(\.exe)?/ ) {    # Borland
            @sys_cmd = (
                @$cc,
                @$ld,
                "-o$exefile",
                ( map { "-I$_" } @incpaths ),
                ( map { "-L$_" } @libpaths ),
                "-l$lib",
                $cfile
            );
        }
        else {    # Unix-ish
                  # gcc, Sun, AIX (gcc, cc)
            @sys_cmd = (
                @$cc,
                @$ld,
                $cfile,
                "-o", "$exefile",
                ( map { "-I$_" } @incpaths ),
                ( map { "-L$_" } @libpaths ),
                "-l$lib",
            );
        }
        my $rv = _run_cmd( $args{debug}, @sys_cmd );
        push @missing, $lib if $rv != 0 || !-x $exefile;

        # the single-string form of system() goes through the shell, so a
        # path containing whitespace has to be quoted
        my $absexefile = File::Spec->rel2abs($exefile);
        $absexefile = '"' . $absexefile . '"' if $absexefile =~ m/\s/;
        push @wrongresult, $lib if $rv == 0 && -x $exefile && system($absexefile) != 0;
        unlink $ofile if -e $ofile;
        _cleanup_exe($exefile);
    }
    unlink $cfile;

    my $miss_string = join( q{, }, map { qq{'$_'} } @missing );
    die("Can't link/include C library $miss_string, aborting.\n") if @missing;
    my $wrong_string = join( q{, }, map { qq{'$_'} } @wrongresult );
    die("wrong result: $wrong_string\n") if @wrongresult;
}

# Remove a test executable and the by-products a compiler may leave next to
# it (object file, .manifest, and for MSVC the .ilk and .pdb files).
sub _cleanup_exe {
    my ($exefile) = @_;

    # \Q...\E: the executable suffix (e.g. ".exe") is literal text, not a regex
    my $ofile = $exefile;
    $ofile =~ s/\Q$Config{_exe}\E$/$Config{_o}/;
    unlink $exefile             if -f $exefile;
    unlink $ofile               if -f $ofile;
    unlink "$exefile\.manifest" if -f "$exefile\.manifest";
    if ( $Config{cc} eq 'cl' ) {

        # MSVC also creates foo.ilk and foo.pdb
        my $ilkfile = $exefile;
        $ilkfile =~ s/\Q$Config{_exe}\E$/.ilk/;
        my $pdbfile = $exefile;
        $pdbfile =~ s/\Q$Config{_exe}\E$/.pdb/;
        unlink $ilkfile if -f $ilkfile;
        unlink $pdbfile if -f $pdbfile;
    }
    return;
}

# return ($cc, $ld)
# where $cc is an array ref of compiler name, compiler flags
# where $ld is an array ref of linker flags
# Dies if $Config{cc} is neither executable as given nor found in $PATH.
sub _findcc {

    # Need to use $keep=1 to work with MSWin32 backslashes and quotes
    my $Config_ccflags = $Config{ccflags};    # use copy so ASPerl will compile
    my @Config_ldflags = ();
    for my $config_val ( @Config{qw(ldflags perllibs)} ) {
        push @Config_ldflags, $config_val
          if ( defined $config_val && $config_val =~ /\S/ );
    }
    my @ccflags = grep { length } quotewords( '\s+', 1, $Config_ccflags || '' );
    my @ldflags = grep { length } quotewords( '\s+', 1, @Config_ldflags );

    # the separator is literal text; an unset PATH means nowhere to search
    my @paths = split( /\Q$Config{path_sep}\E/, defined $ENV{PATH} ? $ENV{PATH} : '' );

    # split ' ' also drops leading whitespace, so $cc[0] is the program name
    my @cc = split( ' ', defined $Config{cc} ? $Config{cc} : '' );
    die("Couldn't find your C compiler\n") unless @cc;

    return ( [ @cc, @ccflags ], \@ldflags ) if -x $cc[0];
    foreach my $path (@paths) {
        my $compiler = File::Spec->catfile( $path, $cc[0] ) . $Config{_exe};
        return ( [ $compiler, @cc[ 1 .. $#cc ], @ccflags ], \@ldflags )
          if -x $compiler;
    }
    die("Couldn't find your C compiler\n");
}

# code substantially borrowed from IPC::Run3
# Run a command with STDOUT and STDERR pointed at the null device and return
# the system() status.  Bareword handles are kept on purpose: this module
# declares "use 5.00405", which predates lexical filehandles.
sub _quiet_system {
    my (@cmd) = @_;

    # save handles
    local *STDOUT_SAVE;
    local *STDERR_SAVE;
    open STDOUT_SAVE, ">&STDOUT" or die "CheckLib: $! saving STDOUT";
    open STDERR_SAVE, ">&STDERR" or die "CheckLib: $! saving STDERR";

    # redirect to nowhere
    local *DEV_NULL;
    open DEV_NULL, ">" . File::Spec->devnull
      or die "CheckLib: $! opening handle to null device";
    open STDOUT, ">&" . fileno DEV_NULL
      or die "CheckLib: $! redirecting STDOUT to null handle";
    open STDERR, ">&" . fileno DEV_NULL
      or die "CheckLib: $! redirecting STDERR to null handle";

    # run system command
    my $rv = system(@cmd);

    # restore handles
    open STDOUT, ">&" . fileno STDOUT_SAVE
      or die "CheckLib: $! restoring STDOUT handle";
    open STDERR, ">&" . fileno STDERR_SAVE
      or die "CheckLib: $! restoring STDERR handle";

    # release the temporary duplicates now rather than at scope exit
    close DEV_NULL;
    close STDOUT_SAVE;
    close STDERR_SAVE;

    return $rv;
}

=head1 PLATFORMS SUPPORTED

You must have a C compiler installed.  We check for C<$Config{cc}>,
both literally as it is in Config.pm and also in the $PATH.

It has been tested with varying degrees of rigorousness on:

=over

=item gcc (on Linux, *BSD, Mac OS X, Solaris, Cygwin)

=item Sun's compiler tools on Solaris

=item IBM's tools on AIX

=item SGI's tools on Irix 6.5

=item Microsoft's tools on Windows

=item MinGW on Windows (with Strawberry Perl)

=item Borland's tools on Windows

=item QNX

=back

=head1 WARNINGS, BUGS and FEEDBACK

This is a very early release intended primarily for feedback from
people who have discussed it.
The interface may change and it has not been adequately tested. Feedback is most welcome, including constructive criticism. Bug reports should be made using L or by email. When submitting a bug report, please include the output from running: perl -V perl -MDevel::CheckLib -e0 =head1 SEE ALSO L L =head1 AUTHORS David Cantrell Edavid@cantrell.org.ukE David Golden Edagolden@cpan.orgE Yasuhiro Matsumoto Emattn@cpan.orgE Thanks to the cpan-testers-discuss mailing list for prompting us to write it in the first place; to Chris Williams for help with Borland support; to Tony Cook for help with Microsoft compiler command-line options =head1 COPYRIGHT and LICENCE Copyright 2007 David Cantrell. Portions copyright 2007 David Golden. This module is free-as-in-speech software, and may be used, distributed, and modified under the same conditions as perl itself. =head1 CONSPIRACY This module is also free-as-in-mason software. =cut 1; XML-Parser-2.46/Changes0000644000000000000000000006524113542324437013347 0ustar rootrootRevision history for Perl extension XML::Parser. 2.46 2019-09-24 (by Todd Rinaldo) - use foreach not for for loops - produce README.md so travis will show up on github - remove use vars and switch to our. - travis-ci testing from 5.8..5.28 - Convert XML::Parser to use 3 arg opens with no barewords. - Migrate tracker to github - Switch to XSLoader - Fix a buffer overwrite in parse_stream() 2.44 2015-01-12 (by Todd Rinaldo) - RT 99098 - Revert "Add more useful error message on parse to Expat". It breaks XML::Twig. Calling code will need to do this if it's needed. - RT 100959 - Add use FileHandle to t/astress.t - Make perl 5.10.0 happy. 2.43 2014-12-11 (by Todd Rinaldo) - POD patch to man from Debian via Nicholas Bamber - POD patch from Debian via gregor herrmann. - Add more useful error message on parse to Expat - Fix LWP dependency to be LWP::Useragent - Bump to 2.43 for overdue release to CPAN. 
2.42_01 2013-07-12 (by Todd Rinaldo) - Added instructions to README for OSX - XS changes: stop using SvPV(string, PL_na) - Fix documentation typos 2.41 2011-06-01 (by Todd Rinaldo) - Tests are cleaned. promoting to stable. No changes since 2.40_02 2.40_02 2011-05-31 (by Todd Rinaldo) - TODO some tests which fail in Free BSD due to improper expat CVE patch http://www.freebsd.org/cgi/query-pr.cgi?pr=157469 2.40_01 2011-05-24 (by Todd Rinaldo) - better installation instructions - Small spelling patches from Debian package - Thanks Nicholas Bamber - RT 68399 - Upgrade Devel::CheckLib to 0.93 to make it perl 5.14 compliant - qw() - RT 67207 - Stop doing tied on globs - Thanks sprout - RT 31319 - Fix doc links in POD for XML/Parser.pm 2.40 2010-09-16 (by Alexandr Ciornii) - Add windows-1251.enc, ibm866.enc, koi8-r.enc (Russian) - Add windows-1255.enc (Hebrew) - Update iso-8859-7.enc (RT#40712) - Use Devel::CheckLib - Better description of expat packages - Better Perl style in both code and docs 2.36 - Fix for Carp::Heavy bugs 2.35 (mostly by Alexandr Ciornii) - Works in 5.10 (Andreas J. Koenig) - Added license in Makefile.PL (Alexandr Ciornii) - Makefile.PL also searches for expat in C:/lib/Expat-2.0.0 (Alexandr Ciornii) - No longer uses variable named 'namespace' in Expat.xs (Jeff Hunter) 2.33 - Fixed Tree style (grantm) - Fixed some non-utf8 stuff in DTDs (patch in XML::DOM tarball) 2.32 - Memory leak fix (Juerd Waalboer). - Added windows-1252 encoding - Styles moved to separate .pm files to make loading faster and ease maintenance - Don't load IO::Handle unless we really need to 2.31 Tue Apr 2 13:39:51 EST 2002 - Ilya Zakharevich and Dave Mitchell both provided patches to fix problems module had with 5.8.0 - Dave Mitchell also made some UTF-8 related fixes to the test suite. 2.30 Thu Oct 5 12:47:36 EDT 2000 - Get rid of ContentStash global. Not that big a deal looking it up every time and gets rid of a potential threading problem.
- Switch to shareable library version of expat from sourceforge (i.e. no longer include expat source and require that libexpat be installed) - Bob Tribit demonstrated a fix for problems in compiling under perl 5.6.0 with 5.005 threading. - Matt Sergeant discovered a typo ('IO::Handler' instead of 'IO::Handle') in Expat.pm that caused IO::Handle objects to be treated as strings instead of handles. - Matt Sergeant also provided a patch to allow tied handles to work properly in calls to parse. - Eric Bohlman reported a failure when incremental parsing and external parsing were used together. Need to give explicit package when calling Do_External_Parse from externalEntityRef otherwise fails when called through ExpatNB. 2.29 Sun May 21 21:19:45 EDT 2000 - In expat, notation declaration handler registration wasn't surviving through external entity references. - Chase Tingley discovered that text accumulation in the Stream style wasn't working across processing instructions and recommended the appropriate fix. - Jochen Wiedmann , noted that you couldn't use ExpatNB directly because it wasn't setting the protective _State_ variable. Now doing this in the parse_more method of ExpatNB. - At the suggestion of Grant Hopwood , now calling the env_proxy method on the LWP::UserAgent in the LWP external entity handler when it's created to set any proxies from environment variables. - Grant McLean, Matt Sergeant (& others I may have missed) noted that loading the LWP & URI modules slowed startup of the module, even if the application didn't need it. The default LWP handler is now dynamicly loaded (along with LWP & URI modules) the first time an external entity is referenced. Also provided a NoLWP option to XML::Parser that forces the file based external entity handler. - Fixed allocation errors in element declaration patches in expat - The Expat base method now works, even before expat starts parsing. - Changed the canonical script to take an optional file argument. 
- Enno Derksen reported that the attlist handler was not returning NOTATION type attlist information. - Michel Rodriguez , noted that the constructor for XML::Parser objects no longer checked for the existence of applications installed external entity handlers before installing the default ones. - Burkhard Meier sent in a fix for compiler directives in Expat/Makefile.PL for Win32 machines. A change in 5.6.0 caused the old conditional to fail. - Forgot to document changes to the Entity declaration handler: there is an additional "IsParam" argument that indicates whether or not the entity is a parameter entity. This information is no longer passed on in the name. - Ben Low reported an undefined macro with version 5.004_04. 2.28 Mon Mar 27 21:21:50 EST 2000 - Junked local (Expat.xs) declaration parsing and patched expat to handle XML declarations, element declarations, attlist declarations, and all entity declarations. By eliminating both shadow buffers and local declaration parsing in Expat.xs, I've eliminated the two most common sources of serious bugs in the expat interface. o thus fixed the segfault and parse position bugs reported by Ivan Kurmanov o and the doctype bug reported by Kevin Lund o The element declaration handler no longer receives a string, but an XML::Parser::ContentModel object that represents the parsed model, but still looks like a string if referred to as a string. This class is documented in the XML::Parser::Expat pod under "XML::Parser::ContentModel Methods". o The doctype declaration handler no longer receives the internal subset as a string, but in its place a true or undef value indicating whether or not there is an internal subset. Also, it's called prior to processing either the internal or external DTD subset (as suggested by Enno Derksen .) o There is a new DoctypeFin handler that's called after finishing parsing all of the DOCTYPE declaration, including any internal or external DTD declarations. 
o One bit of lossage is that recognized_string, original_string, and default_current no longer work inside declaration handlers. - Added a handler that gets called after parsing external entities: ExternEntFin. Suggested by Jeff Horner . - parsefile, file_ext_ent_handler, & lwp_ext_ent_handler now all set the base path. This problem has been raised more than once and I'm not sure to whom credit should be given. - The file_ext_ent_handler now opens a file handle instead of reading the entire entity at once. - Merged patches supplied by Larry Wall to (for perl 5.6 and beyond) tag generated strings as UTF-8, where appropriate. - Fixed a bug in xml_escape reported by Jerry Geiger . It failed when requesting escaping of perl regex meta-characters. - Laurent Caprani reported a bug in the Proc handler for the Debug style. - sent in a patch for the element index mechanism. I was popping the stack too soon in the endElement fcn. - Jim Miner sent in a patch to fix a warning in Expat.pm. - Kurt Starsinic pointed out that the eval used to check for string versus IO handle was leaving $@ dirty, thereby foiling higher level exception handlers - An expat question by Paul Prescod helped me see that exceptions in the parse call bypass the Expat release method, causing memory leaks. - Mark D. Anderson noted that calling recognized_string from the Final method caused a dump. There are a bunch of methods that should not be called after parsing has finished. These now have protective if statements around them. - Updated canonical utility to conform to newer version of Canonical XML working draft.
2.27 Sat Sep 25 18:26:44 EDT 1999 - Corrected documentation in Parser.pm - Deal with XML_NS and XML_BYTE_ORDER macros in Expat/Makefile.PL - Chris Thorman noted that "require 'URI::URL.pm'" in Parser.pm was in error (should be "require 'URI/URL.pm'") - Andrew McNaughton noted "use English" and use of '$&' slowed down regex handling for whole application, so they were excised from XML::Parser::Expat. - Work around "modification of read-only value" bug in perl 5.004 - Enno Derksen reported that the Doctype handler wasn't being called when ParseParamEnt was set. - Now using Version 19990728 of expat, with local patches. - Got rid of shadow buffer o thus fixed the error reported by Ashley Sanders o and removed ExpatNB limitations that Peter Billam noted. - Vadim Konovalov had a problem compiling for multi-threading that was fixed by changing Perl_sv_setsv to sv_setsv. - Added new Expat method: skip_until(index) - Backward incompatible change to method xml_escape: to get former behavior use $xp->xml_escape($string, '>', ...) - Added utility, canonical, to samples 2.26 Sun Jul 25 19:06:41 EDT 1999 - Ken Beesley discovered that declarations in the external subset are not sent to registered handlers when there is no internal subset. - Fixed parse_dtd to work when entity values or attribute defaults are so large that they might be broken across multiple calls to the default handler. - For lwp_ext_ent_handler, use URI::URL instead of URI so that old 5.004 installations will work with it. 2.25 Fri Jul 23 06:23:43 EDT 1999 - Now using Version 1990709 of expat. No local patches. - Numerous people reported a SEGV problem when running t/cdata on various platforms and versions of perl. The problem was introduced with the setHandlers change. In some cases an un-initialized value was being returned. - Added an additional external entity handler, lwp_ext_ent_handler, that deals with general URIs. It is installed instead of the "file only" handler if the LWP package is installed. 
2.24 Thu Jul 8 23:05:50 EDT 1999 - KangChan Lee supplied the EUC-KR encoding map. - Enno Derksen forwarded reports by Jon Eisenzopf and Stefaan Onderbeke about a core dump using XML::DOM. This was due to a bug in the prolog parsing part of XML::Parser. - Loic Dachary discovered that changing G_DISCARD to G_VOID introduced a small memory leak. Changed G_VOID back to G_DISCARD. - As suggested by Ben Holzman , the setHandlers methods of both Parser and Expat now return lists that consist of type, handler pairs that correspond to the input, but the handlers returned are the ones that were in effect prior to the call. - Now using Version 19990626 of expat with a local patch (provided by James Clark.) - Added option ParseParamEnt. When set to a true value, parameter entities are parsed and the external DTD is read (unless standalone set to "Yes" in document). 2.23 Mon Apr 26 21:30:28 EDT 1999 - Fixed a bug in the ExpatNB class reported by Gabe Beged-Dov . The ErrorMessage attribute wasn't being initialized for ExpatNB. This should have been done in the Expat constructor. - Applied patch provided by Nathan Kurz to fix more perl stack manipulation errors in Expat.xs. - Applied another patch by Nathan to change perl_call_sv flag from G_DISCARD to G_VOID for callbacks, which helps performance. - Murata Makoto reported a problem on Win32 platforms that only showed up when UTF-16 was being used. The needed call to binmode was added to the parsefile methods. - Added documentation for release method that was added in release 2.20 to Expat pod. (Point raised by ) - Now using Version 19990425 of expat. No local patches. - Added specified_attr method and made ineffective the is_defaulted method. 2.22 Sun Apr 4 11:47:25 EDT 1999 - Loic Dachary reported a core dump with a small file with a comment that wasn't properly closed. Fixed in expat by updating positionPtr properly in final call of XML_Parse. (Reported to & acknowledged by James Clark.) 
- Made more fixes to Expat.xs position calculation. - Loic Dachary provided patches for fixing a memory growth problem with large documents. (Garbage collection wasn't happening frequently enough.) - As suggested by Gabe Beged-Dov , added a non-blocking parse mechanism: - Added parse_start method to XML::Parser, which returns a XML::Parser::ExpatNB object. - Added XML::Parser::ExpatNB class, which is a subclass of Expat and has the additional methods parse_more & parse_done - Made some performance tweaks as suggested by performance thread on perl-xml discussion list. [With negligible results] - Tried to clarify Tree style structure in Parser pod 2.21 Sun Mar 21 17:42:04 EST 1999 - Warren Vik provided patches for a bug introduced with the is_defaulted method. It manifested itself by bogusly reporting duplicate attributes. - Now using latest expat from ftp://ftp.jclark.com/pub/test/expat.zip, Version 19990307. (Plus any patches in Expat/expat.patches.) - As suggested by Tim Bray, added an xml_escape method to Expat. - Murray Nesbitt had build problems on Win32 that were solved by swapping 2 include files in Expat.xs - Added following Expat namespace methods: new_ns_prefixes expand_ns_prefix current_ns_prefixes - Fixed memory handling in recognized_string method to get rid of "Attempt to free unreferenced scalar" bug. 2.20 Sun Feb 28 15:35:52 EST 1999 - Fixed miscellaneous bugs in xmlfilter. - In the default external entity handler, prepend the base only for relative URLs. - Chris Nandor provided patches for building on Macintosh. - As suggested by Matt Sergeant , added the finish method to Expat. - Matt also provided a fix to a bug he discovered in the Streams style. - Fixed a parse position bug reported by Enno Derksen that was affecting both original_string and position_in_context. 
- Fixed a gross memory leak reported by David Megginson, : there was a circular reference to the Expat object and the internal end handler for context was not freeing element names after they were removed from the context stack. - Now using expat Version 19990109 (Plus any patches in Expat/expat.patches) - Added is_defaulted method to Expat to tell if an attribute was defaulted. (Requested by Enno Derksen for XML::DOM.) - Matt Sergeant reported that the XML::Parser parse methods weren't propagating array context to the Final handler. Now they are. - Fixed more memory leaks (again reported by David Megginson). The SVs pointing to the handlers weren't being reclaimed when the callback vector was freed. - Added the element_index method to Expat. 2.19 Sun Jan 3 11:23:45 EST 1999 - When the recognized string is long enough, expat uses multiple calls to reportDefault. Fixed recString handler in Expat.xs to deal with this properly. - Added original_string method to Expat. This returns the untranslated string (i.e. original encoding) that caused current event. - Alberto Accomazzi sent in more patches for perl5.005_54 incompatibilities. - Alberto also fingered a nasty memory bug in Expat.xs that arose sometimes when you registered a declaration handler but no default handler. It would give you a "Not a CODE reference" error in a place that wasn't using any CODE references. - reported a problem with compiling expat on a Sun 4 due to non-existence of memmove on that OS. Provided a workaround in Makefile.PL - Now using expat Version 19981231 from James Clark's test directory. - Made patch to this version in order to support original_string (see Expat/expat.patches.) - Added CdataStart and CdataEnd handlers to expat. 2.18 Sun Dec 27 07:39:23 EST 1998 - Alberto Accomazzi pointed out that the DESTROY sub in the new XML::Parser::Encinfo package was pointing to the wrong package for calling FreeEncoding.
- Tarang Kumar Patel reported the mis-declaration of an integer as unsigned in the convert_to_unicode function in Expat.xs. - Glenn R. Kronschnabl reported a problem with ExternEnt handlers when using parsefile. Turned out to be an unmatched ENTER; SAVETMPS pair that screwed up the Perl stack. - Tom Hughes reported that the fix I put in for the swith to PL_sv.. names failed with 5.0005_54, since these became real variables instead of macros. Switched to just checking the PATCHLEVEL macro. - Yoshida Masato provided the EUC-JP encodings (the corresponding XML files are in XML::Encoding 1.01 or later.) - With the advice of MURATA Makoto , removed the Shift_JIS encoding and replaced it with 4 variations he provided. He also provided an explanatory message. - Added the recognized_string method to Expat, deprecating default_current. - Now using expat Version 19981122 from James Clark's test directory (this fixes another bug with external entity reference handlers) - Added a default external entity handler that only accesses file: based URLs. 2.17 Sun Dec 13 17:39:58 EST 1998 - Replaced uses of malloc, realloc, and free with New, Renew, and Safefree respectively - In Expat.pm, fixed methods in_element and within_element to work correctly with namespaces. - xmlfilter - Substitute quoted equivalents for special characters in attribute values. - position_in_context was off by one line when position was at the end of line. - For the context methods in Expat.pm, do the right thing when the context list is empty. - Added methods xpcroak and xpcarp to Expat. - Alberto Accomazzi noted that perl releases 5.005_5* (the pre 5.006 development versions) won't accept sv_undef (and related constants) anymore and we have to switch to PL_sv_... - Alberto also reported a warning in the newer versions of IO::Handle about input_record_separator not being treated on a per-handle basis. 
- Fixed bug that Jon Udell reported in Stream style: Text handler most of the time didn't see proper context. - Added XML::Parser::Expat::load_encoding function and support for external encodings. 2.16 Tue Oct 27 22:27:33 EST 1998 - Fixed bug reported by Enno Derksen : Now treats parameter entity declarations correctly. The entity handler sees the name beginning with '%' if it's a parameter entity declaration. - Nigel Hutchison pointed out that stream.t wasn't portable off Unix systems. Replaced with portable version. - Fixed bug reported by Enno Derksen : XML Declaration was firing off both XMLDecl handler *and* Default handler. - Added option NoExpand to Expat to turn off expansion of entity references when a default handler is set. 2.15 Tue Oct 20 14:50:11 EDT 1998 - In Expat's parse method, account for undefined previous record separators. - Simplify a couple of Expat methods. - Re-ordered Changes entries to put latest changes first. - In XML::Parser::new, set Handlers if not already set - New Handler (XMLDecl) for handling XML declarations - New Handler (Doctype) for handling DOCTYPE declarations - New Handler (Entity) for handling ENTITY declarations in the internal subset. - New Handler (Element) for handling ELEMENT declarations in the internal subset. - New Handler (Attlist) for handling ATTLIST declarations in the internal subset. - Documented new handlers - Added t/decl.t to test new handlers 2.14 Sun Oct 11 22:17:15 EDT 1998 - Always use method calls for streams. - Use perl's input_record_separator to find delimiter (i.e. each "line" is an entire XML doc with delimiter appended) - Deal with line being longer than buffer. 2.13 Thu Oct 8 16:58:39 EDT 1998 - Fixed a major oops in Expat.xs where I was trying to decrement a refcnt on an unallocated SV, leading to a segment violation. (Why did this show up on HPUX but not Linux?) 2.12 Thu Oct 8 00:05:10 EDT 1998 - Incorporated fix to t/astress.t from (Mike Fletcher). 
- Change to xmlstats from (David Alan Black) - Access Handlers_Setters in Expat and Handler_Types in Parser through object reference (following admonition in perltoot about class data.) - Added Stream_Delimiter option to Expat. - In the parse_stream function in Expat.xs, if we either have a Stream_Delimiter or if there's no file descriptor, use method calls instead. For Stream_Delimiter in particular, the function now uses the getline method so it can check for the delimiter without consuming stuff past the delimiter from the stream. 2.11 Sun Oct 4 22:15:53 EDT 1998 - Swapped out local patch for expat and swapped in James Clark's patch. - Pass on all Parser attributes (other than those excluded by Non_Expat_Options) to the instance of Expat created at parse time. - New method for Expat: generate_ns_name - Split test.pl into t/*.t and change Makefile.PL so we don't do a useless descent into Expat subdir for testing. - Stop the numeric warning for eq_name and namespace method. 2.10 Fri Sep 25 18:36:46 EDT 1998 - Uses expat Version 19980924 (with local patch - see Expat/expat/xmlparse/xmlparse.c.diff) - Use newSVpvn when PERL_VERSION >= 5.005 - Completed xmlfilter - Added support for namespace processing: o Namespaces option to XML::Parser and XML::Parser::Expat o Two new methods in Expat: namespace - to return namespace associated with name eq_name - compare 2 names for equality across namespaces. - Use expat's new SetDefaultHandlerExpand instead of SetDefaultHandler so that entity expansion may continue even if the default handler is set. - Moved test.pl back up main level and changed to work with XML::Parser - Added tests for namespaces 2.09 Fri Sep 18 10:33:38 EDT 1998 - Fixed errors that caused -w to fret in XML::Parser. - Fixed depth method in XML::Parser::Expat - There were a few places in Expat.xs where garbage strings may have been returned due to the expat library giving us zero-length strings. 
Fixed by using a local version of newSVpv where length means length, even when zero. - The default handler setter in Expat.xs, was inappropriately setting cbv->dflt_sv when there was a null handler. 2.08 Thu Sep 17 11:47:13 EDT 1998 - Make XML::Parser higher-level re-usable parser objects. Old object now becomes XML::Parser::Expat. - The XML::Parser object now supports the style mechanism very close to that in the 1.0 version. 2.07 Wed Sep 9 11:03:43 EDT 1998 - Added some samples (xmlcomments & xmlstats) - Now requires 5.004 (due to sv_catpvf) - Changed Makefile.PL to allow automatic manification - Added a test that reads xml spec (to check buffer boundary errors) 2.06 Tue Sep 1 10:40:41 EDT 1998 - Fixed the methods current_line, current_byte, and current_column - Added some tests 2.05 Mon Aug 31 15:29:42 EDT 1998 - Made Makefile.PL changes suggested by Murray Nesbitt to support building on Win32 and for making PPM binaries. - Added method parse - Changed parsestring and parsefile to use new parse method - Deprecated parsestring method - Improved error handling in the ExternEnt handler 2.04 Wed Aug 26 13:25:01 EDT 1998 - Uses expat Version 1.0 of August 14, 1998 - Some document changes - Changed dist section in Makefile.PL - Added ExternEnt handler - Added tests for ExternEnt 2.03 Fri Aug 21 17:19:26 EDT 1998 - Changed InitEncoding to ProtocolEncoding. Default to none. Pass null string to expat's ParserCreate when there is no ProtocolEncoding. - Fixed bug in parsefile & parsestring where they were referring to an ErrorContext *method* instead of a field. - Fixed position_in_context bugs: -- 'last' in do {} while (); -- insert newline before pointer when no following newline in buffer. - Added some additional tests 2.02 Thu Aug 20 14:05:08 EDT 1998 - Fixed parsefile problem reported by "Robert Hanson" , using a modification of his suggested fix. 
- Responded to problem reported by Bart Schuller by pre-expanding parts of the XML_UPD macro to avoid confusing some versions of gcc. - Changed the constructor to take the option InitEncoding, which gets passed to the ParserCreate call. When not given, defaults to UTF-8. - Added method position_in_context - Added Constructor option ErrorContext and added reporting of errors in context. 2.01 Wed Aug 19 11:42:42 EDT 1998 - Added methods: default_current, base, current_line, current_column, current_byte, context - Added some tests - parsestring and parsefile now croak if they're re-used - Filled in some documentation 2.00 Mon Aug 17 12:01:33 EDT 1998 - repackaged with James Clark's most recent expat - changed to an API closer to expat 1.00 March 1998 - Larry Wall's original version XML-Parser-2.46/README0000644000000000000000000005151113542322354012723 0ustar rootrootNAME XML::Parser - A perl module for parsing XML documents SYNOPSIS use XML::Parser; $p1 = XML::Parser->new(Style => 'Debug'); $p1->parsefile('REC-xml-19980210.xml'); $p1->parse('<foo id="me">Hello World</foo>'); # Alternative $p2 = XML::Parser->new(Handlers => {Start => \&handle_start, End => \&handle_end, Char => \&handle_char}); $p2->parse($socket); # Another alternative $p3 = XML::Parser->new(ErrorContext => 2); $p3->setHandlers(Char => \&text, Default => \&other); open(my $fh, 'xmlgenerator |'); $p3->parse($fh, ProtocolEncoding => 'ISO-8859-1'); close($fh); $p3->parsefile('junk.xml', ErrorContext => 3); DESCRIPTION This module provides ways to parse XML documents. It is built on top of XML::Parser::Expat, which is a lower level interface to James Clark's expat library. Each call to one of the parsing methods creates a new instance of XML::Parser::Expat which is then used to parse the document. Expat options may be provided when the XML::Parser object is created. These options are then passed on to the Expat object on each parse call.
They can also be given as extra arguments to the parse methods, in which case they override options given at XML::Parser creation time. The behavior of the parser is controlled either by "STYLES" and/or "HANDLERS" options, or by "setHandlers" method. These all provide mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat. If neither "Style" nor "Handlers" are specified, then parsing just checks the document for being well-formed. When underlying handlers get called, they receive as their first parameter the *Expat* object, not the Parser object. METHODS new This is a class method, the constructor for XML::Parser. Options are passed as keyword value pairs. Recognized options are: * Style This option provides an easy way to create a given style of parser. The built in styles are: "Debug", "Subs", "Tree", "Objects", and "Stream". These are all defined in separate packages under "XML::Parser::Style::*", and you can find further documentation for each style both below, and in those packages. Custom styles can be provided by giving a full package name containing at least one '::'. This package should then have subs defined for each handler it wishes to have installed. See "STYLES" below for a discussion of each built in style. * Handlers When provided, this option should be an anonymous hash containing as keys the type of handler and as values a sub reference to handle that type of event. All the handlers get passed as their 1st parameter the instance of expat that is parsing the document. Further details on handlers can be found in "HANDLERS". Any handler set here overrides the corresponding handler set with the Style option. * Pkg Some styles will refer to subs defined in this package. If not provided, it defaults to the package which called the constructor. * ErrorContext This is an Expat option. When this option is defined, errors are reported in context. 
The value should be the number of lines to show on either side of the line in which the error occurred. * ProtocolEncoding This is an Expat option. This sets the protocol encoding name. It defaults to none. The built-in encodings are: "UTF-8", "ISO-8859-1", "UTF-16", and "US-ASCII". Other encodings may be used if they have encoding maps in one of the directories in the @Encoding_Path list. Check "ENCODINGS" for more information on encoding maps. Setting the protocol encoding overrides any encoding in the XML declaration. * Namespaces This is an Expat option. If this is set to a true value, then namespace processing is done during the parse. See "Namespaces" in XML::Parser::Expat for further discussion of namespace processing. * NoExpand This is an Expat option. Normally, the parser will try to expand references to entities defined in the internal subset. If this option is set to a true value, and a default handler is also set, then the default handler will be called when an entity reference is seen in text. This has no effect if a default handler has not been registered, and it has no effect on the expansion of entity references inside attribute values. * Stream_Delimiter This is an Expat option. It takes a string value. When this string is found alone on a line while parsing from a stream, then the parse is ended as if it saw an end of file. The intended use is with a stream of xml documents in a MIME multipart format. The string should not contain a trailing newline. * ParseParamEnt This is an Expat option. Unless standalone is set to "yes" in the XML declaration, setting this to a true value allows the external DTD to be read, and parameter entities to be parsed and expanded. * NoLWP This option has no effect if the ExternEnt or ExternEntFin handlers are directly set. Otherwise, if true, it forces the use of a file based external entity handler. 
* Non_Expat_Options If provided, this should be an anonymous hash whose keys are options that shouldn't be passed to Expat. This should only be of concern to those subclassing XML::Parser. setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]]) This method registers handlers for various parser events. It overrides any previous handlers registered through the Style or Handler options or through earlier calls to setHandlers. By providing a false or undefined value as the handler, the existing handler can be unset. This method returns a list of type, handler pairs corresponding to the input. The handlers returned are the ones that were in effect prior to the call. See a description of the handler types in "HANDLERS". parse(SOURCE [, OPT => OPT_VALUE [...]]) The SOURCE parameter should either be a string containing the whole XML document, or it should be an open IO::Handle. Constructor options to XML::Parser::Expat given as keyword-value pairs may follow the SOURCE parameter. These override, for this call, any options or attributes passed through from the XML::Parser instance. A die call is thrown if a parse error occurs. Otherwise it will return 1 or whatever is returned from the Final handler, if one is installed. In other words, what parse may return depends on the style. parsestring This is just an alias for parse for backwards compatibility. parsefile(FILE [, OPT => OPT_VALUE [...]]) Open FILE for reading, then call parse with the open handle. The file is closed no matter how parse returns. Returns what parse returns. parse_start([ OPT => OPT_VALUE [...]]) Create and return a new instance of XML::Parser::ExpatNB. Constructor options may be provided. If an init handler has been provided, it is called before returning the ExpatNB object. Documents are parsed by making incremental calls to the parse_more method of this object, which takes a string. A single call to the parse_done method of this object, which takes no arguments, indicates that the document is finished. 
If there is a final handler installed, it is executed by the parse_done method before returning and the parse_done method returns whatever is returned by the final handler. HANDLERS Expat is an event based parser. As the parser recognizes parts of the document (say the start or end tag for an XML element), then any handlers registered for that type of an event are called with suitable parameters. All handlers receive an instance of XML::Parser::Expat as their first argument. See "METHODS" in XML::Parser::Expat for a discussion of the methods that can be called on this object. Init (Expat) This is called just before the parsing of the document starts. Final (Expat) This is called just after parsing has finished, but only if no errors occurred during the parse. Parse returns what this returns. Start (Expat, Element [, Attr, Val [,...]]) This event is generated when an XML start tag is recognized. Element is the name of the XML element type that is opened with the start tag. The Attr & Val pairs are generated for each attribute in the start tag. End (Expat, Element) This event is generated when an XML end tag is recognized. Note that an XML empty tag (<foo/>) generates both a start and an end event. Char (Expat, String) This event is generated when non-markup is recognized. The non-markup sequence of characters is in String. A single non-markup sequence of characters may generate multiple calls to this handler. Whatever the encoding of the string in the original document, this is given to the handler in UTF-8. Proc (Expat, Target, Data) This event is generated when a processing instruction is recognized. Comment (Expat, Data) This event is generated when a comment is recognized. CdataStart (Expat) This is called at the start of a CDATA section. CdataEnd (Expat) This is called at the end of a CDATA section. Default (Expat, String) This is called for any characters that don't have a registered handler.
This includes both characters that are part of markup for which no events are generated (markup declarations) and characters that could generate events, but for which no handler has been registered. Whatever the encoding in the original document, the string is returned to the handler in UTF-8. Unparsed (Expat, Entity, Base, Sysid, Pubid, Notation) This is called for a declaration of an unparsed entity. Entity is the name of the entity. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Notation is the notation name. Base and Pubid may be undefined. Notation (Expat, Notation, Base, Sysid, Pubid) This is called for a declaration of notation. Notation is the notation name. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Base, Sysid, and Pubid may all be undefined. ExternEnt (Expat, Base, Sysid, Pubid) This is called when an external entity is referenced. Base is the base to be used for resolving a relative URI. Sysid is the system id. Pubid is the public id. Base, and Pubid may be undefined. This handler should either return a string, which represents the contents of the external entity, or return an open filehandle that can be read to obtain the contents of the external entity, or return undef, which indicates the external entity couldn't be found and will generate a parse error. If an open filehandle is returned, it must be returned as either a glob (*FOO) or as a reference to a glob (e.g. an instance of IO::Handle). A default handler is installed for this event. The default handler is XML::Parser::lwp_ext_ent_handler unless the NoLWP option was provided with a true value, otherwise XML::Parser::file_ext_ent_handler is the default handler for external entities. Even without the NoLWP option, if the URI or LWP modules are missing, the file based handler ends up being used after giving a warning on the first external entity reference. 
The LWP external entity handler will use proxies defined in the environment (http_proxy, ftp_proxy, etc.). Please note that the LWP external entity handler reads the entire entity into a string and returns it, where as the file handler opens a filehandle. Also note that the file external entity handler will likely choke on absolute URIs or file names that don't fit the conventions of the local operating system. The expat base method can be used to set a basename for relative pathnames. If no basename is given, or if the basename is itself a relative name, then it is relative to the current working directory. ExternEntFin (Expat) This is called after parsing an external entity. It's not called unless an ExternEnt handler is also set. There is a default handler installed that pairs with the default ExternEnt handler. If you're going to install your own ExternEnt handler, then you should set (or unset) this handler too. Entity (Expat, Name, Val, Sysid, Pubid, Ndata, IsParam) This is called when an entity is declared. For internal entities, the Val parameter will contain the value and the remaining three parameters will be undefined. For external entities, the Val parameter will be undefined, the Sysid parameter will have the system id, the Pubid parameter will have the public id if it was provided (it will be undefined otherwise), the Ndata parameter will contain the notation for unparsed entities. If this is a parameter entity declaration, then the IsParam parameter is true. Note that this handler and the Unparsed handler above overlap. If both are set, then this handler will not be called for unparsed entities. Element (Expat, Name, Model) The element handler is called when an element declaration is found. Name is the element name, and Model is the content model as an XML::Parser::Content object. See "XML::Parser::ContentModel Methods" in XML::Parser::Expat for methods available for this class. 
Attlist (Expat, Elname, Attname, Type, Default, Fixed) This handler is called for each attribute in an ATTLIST declaration. So an ATTLIST declaration that has multiple attributes will generate multiple calls to this handler. The Elname parameter is the name of the element with which the attribute is being associated. The Attname parameter is the name of the attribute. Type is the attribute type, given as a string. Default is the default value, which will either be "#REQUIRED", "#IMPLIED" or a quoted string (i.e. the returned string will begin and end with a quote character). If Fixed is true, then this is a fixed attribute. Doctype (Expat, Name, Sysid, Pubid, Internal) This handler is called for DOCTYPE declarations. Name is the document type name. Sysid is the system id of the document type, if it was provided, otherwise it's undefined. Pubid is the public id of the document type, which will be undefined if no public id was given. Internal is the internal subset, given as a string. If there was no internal subset, it will be undefined. Internal will contain all whitespace, comments, processing instructions, and declarations seen in the internal subset. The declarations will be there whether or not they have been processed by another handler (except for unparsed entities processed by the Unparsed handler). However, comments and processing instructions will not appear if they've been processed by their respective handlers. * DoctypeFin (Parser) This handler is called after parsing of the DOCTYPE declaration has finished, including any internal or external DTD declarations. XMLDecl (Expat, Version, Encoding, Standalone) This handler is called for xml declarations. Version is a string containing the version. Encoding is either undefined or contains an encoding string. Standalone will be either true, false, or undefined if the standalone attribute is yes, no, or not made respectively. STYLES Debug This just prints out the document in outline form. 
Nothing special is returned by parse. Subs Each time an element starts, a sub by that name in the package specified by the Pkg option is called with the same parameters that the Start handler gets called with. Each time an element ends, a sub with that name appended with an underscore ("_"), is called with the same parameters that the End handler gets called with. Nothing special is returned by parse. Tree Parse will return a parse tree for the document. Each node in the tree takes the form of a tag, content pair. Text nodes are represented with a pseudo-tag of "0" and the string that is their content. For elements, the content is an array reference. The first item in the array is a (possibly empty) hash reference containing attributes. The remainder of the array is a sequence of tag-content pairs representing the content of the element. So for example the result of parsing: <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> would be: Tag Content ================================================================== [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [ {}, 0, "Howdy", ref, [{}]], 0, "do" ] ] The root document "foo", has 3 children: a "head" element, a "bar" element and the text "do". After the empty attribute hash, these are represented in its contents by 3 tag-content pairs. Objects This is similar to the Tree style, except that a hash object is created for each element. The corresponding object will be in the class whose name is created by appending "::" and the element name to the package set with the Pkg option. Non-markup text will be in the ::Characters class. The contents of the corresponding object will be in an anonymous array that is the value of the Kids property for that object. Stream This style also uses the Pkg package. If none of the subs that this style looks for is there, then the effect of parsing with this style is to print a canonical copy of the document without comments or declarations.
All the subs receive as their 1st parameter the Expat instance for the document they're parsing. It looks for the following routines: * StartDocument Called at the start of the parse . * StartTag Called for every start tag with a second parameter of the element type. The $_ variable will contain a copy of the tag and the %_ variable will contain attribute values supplied for that element. * EndTag Called for every end tag with a second parameter of the element type. The $_ variable will contain a copy of the end tag. * Text Called just before start or end tags with accumulated non-markup text in the $_ variable. * PI Called for processing instructions. The $_ variable will contain a copy of the PI and the target and data are sent as 2nd and 3rd parameters respectively. * EndDocument Called at conclusion of the parse. ENCODINGS XML documents may be encoded in character sets other than Unicode as long as they may be mapped into the Unicode character set. Expat has further restrictions on encodings. Read the xmlparse.h header file in the expat distribution to see details on these restrictions. Expat has built-in encodings for: "UTF-8", "ISO-8859-1", "UTF-16", and "US-ASCII". Encodings are set either through the XML declaration encoding attribute or through the ProtocolEncoding option to XML::Parser or XML::Parser::Expat. For encodings other than the built-ins, expat calls the function load_encoding in the Expat package with the encoding name. This function looks for a file in the path list @XML::Parser::Expat::Encoding_Path, that matches the lower-cased name with a '.enc' extension. The first one it finds, it loads. If you wish to build your own encoding maps, check out the XML::Encoding module from CPAN. AUTHORS Larry Wall wrote version 1.0. Clark Cooper picked up support, changed the API for this version (2.x), provided documentation, and added some standard package features. 
Matt Sergeant is now maintaining XML::Parser XML-Parser-2.46/Parser.pm0000644000000000000000000006611113542323734013642 0ustar rootroot# XML::Parser
#
# Copyright (c) 1998-2000 Larry Wall and Clark Cooper
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.

package XML::Parser;

use strict;

our ( $VERSION, $LWP_load_failed );

use Carp;

BEGIN {
    require XML::Parser::Expat;
    $VERSION = '2.46';

    # Parser.pm and Expat.pm are released together; refuse to run with a
    # mismatched pair.
    die "Parser.pm and Expat.pm versions don't match"
      unless $VERSION eq $XML::Parser::Expat::VERSION;
}

# Set to 1 the first time loading the LWP based external entity handler
# fails, so that later parsers go straight to the file based handler.
$LWP_load_failed = 0;

# new(%args)
#
# Class method: build an XML::Parser object (a blessed copy of %args).
# Resolves the Style option to a package, installs handlers found in that
# package for every known handler type that was not given explicitly in
# Handlers, and installs a default ExternEnt/ExternEntFin pair when neither
# was supplied. Pkg defaults to the calling package.
sub new {
    my ( $class, %args ) = @_;
    my $style = $args{Style};

    # Options listed here are kept on the Parser object and are not
    # forwarded to the Expat constructor by parse/parse_start.
    my $nonexopt = $args{Non_Expat_Options} ||= {};

    $nonexopt->{Style}             = 1;
    $nonexopt->{Non_Expat_Options} = 1;
    $nonexopt->{Handlers}          = 1;
    $nonexopt->{_HNDL_TYPES}       = 1;
    $nonexopt->{NoLWP}             = 1;

    # Valid handler types: everything Expat can set, plus Init and Final
    # which are handled by XML::Parser itself.
    $args{_HNDL_TYPES} = {%XML::Parser::Expat::Handler_Setters};
    $args{_HNDL_TYPES}->{Init}  = 1;
    $args{_HNDL_TYPES}->{Final} = 1;

    $args{Handlers} ||= {};
    my $handlers = $args{Handlers};

    if ( defined($style) ) {
        my $stylepkg = $style;

        # A style name without '::' is a built-in style name.
        if ( $stylepkg !~ /::/ ) {
            $stylepkg = "\u$style";

            eval {
                my $fullpkg   = "XML::Parser::Style::$stylepkg";
                my $stylefile = $fullpkg;
                $stylefile =~ s/::/\//g;
                require "$stylefile.pm";
                $stylepkg = $fullpkg;
            };
            if ($@) {

                # fallback to old behaviour
                $stylepkg = "XML::Parser::$stylepkg";
            }
        }

        foreach my $htype ( keys %{ $args{_HNDL_TYPES} } ) {

            # Handlers explicitly given override
            # handlers from the Style package
            unless ( defined( $handlers->{$htype} ) ) {

                # A handler in the style package must either have
                # exactly the right case as the type name or a
                # completely lower case version of it.

                my $hname = "${stylepkg}::$htype";
                if ( defined(&$hname) ) {
                    $handlers->{$htype} = \&$hname;
                    next;
                }

                $hname = "${stylepkg}::\L$htype";
                if ( defined(&$hname) ) {
                    $handlers->{$htype} = \&$hname;
                    next;
                }
            }
        }
    }

    unless ( defined( $handlers->{ExternEnt} )
        or defined( $handlers->{ExternEntFin} ) )
    {

        if ( $args{NoLWP} or $LWP_load_failed ) {
            $handlers->{ExternEnt}    = \&file_ext_ent_handler;
            $handlers->{ExternEntFin} = \&file_ext_ent_cleanup;
        }
        else {
            # The following just bootstraps the real LWP external entity
            # handler

            $handlers->{ExternEnt} = \&initial_ext_ent_handler;

            # No cleanup function available until LWPExternEnt.pl loaded
        }
    }

    $args{Pkg} ||= caller;
    bless \%args, $class;
}    # End of new

# setHandlers(TYPE, HANDLER [, TYPE, HANDLER ...])
#
# Register handlers by type. Croaks on an odd number of arguments or an
# unknown handler type. Returns a list of (type, previous handler) pairs in
# the order given.
sub setHandlers {
    my ( $self, @handler_pairs ) = @_;

    croak('Uneven number of arguments to setHandlers method')
      if ( int(@handler_pairs) & 1 );

    my @ret;
    while (@handler_pairs) {
        my $type    = shift @handler_pairs;
        my $handler = shift @handler_pairs;
        unless ( defined( $self->{_HNDL_TYPES}->{$type} ) ) {
            my @types = sort keys %{ $self->{_HNDL_TYPES} };

            croak("Unknown Parser handler type: $type\n Valid types: @types");
        }
        push( @ret, $type, $self->{Handlers}->{$type} );
        $self->{Handlers}->{$type} = $handler;
    }

    return @ret;
}

# parse_start([OPT => VALUE ...])
#
# Create an XML::Parser::ExpatNB object from this parser's options (minus
# Non_Expat_Options) plus the extra options given, install the handlers,
# call the Init handler if there is one, and return the ExpatNB object.
# The Final handler, if any, is stored on it as FinalHandler.
sub parse_start {
    my $self          = shift;
    my @expat_options = ();

    my ( $key, $val );
    while ( ( $key, $val ) = each %{$self} ) {
        push( @expat_options, $key, $val )
          unless exists $self->{Non_Expat_Options}->{$key};
    }

    # Init and Final are not Expat handlers; pull them out of the copy.
    my %handlers = %{ $self->{Handlers} };
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    my $expatnb = XML::Parser::ExpatNB->new( @expat_options, @_ );
    $expatnb->setHandlers(%handlers);

    &$init($expatnb)
      if defined($init);

    $expatnb->{_State_} = 1;

    $expatnb->{FinalHandler} = $final
      if defined($final);

    return $expatnb;
}

# parse(SOURCE [, OPT => VALUE ...])
#
# Parse SOURCE with a fresh XML::Parser::Expat object built from this
# parser's options (minus Non_Expat_Options) plus the extra options given.
# Dies with the parse error on failure. On success returns what the Final
# handler returns (called in the caller's context) or, without a Final
# handler, the result of Expat's parse in scalar context.
sub parse {
    my $self          = shift;
    my $arg           = shift;
    my @expat_options = ();

    my ( $key, $val );
    while ( ( $key, $val ) = each %{$self} ) {
        push( @expat_options, $key, $val )
          unless exists $self->{Non_Expat_Options}->{$key};
    }

    my $expat = XML::Parser::Expat->new( @expat_options, @_ );

    # Init and Final are not Expat handlers; pull them out of the copy.
    my %handlers = %{ $self->{Handlers} };
    my $init     = delete $handlers{Init};
    my $final    = delete $handlers{Final};

    $expat->setHandlers(%handlers);

    if ( $self->{Base} ) {
        $expat->base( $self->{Base} );
    }

    my @result = ();
    my $result;

    # The Init handler runs inside the eval so that the Expat object is
    # released even when Init dies, not only when the parse itself fails.
    my $ok = eval {
        $init->($expat) if defined($init);
        $result = $expat->parse($arg);
        1;
    };
    unless ($ok) {
        my $err = $@;
        $expat->release;
        die $err;
    }

    if ( $result and defined($final) ) {
        if (wantarray) {
            @result = &$final($expat);
        }
        else {
            $result = &$final($expat);
        }
    }

    $expat->release;

    return unless defined wantarray;
    return wantarray ? @result : $result;
}

# parsestring(...)
#
# Backwards-compatible alias for parse.
sub parsestring {
    my $self = shift;
    $self->parse(@_);
}

# parsefile(FILE [, OPT => VALUE ...])
#
# Open FILE for reading in binary mode, record it as the parser's Base, and
# call parse on the handle in the caller's context. The handle is closed
# whether or not parse dies; a parse error is re-thrown after the close.
sub parsefile {
    my $self = shift;
    my $file = shift;

    open( my $fh, '<', $file ) or croak "Couldn't open $file:\n$!";
    binmode($fh);
    my @ret;
    my $ret;

    $self->{Base} = $file;

    if (wantarray) {
        eval { @ret = $self->parse( $fh, @_ ); };
    }
    else {
        eval { $ret = $self->parse( $fh, @_ ); };
    }
    my $err = $@;
    close($fh);
    die $err if $err;

    return unless defined wantarray;
    return wantarray ? @ret : $ret;
}

# initial_ext_ent_handler(Expat, Base, Sysid, Pubid)
#
# Bootstrap ExternEnt handler. On first use it tries to load the LWP based
# handler; on success it installs that pair on the Expat object and jumps
# to it, otherwise it warns once, remembers the failure, installs the file
# based pair and jumps to the file based handler.
sub initial_ext_ent_handler {

    # This just bootstraps in the real lwp_ext_ent_handler which
    # also loads the URI and LWP modules.

    unless ($LWP_load_failed) {
        local ($^W) = 0;

        my $stat = eval { require('XML/Parser/LWPExternEnt.pl'); };

        if ($stat) {
            $_[0]->setHandlers(
                ExternEnt    => \&lwp_ext_ent_handler,
                ExternEntFin => \&lwp_ext_ent_cleanup
            );

            # goto &sub keeps @_ intact for the real handler.
            goto &lwp_ext_ent_handler;
        }

        # Failed to load lwp handler, act as if NoLWP

        $LWP_load_failed = 1;

        my $cmsg = "Couldn't load LWP based external entity handler\n" . "Switching to file-based external entity handler\n" . " (To avoid this message, use NoLWP option to XML::Parser)\n";

        warn($cmsg);
    }

    $_[0]->setHandlers(
        ExternEnt    => \&file_ext_ent_handler,
        ExternEntFin => \&file_ext_ent_cleanup
    );
    goto &file_ext_ent_handler;
}

# file_ext_ent_handler(Expat, Base, Sysid)
#
# File based ExternEnt handler. Resolves a relative system id against Base,
# opens the resulting path read-only and returns the handle. On failure it
# appends a message to the Expat object's ErrorMessage and returns undef.
# The previous base and the handle are pushed on per-parse stacks so that
# file_ext_ent_cleanup can restore them.
sub file_ext_ent_handler {
    my ( $xp, $base, $path ) = @_;

    # Prepend base only for relative paths

    if ( defined($base)
        and not( $path =~ m!^(?:[\\/]|\w+:)! ) )
    {
        my $newpath = $base;
        $newpath =~ s![^\\/:]*$!$path!;
        $path = $newpath;
    }

    # Kept for backward compatibility of the error report, even though the
    # open below no longer interprets these characters.
    if (   $path =~ /^\s*[|>+]/
        or $path =~ /\|\s*$/ )
    {
        $xp->{ErrorMessage} .= "System ID ($path) contains Perl IO control characters";
        return undef;
    }

    require IO::File;

    # An explicit mode makes IO::File open the path literally and read-only;
    # with no mode it falls back to 2-arg open, which interprets magic
    # characters in a system id taken from the document.
    my $fh = IO::File->new( $path, '<' );
    unless ( defined $fh ) {
        $xp->{ErrorMessage} .= "Failed to open $path:\n$!";
        return undef;
    }

    $xp->{_BaseStack} ||= [];
    $xp->{_FhStack}   ||= [];

    push( @{ $xp->{_BaseStack} }, $base );
    push( @{ $xp->{_FhStack} },   $fh );

    $xp->base($path);

    return $fh;
}

# file_ext_ent_cleanup(Expat)
#
# ExternEntFin partner of file_ext_ent_handler: close the most recently
# opened entity handle and restore the base that was in effect before it.
sub file_ext_ent_cleanup {
    my ($xp) = @_;

    my $fh = pop( @{ $xp->{_FhStack} } );

    # The stack is empty if the paired handler never opened a file.
    $fh->close if defined $fh;

    my $base = pop( @{ $xp->{_BaseStack} } );
    $xp->base($base);
}

1;

__END__

=head1 NAME

XML::Parser - A perl module for parsing XML documents

=head1 SYNOPSIS

  use XML::Parser;

  $p1 = XML::Parser->new(Style => 'Debug');
  $p1->parsefile('REC-xml-19980210.xml');
  $p1->parse('<foo id="me">Hello World</foo>');

  # Alternative
  $p2 = XML::Parser->new(Handlers => {Start => \&handle_start,
                                      End   => \&handle_end,
                                      Char  => \&handle_char});
  $p2->parse($socket);

  # Another alternative
  $p3 = XML::Parser->new(ErrorContext => 2);

  $p3->setHandlers(Char    => \&text,
                   Default => \&other);

  open(my $fh, 'xmlgenerator |');
  $p3->parse($fh, ProtocolEncoding => 'ISO-8859-1');
  close($fh);

  $p3->parsefile('junk.xml', ErrorContext => 3);

=begin man

.ds PI

=end man

=head1 DESCRIPTION

This module provides ways to parse XML documents. It is built on top of
L<XML::Parser::Expat>, which is a lower level interface to James Clark's
expat library. Each call to one of the parsing methods creates a new
instance of XML::Parser::Expat which is then used to parse the document.
Expat options may be provided when the XML::Parser object is created.
These options are then passed on to the Expat object on each parse call.
They can also be given as extra arguments to the parse methods, in which
case they override options given at XML::Parser creation time.

The behavior of the parser is controlled either by C<L</STYLES>> and/or
C<L</HANDLERS>> options, or by L</setHandlers> method.
These all provide mechanisms for XML::Parser to set the handlers needed by XML::Parser::Expat. If neither C