pax_global_header00006660000000000000000000000064140040700250014502gustar00rootroot0000000000000052 comment=77c0d35b348c438ce0b746d4a9316b75b7daee48 jruby-joni-2.1.41/000077500000000000000000000000001400407002500136575ustar00rootroot00000000000000jruby-joni-2.1.41/.gitignore000066400000000000000000000000071400407002500156440ustar00rootroot00000000000000target jruby-joni-2.1.41/.mvn/000077500000000000000000000000001400407002500145355ustar00rootroot00000000000000jruby-joni-2.1.41/.mvn/wrapper/000077500000000000000000000000001400407002500162155ustar00rootroot00000000000000jruby-joni-2.1.41/.mvn/wrapper/maven-wrapper.jar000077500000000000000000001363201400407002500215070ustar00rootroot00000000000000PK! META-INF/PKPK!META-INF/MANIFEST.MF 0EC~ Eκ )mLcſPW{s>3P"Wg8I15 ^I!dj51d8}#S+vI )_joJՄt;97%roiFe|/5=PK+'+PK!META-INF/maven/PKPK!META-INF/maven/io.takari/PKPK!'META-INF/maven/io.takari/maven-wrapper/PKPK!5META-INF/maven/io.takari/maven-wrapper/pom.properties+K-*ϳ533J//-L+IN,J,*LKL. &%qPKyt:9PK!org/PKPK! 
org/apache/PKPK!org/apache/maven/PKPK!org/apache/maven/wrapper/PKPK!3org/apache/maven/wrapper/BootstrapMainStarter.classVvG`#bup "cA -l3'y^dUa#Ϗ/jlIsNzUu߻ҟI`ðe/hzEV֗=J%n3B,KVznCabO2ǭÇ*2&sܾJ!ziV 9~Q4-J1e8lD7#l\H-]1J<% _^0,Qaطg}& *pŸ7BF}MzmC& tf;N 2 jdѫa`TA twSa" \̬8 m.nR&7oSMA2aH4H0NᴂZvhsEaؙnp[[SfTE 爬Ÿe ,ZuǷ"Ic2O.U'Y)ejr.dFID(p>TAk#|LHe]PǮNUyW#11]5QC2[R!fZ"V4Ek kcY 6rU ?'e_v-jNvz<ײҾق| yBjw&vQs*4ҨeVzLm7j};%c7C}?JvvH8g`h&`;Yp ptf 5 Qk8G^h 2>T5oG?P*fR'&,ZUV{?!^oڴ$;]";xzhgdry~B`G>a6I#kl_~n0LHRF\z1WuziK;PKB` PK!Sorg/apache/maven/wrapper/DefaultDownloader$SystemPropertiesProxyAuthenticator.class]oAKRT~]TV2UWSa2Cf"Jol?eTjIڋnsfy3sfo?n,y/ ‰sF/[ GE@A/V>>9{Ad=!JM]Ah:}~S #zMh>;FSsh`f ϴ (DcI{iH˗2)8۪= 0bDحm cMPСc'*c\ݫ>)s_ֶ|{ʱ7/, S7-1Q* 1[SFapL޼* ib}=oa`Arƃ8b(8˜#I{+IESwۃU?p7fuWy߅mv38v?Kx 2:g]U]jl1(['8JVd1 |bV,H *+g5N QA: E\:a#;h8 NJӱqs9q%bO8G(Gc9¥捉/9zUsfO8br-# S+q gpinN8"7YJ' ְW=7)<-Q\0Ҙg8zE7>,$,RO;?=rX tq9Rcy6I6#a(RXPP\=`7QD+=)qĮTu,"'8V-Q]uc !xj(T2X$8v>/89K;p38҃GFcx&,R&4~[mj-MN<#?%OEs"rD F~%_ȯKE 3jOgL0LgBP:FtFKgZB|U|Q]te9N@'ڋ<`heKU*e |zV6?`,2WJc5\ha5`ŏcJ'PK8~O PK!)org/apache/maven/wrapper/Downloader.classE@x.6ν"H$j'fU<[I&3o&yBhs2IDadS]Dː Ѝe.E"A1~B|3]/G ]NH&s,pyE#KN TQD5ԭvrl^APKqPK!(org/apache/maven/wrapper/Installer.classXi`\u47ɲ-0f˖ƖA2Ʋ%Ye1،5O3Y,@B$–vR&0# KihӅq[ڴi[Jӽ7cwkX#2P"ڎCVkԊѨkPȊp ֝[ AmWzv2gg߁UyD,j+Hb?ܨTVZ O= wVպ2DVLFC>He/H0I36TΫ\[A)LE~՞ jLxP&L Fd'cA}LhZLƳyc?auW?|Cf$8A;ұ*mݺ-]a+kq^Au޾rߓ4uXAF L4>GJP &Vxgh?ʹ !MIL4+,`k 4CbkG;zc3f`\GۂQ_YzЎhYp \uLQn4Y!~̊$ߒ 'D7 n¬ I@OQIdpTwjꘑe'4v2ꧯ-?P1C+QLMe2q֝Ӌ m^5Ϥnb~ Y ;hs̫StF +g^{tT] FFJF0l'MDq';7vۂS@@^:_Ք 1 qs̎ީ|үj%UG%nw1;G7p41t2q/>LжY]r̓x :}GQb%Atn<ĜڥWK1ڱqwi<CɄ9@!x'ʊU1;9QO2! 
FQƧ7a{/8S MXyv%#`7~Yp`7P`5ʺJoވۑ?#xX/V@ F^8ӢWL<1bFXOl!r^b M|gXK&}ėe>f 'ZX8kM|/xԬ7 X8᰽paǚF+Tm0b:'S*/D%4` $aֺ͝d{J yhpق lD!g$ dmē 9غf*Gze&/Sv !2xwӖI$VWʠ s)j+[lw 5~4=)l#>.Eb) N[(Br9pZ:w&9CFe(S W= a INN!MEB7lGNjuIx6foԔ;ylw1+Gܔ~,*ꩥNC X:g1M{<,c *St=r|Ȑ2+ xz/r0G 6}DI{gUHQ)wآtN&{8a+ 澇( ߞH9 g?]ҴnW)EfnTW;\y|盏4o^44 M`Ygg>[Qܺ"oz.3R1TP Kqqŷ+U`ɣThc Wc&>\ /cG-ee)\^^f\q )\ NݧMt[Q՟®LuoH)p)?!@ j$Bnr**nuO ^QcBUV}$N눮^y͵qho8BZO K렂j=ĐR>*8u- +OU}UpSNBV%\W4pOM_o(=gJT + Ըjʞ<~판4Z`?Q8489Ss6z?\i7VtXbSA FOc՟0HvH$Ҫ?bWP |UJoQ)gp$M'>% |Rv jjj)Mb{ %eC q3L߼Z#%}g(&Ⴞoëbea\YT-|ak>'s*c1vA5BZt(N:an[ $ߞ]x3}]Ȼ/>k rcjA[rMPJ!Y&WҰlԽr7VZ8VrwRN?J*J K4$kSr]oclLɦgPz)L!jsEi<ԏ>t3exzKcJ(tjb-=yͥٳrEO_|;啸x >xsO`%q┣Ժe!$`6Rd͘ΡPɨ/ke7'XԤ/x>t7aN7ԺtIM+ 9#`k8"c񡧩iX/_<>|z"̦Qr/Iu|o>l5䘌eL.*N[C؜|m^1`SJ"^J6h :${WYC^+P.p|Lv'ȩW#I*\OBPKɉ\PK!%org/apache/maven/wrapper/Logger.classSn@=n.ZZJI\$^Z!DU$ER_,+_Nŧ > 1D"l;;s̱቎B,=/EJ>tbRGz x9; zo͋&c0C16@TfM3X[w>3Q=G"PTDCf $~䭵&S&긢yƼf*Q^|xX E{e0\0aGb; BzjAK_Im=D,P b?ԏ>P+U+I_yMd@ucl T**L<5I!0q&]WVFbWdP"r t1ia̚1.}E e Awp FeS4l6vr'lOlˮg81K!PK4NamPK!/org/apache/maven/wrapper/MavenWrapperMain.classXxg~eugP&iv= nB݋|/N)Pl Դ)2{@ɒ,7%بnI4[UAHO'=b ם^"zi=ՋYNZ}B\$LI`zTz( *vc@tPM!&IsT~E1r6XK X KUF izw2~\1]Ȋj!6䳻Gɠ6agHvʤaqWMG !꓎A6*J,+yW8Fʧm]|n5b)٬dƴdNwV//mrb[brZuiC_>t1I"tcу')ص$Zn蓌)61K}@ ,]׷ǚtX8u*qIN^ydEZm+hSIe4&WrA#lL:YSfHt\E$s6:R>bV <#K?zkqzw1'D6)=t,B9e̲sBZs'F(e-Z<`R%et[iZREӒ1Ś6cvGGԔ9Ǎ9-PUa*7C.iWs& =`]?MMֺrmnK>gHˣ~Xx{ ad?뮳a+\Rp Mَfy,bM3lbdL!ܞVVV"{Bqa2eSg^H ~^]JNbn%p Js͇Y_ޜ-;?C}X⥿6 >`.-Q~mt-&b}GqW$|mrGl}2EVN)?| ++&x c=Xx̓`Dz t8_꿆o(:g%U|pg~+iGYpȾ{ a)Cԝ`_'?Qy:U?c;`_m$fKs$|mM{3 NonWykeNY_WRE1y'Su>MPfYq_0*S;z3W0]A1΢(;%`DU44GF]YoG xPpQ)~\/ JE0-Ӧ`CT,u ! 
Hl?>}@c}{wk-V61i#¯5B*OO$kLxï@GCvvCyzN v[A`0k)I r7ب(c儖Q:ZE<{G{Ola4^ i2ԣƘ 3x\SՍIW/*+bo9U'ј}f+I= i6O}t^\zy^?6`#W\lCp|gcRIx.lEஂjvgyw1nQ=<WZ+R܈j Y꬝ESZgѾuV{΂/XőrE,%%9Xsq8(Arg<}Ϝ3xn z@uDe {t).gWbIF!jŋb7 -ܗn\O[vĤxh\ˣ:_*$w,UV9jRx=Hm](𞳙P6Q5nU(!j堧2>.of|<3{djHwYyz1sk[PH j\D$ڸkx"B0eE\,*]n ODm%UbMMYP7Pʖz @UKnuIIס$LcNgSq+'DHPK PK!>org/apache/maven/wrapper/PathAssembler$LocalDistribution.classR]OQ=-PWX?Ait(1!I&>`r^lw[[ؘG^b̙3gɞ`«Lb1D-F#ϢYG$G*/j ʄ` NDjd x]F,~f:R}o$ԛ3W*M=`5Za֕KS},bJFt><&c.aRqLs$^;MBcg cU_w8(sE;܎`%ڝ OPuw3e2QEnj5fF˱Q7Zps$ o?/8 B£ɵO9V,}@PKoItPK!,org/apache/maven/wrapper/PathAssembler.classV}kK, `*ʒ-&/Rv}qyAUvWi$mNKzߧ&7ھYɒmD>?Hμyf6WM NXv.̂\Ԗt3y NkBy`umIK53g\m龙ѫӓCWCW'&RGƓm^;,q5ӝE=Vmq`SCSUWA gtၜvt{Zi]cX F^gi4GfB3Vv3ayݞҘ@2Z~F > :=~xOt3V(P6C&拮a nK+f䊶&;z嘗`!B$w+y*0 fnrz"E 8 vT3դG-Ձl~(H/ZK-W7{wI\E7z,B+8x`@RIţ R~!?}x)יkx\VE#mӫl#<*.[^o\bk&%))OlP!R۪t5;/VZŨt_#8Ѥ)q"_1 ¿T2?:5'9<$`!oRQ|ZgaK xA"ES_}NVH7"wKx;WKakt^.?R[?N ?b]eZt_E9dN3%,Qr<ljbbCU:jQ;[1|{O* *# $pBPK|+?PK!6org/apache/maven/wrapper/SystemPropertiesHandler.classUY[W~¸AEԢQib P &3d"Uo<sk1ΣI!0fG1{S$O\|pk/!NӘ#=ŧ|t/SJ!z(-,RuGx}j|H_"~7[6khs8z|9+&l}PK{`- PK!3org/apache/maven/wrapper/WrapperConfiguration.classSUǿB҅jĆ-BZ I*mCI0XְT%tFƎ@CҿVz]eiDx@soG?WAThk f} sRan],u;68#0hwQ17)FMs qt7bD$/s/=4ƤΈ,+Hao0>gs3&ͪ!S#KR_pߛ'AÏiΠѴzÚbYpa.es&1 DɊ58ىh<=HIOҢy2@y2I=転dF~Ft `K􍾞ͥ?@>.髞^k%= a@m-B],7F+|MMBQϩO\?FPAc";@WX {j^~VAhHA  O Ka5) @ؔ5r#3ul8o64M$+bYUꠑeTILjFrDmhvŔijeq)xOM wg,ZH&LabMkVA2V;z+=fFbakuVa >ܯbbLy3 6:TsA4!V  ZG bN|-ЌFN,Nֈ8 afb +Xr%k!rHŻgςxIjŜmL}gz^8 ]* ZnZ͘L;*v @M;XB7`+v3'rT+ScEb1ڃ8H!7؀buDj!k"VX)#8˚Z&imoixy,f2梐="*IR^ +c!R~hZyg5|VVP͍MvB<RaJnUH*TLH~ZEO>B?LFljB3鲑Y<8zb6#)/j˜)` ⷇ld2g̮ҐAzf|*,_9 W6 U˜lV$W եe$&a\Ty@*>#m$):-'T|_A(=g; -#~Iŗ抉T^OF0E@ I/hj˴e74WU&5-Q(PA|\2?T-M?/2]t=WVPŏc]3*1#e/ADU@jHgM!#WxQgSDKLEI@jƦ;A]R>qK{mdMfZ&%Y(.`_ l^ůkHr*+8yS{jD">-@[}c.u:^ewwIb%s6v"{cxs][&zFb;=n;q qv*;!ۯb vH1d[5 ǯu 8.%10> lsfȳs9359gEyv6;[%|eFųxy?wj/˹gP qt' A8Wh=>%F%Z\ j,+b3)/(qYŒ|l!: OI+gyEe_kV.B-A n`Rmq#k Q"\k K7rHP (9xLw(ݎ1g'k1ᷤDho\1&#m%.~/K_彲4 CЙQI>*ۍRbs>E!I[0I)䦤5, 
WA+= uOI3Y-iwxcw%0\kY"z0R!`o_ğ$ "3B0+A4M``MaէM & Ckh;QC$K718ŪuT5Rrⱊdlsa1Uqu(Vsy+l ?10MTu)^𤸡qQPKǣ9,PK!org/apache/maven/wrapper/cli/PKPK!?org/apache/maven/wrapper/cli/AbstractCommandLineConverter.classUNA=#JmWXZ*ؚPk"!BEKYLڤЇj.E-?v.|s ??~XF} k*7^Ol^ [7JY967J̭iu,lG!( G%n#a8!3,wcϘl0x( "Q'a)ûku=8E?}t]tnݞ^OaiK7б0dp #qZesi CƄxv^S'.ú蕯g0iCG.<ܜ`U+uۦ|[滽s& >]yy:}AROo/4ۤɨv; SqڰaP?z)2F=/BcfQb.)OAK6p_K.40尛!eDu缭Mj9z8Q <H(^X+k~>’=ZTmAk ٕ] Ҡ Z0Bm3bՇVcM#v4"%H^b*gIBTĚN+&R$= gE= d@,y99gׯ\Ǐ"f +MYRSEyGSlR Zjzv,Yq,MgbQ7MΩ 5Sght͙bEc9aPC!(v \~T\WGzAed E.dKIlf̫B:CW4ݖwTAe+bd{ʗЩF[@NA$đ`8OvNh6ؒLי zgQmסvd]<ےRIe-ˎy'3SidHlovc2J%Nl1^=Eu"&N> > j!Ň"NJH1<8{zL2c̊aoE_Qэ %6RxYv"2+>E!,;ROv:O$b{W%|T,tui׷UI 9 +BY7ԽMfXW"d8WwS*ofiYJ.:Fs/d2My&aψ ]{ Kr'^>ܠ4kŕWԍ bUFKIŐk1O<$= 7q5rȲQ'N=gYl5Sia47"}0?_hІd? U?}wd"xj$iՄwte, Խ\;2yѮ"." TU${vi PuQ҈#՗s+?f6Sv-:XKpFeL1}O&0] M9a_ y/<-C,p-~(}.˄ qb lILhfc`PwP@e Iߐ(,ؤԸs}sqX+cяEgVc`Xe W?PKj F PK!?org/apache/maven/wrapper/cli/CommandLineArgumentException.classAO0Et FG!1DŃe4fn'~?о_?^\D*M#N)L(bv}n bpnL$. 
yRۧ³S k.x%9_~":!A lQAPҝ`䀎|FPqBC*Η|21A8^;* gڍDOP .McŬX0.z_/4 E+"Ij7UUyNF=KvoʪGJYǶj (vQR */YasG't![%LG8,V׵OPKm6TgPK!7org/apache/maven/wrapper/cli/CommandLineConverter.classR]O0;eĠ&&K4^ulRGK1Hѱgsu8"Pe@YgPɒ%CN;q11N,2)J #12Ї Py?ja]ܾ<cڹ$B <,H$pjZuSE]S>FˎCSZ3礏Lq0Ybu-_/U\L<5Y Ќu*Lx"}㡢81eljYk.o/J+TPu4 [6 ;]9PK=8LPK!4org/apache/maven/wrapper/cli/CommandLineOption.classU[WW'ITXXDC"ĻV@AiLfKKڇY_|h .Z]}kgodȝjW̙s9_phhLMxts9J'MK]Ӷ4 toDְ2-r4.t%ݢkf X03)0X;@-ieƮC7L1'-wYN&F@k1ٙczDFċiwdPˆcф*npMMĪq2ݫ*iW:FV / ě4& :"h/i8D6 Q:ڱGqLÀ@di"Hŕ;7qvNjc~:jڎ@OtCX'NaT.JͮN3%R v1llJy\pA T p51h#J芎1S{Fa΋**Rs܆tK;ed Ts4w;u+*}ԆY#`FgjMWJj~mNVU ]yf!PujUৣTyGG!!g vTdLjrhqhf n锭R:`}UH~f@`YieMOjC(_ N=w/+`Ӛ эP{嵦:k0# ,ֿg@Fcӧj.SVz6k?hAPyqf>+6|yf4l좓Lu@z:21l>ޖ98kA/}Y!!F%88^BO W^m#Ɛ F\wç^\ w!Z1~L=8pU x1}:Ӌ &ڂI\)MK&nK>* Vj1*nco|j/!X?%N #PL3s8cs>F5z hi6iƒ)KaSb9?}0w)$3z\;X]~`{`x$Ѻ1}Gߢ6z`>v({}]aϳ f#yIBMXGm^{ uZfQ'ݯS,]&Aho{uZ.q咏R(< A4+M^r^]>/imbA%1sa9g|3sfϯ bFApܔ.X34m}٬FN`_ 7gt',7%7Vb!¸OCYrp'?ZY"ΪY B*T^B_g<1ipBrksbSiaZv*V~|Ic*qIAߟ- qP# ƒ mxVZXᆂ# 7qL44+*e)S!0 &}vЂ *!UJr|=ClEk 3s!*0 CuzsTDq_~Q ?rA.oAd.&_u.sFVFt6\Ü(k+!#hӶmLu+d)`A凂aoO+s"O#Z6~QS|N*|N0wSr-5݁4QU4:G#Xфf 1<ZV߱x#h|텾hEx9ʒ{{eyn}^f}{H4fA+"Lyw"CGAlu@tF=.uQ9fb^!t btSQmɯׄKQˎK+6Mc"hJ&&D犢nc*G>]lՕq`:nI)n3үbU4oRaKsB KWJ,K@_U끡’^-6}w@ڔC0 d p>Vk/Mm0&4FBkЎg~am> }K_cj w_pυF?c655sTSPFj6f@$nI'r}L #EnױV: ͍߮|0#8CWO8a;Y*F#BE֑nbg 'vOFuwM9hsga[=eq8Oޯ9Ҩ ǟMg=H8OGʪ)9]P1MH)Q N/dj&t_ݍ)ɺ]#x\>~nDpHI$(^ Qăywxw1(gi ӿ@{bdFTB}Ȗ3|4Ҭ{r\*dzǘ'(EvPќ<}O1$pPԏ6$FX0Uh--0ZM}%3xÜӻLLW`ZV68/p׊CڿPK PK!Torg/apache/maven/wrapper/cli/CommandLineParser$CaseInsensitiveStringComparator.classS[OAftlVPQz_K&&M$)P2d;no?L|0>@c999?~}`;,@=>3T(^;:J%jPtT(T(#9HK51< COs5>A ˰{'xT2j1ؕj!ND6\8ǰlLFBc_0}Sg&:!an4AlQJYٜT;9`$[kxPJ1PZ.VoOX7yyW0d@.hN)`915^(Cr3 CL@fks@3"ݓCţYGҢn< Eΰ6oMg ,Z$#H.Ծ  .}~"O>I243$',gF4D*Y ZvZK2ST=ӸRe,3l.i"bQjйcחi+Vuvڦ@v,+Ń?PKG*PK!Korg/apache/maven/wrapper/cli/CommandLineParser$KnownOptionParserState.classXwUC5c#рKҠb0Ch`d3hL ;UmUu,*nq!8c`Lqߎ?ſ@ts?z8tw彗zk2Z_htZ㉔l hf0v͍5dHe# BJ*@7چ'pM  |85GSg'@۴T#16 
jTx5Rvqinagjn&157fBZ75P{7!kK="3+|' xUP1'0! JPb|7b աqL7%0SFa| 0> {a|PKF{)6NMPK!Jorg/apache/maven/wrapper/cli/CommandLineParser$MissingOptionArgState.class[OAӖ.[. ( 5E+`j۴Lʚfv~$QH|gvZ/MIӝ9s~sif`x᩺-, i)heZ\˭-WPol߷e}؞\R̀"|h ^dx^)i--`a%=PaH-{[@}&20v >̂D kmǭ WƹWN+[a*ض}N/n[1JҠ?VY*X?/r[{a^?rNrUROE0Oy(AUe";U=|Ezr6Y莻'oz;&Vm=:o +)Zv ? ioa{RZ,!0聦] -Nd$+Ak_(~"OfסAsa0A:1D@;e9:CF5yLpMNq)P Ɯt+@WJ3n/MDM\.]T ~ j0i> Ԣ1ۄf (ƨGH3t8L%ęc J1d>qlFjV3n12;R|5X͕w휚b`) tGl;o.:KJeO;˴TC.4ZDPK=PK!Eorg/apache/maven/wrapper/cli/CommandLineParser$OptionComparator.classT[OQ-r- j[.]JULjHPNʒv I l7 - Ň3gvfw9_} g.*;R=imQJ[/ =kU*|-lGUװLW-\"=Y7veN5(%4eB0$ֶ #4\#k-i[eI[EQ1a- dZqby2'saD1aj-6!\'KҭD2 sA #WچY:&aPr3g&m0F!2Gu]czT ȟɟGLd<&^; GLG{ $2\$pa*^L|cKfK2䶼{Hf[IFZb!s 57[>ϯH`<fKC=CM)U `LԸ[ ZjeF%3MigqayLLF8RW!c˻,e!Y veob=Rw@7zXVbt0WdBBL8S&g8Bs_`B Gh`%4 k^pO~Rӌ Ql| ݩoGs)WƦය+FYF:>m;M~'eAUogff0@Xq1  ưfj1av_ 0H.,ܖXvcBQ]$IUWw৻X|Wޛ 7_P' noPK ҔPK!Aorg/apache/maven/wrapper/cli/CommandLineParser$OptionString.classS]OQ=SZk .+~G1&&4|)K |DJ?dDnWޝ{f̙o{CMCP%^VKlIzv[:ւj xBL.#V҄+!'&ӍsGRo\/R ~Ca \T0h ~e1xi&b͕Jo .G0;! M2zmW[-;\*8!U8 ]7E\"a$(|D넑cS~2a6dcWՌ&BkQPz B(QS)L3%d#̢B n`(*_D/!Pa2 '/J'{+f`˧ZbOg'W 5#TO 8 j*Uo1n:V]2gv3gwN{LCLti[ Ŗ*b4)󆣕o\a1]\a0T(!㾚 nDJYUA(9{\O,U3bn0'1#gq~E}"v|tPKvfPK!Korg/apache/maven/wrapper/cli/CommandLineParser$OptionStringComparator.classTMOA~feeY("ZPAh)vҨiDoCK y/hb/TΝb #HÚ2N641a6Gs |#Ba6X yg. 
ma rc,4Y1Mt?LOL8R2{qR݂w\/N'yɚAcPK7yPK!@org/apache/maven/wrapper/cli/CommandLineParser$ParserState.classn@M:uSI#b% E !EmTY8ĕ=nĆ H,x q4ٔXs{?3/pd#Gx+I7WRvwQ 5J Kn"i"/ĕpiBz aW~򊐯{ <֊X*dp'a_s$҉<M>yi%#?&w_aS_rqGUMB*,y6flfEE`"aa3/8C6"5#`@I75gG$R~|j@x][Y9 nt=7:4NJID>Ͳfм9ݟʬy♛͙ˑ8}C /rpx6AEp܀tPJAe]9nT6krf%&]m ?#~OxW]#y\W=2dh|2i瘛Yl+ +j9[oPKtSPK!Morg/apache/maven/wrapper/cli/CommandLineParser$UnknownOptionParserState.classU[OQWX(H+ ] /H* Aړ6-A.^|9 T ݙs739q܉#DxESTD~Ye"mst|2rY؅9˖ ­JwvV'rޢ'<GE&BZs-b5'[$ν+, h.ze#K}bmyWfVSf&GL;!nHfҭB6u鈣Mi}:ڡ)-#[K}*JRqE)'\K͈lU tԔ1]E=)X+KV3y۱7z?[qcq$`b?!p6ktW ͔ B(f:Ts6"AByaʎչOCgLJʪ H [X@g$$Tԃ n-NS)m19D+BrIZB ?6؂^&yc`j 7 34Mn­*unt$u<53\CY7o,]:V!Ӻe=:}7=-a)(9:jӾ㫸m‰wxGhFlN ;W,_Ã1+Lb%ۙl _ fG}^E 7t<] /fnYΚaRU郾&` v V3 e6v/TYWTw uNlq(dmT(=UT</5{5|=҅=>r,ȧ*NP+>/Eu`ӡ8j%b H0xVRЇt1f[,j:^Q`HfT;yHǫ H՟֫:rQ vo G2,87^;#v]6IY5[A N0* p'Q`VA/31F-~KTaǪ Q4BMa4$KpiwL+ȟCq2ψt2՚R˺E;G4Hr,MtfI1LFpUV;2^xwlX5'؞SAjD.(hWp |'. +\]&dҭ.헥iNxKRRh22*5s"o0"elYD$h2+%̻ZJ.vѱZjeu\S$uQ˹iꐖjR#?.] )=WRvOSnZ#2Ǵƿ0Fuk!*Ib5vDR7Qn|l&owg$&:X^^f2צzqn9'J6RwZ#<>Yu:Oٔat!&_f~vJB\ϭN4N%9oe^*J~]3$1MݵLIbI`}"٪r힣%9' CwzF'&OMj ۢ(Sь"_L0*1ˍ T *Q=lRW[OX«|Nv@ )SLȠELkb-L֢3hhw2Z[px:?\N6s~"xXpJ K|#.W{>+ͱ*^ubć,ji=c5g0M4fhF6a3E|̑]S$̼[%Nø_̿n6aZƲUۼn6Bf'ZL?-O91o)jrPmf)N8m/@o1|zε| 7A1L$ב3V: $x": (e)v+ZͫyNzQ2'ӷ,-Z%v-p _=( ^RA0={YHTTzCnPd9mu-J(5a<՜ ~(KjVp^WWځjz1gRfw'I nZq8Pe_hX;Zڼ )e*t:J] Ӱc-h@+VMڱ28. n|ʼn䱃)DD葩,b| nYiǭyn^.'q|[9+COaxPC!s<". Qi-ʀOWgL .ğ&o[\%w"R{OSgͪ9~xN!#4t5碵n%>۔즏S9<XvUfU:ץ֣ǒKVnzGx"=(82;1ĊY97Acªa|_B>#b8Ucb|^?F~3;g@mnњͼբ~4x=qq{ݬu%RM!PD?&kQkJy]eG^Pz^G{ qxddtڌf:<2PǹX@gA̓|j6Q2#+" bO*/WݜVX7 %2x~2/]N uÖhōY6 .OҙRgynoVMР6YjD]~J_y];j)K@_4dp>d8~Ǯ#Q#?ѵ.|xw'7Y8,,r]decy璛-pg3L~̈n^q%˭r[#$j9?PK5(PK!4org/apache/maven/wrapper/cli/ParsedCommandLine.classVi{~ȃ1( F`-`]dyMk,t_>$S˷>Ooi{g$K8 4_4w9=G_o-;֋zfH/F!).1kiI/d'͂AH`E}YOB.=sq6YEǴ ѕY6 9L*cSzqP`ì+N6w&W]' ،pmdwTeph]srd\vl}Ε莎64K5Kj2 3,5Qc0~>o L }6 ~H_1+kDDM"LEUm HDܫ!o"EQZаv ܳj5b?k[-؋D=iERٔsDz:5|Nx-H?] 
igfKF<70aw#(# {PC8*bl Qʱ!u ՚+|Y30],$40%NDҴ"9+ˈ!N>fc96[ 9&"X:U|ƲЊÑ/b.&CPp=)+ 1_N}`eah#s w2eEn.-)dOZ&IƋ8'48\%b9h4|WnȿTkGf>kQdьE@*.'5<܍x1=Koߤ|Kʢ$uZvT=Os4n%Z e 3@)@+zL3fI dz^÷1cݔUŧơHa5icV>O;UKl ~ ;独S25\ WMFڈ;Q$8ٌ&bdǁ?;j2krzR/g ?b) lo'u@a_i^2{בF=U|h~Ri Z ,m)Y*:xC{ @6;g'Ze;cYw6` D|-q,f.[8:o.+m49>y$os&]G8~]9unaU軅}8B8r MW=Y'X, W;>op#2&lǽ؁=؅(vgu8Idkh ӌ(Y)::N9213< Zjr8oڋ/B0 nمvKܵO)D߸( 媼gpȒ Sa܁U䅶_A4՞s>վ>P,8,f p8@e$ab<уp,qo\ryi\R\6sF8A˼c'sL.C5\Jl{<-D2V%/ q=PDQnT~Ot}έuJ>@8t"Oת uhO긟ػ8a4'UdV!W `%@Mr5u^%T JnIďZRqJ uBʬe"=_$ 5WpE;Mf? `5i{:פM+5~y,xeZ }8[UE蒺'zӥjZ[t W|]PK!@zPK!:org/apache/maven/wrapper/cli/ParsedCommandLineOption.classS]OA=Ӗn)KUB[+EĄxIdm[>&*Ae_vv{g'+1֭TZS{mFCXZеjFVf9bauSAaxpfE%!xhY9lٺe/-K0,;;XJvJ95"Ydl" ?U#`anYP`?*d̟j{z "K(rK{/:qG(`K${G IS*oz4ܮ+ -nln7%!NInx.qnǫ0ø}:\3'zI$% "$ߺBXf:xeznfZbPF4=댬3qɐ)N%#ݣuf'xWxnUZ5aڒ]pXQ.]J%zp%CkYk؇ Be3]zf;%]:Vv>hǤ;@'hNAM sod7#h.^ˤc=O0=ܯWcnNǸo#q̕lWL-ܦ*wUFQ';A'8Cqwx^u;gKppbn# 봧!<:QW;(>-zPK=!C@PK!Horg/apache/maven/wrapper/cli/ProjectPropertiesCommandLineConverter.classMO"ADB]ua L<(fOd%xorh33=i JO&(cπư]CՕNu=8"J7`7c{-☵huǞb+FPD-."OhyohW#BDFҜr !R}.# Q %ߣZ&T2ϊy3Ce;&la\FUX|k6!A\2~ [J<}a5&giZŽ]R3MέҎɠL M4:$񝆓F\o]5~kf2"`mf`c\du G,lc!-nb`u%W638W]CwOnΣvc=@ kPKBm5PK!Gorg/apache/maven/wrapper/cli/SystemPropertiesCommandLineConverter.classN1CB-r.bEBXpReLN'f#*+$<@ B!Ջs^ S}W GqZ+[0 C 2ⶊ&Ⱥ"nAb#IsB(}B\EK+XD/O1P*O}eϊE3Sg;$rUlF7X::!VAXNO,`pc<3Na1org/apache/maven/wrapper/PathAssembler$LocalDistribution.classPK!|+?,3org/apache/maven/wrapper/PathAssembler.classPK!{`- 6!;org/apache/maven/wrapper/SystemPropertiesHandler.classPK!?a( 3m@org/apache/maven/wrapper/WrapperConfiguration.classPK!ǣ9,.fDorg/apache/maven/wrapper/WrapperExecutor.classPK!SMorg/apache/maven/wrapper/cli/PK!dQvT?Morg/apache/maven/wrapper/cli/AbstractCommandLineConverter.classPK!j F I Qorg/apache/maven/wrapper/cli/AbstractPropertiesCommandLineConverter.classPK!m6Tg?/Vorg/apache/maven/wrapper/cli/CommandLineArgumentException.classPK!=8L7Worg/apache/maven/wrapper/cli/CommandLineConverter.classPK!6WK 
4Yorg/apache/maven/wrapper/cli/CommandLineOption.classPK!k-% I_org/apache/maven/wrapper/cli/CommandLineParser$AfterFirstSubCommand.classPK!['Aborg/apache/maven/wrapper/cli/CommandLineParser$AfterOptions.classPK! Jeorg/apache/maven/wrapper/cli/CommandLineParser$BeforeFirstSubCommand.classPK!G*Tjorg/apache/maven/wrapper/cli/CommandLineParser$CaseInsensitiveStringComparator.classPK!F{)6NMKlorg/apache/maven/wrapper/cli/CommandLineParser$KnownOptionParserState.classPK! IJtorg/apache/maven/wrapper/cli/CommandLineParser$MissingOptionArgState.classPK!=Kworg/apache/maven/wrapper/cli/CommandLineParser$OptionAwareParserState.classPK!KEzorg/apache/maven/wrapper/cli/CommandLineParser$OptionComparator.classPK! ҔF}org/apache/maven/wrapper/cli/CommandLineParser$OptionParserState.classPK!vfAorg/apache/maven/wrapper/cli/CommandLineParser$OptionString.classPK!7yKorg/apache/maven/wrapper/cli/CommandLineParser$OptionStringComparator.classPK!tS@܅org/apache/maven/wrapper/cli/CommandLineParser$ParserState.classPK!O<M6org/apache/maven/wrapper/cli/CommandLineParser$UnknownOptionParserState.classPK!5(4xorg/apache/maven/wrapper/cli/CommandLineParser.classPK!!@z4org/apache/maven/wrapper/cli/ParsedCommandLine.classPK!=!C@:٣org/apache/maven/wrapper/cli/ParsedCommandLineOption.classPK!Bm5Horg/apache/maven/wrapper/cli/ProjectPropertiesCommandLineConverter.classPK!v겉&Gorg/apache/maven/wrapper/cli/SystemPropertiesCommandLineConverter.classPK..jruby-joni-2.1.41/.mvn/wrapper/maven-wrapper.properties000077500000000000000000000001551400407002500231230ustar00rootroot00000000000000distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.5.2/apache-maven-3.5.2-bin.zipjruby-joni-2.1.41/.travis.yml000066400000000000000000000000761400407002500157730ustar00rootroot00000000000000language: java jdk: - openjdk9 arch: - amd64 - ppc64le jruby-joni-2.1.41/Jenkinsfile000066400000000000000000000043171400407002500160500ustar00rootroot00000000000000#!/usr/bin/env groovy pipeline { agent none 
options { buildDiscarder(logRotator(numToKeepStr: '10')) timeout(time: 1, unit: 'HOURS') } stages { stage('OpenJDK 8') { agent { docker 'openjdk:8-jdk' } steps { checkout scm sh './mvnw test -B' } post { always { junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true } } } stage('Alternative Platforms') { parallel { stage('OpenJDK 9') { agent { docker 'openjdk:9-jdk' } steps { checkout scm sh './mvnw test -B' } post { always { junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true } } } stage('Alpine Linux') { agent { docker 'openjdk:8-jdk-alpine' } steps { checkout scm sh './mvnw test -B' } post { always { junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true } } } stage('FreeBSD 11') { agent { label 'freebsd' } steps { checkout scm sh './mvnw test -B' } post { always { junit testResults: '**/surefire-reports/**/*.xml', allowEmptyResults: true } } } /* awaiting platform support in Code Valet */ stage('Windows 2016') { when { branch 'windows-support' } steps { echo 'Not yet available' } } } } } } jruby-joni-2.1.41/LICENSE000066400000000000000000000020531400407002500146640ustar00rootroot00000000000000MIT License Copyright (c) 2017 JRuby Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. jruby-joni-2.1.41/MANIFEST.MF000066400000000000000000000001221400407002500153040ustar00rootroot00000000000000Implementation-Title: Joni (java port of Oniguruma) Implementation-Version: 1.1.7 jruby-joni-2.1.41/README.md000066400000000000000000000037171400407002500151460ustar00rootroot00000000000000joni ==== [![Maven Central](https://img.shields.io/maven-central/v/org.jruby.joni/joni.svg)](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.jruby.joni%22) [![Build Status](https://secure.travis-ci.org/jruby/joni.png)](http://travis-ci.org/jruby/joni) Java port of Oniguruma regexp library ## Usage ### Imports ```java import org.jcodings.specific.UTF8Encoding; import org.joni.Matcher; import org.joni.Option; import org.joni.Regex; ``` ### Matching ```java byte[] pattern = "a*".getBytes(); byte[] str = "aaa".getBytes(); Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE); Matcher matcher = regex.matcher(str); int result = matcher.search(0, str.length, Option.DEFAULT); ``` ### Using captures ```java byte[] pattern = "(a*)".getBytes(); byte[] str = "aaa".getBytes(); Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE); Matcher matcher = regex.matcher(str); int result = matcher.search(0, str.length, Option.DEFAULT); if (result != -1) { Region region = matcher.getEagerRegion(); } ``` ### Using named captures ```java byte[] pattern = "(?a*)".getBytes(); byte[] str = "aaa".getBytes(); Regex regex = new Regex(pattern, 0, pattern.length, Option.NONE, UTF8Encoding.INSTANCE); Matcher matcher = regex.matcher(str); int result = matcher.search(0, str.length, Option.DEFAULT); if (result != -1) { Region region = matcher.getEagerRegion(); for (Iterator entry = 
regex.namedBackrefIterator(); entry.hasNext();) { NameEntry e = entry.next(); int number = e.getBackRefs()[0]; // can have many refs per name // int begin = region.beg[number]; // int end = region.end[number]; } } ``` ## License Joni is released under the [MIT License](http://www.opensource.org/licenses/MIT). jruby-joni-2.1.41/mvnw000077500000000000000000000145551400407002500146060ustar00rootroot00000000000000#!/bin/sh # ---------------------------------------------------------------------------- # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # ---------------------------------------------------------------------------- # ---------------------------------------------------------------------------- # Maven2 Start Up Batch script # # Required ENV vars: # ------------------ # JAVA_HOME - location of a JDK home dir # # Optional ENV vars # ----------------- # M2_HOME - location of maven2's installed home dir # MAVEN_OPTS - parameters passed to the Java VM when running Maven # e.g. 
to debug Maven itself, use # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 # MAVEN_SKIP_RC - flag to disable loading of mavenrc files # ---------------------------------------------------------------------------- if [ -z "$MAVEN_SKIP_RC" ] ; then if [ -f /etc/mavenrc ] ; then . /etc/mavenrc fi if [ -f "$HOME/.mavenrc" ] ; then . "$HOME/.mavenrc" fi fi # OS specific support. $var _must_ be set to either true or false. cygwin=false; darwin=false; mingw=false case "`uname`" in CYGWIN*) cygwin=true ;; MINGW*) mingw=true;; Darwin*) darwin=true # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home # See https://developer.apple.com/library/mac/qa/qa1170/_index.html if [ -z "$JAVA_HOME" ]; then if [ -x "/usr/libexec/java_home" ]; then export JAVA_HOME="`/usr/libexec/java_home`" else export JAVA_HOME="/Library/Java/Home" fi fi ;; esac if [ -z "$JAVA_HOME" ] ; then if [ -r /etc/gentoo-release ] ; then JAVA_HOME=`java-config --jre-home` fi fi if [ -z "$M2_HOME" ] ; then ## resolve links - $0 may be a link to maven's home PRG="$0" # need this for relative symlinks while [ -h "$PRG" ] ; do ls=`ls -ld "$PRG"` link=`expr "$ls" : '.*-> \(.*\)$'` if expr "$link" : '/.*' > /dev/null; then PRG="$link" else PRG="`dirname "$PRG"`/$link" fi done saveddir=`pwd` M2_HOME=`dirname "$PRG"`/.. # make it fully qualified M2_HOME=`cd "$M2_HOME" && pwd` cd "$saveddir" # echo Using m2 at $M2_HOME fi # For Cygwin, ensure paths are in UNIX format before anything is touched if $cygwin ; then [ -n "$M2_HOME" ] && M2_HOME=`cygpath --unix "$M2_HOME"` [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"` [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"` fi # For Mingw, ensure paths are in UNIX format before anything is touched if $mingw ; then [ -n "$M2_HOME" ] && M2_HOME="`(cd "$M2_HOME"; pwd)`" [ -n "$JAVA_HOME" ] && JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" # TODO classpath? 
fi if [ -z "$JAVA_HOME" ]; then javaExecutable="`which javac`" if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then # readlink(1) is not available as standard on Solaris 10. readLink=`which readlink` if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then if $darwin ; then javaHome="`dirname \"$javaExecutable\"`" javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" else javaExecutable="`readlink -f \"$javaExecutable\"`" fi javaHome="`dirname \"$javaExecutable\"`" javaHome=`expr "$javaHome" : '\(.*\)/bin'` JAVA_HOME="$javaHome" export JAVA_HOME fi fi fi if [ -z "$JAVACMD" ] ; then if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables JAVACMD="$JAVA_HOME/jre/sh/java" else JAVACMD="$JAVA_HOME/bin/java" fi else JAVACMD="`which java`" fi fi if [ ! -x "$JAVACMD" ] ; then echo "Error: JAVA_HOME is not defined correctly." >&2 echo " We cannot execute $JAVACMD" >&2 exit 1 fi if [ -z "$JAVA_HOME" ] ; then echo "Warning: JAVA_HOME environment variable is not set." 
fi CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher # traverses directory structure from process work directory to filesystem root # first directory with .mvn subdirectory is considered project base directory find_maven_basedir() { if [ -z "$1" ] then echo "Path not specified to find_maven_basedir" return 1 fi basedir="$1" wdir="$1" while [ "$wdir" != '/' ] ; do if [ -d "$wdir"/.mvn ] ; then basedir=$wdir break fi # workaround for JBEAP-8937 (on Solaris 10/Sparc) if [ -d "${wdir}" ]; then wdir=`cd "$wdir/.."; pwd` fi # end of workaround done echo "${basedir}" } # concatenates all lines of a file concat_lines() { if [ -f "$1" ]; then echo "$(tr -s '\n' ' ' < "$1")" fi } BASE_DIR=`find_maven_basedir "$(pwd)"` if [ -z "$BASE_DIR" ]; then exit 1; fi export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} if [ "$MVNW_VERBOSE" = true ]; then echo $MAVEN_PROJECTBASEDIR fi MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" # For Cygwin, switch paths to Windows format before running java if $cygwin; then [ -n "$M2_HOME" ] && M2_HOME=`cygpath --path --windows "$M2_HOME"` [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"` [ -n "$MAVEN_PROJECTBASEDIR" ] && MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` fi WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain exec "$JAVACMD" \ $MAVEN_OPTS \ -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" jruby-joni-2.1.41/mvnw.cmd000077500000000000000000000120751400407002500153430ustar00rootroot00000000000000@REM ---------------------------------------------------------------------------- @REM Licensed to the Apache Software Foundation (ASF) under one @REM or more contributor license agreements. 
See the NOTICE file @REM distributed with this work for additional information @REM regarding copyright ownership. The ASF licenses this file @REM to you under the Apache License, Version 2.0 (the @REM "License"); you may not use this file except in compliance @REM with the License. You may obtain a copy of the License at @REM @REM http://www.apache.org/licenses/LICENSE-2.0 @REM @REM Unless required by applicable law or agreed to in writing, @REM software distributed under the License is distributed on an @REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @REM KIND, either express or implied. See the License for the @REM specific language governing permissions and limitations @REM under the License. @REM ---------------------------------------------------------------------------- @REM ---------------------------------------------------------------------------- @REM Maven2 Start Up Batch script @REM @REM Required ENV vars: @REM JAVA_HOME - location of a JDK home dir @REM @REM Optional ENV vars @REM M2_HOME - location of maven2's installed home dir @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands @REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven @REM e.g. 
to debug Maven itself, use @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 @REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files @REM ---------------------------------------------------------------------------- @REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on' @echo off @REM set title of command window title %0 @REM enable echoing my setting MAVEN_BATCH_ECHO to 'on' @if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO% @REM set %HOME% to equivalent of $HOME if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") @REM Execute a user defined script before this one if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre @REM check for pre script, once with legacy .bat ending and once with .cmd ending if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" :skipRcPre @setlocal set ERROR_CODE=0 @REM To isolate internal variables from possible post scripts, we use another setlocal @setlocal @REM ==== START VALIDATION ==== if not "%JAVA_HOME%" == "" goto OkJHome echo. echo Error: JAVA_HOME not found in your environment. >&2 echo Please set the JAVA_HOME variable in your environment to match the >&2 echo location of your Java installation. >&2 echo. goto error :OkJHome if exist "%JAVA_HOME%\bin\java.exe" goto init echo. echo Error: JAVA_HOME is set to an invalid directory. >&2 echo JAVA_HOME = "%JAVA_HOME%" >&2 echo Please set the JAVA_HOME variable in your environment to match the >&2 echo location of your Java installation. >&2 echo. goto error @REM ==== END VALIDATION ==== :init @REM Find the project base dir, i.e. the directory that contains the folder ".mvn". @REM Fallback to current working directory if not found. set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR% IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir set EXEC_DIR=%CD% set WDIR=%EXEC_DIR% :findBaseDir IF EXIST "%WDIR%"\.mvn goto baseDirFound cd .. 
IF "%WDIR%"=="%CD%" goto baseDirNotFound set WDIR=%CD% goto findBaseDir :baseDirFound set MAVEN_PROJECTBASEDIR=%WDIR% cd "%EXEC_DIR%" goto endDetectBaseDir :baseDirNotFound set MAVEN_PROJECTBASEDIR=%EXEC_DIR% cd "%EXEC_DIR%" :endDetectBaseDir IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig @setlocal EnableExtensions EnableDelayedExpansion for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a @endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS% :endReadAdditionalConfig SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain %MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* if ERRORLEVEL 1 goto error goto end :error set ERROR_CODE=1 :end @endlocal & set ERROR_CODE=%ERROR_CODE% if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost @REM check for post script, once with legacy .bat ending and once with .cmd ending if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" :skipRcPost @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' if "%MAVEN_BATCH_PAUSE%" == "on" pause if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% exit /B %ERROR_CODE% jruby-joni-2.1.41/pom.xml000066400000000000000000000151101400407002500151720ustar00rootroot00000000000000 4.0.0 org.jruby.joni joni jar 2.1.41 Joni Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma that uses byte arrays directly instead of java Strings and chars UTF-8 org.sonatype.oss oss-parent 7 JIRA http://jira.codehaus.org/browse/JRUBY scm:git:https://github.com/jruby/joni.git scm:git:git@github.com:jruby/joni.git https://github.com/jruby/joni MIT 
License http://www.opensource.org/licenses/mit-license.php repo lopex Marcin Mielzynski lopx@gazeta.pl org.jruby.jcodings jcodings 1.0.55 junit junit 4.13.1 test src test joni org.apache.maven.wagon wagon-webdav maven-compiler-plugin 3.8.0 1.7 1.7 default-compile module-info.java org.apache.maven.plugins maven-surefire-plugin 2.22.0 maven-jar-plugin 2.2 MANIFEST.MF jacoco org.jacoco jacoco-maven-plugin 0.8.0 default-prepare-agent prepare-agent pre-unit-test prepare-agent ${project.build.directory}/coverage-reports/jacoco.exec surefireArgLine post-unit-test test report ${project.build.directory}/coverage-reports/jacoco.exec ${project.reporting.outputDirectory}/jacoco default-report prepare-package report release-on-9 [9,) maven-compiler-plugin compile9 compile 9 module-info.java maven-source-plugin 2.2.1 attach-sources jar maven-javadoc-plugin 3.0.1 attach-javadocs jar -Xdoclint:none -html5 true jruby-joni-2.1.41/src/000077500000000000000000000000001400407002500144465ustar00rootroot00000000000000jruby-joni-2.1.41/src/module-info.java000066400000000000000000000002261400407002500175270ustar00rootroot00000000000000open module org.jruby.joni { exports org.joni; exports org.joni.constants; exports org.joni.exception; requires org.jruby.jcodings; }jruby-joni-2.1.41/src/org/000077500000000000000000000000001400407002500152355ustar00rootroot00000000000000jruby-joni-2.1.41/src/org/joni/000077500000000000000000000000001400407002500161745ustar00rootroot00000000000000jruby-joni-2.1.41/src/org/joni/Analyser.java000066400000000000000000002351201400407002500206200ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is 
furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.BitStatus.bsAll; import static org.joni.BitStatus.bsAt; import static org.joni.BitStatus.bsClear; import static org.joni.BitStatus.bsOnAt; import static org.joni.BitStatus.bsOnAtSimple; import static org.joni.Option.isCaptureGroup; import static org.joni.Option.isFindCondition; import static org.joni.Option.isIgnoreCase; import static org.joni.Option.isMultiline; import static org.joni.ast.ListNode.newAlt; import static org.joni.ast.ListNode.newList; import static org.joni.ast.QuantifierNode.isRepeatInfinite; import java.util.IllegalFormatConversionException; import org.jcodings.CaseFoldCodeItem; import org.jcodings.ObjPtr; import org.jcodings.Ptr; import org.jcodings.constants.CharacterType; import org.joni.ast.AnchorNode; import org.joni.ast.BackRefNode; import org.joni.ast.CClassNode; import org.joni.ast.CTypeNode; import org.joni.ast.CallNode; import org.joni.ast.EncloseNode; import org.joni.ast.ListNode; import org.joni.ast.Node; import org.joni.ast.QuantifierNode; import org.joni.ast.StringNode; import org.joni.constants.internal.AnchorType; import org.joni.constants.internal.EncloseType; import org.joni.constants.internal.NodeType; import org.joni.constants.internal.StackPopLevel; import org.joni.constants.internal.TargetInfo; final class Analyser extends 
Parser { protected Analyser(Regex regex, Syntax syntax, byte[]bytes, int p, int end, WarnCallback warnings) { super(regex, syntax, bytes, p, end, warnings); } protected final void compile() { if (Config.DEBUG) Config.log.println(encStringToString(bytes, getBegin(), getEnd())); reset(); regex.numMem = 0; regex.numRepeat = 0; regex.numNullCheck = 0; //regex.repeatRangeAlloc = 0; regex.repeatRangeLo = null; regex.repeatRangeHi = null; regex.numCombExpCheck = 0; if (Config.USE_CEC) regex.numCombExpCheck = 0; Node root = parseRegexp(); // onig_parse_make_tree regex.numMem = env.numMem; if (Config.USE_NAMED_GROUP) { /* mixed use named group and no-named group */ if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(regex.options)) { if (env.numNamed != env.numMem) { root = disableNoNameGroupCapture(root); } else { numberedRefCheck(root); } } } // USE_NAMED_GROUP if (Config.USE_NAMED_GROUP) { if (env.numCall > 0) { env.unsetAddrList = new UnsetAddrList(env.numCall); setupSubExpCall(root); // r != 0 ??? subexpRecursiveCheckTrav(root); // r < 0 -< err, FOUND_CALLED_NODE = 1 subexpInfRecursiveCheckTrav(root); // r != 0 recursion infinite ??? 
regex.numCall = env.numCall; } else { regex.numCall = 0; } } // USE_NAMED_GROUP if (Config.DEBUG_PARSE_TREE && Config.DEBUG_PARSE_TREE_RAW) Config.log.println("\n" + root + "\n"); Node.TopNode top = Node.newTop(root); setupTree(root, 0); root = top.getRoot(); if (Config.DEBUG_PARSE_TREE) Config.log.println("\n" + root + "\n"); regex.captureHistory = env.captureHistory; regex.btMemStart = env.btMemStart; if (isFindCondition(regex.options)) { regex.btMemEnd = bsAll(); } else { regex.btMemEnd = env.btMemEnd; regex.btMemEnd |= regex.captureHistory; } if (Config.USE_CEC) { if (env.backrefedMem == 0 || (Config.USE_SUBEXP_CALL && env.numCall == 0)) { setupCombExpCheck(root, 0); if (Config.USE_SUBEXP_CALL && env.hasRecursion) { env.numCombExpCheck = 0; } else { // USE_SUBEXP_CALL if (env.combExpMaxRegNum > 0) { for (int i=1; i 1) { int p_ = p; while (p_ < end) { int code = enc.mbcToCode(bytes, p_, end); if (code >= 0x80) { try { sb.append(String.format(" 0x%04x ", code)); } catch (IllegalFormatConversionException ifce) { sb.append(code); } } else { sb.append((char)code); } p_ += enc.length(bytes, p_, end); } } else { while (p < end) { sb.append(new String(bytes, p, 1)); p++; } } return sb.append("/").toString(); } private void noNameDisableMapFor_listAlt(Node node, int[]map, Ptr counter) { ListNode can = (ListNode)node; do { can.setValue(noNameDisableMap(can.value, map, counter)); } while ((can = can.tail) != null); } private void noNameDisableMapFor_quantifier(Node node, int[]map, Ptr counter) { QuantifierNode qn = (QuantifierNode)node; Node target = qn.target; Node old = target; target = noNameDisableMap(target, map, counter); if (target != old) { qn.setTarget(target); if (target.getType() == NodeType.QTFR) qn.reduceNestedQuantifier((QuantifierNode)target); } } private Node noNameDisableMapFor_enclose(Node node, int[]map, Ptr counter) { EncloseNode en = (EncloseNode)node; if (en.type == EncloseType.MEMORY) { if (en.isNamedGroup()) { counter.p++; map[en.regNum] = 
counter.p; en.regNum = counter.p; en.setTarget(noNameDisableMap(en.target, map, counter)); } else { node = en.target; en.target = null; // remove first enclose: /(a)(?c)/ node = noNameDisableMap(node, map, counter); } } else { en.setTarget(noNameDisableMap(en.target, map, counter)); } return node; } private void noNameDisableMapFor_anchor(Node node, int[]map, Ptr counter) { AnchorNode an = (AnchorNode)node; if (an.target != null) an.setTarget(noNameDisableMap(an.target, map, counter)); } private Node noNameDisableMap(Node node, int[]map, Ptr counter) { switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: noNameDisableMapFor_listAlt(node, map, counter); break; case NodeType.QTFR: noNameDisableMapFor_quantifier(node, map, counter); break; case NodeType.ENCLOSE: node = noNameDisableMapFor_enclose(node, map, counter); break; case NodeType.ANCHOR: noNameDisableMapFor_anchor(node, map, counter); break; } // switch return node; } private void renumberByMap(Node node, int[]map) { switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { renumberByMap(can.value, map); } while ((can = can.tail) != null); break; case NodeType.QTFR: renumberByMap(((QuantifierNode)node).target, map); break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (en.type == EncloseType.CONDITION) { en.regNum = map[en.regNum]; } renumberByMap(en.target, map); break; case NodeType.BREF: ((BackRefNode)node).renumber(map); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; if (an.target != null) renumberByMap(an.target, map); break; } // switch } protected final void numberedRefCheck(Node node) { switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { numberedRefCheck(can.value); } while ((can = can.tail) != null); break; case NodeType.QTFR: numberedRefCheck(((QuantifierNode)node).target); break; case NodeType.ENCLOSE: numberedRefCheck(((EncloseNode)node).target); break; case 
NodeType.BREF: BackRefNode br = (BackRefNode)node; if (!br.isNameRef()) newValueException(NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; if (an.target != null) numberedRefCheck(an.target); break; } // switch } protected final Node disableNoNameGroupCapture(Node root) { int[]map = new int[env.numMem + 1]; root = noNameDisableMap(root, map, new Ptr(0)); renumberByMap(root, map); for (int i=1, pos=1; i<=env.numMem; i++) { if (map[i] > 0) { env.memNodes[pos] = env.memNodes[i]; pos++; } } int loc = env.captureHistory; env.captureHistory = bsClear(); for (int i=1; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) { if (bsAt(loc, i)) { env.captureHistory = bsOnAtSimple(env.captureHistory, map[i]); } } env.numMem = env.numNamed; regex.numMem = env.numNamed; regex.renumberNameTable(map); return root; } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK private int quantifiersMemoryInfo(Node node) { int info = 0; switch(node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { int v = quantifiersMemoryInfo(can.value); if (v > info) info = v; } while ((can = can.tail) != null); break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (cn.isRecursion()) { return TargetInfo.IS_EMPTY_REC; /* tiny version */ } else { info = quantifiersMemoryInfo(cn.target); } } // USE_SUBEXP_CALL break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.upper != 0) { info = quantifiersMemoryInfo(qn.target); } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.MEMORY: return TargetInfo.IS_EMPTY_MEM; case EncloseType.OPTION: case EncloseNode.STOP_BACKTRACK: case EncloseNode.CONDITION: case EncloseNode.ABSENT: info = quantifiersMemoryInfo(en.target); break; default: break; } // inner switch break; case NodeType.BREF: case NodeType.STR: case NodeType.CTYPE: case NodeType.CCLASS: case NodeType.CANY: case NodeType.ANCHOR: 
default: break; } // switch return info; } private int getMinMatchLength(Node node) { int min = 0; switch (node.getType()) { case NodeType.BREF: BackRefNode br = (BackRefNode)node; if (br.isRecursion()) break; if (br.back[0] > env.numMem) { if (!syntax.op3OptionECMAScript()) newValueException(INVALID_BACKREF); } else { min = getMinMatchLength(env.memNodes[br.back[0]]); } for (int i=1; i env.numMem) { if (!syntax.op3OptionECMAScript()) newValueException(INVALID_BACKREF); } else { int tmin = getMinMatchLength(env.memNodes[br.back[i]]); if (min > tmin) min = tmin; } } break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (cn.isRecursion()) { EncloseNode en = cn.target; if (en.isMinFixed()) min = en.minLength; } else { min = getMinMatchLength(cn.target); } } // USE_SUBEXP_CALL break; case NodeType.LIST: ListNode can = (ListNode)node; do { min += getMinMatchLength(can.value); } while ((can = can.tail) != null); break; case NodeType.ALT: ListNode y = (ListNode)node; do { Node x = y.value; int tmin = getMinMatchLength(x); if (y == node) { min = tmin; } else if (min > tmin) { min = tmin; } } while ((y = y.tail) != null); break; case NodeType.STR: min = ((StringNode)node).length(); break; case NodeType.CTYPE: case NodeType.CCLASS: case NodeType.CANY: min = 1; break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.lower > 0) { min = getMinMatchLength(qn.target); min = MinMaxLen.distanceMultiply(min, qn.lower); } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL) { if (en.isMinFixed()) { min = en.minLength; } else { if (en.isMark1()) { min = 0; /* recursive */ } else { en.setMark1(); min = getMinMatchLength(en.target); en.clearMark1(); en.minLength = min; en.setMinFixed(); } } } // USE_SUBEXP_CALL break; case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: case EncloseNode.CONDITION: min = getMinMatchLength(en.target); break; 
case EncloseType.ABSENT: break; } // inner switch break; case NodeType.ANCHOR: default: break; } // switch return min; } private int getMaxMatchLength(Node node) { int max = 0; switch (node.getType()) { case NodeType.LIST: ListNode ln = (ListNode)node; do { int tmax = getMaxMatchLength(ln.value); max = MinMaxLen.distanceAdd(max, tmax); } while ((ln = ln.tail) != null); break; case NodeType.ALT: ListNode an = (ListNode)node; do { int tmax = getMaxMatchLength(an.value); if (max < tmax) max = tmax; } while ((an = an.tail) != null); break; case NodeType.STR: max = ((StringNode)node).length(); break; case NodeType.CTYPE: case NodeType.CCLASS: case NodeType.CANY: max = enc.maxLength(); break; case NodeType.BREF: BackRefNode br = (BackRefNode)node; if (br.isRecursion()) { max = MinMaxLen.INFINITE_DISTANCE; break; } for (int i=0; i env.numMem) { if(!syntax.op3OptionECMAScript()) newValueException(INVALID_BACKREF); } else { int tmax = getMaxMatchLength(env.memNodes[br.back[i]]); if (max < tmax) max = tmax; } } break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (!cn.isRecursion()) { max = getMaxMatchLength(cn.target); } else { max = MinMaxLen.INFINITE_DISTANCE; } } // USE_SUBEXP_CALL break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.upper != 0) { max = getMaxMatchLength(qn.target); if (max != 0) { if (!isRepeatInfinite(qn.upper)) { max = MinMaxLen.distanceMultiply(max, qn.upper); } else { max = MinMaxLen.INFINITE_DISTANCE; } } } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL) { if (en.isMaxFixed()) { max = en.maxLength; } else { if (en.isMark1()) { max = MinMaxLen.INFINITE_DISTANCE; } else { en.setMark1(); max = getMaxMatchLength(en.target); en.clearMark1(); en.maxLength = max; en.setMaxFixed(); } } } // USE_SUBEXP_CALL break; case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: case EncloseNode.CONDITION: max = 
getMaxMatchLength(en.target); break; case EncloseType.ABSENT: break; } // inner switch break; case NodeType.ANCHOR: default: break; } // switch return max; } private static final int GET_CHAR_LEN_VARLEN = -1; private static final int GET_CHAR_LEN_TOP_ALT_VARLEN = -2; protected final int getCharLengthTree(Node node) { return getCharLengthTree(node, 0); } private int getCharLengthTree(Node node, int level) { level++; int len = 0; returnCode = 0; switch(node.getType()) { case NodeType.LIST: ListNode ln = (ListNode)node; do { int tlen = getCharLengthTree(ln.value, level); if (returnCode == 0) len = MinMaxLen.distanceAdd(len, tlen); } while (returnCode == 0 && (ln = ln.tail) != null); break; case NodeType.ALT: ListNode an = (ListNode)node; boolean varLen = false; int tlen = getCharLengthTree(an.value, level); while (returnCode == 0 && (an = an.tail) != null) { int tlen2 = getCharLengthTree(an.value, level); if (returnCode == 0) { if (tlen != tlen2) varLen = true; } } if (returnCode == 0) { if (varLen) { if (level == 1) { returnCode = GET_CHAR_LEN_TOP_ALT_VARLEN; } else { returnCode = GET_CHAR_LEN_VARLEN; } } else { len = tlen; } } break; case NodeType.STR: StringNode sn = (StringNode)node; len = sn.length(enc); break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; if (qn.lower == qn.upper) { tlen = getCharLengthTree(qn.target, level); if (returnCode == 0) len = MinMaxLen.distanceMultiply(tlen, qn.lower); } else { returnCode = GET_CHAR_LEN_VARLEN; } break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (!cn.isRecursion()) { len = getCharLengthTree(cn.target, level); } else { returnCode = GET_CHAR_LEN_VARLEN; } } // USE_SUBEXP_CALL break; case NodeType.CTYPE: case NodeType.CCLASS: case NodeType.CANY: len = 1; break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch(en.type) { case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL) { if (en.isCLenFixed()) { len = en.charLength; } else { len = 
getCharLengthTree(en.target, level); if (returnCode == 0) { en.charLength = len; en.setCLenFixed(); } } } // USE_SUBEXP_CALL break; case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: case EncloseNode.CONDITION: len = getCharLengthTree(en.target, level); break; case EncloseType.ABSENT: break; } // inner switch break; case NodeType.ANCHOR: break; default: returnCode = GET_CHAR_LEN_VARLEN; } // switch return len; } /* x is not included y ==> 1 : 0 */ private boolean isNotIncluded(Node x, Node y) { Node tmp; retry: while(true) { int yType = y.getType(); switch(x.getType()) { case NodeType.CTYPE: switch(yType) { case NodeType.CTYPE: { CTypeNode cny = (CTypeNode)y; CTypeNode cnx = (CTypeNode)x; return cny.ctype == cnx.ctype && cny.not != cnx.not && cny.asciiRange == cnx.asciiRange; } case NodeType.CCLASS: // !swap:! tmp = x; x = y; y = tmp; // !goto retry;! continue retry; case NodeType.STR: // !goto swap;! tmp = x; x = y; y = tmp; continue retry; default: break; } // inner switch break; case NodeType.CCLASS: CClassNode xc = (CClassNode)x; switch(yType) { case NodeType.CTYPE: { CTypeNode yc = (CTypeNode)y; switch(yc.ctype) { case CharacterType.WORD: if (!yc.not) { if (xc.mbuf == null && !xc.isNot()) { for (int i=0; i ys.length()) len = ys.length(); if (xs.isAmbig() || ys.isAmbig()) { /* tiny version */ return false; } else { for (int i=0, p=ys.p, q=xs.p; i 0) { if (qn.headExact != null) { n = qn.headExact; } else { n = getHeadValueNode(qn.target, exact); } } break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.OPTION: int options = regex.options; regex.options = en.option; n = getHeadValueNode(en.target, exact); regex.options = options; break; case EncloseType.MEMORY: case EncloseType.STOP_BACKTRACK: case EncloseNode.CONDITION: n = getHeadValueNode(en.target, exact); break; case EncloseType.ABSENT: break; } // inner switch break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; if (an.type == 
AnchorType.PREC_READ) n = getHeadValueNode(an.target, exact); break; default: break; } // switch return n; } // true: invalid private boolean checkTypeTree(Node node, int typeMask, int encloseMask, int anchorMask) { if ((node.getType2Bit() & typeMask) == 0) return true; boolean invalid = false; switch(node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { invalid = checkTypeTree(can.value, typeMask, encloseMask, anchorMask); } while (!invalid && (can = can.tail) != null); break; case NodeType.QTFR: invalid = checkTypeTree(((QuantifierNode)node).target, typeMask, encloseMask, anchorMask); break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if ((en.type & encloseMask) == 0) return true; invalid = checkTypeTree(en.target, typeMask, encloseMask, anchorMask); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; if ((an.type & anchorMask) == 0) return true; if (an.target != null) invalid = checkTypeTree(an.target, typeMask, encloseMask, anchorMask); break; default: break; } // switch return invalid; } private static final int RECURSION_EXIST = 1; private static final int RECURSION_INFINITE = 2; private int subexpInfRecursiveCheck(Node node, boolean head) { int r = 0; switch (node.getType()) { case NodeType.LIST: int min; ListNode x = (ListNode)node; do { int ret = subexpInfRecursiveCheck(x.value, head); if (ret == RECURSION_INFINITE) return ret; r |= ret; if (head) { min = getMinMatchLength(x.value); if (min != 0) head = false; } } while ((x = x.tail) != null); break; case NodeType.ALT: ListNode can = (ListNode)node; r = RECURSION_EXIST; do { int ret = subexpInfRecursiveCheck(can.value, head); if (ret == RECURSION_INFINITE) return ret; r &= ret; } while ((can = can.tail) != null); break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; r = subexpInfRecursiveCheck(qn.target, head); if (r == RECURSION_EXIST) { if (qn.lower == 0) r = 0; } break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; 
switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpInfRecursiveCheck(an.target, head); break; } // inner switch break; case NodeType.CALL: r = subexpInfRecursiveCheck(((CallNode)node).target, head); break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (en.isMark2()) { return 0; } else if (en.isMark1()) { return !head ? RECURSION_EXIST : RECURSION_INFINITE; // throw exception here ??? } else { en.setMark2(); r = subexpInfRecursiveCheck(en.target, head); en.clearMark2(); } break; default: break; } // switch return r; } protected final int subexpInfRecursiveCheckTrav(Node node) { int r = 0; switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { r = subexpInfRecursiveCheckTrav(can.value); } while (r == 0 && (can = can.tail) != null); break; case NodeType.QTFR: r = subexpInfRecursiveCheckTrav(((QuantifierNode)node).target); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpInfRecursiveCheckTrav(an.target); break; } // inner switch break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (en.isRecursion()) { en.setMark1(); r = subexpInfRecursiveCheck(en.target, true); if (r > 0) newValueException(NEVER_ENDING_RECURSION); en.clearMark1(); } r = subexpInfRecursiveCheckTrav(en.target); break; default: break; } // switch return r; } private int subexpRecursiveCheck(Node node) { int r = 0; switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { r |= subexpRecursiveCheck(can.value); } while ((can = can.tail) != null); break; case NodeType.QTFR: r = subexpRecursiveCheck(((QuantifierNode)node).target); break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case 
AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpRecursiveCheck(an.target); break; } // inner switch break; case NodeType.CALL: CallNode cn = (CallNode)node; r = subexpRecursiveCheck(cn.target); if (r != 0) cn.setRecursion(); break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (en.isMark2()) { return 0; } else if (en.isMark1()) { return 1; /* recursion */ } else { en.setMark2(); r = subexpRecursiveCheck(en.target); en.clearMark2(); } break; default: break; } // switch return r; } private static final int FOUND_CALLED_NODE = 1; protected final int subexpRecursiveCheckTrav(Node node) { int r = 0; switch (node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { int ret = subexpRecursiveCheckTrav(can.value); if (ret == FOUND_CALLED_NODE) { r = FOUND_CALLED_NODE; } // else if (ret < 0) return ret; ??? } while ((can = can.tail) != null); break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; r = subexpRecursiveCheckTrav(qn.target); if (qn.upper == 0) { if (r == FOUND_CALLED_NODE) qn.isRefered = true; } break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: r = subexpRecursiveCheckTrav(an.target); break; } // inner switch break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; if (!en.isRecursion()) { if (en.isCalled()) { en.setMark1(); r = subexpRecursiveCheck(en.target); if (r != 0) en.setRecursion(); en.clearMark1(); } } r = subexpRecursiveCheckTrav(en.target); if (en.isCalled()) r |= FOUND_CALLED_NODE; break; default: break; } // switch return r; } private void setCallAttr(CallNode cn) { EncloseNode en = env.memNodes[cn.groupNum]; if (en == null) newValueException(UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); en.setCalled(); cn.setTarget(en); env.btMemStart = 
BitStatus.bsOnAt(env.btMemStart, cn.groupNum); cn.unsetAddrList = env.unsetAddrList; } protected final void setupSubExpCall(Node node) { switch(node.getType()) { case NodeType.LIST: case NodeType.ALT: ListNode can = (ListNode)node; do { setupSubExpCall(can.value); } while ((can = can.tail) != null); break; case NodeType.QTFR: setupSubExpCall(((QuantifierNode)node).target); break; case NodeType.ENCLOSE: setupSubExpCall(((EncloseNode)node).target); break; case NodeType.CALL: CallNode cn = (CallNode)node; if (cn.groupNum != 0) { int gNum = cn.groupNum; if (Config.USE_NAMED_GROUP) { if (env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) { newValueException(NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); } } // USE_NAMED_GROUP if (gNum > env.numMem) newValueException(UNDEFINED_GROUP_REFERENCE, cn.nameP, cn.nameEnd); setCallAttr(cn); } else { if (Config.USE_NAMED_GROUP) { if (Config.USE_PERL_SUBEXP_CALL && cn.nameP == cn.nameEnd) { setCallAttr(cn); } else { NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd); if (ne == null) { newValueException(UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd); } else if (ne.backNum > 1) { newValueException(MULTIPLEX_DEFINITION_NAME_CALL, cn.nameP, cn.nameEnd); } else { cn.groupNum = ne.backRef1; // ne.backNum == 1 ? ne.backRef1 : ne.backRefs[0]; // ??? need to check ? setCallAttr(cn); } } } } break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: case AnchorType.PREC_READ_NOT: case AnchorType.LOOK_BEHIND: case AnchorType.LOOK_BEHIND_NOT: setupSubExpCall(an.target); break; } break; } // switch } /* divide different length alternatives in look-behind. (?<=A|B) ==> (?<=A)|(?<=B) (? (? 
list */ } while ((np = ((ListNode)np).tail) != null); } return node; } private Node setupLookBehind(AnchorNode node) { int len = getCharLengthTree(node.target); switch(returnCode) { case 0: node.charLength = len; break; case GET_CHAR_LEN_VARLEN: newSyntaxException(INVALID_LOOK_BEHIND_PATTERN); break; case GET_CHAR_LEN_TOP_ALT_VARLEN: if (syntax.differentLengthAltLookBehind()) { return divideLookBehindAlternatives(node); } else { newSyntaxException(INVALID_LOOK_BEHIND_PATTERN); } } return node; } private void nextSetup(Node node, Node nextNode) { retry: while(true) { int type = node.getType(); if (type == NodeType.QTFR) { QuantifierNode qn = (QuantifierNode)node; if (qn.greedy && isRepeatInfinite(qn.upper)) { if (Config.USE_QTFR_PEEK_NEXT) { StringNode n = (StringNode)getHeadValueNode(nextNode, true); /* '\0': for UTF-16BE etc... */ if (n != null && n.bytes[n.p] != 0) { qn.nextHeadExact = n; } } // USE_QTFR_PEEK_NEXT /* automatic possessification a*b ==> (?>a*)b */ if (qn.lower <= 1) { if (qn.target.isSimple()) { Node x = getHeadValueNode(qn.target, false); if (x != null) { Node y = getHeadValueNode(nextNode, false); if (y != null && isNotIncluded(x, y)) { EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); en.setStopBtSimpleRepeat(); node.replaceWith(en); en.setTarget(node); } } } } } } else if (type == NodeType.ENCLOSE) { EncloseNode en = (EncloseNode)node; if (en.isMemory()) { node = en.target; continue retry; } } break; } // while } private void updateStringNodeCaseFoldSingleByte(StringNode sn, byte[]toLower) { int end = sn.end; byte[]bytes = sn.bytes; int sp = 0; int p = sn.p; while (p < end) { byte lower = toLower[bytes[p] & 0xff]; if (lower != bytes[p]) { byte[]sbuf = new byte[end - sn.p]; System.arraycopy(bytes, sn.p, sbuf, 0, sp); while (p < end) sbuf[sp++] = toLower[bytes[p++] & 0xff]; sn.set(sbuf, 0, sp); break; } else { sp++; p++; } } } private void updateStringNodeCaseFoldMultiByte(StringNode sn) { byte[]bytes = sn.bytes; int end = sn.end; 
value = sn.p; int sp = 0; byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN]; while (value < end) { int ovalue = value; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, buf); for (int i = 0; i < len; i++) { if (bytes[ovalue + i] != buf[i]) { byte[]sbuf = new byte[sn.length() << 1]; System.arraycopy(bytes, sn.p, sbuf, 0, ovalue - sn.p); value = ovalue; while (value < end) { len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, buf); for (i = 0; i < len; i++) { if (sp >= sbuf.length) { byte[]tmp = new byte[sbuf.length << 1]; System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); sbuf = tmp; } sbuf[sp++] = buf[i]; } } sn.set(sbuf, 0, sp); return; } } sp += len; } } private void updateStringNodeCaseFold(Node node) { StringNode sn = (StringNode)node; byte[] toLower = enc.toLowerCaseTable(); if (toLower != null) { updateStringNodeCaseFoldSingleByte(sn, toLower); } else { updateStringNodeCaseFoldMultiByte(sn); } } private Node expandCaseFoldMakeRemString(byte[]bytes, int p, int end) { StringNode node = new StringNode(bytes, p, end); updateStringNodeCaseFold(node); node.setAmbig(); node.setDontGetOptInfo(); return node; } private boolean isCaseFoldVariableLength(int itemNum, CaseFoldCodeItem[] items, int slen) { for(int i = 0; i < itemNum; i++) { if (items[i].byteLen != slen || items[i].code.length != 1) return true; } return false; } private boolean expandCaseFoldStringAlt(int itemNum, CaseFoldCodeItem[]items, byte[]bytes, int p, int slen, int end, ObjPtr node) { boolean varlen = false; for (int i=0; i prevNode = new ObjPtr(); StringNode stringNode = null; while (p < end) { CaseFoldCodeItem[]items = enc.caseFoldCodesByString(regex.caseFoldFlag, bytes, p, end); int len = enc.length(bytes, p, end); if (items.length == 0 || !isCaseFoldVariableLength(items.length, items, len)) { if (stringNode == null) { if (root == null && prevNode.p != null) { topRoot = root = ListNode.listAdd(null, prevNode.p); } prevNode.p = stringNode = new StringNode(); // 
onig_node_new_str(NULL, NULL); if (root != null) ListNode.listAdd(root, stringNode); } stringNode.catBytes(bytes, p, p + len); } else { altNum *= (items.length + 1); if (altNum > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; if (stringNode != null) { updateStringNodeCaseFold(stringNode); stringNode.setAmbig(); } if (root == null && prevNode.p != null) { topRoot = root = ListNode.listAdd(null, prevNode.p); } if (expandCaseFoldStringAlt(items.length, items, bytes, p, len, end, prevNode)) { // if (r == 1) if (root == null) { topRoot = (ListNode)prevNode.p; } else { ListNode.listAdd(root, prevNode.p); } root = (ListNode)((ListNode)prevNode.p).value; } else { /* r == 0 */ if (root != null) ListNode.listAdd(root, prevNode.p); } stringNode = null; } p += len; } if (stringNode != null) { updateStringNodeCaseFold(stringNode); stringNode.setAmbig(); } if (p < end) { Node srem = expandCaseFoldMakeRemString(bytes, p, end); if (prevNode.p != null && root == null) { topRoot = root = ListNode.listAdd(null, prevNode.p); } if (root == null) { prevNode.p = srem; } else { ListNode.listAdd(root, srem); } } /* ending */ Node xnode = topRoot != null ? 
topRoot : prevNode.p; node.replaceWith(xnode); return xnode; } private static final int CEC_THRES_NUM_BIG_REPEAT = 512; private static final int CEC_INFINITE_NUM = 0x7fffffff; private static final int CEC_IN_INFINITE_REPEAT = (1<<0); private static final int CEC_IN_FINITE_REPEAT = (1<<1); private static final int CEC_CONT_BIG_REPEAT = (1<<2); protected final int setupCombExpCheck(Node node, int state) { int r = state; int ret; switch (node.getType()) { case NodeType.LIST: ListNode ln = (ListNode)node; do { r = setupCombExpCheck(ln.value, r); //prev = ((ConsAltNode)node).value; } while (r >= 0 && (ln = ln.tail) != null); break; case NodeType.ALT: ListNode an = (ListNode)node; do { ret = setupCombExpCheck(an.value, state); r |= ret; } while (ret >= 0 && (an = an.tail) != null); break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; int childState = state; int addState = 0; int varNum; if (!isRepeatInfinite(qn.upper)) { if (qn.upper > 1) { /* {0,1}, {1,1} are allowed */ childState |= CEC_IN_FINITE_REPEAT; /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ if (env.backrefedMem == 0) { if (qn.target.getType() == NodeType.ENCLOSE) { EncloseNode en = (EncloseNode)qn.target; if (en.type == EncloseType.MEMORY) { if (en.target.getType() == NodeType.QTFR) { QuantifierNode q = (QuantifierNode)en.target; if (isRepeatInfinite(q.upper) && q.greedy == qn.greedy) { qn.upper = qn.lower == 0 ? 
1 : qn.lower; if (qn.upper == 1) childState = state; } } } } } } } if ((state & CEC_IN_FINITE_REPEAT) != 0) { qn.combExpCheckNum = -1; } else { if (isRepeatInfinite(qn.upper)) { varNum = CEC_INFINITE_NUM; childState |= CEC_IN_INFINITE_REPEAT; } else { varNum = qn.upper - qn.lower; } if (varNum >= CEC_THRES_NUM_BIG_REPEAT) addState |= CEC_CONT_BIG_REPEAT; if (((state & CEC_IN_INFINITE_REPEAT) != 0 && varNum != 0) || ((state & CEC_CONT_BIG_REPEAT) != 0 && varNum >= CEC_THRES_NUM_BIG_REPEAT)) { if (qn.combExpCheckNum == 0) { env.numCombExpCheck++; qn.combExpCheckNum = env.numCombExpCheck; if (env.currMaxRegNum > env.combExpMaxRegNum) { env.combExpMaxRegNum = env.currMaxRegNum; } } } } r = setupCombExpCheck(qn.target, childState); r |= addState; break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch( en.type) { case EncloseNode.MEMORY: if (env.currMaxRegNum < en.regNum) { env.currMaxRegNum = en.regNum; } r = setupCombExpCheck(en.target, state); break; default: r = setupCombExpCheck(en.target, state); } // inner switch break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (cn.isRecursion()) { env.hasRecursion = true; } else { r = setupCombExpCheck(cn.target, state); } } // USE_SUBEXP_CALL break; default: break; } // switch return r; } private static final int IN_ALT = (1<<0); private static final int IN_NOT = (1<<1); private static final int IN_REPEAT = (1<<2); private static final int IN_VAR_REPEAT = (1<<3); private static final int IN_CALL = (1<<4); private static final int IN_RECCALL = (1<<5); private static final int EXPAND_STRING_MAX_LENGTH = 100; /* setup_tree does the following work. 1. check empty loop. (set qn->target_empty_info) 2. expand ignore-case in char class. 3. set memory status bit flags. (reg->mem_stats) 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. 5. find invalid patterns in look-behind. 6. expand repeated string. 
*/ protected final Node setupTree(Node node, int state) { restart: while (true) { switch (node.getType()) { case NodeType.LIST: ListNode lin = (ListNode)node; Node prev = null; do { setupTree(lin.value, state); if (prev != null) { nextSetup(prev, lin.value); } prev = lin.value; } while ((lin = lin.tail) != null); break; case NodeType.ALT: ListNode aln = (ListNode)node; do { setupTree(aln.value, (state | IN_ALT)); } while ((aln = aln.tail) != null); break; case NodeType.CCLASS: break; case NodeType.STR: if (isIgnoreCase(regex.options) && !((StringNode)node).isRaw()) { node = expandCaseFoldString(node); } break; case NodeType.CTYPE: case NodeType.CANY: break; case NodeType.CALL: // if (Config.USE_SUBEXP_CALL) ? break; case NodeType.BREF: BackRefNode br = (BackRefNode)node; for (int i=0; i env.numMem) { if (!syntax.op3OptionECMAScript()) newValueException(INVALID_BACKREF); } else { env.backrefedMem = bsOnAt(env.backrefedMem, br.back[i]); env.btMemStart = bsOnAt(env.btMemStart, br.back[i]); if (Config.USE_BACKREF_WITH_LEVEL) { if (br.isNestLevel()) { env.btMemEnd = bsOnAt(env.btMemEnd, br.back[i]); } } // USE_BACKREF_AT_LEVEL env.memNodes[br.back[i]].setMemBackrefed(); } } break; case NodeType.QTFR: QuantifierNode qn = (QuantifierNode)node; Node target = qn.target; if ((state & IN_REPEAT) != 0) qn.setInRepeat(); if (isRepeatInfinite(qn.upper) || qn.lower >= 1) { int d = getMinMatchLength(target); if (d == 0) { qn.targetEmptyInfo = TargetInfo.IS_EMPTY; if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) { int info = quantifiersMemoryInfo(target); if (info > 0) qn.targetEmptyInfo = info; } // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK // strange stuff here (turned off) } } state |= IN_REPEAT; if (qn.lower != qn.upper) state |= IN_VAR_REPEAT; target = setupTree(target, state); /* expand string */ if (target.getType() == NodeType.STR) { StringNode sn = (StringNode)target; if (qn.lower > 1) { StringNode str = new StringNode(sn.bytes, sn.p, sn.end); str.flag = 
sn.flag; int i; int n = qn.lower; int len = sn.length(); for (i = 1; i < n && (i + 1) * len <= EXPAND_STRING_MAX_LENGTH; i++) { str.catBytes(sn.bytes, sn.p, sn.end); } if (i < qn.upper || isRepeatInfinite(qn.upper)) { qn.lower -= i; if (!isRepeatInfinite(qn.upper)) qn.upper -= i; ListNode list = ListNode.newList(str, null); qn.replaceWith(list); ListNode.listAdd(list, qn); } else { qn.replaceWith(str); } break; } } if (Config.USE_OP_PUSH_OR_JUMP_EXACT) { if (qn.greedy && qn.targetEmptyInfo != 0) { if (target.getType() == NodeType.QTFR) { QuantifierNode tqn = (QuantifierNode)target; if (tqn.headExact != null) { qn.headExact = tqn.headExact; tqn.headExact = null; } } else { qn.headExact = getHeadValueNode(qn.target, true); } } } // USE_OP_PUSH_OR_JUMP_EXACT break; case NodeType.ENCLOSE: EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.OPTION: int options = regex.options; regex.options = en.option; setupTree(en.target, state); regex.options = options; break; case EncloseType.MEMORY: if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) { env.btMemStart = bsOnAt(env.btMemStart, en.regNum); /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ } if (en.isCalled()) state |= IN_CALL; if (en.isRecursion()) { state |= IN_RECCALL; } else if ((state & IN_RECCALL) != 0){ en.setRecursion(); } setupTree(en.target, state); break; case EncloseType.STOP_BACKTRACK: setupTree(en.target, state); if (en.target.getType() == NodeType.QTFR) { QuantifierNode tqn = (QuantifierNode)en.target; if (isRepeatInfinite(tqn.upper) && tqn.lower <= 1 && tqn.greedy) { /* (?>a*), a*+ etc... 
*/ if (tqn.target.isSimple()) en.setStopBtSimpleRepeat(); } } break; case EncloseNode.CONDITION: if (Config.USE_NAMED_GROUP) { if (!en.isNameRef() && env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) { newValueException(NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); } } if (en.regNum > env.numMem) newValueException(INVALID_BACKREF); setupTree(en.target, state); break; case EncloseType.ABSENT: setupTree(en.target, state); break; } // inner switch break; case NodeType.ANCHOR: AnchorNode an = (AnchorNode)node; switch (an.type) { case AnchorType.PREC_READ: setupTree(an.target, state); break; case AnchorType.PREC_READ_NOT: setupTree(an.target, (state | IN_NOT)); break; case AnchorType.LOOK_BEHIND: if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB, AnchorType.ALLOWED_IN_LB)) newSyntaxException(INVALID_LOOK_BEHIND_PATTERN); node = setupLookBehind(an); if (node.getType() != NodeType.ANCHOR) continue restart; setupTree(((AnchorNode)node).target, state); node = setupLookBehind(an); break; case AnchorType.LOOK_BEHIND_NOT: if (checkTypeTree(an.target, NodeType.ALLOWED_IN_LB, EncloseType.ALLOWED_IN_LB_NOT, AnchorType.ALLOWED_IN_LB_NOT)) newSyntaxException(INVALID_LOOK_BEHIND_PATTERN); node = setupLookBehind(an); if (node.getType() != NodeType.ANCHOR) continue restart; setupTree(((AnchorNode)node).target, (state | IN_NOT)); node = setupLookBehind(an); break; } // inner switch break; } // switch return node; } // restart: while } private static final int MAX_NODE_OPT_INFO_REF_COUNT = 5; private void optimizeNodeLeft(Node node, NodeOptInfo opt, OptEnvironment oenv) { // oenv remove, pass mmd opt.clear(); opt.setBoundNode(oenv.mmd); switch (node.getType()) { case NodeType.LIST: { OptEnvironment nenv = new OptEnvironment(); NodeOptInfo nopt = new NodeOptInfo(); nenv.copy(oenv); ListNode lin = (ListNode)node; do { optimizeNodeLeft(lin.value, nopt, nenv); nenv.mmd.add(nopt.length); opt.concatLeftNode(nopt, enc); } while ((lin = 
lin.tail) != null); break; } case NodeType.ALT: { NodeOptInfo nopt = new NodeOptInfo(); ListNode aln = (ListNode)node; do { optimizeNodeLeft(aln.value, nopt, oenv); if (aln == node) { opt.copy(nopt); } else { opt.altMerge(nopt, oenv); } } while ((aln = aln.tail) != null); break; } case NodeType.STR: { StringNode sn = (StringNode)node; int slen = sn.length(); if (!sn.isAmbig()) { opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc); opt.exb.ignoreCase = 0; if (slen > 0) { opt.map.addChar(sn.bytes[sn.p], enc); } opt.length.set(slen, slen); } else { int max; if (sn.isDontGetOptInfo()) { int n = sn.length(enc); max = enc.maxLength() * n; } else { opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc); opt.exb.ignoreCase = 1; if (slen > 0) { opt.map.addCharAmb(sn.bytes, sn.p, sn.end, enc, oenv.caseFoldFlag); } max = slen; } opt.length.set(slen, max); } if (opt.exb.length == slen) { opt.exb.reachEnd = true; } break; } case NodeType.CCLASS: { CClassNode cc = (CClassNode)node; /* no need to check ignore case. 
(setted in setup_tree()) */ if (cc.mbuf != null || cc.isNot()) { int min = enc.minLength(); int max = enc.maxLength(); opt.length.set(min, max); } else { for (int i=0; i= maxCode) { opt.map.addChar((byte)i, enc); } } } else { for (int i=0; i 0) { opt.expr.copy(nopt.exb); } else if (nopt.exm.length > 0) { opt.expr.copy(nopt.exm); } opt.expr.reachEnd = false; if (nopt.map.value > 0) opt.map.copy(nopt.map); break; case AnchorType.LOOK_BEHIND_NOT: break; } // inner switch break; } case NodeType.BREF: { BackRefNode br = (BackRefNode)node; if (br.isRecursion()) { opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); break; } Node[]nodes = oenv.scanEnv.memNodes; int min = 0; int max = 0; if (nodes != null && nodes[br.back[0]] != null) { min = getMinMatchLength(nodes[br.back[0]]); max = getMaxMatchLength(nodes[br.back[0]]); } for (int i=1; i tmin) min = tmin; if (max < tmax) max = tmax; } } opt.length.set(min, max); break; } case NodeType.CALL: { if (Config.USE_SUBEXP_CALL) { CallNode cn = (CallNode)node; if (cn.isRecursion()) { opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); } else { int safe = oenv.options; oenv.options = cn.target.option; optimizeNodeLeft(cn.target, opt, oenv); oenv.options = safe; } } // USE_SUBEXP_CALL break; } case NodeType.QTFR: { NodeOptInfo nopt = new NodeOptInfo(); QuantifierNode qn = (QuantifierNode)node; optimizeNodeLeft(qn.target, nopt, oenv); if (/*qn.lower == 0 &&*/ isRepeatInfinite(qn.upper)) { if (oenv.mmd.max == 0 && qn.target.getType() == NodeType.CANY && qn.greedy) { if (isMultiline(oenv.options)) { opt.anchor.add(AnchorType.ANYCHAR_STAR_ML); } else { opt.anchor.add(AnchorType.ANYCHAR_STAR); } } } /*else*/ { if (qn.lower > 0) { opt.copy(nopt); if (nopt.exb.length > 0) { if (nopt.exb.reachEnd) { int i; for (i = 2; i <= qn.lower && !opt.exb.isFull(); i++) { opt.exb.concat(nopt.exb, enc); } if (i < qn.lower) { opt.exb.reachEnd = false; } } } if (qn.lower != qn.upper) { opt.exb.reachEnd = false; opt.exm.reachEnd = false; } if (qn.lower > 1) { 
opt.exm.reachEnd = false; } } } int min = MinMaxLen.distanceMultiply(nopt.length.min, qn.lower); int max; if (isRepeatInfinite(qn.upper)) { max = nopt.length.max > 0 ? MinMaxLen.INFINITE_DISTANCE : 0; } else { max = MinMaxLen.distanceMultiply(nopt.length.max, qn.upper); } opt.length.set(min, max); break; } case NodeType.ENCLOSE: { EncloseNode en = (EncloseNode)node; switch (en.type) { case EncloseType.OPTION: int save = oenv.options; oenv.options = en.option; optimizeNodeLeft(en.target, opt, oenv); oenv.options = save; break; case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL && ++en.optCount > MAX_NODE_OPT_INFO_REF_COUNT) { int min = 0; int max = MinMaxLen.INFINITE_DISTANCE; if (en.isMinFixed()) min = en.minLength; if (en.isMaxFixed()) max = en.maxLength; opt.length.set(min, max); } else { // USE_SUBEXP_CALL optimizeNodeLeft(en.target, opt, oenv); if (opt.anchor.isSet(AnchorType.ANYCHAR_STAR_MASK)) { if (bsAt(oenv.scanEnv.backrefedMem, en.regNum)) { opt.anchor.remove(AnchorType.ANYCHAR_STAR_MASK); } } } break; case EncloseType.STOP_BACKTRACK: case EncloseType.CONDITION: optimizeNodeLeft(en.target, opt, oenv); break; case EncloseType.ABSENT: opt.length.set(0, MinMaxLen.INFINITE_DISTANCE); break; } // inner switch break; } default: newInternalException(PARSER_BUG); } // switch } protected final void setOptimizedInfoFromTree(Node node) { NodeOptInfo opt = new NodeOptInfo(); OptEnvironment oenv = new OptEnvironment(); oenv.enc = regex.enc; oenv.options = regex.options; oenv.caseFoldFlag = regex.caseFoldFlag; oenv.scanEnv = env; oenv.mmd.clear(); // ?? 
optimizeNodeLeft(node, opt, oenv); regex.anchor = opt.anchor.leftAnchor & (AnchorType.BEGIN_BUF | AnchorType.BEGIN_POSITION | AnchorType.ANYCHAR_STAR | AnchorType.ANYCHAR_STAR_ML | AnchorType.LOOK_BEHIND); if ((opt.anchor.leftAnchor & (AnchorType.LOOK_BEHIND | AnchorType.PREC_READ_NOT)) != 0) regex.anchor &= ~AnchorType.ANYCHAR_STAR_ML; regex.anchor |= opt.anchor.rightAnchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF | AnchorType.PREC_READ_NOT); if ((regex.anchor & (AnchorType.END_BUF | AnchorType.SEMI_END_BUF)) != 0) { regex.anchorDmin = opt.length.min; regex.anchorDmax = opt.length.max; } if (opt.exb.length > 0 || opt.exm.length > 0) { opt.exb.select(opt.exm, enc); if (opt.map.value > 0 && opt.exb.compare(opt.map) > 0) { // !goto set_map;! regex.setOptimizeMapInfo(opt.map); regex.setSubAnchor(opt.map.anchor); } else { regex.setOptimizeExactInfo(opt.exb); regex.setSubAnchor(opt.exb.anchor); } } else if (opt.map.value > 0) { // !set_map:! regex.setOptimizeMapInfo(opt.map); regex.setSubAnchor(opt.map.anchor); } else { regex.subAnchor |= opt.anchor.leftAnchor & AnchorType.BEGIN_LINE; if (opt.length.max == 0) regex.subAnchor |= opt.anchor.rightAnchor & AnchorType.END_LINE; } if (Config.DEBUG_COMPILE || Config.DEBUG_MATCH) { Config.log.println(regex.optimizeInfoToString()); } } } jruby-joni-2.1.41/src/org/joni/ApplyCaseFold.java000066400000000000000000000101241400407002500215230ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of 
the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.jcodings.ApplyAllCaseFoldFunction; import org.jcodings.Encoding; import org.joni.ast.CClassNode; import org.joni.ast.ListNode; import org.joni.ast.StringNode; final class ApplyCaseFold implements ApplyAllCaseFoldFunction { // i_apply_case_fold public void apply(int from, int[]to, int length, Object o) { ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o; ScanEnvironment env = arg.env; Encoding enc = env.enc; CClassNode cc = arg.cc; CClassNode ascCc = arg.ascCc; BitSet bs = cc.bs; boolean addFlag; if (ascCc == null) { addFlag = false; } else if (Encoding.isAscii(from) == Encoding.isAscii(to[0])) { addFlag = true; } else { addFlag = ascCc.isCodeInCC(enc, from); if (ascCc.isNot()) addFlag = !addFlag; } if (length == 1) { boolean inCC = cc.isCodeInCC(enc, from); if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) { if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) { if (addFlag) { if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) { cc.addCodeRange(env, to[0], to[0], false); } else { /* /(?i:[^A-C])/.match("a") ==> fail. 
*/ bs.set(to[0]); } } } } else { if (inCC) { if (addFlag) { if (enc.minLength() > 1 || to[0] >= BitSet.SINGLE_BYTE_SIZE) { if (cc.isNot()) cc.clearNotFlag(env); cc.addCodeRange(env, to[0], to[0], false); } else { if (cc.isNot()) { bs.clear(to[0]); } else { bs.set(to[0]); } } } } } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS } else { if (cc.isCodeInCC(enc, from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) { StringNode node = null; for (int i=0; i 1 || cc.bs.isEmpty()) { len = OPSize.OPCODE; } else { len = OPSize.OPCODE + BitSet.BITSET_SIZE; } len += OPSize.LENGTH + cc.mbuf.getUsed(); } return len; } @Override protected void compileCClassNode(CClassNode cc) { if (cc.mbuf == null) { if (cc.isNot()) { addOpcode(OPCode.CCLASS_NOT); } else { addOpcode(OPCode.CCLASS); } addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset } else { if (enc.minLength() > 1 || cc.bs.isEmpty()) { if (cc.isNot()) { addOpcode(OPCode.CCLASS_MB_NOT); } else { addOpcode(OPCode.CCLASS_MB); } addMultiByteCClass(cc.mbuf); } else { if (cc.isNot()) { addOpcode(OPCode.CCLASS_MIX_NOT); } else { addOpcode(OPCode.CCLASS_MIX); } // store the bit set and mbuf themself! 
addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset addMultiByteCClass(cc.mbuf); } } } @Override protected void compileCTypeNode(CTypeNode node) { CTypeNode cn = node; int op; switch (cn.ctype) { case CharacterType.WORD: if (cn.not) { if (cn.asciiRange) { op = OPCode.ASCII_NOT_WORD; } else { op = OPCode.NOT_WORD; } } else { if (cn.asciiRange) { op = OPCode.ASCII_WORD; } else { op = OPCode.WORD; } } break; default: newInternalException(PARSER_BUG); return; // not reached } // inner switch addOpcode(op); } @Override protected void compileAnyCharNode() { if (isMultiline(regex.options)) { addOpcode(OPCode.ANYCHAR_ML); } else { addOpcode(OPCode.ANYCHAR); } } @Override protected void compileCallNode(CallNode node) { addOpcode(OPCode.CALL); node.unsetAddrList.add(codeLength, node.target); addAbsAddr(0); /*dummy addr.*/ } @Override protected void compileBackrefNode(BackRefNode node) { BackRefNode br = node; if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) { addOpcode(OPCode.BACKREF_WITH_LEVEL); addOption(regex.options & Option.IGNORECASE); addLength(br.nestLevel); // !goto add_bacref_mems;! addLength(br.backNum); for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]); return; } else { // USE_BACKREF_AT_LEVEL if (br.backNum == 1) { if (isIgnoreCase(regex.options)) { addOpcode(OPCode.BACKREFN_IC); addMemNum(br.back[0]); } else { switch (br.back[0]) { case 1: addOpcode(OPCode.BACKREF1); break; case 2: addOpcode(OPCode.BACKREF2); break; default: addOpcode(OPCode.BACKREFN); addOpcode(br.back[0]); break; } // switch } } else { if (isIgnoreCase(regex.options)) { addOpcode(OPCode.BACKREF_MULTI_IC); } else { addOpcode(OPCode.BACKREF_MULTI); } // !add_bacref_mems:! 
addLength(br.backNum); for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]); } } } private static final int REPEAT_RANGE_ALLOC = 8; private void entryRepeatRange(int id, int lower, int upper) { if (regex.repeatRangeLo == null) { regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC]; regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC]; } else if (id >= regex.repeatRangeLo.length){ int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC]; System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length); regex.repeatRangeLo = tmp; tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC]; System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length); regex.repeatRangeHi = tmp; } regex.repeatRangeLo[id] = lower; regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper; } private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) { regex.requireStack = true; int numRepeat = regex.numRepeat; addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG); addMemNum(numRepeat); /* OP_REPEAT ID */ regex.numRepeat++; addRelAddr(targetLen + OPSize.REPEAT_INC); entryRepeatRange(numRepeat, qn.lower, qn.upper); compileTreeEmptyCheck(qn.target, emptyInfo); if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) { addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG); } else { addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG); } addMemNum(numRepeat); /* OP_REPEAT ID */ } private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50; // was 50 private static boolean cknOn(int ckn) { return ckn > 0; } private int compileCECLengthQuantifierNode(QuantifierNode qn) { boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0; int cklen = cknOn(ckn) ? 
OPSize.STATE_CHECK_NUM : 0; /* anychar repeat */ if (qn.target.getType() == NodeType.CANY) { if (qn.greedy && infinite) { if (qn.nextHeadExact != null && !cknOn(ckn)) { return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen; } else { return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen; } } } int modTLen; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } int len; if (infinite && qn.lower <= 1) { if (qn.greedy) { if (qn.lower == 1) { len = OPSize.JUMP; } else { len = 0; } len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP; } else { if (qn.lower == 0) { len = OPSize.JUMP; } else { len = 0; } len += modTLen + OPSize.PUSH + cklen; } } else if (qn.upper == 0) { if (qn.isRefered) { /* /(?..){0}/ */ len = OPSize.JUMP + tlen; } else { len = 0; } } else if (qn.upper == 1 && qn.greedy) { if (qn.lower == 0) { if (cknOn(ckn)) { len = OPSize.STATE_CHECK_PUSH + tlen; } else { len = OPSize.PUSH + tlen; } } else { len = tlen; } } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ len = OPSize.PUSH + cklen + OPSize.JUMP + tlen; } else { len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM; if (cknOn(ckn)) { len += OPSize.STATE_CHECK; } } return len; } @Override protected void compileCECQuantifierNode(QuantifierNode qn) { regex.requireStack = true; boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); int ckn = regex.numCombExpCheck > 0 ? 
qn.combExpCheckNum : 0; if (qn.isAnyCharStar()) { compileTreeNTimes(qn.target, qn.lower); if (qn.nextHeadExact != null && !cknOn(ckn)) { if (isMultiline(regex.options)) { addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT); } else { addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT); } if (cknOn(ckn)) { addStateCheckNum(ckn); } StringNode sn = (StringNode)qn.nextHeadExact; addBytes(sn.bytes, sn.p, 1); return; } else { if (isMultiline(regex.options)) { if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_ANYCHAR_ML_STAR); } else { addOpcode(OPCode.ANYCHAR_ML_STAR); } } else { if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_ANYCHAR_STAR); } else { addOpcode(OPCode.ANYCHAR_STAR); } } if (cknOn(ckn)) { addStateCheckNum(ckn); } return; } } int modTLen; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } if (infinite && qn.lower <= 1) { if (qn.greedy) { if (qn.lower == 1) { addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH : OPSize.PUSH); } if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH); addStateCheckNum(ckn); addRelAddr(modTLen + OPSize.JUMP); } else { addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); } compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ? OPSize.STATE_CHECK_PUSH : OPSize.PUSH))); } else { if (qn.lower == 0) { addOpcodeRelAddr(OPCode.JUMP, modTLen); } compileTreeEmptyCheck(qn.target, emptyInfo); if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP); addStateCheckNum(ckn); addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP)); } else { addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); } } } else if (qn.upper == 0) { if (qn.isRefered) { /* /(?..){0}/ */ addOpcodeRelAddr(OPCode.JUMP, tlen); compileTree(qn.target); } // else r=0 ??? 
} else if (qn.upper == 1 && qn.greedy) { if (qn.lower == 0) { if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH); addStateCheckNum(ckn); addRelAddr(tlen); } else { addOpcodeRelAddr(OPCode.PUSH, tlen); } } compileTree(qn.target); } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */ if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK_PUSH); addStateCheckNum(ckn); addRelAddr(OPSize.JUMP); } else { addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP); } addOpcodeRelAddr(OPCode.JUMP, tlen); compileTree(qn.target); } else { compileRangeRepeatNode(qn, modTLen, emptyInfo); if (cknOn(ckn)) { addOpcode(OPCode.STATE_CHECK); addStateCheckNum(ckn); } } } private int compileNonCECLengthQuantifierNode(QuantifierNode qn) { boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); /* anychar repeat */ if (qn.target.getType() == NodeType.CANY) { if (qn.greedy && infinite) { if (qn.nextHeadExact != null) { return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower; } else { return OPSize.ANYCHAR_STAR + tlen * qn.lower; } } } int modTLen; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } int len; if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { len = OPSize.JUMP; } else { len = tlen * qn.lower; } if (qn.greedy) { if (qn.headExact != null) { len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP; } else if (qn.nextHeadExact != null) { len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP; } else { len += OPSize.PUSH + modTLen + OPSize.JUMP; } } else { len += OPSize.JUMP + modTLen + OPSize.PUSH; } } else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */ len = OPSize.JUMP + tlen; } else if (!infinite && qn.greedy && (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) { len = tlen * qn.lower; len += (OPSize.PUSH + tlen) * 
(qn.upper - qn.lower); } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */ len = OPSize.PUSH + OPSize.JUMP + tlen; } else { len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM; } return len; } @Override protected void compileNonCECQuantifierNode(QuantifierNode qn) { regex.requireStack = true; boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; int tlen = compileLengthTree(qn.target); if (qn.isAnyCharStar()) { compileTreeNTimes(qn.target, qn.lower); if (qn.nextHeadExact != null) { if (isMultiline(regex.options)) { addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT); } else { addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT); } StringNode sn = (StringNode)qn.nextHeadExact; addBytes(sn.bytes, sn.p, 1); return; } else { if (isMultiline(regex.options)) { addOpcode(OPCode.ANYCHAR_ML_STAR); } else { addOpcode(OPCode.ANYCHAR_STAR); } return; } } int modTLen; if (emptyInfo != 0) { modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END); } else { modTLen = tlen; } if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { if (qn.greedy) { if (qn.headExact != null) { addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1); } else if (qn.nextHeadExact != null) { addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT); } else { addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH); } } else { addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP); } } else { compileTreeNTimes(qn.target, qn.lower); } if (qn.greedy) { if (qn.headExact != null) { addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP); StringNode sn = (StringNode)qn.headExact; addBytes(sn.bytes, sn.p, 1); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1)); } else if (qn.nextHeadExact != null) { addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP); StringNode sn = 
(StringNode)qn.nextHeadExact; addBytes(sn.bytes, sn.p, 1); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT)); } else { addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH)); } } else { addOpcodeRelAddr(OPCode.JUMP, modTLen); compileTreeEmptyCheck(qn.target, emptyInfo); addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH)); } } else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */ addOpcodeRelAddr(OPCode.JUMP, tlen); compileTree(qn.target); } else if (!infinite && qn.greedy && (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { int n = qn.upper - qn.lower; compileTreeNTimes(qn.target, qn.lower); for (int i=0; i= code.length) { int length = code.length << 1; while (length <= size) length <<= 1; int[]tmp = new int[length]; System.arraycopy(code, 0, tmp, 0, code.length); code = tmp; } } private void addInt(int i) { if (codeLength >= code.length) { int[]tmp = new int[code.length << 1]; System.arraycopy(code, 0, tmp, 0, code.length); code = tmp; } code[codeLength++] = i; } void setInt(int i, int offset) { ensure(offset); regex.code[offset] = i; } private void addBytes(byte[]bytes, int p ,int length) { ensure(codeLength + length); int end = p + length; while (p < end) code[codeLength++] = bytes[p++]; } private void addInts(int[]ints, int length) { ensure(codeLength + length); System.arraycopy(ints, 0, code, codeLength, length); codeLength += length; } private void addOpcode(int opcode) { addInt(opcode); } private void addStateCheckNum(int num) { addInt(num); } private void addRelAddr(int addr) { addInt(addr); } private void addAbsAddr(int addr) { addInt(addr); } private void addLength(int length) { addInt(length); } private void addMemNum(int num) { addInt(num); } private void addOption(int option) { addInt(option); } private void 
addOpcodeRelAddr(int opcode, int addr) { addOpcode(opcode); addRelAddr(addr); } private void addOpcodeOption(int opcode, int option) { addOpcode(opcode); addOption(option); } private void addTemplate(byte[]bytes) { if (templateNum == 0) { templates = new byte[2][]; } else if (templateNum == templates.length) { byte[][]tmp = new byte[templateNum * 2][]; System.arraycopy(templates, 0, tmp, 0, templateNum); templates = tmp; } templates[templateNum++] = bytes; } } jruby-joni-2.1.41/src/org/joni/BitSet.java000066400000000000000000000073731400407002500202430ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; public final class BitSet { static final int BITS_PER_BYTE = 8; public static final int SINGLE_BYTE_SIZE = (1 << BITS_PER_BYTE); public static final int BITS_IN_ROOM = 4 * BITS_PER_BYTE; public static final int BITSET_SIZE = (SINGLE_BYTE_SIZE / BITS_IN_ROOM); static final int ROOM_SHIFT = log2(BITS_IN_ROOM); public final int[] bits = new int[BITSET_SIZE]; public boolean at(int pos) { return (bits[pos >>> ROOM_SHIFT] & bit(pos)) != 0; } public void set(ScanEnvironment env, int pos) { if (at(pos)) env.ccDuplicateWarn(); set(pos); } public void set(int pos) { bits[pos >>> ROOM_SHIFT] |= bit(pos); } public void clear(int pos) { bits[pos >>> ROOM_SHIFT] &= ~bit(pos); } public void invert(int pos) { bits[pos >>> ROOM_SHIFT] ^= bit(pos); } public void clear() { for (int i=0; i>>= 1) != 0) log++; return log; } private static final int BITS_TO_STRING_WRAP = 4; public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append("BitSet"); for (int i=0; i s) ? s : pkeep) - str; node.end = s - str; stkp = 0; makeCaptureHistoryTree(region.historyRoot); } private byte[]cfbuf; private byte[]cfbuf2; protected final byte[]cfbuf() { return cfbuf == null ? cfbuf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN] : cfbuf; } protected final byte[]cfbuf2() { return cfbuf2 == null ? 
cfbuf2 = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN] : cfbuf2; } private boolean stringCmpIC(int caseFlodFlag, int s1, IntHolder ps2, int mbLen, int textEnd) { byte[]buf1 = cfbuf(); byte[]buf2 = cfbuf2(); int s2 = ps2.value; int end1 = s1 + mbLen; while (s1 < end1) { value = s1; int len1 = enc.mbcCaseFold(caseFlodFlag, bytes, this, textEnd, buf1); s1 = value; value = s2; int len2 = enc.mbcCaseFold(caseFlodFlag, bytes, this, textEnd, buf2); s2 = value; if (len1 != len2) return false; int p1 = 0; int p2 = 0; while (len1-- > 0) { if (buf1[p1] != buf2[p2]) return false; p1++; p2++; } } ps2.value = s2; return true; } protected final int matchAt(int _range, int _sstart, int _sprev, boolean interrupt) throws InterruptedException { range = _range; sstart = _sstart; sprev = _sprev; stk = 0; ip = 0; if (Config.DEBUG_MATCH) debugMatchBegin(); stackInit(); bestLen = -1; s = _sstart; pkeep = _sstart; return enc.isSingleByte() || (msaOptions & Option.CR_7_BIT) != 0 ? executeSb(interrupt) : execute(interrupt); } private final int execute(final boolean checkThreadInterrupt) throws InterruptedException { final int[] code = this.code; int interruptCheckCounter = 0; while (true) { if (interruptCheckCounter++ >= interruptCheckEvery) { handleInterrupted(checkThreadInterrupt); interruptCheckCounter = 0; } if (Config.DEBUG_MATCH) debugMatchLoop(); sbegin = s; switch (code[ip++]) { case OPCode.END: if (opEnd()) return finish(); break; case OPCode.EXACT1: opExact1(); break; case OPCode.EXACT2: opExact2(); continue; case OPCode.EXACT3: opExact3(); continue; case OPCode.EXACT4: opExact4(); continue; case OPCode.EXACT5: opExact5(); continue; case OPCode.EXACTN: opExactN(); continue; case OPCode.EXACTMB2N1: opExactMB2N1(); break; case OPCode.EXACTMB2N2: opExactMB2N2(); continue; case OPCode.EXACTMB2N3: opExactMB2N3(); continue; case OPCode.EXACTMB2N: opExactMB2N(); continue; case OPCode.EXACTMB3N: opExactMB3N(); continue; case OPCode.EXACTMBN: opExactMBN(); continue; case OPCode.EXACT1_IC: 
opExact1IC(); break; case OPCode.EXACTN_IC: opExactNIC(); continue; case OPCode.CCLASS: opCClass(); break; case OPCode.CCLASS_MB: opCClassMB(); break; case OPCode.CCLASS_MIX: opCClassMIX(); break; case OPCode.CCLASS_NOT: opCClassNot(); break; case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break; case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break; case OPCode.ANYCHAR: opAnyChar(); break; case OPCode.ANYCHAR_ML: opAnyCharML(); break; case OPCode.ANYCHAR_STAR: opAnyCharStar(); break; case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break; case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break; case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break; case OPCode.WORD: opWord(); break; case OPCode.NOT_WORD: opNotWord(); break; case OPCode.WORD_BOUND: opWordBound(); continue; case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue; case OPCode.WORD_BEGIN: opWordBegin(); continue; case OPCode.WORD_END: opWordEnd(); continue; case OPCode.ASCII_WORD: opAsciiWord(); break; case OPCode.ASCII_NOT_WORD: opNotAsciiWord(); break; case OPCode.ASCII_WORD_BOUND: opAsciiWordBound(); break; case OPCode.ASCII_NOT_WORD_BOUND: opNotAsciiWordBound(); continue; case OPCode.ASCII_WORD_BEGIN: opAsciiWordBegin(); continue; case OPCode.ASCII_WORD_END: opAsciiWordEnd(); continue; case OPCode.BEGIN_BUF: opBeginBuf(); continue; case OPCode.END_BUF: opEndBuf(); continue; case OPCode.BEGIN_LINE: opBeginLine(); continue; case OPCode.END_LINE: opEndLine(); continue; case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue; case OPCode.BEGIN_POSITION: opBeginPosition(); continue; case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue; case OPCode.MEMORY_START: opMemoryStart(); continue; case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue; case OPCode.MEMORY_END: opMemoryEnd(); continue; // case OPCode.KEEP: opKeep(); continue; case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue; case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue; case OPCode.BACKREF1: 
opBackRef1(); continue;
                // back-references
                case OPCode.BACKREF2: opBackRef2(); continue;
                case OPCode.BACKREFN: opBackRefN(); continue;
                case OPCode.BACKREFN_IC: opBackRefNIC(); continue;
                case OPCode.BACKREF_MULTI: opBackRefMulti(); continue;
                case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue;
                case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue;

                // empty-loop (null) checks
                case OPCode.NULL_CHECK_START: opNullCheckStart(); continue;
                case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue;
                case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue;
                case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue;

                // jumps and backtrack-stack pushes
                case OPCode.JUMP: opJump(); continue;
                case OPCode.PUSH: opPush(); continue;
                case OPCode.POP: opPop(); continue;
                case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue;
                case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue;

                // counted repeats
                case OPCode.REPEAT: opRepeat(); continue;
                case OPCode.REPEAT_NG: opRepeatNG(); continue;
                case OPCode.REPEAT_INC: opRepeatInc(); continue;
                case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue;
                case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue;
                case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue;

                // position push/pop, stop-backtrack, look-behind, absent
                case OPCode.PUSH_POS: opPushPos(); continue;
                case OPCode.POP_POS: opPopPos(); continue;
                case OPCode.PUSH_POS_NOT: opPushPosNot(); continue;
                case OPCode.FAIL_POS: opFailPos(); continue;
                case OPCode.PUSH_STOP_BT: opPushStopBT(); continue;
                case OPCode.POP_STOP_BT: opPopStopBT(); continue;
                case OPCode.LOOK_BEHIND: opLookBehind(); continue;
                case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue;
                case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue;
                case OPCode.PUSH_ABSENT_POS: opPushAbsentPos(); continue;
                case OPCode.ABSENT: opAbsent(); continue;
                case OPCode.ABSENT_END: opAbsentEnd(); continue;

                // calls, conditionals, termination
                case OPCode.CALL: opCall(); continue;
                case OPCode.RETURN: opReturn(); continue;
                case OPCode.CONDITION: opCondition(); continue;
                // FINISH leaves the interpreter loop with finish()'s result
                case OPCode.FINISH: return finish();
                case OPCode.FAIL: opFail(); continue;

                case 
OPCode.STATE_CHECK_ANYCHAR_STAR: if (USE_CEC) {opStateCheckAnyCharStar(); break;} case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:if (USE_CEC) {opStateCheckAnyCharMLStar();break;} case OPCode.STATE_CHECK_PUSH: if (USE_CEC) {opStateCheckPush(); continue;} case OPCode.STATE_CHECK_PUSH_OR_JUMP: if (USE_CEC) {opStateCheckPushOrJump(); continue;} case OPCode.STATE_CHECK: if (USE_CEC) {opStateCheck(); continue;} default: throw new InternalException(ErrorMessages.UNDEFINED_BYTECODE); } // main switch } // main while } private final int executeSb(final boolean checkThreadInterrupt) throws InterruptedException { final int[] code = this.code; int interruptCheckCounter = 0; while (true) { if (interruptCheckCounter++ >= interruptCheckEvery) { handleInterrupted(checkThreadInterrupt); interruptCheckCounter = 0; } if (Config.DEBUG_MATCH) debugMatchLoop(); sbegin = s; switch (code[ip++]) { case OPCode.END: if (opEnd()) return finish(); break; case OPCode.EXACT1: opExact1(); break; case OPCode.EXACT2: opExact2(); continue; case OPCode.EXACT3: opExact3(); continue; case OPCode.EXACT4: opExact4(); continue; case OPCode.EXACT5: opExact5(); continue; case OPCode.EXACTN: opExactN(); continue; case OPCode.EXACTMB2N1: opExactMB2N1(); break; case OPCode.EXACTMB2N2: opExactMB2N2(); continue; case OPCode.EXACTMB2N3: opExactMB2N3(); continue; case OPCode.EXACTMB2N: opExactMB2N(); continue; case OPCode.EXACTMB3N: opExactMB3N(); continue; case OPCode.EXACTMBN: opExactMBN(); continue; case OPCode.EXACT1_IC: opExact1IC(); break; case OPCode.EXACTN_IC: opExactNIC(); continue; case OPCode.CCLASS: opCClassSb(); break; case OPCode.CCLASS_MB: opCClassMBSb(); break; case OPCode.CCLASS_MIX: opCClassMIXSb(); break; case OPCode.CCLASS_NOT: opCClassNotSb(); break; case OPCode.CCLASS_MB_NOT: opCClassMBNotSb(); break; case OPCode.CCLASS_MIX_NOT: opCClassMIXNotSb(); break; case OPCode.ANYCHAR: opAnyCharSb(); break; case OPCode.ANYCHAR_ML: opAnyCharMLSb(); break; case OPCode.ANYCHAR_STAR: opAnyCharStarSb(); break; 
                case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStarSb(); break;
                case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNextSb(); break;
                case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNextSb(); break;

                // word tests and word boundaries (single-byte handlers)
                case OPCode.WORD: opWordSb(); break;
                case OPCode.NOT_WORD: opNotWordSb(); break;
                case OPCode.WORD_BOUND: opWordBoundSb(); continue;
                case OPCode.NOT_WORD_BOUND: opNotWordBoundSb(); continue;
                case OPCode.WORD_BEGIN: opWordBeginSb(); continue;
                case OPCode.WORD_END: opWordEndSb(); continue;

                // the ASCII word opcodes use the same handlers as execute()
                case OPCode.ASCII_WORD: opAsciiWord(); break;
                case OPCode.ASCII_NOT_WORD: opNotAsciiWord(); break;
                case OPCode.ASCII_WORD_BOUND: opAsciiWordBound(); break;
                case OPCode.ASCII_NOT_WORD_BOUND: opNotAsciiWordBound(); continue;
                case OPCode.ASCII_WORD_BEGIN: opAsciiWordBegin(); continue;
                case OPCode.ASCII_WORD_END: opAsciiWordEnd(); continue;

                // anchors (only the line anchors have single-byte handlers)
                case OPCode.BEGIN_BUF: opBeginBuf(); continue;
                case OPCode.END_BUF: opEndBuf(); continue;
                case OPCode.BEGIN_LINE: opBeginLineSb(); continue;
                case OPCode.END_LINE: opEndLineSb(); continue;
                case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue;
                case OPCode.BEGIN_POSITION: opBeginPosition(); continue;

                // capture-group bookkeeping
                case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue;
                case OPCode.MEMORY_START: opMemoryStart(); continue;
                case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue;
                case OPCode.MEMORY_END: opMemoryEnd(); continue;
                case OPCode.KEEP: opKeep(); continue;
                case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue;
                case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue;

                // back-references
                case OPCode.BACKREF1: opBackRef1(); continue;
                case OPCode.BACKREF2: opBackRef2(); continue;
                case OPCode.BACKREFN: opBackRefN(); continue;
                case OPCode.BACKREFN_IC: opBackRefNIC(); continue;
                case OPCode.BACKREF_MULTI: opBackRefMulti(); continue;
                case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue;
                case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue;

                // empty-loop (null) checks
                case OPCode.NULL_CHECK_START: opNullCheckStart(); continue;
                case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue;
                case 
OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue;
                case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue;

                // jumps and backtrack-stack pushes
                case OPCode.JUMP: opJump(); continue;
                case OPCode.PUSH: opPush(); continue;
                case OPCode.POP: opPop(); continue;
                case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue;
                case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue;

                // counted repeats
                case OPCode.REPEAT: opRepeat(); continue;
                case OPCode.REPEAT_NG: opRepeatNG(); continue;
                case OPCode.REPEAT_INC: opRepeatInc(); continue;
                case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue;
                case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue;
                case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue;

                // position push/pop, stop-backtrack, look-behind, absent
                case OPCode.PUSH_POS: opPushPos(); continue;
                case OPCode.POP_POS: opPopPos(); continue;
                case OPCode.PUSH_POS_NOT: opPushPosNot(); continue;
                case OPCode.FAIL_POS: opFailPos(); continue;
                case OPCode.PUSH_STOP_BT: opPushStopBT(); continue;
                case OPCode.POP_STOP_BT: opPopStopBT(); continue;
                case OPCode.LOOK_BEHIND: opLookBehindSb(); continue;
                case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue;
                case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue;
                case OPCode.PUSH_ABSENT_POS: opPushAbsentPos(); continue;
                case OPCode.ABSENT: opAbsent(); continue;
                case OPCode.ABSENT_END: opAbsentEnd(); continue;

                // calls, conditionals, termination
                case OPCode.CALL: opCall(); continue;
                case OPCode.RETURN: opReturn(); continue;
                case OPCode.CONDITION: opCondition(); continue;
                // FINISH leaves the interpreter loop with finish()'s result
                case OPCode.FINISH: return finish();
                case OPCode.FAIL: opFail(); continue;

                // these two opcodes are dispatched only by this loop, not by execute()
                case OPCode.EXACT1_IC_SB: opExact1ICSb(); break;
                case OPCode.EXACTN_IC_SB: opExactNICSb(); continue;

                /* when USE_CEC is false each STATE_CHECK_* case falls through to "default" */
                case OPCode.STATE_CHECK_ANYCHAR_STAR: if (USE_CEC) {opStateCheckAnyCharStarSb(); break;}
                case OPCode.STATE_CHECK_ANYCHAR_ML_STAR:if (USE_CEC) {opStateCheckAnyCharMLStarSb();break;}
                case OPCode.STATE_CHECK_PUSH: if (USE_CEC) {opStateCheckPush(); continue;}
                case OPCode.STATE_CHECK_PUSH_OR_JUMP: if (USE_CEC) {opStateCheckPushOrJump(); continue;}
                case OPCode.STATE_CHECK: if (USE_CEC) 
{opStateCheck(); continue;} default: throw new InternalException(ErrorMessages.UNDEFINED_BYTECODE); } // main switch } // main while } private void handleInterrupted(final boolean checkThreadInterrupt) throws InterruptedException { if (interrupted || (checkThreadInterrupt && Thread.currentThread().isInterrupted())) { Thread.currentThread().interrupted(); throw new InterruptedException(); } interruptCheckEvery = Math.min(interruptCheckEvery << 1, MAX_INTERRUPT_CHECK_EVERY); } private boolean opEnd() { int n = s - sstart; if (n > bestLen) { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (isFindLongest(regex.options)) { if (n > msaBestLen) { msaBestLen = n; msaBestS = sstart; } else { // goto end_best_len; return endBestLength(); } } } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE bestLen = n; final Region region = msaRegion; if (region != null) { // USE_POSIX_REGION_OPTION ... else ... region.beg[0] = msaBegin = ((pkeep > s) ? s : pkeep) - str; region.end[0] = msaEnd = s - str; for (int i = 1; i <= regex.numMem; i++) { int me = repeatStk[memEndStk + i]; if (me != INVALID_INDEX) { int ms = repeatStk[memStartStk + i]; region.beg[i] = (bsAt(regex.btMemStart, i) ? stack[ms].getMemPStr() : ms) - str; region.end[i] = (bsAt(regex.btMemEnd, i) ? stack[me].getMemPStr() : me) - str; } else { region.beg[i] = region.end[i] = Region.REGION_NOTPOS; } } if (Config.USE_CAPTURE_HISTORY && regex.captureHistory != 0) checkCaptureHistory(region); } else { msaBegin = ((pkeep > s) ? s : pkeep) - str; msaEnd = s - str; } } else { Region region = msaRegion; if (region != null) { region.clear(); } else { msaBegin = msaEnd = 0; } } // end_best_len: /* default behavior: return first-matching result. 
*/ return endBestLength(); } private boolean endBestLength() { if (isFindCondition(regex.options)) { if (isFindNotEmpty(regex.options) && s == sstart) { bestLen = -1; {opFail(); return false;} /* for retry */ } if (isFindLongest(regex.options) && s < range) { {opFail(); return false;} /* for retry */ } } // goto finish; return true; } private void opExact1() { if (s >= range || code[ip] != bytes[s]) { opFail(); } else { ip++; s++; sprev = sbegin; // break; } } private void opExact2() { if (s + 2 > range || code[ip] != bytes[s] || code[++ip] != bytes[++s] ) { opFail(); } else { sprev = s; ip++; s++; } } private void opExact3() { if (s + 3 > range || code[ip] != bytes[s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s]) { opFail(); } else { sprev = s; ip++; s++; } } private void opExact4() { if (s + 4 > range || code[ip] != bytes[s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s]) { opFail(); } else { sprev = s; ip++; s++; } } private void opExact5() { if (s + 5 > range || code[ip] != bytes[s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s]) { opFail(); } else { sprev = s; ip++; s++; } } private void opExactN() { int tlen = code[ip++]; if (s + tlen > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while (tlen-- > 0) if (bs[ps++] != bytes[s++]) {opFail(); return;} } else { while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;} } sprev = s - 1; } private void opExactMB2N1() { if (s + 2 > range || code[ip] != bytes[s] || code[++ip] != bytes[++s]) { opFail(); } else { ip++; s++; sprev = sbegin; // break; } } private void opExactMB2N2() { if (s + 4 > range || code[ip] != bytes[s] || code[++ip] != bytes[++s]) {opFail(); return;} ip++; s++; sprev = s; if (code[ip] != bytes[s] || code[++ip] != bytes[++s]) {opFail(); return;} ip++; s++; } private void opExactMB2N3() { if (s + 6 > 
range || code[ip] != bytes[s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s]) {opFail(); return;} ip++; s++; sprev = s; if (code[ip] != bytes[s] || code[++ip] != bytes[++s]) {opFail(); return;} ip++; s++; } private void opExactMB2N() { int tlen = code[ip++]; if (s + tlen * 2 > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while(tlen-- > 0) { if (bs[ps] != bytes[s] || bs[++ps] != bytes[++s]) {opFail(); return;} ps++; s++; } } else { while(tlen-- > 0) { if (code[ip] != bytes[s] || code[++ip] != bytes[++s]) {opFail(); return;} ip++; s++; } } sprev = s - 2; } private void opExactMB3N() { int tlen = code[ip++]; if (s + tlen * 3 > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while (tlen-- > 0) { if (bs[ps] != bytes[s] || bs[++ps] != bytes[++s] || bs[++ps] != bytes[++s]) {opFail(); return;} ps++; s++; } } else { while (tlen-- > 0) { if (code[ip] != bytes[s] || code[++ip] != bytes[++s] || code[++ip] != bytes[++s]) {opFail(); return;} ip++; s++; } } sprev = s - 3; } private void opExactMBN() { int tlen = code[ip++]; /* mb-len */ int tlen2= code[ip++]; /* string len */ tlen2 *= tlen; if (s + tlen2 > range) {opFail(); return;} if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while (tlen2-- > 0) { if (bs[ps] != bytes[s]) {opFail(); return;} ps++; s++; } } else { while (tlen2-- > 0) { if (code[ip] != bytes[s]) {opFail(); return;} ip++; s++; } } sprev = s - tlen; } private void opExact1IC() { if (s >= range) {opFail(); return;} byte[]lowbuf = cfbuf(); value = s; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf); s = value; if (s > range) {opFail(); return;} int q = 0; while (len-- > 0) { if (code[ip] != lowbuf[q]) {opFail(); return;} ip++; q++; } sprev = sbegin; // break; } private void opExact1ICSb() { if (s >= range || 
code[ip] != enc.toLowerCaseTable()[bytes[s++] & 0xff]) {opFail(); return;} ip++; sprev = sbegin; // break; } private void opExactNIC() { int tlen = code[ip++]; byte[]lowbuf = cfbuf(); if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; int endp = ps + tlen; while (ps < endp) { sprev = s; if (s >= range) {opFail(); return;} value = s; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf); s = value; if (s > range) {opFail(); return;} int q = 0; while (len-- > 0) { if (bs[ps] != lowbuf[q]) {opFail(); return;} ps++; q++; } } } else { int endp = ip + tlen; while (ip < endp) { sprev = s; if (s >= range) {opFail(); return;} value = s; int len = enc.mbcCaseFold(regex.caseFoldFlag, bytes, this, end, lowbuf); s = value; if (s > range) {opFail(); return;} int q = 0; while (len-- > 0) { if (code[ip] != lowbuf[q]) {opFail(); return;} ip++; q++; } } } } private void opExactNICSb() { int tlen = code[ip++]; if (s + tlen > range) {opFail(); return;} byte[]toLowerTable = enc.toLowerCaseTable(); if (Config.USE_STRING_TEMPLATES) { byte[]bs = regex.templates[code[ip++]]; int ps = code[ip++]; while (tlen-- > 0) if (bs[ps++] != toLowerTable[bytes[s++] & 0xff]) {opFail(); return;} } else { while (tlen-- > 0) if (code[ip++] != toLowerTable[bytes[s++] & 0xff]) {opFail(); return;} } sprev = s - 1; } private void opCondition() { int mem = code[ip++]; int addr = code[ip++]; if (mem > regex.numMem || repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX) { ip += addr; } } private boolean isInBitSet() { int c = bytes[s] & 0xff; return ((code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); } private void opCClass() { if (s >= range || !isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s += enc.length(bytes, s, end); /* OP_CCLASS can match mb-code. 
\D, \S */ if (s > end) s = end; sprev = sbegin; // break; } private void opCClassSb() { if (s >= range || !isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s++; sprev = sbegin; // break; } private boolean isInClassMB() { int tlen = code[ip++]; if (s >= range) return false; int mbLen = enc.length(bytes, s, end); if (s + mbLen > range) return false; int ss = s; s += mbLen; int c = enc.mbcToCode(bytes, ss, s); if (!CodeRange.isInCodeRange(code, ip, c)) return false; ip += tlen; return true; } private void opCClassMB() { // beyond string check if (s >= range || !enc.isMbcHead(bytes, s, end)) {opFail(); return;} if (!isInClassMB()) {opFail(); return;} // not!!! sprev = sbegin; // break; } private void opCClassMBSb() { opFail(); } private void opCClassMIX() { if (s >= range) {opFail(); return;} if (enc.isMbcHead(bytes, s, end)) { ip += BitSet.BITSET_SIZE; if (!isInClassMB()) {opFail(); return;} } else { if (!isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; int tlen = code[ip++]; // by code range length ip += tlen; s++; } sprev = sbegin; // break; } private void opCClassMIXSb() { if (s >= range || !isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; int tlen = code[ip++]; ip += tlen; s++; sprev = sbegin; // break; } private void opCClassNot() { if (s >= range || isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s += enc.length(bytes, s, end); if (s > end) s = end; sprev = sbegin; // break; } private void opCClassNotSb() { if (s >= range || isInBitSet()) {opFail(); return;} ip += BitSet.BITSET_SIZE; s++; sprev = sbegin; // break; } private boolean isNotInClassMB() { int tlen = code[ip++]; int mbLen = enc.length(bytes, s, end); if (!(s + mbLen <= range)) { if (s >= range) return false; s = end; ip += tlen; return true; } int ss = s; s += mbLen; int c = enc.mbcToCode(bytes, ss, s); if (CodeRange.isInCodeRange(code, ip, c)) return false; ip += tlen; return true; } private void opCClassMBNot() { if (s >= range) {opFail(); return;} if 
(!enc.isMbcHead(bytes, s, end)) {
            s++;
            int tlen = code[ip++];
            ip += tlen;
            sprev = sbegin; // break;
            return;
        }
        if (!isNotInClassMB()) {opFail(); return;}
        sprev = sbegin; // break;
    }

    /** Single-byte negated multibyte class: any available byte matches; the range table is skipped. */
    private void opCClassMBNotSb() {
        if (s >= range) {opFail(); return;}
        s++;

        final int tableLen = code[ip++];
        ip += tableLen;
        sprev = sbegin; // break;
    }

    /**
     * Negated mixed class: a multibyte head must be outside the code range
     * table, any other byte must be outside the bit set.
     */
    private void opCClassMIXNot() {
        if (s >= range) {opFail(); return;}

        if (enc.isMbcHead(bytes, s, end)) {
            ip += BitSet.BITSET_SIZE;
            if (!isNotInClassMB()) {opFail(); return;}
        } else {
            if (isInBitSet()) {opFail(); return;}
            ip += BitSet.BITSET_SIZE;

            final int tableLen = code[ip++];
            ip += tableLen;
            s++;
        }
        sprev = sbegin; // break;
    }

    /** Single-byte negated mixed class: only the bit set is consulted. */
    private void opCClassMIXNotSb() {
        if (s >= range || isInBitSet()) {opFail(); return;}
        ip += BitSet.BITSET_SIZE;
        s++;

        final int tableLen = code[ip++];
        ip += tableLen;
        sprev = sbegin; // break;
    }

    /** Matches one character that fits inside {@code range} and is not a newline. */
    private void opAnyChar() {
        if (s >= range) {opFail(); return;}

        final int charLen = enc.length(bytes, s, end);
        if (s + charLen > range || enc.isNewLine(bytes, s, end)) {opFail(); return;}

        s += charLen;
        sprev = sbegin; // break;
    }

    /** Single-byte variant: one byte that is not {@code Encoding.NEW_LINE}. */
    private void opAnyCharSb() {
        if (s >= range || bytes[s] == Encoding.NEW_LINE) {opFail(); return;}
        s++;
        sprev = sbegin; // break;
    }

    /** Matches one character that fits inside {@code range}, newline included. */
    private void opAnyCharML() {
        if (s >= range) {opFail(); return;}

        final int charLen = enc.length(bytes, s, end);
        if (s + charLen > range) {opFail(); return;}

        s += charLen;
        sprev = sbegin; // break;
    }

    /** Single-byte variant: any one byte. */
    private void opAnyCharMLSb() {
        if (s >= range) {opFail(); return;}
        s++;
        sprev = sbegin; // break;
    }

    /**
     * Consumes characters up to {@code range}, pushing an alternative at
     * every position before consuming; fails at a newline or at a character
     * that would cross {@code range}.
     */
    private void opAnyCharStar() {
        final byte[] text = this.bytes;
        while (s < range) {
            pushAlt(ip, s, sprev, pkeep);

            final int charLen = enc.length(text, s, end);
            if (s + charLen > range || enc.isNewLine(text, s, end)) {opFail(); return;}

            sprev = s;
            s += charLen;
        }
    }

    /** Single-byte variant of {@code opAnyCharStar()}. */
    private void opAnyCharStarSb() {
        final byte[] text = this.bytes;
        for (; s < range; s++) {
            pushAlt(ip, s, sprev, pkeep);
            if (text[s] == Encoding.NEW_LINE) {opFail(); return;}
            sprev = s;
        }
    }

    private void opAnyCharMLStar() {
        final byte[]bytes = this.bytes;
        while (s < range) {
            pushAlt(ip, s, sprev, pkeep);
            int n = 
enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} sprev = s; s += n; } } private void opAnyCharMLStarSb() { while (s < range) { pushAlt(ip, s, sprev, pkeep); sprev = s; s++; } } private void opAnyCharStarPeekNext() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep); int n = enc.length(bytes, s, end); if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;} sprev = s; s += n; } ip++; sprev = sbegin; // break; } private void opAnyCharStarPeekNextSb() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { byte b = bytes[s]; if (c == b) pushAlt(ip + 1, s, sprev, pkeep); if (b == Encoding.NEW_LINE) {opFail(); return;} sprev = s; s++; } ip++; sprev = sbegin; // break; } private void opAnyCharMLStarPeekNext() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep); int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} sprev = s; s += n; } ip++; sprev = sbegin; // break; } private void opAnyCharMLStarPeekNextSb() { final byte c = (byte)code[ip]; final byte[]bytes = this.bytes; while (s < range) { if (c == bytes[s]) pushAlt(ip + 1, s, sprev, pkeep); sprev = s; s++; } ip++; sprev = sbegin; // break; } // CEC private void opStateCheckAnyCharStar() { int mem = code[ip++]; final byte[]bytes = this.bytes; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem, pkeep); int n = enc.length(bytes, s, end); if (s + n > range || enc.isNewLine(bytes, s, end)) {opFail(); return;} sprev = s; s += n; } sprev = sbegin; // break; } private void opStateCheckAnyCharStarSb() { int mem = code[ip++]; final byte[]bytes = this.bytes; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem, pkeep); if (bytes[s] == Encoding.NEW_LINE) {opFail(); return;} 
sprev = s; s++; } sprev = sbegin; // break; } // CEC private void opStateCheckAnyCharMLStar() { int mem = code[ip++]; final byte[]bytes = this.bytes; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem, pkeep); int n = enc.length(bytes, s, end); if (s + n > range) {opFail(); return;} sprev = s; s += n; } sprev = sbegin; // break; } private void opStateCheckAnyCharMLStarSb() { int mem = code[ip++]; while (s < range) { if (stateCheckVal(s, mem)) {opFail(); return;} pushAltWithStateCheck(ip, s, sprev, mem, pkeep); sprev = s; s++; } sprev = sbegin; // break; } private void opWord() { if (s >= range || !enc.isMbcWord(bytes, s, end)) {opFail(); return;} s += enc.length(bytes, s, end); sprev = sbegin; // break; } private void opWordSb() { if (s >= range || !enc.isWord(bytes[s] & 0xff)) {opFail(); return;} s++; sprev = sbegin; // break; } private void opAsciiWord() { if (s >= range || !isMbcAsciiWord(enc, bytes, s, end)) {opFail(); return;} s += enc.length(bytes, s, end); sprev = sbegin; // break; } private void opNotWord() { if (s >= range || enc.isMbcWord(bytes, s, end)) {opFail(); return;} s += enc.length(bytes, s, end); sprev = sbegin; // break; } private void opNotWordSb() { if (s >= range || enc.isWord(bytes[s] & 0xff)) {opFail(); return;} s++; sprev = sbegin; // break; } private void opNotAsciiWord() { if (s >= range || isMbcAsciiWord(enc, bytes, s, end)) {opFail(); return;} s += enc.length(bytes, s, end); sprev = sbegin; // break; } private void opWordBound() { if (s == str) { if (s >= range || !enc.isMbcWord(bytes, s, end)) {opFail(); return;} } else if (s == end) { if (sprev >= end || !enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } else { if (enc.isMbcWord(bytes, s, end) == enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } } private void opWordBoundSb() { if (s == str) { if (s >= range || !enc.isWord(bytes[s] & 0xff)) {opFail(); return;} } else if (s == end) { if (sprev >= end || 
!enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } else { if (enc.isWord(bytes[s] & 0xff) == enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } } private void opAsciiWordBound() { if (s == str) { if (s >= range || !isMbcAsciiWord(enc, bytes, s, end)) {opFail(); return;} } else if (s == end) { if (sprev >= end || !isMbcAsciiWord(enc, bytes, sprev, end)) {opFail(); return;} } else { if (isMbcAsciiWord(enc, bytes, s, end) == isMbcAsciiWord(enc, bytes, sprev, end)) {opFail(); return;} } } private void opNotWordBound() { if (s == str) { if (s < range && enc.isMbcWord(bytes, s, end)) {opFail(); return;} } else if (s == end) { if (sprev < end && enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } else { if (enc.isMbcWord(bytes, s, end) != enc.isMbcWord(bytes, sprev, end)) {opFail(); return;} } } private void opNotWordBoundSb() { if (s == str) { if (s < range && enc.isWord(bytes[s] & 0xff)) {opFail(); return;} } else if (s == end) { if (sprev < end && enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } else { if (enc.isWord(bytes[s] & 0xff) != enc.isWord(bytes[sprev] & 0xff)) {opFail(); return;} } } private void opNotAsciiWordBound() { if (s == str) { if (s < range && isMbcAsciiWord(enc, bytes, s, end)) {opFail(); return;} } else if (s == end) { if (sprev < end && isMbcAsciiWord(enc, bytes, sprev, end)) {opFail(); return;} } else { if (isMbcAsciiWord(enc, bytes, s, end) != isMbcAsciiWord(enc, bytes, sprev, end)) {opFail(); return;} } } private void opWordBegin() { if (s < range && enc.isMbcWord(bytes, s, end)) { if (s == str || !enc.isMbcWord(bytes, sprev, end)) return; } opFail(); } private void opWordBeginSb() { if (s < range && enc.isWord(bytes[s] & 0xff)) { if (s == str || !enc.isWord(bytes[sprev] & 0xff)) return; } opFail(); } private void opAsciiWordBegin() { if (s < range && isMbcAsciiWord(enc, bytes, s, end)) { if (s == str || !isMbcAsciiWord(enc, bytes, sprev, end)) return; } opFail(); } private void opWordEnd() { if (s != str && 
enc.isMbcWord(bytes, sprev, end)) { if (s == end || !enc.isMbcWord(bytes, s, end)) return; } opFail(); } private void opWordEndSb() { if (s != str && enc.isWord(bytes[sprev] & 0xff)) { if (s == end || !enc.isWord(bytes[s] & 0xff)) return; } opFail(); } private void opAsciiWordEnd() { if (s != str && isMbcAsciiWord(enc, bytes, sprev, end)) { if (s == end || !isMbcAsciiWord(enc, bytes, s, end)) return; } opFail(); } private void opBeginBuf() { if (s != str) opFail(); } private void opEndBuf() { if (s != end) opFail(); } private void opBeginLine() { if (s == str) { if (isNotBol(msaOptions)) opFail(); return; } else if (enc.isNewLine(bytes, sprev, end) && s != end) { return; } opFail(); } private void opBeginLineSb() { if (s == str) { if (isNotBol(msaOptions)) opFail(); return; } else if (bytes[sprev] == Encoding.NEW_LINE && s != end) { return; } opFail(); } private void opEndLine() { if (s == end) { if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { if (str == end || !enc.isNewLine(bytes, sprev, end)) { if (isNotEol(msaOptions)) opFail(); } return; } else { if (isNotEol(msaOptions)) opFail(); return; } } else if (enc.isNewLine(bytes, s, end) || (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end))) { return; } opFail(); } private void opEndLineSb() { if (s == end) { if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { if (str == end || !(sprev < end && bytes[sprev] == Encoding.NEW_LINE)) { if (isNotEol(msaOptions)) opFail(); } return; } else { if (isNotEol(msaOptions)) opFail(); return; } } else if (bytes[s] == Encoding.NEW_LINE || (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end))) { return; } opFail(); } private void opSemiEndBuf() { if (s == end) { if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { if (str == end || !enc.isNewLine(bytes, sprev, end)) { if (isNotEol(msaOptions)) opFail(); } return; } else { if (isNotEol(msaOptions)) opFail(); return; } } else if (enc.isNewLine(bytes, s, end) && (s + 
enc.length(bytes, s, end)) == end) { return; } else if (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end)) { int ss = s + enc.length(bytes, s, end); ss += enc.length(bytes, ss, end); if (ss == end) return; } opFail(); } private void opBeginPosition() { if (s != msaGpos) opFail(); } private void opMemoryStartPush() { int mem = code[ip++]; pushMemStart(mem, s); } private void opMemoryStart() { int mem = code[ip++]; repeatStk[memStartStk + mem] = s; repeatStk[memEndStk + mem] = -1; } private void opMemoryEndPush() { int mem = code[ip++]; pushMemEnd(mem, s); } private void opMemoryEnd() { int mem = code[ip++]; repeatStk[memEndStk + mem] = s; } private void opKeep() { pkeep = s; } private void opMemoryEndPushRec() { int mem = code[ip++]; int stkp = getMemStart(mem); /* should be before push mem-end. */ pushMemEnd(mem, s); repeatStk[memStartStk + mem] = stkp; } private void opMemoryEndRec() { int mem = code[ip++]; repeatStk[memEndStk + mem] = s; int stkp = getMemStart(mem); repeatStk[memStartStk + mem] = bsAt(regex.btMemStart, mem) ? stkp : stack[stkp].getMemPStr(); pushMemEndMark(mem); } private boolean backrefInvalid(int mem) { return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX; } private int backrefStart(int mem) { int ms = repeatStk[memStartStk + mem]; return bsAt(regex.btMemStart, mem) ? stack[ms].getMemPStr() : ms; } private int backrefEnd(int mem) { int me = repeatStk[memEndStk + mem]; return bsAt(regex.btMemEnd, mem) ? 
stack[me].getMemPStr() : me; } private void backref(int mem) { if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} int pstart = backrefStart(mem); int pend = backrefEnd(mem); int n = pend - pstart; if (s + n > range) {opFail(); return;} sprev = s; while (n-- > 0) if (bytes[pstart++] != bytes[s++]) {opFail(); return;} if (sprev < range) { // beyond string check int len; while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } } private void opBackRef1() { backref(1); } private void opBackRef2() { backref(2); } private void opBackRefN() { backref(code[ip++]); } private void opBackRefNIC() { int mem = code[ip++]; if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;} int pstart = backrefStart(mem); int pend = backrefEnd(mem); int n = pend - pstart; if (s + n > range) {opFail(); return;} sprev = s; value = s; if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;} s = value; if (sprev < range) { int len; while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } } private void opBackRefMulti() { int tlen = code[ip++]; int i; loop:for (i=0; i range) {opFail(); return;} sprev = s; int swork = s; while (n-- > 0) { if (bytes[pstart++] != bytes[swork++]) continue loop; } s = swork; int len; // beyond string check if (sprev < range) { while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } ip += tlen - i - 1; // * SIZE_MEMNUM (1) break; /* success */ } if (i == tlen) {opFail(); return;} } private void opBackRefMultiIC() { int tlen = code[ip++]; int i; loop:for (i=0; i range) {opFail(); return;} sprev = s; value = s; if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) continue loop; // STRING_CMP_VALUE_IC s = value; int len; if (sprev < range) { while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } ip += tlen - i - 1; // * SIZE_MEMNUM (1) break; /* success */ } if (i == tlen) {opFail(); return;} } private boolean memIsInMemp(int mem, int num, int 
memp) { for (int i=0; i= 0) { StackEntry e = stack[k]; if (e.type == CALL_FRAME) { level--; } else if (e.type == RETURN) { level++; } else if (level == nest) { if (e.type == MEM_START) { if (memIsInMemp(e.getMemNum(), memNum, memp)) { int pstart = e.getMemPStr(); if (pend != -1) { if (pend - pstart > end - s) return false; /* or goto next_mem; */ int p = pstart; value = s; if (ignoreCase) { if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) { return false; /* or goto next_mem; */ } } else { while (p < pend) { if (bytes[p++] != bytes[value++]) return false; /* or goto next_mem; */ } } s = value; return true; } } } else if (e.type == MEM_END) { if (memIsInMemp(e.getMemNum(), memNum, memp)) { pend = e.getMemPStr(); } } } k--; } return false; } private void opBackRefAtLevel() { int ic = code[ip++]; int level = code[ip++]; int tlen = code[ip++]; sprev = s; if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit int len; if (sprev < range) { while (sprev + (len = enc.length(bytes, sprev, end)) < s) sprev += len; } ip += tlen; // * SIZE_MEMNUM } else { {opFail(); return;} } } /* no need: IS_DYNAMIC_OPTION() == 0 */ @SuppressWarnings("unused") private void opSetOptionPush() { // option = code[ip++]; // final for now pushAlt(ip, s, sprev, pkeep); ip += OPSize.SET_OPTION + OPSize.FAIL; } @SuppressWarnings("unused") private void opSetOption() { // option = code[ip++]; // final for now } private void opNullCheckStart() { int mem = code[ip++]; pushNullCheckStart(mem, s); } private void nullCheckFound() { // null_check_found: /* empty loop founded, skip next instruction */ switch(code[ip++]) { case OPCode.JUMP: case OPCode.PUSH: ip++; // p += SIZE_RELADDR; break; case OPCode.REPEAT_INC: case OPCode.REPEAT_INC_NG: case OPCode.REPEAT_INC_SG: case OPCode.REPEAT_INC_NG_SG: ip++; // p += SIZE_MEMNUM; break; default: throw new InternalException(ErrorMessages.UNEXPECTED_BYTECODE); } // switch } private void 
opNullCheckEnd() { int mem = code[ip++]; int isNull = nullCheck(mem, s); /* mem: null check id */ if (isNull != 0) { if (Config.DEBUG_MATCH) { Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s); } nullCheckFound(); } }

    // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
    /**
     * Null-check end using nullCheckMemSt. A result of 0 continues normally,
     * -1 fails the current path, any other value goes to nullCheckFound().
     */
    private void opNullCheckEndMemST() {
        final int id = code[ip++]; /* null check id */
        final int status = nullCheckMemSt(id, s);
        if (status == 0) return;

        if (Config.DEBUG_MATCH) {
            Config.log.println("NULL_CHECK_END_MEMST: skip id:" + id + ", s:" + s);
        }
        if (status == -1) {
            opFail();
        } else {
            nullCheckFound();
        }
    }

    // USE_SUBEXP_CALL
    /**
     * Null-check end for the recursive case. Picks nullCheckMemStRec or
     * nullCheckRec depending on USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT;
     * a 0 result pushes a null-check-end entry, -1 fails, anything else goes to
     * nullCheckFound().
     */
    private void opNullCheckEndMemSTPush() {
        final int id = code[ip++]; /* null check id */
        final int status = Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
                ? nullCheckMemStRec(id, s)
                : nullCheckRec(id, s);

        if (status == 0) {
            pushNullCheckEnd(id);
            return;
        }
        if (Config.DEBUG_MATCH) {
            Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + id + ", s:" + s);
        }
        if (status == -1) {
            opFail();
        } else {
            nullCheckFound();
        }
    }

    /** Relative jump: the offset operand is counted from the slot after the operand. */
    private void opJump() {
        final int rel = code[ip];
        ip = ip + rel + 1;
    }

    /** Pushes an alternative at the relative address operand and falls through. */
    private void opPush() {
        final int rel = code[ip++];
        pushAlt(ip + rel, s, sprev, pkeep);
    }

    // CEC
    /**
     * Fails when stateCheckVal reports true for the state-check operand;
     * otherwise reads the relative address and pushes an alternative that
     * carries the state-check number.
     */
    private void opStateCheckPush() {
        final int checkNum = code[ip++];
        if (!stateCheckVal(s, checkNum)) {
            final int rel = code[ip++];
            pushAltWithStateCheck(ip + rel, s, sprev, checkNum, pkeep);
        } else {
            opFail();
        }
    }

    // CEC
    /**
     * Like {@link #opStateCheckPush()} but jumps to the relative address
     * instead of failing when stateCheckVal reports true.
     */
    private void opStateCheckPushOrJump() {
        final int checkNum = code[ip++];
        final int rel = code[ip++];
        if (!stateCheckVal(s, checkNum)) {
            pushAltWithStateCheck(ip + rel, s, sprev, checkNum, pkeep);
        } else {
            ip += rel;
        }
    }

    // CEC
    /** Fails when stateCheckVal reports true; otherwise pushes a state-check entry. */
    private void opStateCheck() {
        final int checkNum = code[ip++];
        if (!stateCheckVal(s, checkNum)) {
            pushStateCheck(s, checkNum);
        } else {
            opFail();
        }
    }

    /** Drops one stack entry via popOne(). */
    private void opPop() {
        popOne();
    }

    /**
     * Operands: relative address, then one literal. When input remains and the
     * current byte equals the literal, pushes an alternative at the relative
     * address and continues after the literal; otherwise jumps to that address.
     */
    private void opPushOrJumpExact1() {
        final int rel = code[ip++];
        // beyond string check
        final boolean nextMatches = s < range && code[ip] == bytes[s];
        ip++; // both outcomes step over the literal operand
        if (nextMatches) {
            pushAlt(ip + rel, s, sprev, pkeep);
        } else {
            ip += rel;
        }
    }

    private void opPushIfPeekNext() { int addr =
code[ip++]; // beyond string check if (s < range && code[ip] == bytes[s]) { ip++; pushAlt(ip + addr, s, sprev, pkeep); return; } ip++; } private void opRepeat() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int addr= code[ip++]; // ensure1(); repeatStk[mem] = stk; pushRepeat(mem, ip); if (regex.repeatRangeLo[mem] == 0) { // lower pushAlt(ip + addr, s, sprev, pkeep); } } private void opRepeatNG() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int addr= code[ip++]; // ensure1(); repeatStk[mem] = stk; pushRepeat(mem, ip); if (regex.repeatRangeLo[mem] == 0) { pushAlt(ip, s, sprev, pkeep); ip += addr; } } private void repeatInc(int mem, int si) { StackEntry e = stack[si]; e.increaseRepeatCount(); if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) { /* end of repeat. Nothing to do. */ } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { pushAlt(ip, s, sprev, pkeep); ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */ } else { ip = e.getRepeatPCode(); } pushRepeatInc(si); } private void opRepeatInc() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int si = repeatStk[mem]; repeatInc(mem, si); } private void opRepeatIncSG() { int mem = code[ip++]; /* mem: OP_REPEAT ID */ int si = getRepeat(mem); repeatInc(mem, si); } private void repeatIncNG(int mem, int si) { StackEntry e = stack[si]; e.increaseRepeatCount(); if (e.getRepeatCount() < regex.repeatRangeHi[mem]) { if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) { int pcode = e.getRepeatPCode(); pushRepeatInc(si); pushAlt(pcode, s, sprev, pkeep); } else { ip = e.getRepeatPCode(); pushRepeatInc(si); } } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) { pushRepeatInc(si); } } private void opRepeatIncNG() { int mem = code[ip++]; int si = repeatStk[mem]; repeatIncNG(mem, si); } private void opRepeatIncNGSG() { int mem = code[ip++]; int si = getRepeat(mem); repeatIncNG(mem, si); } private void opPushPos() { pushPos(s, sprev, pkeep); } private void opPopPos() { StackEntry e = stack[posEnd()]; s = 
e.getStatePStr(); sprev= e.getStatePStrPrev(); } private void opPushPosNot() { int addr = code[ip++]; pushPosNot(ip + addr, s, sprev, pkeep); } private void opFailPos() { popTilPosNot(); opFail(); } private void opPushStopBT() { pushStopBT(); } private void opPopStopBT() { stopBtEnd(); } private void opLookBehind() { int tlen = code[ip++]; s = enc.stepBack(bytes, str, s, end, tlen); if (s == -1) {opFail(); return;} sprev = enc.prevCharHead(bytes, str, s, end); } private void opLookBehindSb() { int tlen = code[ip++]; s -= tlen; if (s < str) {opFail(); return;} sprev = s == str ? -1 : s - 1; } private void opPushLookBehindNot() { int addr = code[ip++]; int tlen = code[ip++]; int q = enc.stepBack(bytes, str, s, end, tlen); if (q == -1) { /* too short case -> success. ex. /(? aend && s > absent) { pop(); opFail(); return; } else if (s >= aend && s > absent) { if (s > aend || s > end) { opFail(); return; } ip += addr; } else { pushAlt(ip + addr, s, sprev, pkeep); int n = (s >= end) ? 1 : enc.length(bytes, s, end); pushAbsentPos(absent, range); pushAlt(selfip, s + n, s, pkeep); pushAbsent(); range = aend; } } private void opAbsentEnd() { if (sprev < range) range = sprev; if (Config.DEBUG_MATCH) System.out.println("ABSENT_END: end:" + range); popTilAbsent(); opFail(); return; // sprev = sbegin; // break; } private void opCall() { int addr = code[ip++]; pushCallFrame(ip); ip = addr; // absolute address } private void opReturn() { ip = sreturn(); pushReturn(); } private void opFail() { if (stack == null) { ip = regex.codeLength - 1; return; } StackEntry e = pop(); ip = e.getStatePCode(); s = e.getStatePStr(); sprev = e.getStatePStrPrev(); pkeep = e.getPKeep(); if (USE_CEC) { if (((SCStackEntry)e).getStateCheck() != 0) { e.type = STATE_CHECK_MARK; stk++; } } } private int finish() { return bestLen; } private void debugMatchBegin() { Config.log.println("match_at: " + "str: " + str + ", end: " + end + ", start: " + sstart + ", sprev: " + sprev); Config.log.println("size: " + 
(end - str) + ", start offset: " + (sstart - str)); } private void debugMatchLoop() { Config.log.printf("%4d", (s - str)).print("> \""); int q, i; for (i = 0, q = s; i < 7 && q < end && s >= 0; i++) { int len = enc.length(bytes, q, end); while (len-- > 0) { if (q < end) { Config.log.print(new String(bytes, q++, 1)); } } } String str = q < end ? "...\"" : "\""; q += str.length(); Config.log.print(str); for (i = 0; i < 20 - (q - s); i++) Config.log.print(" "); StringBuilder sb = new StringBuilder(); new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip); Config.log.println(sb.toString()); } } jruby-joni-2.1.41/src/org/joni/ByteCodePrinter.java000066400000000000000000000320311400407002500221000ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; import org.joni.constants.internal.Arguments; import org.joni.constants.internal.OPCode; import org.joni.constants.internal.OPSize; import org.joni.exception.InternalException; class ByteCodePrinter { final int[] code; final int codeLength; final byte[][] templates; final Encoding enc; public ByteCodePrinter(Regex regex) { code = regex.code; codeLength = regex.codeLength; templates = regex.templates; enc = regex.enc; } public String byteCodeListToString() { return compiledByteCodeListToString(); } private void pString(StringBuilder sb, int len, int s) { sb.append(':'); while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]})); } private void pLenString(StringBuilder sb, int len, int mbLen, int s) { int x = len * mbLen; sb.append(':').append(len).append(':'); while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]})); } private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, byte[]tm, int idx) { int x = len * mbLen; sb.append(":T:").append(len).append(':'); while (x-- > 0) sb.append(new String(tm, idx++, 1)); } public int compiledByteCodeToString(StringBuilder sb, int bp) { int len, n, mem, addr, scn, cod; BitSet bs; int tm, idx; sb.append('[').append(OPCode.OpCodeNames[code[bp]]); int argType = OPCode.OpCodeArgTypes[code[bp]]; int ip = bp; if (argType != Arguments.SPECIAL) { bp++; switch (argType) { case Arguments.NON: break; case Arguments.RELADDR: sb.append(":(").append(code[bp]).append(')'); bp += OPSize.RELADDR; break; case Arguments.ABSADDR: sb.append(":(").append(code[bp]).append(')'); bp += OPSize.ABSADDR; break; case Arguments.LENGTH: sb.append(':').append(code[bp]); bp += OPSize.LENGTH; break; case Arguments.MEMNUM: sb.append(':').append(code[bp]); bp += OPSize.MEMNUM; break; case Arguments.OPTION: sb.append(':').append(code[bp]); bp += OPSize.OPTION; break; case Arguments.STATE_CHECK: sb.append(':').append(code[bp]); bp += OPSize.STATE_CHECK; break; } } else { switch 
(code[bp++]) { case OPCode.EXACT1: case OPCode.ANYCHAR_STAR_PEEK_NEXT: case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: pString(sb, 1, bp++); break; case OPCode.EXACT2: pString(sb, 2, bp); bp += 2; break; case OPCode.EXACT3: pString(sb, 3, bp); bp += 3; break; case OPCode.EXACT4: pString(sb, 4, bp); bp += 4; break; case OPCode.EXACT5: pString(sb, 5, bp); bp += 5; break; case OPCode.EXACTN: len = code[bp]; bp += OPSize.LENGTH; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 1, templates[tm], idx); } else { pLenString(sb, len, 1, bp); bp += len; } break; case OPCode.EXACTMB2N1: pString(sb, 2, bp); bp += 2; break; case OPCode.EXACTMB2N2: pString(sb, 4, bp); bp += 4; break; case OPCode.EXACTMB2N3: pString(sb, 6, bp); bp += 6; break; case OPCode.EXACTMB2N: len = code[bp]; bp += OPSize.LENGTH; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 2, templates[tm], idx); } else { pLenString(sb, len, 2, bp); bp += len * 2; } break; case OPCode.EXACTMB3N: len = code[bp]; bp += OPSize.LENGTH; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 3, templates[tm], idx); } else { pLenString(sb, len, 3, bp); bp += len * 3; } break; case OPCode.EXACTMBN: int mbLen = code[bp]; bp += OPSize.LENGTH; len = code[bp]; bp += OPSize.LENGTH; n = len * mbLen; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; sb.append(":T:").append(mbLen).append(":").append(len).append(":"); while (n-- > 0) sb.append(new String(templates[tm], idx++, 1)); } else { sb.append(":").append(mbLen).append(":").append(len).append(":"); while (n-- > 0) sb.append(new String(new byte[]{(byte)code[bp++]})); } break; case OPCode.EXACT1_IC: case OPCode.EXACT1_IC_SB: final int MAX_CHAR_LENGTH = 6; byte[]bytes = new 
byte[MAX_CHAR_LENGTH]; for (int i = 0; bp + i < code.length && i < MAX_CHAR_LENGTH; i++) bytes[i] = (byte)code[bp + i]; len = enc.length(bytes, 0, MAX_CHAR_LENGTH); pString(sb, len, bp); bp += len; break; case OPCode.EXACTN_IC: case OPCode.EXACTN_IC_SB: len = code[bp]; bp += OPSize.LENGTH; if (Config.USE_STRING_TEMPLATES) { tm = code[bp]; bp += OPSize.INDEX; idx = code[bp]; bp += OPSize.INDEX; pLenStringFromTemplate(sb, len, 1, templates[tm], idx); } else { pLenString(sb, len, 1, bp); bp += len; } break; case OPCode.CCLASS: bs = new BitSet(); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); n = bs.numOn(); bp += BitSet.BITSET_SIZE; sb.append(':').append(n); break; case OPCode.CCLASS_NOT: bs = new BitSet(); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); n = bs.numOn(); bp += BitSet.BITSET_SIZE; sb.append(':').append(n); break; case OPCode.CCLASS_MB: case OPCode.CCLASS_MB_NOT: len = code[bp]; bp += OPSize.LENGTH; cod = code[bp]; //bp += OPSize.CODE_POINT; bp += len; sb.append(':').append(cod).append(':').append(len); break; case OPCode.CCLASS_MIX: case OPCode.CCLASS_MIX_NOT: bs = new BitSet(); System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); n = bs.numOn(); bp += BitSet.BITSET_SIZE; len = code[bp]; bp += OPSize.LENGTH; cod = code[bp]; //bp += OPSize.CODE_POINT; bp += len; sb.append(':').append(n).append(':').append(cod).append(':').append(len); break; case OPCode.BACKREFN_IC: mem = code[bp]; bp += OPSize.MEMNUM; sb.append(':').append(mem); break; case OPCode.BACKREF_MULTI_IC: case OPCode.BACKREF_MULTI: sb.append(' '); len = code[bp]; bp += OPSize.LENGTH; for (int i=0; i 0) sb.append(", "); sb.append(mem); } break; case OPCode.BACKREF_WITH_LEVEL: { int option = code[bp]; bp += OPSize.OPTION; sb.append(':').append(option); int level = code[bp]; bp += OPSize.LENGTH; sb.append(':').append(level); sb.append(' '); len = code[bp]; bp += OPSize.LENGTH; for (int i=0; i 0) sb.append(", "); sb.append(mem); } break; } case OPCode.REPEAT: case 
OPCode.REPEAT_NG: mem = code[bp]; bp += OPSize.MEMNUM; addr = code[bp]; bp += OPSize.RELADDR; sb.append(':').append(mem).append(':').append(addr); break; case OPCode.PUSH_OR_JUMP_EXACT1: case OPCode.PUSH_IF_PEEK_NEXT: addr = code[bp]; bp += OPSize.RELADDR; sb.append(":(").append(addr).append(')'); pString(sb, 1, bp); bp++; break; case OPCode.LOOK_BEHIND: len = code[bp]; bp += OPSize.LENGTH; sb.append(':').append(len); break; case OPCode.PUSH_LOOK_BEHIND_NOT: addr = code[bp]; bp += OPSize.RELADDR; len = code[bp]; bp += OPSize.LENGTH; sb.append(':').append(len).append(":(").append(addr).append(')'); break; case OPCode.STATE_CHECK_PUSH: case OPCode.STATE_CHECK_PUSH_OR_JUMP: scn = code[bp]; bp += OPSize.STATE_CHECK_NUM; addr = code[bp]; bp += OPSize.RELADDR; sb.append(':').append(scn).append(":(").append(addr).append(')'); break; case OPCode.CONDITION: mem = code[bp]; bp += OPSize.MEMNUM; addr = code[bp]; bp += OPSize.RELADDR; sb.append(':').append(mem).append(":").append(addr); break; default: throw new InternalException("undefined code: " + code[--bp]); } } sb.append(']'); if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) sb.append('@').append(ip).append('(').append(bp - ip).append(')'); return bp; } private String compiledByteCodeListToString() { StringBuilder sb = new StringBuilder("code length: ").append(codeLength).append('\n'); int ncode = -1; int bp = 0; int end = codeLength; while (bp < end) { ncode++; sb.append(ncode % 5 == 0 ? 
'\n' : ' '); bp = compiledByteCodeToString(sb, bp); } sb.append("\n"); return sb.toString(); } } jruby-joni-2.1.41/src/org/joni/CaptureTreeNode.java000066400000000000000000000045571400407002500221030ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; final class CaptureTreeNode { int group; int beg; int end; // int allocated; int numChildren; CaptureTreeNode[] children; CaptureTreeNode() { beg = Region.REGION_NOTPOS; end = Region.REGION_NOTPOS; group = -1; } static final int HISTORY_TREE_INIT_ALLOC_SIZE = 8; void addChild(CaptureTreeNode child) { if (children == null) { children = new CaptureTreeNode[HISTORY_TREE_INIT_ALLOC_SIZE]; } else if (numChildren >= children.length) { CaptureTreeNode[] tmp = new CaptureTreeNode[children.length << 1]; System.arraycopy(children, 0, tmp, 0, children.length); children = tmp; } children[numChildren] = child; numChildren++; } void clear() { for (int i = 0; i < numChildren; i++) { children[i] = null; // ??? } numChildren = 0; beg = end = Region.REGION_NOTPOS; group = -1; } CaptureTreeNode cloneTree() { CaptureTreeNode clone = new CaptureTreeNode(); clone.beg = beg; clone.end = end; for (int i = 0; i < numChildren; i++) { CaptureTreeNode child = children[i].cloneTree(); clone.addChild(child); } return clone; } } jruby-joni-2.1.41/src/org/joni/CodeRangeBuffer.java000066400000000000000000000302051400407002500220200ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.jcodings.Encoding; import org.joni.exception.ErrorMessages; import org.joni.exception.ValueException; public final class CodeRangeBuffer { private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5; public static final int LAST_CODE_POINT = 0x7fffffff; private int[]p; private int used; public CodeRangeBuffer() { p = new int[INIT_MULTI_BYTE_RANGE_SIZE]; writeCodePoint(0, 0); } public int[]getCodeRange() { return p; } public int getUsed() { return used; } private CodeRangeBuffer(CodeRangeBuffer orig) { p = new int[orig.p.length]; System.arraycopy(orig.p, 0, p, 0, p.length); used = orig.used; } public void expand(int low) { int length = p.length; do { length <<= 1; } while (length < low); int[]tmp = new int[length]; System.arraycopy(p, 0, tmp, 0, used); p = tmp; } public void ensureSize(int size) { int length = p.length; while (length < size ) { length <<= 1; } if (p.length != length) { int[]tmp = new int[length]; System.arraycopy(p, 0, tmp, 0, used); p = tmp; } } private void moveRight(int from, int to, int n) { if (to + n > p.length) expand(to + n); System.arraycopy(p, from, p, to, n); if (to + n > used) used = to + n; } protected void moveLeft(int from, int to, int n) { System.arraycopy(p, from, p, to, n); } private void moveLeftAndReduce(int from, int to) { System.arraycopy(p, from, p, to, used - from); used -= from - to; } public void writeCodePoint(int pos, int b) { int u = pos + 1; if (p.length < u) expand(u); p[pos] = b; if (used < u) used = u; } public CodeRangeBuffer clone() { return new CodeRangeBuffer(this); } // add_code_range_to_buf public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) { return 
addCodeRangeToBuff(pbuf, env, from, to, true); }

    /**
     * Inserts the range [from, to] into {@code pbuf}, merging it with every
     * stored range it overlaps or directly adjoins.
     *
     * Buffer layout as used here: slot 0 holds the number of ranges n; range i
     * occupies slots 1 + 2*i (lower bound) and 2 + 2*i (upper bound), and the
     * binary searches below rely on the ranges being in ascending order.
     *
     * @param pbuf     buffer to update; a new one is created when null
     * @param env      receives ccDuplicateWarn() when checkDup is set and the
     *                 new range overlaps a stored one
     * @param from     one end of the range (swapped with {@code to} if larger)
     * @param to       other end of the range
     * @param checkDup whether to report overlaps through {@code env}
     * @return the buffer that was updated (never null)
     * @throws ValueException when the resulting range count would exceed
     *         Config.MAX_MULTI_BYTE_RANGES_NUM
     */
    public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to, boolean checkDup) {
        if (from > to) {
            int n = from;
            from = to;
            to = n;
        }

        if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode

        int[]p = pbuf.p;
        int n = p[0];

        // low: index of the first stored range whose upper bound is >= from - 1,
        // i.e. the first one the new range can touch. The search is skipped for
        // from == 0 (presumably to keep from - 1 from going negative).
        int bound = from == 0 ? 0 : n;
        int low = 0;
        while (low < bound) {
            int x = (low + bound) >>> 1;
            if (from - 1 > p[x * 2 + 2]) {
                low = x + 1;
            } else {
                bound = x;
            }
        }

        // high: index of the first stored range whose lower bound is > to + 1,
        // so ranges [low, high) are the ones to be absorbed. The search is
        // skipped for to == LAST_CODE_POINT (presumably to avoid to + 1 overflowing).
        int high = to == LAST_CODE_POINT ? n : low;
        bound = n;
        while (high < bound) {
            int x = (high + bound) >>> 1;
            if (to + 1 >= p[x * 2 + 1]) {
                high = x + 1;
            } else {
                bound = x;
            }
        }

        // net change in the number of ranges: one added, (high - low) absorbed
        int incN = low + 1 - high;

        if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.TOO_MANY_MULTI_BYTE_RANGES);

        if (incN != 1) {
            // at least one stored range is absorbed
            if (checkDup) {
                if (from <= p[low * 2 + 2] && (p[low * 2 + 1] <= from || p[low * 2 + 2] <= to)) env.ccDuplicateWarn();
            }

            // widen the new range to the outer bounds of the absorbed ones
            if (from > p[low * 2 + 1]) from = p[low * 2 + 1];
            if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2];
        }

        if (incN != 0) {
            // shift the ranges from index high onward so they start right
            // after slot pair low
            int fromPos = 1 + high * 2;
            int toPos = 1 + (low + 1) * 2;

            if (incN > 0) {
                if (high < n) {
                    int size = (n - high) * 2;
                    pbuf.moveRight(fromPos, toPos, size);
                }
            } else {
                pbuf.moveLeftAndReduce(fromPos, toPos);
            }
        }

        // write the (possibly widened) range into slot pair low and update the count
        int pos = 1 + low * 2;
        // pbuf.ensureSize(pos + 2);
        pbuf.writeCodePoint(pos, from);
        pbuf.writeCodePoint(pos + 1, to);
        n += incN;
        pbuf.writeCodePoint(0, n);

        return pbuf;
    }

    // add_code_range, be aware of it returning null!
public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to) { return addCodeRange(pbuf, env, from, to, true); } public static CodeRangeBuffer addCodeRange(CodeRangeBuffer pbuf, ScanEnvironment env, int from, int to, boolean checkDup) { if (from >to) { if (env.syntax.allowEmptyRangeInCC()) { return pbuf; } else { throw new ValueException(ErrorMessages.EMPTY_RANGE_IN_CHAR_CLASS); } } return addCodeRangeToBuff(pbuf, env, from, to, checkDup); } private static int mbcodeStartPosition(Encoding enc) { return enc.minLength() > 1 ? 0 : 0x80; } // SET_ALL_MULTI_BYTE_RANGE protected static CodeRangeBuffer setAllMultiByteRange(ScanEnvironment env, CodeRangeBuffer pbuf) { return addCodeRangeToBuff(pbuf, env, mbcodeStartPosition(env.enc), LAST_CODE_POINT); } // ADD_ALL_MULTI_BYTE_RANGE public static CodeRangeBuffer addAllMultiByteRange(ScanEnvironment env, CodeRangeBuffer pbuf) { if (!env.enc.isSingleByte()) return setAllMultiByteRange(env, pbuf); return pbuf; } // not_code_range_buf public static CodeRangeBuffer notCodeRangeBuff(ScanEnvironment env, CodeRangeBuffer bbuf) { CodeRangeBuffer pbuf = null; if (bbuf == null) return setAllMultiByteRange(env, pbuf); int[]p = bbuf.p; int n = p[0]; if (n <= 0) return setAllMultiByteRange(env, pbuf); int pre = mbcodeStartPosition(env.enc); int from; int to = 0; for (int i=0; i to1) break; } if (from1 <= to1) { pbuf = addCodeRangeToBuff(pbuf, env, from1, to1); } return pbuf; } // and_code_range_buf public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1, CodeRangeBuffer bbuf2, boolean not2, ScanEnvironment env) { CodeRangeBuffer pbuf = null; if (bbuf1 == null) { if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */ return null; } else if (bbuf2 == null) { if (not2) return bbuf1.clone(); return null; } if (not1) { CodeRangeBuffer tbuf; boolean tnot; // swap tnot = not1; not1 = not2; not2 = tnot; tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; } int[]p1 
= bbuf1.p; int n1 = p1[0]; int[]p2 = bbuf2.p; int n2 = p2[0]; if (!not2 && !not1) { /* 1 AND 2 */ for (int i=0; i to1) break; if (to2 < from1) continue; int from = from1 > from2 ? from1 : from2; int to = to1 < to2 ? to1 : to2; pbuf = addCodeRangeToBuff(pbuf, env, from, to); } } } else if (!not1) { /* 1 AND (not 2) */ for (int i=0; i 0 && i % 6 == 0) buf.append("\n "); } return buf.toString(); } private static String rangeNumToString(int num){ return "0x" + Integer.toString(num, 16); } } jruby-joni-2.1.41/src/org/joni/Compiler.java000066400000000000000000000140251400407002500206130ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; import org.joni.ast.AnchorNode; import org.joni.ast.BackRefNode; import org.joni.ast.CClassNode; import org.joni.ast.CTypeNode; import org.joni.ast.CallNode; import org.joni.ast.ListNode; import org.joni.ast.EncloseNode; import org.joni.ast.Node; import org.joni.ast.QuantifierNode; import org.joni.ast.StringNode; import org.joni.constants.internal.NodeType; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.SyntaxException; abstract class Compiler implements ErrorMessages { protected final Analyser analyser; protected final Encoding enc; protected final Regex regex; protected Compiler(Analyser analyser) { this.analyser = analyser; this.regex = analyser.regex; this.enc = regex.enc; } final void compile(Node root) { prepare(); compileTree(root); finish(); } protected abstract void prepare(); protected abstract void finish(); protected abstract void compileAltNode(ListNode node); private void compileStringRawNode(StringNode sn) { if (sn.length() <= 0) return; addCompileString(sn.bytes, sn.p, 1 /*sb*/, sn.length(), false); } private void compileStringNode(StringNode node) { StringNode sn = node; if (sn.length() <= 0) return; boolean ambig = sn.isAmbig(); int p, prev; p = prev = sn.p; int end = sn.end; byte[]bytes = sn.bytes; int prevLen = enc.length(bytes, p, end); p += prevLen; int blen = prevLen; while (p < end) { int len = enc.length(bytes, p, end); if (len == prevLen || ambig) { blen += len; } else { addCompileString(bytes, prev, prevLen, blen, ambig); prev = p; blen = len; prevLen = len; } p += len; } addCompileString(bytes, prev, prevLen, blen, ambig); } protected abstract void addCompileString(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase); protected abstract void compileCClassNode(CClassNode node); protected abstract void compileCTypeNode(CTypeNode node); protected abstract void compileAnyCharNode(); protected abstract void 
compileCallNode(CallNode node); protected abstract void compileBackrefNode(BackRefNode node); protected abstract void compileCECQuantifierNode(QuantifierNode node); protected abstract void compileNonCECQuantifierNode(QuantifierNode node); protected abstract void compileOptionNode(EncloseNode node); protected abstract void compileEncloseNode(EncloseNode node); protected abstract void compileAnchorNode(AnchorNode node); protected final void compileTree(Node node) { switch (node.getType()) { case NodeType.LIST: ListNode lin = (ListNode)node; do { compileTree(lin.value); } while ((lin = lin.tail) != null); break; case NodeType.ALT: compileAltNode((ListNode)node); break; case NodeType.STR: StringNode sn = (StringNode)node; if (sn.isRaw()) { compileStringRawNode(sn); } else { compileStringNode(sn); } break; case NodeType.CCLASS: compileCClassNode((CClassNode)node); break; case NodeType.CTYPE: compileCTypeNode((CTypeNode)node); break; case NodeType.CANY: compileAnyCharNode(); break; case NodeType.BREF: compileBackrefNode((BackRefNode)node); break; case NodeType.CALL: if (Config.USE_SUBEXP_CALL) { compileCallNode((CallNode)node); break; } // USE_SUBEXP_CALL break; case NodeType.QTFR: if (Config.USE_CEC) { compileCECQuantifierNode((QuantifierNode)node); } else { compileNonCECQuantifierNode((QuantifierNode)node); } break; case NodeType.ENCLOSE: EncloseNode enode = (EncloseNode)node; if (enode.isOption()) { compileOptionNode(enode); } else { compileEncloseNode(enode); } break; case NodeType.ANCHOR: compileAnchorNode((AnchorNode)node); break; default: // undefined node type newInternalException(PARSER_BUG); } // switch } protected final void compileTreeNTimes(Node node, int n) { for (int i=0; i, \k */ final boolean USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT = true; /* /(?:()|())*\2/ */ final boolean USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE = true; /* /\n$/ =~ "\n" */ final boolean USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR = true; final boolean 
CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS = true; final boolean USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE = false; final boolean USE_CAPTURE_HISTORY = false; final boolean USE_VARIABLE_META_CHARS = true; final boolean USE_WORD_BEGIN_END = true; /* "\<": word-begin, "\>": word-end */ final boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true; final boolean USE_SUNDAY_QUICK_SEARCH = true; final boolean USE_CEC = false; final boolean USE_DYNAMIC_OPTION = false; final boolean USE_BYTE_MAP = OptExactInfo.OPT_EXACT_MAXLEN <= CHAR_TABLE_SIZE; final boolean USE_INT_MAP_BACKWARD = false; final int NREGION = 10; final int MAX_BACKREF_NUM = 1000; final int MAX_CAPTURE_GROUP_NUM = 32767; final int MAX_REPEAT_NUM = 100000; final int MAX_MULTI_BYTE_RANGES_NUM = 10000; // internal config final boolean USE_OP_PUSH_OR_JUMP_EXACT = true; final boolean USE_QTFR_PEEK_NEXT = true; final int INIT_MATCH_STACK_SIZE = 64; final boolean DONT_OPTIMIZE = false; final boolean USE_STRING_TEMPLATES = true; // use embedded string templates in Regex object as byte arrays instead of compiling them into int bytecode array final int MAX_CAPTURE_HISTORY_GROUP = 31; final int CHECK_STRING_THRESHOLD_LEN = 7; final int CHECK_BUFF_MAX_SIZE = 0x4000; final PrintStream log = System.out; final PrintStream err = System.err; final boolean DEBUG_ALL = false; final boolean DEBUG = DEBUG_ALL; final boolean DEBUG_PARSE_TREE = DEBUG_ALL; final boolean DEBUG_PARSE_TREE_RAW = true; final boolean DEBUG_COMPILE = DEBUG_ALL; final boolean DEBUG_COMPILE_BYTE_CODE_INFO = DEBUG_ALL; final boolean DEBUG_SEARCH = DEBUG_ALL; final boolean DEBUG_MATCH = DEBUG_ALL; } jruby-joni-2.1.41/src/org/joni/Lexer.java000066400000000000000000001310411400407002500201160ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the 
rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni;

import static org.joni.Option.isAsciiRange;
import static org.joni.Option.isSingleline;
import static org.joni.Option.isWordBoundAllRange;
import static org.joni.ast.QuantifierNode.isRepeatInfinite;

import org.jcodings.Ptr;
import org.jcodings.constants.CharacterType;
import org.jcodings.exception.CharacterPropertyException;
import org.jcodings.exception.EncodingError;
import org.joni.ast.QuantifierNode;
import org.joni.constants.MetaChar;
import org.joni.constants.internal.AnchorType;
import org.joni.constants.internal.TokenType;
import org.joni.exception.ErrorMessages;

/**
 * Tokenizer for regex patterns: reads the pattern bytes through the
 * ScannerSupport cursor and fills {@link #token} with the current token.
 */
class Lexer extends ScannerSupport {
    protected final Regex regex;
    protected final ScanEnvironment env;
    protected final Syntax syntax; // fast access to syntax
    protected final Token token = new Token(); // current token

    protected Lexer(Regex regex, Syntax syntax, byte[]bytes, int p, int end, WarnCallback warnings) {
        super(regex.enc, bytes, p, end);
        this.regex = regex;
        this.env = new ScanEnvironment(regex, syntax, warnings);
        this.syntax = env.syntax;
    }

    /**
     * Parses an interval quantifier body (the part after the opening brace):
     * {n}, {n,}, {n,m} and, when the syntax allows it, {,m}.
     * On success the token becomes INTERVAL with its lower/upper bounds set.
     *
     * @return 0: normal {n,m}, 2: fixed {n},
     *         1: not an interval; only returned when the syntax allows invalid
     *            intervals, in which case the position saved by mark() is
     *            restored (or the pattern simply ended right after the brace)
     * !introduce returnCode here
     */
    private int fetchRangeQuantifier() {
        mark();
        boolean synAllow = syntax.allowInvalidInterval();

        if (!left()) {
            if (synAllow) {
                return 1; /* "....{" : OK! */
            } else {
                newSyntaxException(END_PATTERN_AT_LEFT_BRACE);
            }
        }

        if (!synAllow) {
            c = peek();
            if (c == ')' || c == '(' || c == '|') {
                newSyntaxException(END_PATTERN_AT_LEFT_BRACE);
            }
        }

        int low = scanUnsignedNumber();
        if (low < 0) newSyntaxException(ErrorMessages.TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
        if (low > Config.MAX_REPEAT_NUM) newSyntaxException(ErrorMessages.TOO_BIG_NUMBER_FOR_REPEAT_RANGE);

        boolean nonLow = false;
        if (p == _p) { /* can't read low */
            if (syntax.allowIntervalLowAbbrev()) {
                low = 0;
                nonLow = true;
            } else {
                return invalidRangeQuantifier(synAllow);
            }
        }

        if (!left()) return invalidRangeQuantifier(synAllow);

        fetch();
        int up;
        int ret = 0;
        if (c == ',') {
            int prev = p; // ??? last
            up = scanUnsignedNumber();
            // NOTE(review): the lower bound reports overflow via newSyntaxException,
            // the upper bound via newValueException - confirm this is intended.
            if (up < 0) newValueException(TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
            if (up > Config.MAX_REPEAT_NUM) newValueException(TOO_BIG_NUMBER_FOR_REPEAT_RANGE);

            if (p == prev) {
                // nothing was read after the comma; "{,}" is rejected
                if (nonLow) return invalidRangeQuantifier(synAllow);
                up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
            }
        } else {
            if (nonLow) return invalidRangeQuantifier(synAllow);
            unfetch();
            up = low; /* {n} : exact n times */
            ret = 2; /* fixed */
        }

        if (!left()) return invalidRangeQuantifier(synAllow);
        fetch();

        if (syntax.opEscBraceInterval()) {
            // with escaped-brace intervals the closing brace must be escaped too
            if (c != syntax.metaCharTable.esc) return invalidRangeQuantifier(synAllow);
            if (!left()) return invalidRangeQuantifier(synAllow);
            fetch();
        }

        if (c != '}') return invalidRangeQuantifier(synAllow);

        if (!isRepeatInfinite(up) && low > up) {
            newValueException(UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
        }

        token.type = TokenType.INTERVAL;
        token.setRepeatLower(low);
        token.setRepeatUpper(up);

        return ret; /* 0: normal {n,m}, 2: fixed {n} */
    }

    /**
     * Handles a malformed interval: when the syntax tolerates it, rewinds to
     * the position saved by mark() and returns 1; otherwise raises
     * INVALID_REPEAT_RANGE_PATTERN.
     */
    private int invalidRangeQuantifier(boolean synAllow) {
        if (synAllow) {
            restore();
            return 1;
        } else {
            newSyntaxException(INVALID_REPEAT_RANGE_PATTERN);
            return 0; // not reached
        }
    }

    /* \M-, \C-, \c, or \... 
*/
    // Reads the character(s) after an escape and leaves the resulting value in c.
    // Handles \M-x (meta), \C-x and \cx (control); everything else goes through
    // env.convertBackslashValue().
    private void fetchEscapedValue() {
        if (!left()) newSyntaxException(END_PATTERN_AT_ESCAPE);
        fetch();

        switch(c) {

        case 'M':
            if (syntax.op2EscCapitalMBarMeta()) {
                if (!left()) newSyntaxException(END_PATTERN_AT_META);
                fetch();
                if (c != '-') newSyntaxException(META_CODE_SYNTAX);
                if (!left()) newSyntaxException(END_PATTERN_AT_META);
                fetch();
                // nested escape, e.g. \M-\C-x: resolve the inner one first
                if (c == syntax.metaCharTable.esc) fetchEscapedValue();
                c = ((c & 0xff) | 0x80); // set the meta (high) bit
            } else {
                fetchEscapedValueBackSlash();
            }
            break;

        case 'C':
            if (syntax.op2EscCapitalCBarControl()) {
                if (!left()) newSyntaxException(END_PATTERN_AT_CONTROL);
                fetch();
                if (c != '-') newSyntaxException(CONTROL_CODE_SYNTAX);
                fetchEscapedValueControl();
            } else {
                fetchEscapedValueBackSlash();
            }
            break;

        case 'c':
            if (syntax.opEscCControl()) {
                fetchEscapedValueControl();
            }
            // NOTE(review): unlike the 'C' case there is no break here, so the
            // control value is also passed to fetchEscapedValueBackSlash().
            // Presumably harmless; confirm against convertBackslashValue().
            /* fall through */

        default:
            fetchEscapedValueBackSlash();
        } // switch
    }

    // Replaces c with the value env.convertBackslashValue() returns for it.
    private void fetchEscapedValueBackSlash() {
        c = env.convertBackslashValue(c);
    }

    // Reads the character after \c or \C- and turns it into a control code:
    // '?' becomes 0177, anything else is masked with 0x9f. At end of pattern
    // this is an error unless the ECMAScript option is on, in which case c is
    // left untouched.
    private void fetchEscapedValueControl() {
        if (!left()) {
            if (syntax.op3OptionECMAScript()) {
                return;
            } else {
                newSyntaxException(END_PATTERN_AT_CONTROL);
            }
        }
        fetch();
        if (c == '?') {
            c = 0177;
        } else {
            if (c == syntax.metaCharTable.esc) fetchEscapedValue();
            c &= 0x9f;
        }
    }

    // Maps an opening delimiter of a group name to its closing delimiter;
    // returns 0 for any other character.
    private int nameEndCodePoint(int start) {
        switch(start) {
        case '<':
            return '>';
        case '\'':
            return '\'';
        case '(':
            return ')';
        case '{':
            return '}';
        default:
            return 0;
        }
    }

    // USE_NAMED_GROUP && USE_BACKREF_AT_LEVEL
    /*
        \k<name+n>, \k<name-n>
        \k<num+n>,  \k<num-n>
        \k<-num+n>, \k<-num-n>
     */

    // value implicit (rnameEnd)
    // Returns whether a level suffix was present; the back-reference number
    // (if numeric) goes to rbackNum.p and the level to rlevel.p.
    private boolean fetchNameWithLevel(int startCode, Ptr rbackNum, Ptr rlevel) {
        int src = p;
        boolean existLevel = false;
        int isNum = 0;      // 0: name, 1: number, 2: only a leading '-' seen so far
        int sign = 1;

        int endCode = nameEndCodePoint(startCode);
        int pnumHead = p;
        int nameEnd = stop;

        String err = null;
        if (!left()) {
            newValueException(EMPTY_GROUP_NAME);
        } else {
            fetch();
            if (c == endCode) newValueException(EMPTY_GROUP_NAME);
            if (enc.isDigit(c)) {
                isNum = 1;
            } else if (c == '-') {
                isNum = 2;
                sign = -1;
                pnumHead = p;
            }
        }

        while (left()) {
            nameEnd = p;
fetch();
            if (c == endCode || c == ')' || c == '+' || c == '-') {
                // a lone '-' with no digits after it is not a valid number
                if (isNum == 2) err = INVALID_GROUP_NAME;
                break;
            }

            if (isNum != 0) {
                if (enc.isDigit(c)) {
                    isNum = 1;
                } else {
                    err = INVALID_GROUP_NAME;
                    // isNum = 0;
                }
            }
        }

        boolean isEndCode = false;
        if (err == null && c != endCode) {
            if (c == '+' || c == '-') {
                // level suffix: sign followed by an unsigned number
                int flag = c == '-' ? -1 : 1;

                if (!left()) newValueException(INVALID_CHAR_IN_GROUP_NAME);
                fetch();
                if (!enc.isDigit(c)) newValueException(INVALID_GROUP_NAME, src, stop);
                unfetch();
                int level = scanUnsignedNumber();
                if (level < 0) newValueException(TOO_BIG_NUMBER);
                rlevel.p = level * flag;
                existLevel = true;

                if (left()) {
                    fetch();
                    isEndCode = c == endCode;
                }
            }

            if (!isEndCode) {
                err = INVALID_GROUP_NAME;
                nameEnd = stop;
            }
        }

        if (err == null) {
            if (isNum != 0) {
                // re-scan the digits from their first position, then return to
                // where the scan had stopped
                mark();
                p = pnumHead;
                int backNum = scanUnsignedNumber();
                restore();
                if (backNum < 0) {
                    newValueException(TOO_BIG_NUMBER);
                } else if (backNum == 0) {
                    newValueException(INVALID_GROUP_NAME, src, stop);
                }
                rbackNum.p = backNum * sign;
            }
            value = nameEnd;
            return existLevel;
        } else {
            newValueException(INVALID_GROUP_NAME, src, nameEnd);
            return false; // not reached
        }
    }

    // USE_NAMED_GROUP
    // ref: 0 -> define name    (don't allow number name)
    //      1 -> reference name (allow number name)
    //
    // Returns the (signed) group number for a numeric name and 0 for a
    // symbolic one; the end offset of the name is left in the field `value`.
    private int fetchNameForNamedGroup(int startCode, boolean ref) {
        int src = p;
        value = 0;

        int isNum = 0;      // 0: name, 1: number, 2: only a leading '-' seen so far
        int sign = 1;

        int endCode = nameEndCodePoint(startCode);
        int pnumHead = p;
        int nameEnd = stop;

        String err = null;
        if (!left()) {
            newValueException(EMPTY_GROUP_NAME);
        } else {
            fetch();
            if (c == endCode) newValueException(EMPTY_GROUP_NAME);
            if (enc.isDigit(c)) {
                if (ref) {
                    isNum = 1;
                } else {
                    err = INVALID_GROUP_NAME;
                    // isNum = 0;
                }
            } else if (c == '-') {
                if (ref) {
                    isNum = 2;
                    sign = -1;
                    pnumHead = p;
                } else {
                    err = INVALID_GROUP_NAME;
                    // isNum = 0;
                }
            }
        }

        if (err == null) {
            while (left()) {
                nameEnd = p;
                fetch();
                if (c == endCode || c == ')') {
                    if (isNum == 2) {
                        err = INVALID_GROUP_NAME;
                        return fetchNameTeardown(src, endCode, nameEnd, err);
                    }
                    break;
                }

                if (isNum != 0) {
                    if (enc.isDigit(c)) {
                        isNum = 1;
                    } else {
                        if (!enc.isWord(c)) {
                            err = INVALID_CHAR_IN_GROUP_NAME;
                        } else {
                            err = INVALID_GROUP_NAME;
                        }
                        return fetchNameTeardown(src, endCode, nameEnd, err);
                    }
                }
            }

            if (c != endCode) {
                err = INVALID_GROUP_NAME;
                nameEnd = stop;
                return fetchNameErr(src, nameEnd, err);
            }

            int backNum = 0;
            if (isNum != 0) {
                // re-scan the digits from their first position
                mark();
                p = pnumHead;
                backNum = scanUnsignedNumber();
                restore();
                if (backNum < 0) {
                    newValueException(TOO_BIG_NUMBER);
                } else if (backNum == 0) {
                    newValueException(INVALID_GROUP_NAME, src, nameEnd);
                }
                backNum *= sign;
            }
            value = nameEnd;
            return backNum;
        } else {
            return fetchNameTeardown(src, endCode, nameEnd, err);
        }
    }

    // Raises the given error for the name spanning [src, nameEnd).
    private int fetchNameErr(int src, int nameEnd, String err) {
        newValueException(err, src, nameEnd);
        return 0; // not reached
    }

    // Skips ahead to the end delimiter (or ')' / end of pattern) so the error
    // covers the whole offending name, then raises it.
    private int fetchNameTeardown(int src, int endCode, int nameEnd, String err) {
        while (left()) {
            nameEnd = p;
            fetch();
            if (c == endCode || c == ')') break;
        }
        if (!left()) nameEnd = stop;
        return fetchNameErr(src, nameEnd, err);
    }

    // #else USE_NAMED_GROUP
    // make it return nameEnd!
private final int fetchNameForNoNamedGroup(int startCode, boolean ref) { int src = p; value = 0; int sign = 1; int endCode = nameEndCodePoint(startCode); int pnumHead = p; int nameEnd = stop; String err = null; if (!left()) { newValueException(EMPTY_GROUP_NAME); } else { fetch(); if (c == endCode) newValueException(EMPTY_GROUP_NAME); if (enc.isDigit(c)) { } else if (c == '-') { sign = -1; pnumHead = p; } else { err = INVALID_CHAR_IN_GROUP_NAME; } } while(left()) { nameEnd = p; fetch(); if (c == endCode || c == ')') break; if (!enc.isDigit(c)) err = INVALID_CHAR_IN_GROUP_NAME; } if (err == null && c != endCode) { err = INVALID_GROUP_NAME; nameEnd = stop; } if (err == null) { mark(); p = pnumHead; int backNum = scanUnsignedNumber(); restore(); if (backNum < 0) { newValueException(TOO_BIG_NUMBER); } else if (backNum == 0){ newValueException(INVALID_GROUP_NAME, src, nameEnd); } backNum *= sign; value = nameEnd; return backNum; } else { newValueException(err, src, nameEnd); return 0; // not reached } } protected final int fetchName(int startCode, boolean ref) { if (Config.USE_NAMED_GROUP) { return fetchNameForNamedGroup(startCode, ref); } else { return fetchNameForNoNamedGroup(startCode, ref); } } private boolean strExistCheckWithEsc(int[]s, int n, int bad) { int p = this.p; int to = this.stop; boolean inEsc = false; int i; while(p < to) { if (inEsc) { inEsc = false; p += enc.length(bytes, p, to); } else { int x = enc.mbcToCode(bytes, p, to); int q = p + enc.length(bytes, p, to); if (x == s[0]) { for (i=1; i= n) return true; p += enc.length(bytes, p, to); } else { x = enc.mbcToCode(bytes, p, to); if (x == bad) return false; else if (x == syntax.metaCharTable.esc) inEsc = true; p = q; } } } return false; } private static final int send[] = new int[]{':', ']'}; private void fetchTokenInCCFor_charType(boolean flag, int type) { token.type = TokenType.CHAR_TYPE; token.setPropCType(type); token.setPropNot(flag); } private void fetchTokenInCCFor_p() { int c2 = peek(); // !!! 
migrate to peekIs
        if (c2 == '{' && syntax.op2EscPBraceCharProperty()) {
            inc();
            token.type = TokenType.CHAR_PROPERTY;
            token.setPropNot(c == 'P'); // \P{...} is the negated form
            if (left() && syntax.op2EscPBraceCircumflexNot()) {
                c2 = fetchTo();
                if (c2 == '^') {
                    // \p{^...} flips the negation once more
                    token.setPropNot(!token.getPropNot());
                } else {
                    unfetch();
                }
            }
        } else {
            syntaxWarn("invalid Unicode Property \\<%n>", (char)c);
        }
    }

    // \x inside a character class: \x{H..H} yields a CODE_POINT token,
    // \xHH a RAW_BYTE token; both are subject to the corresponding syntax flags.
    private void fetchTokenInCCFor_x() {
        if (!left()) return;
        int last = p;

        if (peekIs('{') && syntax.opEscXBraceHex8()) {
            inc();
            int num = scanUnsignedHexadecimalNumber(0, 8);
            if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
            if (left()) {
                int c2 = peek();
                if (enc.isXDigit(c2)) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
            }

            // require at least one character after the brace, then the closing brace
            if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) {
                inc();
                token.type = TokenType.CODE_POINT;
                token.base = 16;
                token.setCode(num);
            } else {
                /* can't read nothing or invalid format */
                p = last;
            }
        } else if (syntax.opEscXHex2()) {
            int num = scanUnsignedHexadecimalNumber(0, 2);
            if (num < 0) newValueException(TOO_BIG_NUMBER);
            if (p == last) { /* can't read nothing. */
                num = 0; /* but, it's not error */
            }
            token.type = TokenType.RAW_BYTE;
            token.base = 16;
            token.setC(num);
        }
    }

    // \uHHHH inside a character class: yields a CODE_POINT token when the
    // syntax enables it.
    private void fetchTokenInCCFor_u() {
        if (!left()) return;
        int last = p;

        if (syntax.op2EscUHex4()) {
            int num = scanUnsignedHexadecimalNumber(4, 4);
            if (num < -1) newValueException(TOO_SHORT_DIGITS);
            if (num < 0) newValueException(TOO_BIG_NUMBER);
            if (p == last) { /* can't read nothing. */
                num = 0; /* but, it's not error */
            }
            token.type = TokenType.CODE_POINT;
            token.base = 16;
            token.setCode(num);
        }
    }

    // Octal escape inside a character class: up to three octal digits, value <= 0xff.
    private void fetchTokenInCCFor_digit() {
        if (syntax.opEscOctal3()) {
            unfetch();
            int last = p;
            int num = scanUnsignedOctalNumber(3);
            if (num < 0 || num > 0xff) newValueException(TOO_BIG_NUMBER);
            if (p == last) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } token.type = TokenType.RAW_BYTE; token.base = 8; token.setC(num); } } private void fetchTokenInCCFor_posixBracket() { if (syntax.opPosixBracket() && peekIs(':')) { token.backP = p; /* point at '[' is readed */ inc(); if (strExistCheckWithEsc(send, send.length, ']')) { token.type = TokenType.POSIX_BRACKET_OPEN; } else { unfetch(); // remove duplication, goto cc_in_cc; if (syntax.op2CClassSetOp()) { token.type = TokenType.CC_CC_OPEN; } else { env.ccEscWarn("["); } } } else { // cc_in_cc: if (syntax.op2CClassSetOp()) { token.type = TokenType.CC_CC_OPEN; } else { env.ccEscWarn("["); } } } private void fetchTokenInCCFor_and() { if (syntax.op2CClassSetOp() && left() && peekIs('&')) { inc(); token.type = TokenType.CC_AND; } } protected final TokenType fetchTokenInCC() { if (!left()) { token.type = TokenType.EOT; return token.type; } fetch(); token.type = TokenType.CHAR; token.base = 0; token.setC(c); token.escaped = false; if (c == ']') { token.type = TokenType.CC_CLOSE; } else if (c == '-') { token.type = TokenType.CC_RANGE; } else if (c == syntax.metaCharTable.esc) { if (!syntax.backSlashEscapeInCC()) return token.type; if (!left()) newSyntaxException(END_PATTERN_AT_ESCAPE); fetch(); token.escaped = true; token.setC(c); switch (c) { case 'w': fetchTokenInCCFor_charType(false, CharacterType.WORD); break; case 'W': fetchTokenInCCFor_charType(true, CharacterType.WORD); break; case 'd': fetchTokenInCCFor_charType(false, CharacterType.DIGIT); break; case 'D': fetchTokenInCCFor_charType(true, CharacterType.DIGIT); break; case 's': fetchTokenInCCFor_charType(false, CharacterType.SPACE); break; case 'S': fetchTokenInCCFor_charType(true, CharacterType.SPACE); break; case 'h': if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); break; case 'H': if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); break; case 'p': case 'P': if (!left()) break; fetchTokenInCCFor_p(); break; case 
'x': fetchTokenInCCFor_x(); break; case 'u': fetchTokenInCCFor_u(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': fetchTokenInCCFor_digit(); break; default: unfetch(); fetchEscapedValue(); if (token.getC() != c) { token.setCode(c); token.type = TokenType.CODE_POINT; } break; } // switch } else if (c == '[') { fetchTokenInCCFor_posixBracket(); } else if (c == '&') { fetchTokenInCCFor_and(); } return token.type; } protected final int backrefRelToAbs(int relNo) { return env.numMem + 1 + relNo; } private void fetchTokenFor_repeat(int lower, int upper) { token.type = TokenType.OP_REPEAT; token.setRepeatLower(lower); token.setRepeatUpper(upper); greedyCheck(); } private void fetchTokenFor_openBrace() { switch (fetchRangeQuantifier()) { case 0: greedyCheck(); break; case 2: if (syntax.fixedIntervalIsGreedyOnly()) { possessiveCheck(); } else { greedyCheck(); } break; default: /* 1 : normal char */ } // inner switch } private void fetchTokenFor_anchor(int subType) { token.type = TokenType.ANCHOR; token.setAnchorSubtype(subType); } private void fetchTokenFor_xBrace() { if (!left()) return; int last = p; if (peekIs('{') && syntax.opEscXBraceHex8()) { inc(); int num = scanUnsignedHexadecimalNumber(0, 8); if (num < 0) newValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE); if (left()) { if (enc.isXDigit(peek())) newValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE); } if (p > last + enc.length(bytes, last, stop) && left() && peekIs('}')) { inc(); token.type = TokenType.CODE_POINT; token.setCode(num); } else { /* can't read nothing or invalid format */ p = last; } } else if (syntax.opEscXHex2()) { int num = scanUnsignedHexadecimalNumber(0, 2); if (num < 0) newValueException(TOO_BIG_NUMBER); if (p == last) { /* can't read nothing. 
*/
                num = 0; /* but, it's not error */
            }
            token.type = TokenType.RAW_BYTE;
            token.base = 16;
            token.setC(num);
        }
    }

    // \uHHHH outside a character class: yields a CODE_POINT token when the
    // syntax enables it.
    private void fetchTokenFor_uHex() {
        if (!left()) return;
        int last = p;

        if (syntax.op2EscUHex4()) {
            int num = scanUnsignedHexadecimalNumber(4, 4);
            if (num < -1) newValueException(TOO_SHORT_DIGITS);
            if (num < 0) newValueException(TOO_BIG_NUMBER);
            if (p == last) { /* can't read nothing. */
                num = 0; /* but, it's not error */
            }
            token.type = TokenType.CODE_POINT;
            token.base = 16;
            token.setCode(num);
        }
    }

    // Escaped 1-9: a decimal back-reference when the number is plausible
    // (an existing group, or a single digit); otherwise '8'/'9' stay ordinary
    // characters and everything else is retried as an octal escape.
    private void fetchTokenFor_digit() {
        unfetch();
        int last = p;
        int num = scanUnsignedNumber();
        if (num < 0 || num > Config.MAX_BACKREF_NUM) {
            // goto skip_backref
        } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
            if (syntax.strictCheckBackref()) {
                if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(INVALID_BACKREF);
            }
            token.type = TokenType.BACKREF;
            token.setBackrefNum(1);
            token.setBackrefRef1(num);
            token.setBackrefByName(false);
            if (Config.USE_BACKREF_WITH_LEVEL) token.setBackrefExistLevel(false);
            return;
        }

        if (c == '8' || c == '9') { /* normal char */
            // skip_backref: rewind and consume just the one digit
            p = last;
            inc();
            return;
        }
        p = last;

        fetchTokenFor_zero(); /* fall through */
    }

    // Octal escape: at most two further digits after a leading '0', otherwise three.
    private void fetchTokenFor_zero() {
        if (syntax.opEscOctal3()) {
            int last = p;
            int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
            if (num < 0 || num > 0xff) newValueException(TOO_BIG_NUMBER);
            if (p == last) { /* can't read nothing. 
*/
                num = 0; /* but, it's not error */
            }
            token.type = TokenType.RAW_BYTE;
            token.base = 8;
            token.setC(num);
        } else if (c != '0') {
            inc();
        }
    }

    // \k<name> / \k'name': named (or numbered) back-reference; anything else
    // after \k only produces a warning.
    private void fetchTokenFor_NamedBackref() {
        if (Config.USE_NAMED_GROUP) {
            if (syntax.op2EscKNamedBackref() && left()) {
                fetch();
                if (c =='<' || c == '\'') {
                    fetchNamedBackrefToken();
                } else {
                    unfetch();
                    syntaxWarn("invalid back reference");
                }
            }
        }
    }

    // \g{...} is read as a back-reference; \g<...> / \g'...' as a
    // subexpression call (by name, by number, relative with '+', or \g<0>).
    private void fetchTokenFor_subexpCall() {
        if (Config.USE_NAMED_GROUP) {
            if (syntax.op2EscGBraceBackref() && left()) {
                fetch();
                if (c == '{') {
                    fetchNamedBackrefToken();
                } else {
                    unfetch();
                }
            }
        }
        if (Config.USE_SUBEXP_CALL) {
            if (syntax.op2EscGSubexpCall() && left()) {
                fetch();
                if (c == '<' || c == '\'') {
                    int gNum = -1;
                    boolean rel = false;
                    int cnext = peek();
                    int nameEnd = 0;
                    if (cnext == '0') {
                        inc();
                        if (peekIs(nameEndCodePoint(c))) { /* \g<0>, \g'0' */
                            inc();
                            nameEnd = p;
                            gNum = 0;
                        }
                    } else if (cnext == '+') {
                        inc();
                        rel = true;
                    }
                    int prev = p;
                    if (gNum < 0) {
                        gNum = fetchName(c, true);
                        nameEnd = value; // fetchName leaves the end of the name in `value`
                    }
                    token.type = TokenType.CALL;
                    token.setCallNameP(prev);
                    token.setCallNameEnd(nameEnd);
                    token.setCallGNum(gNum);
                    token.setCallRel(rel);
                } else {
                    syntaxWarn("invalid subexp call");
                    unfetch();
                }
            }
        }
    }

    // Reads the name/number of a back-reference (c holds the opening delimiter)
    // and fills the token as a BACKREF, by number or by name.
    protected void fetchNamedBackrefToken() {
        int last = p;
        int backNum;
        if (Config.USE_BACKREF_WITH_LEVEL) {
            Ptr rbackNum = new Ptr();
            Ptr rlevel = new Ptr();
            token.setBackrefExistLevel(fetchNameWithLevel(c, rbackNum, rlevel));
            token.setBackrefLevel(rlevel.p);
            backNum = rbackNum.p;
        } else {
            backNum = fetchName(c, true);
        } // USE_BACKREF_AT_LEVEL
        int nameEnd = value; // set by fetchNameWithLevel/fetchName

        if (backNum != 0) {
            if (backNum < 0) {
                // relative reference: convert to an absolute group number
                backNum = backrefRelToAbs(backNum);
                if (backNum <= 0) newValueException(INVALID_BACKREF);
            }

            if (syntax.strictCheckBackref() && (backNum > env.numMem || env.memNodes == null)) {
                newValueException(INVALID_BACKREF);
            }
            token.type = TokenType.BACKREF;
            token.setBackrefByName(false);
            token.setBackrefNum(1);
            token.setBackrefRef1(backNum);
        } else {
            NameEntry 
e = regex.nameToGroupNumbers(bytes, last, nameEnd); if (e == null) newValueException(UNDEFINED_NAME_REFERENCE, last, nameEnd); if (syntax.strictCheckBackref()) { if (e.backNum == 1) { if (e.backRef1 > env.numMem || env.memNodes == null || env.memNodes[e.backRef1] == null) newValueException(INVALID_BACKREF); } else { for (int i=0; i env.numMem || env.memNodes == null || env.memNodes[e.backRefs[i]] == null) newValueException(INVALID_BACKREF); } } } token.type = TokenType.BACKREF; token.setBackrefByName(true); if (e.backNum == 1) { token.setBackrefNum(1); token.setBackrefRef1(e.backRef1); } else { token.setBackrefNum(e.backNum); token.setBackrefRefs(e.backRefs); } } } private void fetchTokenFor_charProperty() { if (peekIs('{') && syntax.op2EscPBraceCharProperty()) { inc(); token.type = TokenType.CHAR_PROPERTY; token.setPropNot(c == 'P'); if (left() && syntax.op2EscPBraceCircumflexNot()) { fetch(); if (c == '^') { token.setPropNot(!token.getPropNot()); } else { unfetch(); } } } else { syntaxWarn("invalid Unicode Property \\<%n>", (char)c); } } private void fetchTokenFor_metaChars() { if (c == syntax.metaCharTable.anyChar) { token.type = TokenType.ANYCHAR; } else if (c == syntax.metaCharTable.anyTime) { fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); } else if (c == syntax.metaCharTable.zeroOrOneTime) { fetchTokenFor_repeat(0, 1); } else if (c == syntax.metaCharTable.oneOrMoreTime) { fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); } else if (c == syntax.metaCharTable.anyCharAnyTime) { token.type = TokenType.ANYCHAR_ANYTIME; // goto out } } protected final void fetchToken() { int src = p; // mark(); // out start: while(true) { if (!left()) { token.type = TokenType.EOT; return; } token.type = TokenType.STRING; token.base = 0; token.backP = p; fetch(); if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn) if (!left()) newSyntaxException(END_PATTERN_AT_ESCAPE); token.backP = p; fetch(); token.setC(c); 
token.escaped = true; switch(c) { case '*': if (syntax.opEscAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); break; case '+': if (syntax.opEscPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); break; case '?': if (syntax.opEscQMarkZeroOne()) fetchTokenFor_repeat(0, 1); break; case '{': if (syntax.opEscBraceInterval()) fetchTokenFor_openBrace(); break; case '|': if (syntax.opEscVBarAlt()) token.type = TokenType.ALT; break; case '(': if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_OPEN; break; case ')': if (syntax.opEscLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE; break; case 'w': if (syntax.opEscWWord()) fetchTokenInCCFor_charType(false, CharacterType.WORD); break; case 'W': if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, CharacterType.WORD); break; case 'b': if (syntax.opEscBWordBound()) { fetchTokenFor_anchor(AnchorType.WORD_BOUND); token.setAnchorASCIIRange(isAsciiRange(env.option) && !isWordBoundAllRange(env.option)); } break; case 'B': if (syntax.opEscBWordBound()) { fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); token.setAnchorASCIIRange(isAsciiRange(env.option) && !isWordBoundAllRange(env.option)); } break; case '<': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { fetchTokenFor_anchor(AnchorType.WORD_BEGIN); token.setAnchorASCIIRange(isAsciiRange(env.option)); } break; case '>': if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { fetchTokenFor_anchor(AnchorType.WORD_END); token.setAnchorASCIIRange(isAsciiRange(env.option)); } break; case 's': if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, CharacterType.SPACE); break; case 'S': if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(true, CharacterType.SPACE); break; case 'd': if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(false, CharacterType.DIGIT); break; case 'D': if (syntax.opEscDDigit()) fetchTokenInCCFor_charType(true, CharacterType.DIGIT); break; case 'h': if 
(syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(false, CharacterType.XDIGIT); break; case 'H': if (syntax.op2EscHXDigit()) fetchTokenInCCFor_charType(true, CharacterType.XDIGIT); break; case 'A': if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF); break; case 'Z': if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.SEMI_END_BUF); break; case 'z': if (syntax.opEscAZBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF); break; case 'G': if (syntax.opEscCapitalGBeginAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_POSITION); break; case '`': if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.BEGIN_BUF); break; case '\'': if (syntax.op2EscGnuBufAnchor()) fetchTokenFor_anchor(AnchorType.END_BUF); break; case 'x': fetchTokenFor_xBrace(); break; case 'u': fetchTokenFor_uHex(); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': fetchTokenFor_digit(); break; case '0': fetchTokenFor_zero(); break; case 'k': fetchTokenFor_NamedBackref(); break; case 'g': fetchTokenFor_subexpCall(); break; case 'Q': if (syntax.op2EscCapitalQQuote()) token.type = TokenType.QUOTE_OPEN; break; case 'p': case 'P': fetchTokenFor_charProperty(); break; case 'R': if (syntax.op2EscCapitalRLinebreak()) token.type = TokenType.LINEBREAK; break; case 'X': if (syntax.op2EscCapitalXExtendedGraphemeCluster()) token.type = TokenType.EXTENDED_GRAPHEME_CLUSTER; break; case 'K': if (syntax.op2EscCapitalKKeep()) token.type = TokenType.KEEP; break; default: unfetch(); fetchEscapedValue(); if (token.getC() != c) { /* set_raw: */ token.type = TokenType.CODE_POINT; token.setCode(c); } else { /* string */ p = token.backP + enc.length(bytes, token.backP, stop); } break; } // switch (c) } else { token.setC(c); token.escaped = false; if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) { fetchTokenFor_metaChars(); break; } { switch(c) { case '.': if 
(syntax.opDotAnyChar()) token.type = TokenType.ANYCHAR; break; case '*': if (syntax.opAsteriskZeroInf()) fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE); break; case '+': if (syntax.opPlusOneInf()) fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE); break; case '?': if (syntax.opQMarkZeroOne()) fetchTokenFor_repeat(0, 1); break; case '{': if (syntax.opBraceInterval()) fetchTokenFor_openBrace(); break; case '|': if (syntax.opVBarAlt()) token.type = TokenType.ALT; break; case '(': if (peekIs('?') && syntax.op2QMarkGroupEffect()) { inc(); if (peekIs('#')) { fetch(); while (true) { if (!left()) newSyntaxException(END_PATTERN_IN_GROUP); fetch(); if (c == syntax.metaCharTable.esc) { if (left()) fetch(); } else { if (c == ')') break; } } continue start; // goto start } unfetch(); } if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_OPEN; break; case ')': if (syntax.opLParenSubexp()) token.type = TokenType.SUBEXP_CLOSE; break; case '^': if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE); break; case '$': if (syntax.opLineAnchor()) fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.SEMI_END_BUF : AnchorType.END_LINE); break; case '[': if (syntax.opBracketCC()) token.type = TokenType.CC_OPEN; break; case ']': if (src > getBegin()) { /* /].../ is allowed. 
*/ env.closeBracketWithoutEscapeWarn("]"); } break; case '#': if (Option.isExtend(env.option)) { while (left()) { fetch(); if (enc.isNewLine(c)) break; } continue start; // goto start } break; case ' ': case '\t': case '\n': case '\r': case '\f': if (Option.isExtend(env.option)) continue start; // goto start break; default: // string break; } // switch } } break; } // while } private void greedyCheck() { if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) { fetch(); token.setRepeatGreedy(false); token.setRepeatPossessive(false); } else { possessiveCheck(); } } private void possessiveCheck() { if (left() && peekIs('+') && (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL || syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) { fetch(); token.setRepeatGreedy(true); token.setRepeatPossessive(true); } else { token.setRepeatGreedy(true); token.setRepeatPossessive(false); } } protected final int fetchCharPropertyToCType() { mark(); while (left()) { int last = p; fetch(); if (c == '}') { return enc.propertyNameToCType(bytes, _p, last); } else if (c == '(' || c == ')' || c == '{' || c == '|') { throw new CharacterPropertyException(EncodingError.ERR_INVALID_CHAR_PROPERTY_NAME, bytes, _p, last); } } newValueException(PROPERTY_NAME_NEVER_TERMINATED, _p, stop); return 0; // not reached } protected final void syntaxWarn(String message, char c) { syntaxWarn(message.replace("<%n>", Character.toString(c))); } protected final void syntaxWarn(String message) { if (env.warnings != WarnCallback.NONE) { env.warnings.warn(message + ": /" + new String(bytes, getBegin(), getEnd()) + "/"); } } } jruby-joni-2.1.41/src/org/joni/Matcher.java000066400000000000000000000617641400407002500204400ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the 
rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.Option.isFindLongest; import org.jcodings.Encoding; import org.jcodings.IntHolder; import org.jcodings.constants.CharacterType; import org.jcodings.specific.ASCIIEncoding; import org.joni.constants.internal.AnchorType; public abstract class Matcher extends IntHolder { public static final int FAILED = -1; public static final int INTERRUPTED = -2; protected final Regex regex; protected final Encoding enc; protected final byte[]bytes; protected final int str; protected final int end; protected int msaStart; protected int msaOptions; protected final Region msaRegion; protected int msaBestLen; protected int msaBestS; protected int msaGpos; protected int msaBegin; protected int msaEnd; Matcher(Regex regex, Region region, byte[]bytes, int p, int end) { this.regex = regex; this.enc = regex.enc; this.bytes = bytes; this.str = p; this.end = end; this.msaRegion = region; } // main matching method protected abstract int matchAt(int range, int sstart, int sprev, boolean interrupt) throws InterruptedException; protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum); protected abstract void 
stateCheckBuffClear(); public abstract void interrupt(); public final Region getRegion() { return msaRegion; } public final Region getEagerRegion() { return msaRegion != null ? msaRegion : new Region(msaBegin, msaEnd); } public final int getBegin() { return msaBegin; } public final int getEnd() { return msaEnd; } protected final void msaInit(int option, int start, int gpos) { msaOptions = option; msaStart = start; msaGpos = gpos; if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1; } public final int match(int at, int range, int option) { try { return matchCommon(at, range, option, false); } catch (InterruptedException ex) { return INTERRUPTED; } } public final int matchInterruptible(int at, int range, int option) throws InterruptedException { return matchCommon(at, range, option, true); } private final int matchCommon(int at, int range, int option, boolean interrupt) throws InterruptedException { msaInit(option, at, at); if (Config.USE_CEC) { int offset = at = str; stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); // move it to construction? 
} // USE_COMBINATION_EXPLOSION_CHECK int prev = enc.prevCharHead(bytes, str, at, end); if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { return matchAt(end /*range*/, at, prev, interrupt); } else { return matchAt(range /*range*/, at, prev, interrupt); } } int low, high; // these are the return values private final boolean forwardSearchRange(byte[]bytes, int str, int end, int s, int range, IntHolder lowPrev) { int pprev = -1; int p = s; if (Config.DEBUG_SEARCH) debugForwardSearchRange(str, end, s, range); if (regex.dMin > 0) { if (enc.isSingleByte()) { p += regex.dMin; } else { int q = p + regex.dMin; while (p < q && p < end) p += enc.length(bytes, p, end); } } retry:while (true) { if (Config.DEBUG_SEARCH) debugSearch(regex.forward.getName(), p, end, range); p = regex.forward.search(this, bytes, p, end, range); if (p != -1 && p < range) { if (p - regex.dMin < s) { // retry_gate: pprev = p; p += enc.length(bytes, p, end); continue retry; } if (regex.subAnchor != 0) { switch (regex.subAnchor) { case AnchorType.BEGIN_LINE: if (p != str) { int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end); if (!enc.isNewLine(bytes, prev, end)) { // goto retry_gate; pprev = p; p += enc.length(bytes, p, end); continue retry; } } break; case AnchorType.END_LINE: if (p == end) { if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { int prev = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end); if (prev != -1 && enc.isNewLine(bytes, prev, end)) { // goto retry_gate; pprev = p; p += enc.length(bytes, p, end); continue retry; } } } else if (!enc.isNewLine(bytes, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !enc.isMbcCrnl(bytes, p, end))) { //if () break; // goto retry_gate; pprev = p; p += enc.length(bytes, p, end); continue retry; } break; } // switch } if (regex.dMax == 0) { low = p; if (lowPrev != null) { // ??? 
// remove null checks if (low > s) { lowPrev.value = enc.prevCharHead(bytes, s, p, end); } else { lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, p, end); } } } else { if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { low = p - regex.dMax; if (low > s) { low = enc.rightAdjustCharHeadWithPrev(bytes, s, low, end, lowPrev); if (lowPrev != null && lowPrev.value == -1) { lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : s, low, end); } } else { if (lowPrev != null) { lowPrev.value = enc.prevCharHead(bytes, (pprev != -1) ? pprev : str, low, end); } } } } /* no needs to adjust *high, *high is used as range check only */ high = p - regex.dMin; if (Config.DEBUG_SEARCH) debugForwardSearchRangeSuccess(str, low, high); return true; /* success */ } return false; /* fail */ } //while } // low, high private final boolean backwardSearchRange(byte[]bytes, int str, int end, int s, int range, int adjrange) { range += regex.dMin; int p = s; retry:while (true) { p = regex.backward.search(this, bytes, range, adjrange, end, p, s, range); if (p != -1) { if (regex.subAnchor != 0) { switch (regex.subAnchor) { case AnchorType.BEGIN_LINE: if (p != str) { int prev = enc.prevCharHead(bytes, str, p, end); if (!enc.isNewLine(bytes, prev, end)) { p = prev; continue retry; } } break; case AnchorType.END_LINE: if (p == end) { if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { int prev = enc.prevCharHead(bytes, adjrange, p, end); if (prev == -1) return false; if (enc.isNewLine(bytes, prev, end)) { p = prev; continue retry; } } } else if (!enc.isNewLine(bytes, p, end) && (!Config.USE_CRNL_AS_LINE_TERMINATOR || !enc.isMbcCrnl(bytes, p, end))) { p = enc.prevCharHead(bytes, adjrange, p, end); if (p == -1) return false; continue retry; } break; } // switch } /* no needs to adjust *high, *high is used as range check only */ if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { low = p - regex.dMax; high = p - regex.dMin; high = enc.rightAdjustCharHead(bytes, adjrange, 
high, end); } if (Config.DEBUG_SEARCH) debugBackwardSearchRange(str, low, high); return true; } if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail."); return false; } // while } // MATCH_AND_RETURN_CHECK private boolean matchCheck(int upperRange, int s, int prev, boolean interrupt) throws InterruptedException { if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { //range = upperRange; if (matchAt(upperRange, s, prev, interrupt) != -1) { if (!isFindLongest(regex.options)) return true; } } else { //range = upperRange; if (matchAt(upperRange, s, prev, interrupt) != -1) return true; } } else { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (matchAt(end, s, prev, interrupt) != -1) { //range = upperRange; if (!isFindLongest(regex.options)) return true; } } else { //range = upperRange; if (matchAt(end, s, prev, interrupt) != -1) return true; } } return false; } public final int search(int start, int range, int option) { try { return searchCommon(start, start, range, option, false); } catch (InterruptedException ex) { return INTERRUPTED; } } public final int search(int gpos, int start, int range, int option) { try { return searchCommon(gpos, start, range, option, false); } catch (InterruptedException ex) { return INTERRUPTED; } } public final int searchInterruptible(int start, int range, int option) throws InterruptedException { return searchCommon(start, start, range, option, true); } public final int searchInterruptible(int gpos, int start, int range, int option) throws InterruptedException { return searchCommon(gpos, start, range, option, true); } private final int searchCommon(int gpos, int start, int range, int option, boolean interrupt) throws InterruptedException { int s, prev; int origStart = start; int origRange = range; if (Config.DEBUG_SEARCH) debugSearch(str, end, start, range); if (start > end || start < str) return FAILED; /* anchor optimize: resume search range */ if 
(regex.anchor != 0 && str < end) { int minSemiEnd, maxSemiEnd; if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { /* search start-position only */ // !begin_position:! if (range > start) { if (gpos > start) { if (gpos < range) { range = gpos + 1; } else { range = start + 1; } } } else { range = start; } } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { /* search str-position only */ if (range > start) { if (start != str) return FAILED; // mismatch_no_msa; range = str + 1; } else { if (range <= str) { start = str; range = str; } else { return FAILED; // mismatch_no_msa; } } } else if ((regex.anchor & AnchorType.END_BUF) != 0) { minSemiEnd = maxSemiEnd = end; // !end_buf:! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { int preEnd = enc.stepBack(bytes, str, end, end, 1); maxSemiEnd = end; if (enc.isNewLine(bytes, preEnd, end)) { minSemiEnd = preEnd; if (Config.USE_CRNL_AS_LINE_TERMINATOR) { preEnd = enc.stepBack(bytes, str, preEnd, end, 1); if (preEnd != -1 && enc.isMbcCrnl(bytes, preEnd, end)) { minSemiEnd = preEnd; } } if (minSemiEnd > str && start <= minSemiEnd) { // !goto end_buf;! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; } } else { minSemiEnd = end; // !goto end_buf;! if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return FAILED; // mismatch_no_msa; } } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { // goto !begin_position;! if (range > start) { if (gpos > start) { if (gpos < range) { range = gpos + 1; } else { range = start + 1; } } } else { range = start; } } } else if (str == end) { /* empty string */ // empty address ? 
if (Config.DEBUG_SEARCH) Config.log.println("onig_search: empty string."); if (regex.thresholdLength == 0) { s = start = str; prev = -1; msaInit(option, start, start); if (Config.USE_CEC) stateCheckBuffClear(); if (matchCheck(end, s, prev, interrupt)) return match(s); return mismatch(); } return FAILED; // goto mismatch_no_msa; } if (Config.DEBUG_SEARCH) debugSearch(str, end, start, range); msaInit(option, origStart, gpos); if (Config.USE_CEC) { int offset = Math.min(start, range) - str; stateCheckBuffInit(end - str, offset, regex.numCombExpCheck); } s = start; if (range > start) { /* forward search */ if (s > str) { prev = enc.prevCharHead(bytes, str, s, end); } else { prev = 0; // -1 } if (regex.forward != null) { int schRange = range; if (regex.dMax != 0) { if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { schRange = end; } else { schRange += regex.dMax; if (schRange > end) schRange = end; } } if ((end - start) < regex.thresholdLength) return mismatch(); if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { do { if (!forwardSearchRange(bytes, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev if (s < low) { s = low; prev = value; } while (s <= high) { if (matchCheck(origRange, s, prev, interrupt)) return match(s); // ??? prev = s; s += enc.length(bytes, s, end); } } while (s < range); return mismatch(); } else { /* check only. 
*/ if (!forwardSearchRange(bytes, str, end, s, schRange, null)) return mismatch(); if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { do { if (matchCheck(origRange, s, prev, interrupt)) return match(s); prev = s; s += enc.length(bytes, s, end); if ((regex.anchor & (AnchorType.LOOK_BEHIND | AnchorType.PREC_READ_NOT)) == 0) { while (!enc.isNewLine(bytes, prev, end) && s < range) { prev = s; s += enc.length(bytes, s, end); } } } while (s < range); return mismatch(); } } } do { if (matchCheck(origRange, s, prev, interrupt)) return match(s); prev = s; s += enc.length(bytes, s, end); } while (s < range); if (s == range) { /* because empty match with /$/. */ if (matchCheck(origRange, s, prev, interrupt)) return match(s); } } else { /* backward search */ if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { if (origStart < end) { origStart += enc.length(bytes, origStart, end); // /* is upper range */ } } if (regex.backward != null) { int adjrange; if (range < end) { adjrange = enc.leftAdjustCharHead(bytes, str, range, end); } else { adjrange = end; } if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { do { int schStart = s + regex.dMax; if (schStart > end) schStart = end; if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch(); // low, high if (s > high) s = high; while (s != -1 && s >= low) { prev = enc.prevCharHead(bytes, str, s, end); if (matchCheck(origStart, s, prev, interrupt)) return match(s); s = prev; } } while (s >= range); return mismatch(); } else { /* check only. 
*/ if ((end - range) < regex.thresholdLength) return mismatch(); int schStart = s; if (regex.dMax != 0) { if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { schStart = end; } else { schStart += regex.dMax; if (schStart > end) { schStart = end; } else { schStart = enc.leftAdjustCharHead(bytes, start, schStart, end); } } } if (!backwardSearchRange(bytes, str, end, schStart, range, adjrange)) return mismatch(); } } do { prev = enc.prevCharHead(bytes, str, s, end); if (matchCheck(origStart, s, prev, interrupt)) return match(s); s = prev; } while (s >= range); } return mismatch(); } private final boolean endBuf(int start, int range, int minSemiEnd, int maxSemiEnd) { if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa; if (range > start) { if ((minSemiEnd - start) > regex.anchorDmax) { start = minSemiEnd - regex.anchorDmax; if (start < end) { start = enc.rightAdjustCharHead(bytes, str, start, end); } else { /* match with empty at end */ start = enc.prevCharHead(bytes, str, end, end); } } if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { range = maxSemiEnd - regex.anchorDmin + 1; } if (start >= range) return true; // mismatch_no_msa; } else { if ((minSemiEnd - range) > regex.anchorDmax) { range = minSemiEnd - regex.anchorDmax; } if ((maxSemiEnd - start) < regex.anchorDmin) { start = maxSemiEnd - regex.anchorDmin; start = enc.leftAdjustCharHead(bytes, str, start, end); } if (range > start) return true; // mismatch_no_msa; } return false; } private final int match(int s) { return s - str; // sstart ??? } private final int mismatch() { if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { if (msaBestLen >= 0) { int s = msaBestS; return match(s); } } // falls through finish: return FAILED; } private byte[]icbuf; protected final byte[]icbuf() { return icbuf == null ? 
icbuf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN] : icbuf; } static boolean isMbcAsciiWord(Encoding enc, byte[]bytes, int p, int end) { // ONIGENC_IS_MBC_ASCII_WORD return ASCIIEncoding.INSTANCE.isCodeCType(enc.mbcToCode(bytes, p, end), CharacterType.WORD); } private final void debugForwardSearchRange(int str, int end, int s, int range) { if (Config.DEBUG_SEARCH) { Config.log.println("forward_search_range: " + "str: " + str + ", end: " + end + ", s: " + s + ", range: " + range); } } private final void debugForwardSearchRangeSuccess(int str, int low, int high) { if (Config.DEBUG_SEARCH) { Config.log.println("forward_search_range success: " + "low: " + (low - str) + ", high: " + (high - str) + ", dmin: " + regex.dMin + ", dmax: " + regex.dMax); } } private final void debugSearch(int str, int end, int start, int range) { if (Config.DEBUG_SEARCH) { Config.log.println("onig_search (entry point): " + "str: " + str + ", end: " + (end - str) + ", start: " + (start - str) + ", range " + (range - str)); } } private final void debugBackwardSearchRange(int str, int low, int high) { if (Config.DEBUG_SEARCH) { Config.log.println("backward_search_range: "+ "low: " + (low - str) + ", high: " + (high - str)); } } static void debugSearch(String name, int textP, int textEnd, int textRange) { Config.log.println(name + ": text: " + textP + ", text_end: " + textEnd + ", text_range: " + textRange); } } jruby-joni-2.1.41/src/org/joni/MatcherFactory.java000066400000000000000000000026771400407002500217660ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above 
copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni;

/**
 * Factory for {@link Matcher} instances.
 */
abstract class MatcherFactory {

    /** Default factory: every call creates a new {@link ByteCodeMachine}. */
    static final MatcherFactory DEFAULT = new MatcherFactory() {
        @Override
        Matcher create(Regex regex, Region region, byte[]bytes, int p, int end) {
            final Matcher machine = new ByteCodeMachine(regex, region, bytes, p, end);
            return machine;
        }
    };

    /**
     * Creates a matcher for {@code regex} over {@code bytes[p..end)}.
     *
     * @param region region handed to the matcher's constructor
     */
    abstract Matcher create(Regex regex, Region region, byte[]bytes, int p, int end);
}
jruby-joni-2.1.41/src/org/joni/MinMaxLen.java000066400000000000000000000100341400407002500206650ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni;

/**
 * Mutable (min, max) byte-length pair with saturating arithmetic:
 * {@link #INFINITE_DISTANCE} acts as "unbounded" and absorbs additions and
 * multiplications that would exceed it.
 */
final class MinMaxLen {
    /** Sentinel meaning "no upper bound". */
    static final int INFINITE_DISTANCE = 0x7FFFFFFF;

    int min; /* min byte length */
    int max; /* max byte length */

    /* 1000 / (min-max-dist + 1) */
    private static final short distValues[] = {
        1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
          91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
          48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
          32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
          24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
          20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
          16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
          14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
          12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
          11,   11,   11,   11,   11,   10,   10,   10,   10,   10
    };

    /**
     * Score for the spread between min and max: 0 when max is infinite, the
     * table entry for (max - min) while it is inside the table, otherwise 1.
     */
    int distanceValue() {
        if (max == INFINITE_DISTANCE) {
            return 0;
        }
        final int spread = max - min;
        /* return dist_vals[d] * 16 / (mm->min + 12); */
        if (spread >= distValues.length) {
            return 1;
        }
        return distValues[spread];
    }

    /**
     * Compares this pair (weight v1) against other (weight v2).
     * Weights are scaled by each side's distanceValue(); ties are broken by min.
     *
     * @return 1 when the other side scores higher, -1 when this side does,
     *         otherwise the ordering of min versus other.min
     */
    int compareDistanceValue(MinMaxLen other, int v1, int v2) {
        if (v2 <= 0) return -1;
        if (v1 <= 0) return 1;

        final int mine = v1 * distanceValue();
        final int theirs = v2 * other.distanceValue();
        if (theirs != mine) {
            return theirs > mine ? 1 : -1;
        }
        return Integer.compare(min, other.min);
    }

    boolean equal(MinMaxLen other) {
        return max == other.max && min == other.min;
    }

    void set(int min, int max) {
        this.min = min;
        this.max = max;
    }

    void clear() {
        set(0, 0);
    }

    void copy(MinMaxLen other) {
        set(other.min, other.max);
    }

    /** Adds other's bounds to this one's, saturating at INFINITE_DISTANCE. */
    void add(MinMaxLen other) {
        set(distanceAdd(min, other.min), distanceAdd(max, other.max));
    }

    /** Adds len to both bounds, saturating at INFINITE_DISTANCE. */
    void addLength(int len) {
        set(distanceAdd(min, len), distanceAdd(max, len));
    }

    /** Widens this range so that it also covers other (alternation merge). */
    void altMerge(MinMaxLen other) {
        min = Math.min(min, other.min);
        max = Math.max(max, other.max);
    }

    /** Saturating addition of two distances. */
    static int distanceAdd(int d1, int d2) {
        if (d1 == INFINITE_DISTANCE || d2 == INFINITE_DISTANCE) {
            return INFINITE_DISTANCE;
        }
        return d1 <= INFINITE_DISTANCE - d2 ? d1 + d2 : INFINITE_DISTANCE;
    }

    /** Saturating multiplication of a distance by m; m == 0 yields 0. */
    static int distanceMultiply(int d, int m) {
        if (m == 0) return 0;
        return d < INFINITE_DISTANCE / m ? d * m : INFINITE_DISTANCE;
    }

    /** Renders a range as "(a)-(b)", printing "inf" for INFINITE_DISTANCE. */
    static String distanceRangeToString(int a, int b) {
        StringBuilder text = new StringBuilder();
        if (a == INFINITE_DISTANCE) {
            text.append("inf");
        } else {
            text.append("(").append(a).append(")");
        }
        text.append("-");
        if (b == INFINITE_DISTANCE) {
            text.append("inf");
        } else {
            text.append("(").append(b).append(")");
        }
        return text.toString();
    }
}
jruby-joni-2.1.41/src/org/joni/NameEntry.java000066400000000000000000000057071400407002500207520ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so,
subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; public final class NameEntry { static final int INIT_NAME_BACKREFS_ALLOC_NUM = 8; public final byte[]name; public final int nameP; public final int nameEnd; int backNum; int backRef1; int backRefs[]; public NameEntry(byte[]bytes, int p, int end) { name = bytes; nameP = p; nameEnd = end; } public int[] getBackRefs() { switch (backNum) { case 0: return new int[]{}; case 1: return new int[]{backRef1}; default: int[]result = new int[backNum]; System.arraycopy(backRefs, 0, result, 0, backNum); return result; } } private void alloc() { backRefs = new int[INIT_NAME_BACKREFS_ALLOC_NUM]; } private void ensureSize() { if (backNum > backRefs.length) { int[]tmp = new int[backRefs.length << 1]; System.arraycopy(backRefs, 0, tmp, 0, backRefs.length); backRefs = tmp; } } public void addBackref(int backRef) { backNum++; switch (backNum) { case 1: backRef1 = backRef; break; case 2: alloc(); backRefs[0] = backRef1; backRefs[1] = backRef; break; default: ensureSize(); backRefs[backNum - 1] = backRef; } } public String toString() { StringBuilder buff = new StringBuilder(new String(name, nameP, nameEnd - nameP) + " "); if (backNum == 0) { buff.append("-"); } else if (backNum == 1){ buff.append(backRef1); } else { for (int i=0; i 0) buff.append(", "); buff.append(backRefs[i]); } } return buff.toString(); } } 
jruby-joni-2.1.41/src/org/joni/NativeMachine.java000066400000000000000000000024061400407002500215540ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; abstract class NativeMachine extends Matcher { protected NativeMachine(Regex regex, Region region, byte[]bytes, int p, int end) { super(regex, region, bytes, p, end); } } jruby-joni-2.1.41/src/org/joni/NodeOptInfo.java000066400000000000000000000105551400407002500212310ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.jcodings.Encoding; final class NodeOptInfo { final MinMaxLen length = new MinMaxLen(); final OptAnchorInfo anchor = new OptAnchorInfo(); final OptExactInfo exb = new OptExactInfo(); /* boundary */ final OptExactInfo exm = new OptExactInfo(); /* middle */ final OptExactInfo expr = new OptExactInfo(); /* prec read (?=...) 
*/ final OptMapInfo map = new OptMapInfo(); /* boundary */ public void setBoundNode(MinMaxLen mmd) { exb.mmd.copy(mmd); expr.mmd.copy(mmd); map.mmd.copy(mmd); } public void clear() { length.clear(); anchor.clear(); exb.clear(); exm.clear(); expr.clear(); map.clear(); } public void copy(NodeOptInfo other) { length.copy(other.length); anchor.copy(other.anchor); exb.copy(other.exb); exm.copy(other.exm); expr.copy(other.expr); map.copy(other.map); } public void concatLeftNode(NodeOptInfo other, Encoding enc) { OptAnchorInfo tanchor = new OptAnchorInfo(); // remove it somehow ? tanchor.concat(anchor, other.anchor, length.max, other.length.max); anchor.copy(tanchor); if (other.exb.length > 0 && length.max == 0) { tanchor.concat(anchor, other.exb.anchor, length.max, other.length.max); other.exb.anchor.copy(tanchor); } if (other.map.value > 0 && length.max == 0) { if (other.map.mmd.max == 0) { other.map.anchor.leftAnchor |= anchor.leftAnchor; } } boolean exbReach = exb.reachEnd; boolean exmReach = exm.reachEnd; if (other.length.max != 0) { exb.reachEnd = exm.reachEnd = false; } if (other.exb.length > 0) { if (exbReach) { exb.concat(other.exb, enc); other.exb.clear(); } else if (exmReach) { exm.concat(other.exb, enc); other.exb.clear(); } } exm.select(other.exb, enc); exm.select(other.exm, enc); if (expr.length > 0) { if (other.length.max > 0) { // TODO: make sure it is not an Oniguruma bug (casting unsigned int to int for arithmetic comparison) int otherLengthMax = other.length.max; if (otherLengthMax == MinMaxLen.INFINITE_DISTANCE) otherLengthMax = -1; if (expr.length > otherLengthMax) expr.length = otherLengthMax; if (expr.mmd.max == 0) { exb.select(expr, enc); } else { exm.select(expr, enc); } } } else if (other.expr.length > 0) { expr.copy(other.expr); } map.select(other.map); length.add(other.length); } public void altMerge(NodeOptInfo other, OptEnvironment env) { anchor.altMerge(other.anchor); exb.altMerge(other.exb, env); exm.altMerge(other.exm, env); 
expr.altMerge(other.expr, env); map.altMerge(other.map, env.enc); length.altMerge(other.length); } public void setBound(MinMaxLen mmd) { exb.mmd.copy(mmd); expr.mmd.copy(mmd); map.mmd.copy(mmd); } } jruby-joni-2.1.41/src/org/joni/OptAnchorInfo.java000066400000000000000000000066131400407002500215560ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package org.joni;

import org.joni.constants.internal.AnchorType;

/**
 * Pair of anchor bit sets: anchors that apply at the left edge and anchors
 * that apply at the right edge.
 */
final class OptAnchorInfo implements AnchorType {
    int leftAnchor;
    int rightAnchor;

    void clear() {
        leftAnchor = 0;
        rightAnchor = 0;
    }

    void copy(OptAnchorInfo other) {
        leftAnchor = other.leftAnchor;
        rightAnchor = other.rightAnchor;
    }

    /**
     * Sets this info to the concatenation of left and right.
     * The right side's left anchors only carry over when the left part has
     * length 0; the left side's right anchors carry over fully when the right
     * part has length 0, otherwise only its PREC_READ_NOT bit does.
     */
    void concat(OptAnchorInfo left, OptAnchorInfo right, int leftLength, int rightLength) {
        // field writes are kept in this exact order so that the result is the
        // same even if this object is also passed as left or right
        leftAnchor = left.leftAnchor;
        if (leftLength == 0) {
            leftAnchor |= right.leftAnchor;
        }

        rightAnchor = right.rightAnchor;
        rightAnchor |= (rightLength == 0)
                ? left.rightAnchor
                : (left.rightAnchor & AnchorType.PREC_READ_NOT);
    }

    /** @return true when any bit of anchor is set on either side */
    boolean isSet(int anchor) {
        return ((leftAnchor | rightAnchor) & anchor) != 0;
    }

    /** Sets anchor on the side chosen by {@link #isLeftAnchor(int)}. */
    void add(int anchor) {
        if (!isLeftAnchor(anchor)) {
            rightAnchor |= anchor;
        } else {
            leftAnchor |= anchor;
        }
    }

    /** Clears anchor on the side chosen by {@link #isLeftAnchor(int)}. */
    void remove(int anchor) {
        final int keep = ~anchor;
        if (!isLeftAnchor(anchor)) {
            rightAnchor &= keep;
        } else {
            leftAnchor &= keep;
        }
    }

    /** Keeps only the anchors present in both this and other. */
    void altMerge(OptAnchorInfo other) {
        leftAnchor &= other.leftAnchor;
        rightAnchor &= other.rightAnchor;
    }

    /** @return false exactly for END_BUF, SEMI_END_BUF, END_LINE, PREC_READ and PREC_READ_NOT */
    static boolean isLeftAnchor(int anchor) { // make a mask for it ?
        return anchor != END_BUF
            && anchor != SEMI_END_BUF
            && anchor != END_LINE
            && anchor != PREC_READ
            && anchor != PREC_READ_NOT;
    }

    /** Debug rendering of an anchor bit set, e.g. "[begin-buf end-line ]". */
    static String anchorToString(int anchor) {
        final int[] flags = {
            AnchorType.BEGIN_BUF,
            AnchorType.BEGIN_LINE,
            AnchorType.BEGIN_POSITION,
            AnchorType.END_BUF,
            AnchorType.SEMI_END_BUF,
            AnchorType.END_LINE,
            AnchorType.ANYCHAR_STAR,
            AnchorType.ANYCHAR_STAR_ML
        };
        final String[] labels = {
            "begin-buf ",
            "begin-line ",
            "begin-pos ",
            "end-buf ",
            "semi-end-buf ",
            "end-line ",
            "anychar-star ",
            "anychar-star-pl "
        };

        StringBuilder text = new StringBuilder("[");
        for (int i = 0; i < flags.length; i++) {
            if ((anchor & flags[i]) != 0) text.append(labels[i]);
        }
        text.append("]");
        return text.toString();
    }
}
jruby-joni-2.1.41/src/org/joni/OptEnvironment.java000066400000000000000000000030141400407002500220240ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ package org.joni; import org.jcodings.Encoding; // remove this one in future and pass mmd directly final class OptEnvironment { final MinMaxLen mmd = new MinMaxLen(); Encoding enc; int options; int caseFoldFlag; ScanEnvironment scanEnv; void copy(OptEnvironment other) { mmd.copy(other.mmd); enc = other.enc; options = other.options; caseFoldFlag = other.caseFoldFlag; scanEnv = other.scanEnv; } } jruby-joni-2.1.41/src/org/joni/OptExactInfo.java000066400000000000000000000121571400407002500214100ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.jcodings.Encoding; final class OptExactInfo { static final int OPT_EXACT_MAXLEN = 24; final MinMaxLen mmd = new MinMaxLen(); final OptAnchorInfo anchor = new OptAnchorInfo(); boolean reachEnd; int ignoreCase; /* -1: unset, 0: case sensitive, 1: ignore case */ final byte bytes[] = new byte[OPT_EXACT_MAXLEN]; int length; boolean isFull() { return length >= OPT_EXACT_MAXLEN; } void clear() { mmd.clear(); anchor.clear(); reachEnd = false; ignoreCase = -1; length = 0; } void copy(OptExactInfo other) { mmd.copy(other.mmd); anchor.copy(other.anchor); reachEnd = other.reachEnd; ignoreCase = other.ignoreCase; length = other.length; System.arraycopy(other.bytes, 0, bytes, 0, OPT_EXACT_MAXLEN); } void concat(OptExactInfo other, Encoding enc) { if (ignoreCase < 0) { ignoreCase = other.ignoreCase; } else if (ignoreCase != other.ignoreCase) { return; } int p = 0; // add->s; int end = p + other.length; int i; for (i = length; p < end;) { int len = enc.length(other.bytes, p, end); if (i + len > OPT_EXACT_MAXLEN) break; for (int j = 0; j < len && p < end; j++) { bytes[i++] = other.bytes[p++]; // arraycopy or even don't copy anything ?? 
} } length = i; reachEnd = (p == end && other.reachEnd); OptAnchorInfo tmp = new OptAnchorInfo(); tmp.concat(anchor, other.anchor, 1, 1); if (!reachEnd) tmp.rightAnchor = 0; anchor.copy(tmp); } void concatStr(byte[]lbytes, int p, int end, boolean raw, Encoding enc) { int i; for (i = length; p < end && i < OPT_EXACT_MAXLEN;) { int len = enc.length(lbytes, p, end); if (i + len > OPT_EXACT_MAXLEN) break; for (int j = 0; j < len && p < end; j++) { bytes[i++] = lbytes[p++]; } } length = i; } void altMerge(OptExactInfo other, OptEnvironment env) { if (other.length == 0 || length == 0) { clear(); return; } if (!mmd.equal(other.mmd)) { clear(); return; } int i; for (i=0; i= 0) { ignoreCase |= other.ignoreCase; } anchor.altMerge(other.anchor); if (!reachEnd) anchor.rightAnchor = 0; } void select(OptExactInfo alt, Encoding enc) { int v1 = length; int v2 = alt.length; if (v2 == 0) { return; } else if (v1 == 0) { copy(alt); return; } else if (v1 <= 2 && v2 <= 2) { /* ByteValTable[x] is big value --> low price */ v2 = OptMapInfo.positionValue(enc, bytes[0] & 0xff); v1 = OptMapInfo.positionValue(enc, alt.bytes[0] & 0xff); if (length > 1) v1 += 5; if (alt.length > 1) v2 += 5; } if (ignoreCase <= 0) v1 *= 2; if (alt.ignoreCase <= 0) v2 *= 2; if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt); } // comp_opt_exact_or_map_info private static final int COMP_EM_BASE = 20; int compare(OptMapInfo m) { if (m.value <= 0) return -1; int ve = COMP_EM_BASE * length * (ignoreCase > 0 ? 
1 : 2); int vm = COMP_EM_BASE * 5 * 2 / m.value; return mmd.compareDistanceValue(m.mmd, ve, vm); } } jruby-joni-2.1.41/src/org/joni/OptMapInfo.java000066400000000000000000000104521400407002500210550ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.jcodings.CaseFoldCodeItem; import org.jcodings.Encoding; final class OptMapInfo { final MinMaxLen mmd = new MinMaxLen(); /* info position */ final OptAnchorInfo anchor = new OptAnchorInfo(); int value; /* weighted value */ final byte map[] = new byte[Config.CHAR_TABLE_SIZE]; void clear() { mmd.clear(); anchor.clear(); value = 0; for (int i=0; i 0) copy(alt); } // alt_merge_opt_map_info void altMerge(OptMapInfo other, Encoding enc) { /* if (! 
is_equal_mml(&to->mmd, &add->mmd)) return ; */ if (value == 0) return; if (other.value == 0 || mmd.max < other.mmd.max) { clear(); return; } mmd.altMerge(other.mmd); int val = 0; for (int i=0; i 1) { return 20; } else { return ByteValTable[i]; } } else { return 4; /* Take it easy. */ } } } jruby-joni-2.1.41/src/org/joni/Option.java000066400000000000000000000130001400407002500203010ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; public final class Option { /* options */ public static final int NONE = 0; public static final int IGNORECASE = (1 << 0); public static final int EXTEND = (1 << 1); public static final int MULTILINE = (1 << 2); public static final int SINGLELINE = (1 << 3); public static final int FIND_LONGEST = (1 << 4); public static final int FIND_NOT_EMPTY = (1 << 5); public static final int NEGATE_SINGLELINE = (1 << 6); public static final int DONT_CAPTURE_GROUP = (1 << 7); public static final int CAPTURE_GROUP = (1 << 8); /* options (search time) */ public static final int NOTBOL = (1 << 9); public static final int NOTEOL = (1 << 10); public static final int POSIX_REGION = (1 << 11); /* options (ctype range) */ public static final int ASCII_RANGE = (1 << 12); public static final int POSIX_BRACKET_ALL_RANGE = (1 << 13); public static final int WORD_BOUND_ALL_RANGE = (1 << 14); /* options (newline) */ public static final int NEWLINE_CRLF = (1 << 15); public static final int NOTBOS = (1 << 16); public static final int NOTEOS = (1 << 17); public static final int CR_7_BIT = (1 << 18); public static final int MAXBIT = (1 << 19); /* limit */ public static final int DEFAULT = NONE; public static String toString(int option) { String options = ""; if (isIgnoreCase(option)) options += "IGNORECASE"; if (isExtend(option)) options += "EXTEND"; if (isMultiline(option)) options += "MULTILINE"; if (isSingleline(option)) options += "SINGLELINE"; if (isFindLongest(option)) options += "FIND_LONGEST"; if (isFindNotEmpty(option)) options += "FIND_NOT_EMPTY"; if (isNegateSingleline(option)) options += "NEGATE_SINGLELINE"; if (isDontCaptureGroup(option)) options += "DONT_CAPTURE_GROUP"; if (isCaptureGroup(option)) options += "CAPTURE_GROUP"; if (isNotBol(option)) options += "NOTBOL"; if (isNotEol(option)) options += "NOTEOL"; if (isPosixRegion(option)) options += "POSIX_REGION"; if (isCR7Bit(option)) options += "CR_7_BIT"; return options; } public static boolean 
isIgnoreCase(int option) { return (option & IGNORECASE) != 0; } public static boolean isExtend(int option) { return (option & EXTEND) != 0; } public static boolean isSingleline(int option) { return (option & SINGLELINE) != 0; } public static boolean isMultiline(int option) { return (option & MULTILINE) != 0; } public static boolean isFindLongest(int option) { return (option & FIND_LONGEST) != 0; } public static boolean isFindNotEmpty(int option) { return (option & FIND_NOT_EMPTY) != 0; } public static boolean isFindCondition(int option) { return (option & (FIND_LONGEST | FIND_NOT_EMPTY)) != 0; } public static boolean isNegateSingleline(int option) { return (option & NEGATE_SINGLELINE) != 0; } public static boolean isDontCaptureGroup(int option) { return (option & DONT_CAPTURE_GROUP) != 0; } public static boolean isCaptureGroup(int option) { return (option & CAPTURE_GROUP) != 0; } public static boolean isNotBol(int option) { return (option & NOTBOL) != 0; } public static boolean isNotEol(int option) { return (option & NOTEOL) != 0; } public static boolean isPosixRegion(int option) { return (option & POSIX_REGION) != 0; } public static boolean isAsciiRange(int option) { return (option & ASCII_RANGE) != 0; } public static boolean isPosixBracketAllRange(int option) { return (option & POSIX_BRACKET_ALL_RANGE) != 0; } public static boolean isWordBoundAllRange(int option) { return (option & WORD_BOUND_ALL_RANGE) != 0; } public static boolean isNewlineCRLF(int option) { return (option & NEWLINE_CRLF) != 0; } public static boolean isCR7Bit(int option) { return (option & CR_7_BIT) != 0; } public static boolean isDynamic(int option) { // ignore-case and multibyte status are included in compiled code // return (option & (MULTILINE | IGNORECASE)) != 0; return false; } } jruby-joni-2.1.41/src/org/joni/Parser.java000066400000000000000000001530731400407002500203040ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * 
this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.BitStatus.bsOnAtSimple; import static org.joni.BitStatus.bsOnOff; import static org.joni.Option.isAsciiRange; import static org.joni.Option.isDontCaptureGroup; import static org.joni.Option.isIgnoreCase; import static org.joni.Option.isPosixBracketAllRange; import org.jcodings.ObjPtr; import org.jcodings.Ptr; import org.jcodings.constants.CharacterType; import org.jcodings.constants.PosixBracket; import org.jcodings.exception.InternalException; import org.jcodings.unicode.UnicodeCodeRange; import org.joni.ast.AnchorNode; import org.joni.ast.AnyCharNode; import org.joni.ast.BackRefNode; import org.joni.ast.CClassNode; import org.joni.ast.CClassNode.CCSTATE; import org.joni.ast.CClassNode.CCStateArg; import org.joni.ast.CClassNode.CCVALTYPE; import org.joni.ast.CTypeNode; import org.joni.ast.CallNode; import org.joni.ast.EncloseNode; import org.joni.ast.ListNode; import org.joni.ast.Node; import org.joni.ast.QuantifierNode; import org.joni.ast.StringNode; import 
org.joni.constants.internal.AnchorType; import org.joni.constants.internal.EncloseType; import org.joni.constants.internal.NodeType; import org.joni.constants.internal.TokenType; import org.joni.exception.ErrorMessages; class Parser extends Lexer { protected int returnCode; // return code used by parser methods (they itself return parsed nodes) // this approach will not affect recursive calls protected Parser(Regex regex, Syntax syntax, byte[]bytes, int p, int end, WarnCallback warnings) { super(regex, syntax, bytes, p, end, warnings); } private static final int POSIX_BRACKET_NAME_MIN_LEN = 4; private static final int POSIX_BRACKET_CHECK_LIMIT_LENGTH = 20; private static final byte BRACKET_END[] = ":]".getBytes(); private boolean parsePosixBracket(CClassNode cc, CClassNode ascCc) { mark(); boolean not; if (peekIs('^')) { inc(); not = true; } else { not = false; } if (enc.strLength(bytes, p, stop) >= POSIX_BRACKET_NAME_MIN_LEN + 3) { // else goto not_posix_bracket boolean asciiRange = isAsciiRange(env.option) && !isPosixBracketAllRange(env.option); for (int i=0; i POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; } if (c == ':' && left()) { inc(); if (left()) { fetch(); if (c == ']') newSyntaxException(INVALID_POSIX_BRACKET_TYPE); } } restore(); return true; /* 1: is not POSIX bracket, but no error. 
*/ } private boolean codeExistCheck(int code, boolean ignoreEscaped) { mark(); boolean inEsc = false; while (left()) { if (ignoreEscaped && inEsc) { inEsc = false; } else { fetch(); if (c == code) { restore(); return true; } if (c == syntax.metaCharTable.esc) inEsc = true; } } restore(); return false; } private CClassNode parseCharClass(ObjPtr ascNode) { final boolean neg; CClassNode cc, prevCc = null, ascCc = null, ascPrevCc = null, workCc = null, ascWorkCc = null; CCStateArg arg = new CCStateArg(); fetchTokenInCC(); if (token.type == TokenType.CHAR && token.getC() == '^' && !token.escaped) { neg = true; fetchTokenInCC(); } else { neg = false; } if (token.type == TokenType.CC_CLOSE && !syntax.op3OptionECMAScript()) { if (!codeExistCheck(']', true)) newSyntaxException(EMPTY_CHAR_CLASS); env.ccEscWarn("]"); token.type = TokenType.CHAR; /* allow []...] */ } cc = new CClassNode(); if (isIgnoreCase(env.option)) { ascCc = ascNode.p = new CClassNode(); } boolean andStart = false; arg.state = CCSTATE.START; while (token.type != TokenType.CC_CLOSE) { boolean fetched = false; switch (token.type) { case CHAR: final int len; if (token.getCode() >= BitSet.SINGLE_BYTE_SIZE || (len = enc.codeToMbcLength(token.getC())) > 1) { arg.inType = CCVALTYPE.CODE_POINT; } else { arg.inType = CCVALTYPE.SB; // sb_char: } arg.to = token.getC(); arg.toIsRaw = false; parseCharClassValEntry2(cc, ascCc, arg); // goto val_entry2 break; case RAW_BYTE: if (!enc.isSingleByte() && token.base != 0) { /* tok->base != 0 : octal or hexadec. 
*/ byte[]buf = new byte[Config.ENC_MBC_CASE_FOLD_MAXLEN]; int psave = p; int base = token.base; buf[0] = (byte)token.getC(); int i; for (i=1; i len) { /* fetch back */ p = psave; for (i=1; i ascPtr = new ObjPtr(); CClassNode acc = parseCharClass(ascPtr); cc.or(acc, env); if (ascPtr.p != null) { ascCc.or(ascPtr.p, env); } break; case CC_AND: /* && */ if (arg.state == CCSTATE.VALUE) { arg.to = 0; arg.toIsRaw = false; cc.nextStateValue(arg, ascCc, env); } /* initialize local variables */ andStart = true; arg.state = CCSTATE.START; if (prevCc != null) { prevCc.and(cc, env); if (ascCc != null) { ascPrevCc.and(ascCc, env); } } else { prevCc = cc; if (workCc == null) workCc = new CClassNode(); cc = workCc; if (ascCc != null) { ascPrevCc = ascCc; if (ascWorkCc == null) ascWorkCc = new CClassNode(); ascCc = ascWorkCc; } } cc.clear(); if (ascCc != null) ascCc.clear(); break; case EOT: newSyntaxException(PREMATURE_END_OF_CHAR_CLASS); default: newInternalException(PARSER_BUG); } // switch if (!fetched) fetchTokenInCC(); } // while if (arg.state == CCSTATE.VALUE) { arg.to = 0; arg.toIsRaw = false; cc.nextStateValue(arg, ascCc, env); } if (prevCc != null) { prevCc.and(cc, env); cc = prevCc; if (ascCc != null) { ascPrevCc.and(ascCc, env); ascCc = ascPrevCc; } } if (neg) { cc.setNot(); if (ascCc != null) ascCc.setNot(); } else { cc.clearNot(); if (ascCc != null) ascCc.clearNot(); } if (cc.isNot() && syntax.notNewlineInNegativeCC()) { if (!cc.isEmpty()) { // ??? 
final int NEW_LINE = 0x0a; if (enc.isNewLine(NEW_LINE)) { if (enc.codeToMbcLength(NEW_LINE) == 1) { cc.bs.set(env, NEW_LINE); } else { cc.addCodeRange(env, NEW_LINE, NEW_LINE); } } } } return cc; } private void parseCharClassSbChar(CClassNode cc, CClassNode ascCc, CCStateArg arg) { arg.inType = CCVALTYPE.SB; arg.to = token.getC(); arg.toIsRaw = false; parseCharClassValEntry2(cc, ascCc, arg); // goto val_entry2 } private void parseCharClassRangeEndVal(CClassNode cc, CClassNode ascCc, CCStateArg arg) { arg.to = '-'; arg.toIsRaw = false; parseCharClassValEntry(cc, ascCc, arg); // goto val_entry } private void parseCharClassValEntry(CClassNode cc, CClassNode ascCc, CCStateArg arg) { int len = enc.codeToMbcLength(arg.to); arg.inType = len == 1 ? CCVALTYPE.SB : CCVALTYPE.CODE_POINT; parseCharClassValEntry2(cc, ascCc, arg); // val_entry2: } private void parseCharClassValEntry2(CClassNode cc, CClassNode ascCc, CCStateArg arg) { cc.nextStateValue(arg, ascCc, env); } private Node parseEnclose(TokenType term) { Node node = null; if (!left()) newSyntaxException(END_PATTERN_WITH_UNMATCHED_PARENTHESIS); int option = env.option; if (peekIs('?') && syntax.op2QMarkGroupEffect()) { inc(); if (!left()) newSyntaxException(END_PATTERN_IN_GROUP); boolean listCapture = false; fetch(); switch(c) { case ':': /* (?:...) grouping only */ fetchToken(); // group: node = parseSubExp(term); returnCode = 1; /* group */ return node; case '=': node = new AnchorNode(AnchorType.PREC_READ); break; case '!': /* preceding read */ node = new AnchorNode(AnchorType.PREC_READ_NOT); if (syntax.op3OptionECMAScript()) { env.pushPrecReadNotNode(node); } break; case '>': /* (?>...) stop backtrack */ node = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose break; case '~': /* (?~...) 
absent operator */ if (syntax.op2QMarkTildeAbsent()) { node = new EncloseNode(EncloseType.ABSENT); break; } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } case '\'': if (Config.USE_NAMED_GROUP) { if (syntax.op2QMarkLtNamedGroup()) { listCapture = false; // goto named_group1 node = parseEncloseNamedGroup2(listCapture); break; } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } } // USE_NAMED_GROUP break; case '<': /* look behind (?<=...), (?...) */ } unfetch(); } } // USE_NAMED_GROUP EncloseNode en = EncloseNode.newMemory(env.option, false); int num = env.addMemEntry(); if (num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); en.regNum = num; node = en; } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } break; case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */ if (left() && syntax.op2QMarkLParenCondition()) { int num = -1; int name = -1; fetch(); if (enc.isDigit(c)) { /* (n) */ unfetch(); num = fetchName('(', true); if (syntax.strictCheckBackref()) { if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(INVALID_BACKREF); } } else { if (Config.USE_NAMED_GROUP) { if (c == '<' || c == '\'') { /* (), ('name') */ name = p; fetchNamedBackrefToken(); inc(); num = token.getBackrefNum() > 1 ? 
token.getBackrefRefs()[0] : token.getBackrefRef1(); } } else { // USE_NAMED_GROUP newSyntaxException(INVALID_CONDITION_PATTERN); } } EncloseNode en = new EncloseNode(EncloseType.CONDITION); en.regNum = num; if (name != -1) en.setNameRef(); node = en; } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } break; case '^': /* loads default options */ if (left() && syntax.op2OptionPerl()) { /* d-imsx */ option = bsOnOff(option, Option.ASCII_RANGE, true); option = bsOnOff(option, Option.IGNORECASE, true); option = bsOnOff(option, Option.SINGLELINE, false); option = bsOnOff(option, Option.MULTILINE, true); option = bsOnOff(option, Option.EXTEND, true); fetch(); } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } // case 'p': #ifdef USE_POSIXLINE_OPTION case '-': case 'i': case 'm': case 's': case 'x': case 'a': case 'd': case 'l': case 'u': boolean neg = false; while (true) { switch(c) { case ':': case ')': break; case '-': neg = true; break; case 'x': option = bsOnOff(option, Option.EXTEND, neg); break; case 'i': option = bsOnOff(option, Option.IGNORECASE, neg); break; case 's': if (syntax.op2OptionPerl()) { option = bsOnOff(option, Option.MULTILINE, neg); } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } break; case 'm': if (syntax.op2OptionPerl()) { option = bsOnOff(option, Option.SINGLELINE, !neg); } else if (syntax.op2OptionRuby()) { option = bsOnOff(option, Option.MULTILINE, neg); } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } break; // case 'p': #ifdef USE_POSIXLINE_OPTION // not defined // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg); // break; case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */ if ((syntax.op2OptionPerl() || syntax.op2OptionRuby()) && !neg) { option = bsOnOff(option, Option.ASCII_RANGE, false); option = bsOnOff(option, Option.POSIX_BRACKET_ALL_RANGE, true); option = bsOnOff(option, Option.WORD_BOUND_ALL_RANGE, true); break; } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } case 'u': if 
((syntax.op2OptionPerl() || syntax.op2OptionRuby()) && !neg) { option = bsOnOff(option, Option.ASCII_RANGE, true); option = bsOnOff(option, Option.POSIX_BRACKET_ALL_RANGE, true); option = bsOnOff(option, Option.WORD_BOUND_ALL_RANGE, true); break; } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } case 'd': if (syntax.op2OptionPerl() && !neg) { option = bsOnOff(option, Option.ASCII_RANGE, true); } else if (syntax.op2OptionRuby() && !neg) { option = bsOnOff(option, Option.ASCII_RANGE, false); option = bsOnOff(option, Option.POSIX_BRACKET_ALL_RANGE, false); option = bsOnOff(option, Option.WORD_BOUND_ALL_RANGE, false); } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } break; case 'l': if (syntax.op2OptionPerl() && !neg) { option = bsOnOff(option, Option.ASCII_RANGE, true); } else { newSyntaxException(UNDEFINED_GROUP_OPTION); } break; default: newSyntaxException(UNDEFINED_GROUP_OPTION); } // switch if (c == ')') { node = EncloseNode.newOption(option); returnCode = 2; /* option only */ return node; } else if (c == ':') { int prev = env.option; env.option = option; fetchToken(); Node target = parseSubExp(term); env.option = prev; EncloseNode en = EncloseNode.newOption(option); en.setTarget(target); node = en; returnCode = 0; return node; } if (!left()) newSyntaxException(END_PATTERN_IN_GROUP); fetch(); } // while default: newSyntaxException(UNDEFINED_GROUP_OPTION); } // switch } else { if (isDontCaptureGroup(env.option)) { fetchToken(); // goto group node = parseSubExp(term); returnCode = 1; /* group */ return node; } EncloseNode en = EncloseNode.newMemory(env.option, false); en.regNum = env.addMemEntry(); node = en; } fetchToken(); Node target = parseSubExp(term); if (node.getType() == NodeType.ANCHOR) { AnchorNode an = (AnchorNode)node; an.setTarget(target); if (syntax.op3OptionECMAScript() && an.type == AnchorType.PREC_READ_NOT) { env.popPrecReadNotNode(an); } } else { EncloseNode en = (EncloseNode)node; en.setTarget(target); if (en.type == EncloseType.MEMORY) 
{ if (syntax.op3OptionECMAScript()) { en.containingAnchor = env.currentPrecReadNotNode(); } /* Don't move this to previous of parse_subexp() */ env.setMemNode(en.regNum, en); } else if (en.type == EncloseType.CONDITION) { if (target.getType() != NodeType.ALT) { /* convert (?(cond)yes) to (?(cond)yes|empty) */ en.setTarget(ListNode.newAlt(target, ListNode.newAlt(StringNode.EMPTY, null))); } } } returnCode = 0; return node; // ?? } private Node parseEncloseNamedGroup2(boolean listCapture) { int nm = p; fetchName(c, false); int nameEnd = value; int num = env.addMemEntry(); if (listCapture && num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY); regex.nameAdd(bytes, nm, nameEnd, num, syntax); EncloseNode en = EncloseNode.newMemory(env.option, true); en.regNum = num; if (listCapture) env.captureHistory = bsOnAtSimple(env.captureHistory, num); env.numNamed++; return en; } private int findStrPosition(int[]s, int n, int from, int to, Ptr nextChar) { int x; int q; int p = from; int i; while (p < to) { x = enc.mbcToCode(bytes, p, to); q = p + enc.length(bytes, p, to); if (x == s[0]) { for (i=1; i= n) { if (bytes[nextChar.p] != 0) nextChar.p = q; // we may need zero term semantics... 
return p; } } p = q; } return -1; } private Node parseExp(TokenType term) { if (token.type == term) return StringNode.EMPTY; // goto end_of_token Node node = null; boolean group = false; switch(token.type) { case ALT: case EOT: return StringNode.EMPTY; // end_of_token:, node_new_empty case SUBEXP_OPEN: node = parseEnclose(TokenType.SUBEXP_CLOSE); if (returnCode == 1) { group = true; } else if (returnCode == 2) { /* option only */ int prev = env.option; EncloseNode en = (EncloseNode)node; env.option = en.option; fetchToken(); Node target = parseSubExp(term); env.option = prev; en.setTarget(target); return node; } break; case SUBEXP_CLOSE: if (!syntax.allowUnmatchedCloseSubexp()) newSyntaxException(UNMATCHED_CLOSE_PARENTHESIS); if (token.escaped) { return parseExpTkRawByte(group); // goto tk_raw_byte } else { return parseExpTkByte(group); // goto tk_byte } case LINEBREAK: node = parseLineBreak(); break; case EXTENDED_GRAPHEME_CLUSTER: node = parseExtendedGraphemeCluster(); break; case KEEP: node = new AnchorNode(AnchorType.KEEP); break; case STRING: return parseExpTkByte(group); // tk_byte: case RAW_BYTE: return parseExpTkRawByte(group); // tk_raw_byte: case CODE_POINT: return parseStringLoop(StringNode.fromCodePoint(token.getCode(), enc), group); case QUOTE_OPEN: node = parseQuoteOpen(); break; case CHAR_TYPE: node = parseCharType(node); break; case CHAR_PROPERTY: node = parseCharProperty(); break; case CC_OPEN: { ObjPtr ascPtr = new ObjPtr(); CClassNode cc = parseCharClass(ascPtr); int code = cc.isOneChar(); if (code != -1) return parseStringLoop(StringNode.fromCodePoint(code, enc), group); node = cc; if (isIgnoreCase(env.option)) node = cClassCaseFold(node, cc, ascPtr.p); break; } case ANYCHAR: node = new AnyCharNode(); break; case ANYCHAR_ANYTIME: node = parseAnycharAnytime(); break; case BACKREF: node = parseBackref(); break; case CALL: if (Config.USE_SUBEXP_CALL) node = parseCall(); break; case ANCHOR: node = new AnchorNode(token.getAnchorSubtype(), 
token.getAnchorASCIIRange()); break; case OP_REPEAT: case INTERVAL: if (syntax.contextIndepRepeatOps()) { if (syntax.contextInvalidRepeatOps()) { newSyntaxException(TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); } else { node = StringNode.EMPTY; // node_new_empty } } else { return parseExpTkByte(group); // goto tk_byte } break; default: newInternalException(PARSER_BUG); } //switch //targetp = node; fetchToken(); // re_entry: return parseExpRepeat(node, group); // repeat: } private Node parseLineBreak() { byte[]buflb = new byte[Config.ENC_CODE_TO_MBC_MAXLEN * 2]; int len1 = enc.codeToMbc(0x0D, buflb, 0); int len2 = enc.codeToMbc(0x0A, buflb, len1); StringNode left = new StringNode(buflb, 0, len1 + len2); left.setRaw(); /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */ CClassNode right = new CClassNode(); if (enc.minLength() > 1) { right.addCodeRange(env, 0x0A, 0x0D); } else { right.bs.setRange(env, 0x0A, 0x0D); } if (enc.isUnicode()) { /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */ right.addCodeRange(env, 0x85, 0x85); right.addCodeRange(env, 0x2028, 0x2029); } /* (?>...) 
*/ EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); en.setTarget(ListNode.newAlt(left, ListNode.newAlt(right, null))); return en; } private void addPropertyToCC(CClassNode cc, UnicodeCodeRange range, boolean not) { cc.addCType(range.getCType(), not, false, env, this); } private void createPropertyNode(Node[]nodes, int np, UnicodeCodeRange range) { CClassNode cc = new CClassNode(); addPropertyToCC(cc, range, false); nodes[np] = cc; } private void quantifierNode(Node[]nodes, int np, int lower, int upper) { QuantifierNode qnf = new QuantifierNode(lower, upper, false); qnf.setTarget(nodes[np]); nodes[np] = qnf; } private void quantifierPropertyNode(Node[]nodes, int np, UnicodeCodeRange range, char repetitions) { int lower = 0; int upper = QuantifierNode.REPEAT_INFINITE; createPropertyNode(nodes, np, range); switch (repetitions) { case '?': upper = 1; break; case '+': lower = 1; break; case '*': break; case '2': lower = upper = 2; break; default : throw new InternalException(ErrorMessages.PARSER_BUG); } quantifierNode(nodes, np, lower, upper); } private void createNodeFromArray(boolean list, Node[] nodes, int np, int nodeArray) { int i = 0; ListNode tmp = null; while (nodes[nodeArray + i] != null) i++; while (--i >= 0) { nodes[np] = list ? 
ListNode.newList(nodes[nodeArray + i], tmp) : ListNode.newAlt(nodes[nodeArray + i], tmp);
            nodes[nodeArray + i] = null;
            tmp = (ListNode)nodes[np];
        }
    }

    /**
     * Chains the run of consecutive non-null entries that starts at
     * {@code nodes[nodeArray]} into an ALT list and clears every slot it consumed.
     * The run is walked back to front, so the first entry of the run becomes the
     * head of the returned chain.
     *
     * @param nodes scratch array; the run has to be followed by a {@code null} slot
     * @param nodeArray index of the first entry of the run
     * @return head of the ALT chain, or {@code null} when {@code nodes[nodeArray]} is {@code null}
     */
    private ListNode createNodeFromArray(Node[]nodes, int nodeArray) {
        int i = 0;
        ListNode np = null, tmp = null;
        while (nodes[nodeArray + i] != null) i++; // count the entries of the run
        while (--i >= 0) {
            np = ListNode.newAlt(nodes[nodeArray + i], tmp);
            nodes[nodeArray + i] = null;
            tmp = np;
        }
        return np;
    }

    /** Size of the scratch node array allocated by parseExtendedGraphemeCluster(). */
    private static final int NODE_COMMON_SIZE = 16;

    /**
     * Builds the node tree for an extended grapheme cluster.
     *
     * The tree is assembled in a fixed-size scratch array: sub-expressions are
     * written to numbered slots and then folded into a single list/alt node by
     * the createNodeFromArray helpers, which also clear the slots they consumed.
     * The nested bare blocks below only scope the slot indices of each level.
     *
     * Top-level alternatives, in order:
     *   - the raw two-code string 0x0D 0x0A;
     *   - (Unicode encodings with USE_UNICODE_PROPERTIES only) a character class
     *     and a list built from the GraphemeClusterBreak property ranges;
     *   - an any-char node wrapped in an option node.
     *
     * @return a STOP_BACKTRACK enclose around the alternatives; for Unicode
     *         encodings that enclose is additionally wrapped in an option node
     */
    private Node parseExtendedGraphemeCluster() {
        final Node[] nodes = new Node[NODE_COMMON_SIZE];
        final int anyTargetPosition;
        int alts = 0;

        // nodes[alts + 0]: raw string made of the codes 0x0D, 0x0A
        StringNode strNode = new StringNode(Config.ENC_CODE_TO_MBC_MAXLEN * 2);
        strNode.setRaw();
        strNode.catCode(0x0D, enc);
        strNode.catCode(0x0A, enc);
        nodes[alts] = strNode;

        if (Config.USE_UNICODE_PROPERTIES && enc.isUnicode()) {
            CClassNode cc;

            // nodes[alts + 1]: class of the CONTROL range plus 0x0A and 0x0D
            cc = new CClassNode();
            nodes[alts + 1] = cc;
            addPropertyToCC(cc, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_CONTROL, false);
            if (enc.minLength() > 1) {
                // multi-byte minimum length: add the two codes as code ranges
                cc.addCodeRange(env, 0x000A, 0x000A);
                cc.addCodeRange(env, 0x000D, 0x000D);
            } else {
                // otherwise set them directly in the class bit set
                cc.bs.set(0x0A);
                cc.bs.set(0x0D);
            }

            // nodes[alts + 2]: list assembled from the slots starting at 'list'
            {
                int list = alts + 3;
                quantifierPropertyNode(nodes, list + 0, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_PREPEND, '*');
                // nodes[list + 1]: alternatives assembled from the slots starting at 'coreAlts'
                {
                    int coreAlts = list + 2;
                    // nodes[coreAlts + 0]: list built from L / V / LV / LVT / T pieces
                    {
                        int HList = coreAlts + 1;
                        quantifierPropertyNode(nodes, HList + 0, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_L, '*');
                        {
                            int HAlt2 = HList + 2;
                            quantifierPropertyNode(nodes, HAlt2 + 0, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_V, '+');
                            {
                                int HList2 = HAlt2 + 2;
                                createPropertyNode(nodes, HList2 + 0, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_LV);
                                quantifierPropertyNode(nodes, HList2 + 1, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_V, '*');
                                createNodeFromArray(true, nodes, HAlt2 + 1, HList2);
                            }
                            createPropertyNode(nodes, HAlt2 + 2, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_LVT);
                            createNodeFromArray(false, nodes, HList + 1, HAlt2);
                        }
                        quantifierPropertyNode(nodes, HList + 2, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_T, '*');
                        createNodeFromArray(true, nodes, coreAlts + 0, HList);
                    }
                    quantifierPropertyNode(nodes, coreAlts + 1, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_L, '+');
                    quantifierPropertyNode(nodes, coreAlts + 2, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_T, '+');
                    quantifierPropertyNode(nodes, coreAlts + 3, UnicodeCodeRange.REGIONALINDICATOR, '2');
                    // nodes[coreAlts + 4]: EXTENDEDPICTOGRAPHIC followed by a repeated
                    // (EXTEND*, code 0x200D, EXTENDEDPICTOGRAPHIC) list
                    {
                        int XPList = coreAlts + 5;
                        createPropertyNode(nodes, XPList + 0, UnicodeCodeRange.EXTENDEDPICTOGRAPHIC);
                        {
                            int ExList = XPList + 2;
                            quantifierPropertyNode(nodes, ExList + 0, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_EXTEND, '*');
                            strNode = new StringNode(Config.ENC_CODE_TO_MBC_MAXLEN);
                            strNode.setRaw();
                            strNode.catCode(0x200D, enc);
                            nodes[ExList + 1] = strNode;
                            createPropertyNode(nodes, ExList + 2, UnicodeCodeRange.EXTENDEDPICTOGRAPHIC);
                            createNodeFromArray(true, nodes, XPList + 1, ExList);
                        }
                        quantifierNode(nodes, XPList + 1, 0, QuantifierNode.REPEAT_INFINITE);
                        createNodeFromArray(true, nodes, coreAlts + 4, XPList);
                    }
                    // nodes[coreAlts + 5]: complement of (CONTROL range, 0x0A, 0x0D);
                    // this slot index equals XPList, which is reused after the fold above
                    cc = new CClassNode();
                    nodes[coreAlts + 5] = cc;
                    if (enc.minLength() > 1) {
                        addPropertyToCC(cc, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_CONTROL, false);
                        cc.addCodeRange(env, 0x000A, 0x000A);
                        cc.addCodeRange(env, 0x000D, 0x000D);
                        cc.mbuf = CodeRangeBuffer.notCodeRangeBuff(env, cc.mbuf);
                    } else {
                        addPropertyToCC(cc, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_CONTROL, true);
                        cc.bs.clear(0x0A);
                        cc.bs.clear(0x0D);
                    }
                    createNodeFromArray(false, nodes, list + 1, coreAlts);
                }
                // nodes[list + 2]: repeated class of EXTEND, SPACINGMARK and code 0x200D
                createPropertyNode(nodes, list + 2, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_EXTEND);
                cc = (CClassNode)nodes[list + 2];
                addPropertyToCC(cc, UnicodeCodeRange.GRAPHEMECLUSTERBREAK_SPACINGMARK, false);
                cc.addCodeRange(env, 0x200D, 0x200D);
                quantifierNode(nodes, list + 2, 0, QuantifierNode.REPEAT_INFINITE);
                createNodeFromArray(true, nodes, alts + 2, list);
            }
            // slots 0..2 are taken, the any-char alternative goes to slot 3
            anyTargetPosition = 3;
        } else { // enc.isUnicode()
            // only the 0x0D 0x0A string is present, the any-char alternative goes to slot 1
            anyTargetPosition = 1;
        }

        // last alternative: any char under an option computed from env.option and Option.MULTILINE
        Node any = new AnyCharNode();
        EncloseNode option = EncloseNode.newOption(bsOnOff(env.option, Option.MULTILINE, false));
        option.setTarget(any);
        nodes[anyTargetPosition] = option;

        // fold the top-level slots into one ALT chain and make it non-backtracking
        Node topAlt = createNodeFromArray(nodes, alts);
        EncloseNode enclose = new EncloseNode(EncloseType.STOP_BACKTRACK);
        enclose.setTarget(topAlt);

        if (Config.USE_UNICODE_PROPERTIES && enc.isUnicode()) {
            // Unicode case: wrap once more in an option computed from env.option and Option.IGNORECASE
            option = EncloseNode.newOption(bsOnOff(env.option, Option.IGNORECASE, true));
            option.setTarget(enclose);
            return option;
        } else {
            return enclose;
        }
    }

    /**
     * Starts a string node that shares the pattern bytes of the current token
     * ({@code token.backP} up to the scanner position {@code p}) and continues
     * with {@link #parseStringLoop}.
     */
    private Node parseExpTkByte(boolean group) {
        StringNode node = new StringNode(bytes, token.backP, p); // tk_byte:
        return parseStringLoop(node, group);
    }

    /**
     * Extends {@code node} with every following STRING and CODE_POINT token, then
     * hands the finished string to {@link #parseExpRepeat}. On return from the
     * loop the current token is the first one that is neither STRING nor CODE_POINT.
     */
    private Node parseStringLoop(StringNode node, boolean group) {
        while (true) {
            fetchToken();
            if (token.type == TokenType.STRING) {
                if (token.backP == node.end) {
                    node.end = p; // non escaped character, remain shared, just increase shared range
                } else {
                    node.catBytes(bytes, token.backP, p); // non continuous string stream, need to COW
                }
            } else if (token.type == TokenType.CODE_POINT) {
                node.catCode(token.getCode(), enc);
            } else {
                break;
            }
        }
        // targetp = node;
        return parseExpRepeat(node, group); // string_end:, goto repeat
    }

    /**
     * Collects consecutive RAW_BYTE tokens into a raw string node until the
     * collected bytes form one complete character according to
     * {@code enc.length}; the node then stops being raw and is passed to
     * {@link #parseExpRepeat}. A non-RAW_BYTE token before the character is
     * complete is reported as TOO_SHORT_MULTI_BYTE_STRING.
     */
    private Node parseExpTkRawByte(boolean group) {
        // tk_raw_byte:
        StringNode node = new StringNode();
        node.setRaw();
        node.catByte((byte)token.getC());

        int len = 1;
        while (true) {
            if (len >= enc.minLength()) {
                if (len == enc.length(node.bytes, node.p, node.end)) {
                    fetchToken();
                    node.clearRaw();
                    // !goto string_end;!
                    return parseExpRepeat(node, group);
                }
            }

            fetchToken();
            if (token.type != TokenType.RAW_BYTE) {
                /* Don't use this, it is wrong for little endian encodings. */
                // USE_PAD_TO_SHORT_BYTE_CHAR ...
newValueException(TOO_SHORT_MULTI_BYTE_STRING);
            }
            node.catByte((byte)token.getC());
            len++;
        } // while
    }

    /**
     * Applies every repeat operator (OP_REPEAT or INTERVAL token) that follows
     * {@code target}.
     *
     * The value returned by {@code QuantifierNode.setQuantifier} selects what
     * happens: 0 (or 1 under the ECMAScript option) makes the quantifier the new
     * target; 2 is the "split" case, where target and quantifier are put into a
     * two-element list and the remaining operators are handled by
     * {@link #parseExpRepeatForCar}.
     *
     * @param target node the operators apply to
     * @param group forwarded to {@code setQuantifier}; when false and the ECMAScript
     *              option is on, a quantifier directly on a quantifier is rejected
     * @return the resulting node; the current token is the first non-repeat token
     */
    private Node parseExpRepeat(Node target, boolean group) {
        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
            if (isInvalidQuantifier(target)) newSyntaxException(TARGET_OF_REPEAT_OPERATOR_INVALID);

            if (!group && syntax.op3OptionECMAScript() && target.getType() == NodeType.QTFR) {
                newSyntaxException(NESTED_REPEAT_NOT_ALLOWED);
            }
            QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
                                                     token.getRepeatUpper(),
                                                     token.type == TokenType.INTERVAL);

            qtfr.greedy = token.getRepeatGreedy();
            int ret = qtfr.setQuantifier(target, group, env, bytes, getBegin(), getEnd());
            Node qn = qtfr;

            if (token.getRepeatPossessive()) {
                // possessive form: wrap the quantifier in a non-backtracking enclose
                EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
                en.setTarget(qn);
                qn = en;
            }

            if (ret == 0 || (syntax.op3OptionECMAScript() && ret == 1)) {
                target = qn;
            } else if (ret == 2) { /* split case: /abc+/ */
                target = ListNode.newList(target, null);
                ListNode tmp = ListNode.newList(qn, null);
                ((ListNode)target).setTail(tmp);

                fetchToken();
                return parseExpRepeatForCar(target, tmp, group);
            }
            fetchToken(); // goto re_entry
        }
        return target;
    }

    /**
     * Continuation of {@link #parseExpRepeat} after a split: further repeat
     * operators are applied to the value held by the list cell {@code target},
     * while {@code top} (the list that contains that cell) is what gets returned.
     * A second split (return code 2) is not expected here and only asserted.
     */
    private Node parseExpRepeatForCar(Node top, ListNode target, boolean group) {
        while (token.type == TokenType.OP_REPEAT || token.type == TokenType.INTERVAL) { // repeat:
            if (isInvalidQuantifier(target.value)) newSyntaxException(TARGET_OF_REPEAT_OPERATOR_INVALID);

            QuantifierNode qtfr = new QuantifierNode(token.getRepeatLower(),
                                                     token.getRepeatUpper(),
                                                     token.type == TokenType.INTERVAL);

            qtfr.greedy = token.getRepeatGreedy();
            int ret = qtfr.setQuantifier(target.value, group, env, bytes, getBegin(), getEnd());
            Node qn = qtfr;

            if (token.getRepeatPossessive()) {
                EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); // node_new_enclose
                en.setTarget(qn);
                qn = en;
            }

            if (ret == 0) {
                target.setValue(qn);
            } else if (ret == 2) { /* split case: /abc+/ */
                assert false;
            }
            fetchToken(); // goto re_entry
        }
        return top;
    }

    /**
     * Tells whether {@code node} may not be the target of a repeat operator.
     *
     * Always false when {@code Config.USE_NO_INVALID_QUANTIFIER} is set. Otherwise:
     * an ANCHOR is invalid; an ALT is invalid if any of its alternatives is;
     * every other node type, including ENCLOSE and LIST, is accepted.
     */
    private boolean isInvalidQuantifier(Node node) {
        if (Config.USE_NO_INVALID_QUANTIFIER) return false;

        ListNode consAlt;
        switch(node.getType()) {
        case NodeType.ANCHOR:
            return true;

        case NodeType.ENCLOSE:
            /* allow enclosed elements */
            /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
            break;

        case NodeType.LIST:
            // NOTE(review): both exits of this loop yield false, so a LIST is never
            // reported as invalid regardless of its elements.
            consAlt = (ListNode)node;
            do {
                if (!isInvalidQuantifier(consAlt.value)) return false;
            } while ((consAlt = consAlt.tail) != null);
            return false;

        case NodeType.ALT:
            consAlt = (ListNode)node;
            do {
                if (isInvalidQuantifier(consAlt.value)) return true;
            } while ((consAlt = consAlt.tail) != null);
            break;

        default:
            break;
        }
        return false;
    }

    /**
     * Parses a quoted section: everything from the current position up to the
     * two-code terminator (the syntax escape character followed by 'E') becomes
     * one string node that shares the pattern bytes. When no terminator is found
     * the quote extends to {@code stop}. The scanner position is moved to the
     * position reported back through {@code nextChar}.
     */
    private Node parseQuoteOpen() {
        int[]endOp = new int[]{syntax.metaCharTable.esc, 'E'};
        int qstart = p;
        Ptr nextChar = new Ptr();
        int qend = findStrPosition(endOp, endOp.length, qstart, stop, nextChar);
        if (qend == -1) nextChar.p = qend = stop; // unterminated: quote runs to the end
        Node node = new StringNode(bytes, qstart, qend);
        p = nextChar.p;
        return node;
    }

    /**
     * Creates the node for a character-type token. WORD yields a CTypeNode;
     * SPACE, DIGIT and XDIGIT yield a CClassNode holding that type (negated when
     * the token asks for it). Any other type is reported as PARSER_BUG.
     *
     * @param node returned unchanged only if the default branch does not throw
     */
    private Node parseCharType(Node node) {
        switch(token.getPropCType()) {
        case CharacterType.WORD:
            node = new CTypeNode(token.getPropCType(), token.getPropNot(), isAsciiRange(env.option));
            break;

        case CharacterType.SPACE:
        case CharacterType.DIGIT:
        case CharacterType.XDIGIT:
            CClassNode ccn = new CClassNode();
            ccn.addCType(token.getPropCType(), false, isAsciiRange(env.option), env, this);
            if (token.getPropNot()) ccn.setNot();
            node = ccn;
            break;

        default:
            newInternalException(PARSER_BUG);
        } // inner switch
        return node;
    }

    /**
     * Runs {@code enc.applyAllCaseFold} over the given classes; when the callback
     * argument ends up with an {@code altRoot}, the result is an ALT of
     * {@code node} and that root, otherwise {@code node} itself.
     */
    private Node cClassCaseFold(Node node, CClassNode cc, CClassNode ascCc) {
        ApplyCaseFoldArg arg = new ApplyCaseFoldArg(env, cc, ascCc);
        enc.applyAllCaseFold(env.caseFoldFlag, ApplyCaseFold.INSTANCE, arg);
        if (arg.altRoot != null) {
            node = ListNode.newAlt(node, arg.altRoot);
        }
        return node;
    }

    /**
     * Builds a character class for the property named by the current token
     * (negated when the token asks for it). With the ignore-case option on and a
     * type other than ASCII, the class is additionally case folded.
     */
    private Node parseCharProperty() {
        int ctype = fetchCharPropertyToCType();
        CClassNode cc = new CClassNode();
        Node node = cc;
        cc.addCType(ctype, false, false, env, this);
        if (token.getPropNot()) cc.setNot();

        if (isIgnoreCase(env.option)) {
            if (ctype != CharacterType.ASCII) {
                node = cClassCaseFold(node, cc, cc);
            }
        }
        return node;
    }

    /** Builds a quantifier with bounds 0..REPEAT_INFINITE around an any-char node. */
    private Node parseAnycharAnytime() {
        Node node = new AnyCharNode();
        QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false);
        qn.setTarget(node);
        return qn;
    }

    /**
     * Builds the node for a back-reference token.
     *
     * Under the ECMAScript option a single-group reference is replaced by the
     * empty string when the referenced group records a containing anchor that is
     * not among {@code env.precReadNotNodes}; in every other case a BackRefNode
     * is created.
     */
    private Node parseBackref() {
        final Node node;
        if (syntax.op3OptionECMAScript() && token.getBackrefNum() == 1 && env.memNodes != null) {
            EncloseNode encloseNode = env.memNodes[token.getBackrefRef1()];
            boolean shouldIgnore = false;
            if (encloseNode != null && encloseNode.containingAnchor != null) {
                shouldIgnore = true;
                // keep the reference if the group's anchor is one of the recorded ones
                for (Node anchorNode : env.precReadNotNodes) {
                    if (anchorNode == encloseNode.containingAnchor) {
                        shouldIgnore = false;
                        break;
                    }
                }
            }
            if (shouldIgnore) {
                node = StringNode.EMPTY;
            } else {
                node = newBackRef(new int[]{token.getBackrefRef1()});
            }
        } else {
            node = newBackRef(token.getBackrefNum() > 1 ? token.getBackrefRefs() : new int[]{token.getBackrefRef1()});
        }
        return node;
    }

    /** Creates a BackRefNode for {@code backRefs} using the remaining fields of the current token. */
    private BackRefNode newBackRef(int[]backRefs) {
        return new BackRefNode(token.getBackrefNum(),
                        backRefs,
                        token.getBackrefByName(),
                        token.getBackrefExistLevel(),
                        token.getBackrefLevel(),
                        env);
    }

    /**
     * Builds a CallNode for a subexpression-call token and counts it in
     * {@code env.numCall}. Negative or relative group numbers are converted with
     * {@code backrefRelToAbs}; a result that is not positive is reported as
     * INVALID_BACKREF.
     */
    private Node parseCall() {
        int gNum = token.getCallGNum();

        if (gNum < 0 || token.getCallRel()) {
            if (gNum > 0) gNum--;
            gNum = backrefRelToAbs(gNum);
            if (gNum <= 0) newValueException(INVALID_BACKREF);
        }
        Node node = new CallNode(bytes, token.getCallNameP(), token.getCallNameEnd(), gNum);
        env.numCall++;
        return node;
    }

    /**
     * Parses one branch: a sequence of expressions that ends at EOT, at
     * {@code term} or at an ALT token. A single expression is returned as is;
     * several are linked into a LIST, and an expression that is itself a LIST is
     * spliced in rather than nested.
     */
    private Node parseBranch(TokenType term) {
        Node node = parseExp(term);

        if (token.type == TokenType.EOT || token.type == term || token.type == TokenType.ALT) {
            return node;
        } else {
            ListNode top = ListNode.newList(node, null);
            ListNode t = top;

            while (token.type != TokenType.EOT && token.type != term && token.type != TokenType.ALT) {
                node = parseExp(term);
                if (node.getType() == NodeType.LIST) {
                    // splice the sub-list and advance t to its last cell
                    t.setTail((ListNode)node);
                    while (((ListNode)node).tail != null ) node = ((ListNode)node).tail;

                    t = ((ListNode)node);
                } else {
                    t.setTail(ListNode.newList(node, null));
                    t = t.tail;
                }
            }
            return top;
        }
    }

    /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
    /**
     * Parses branches separated by ALT tokens up to {@code term}. One branch is
     * returned directly, several are linked into an ALT chain. Ending on any
     * other token is reported through {@link #parseSubExpError}.
     */
    private Node parseSubExp(TokenType term) {
        Node node = parseBranch(term);

        if (token.type == term) {
            return node;
        } else if (token.type == TokenType.ALT) {
            ListNode top = ListNode.newAlt(node, null);
            ListNode t = top;
            while (token.type == TokenType.ALT) {
                fetchToken();
                node = parseBranch(term);

                t.setTail(ListNode.newAlt(node, null));
                t = t.tail;
            }

            if (token.type != term) parseSubExpError(term);
            return top;
        } else {
            parseSubExpError(term);
            return null; //not reached
        }
    }

    /**
     * Reports a sub-expression that did not end on {@code term}: an unmatched
     * parenthesis when a SUBEXP_CLOSE was expected, PARSER_BUG otherwise.
     */
    private void parseSubExpError(TokenType term) {
        if (term == TokenType.SUBEXP_CLOSE) {
            newSyntaxException(END_PATTERN_WITH_UNMATCHED_PARENTHESIS);
        } else {
            newInternalException(PARSER_BUG);
        }
    }

    /**
     * Entry point: fetches the first token and parses the pattern up to EOT.
     * When sub-expression calls are enabled and at least one call was seen, the
     * whole tree is wrapped in a memory enclose with {@code regNum} 0, which is
     * also stored in {@code env.memNodes[0]}.
     */
    protected final Node parseRegexp() {
        fetchToken();
        Node top = parseSubExp(TokenType.EOT);
        if (Config.USE_SUBEXP_CALL) {
            if (env.numCall > 0) {
                /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>.
                */
                EncloseNode np = EncloseNode.newMemory(env.option, false);
                np.regNum = 0;
                np.setTarget(top);
                if (env.memNodes == null) env.memNodes = new EncloseNode[Config.SCANENV_MEMNODES_SIZE];
                env.memNodes[0] = np;
                top = np;
            }
        }
        return top;
    }
}
jruby-joni-2.1.41/src/org/joni/Regex.java000066400000000000000000000421721400407002500201170ustar00rootroot00000000000000/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
*/ package org.joni; import static org.joni.BitStatus.bsAt; import static org.joni.Config.USE_SUNDAY_QUICK_SEARCH; import static org.joni.Option.isCaptureGroup; import static org.joni.Option.isDontCaptureGroup; import java.util.Collections; import java.util.Iterator; import org.jcodings.CaseFoldCodeItem; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; import org.jcodings.specific.UTF8Encoding; import org.jcodings.util.BytesHash; import org.joni.constants.internal.AnchorType; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.ValueException; public final class Regex { int[] code; /* compiled pattern */ int codeLength; boolean requireStack; int numMem; /* used memory(...) num counted from 1 */ int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ int numCombExpCheck; /* combination explosion check */ int numCall; /* number of subexp call */ int captureHistory; /* (?@...) 
flag (1-31) */ int btMemStart; /* need backtrack flag */ int btMemEnd; /* need backtrack flag */ int stackPopLevel; int[]repeatRangeLo; int[]repeatRangeHi; MatcherFactory factory; final Encoding enc; int options; int userOptions; Object userObject; final int caseFoldFlag; private BytesHash nameTable; // named entries /* optimization info (string search, char-map and anchors) */ Search.Forward forward; /* optimize flag */ Search.Backward backward; int thresholdLength; /* search str-length for apply optimize */ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ int anchorDmin; /* (SEMI_)END_BUF anchor distance */ int anchorDmax; /* (SEMI_)END_BUF anchor distance */ int subAnchor; /* start-anchor for exact or map */ byte[]exact; int exactP; int exactEnd; byte[]map; /* used as BM skip or char-map */ int[]intMap; /* BM skip for exact_len > 255 */ int[]intMapBackward; /* BM skip for backward search */ int dMin; /* min-distance of exact or map */ int dMax; /* max-distance of exact or map */ byte[][]templates; /* fixed pattern strings not embedded in bytecode */ int templateNum; public Regex(CharSequence cs) { this(cs.toString()); } public Regex(CharSequence cs, Encoding enc) { this(cs.toString(), enc); } public Regex(String str) { this(str.getBytes(), 0, str.length(), 0, UTF8Encoding.INSTANCE); } public Regex(String str, Encoding enc) { this(str.getBytes(), 0, str.length(), 0, enc); } public Regex(byte[] bytes) { this(bytes, 0, bytes.length, 0, ASCIIEncoding.INSTANCE); } public Regex(byte[] bytes, int p, int end) { this(bytes, p, end, 0, ASCIIEncoding.INSTANCE); } public Regex(byte[] bytes, int p, int end, int option) { this(bytes, p, end, option, ASCIIEncoding.INSTANCE); } public Regex(byte[]bytes, int p, int end, int option, Encoding enc) { this(bytes, p, end, option, enc, Syntax.RUBY, WarnCallback.DEFAULT); } // onig_new public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax) { this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, 
enc, syntax, WarnCallback.DEFAULT); } public Regex(byte[]bytes, int p, int end, int option, Encoding enc, WarnCallback warnings) { this(bytes, p, end, option, enc, Syntax.RUBY, warnings); } // onig_new public Regex(byte[]bytes, int p, int end, int option, Encoding enc, Syntax syntax, WarnCallback warnings) { this(bytes, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, enc, syntax, warnings); } // onig_alloc_init public Regex(byte[]bytes, int p, int end, int option, int caseFoldFlag, Encoding enc, Syntax syntax, WarnCallback warnings) { if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) == (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) { throw new ValueException(ErrorMessages.INVALID_COMBINATION_OF_OPTIONS); } if ((option & Option.NEGATE_SINGLELINE) != 0) { option |= syntax.options; option &= ~Option.SINGLELINE; } else { option |= syntax.options; } this.enc = enc; this.options = option; this.caseFoldFlag = caseFoldFlag; new Analyser(this, syntax, bytes, p, end, warnings).compile(); } public Matcher matcher(byte[]bytes) { return matcher(bytes, 0, bytes.length); } public Matcher matcherNoRegion(byte[]bytes) { return matcherNoRegion(bytes, 0, bytes.length); } public Matcher matcher(byte[]bytes, int p, int end) { return factory.create(this, numMem == 0 ? 
null : new Region(numMem + 1), bytes, p, end); } public Matcher matcherNoRegion(byte[]bytes, int p, int end) { return factory.create(this, null, bytes, p, end); } public int numberOfCaptures() { return numMem; } public int numberOfCaptureHistories() { if (Config.USE_CAPTURE_HISTORY) { int n = 0; for (int i=0; i<=Config.MAX_CAPTURE_HISTORY_GROUP; i++) { if (bsAt(captureHistory, i)) n++; } return n; } else { return 0; } } private NameEntry nameFind(byte[]name, int nameP, int nameEnd) { if (nameTable != null) return nameTable.get(name, nameP, nameEnd); return null; } void renumberNameTable(int[]map) { if (nameTable != null) { for (NameEntry e : nameTable) { if (e.backNum > 1) { for (int i=0; i(); // 13, oni defaults to 5 } else { e = nameFind(name, nameP, nameEnd); } if (e == null) { // dup the name here as oni does ?, what for ? (it has to manage it, we don't) e = new NameEntry(name, nameP, nameEnd); nameTable.putDirect(name, nameP, nameEnd, e); } else if (e.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) { throw new ValueException(ErrorMessages.MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP)); } e.addBackref(backRef); } NameEntry nameToGroupNumbers(byte[]name, int nameP, int nameEnd) { return nameFind(name, nameP, nameEnd); } public int nameToBackrefNumber(byte[]name, int nameP, int nameEnd, Region region) { NameEntry e = nameToGroupNumbers(name, nameP, nameEnd); if (e == null) throw new ValueException(ErrorMessages.UNDEFINED_NAME_REFERENCE, new String(name, nameP, nameEnd - nameP)); switch(e.backNum) { case 0: throw new InternalException(ErrorMessages.PARSER_BUG); case 1: return e.backRef1; default: if (region != null) { for (int i = e.backNum - 1; i >= 0; i--) { if (region.beg[e.backRefs[i]] != Region.REGION_NOTPOS) return e.backRefs[i]; } } return e.backRefs[e.backNum - 1]; } } String nameTableToString() { StringBuilder sb = new StringBuilder(); if (nameTable != null) { sb.append("name table\n"); for (NameEntry ne : nameTable) { 
sb.append(" ").append(ne).append("\n"); } sb.append("\n"); } return sb.toString(); } public Iterator namedBackrefIterator() { return nameTable == null ? Collections.emptyIterator() : nameTable.iterator(); } public int numberOfNames() { return nameTable == null ? 0 : nameTable.size(); } public boolean noNameGroupIsActive(Syntax syntax) { if (isDontCaptureGroup(options)) return false; if (Config.USE_NAMED_GROUP) { if (numberOfNames() > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(options)) return false; } return true; } /* set skip map for Boyer-Moor search */ boolean setupBMSkipMap(boolean ignoreCase) { byte[]bytes = exact; int s = exactP; int end = exactEnd; int len = end - s; int clen; CaseFoldCodeItem[]items = CaseFoldCodeItem.EMPTY_FOLD_CODES; byte[]buf = new byte[Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM * Config.ENC_MBC_CASE_FOLD_MAXLEN]; final int ilen = USE_SUNDAY_QUICK_SEARCH ? len : len - 1; if (Config.USE_BYTE_MAP || len < Config.CHAR_TABLE_SIZE) { if (map == null) map = new byte[Config.CHAR_TABLE_SIZE]; // map/skip for (int i = 0; i < Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)(USE_SUNDAY_QUICK_SEARCH ? len + 1 : len); for (int i = 0; i < ilen; i += clen) { if (ignoreCase) items = enc.caseFoldCodesByString(caseFoldFlag, bytes, s + i, end); clen = setupBMSkipMapCheck(bytes, s + i, end, items, buf); if (clen == 0) return true; for (int j = 0; j < clen; j++) { map[bytes[s + i + j] & 0xff] = (byte)(ilen - i - j); for (int k = 0; k < items.length; k++) { map[buf[k * Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM + j] & 0xff] = (byte)(ilen - i - j); } } } } else { if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE]; for (int i = 0; i < Config.CHAR_TABLE_SIZE; i++) intMap[i] = (USE_SUNDAY_QUICK_SEARCH ? 
len + 1 : len); for (int i = 0; i < ilen; i += clen) { if (ignoreCase) items = enc.caseFoldCodesByString(caseFoldFlag, bytes, s + i, end); clen = setupBMSkipMapCheck(bytes, s + i, end, items, buf); if (clen == 0) return true; for (int j = 0; j < clen; j++) { intMap[bytes[s + i + j] & 0xff] = ilen - i - j; for (int k = 0; k < items.length; k++) { intMap[buf[k * Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM + j] & 0xff] = ilen - i - j; } } } } return false; } private int setupBMSkipMapCheck(byte[]bytes, int p, int end, CaseFoldCodeItem[]items, byte[]buf) { int clen = enc.length(bytes, p, end); if (p + clen > end) clen = end - p; for (int j = 0; j < items.length; j++) { if (items[j].code.length != 1 || items[j].byteLen != clen) return 0; int flen = enc.codeToMbc(items[j].code[0], buf, j * Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM); if (flen != clen) return 0; } return clen; } void setOptimizeExactInfo(OptExactInfo e) { if (e.length == 0) return; // shall we copy that ? exact = e.bytes; exactP = 0; exactEnd = e.length; boolean allowReverse = enc.isReverseMatchAllowed(exact, exactP, exactEnd); if (e.ignoreCase > 0) { if (e.length >= 3 || (e.length >= 2 && allowReverse)) { forward = enc.toLowerCaseTable() != null ? Search.SLOW_IC_SB_FORWARD : Search.SLOW_IC_FORWARD; // if (!setupBMSkipMap(true)) { // forward = allowReverse ? Search.BM_IC_FORWARD : Search.BM_NOT_REV_IC_FORWARD; // } else { // forward = enc.toLowerCaseTable() != null ? Search.SLOW_IC_SB_FORWARD : Search.SLOW_IC_FORWARD; // } } else { forward = enc.toLowerCaseTable() != null ? Search.SLOW_IC_SB_FORWARD : Search.SLOW_IC_FORWARD; } backward = enc.toLowerCaseTable() != null ? Search.SLOW_IC_SB_BACKWARD : Search.SLOW_IC_BACKWARD; } else { if (e.length >= 3 || (e.length >= 2 && allowReverse)) { if (!setupBMSkipMap(false)) { forward = allowReverse ? Search.BM_FORWARD : Search.BM_NOT_REV_FORWARD; } else { forward = enc.isSingleByte() ? Search.SLOW_SB_FORWARD : Search.SLOW_FORWARD; } } else { forward = enc.isSingleByte() ? 
Search.SLOW_SB_FORWARD : Search.SLOW_FORWARD; } backward = enc.isSingleByte() ? Search.SLOW_SB_BACKWARD : Search.SLOW_BACKWARD; } dMin = e.mmd.min; dMax = e.mmd.max; if (dMin != MinMaxLen.INFINITE_DISTANCE) { thresholdLength = dMin + (exactEnd - exactP); } } void setOptimizeMapInfo(OptMapInfo m) { map = m.map; if (enc.isSingleByte()) { forward = Search.MAP_SB_FORWARD; backward = Search.MAP_SB_BACKWARD; } else { forward = Search.MAP_FORWARD; backward = Search.MAP_BACKWARD; } dMin = m.mmd.min; dMax = m.mmd.max; if (dMin != MinMaxLen.INFINITE_DISTANCE) { thresholdLength = dMin + 1; } } void setSubAnchor(OptAnchorInfo anc) { subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE; subAnchor |= anc.rightAnchor & AnchorType.END_LINE; } void clearOptimizeInfo() { forward = null; backward = null; anchor = 0; anchorDmax = 0; anchorDmin = 0; subAnchor = 0; exact = null; exactP = exactEnd = 0; } public String optimizeInfoToString() { String s = ""; s += "optimize: " + (forward != null ? forward.getName() : "NONE") + "\n"; s += " anchor: " + OptAnchorInfo.anchorToString(anchor); if ((anchor & AnchorType.END_BUF_MASK) != 0) { s += MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax); } s += "\n"; if (forward != null) { s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n"; } s += "dmin: " + dMin + " dmax: " + dMax + "\n"; s += "threshold length: " + thresholdLength + "\n"; if (exact != null) { s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n"; } else if (forward == Search.MAP_FORWARD || forward == Search.MAP_SB_FORWARD) { int n=0; for (int i=0; i 0) { int c=0; s += "["; for (int i=0; i 0) s += ", "; c++; if (enc.maxLength() == 1 && enc.isPrint(i)) s += ((char)i); else s += i; } } s += "]\n"; } } return s; } public Encoding getEncoding() { return enc; } public int getOptions() { return options; } public void setUserOptions(int options) { this.userOptions = options; } public int getUserOptions() { return 
userOptions; } public void setUserObject(Object object) { this.userObject = object; } public Object getUserObject() { return userObject; } } jruby-joni-2.1.41/src/org/joni/Region.java000066400000000000000000000044171400407002500202700ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; public final class Region { static final int REGION_NOTPOS = -1; public final int numRegs; public final int[]beg; public final int[]end; public CaptureTreeNode historyRoot; public Region(int num) { this.numRegs = num; this.beg = new int[num]; this.end = new int[num]; } public Region(int begin, int end) { this.numRegs = 1; this.beg = new int[]{begin}; this.end = new int[]{end}; } public Region clone() { Region region = new Region(numRegs); System.arraycopy(beg, 0, region.beg, 0, beg.length); System.arraycopy(end, 0, region.end, 0, end.length); if (historyRoot != null) region.historyRoot = historyRoot.cloneTree(); return region; } public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Region: \n"); for (int i=0; i= Config.MAX_CAPTURE_GROUP_NUM) throw new InternalException(ErrorMessages.TOO_MANY_CAPTURE_GROUPS); if (numMem++ == 0) { memNodes = new EncloseNode[Config.SCANENV_MEMNODES_SIZE]; } else if (numMem >= memNodes.length) { EncloseNode[]tmp = new EncloseNode[memNodes.length << 1]; System.arraycopy(memNodes, 0, tmp, 0, memNodes.length); memNodes = tmp; } return numMem; } void setMemNode(int num, EncloseNode node) { if (numMem >= num) { memNodes[num] = node; } else { throw new InternalException(ErrorMessages.PARSER_BUG); } } void pushPrecReadNotNode(Node node) { numPrecReadNotNodes++; if (precReadNotNodes == null) { precReadNotNodes = new Node[Config.SCANENV_MEMNODES_SIZE]; } else if (numPrecReadNotNodes >= precReadNotNodes.length) { Node[]tmp = new Node[precReadNotNodes.length << 1]; System.arraycopy(precReadNotNodes, 0, tmp, 0, precReadNotNodes.length); precReadNotNodes = tmp; } precReadNotNodes[numPrecReadNotNodes - 1] = node; } void popPrecReadNotNode(Node node) { if (precReadNotNodes != null && precReadNotNodes[numPrecReadNotNodes - 1] == node) { precReadNotNodes[numPrecReadNotNodes - 1] = null; numPrecReadNotNodes--; } } Node currentPrecReadNotNode() { if (numPrecReadNotNodes > 0) { return 
precReadNotNodes[numPrecReadNotNodes - 1]; } return null; } int convertBackslashValue(int c) { if (syntax.opEscControlChars()) { switch (c) { case 'n': return '\n'; case 't': return '\t'; case 'r': return '\r'; case 'f': return '\f'; case 'a': return '\007'; case 'b': return '\010'; case 'e': return '\033'; case 'v': if (syntax.op2EscVVtab()) return 11; // '\v' break; default: if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) unknownEscWarn(String.valueOf((char)c)); } } return c; } void ccEscWarn(String s) { if (warnings != WarnCallback.NONE) { if (syntax.warnCCOpNotEscaped() && syntax.backSlashEscapeInCC()) { warnings.warn("character class has '" + s + "' without escape"); } } } void unknownEscWarn(String s) { if (warnings != WarnCallback.NONE) { warnings.warn("Unknown escape \\" + s + " is ignored"); } } void closeBracketWithoutEscapeWarn(String s) { if (warnings != WarnCallback.NONE) { if (syntax.warnCCOpNotEscaped()) { warnings.warn("regular expression has '" + s + "' without escape"); } } } void ccDuplicateWarn() { if (syntax.warnCCDup() && (warningsFlag & SyntaxProperties.WARN_CC_DUP) == 0) { warningsFlag |= SyntaxProperties.WARN_CC_DUP; // FIXME: #34 points out problem and what it will take to uncomment this (we were getting erroneous versions of this) // warnings.warn("character class has duplicated range"); } } } jruby-joni-2.1.41/src/org/joni/ScannerSupport.java000066400000000000000000000127701400407002500220340ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be 
included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.jcodings.Encoding; import org.jcodings.IntHolder; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.SyntaxException; import org.joni.exception.ValueException; abstract class ScannerSupport extends IntHolder implements ErrorMessages { protected final Encoding enc; // fast access to encoding protected final byte[]bytes; // pattern protected int p; // current scanner position protected int stop; // pattern end (mutable) private int lastFetched; // last fetched value for unfetch support protected int c; // current code point private final int begin; // pattern begin position for reset() support private final int end; // pattern end position for reset() support protected int _p; // used by mark()/restore() to mark positions protected ScannerSupport(Encoding enc, byte[]bytes, int p, int end) { this.enc = enc; this.bytes = bytes; this.begin = p; this.end = end; } protected final int getBegin() { return begin; } protected final int getEnd() { return end; } private static final int INT_SIGN_BIT = 1 << 31; protected final int scanUnsignedNumber() { int last = c; int num = 0; // long ??? 
while(left()) { fetch(); if (enc.isDigit(c)) { int onum = num; num = num * 10 + Encoding.digitVal(c); if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; } else { unfetch(); break; } } c = last; return num; } protected final int scanUnsignedHexadecimalNumber(int minLength, int maxLength) { int last = c; int num = 0; int restLen = maxLength - minLength; while(left() && maxLength-- != 0) { fetch(); if (enc.isXDigit(c)) { int val = enc.xdigitVal(c); if ((Integer.MAX_VALUE - val) / 16 < num) return -1; num = (num << 4) + val; } else { unfetch(); maxLength++; break; } } if (maxLength > restLen) return -2; c = last; return num; } protected final int scanUnsignedOctalNumber(int maxLength) { int last = c; int num = 0; while(left() && maxLength-- != 0) { fetch(); if (enc.isDigit(c) && c < '8') { int onum = num; int val = Encoding.odigitVal(c); num = (num << 3) + val; if (((onum ^ num) & INT_SIGN_BIT) != 0) return -1; } else { unfetch(); break; } } c = last; return num; } protected final void reset() { p = begin; stop = end; } protected final void mark() { _p = p; } protected final void restore() { p = _p; } protected final void inc() { lastFetched = p; p += enc.length(bytes, p, stop); } protected final void fetch() { c = enc.mbcToCode(bytes, p, stop); lastFetched = p; p += enc.length(bytes, p, stop); } protected int fetchTo() { int to = enc.mbcToCode(bytes, p, stop); lastFetched = p; p += enc.length(bytes, p, stop); return to; } protected final void unfetch() { p = lastFetched; } protected final int peek() { return p < stop ? 
enc.mbcToCode(bytes, p, stop) : 0; } protected final boolean peekIs(int c) { return peek() == c; } protected final boolean left() { return p < stop; } protected void newSyntaxException(String message) { throw new SyntaxException(message); } protected void newValueException(String message) { throw new ValueException(message); } protected void newValueException(String message, String str) { throw new ValueException(message, str); } protected void newValueException(String message, int p, int end) { throw new ValueException(message, new String(bytes, p, end - p)); } protected void newInternalException(String message) { throw new InternalException(message); } } jruby-joni-2.1.41/src/org/joni/Search.java000066400000000000000000000551641400407002500202570ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import static org.joni.Config.USE_SUNDAY_QUICK_SEARCH; import org.jcodings.Encoding; import org.jcodings.IntHolder; final class Search { static abstract class Forward { abstract String getName(); abstract int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange); } static abstract class Backward { abstract int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_); } private static boolean lowerCaseMatch(byte[] t, int tP, int tEnd, byte[] bytes, int p, int end, Encoding enc, byte[] buf, int caseFoldFlag) { final IntHolder holder = new IntHolder(); holder.value = p; while (tP < tEnd) { int lowlen = enc.mbcCaseFold(caseFoldFlag, bytes, holder, end, buf); if (lowlen == 1) { if (t[tP++] != buf[0]) return false; } else { int q = 0; while (lowlen > 0) { if (t[tP++] != buf[q++]) return false; lowlen--; } } } return true; } static final Forward SLOW_FORWARD = new Forward() { @Override final String getName() { return "EXACT_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; while (s < end) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return s; } s += enc.length(text, s, textEnd); } return -1; } }; static final Backward SLOW_BACKWARD = new Backward() { @Override final int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; s = 
(s > textStart) ? textStart : enc.leftAdjustCharHead(text, adjustText, s, textEnd); while (s >= textP) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return s; } s = enc.prevCharHead(text, adjustText, s, textEnd); } return -1; } }; static final Forward SLOW_SB_FORWARD = new Forward() { @Override final String getName() { return "EXACT_SB_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; while (s < end) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return s; } s++; } return -1; } }; static final Backward SLOW_SB_BACKWARD = new Backward() { @Override final int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Regex regex = matcher.regex; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; if (s > textStart) s = textStart; while (s >= textP) { if (text[s] == target[targetP]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != text[p++]) break; t++; } if (t == targetEnd) return s; } //s = s <= adjustText ? 
-1 : s - 1; s--; } return -1; } }; static final Forward SLOW_IC_FORWARD = new Forward() { @Override final String getName() { return "EXACT_IC_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; byte[]buf = matcher.icbuf(); while (s < end) { if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd, enc, buf, regex.caseFoldFlag)) return s; s += enc.length(text, s, textEnd); } return -1; } }; static final Backward SLOW_IC_BACKWARD = new Backward() { @Override final int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; s = (s > textStart) ? 
textStart : enc.leftAdjustCharHead(text, adjustText, s, textEnd); byte[]buf = matcher.icbuf(); while (s >= textP) { if (lowerCaseMatch(target, targetP, targetEnd, text, s, textEnd, enc, buf, regex.caseFoldFlag)) return s; s = enc.prevCharHead(text, adjustText, s, textEnd); } return -1; } }; static final Forward SLOW_IC_SB_FORWARD = new Forward() { @Override final String getName() { return "EXACT_IC_SB_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; final byte[]toLowerTable = regex.enc.toLowerCaseTable(); byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end = textEnd; end -= targetEnd - targetP - 1; if (end > textRange) end = textRange; int s = textP; while (s < end) { if (target[targetP] == toLowerTable[text[s] & 0xff]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != toLowerTable[text[p++] & 0xff]) break; t++; } if (t == targetEnd) return s; } s++; } return -1; } }; static final Backward SLOW_IC_SB_BACKWARD = new Backward() { @Override final int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Regex regex = matcher.regex; final byte[]toLowerTable = regex.enc.toLowerCaseTable(); byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int s = textEnd; s -= targetEnd - targetP; if (s > textStart) s = textStart; while (s >= textP) { if (target[targetP] == toLowerTable[text[s] & 0xff]) { int p = s + 1; int t = targetP + 1; while (t < targetEnd) { if (target[t] != toLowerTable[text[p++] & 0xff]) break; t++; } if (t == targetEnd) return s; } //s = s <= adjustText ? 
-1 : s - 1; s--; } return -1; }; }; static final Forward BM_FORWARD = new Forward() { @Override final String getName() { return "EXACT_BM_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end, s; int tail = targetEnd - 1; if (USE_SUNDAY_QUICK_SEARCH) { int tlen1 = tail - targetP; end = textRange + tlen1; s = textP + tlen1; } else { end = textRange + (targetEnd - targetP) - 1; s = textP + (targetEnd - targetP) - 1; } if (end > textEnd) end = textEnd; if (Config.USE_BYTE_MAP || regex.intMap == null) { while (s < end) { int p = s; int t = tail; while (text[p] == target[t]) { if (t == targetP) return p; p--; t--; } if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; s += regex.map[text[USE_SUNDAY_QUICK_SEARCH ? s + 1 : s] & 0xff]; } } else { /* see int_map[] */ while (s < end) { int p = s; int t = tail; while (text[p] == target[t]) { if (t == targetP) return p; p--; t--; } if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; s += regex.intMap[text[USE_SUNDAY_QUICK_SEARCH ? s + 1 : s] & 0xff]; } } return -1; } }; static final Backward BM_BACKWARD = new Backward() { private static final int BM_BACKWARD_SEARCH_LENGTH_THRESHOLD = 100; @Override final int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { if (Config.USE_INT_MAP_BACKWARD) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; if (regex.intMapBackward == null) { if (s_ - range_ < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) { return SLOW_BACKWARD.search(matcher, text, textP, adjustText, textEnd, textStart, s_, range_); // goto exact_method; } setBmBackwardSkip(regex, target, targetP, targetEnd); } int s = textEnd - (targetEnd - targetP); s = (textStart < s) ? 
textStart : enc.leftAdjustCharHead(text, adjustText, s, textEnd); while (s >= textP) { int p = s; int t = targetP; while (t < targetEnd && text[p] == target[t]) { p++; t++; } if (t == targetEnd) return s; s -= regex.intMapBackward[text[s] & 0xff]; s = enc.leftAdjustCharHead(text, adjustText, s, textEnd); } return -1; } else { return SLOW_BACKWARD.search(matcher, text, textP, adjustText, textEnd, textStart, s_, range_); // goto exact_method; } } private void setBmBackwardSkip(Regex regex, byte[]bytes, int p, int end) { final int[] skip; if (regex.intMapBackward == null) { regex.intMapBackward = skip = new int[Config.CHAR_TABLE_SIZE]; } else { skip = regex.intMapBackward; } int len = end - p; for (int i = 0; i < Config.CHAR_TABLE_SIZE; i++) skip[i] = len; for (int i = len - 1; i > 0; i--) skip[bytes[i] & 0xff] = i; } }; static final Forward BM_IC_FORWARD = new Forward() { @Override final String getName() { return "EXACT_BM_IC_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]buf = matcher.icbuf(); byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int end, s, tlen1; int tail = targetEnd - 1; if (USE_SUNDAY_QUICK_SEARCH) { tlen1 = tail - targetP; end = textRange + tlen1; s = textP + tlen1; } else { end = textRange + (targetEnd - targetP) - 1; s = textP + (targetEnd - targetP) - 1; } if (end > textEnd) end = textEnd; if (Config.USE_BYTE_MAP || regex.intMap == null) { while (s < end) { int p = USE_SUNDAY_QUICK_SEARCH ? s - tlen1 : s - (targetEnd - targetP) + 1; if (lowerCaseMatch(target, targetP, targetEnd, text, p, s + 1, enc, buf, regex.caseFoldFlag)) return p; if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; s += regex.map[text[USE_SUNDAY_QUICK_SEARCH ? s + 1 : s] & 0xff]; } } else { /* see int_map[] */ while (s < end) { int p = USE_SUNDAY_QUICK_SEARCH ? 
s - tlen1 : s - (targetEnd - targetP) + 1; if (lowerCaseMatch(target, targetP, targetEnd, text, p, s + 1, enc, buf, regex.caseFoldFlag)) return p; if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; s += regex.intMap[text[USE_SUNDAY_QUICK_SEARCH ? s + 1 : s] & 0xff]; } } return -1; } }; static final Forward BM_NOT_REV_FORWARD = new Forward() { @Override final String getName() { return "EXACT_BM_NOT_REV_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int tail = targetEnd - 1; int tlen1 = tail - targetP; int end = textRange; if (end + tlen1 > textEnd) end = textEnd - tlen1; int s = textP, p, se; if (Config.USE_BYTE_MAP || regex.intMap == null) { while (s < end) { p = se = s + tlen1; int t = tail; while (text[p] == target[t]) { if (t == targetP) return s; p--; t--; } if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; int skip = regex.map[text[USE_SUNDAY_QUICK_SEARCH ? se + 1 : se] & 0xff]; t = s; do { s += enc.length(text, s, textEnd); } while ((s - t) < skip && s < end); } } else { while (s < end) { p = se = s + tlen1; int t = tail; while (text[p] == target[t]) { if (t == targetP) return s; p--; t--; } if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; int skip = regex.intMap[text[USE_SUNDAY_QUICK_SEARCH ? 
se + 1 : se] & 0xff]; t = s; do { s += enc.length(text, s, textEnd); } while ((s - t) < skip && s < end); } } return -1; } }; static final Forward BM_NOT_REV_IC_FORWARD = new Forward() { @Override final String getName() { return "EXACT_BM_NOT_REV_IC_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]buf = matcher.icbuf(); byte[]target = regex.exact; int targetP = regex.exactP; int targetEnd = regex.exactEnd; int tail = targetEnd - 1; int tlen1 = tail - targetP; int end = textRange; if (end + tlen1 > textEnd) end = textEnd - tlen1; int s = textP; if (Config.USE_BYTE_MAP || regex.intMap == null) { while (s < end) { int se = s + tlen1; if (lowerCaseMatch(target, targetP, targetEnd, text, s, se + 1, enc, buf, regex.caseFoldFlag)) return s; if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; int skip = regex.map[text[USE_SUNDAY_QUICK_SEARCH ? se + 1 : se] & 0xff]; int t = s; do { s += enc.length(text, s, textEnd); } while ((s - t) < skip && s < end); } } else { while (s < end) { int se = s + tlen1; if (lowerCaseMatch(target, targetP, targetEnd, text, s, se + 1, enc, buf, regex.caseFoldFlag)) return s; if (USE_SUNDAY_QUICK_SEARCH && (s + 1 >= end)) break; int skip = regex.intMap[text[USE_SUNDAY_QUICK_SEARCH ? 
se + 1 : se] & 0xff]; int t = s; do { s += enc.length(text, s, textEnd); } while ((s - t) < skip && s < end); } } return -1; } }; static final Forward MAP_FORWARD = new Forward() { @Override final String getName() { return "MAP_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]map = regex.map; int s = textP; while (s < textRange) { if (map[text[s] & 0xff] != 0) return s; s += enc.length(text, s, textEnd); } return -1; } }; static final Backward MAP_BACKWARD = new Backward() { @Override final int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Regex regex = matcher.regex; Encoding enc = regex.enc; byte[]map = regex.map; int s = textStart; if (s >= textEnd) s = textEnd - 1; // multibyte safe ? while (s >= textP) { if (map[text[s] & 0xff] != 0) return s; s = enc.prevCharHead(text, adjustText, s, textEnd); } return -1; } }; static final Forward MAP_SB_FORWARD = new Forward() { @Override final String getName() { return "MAP_SB_FORWARD"; } @Override final int search(Matcher matcher, byte[]text, int textP, int textEnd, int textRange) { Regex regex = matcher.regex; byte[]map = regex.map; int s = textP; while (s < textRange) { if (map[text[s] & 0xff] != 0) return s; s++; } return -1; } }; static final Backward MAP_SB_BACKWARD = new Backward() { @Override final int search(Matcher matcher, byte[]text, int textP, int adjustText, int textEnd, int textStart, int s_, int range_) { Regex regex = matcher.regex; byte[]map = regex.map; int s = textStart; if (s >= textEnd) s = textEnd - 1; while (s >= textP) { if (map[text[s] & 0xff] != 0) return s; s--; } return -1; } }; } jruby-joni-2.1.41/src/org/joni/StackEntry.java000066400000000000000000000104071400407002500211300ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and 
/**
 * One slot of the matcher's backtrack stack.
 *
 * The entry is a hand-rolled "union": four int cells are shared by several
 * groups of accessors, and {@link #type} tells which group is meaningful.
 * Accessors of different groups that map to the same cell overwrite each
 * other.
 */
class StackEntry {
    /** Discriminator selecting which accessor group is in use. */
    int type;

    // shared storage cells, reinterpreted by each accessor group below
    private int cell1;
    private int cell2;
    private int cell3;
    private int cell4;

    // ---- first union member: saved matcher state ----

    /** Stores the byte code position. */
    void setStatePCode(int pcode) {
        this.cell1 = pcode;
    }

    /** @return the byte code position */
    int getStatePCode() {
        return this.cell1;
    }

    /** Stores the string position. */
    void setStatePStr(int pstr) {
        this.cell2 = pstr;
    }

    /** @return the string position */
    int getStatePStr() {
        return this.cell2;
    }

    /** Stores the position of the character preceding the string position. */
    void setStatePStrPrev(int pstrPrev) {
        this.cell3 = pstrPrev;
    }

    /** @return the position of the character preceding the string position */
    int getStatePStrPrev() {
        return this.cell3;
    }

    /** Stores the keep position. */
    void setPKeep(int pkeep) {
        this.cell4 = pkeep;
    }

    /** @return the keep position */
    int getPKeep() {
        return this.cell4;
    }

    // ---- second union member: repeat bookkeeping (OP_REPEAT_INC, OP_REPEAT_INC_NG) ----

    /** Stores the repeat counter. */
    void setRepeatCount(int count) {
        this.cell1 = count;
    }

    /** @return the repeat counter */
    int getRepeatCount() {
        return this.cell1;
    }

    /** Decrements the repeat counter by one. */
    void decreaseRepeatCount() {
        this.cell1 -= 1;
    }

    /** Increments the repeat counter by one. */
    void increaseRepeatCount() {
        this.cell1 += 1;
    }

    /** Stores the byte code position of the head of the repeated target. */
    void setRepeatPCode(int pcode) {
        this.cell2 = pcode;
    }

    /** @return the byte code position of the head of the repeated target */
    int getRepeatPCode() {
        return this.cell2;
    }

    /** Stores the repeat id. */
    void setRepeatNum(int num) {
        this.cell3 = num;
    }

    /** @return the repeat id */
    int getRepeatNum() {
        return this.cell3;
    }

    // ---- third union member: repeat_inc, an index into the stack ----

    /** Stores a stack index. */
    void setSi(int si) {
        this.cell1 = si;
    }

    /** @return the stored stack index */
    int getSi() {
        return this.cell1;
    }

    // ---- fourth union member: capture group (memory) tracking ----

    /** Stores the memory (group) number. */
    void setMemNum(int num) {
        this.cell1 = num;
    }

    /** @return the memory (group) number */
    int getMemNum() {
        return this.cell1;
    }

    /** Stores the start/end string position of the group. */
    void setMemPstr(int pstr) {
        this.cell2 = pstr;
    }

    /** @return the start/end string position of the group */
    int getMemPStr() {
        return this.cell2;
    }

    /* The two values below are set when this entry is of type MEM-START. */

    /** Stores the previous start info, used when backtracking "(...)*". */
    void setMemStart(int start) {
        this.cell3 = start;
    }

    /** @return the previous start info */
    int getMemStart() {
        return this.cell3;
    }

    /** Stores the previous end info, used when backtracking "(...)*". */
    void setMemEnd(int end) {
        this.cell4 = end;
    }

    /** @return the previous end info */
    int getMemEnd() {
        return this.cell4;
    }

    // ---- fifth union member: null (empty loop) check ----

    /** Stores the null check id. */
    void setNullCheckNum(int num) {
        this.cell1 = num;
    }

    /** @return the null check id */
    int getNullCheckNum() {
        return this.cell1;
    }

    /** Stores the string position at which the null check started. */
    void setNullCheckPStr(int pstr) {
        this.cell2 = pstr;
    }

    /** @return the string position at which the null check started */
    int getNullCheckPStr() {
        return this.cell2;
    }

    // ---- sixth union member: subexpression call frame ----

    /** Stores the byte code position to return to. */
    void setCallFrameRetAddr(int addr) {
        this.cell1 = addr;
    }

    /** @return the byte code position to return to */
    int getCallFrameRetAddr() {
        return this.cell1;
    }

    /** Stores the call frame number. */
    void setCallFrameNum(int num) {
        this.cell2 = num;
    }

    /** @return the call frame number */
    int getCallFrameNum() {
        return this.cell2;
    }

    /** Stores the string position of the call frame. */
    void setCallFramePStr(int pstr) {
        this.cell3 = pstr;
    }

    /** @return the string position of the call frame */
    int getCallFramePStr() {
        return this.cell3;
    }

    // ---- absent operator positions ----

    /** Stores the absent position. */
    void setAbsentStr(int pos) {
        this.cell1 = pos;
    }

    /** @return the absent position */
    int getAbsentStr() {
        return this.cell1;
    }

    /** Stores the absent end position. */
    void setAbsentEndStr(int pos) {
        this.cell2 = pos;
    }

    /** @return the absent end position */
    int getAbsentEndStr() {
        return this.cell2;
    }
}

/** A {@link StackEntry} with one extra cell that holds a state-check value. */
final class SCStackEntry extends StackEntry {
    private int stateCheckCell;

    /** Stores the state-check value. */
    void setStateCheck(int check) {
        this.stateCheckCell = check;
    }

    /** @return the state-check value */
    int getStateCheck() {
        return this.stateCheckCell;
    }
}
following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import static org.joni.BitStatus.bsAt; import static org.joni.Config.USE_CEC; import java.lang.ref.WeakReference; import java.util.Arrays; import org.joni.constants.internal.StackPopLevel; import org.joni.constants.internal.StackType; abstract class StackMachine extends Matcher implements StackType { protected static final int INVALID_INDEX = -1; protected StackEntry[]stack; protected int stk; // stkEnd protected final int[]repeatStk; protected final int memStartStk, memEndStk; protected byte[] stateCheckBuff; // CEC, move to int[] ? protected int stateCheckBuffSize; protected StackMachine(Regex regex, Region region, byte[]bytes, int p , int end) { super(regex, region, bytes, p, end); stack = regex.requireStack ? fetchStack() : null; final int n; if (Config.USE_SUBEXP_CALL) { n = regex.numRepeat + ((regex.numMem + 1) << 1); memStartStk = regex.numRepeat; memEndStk = memStartStk + regex.numMem + 1; } else { n = regex.numRepeat + (regex.numMem << 1); memStartStk = regex.numRepeat - 1; memEndStk = memStartStk + regex.numMem; /* for index start from 1, mem_start_stk[1]..mem_start_stk[num_mem] */ /* for index start from 1, mem_end_stk[1]..mem_end_stk[num_mem] */ } repeatStk = n > 0 ? 
new int[n] : null; } protected final void stackInit() { if (stack != null) pushEnsured(ALT, regex.codeLength - 1); /* bottom stack */ if (repeatStk != null) { for (int i = (Config.USE_SUBEXP_CALL ? 0 : 1); i <= regex.numMem; i++) { repeatStk[i + memStartStk] = repeatStk[i + memEndStk] = INVALID_INDEX; } } } private static StackEntry[] allocateStack() { StackEntry[]stack = new StackEntry[Config.INIT_MATCH_STACK_SIZE]; stack[0] = USE_CEC ? new SCStackEntry() : new StackEntry(); return stack; } private void doubleStack() { StackEntry[] newStack = new StackEntry[stack.length << 1]; System.arraycopy(stack, 0, newStack, 0, stack.length); stack = newStack; } static final ThreadLocal> stacks = new ThreadLocal>(); private static StackEntry[] fetchStack() { WeakReference ref = stacks.get(); StackEntry[] stack; if (ref == null) { stacks.set( new WeakReference(stack = allocateStack()) ); } else { stack = ref.get(); if (stack == null) { stacks.set( new WeakReference(stack = allocateStack()) ); } } return stack; } private final StackEntry ensure1() { if (stk >= stack.length) doubleStack(); StackEntry e = stack[stk]; if (e == null) stack[stk] = e = USE_CEC ? 
new SCStackEntry() : new StackEntry(); return e; } private final void pushType(int type) { ensure1().type = type; stk++; } // CEC // STATE_CHECK_POS private int stateCheckPos(int s, int snum) { return (s - str) * regex.numCombExpCheck + (snum - 1); } // STATE_CHECK_VAL protected final boolean stateCheckVal(int s, int snum) { if (stateCheckBuff != null) { int x = stateCheckPos(s, snum); return (stateCheckBuff[x / 8] & (1 << (x % 8))) != 0; } return false; } // ELSE_IF_STATE_CHECK_MARK private void stateCheckMark() { StackEntry e = stack[stk]; int x = stateCheckPos(e.getStatePStr(), ((SCStackEntry)e).getStateCheck()); stateCheckBuff[x / 8] |= (1 << (x % 8)); } // STATE_CHECK_BUFF_INIT private static final int STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE = 16; protected final void stateCheckBuffInit(int strLength, int offset, int stateNum) { if (stateNum > 0 && strLength >= Config.CHECK_STRING_THRESHOLD_LEN) { int size = ((strLength + 1) * stateNum + 7) >>> 3; offset = (offset * stateNum) >>> 3; if (size > 0 && offset < size && size < Config.CHECK_BUFF_MAX_SIZE) { if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { stateCheckBuff = new byte[size]; } else { // same impl, reduce... 
stateCheckBuff = new byte[size]; } Arrays.fill(stateCheckBuff, offset, (size - offset), (byte)0); stateCheckBuffSize = size; } else { stateCheckBuff = null; // reduce stateCheckBuffSize = 0; } } else { stateCheckBuff = null; // reduce stateCheckBuffSize = 0; } } protected final void stateCheckBuffClear() { stateCheckBuff = null; stateCheckBuffSize = 0; } private void push(int type, int pat, int s, int prev, int pkeep) { StackEntry e = ensure1(); e.type = type; e.setStatePCode(pat); e.setStatePStr(s); e.setStatePStrPrev(prev); if (USE_CEC) ((SCStackEntry)e).setStateCheck(0); e.setPKeep(pkeep); stk++; } private final void pushEnsured(int type, int pat) { StackEntry e = stack[stk]; e.type = type; e.setStatePCode(pat); if (USE_CEC) ((SCStackEntry)e).setStateCheck(0); stk++; } protected final void pushAltWithStateCheck(int pat, int s, int sprev, int snum, int pkeep) { StackEntry e = ensure1(); e.type = ALT; e.setStatePCode(pat); e.setStatePStr(s); e.setStatePStrPrev(sprev); if (USE_CEC) ((SCStackEntry)e).setStateCheck(stateCheckBuff != null ? 
snum : 0); e.setPKeep(pkeep); stk++; } protected final void pushStateCheck(int s, int snum) { if (stateCheckBuff != null) { StackEntry e = ensure1(); e.type = STATE_CHECK_MARK; e.setStatePStr(s); ((SCStackEntry)e).setStateCheck(snum); stk++; } } protected final void pushAlt(int pat, int s, int prev, int pkeep) { push(ALT, pat, s, prev, pkeep); } protected final void pushPos(int s, int prev, int pkeep) { push(POS, -1 /*NULL_UCHARP*/, s, prev, pkeep); } protected final void pushPosNot(int pat, int s, int prev, int pkeep) { push(POS_NOT, pat, s, prev, pkeep); } protected final void pushStopBT() { pushType(STOP_BT); } protected final void pushLookBehindNot(int pat, int s, int sprev, int pkeep) { push(LOOK_BEHIND_NOT, pat, s, sprev, pkeep); } protected final void pushRepeat(int id, int pat) { StackEntry e = ensure1(); e.type = REPEAT; e.setRepeatNum(id); e.setRepeatPCode(pat); e.setRepeatCount(0); stk++; } protected final void pushRepeatInc(int sindex) { StackEntry e = ensure1(); e.type = REPEAT_INC; e.setSi(sindex); stk++; } protected final void pushMemStart(int mnum, int s) { StackEntry e = ensure1(); e.type = MEM_START; e.setMemNum(mnum); e.setMemPstr(s); e.setMemStart(repeatStk[memStartStk + mnum]); e.setMemEnd(repeatStk[memEndStk + mnum]); repeatStk[memStartStk + mnum] = stk; repeatStk[memEndStk + mnum] = INVALID_INDEX; stk++; } protected final void pushMemEnd(int mnum, int s) { StackEntry e = ensure1(); e.type = MEM_END; e.setMemNum(mnum); e.setMemPstr(s); e.setMemStart(repeatStk[memStartStk + mnum]); e.setMemEnd(repeatStk[memEndStk + mnum]); repeatStk[memEndStk + mnum] = stk; stk++; } protected final void pushMemEndMark(int mnum) { StackEntry e = ensure1(); e.type = MEM_END_MARK; e.setMemNum(mnum); stk++; } protected final int getMemStart(int mnum) { int level = 0; int stkp = stk; while (stkp > 0) { stkp--; StackEntry e = stack[stkp]; if ((e.type & MASK_MEM_END_OR_MARK) != 0 && e.getMemNum() == mnum) { level++; } else if (e.type == MEM_START && e.getMemNum() == 
mnum) { if (level == 0) break; level--; } } return stkp; } protected final void pushNullCheckStart(int cnum, int s) { StackEntry e = ensure1(); e.type = NULL_CHECK_START; e.setNullCheckNum(cnum); e.setNullCheckPStr(s); stk++; } protected final void pushNullCheckEnd(int cnum) { StackEntry e = ensure1(); e.type = NULL_CHECK_END; e.setNullCheckNum(cnum); stk++; } protected final void pushCallFrame(int pat) { StackEntry e = ensure1(); e.type = CALL_FRAME; e.setCallFrameRetAddr(pat); stk++; } protected final void pushReturn() { StackEntry e = ensure1(); e.type = RETURN; stk++; } protected final void pushAbsent() { StackEntry e = ensure1(); e.type = ABSENT; stk++; } protected final void pushAbsentPos(int start, int end) { StackEntry e = ensure1(); e.type = ABSENT_POS; e.setAbsentStr(start); e.setAbsentEndStr(end); stk++; } protected final void popOne() { stk--; } protected final StackEntry pop() { switch (regex.stackPopLevel) { case StackPopLevel.FREE: return popFree(); case StackPopLevel.MEM_START: return popMemStart(); default: return popDefault(); } } private StackEntry popFree() { while (true) { StackEntry e = stack[--stk]; if ((e.type & MASK_POP_USED) != 0) { return e; } else if (USE_CEC) { if (e.type == STATE_CHECK_MARK) stateCheckMark(); } } } private StackEntry popMemStart() { while (true) { StackEntry e = stack[--stk]; if ((e.type & MASK_POP_USED) != 0) { return e; } else if (e.type == MEM_START) { repeatStk[memStartStk + e.getMemNum()] = e.getMemStart(); repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd(); } else if (USE_CEC) { if (e.type == STATE_CHECK_MARK) stateCheckMark(); } } } private void popRewrite(StackEntry e) { if (e.type == MEM_START) { repeatStk[memStartStk + e.getMemNum()] = e.getMemStart(); repeatStk[memEndStk + e.getMemNum()] = e.getMemEnd(); } else if (e.type == REPEAT_INC) { stack[e.getSi()].decreaseRepeatCount(); } else if (e.type == MEM_END) { repeatStk[memStartStk + e.getMemNum()] = e.getMemStart(); repeatStk[memEndStk + e.getMemNum()] = 
e.getMemEnd(); } else if (USE_CEC) { if (e.type == STATE_CHECK_MARK) stateCheckMark(); } } private StackEntry popDefault() { while (true) { StackEntry e = stack[--stk]; if ((e.type & MASK_POP_USED) != 0) return e; else popRewrite(e); } } protected final void popTilPosNot() { while (true) { StackEntry e = stack[--stk]; if (e.type == POS_NOT) break; else popRewrite(e); } } protected final void popTilLookBehindNot() { while (true) { StackEntry e = stack[--stk]; if (e.type == LOOK_BEHIND_NOT) break; else popRewrite(e); } } protected final void popTilAbsent() { while (true) { StackEntry e = stack[--stk]; if (e.type == ABSENT) break; else popRewrite(e); } } protected final int posEnd() { int k = stk; while (true) { k--; StackEntry e = stack[k]; if ((e.type & MASK_TO_VOID_TARGET) != 0) { e.type = VOID; } else if (e.type == POS) { e.type = VOID; break; } } return k; } protected final void stopBtEnd() { int k = stk; while (true) { k--; StackEntry e = stack[k]; if ((e.type & MASK_TO_VOID_TARGET) != 0) { e.type = VOID; } else if (e.type == STOP_BT) { e.type = VOID; break; } } } // int for consistency with other null check routines protected final int nullCheck(int id, int s) { int k = stk; while (true) { k--; StackEntry e = stack[k]; if (e.type == NULL_CHECK_START) { if (e.getNullCheckNum() == id) { return e.getNullCheckPStr() == s ? 1 : 0; } } } } protected final int nullCheckRec(int id, int s) { int level = 0; int k = stk; while (true) { k--; StackEntry e = stack[k]; if (e.type == NULL_CHECK_START) { if (e.getNullCheckNum() == id) { if (level == 0) { return e.getNullCheckPStr() == s ? 
1 : 0; } else { level--; } } } else if (e.type == NULL_CHECK_END) { level++; } } } protected final int nullCheckMemSt(int id, int s) { int k = stk; int isNull; while (true) { k--; StackEntry e = stack[k]; if (e.type == NULL_CHECK_START) { if (e.getNullCheckNum() == id) { if (e.getNullCheckPStr() != s) { isNull = 0; break; } else { int endp; isNull = 1; while (k < stk) { e = stack[k++]; if (e.type == MEM_START) { if (e.getMemEnd() == INVALID_INDEX) { isNull = 0; break; } if (bsAt(regex.btMemEnd, e.getMemNum())) { endp = stack[e.getMemEnd()].getMemPStr(); } else { endp = e.getMemEnd(); } if (stack[e.getMemStart()].getMemPStr() != endp) { isNull = 0; break; } else if (endp != s) { isNull = -1; /* empty, but position changed */ } } } break; } } } } return isNull; } protected final int nullCheckMemStRec(int id, int s) { int level = 0; int k = stk; int isNull; while (true) { k--; StackEntry e = stack[k]; if (e.type == NULL_CHECK_START) { if (e.getNullCheckNum() == id) { if (level == 0) { if (e.getNullCheckPStr() != s) { isNull = 0; break; } else { int endp; isNull = 1; while (k < stk) { if (e.type == MEM_START) { if (e.getMemEnd() == INVALID_INDEX) { isNull = 0; break; } if (bsAt(regex.btMemEnd, e.getMemNum())) { endp = stack[e.getMemEnd()].getMemPStr(); } else { endp = e.getMemEnd(); } if (stack[e.getMemStart()].getMemPStr() != endp) { isNull = 0; break; } else if (endp != s) { isNull = -1;; /* empty, but position changed */ } } k++; e = stack[k]; } break; } } else { level--; } } } else if (e.type == NULL_CHECK_END) { if (e.getNullCheckNum() == id) level++; } } return isNull; } protected final int getRepeat(int id) { int level = 0; int k = stk; while (true) { k--; StackEntry e = stack[k]; if (e.type == REPEAT) { if (level == 0) { if (e.getRepeatNum() == id) return k; } } else if (e.type == CALL_FRAME) { level--; } else if (e.type == RETURN) { level++; } } } protected final int sreturn() { int level = 0; int k = stk; while (true) { k--; StackEntry e = stack[k]; if 
(e.type == CALL_FRAME) { if (level == 0) { return e.getCallFrameRetAddr(); } else { level--; } } else if (e.type == RETURN) { level++; } } } } jruby-joni-2.1.41/src/org/joni/Syntax.java000066400000000000000000000524611400407002500203350ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import static org.joni.constants.MetaChar.INEFFECTIVE_META_CHAR; import org.joni.constants.SyntaxProperties; public final class Syntax implements SyntaxProperties { public final String name; public final int op; public final int op2; public final int op3; public final int behavior; public final int options; public final MetaCharTable metaCharTable; public Syntax(String name, int op, int op2, int op3, int behavior, int options, MetaCharTable metaCharTable) { this.name = name; this.op = op; this.op2 = op2; this.op3 = op3; this.behavior = behavior; this.options = options; this.metaCharTable = metaCharTable; } public static class MetaCharTable { public final int esc; public final int anyChar; public final int anyTime; public final int zeroOrOneTime; public final int oneOrMoreTime; public final int anyCharAnyTime; public MetaCharTable(int esc, int anyChar, int anyTime, int zeroOrOneTime, int oneOrMoreTime, int anyCharAnyTime) { this.esc = esc; this.anyChar = anyChar; this.anyTime = anyTime; this.zeroOrOneTime = zeroOrOneTime; this.oneOrMoreTime = oneOrMoreTime; this.anyCharAnyTime = anyCharAnyTime; } } /** * OP * */ protected boolean isOp(int opm) { return (op & opm) != 0; } public boolean opVariableMetaCharacters() { return isOp(OP_VARIABLE_META_CHARACTERS); } public boolean opDotAnyChar() { return isOp(OP_DOT_ANYCHAR); } public boolean opAsteriskZeroInf() { return isOp(OP_ASTERISK_ZERO_INF); } public boolean opEscAsteriskZeroInf() { return isOp(OP_ESC_ASTERISK_ZERO_INF); } public boolean opPlusOneInf() { return isOp(OP_PLUS_ONE_INF); } public boolean opEscPlusOneInf() { return isOp(OP_ESC_PLUS_ONE_INF); } public boolean opQMarkZeroOne() { return isOp(OP_QMARK_ZERO_ONE); } public boolean opEscQMarkZeroOne() { return isOp(OP_ESC_QMARK_ZERO_ONE); } public boolean opBraceInterval() { return isOp(OP_BRACE_INTERVAL); } public boolean opEscBraceInterval() { return isOp(OP_ESC_BRACE_INTERVAL); } public boolean opVBarAlt() { return isOp(OP_VBAR_ALT); } 
public boolean opEscVBarAlt() { return isOp(OP_ESC_VBAR_ALT); } public boolean opLParenSubexp() { return isOp(OP_LPAREN_SUBEXP); } public boolean opEscLParenSubexp() { return isOp(OP_ESC_LPAREN_SUBEXP); } public boolean opEscAZBufAnchor() { return isOp(OP_ESC_AZ_BUF_ANCHOR); } public boolean opEscCapitalGBeginAnchor() { return isOp(OP_ESC_CAPITAL_G_BEGIN_ANCHOR); } public boolean opDecimalBackref() { return isOp(OP_DECIMAL_BACKREF); } public boolean opBracketCC() { return isOp(OP_BRACKET_CC); } public boolean opEscWWord() { return isOp(OP_ESC_W_WORD); } public boolean opEscLtGtWordBeginEnd() { return isOp(OP_ESC_LTGT_WORD_BEGIN_END); } public boolean opEscBWordBound() { return isOp(OP_ESC_B_WORD_BOUND); } public boolean opEscSWhiteSpace() { return isOp(OP_ESC_S_WHITE_SPACE); } public boolean opEscDDigit() { return isOp(OP_ESC_D_DIGIT); } public boolean opLineAnchor() { return isOp(OP_LINE_ANCHOR); } public boolean opPosixBracket() { return isOp(OP_POSIX_BRACKET); } public boolean opQMarkNonGreedy() { return isOp(OP_QMARK_NON_GREEDY); } public boolean opEscControlChars() { return isOp(OP_ESC_CONTROL_CHARS); } public boolean opEscCControl() { return isOp(OP_ESC_C_CONTROL); } public boolean opEscOctal3() { return isOp(OP_ESC_OCTAL3); } public boolean opEscXHex2() { return isOp(OP_ESC_X_HEX2); } public boolean opEscXBraceHex8() { return isOp(OP_ESC_X_BRACE_HEX8); } public boolean opEscOBraceOctal() { return isOp(OP_ESC_O_BRACE_OCTAL); } /** * OP * */ protected boolean isOp2(int opm) { return (op2 & opm) != 0; } public boolean op2EscCapitalQQuote() { return isOp2(OP2_ESC_CAPITAL_Q_QUOTE); } public boolean op2QMarkGroupEffect() { return isOp2(OP2_QMARK_GROUP_EFFECT); } public boolean op2OptionPerl() { return isOp2(OP2_OPTION_PERL); } public boolean op2OptionRuby() { return isOp2(OP2_OPTION_RUBY); } public boolean op2PlusPossessiveRepeat() { return isOp2(OP2_PLUS_POSSESSIVE_REPEAT); } public boolean op2PlusPossessiveInterval() { return isOp2(OP2_PLUS_POSSESSIVE_INTERVAL); 
} public boolean op2CClassSetOp() { return isOp2(OP2_CCLASS_SET_OP); } public boolean op2QMarkLtNamedGroup() { return isOp2(OP2_QMARK_LT_NAMED_GROUP); } public boolean op2EscKNamedBackref() { return isOp2(OP2_ESC_K_NAMED_BACKREF); } public boolean op2EscGSubexpCall() { return isOp2(OP2_ESC_G_SUBEXP_CALL); } public boolean op2AtMarkCaptureHistory() { return isOp2(OP2_ATMARK_CAPTURE_HISTORY); } public boolean op2EscCapitalCBarControl() { return isOp2(OP2_ESC_CAPITAL_C_BAR_CONTROL); } public boolean op2EscCapitalMBarMeta() { return isOp2(OP2_ESC_CAPITAL_M_BAR_META); } public boolean op2EscVVtab() { return isOp2(OP2_ESC_V_VTAB); } public boolean op2EscUHex4() { return isOp2(OP2_ESC_U_HEX4); } public boolean op2EscGnuBufAnchor() { return isOp2(OP2_ESC_GNU_BUF_ANCHOR); } public boolean op2EscPBraceCharProperty() { return isOp2(OP2_ESC_P_BRACE_CHAR_PROPERTY); } public boolean op2EscPBraceCircumflexNot() { return isOp2(OP2_ESC_P_BRACE_CIRCUMFLEX_NOT); } public boolean op2EscHXDigit() { return isOp2(OP2_ESC_H_XDIGIT); } public boolean op2IneffectiveEscape() { return isOp2(OP2_INEFFECTIVE_ESCAPE); } public boolean op2EscCapitalRLinebreak() { return isOp2(OP2_ESC_CAPITAL_R_LINEBREAK); } public boolean op2EscCapitalXExtendedGraphemeCluster() { return isOp2(OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER); } public boolean op2EscVVerticalWhiteSpace() { return isOp2(OP2_ESC_V_VERTICAL_WHITESPACE); } public boolean op2EscHHorizontalWhiteSpace() { return isOp2(OP2_ESC_H_HORIZONTAL_WHITESPACE); } public boolean op2EscCapitalKKeep() { return isOp2(OP2_ESC_CAPITAL_K_KEEP); } public boolean op2QMarkTildeAbsent() { return isOp2(OP2_QMARK_TILDE_ABSENT); } public boolean op2EscGBraceBackref() { return isOp2(OP2_ESC_G_BRACE_BACKREF); } public boolean op2QMarkSubexpCall() { return isOp2(OP2_QMARK_SUBEXP_CALL); } public boolean op2QMarkBarBranchReset() { return isOp2(OP2_QMARK_BAR_BRANCH_RESET); } public boolean op2QMarkLParenCondition() { return isOp2(OP2_QMARK_LPAREN_CONDITION); } public 
boolean op2QMarkCapitalPNamedGroup() { return isOp2(OP2_QMARK_CAPITAL_P_NAMED_GROUP); } protected boolean isOp3(int opm) { return (op3 & opm) != 0; } public boolean op3OptionJava() { return isOp3(OP3_OPTION_JAVA); } public boolean op3OptionECMAScript() { return isOp3(OP3_OPTION_ECMASCRIPT); } /** * BEHAVIOR * */ protected boolean isBehavior(int bvm) { return (behavior & bvm) != 0; } public boolean contextIndepRepeatOps() { return isBehavior(CONTEXT_INDEP_REPEAT_OPS); } public boolean contextInvalidRepeatOps() { return isBehavior(CONTEXT_INVALID_REPEAT_OPS); } public boolean allowUnmatchedCloseSubexp() { return isBehavior(ALLOW_UNMATCHED_CLOSE_SUBEXP); } public boolean allowInvalidInterval() { return isBehavior(ALLOW_INVALID_INTERVAL); } public boolean allowIntervalLowAbbrev() { return isBehavior(ALLOW_INTERVAL_LOW_ABBREV); } public boolean strictCheckBackref() { return isBehavior(STRICT_CHECK_BACKREF); } public boolean differentLengthAltLookBehind() { return isBehavior(DIFFERENT_LEN_ALT_LOOK_BEHIND); } public boolean captureOnlyNamedGroup() { return isBehavior(CAPTURE_ONLY_NAMED_GROUP); } public boolean allowMultiplexDefinitionName() { return isBehavior(ALLOW_MULTIPLEX_DEFINITION_NAME); } public boolean fixedIntervalIsGreedyOnly() { return isBehavior(FIXED_INTERVAL_IS_GREEDY_ONLY); } public boolean notNewlineInNegativeCC() { return isBehavior(NOT_NEWLINE_IN_NEGATIVE_CC); } public boolean backSlashEscapeInCC() { return isBehavior(BACKSLASH_ESCAPE_IN_CC); } public boolean allowEmptyRangeInCC() { return isBehavior(ALLOW_EMPTY_RANGE_IN_CC); } public boolean allowDoubleRangeOpInCC() { return isBehavior(ALLOW_DOUBLE_RANGE_OP_IN_CC); } public boolean warnCCOpNotEscaped() { return isBehavior(WARN_CC_OP_NOT_ESCAPED); } public boolean warnCCDup() { return isBehavior(WARN_CC_DUP); } public boolean warnReduntantNestedRepeat() { return isBehavior(WARN_REDUNDANT_NESTED_REPEAT); } public static final Syntax RUBY = new Syntax( "RUBY", (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | 
OP_ESC_OCTAL3 | OP_ESC_X_HEX2 | OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_QMARK_GROUP_EFFECT | OP2_OPTION_RUBY | OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF | OP2_ESC_G_SUBEXP_CALL | OP2_ESC_P_BRACE_CHAR_PROPERTY | OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | OP2_PLUS_POSSESSIVE_REPEAT | OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL | OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB | OP2_ESC_H_XDIGIT | OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER | OP2_QMARK_LPAREN_CONDITION | OP2_ESC_CAPITAL_R_LINEBREAK | OP2_ESC_CAPITAL_K_KEEP | OP2_QMARK_TILDE_ABSENT ), 0, ( GNU_REGEX_BV | ALLOW_INTERVAL_LOW_ABBREV | DIFFERENT_LEN_ALT_LOOK_BEHIND | CAPTURE_ONLY_NAMED_GROUP | ALLOW_MULTIPLEX_DEFINITION_NAME | FIXED_INTERVAL_IS_GREEDY_ONLY | WARN_CC_OP_NOT_ESCAPED | WARN_CC_DUP | WARN_REDUNDANT_NESTED_REPEAT ), (Option.ASCII_RANGE | Option.POSIX_BRACKET_ALL_RANGE | Option.WORD_BOUND_ALL_RANGE), new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax DEFAULT = RUBY; public static final Syntax TEST = new Syntax("TEST", RUBY.op, RUBY.op2 | OP2_ESC_U_HEX4, RUBY.op3, RUBY.behavior, RUBY.options & ~ Option.ASCII_RANGE, RUBY.metaCharTable); public static final Syntax ASIS = new Syntax( "ASIS", 0, OP2_INEFFECTIVE_ESCAPE, 0, 0, Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' 
*/ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax PosixBasic = new Syntax( "PosixBasic", (POSIX_COMMON_OP | OP_ESC_LPAREN_SUBEXP | OP_ESC_BRACE_INTERVAL ), 0, 0, 0, ( Option.SINGLELINE | Option.MULTILINE ), new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax PosixExtended = new Syntax( "PosixExtended", ( POSIX_COMMON_OP | OP_LPAREN_SUBEXP | OP_BRACE_INTERVAL | OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE |OP_VBAR_ALT ), 0, 0, ( CONTEXT_INDEP_ANCHORS | CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS | ALLOW_UNMATCHED_CLOSE_SUBEXP | ALLOW_DOUBLE_RANGE_OP_IN_CC ), ( Option.SINGLELINE | Option.MULTILINE ), new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Emacs = new Syntax( "Emacs", ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT | OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE | OP_DECIMAL_BACKREF | OP_LINE_ANCHOR | OP_ESC_CONTROL_CHARS ), OP2_ESC_GNU_BUF_ANCHOR, 0, ALLOW_EMPTY_RANGE_IN_CC, Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' 
*/ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Grep = new Syntax( "Grep", ( OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_POSIX_BRACKET | OP_ESC_BRACE_INTERVAL | OP_ESC_LPAREN_SUBEXP | OP_ESC_VBAR_ALT | OP_ASTERISK_ZERO_INF | OP_ESC_PLUS_ONE_INF | OP_ESC_QMARK_ZERO_ONE | OP_LINE_ANCHOR | OP_ESC_W_WORD | OP_ESC_B_WORD_BOUND | OP_ESC_LTGT_WORD_BEGIN_END | OP_DECIMAL_BACKREF ), 0, 0, ( ALLOW_EMPTY_RANGE_IN_CC | NOT_NEWLINE_IN_NEGATIVE_CC ), Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax GnuRegex = new Syntax( "GnuRegex", GNU_REGEX_OP, 0, 0, GNU_REGEX_BV, Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Java = new Syntax( "Java", (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT | OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP | OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF | OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 | OP2_ESC_P_BRACE_CHAR_PROPERTY ), 0, ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ), (Option.SINGLELINE | Option.WORD_BOUND_ALL_RANGE | Option.WORD_BOUND_ALL_RANGE), new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' 
*/ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax Perl = new Syntax( "Perl", (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 | OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL | OP2_ESC_P_BRACE_CHAR_PROPERTY | OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ), 0, GNU_REGEX_BV, Option.SINGLELINE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax PerlNG = new Syntax( "PerlNG", (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 | OP_ESC_X_BRACE_HEX8 | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL | OP2_ESC_P_BRACE_CHAR_PROPERTY | OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | OP2_QMARK_LT_NAMED_GROUP | OP2_ESC_K_NAMED_BACKREF | OP2_ESC_G_SUBEXP_CALL ), 0, ( GNU_REGEX_BV | CAPTURE_ONLY_NAMED_GROUP | ALLOW_MULTIPLEX_DEFINITION_NAME ), Option.SINGLELINE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' 
*/ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); public static final Syntax ECMAScript = new Syntax( "ECMAScript", (( GNU_REGEX_OP | OP_QMARK_NON_GREEDY | OP_ESC_OCTAL3 | OP_ESC_X_HEX2 | OP_ESC_CONTROL_CHARS | OP_ESC_C_CONTROL | OP_DECIMAL_BACKREF | OP_ESC_D_DIGIT | OP_ESC_S_WHITE_SPACE | OP_ESC_W_WORD ) & ~OP_ESC_LTGT_WORD_BEGIN_END ), ( OP2_ESC_CAPITAL_Q_QUOTE | OP2_QMARK_GROUP_EFFECT | OP2_OPTION_PERL | OP2_ESC_P_BRACE_CHAR_PROPERTY | OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | OP2_ESC_U_HEX4 | OP2_ESC_V_VTAB), OP3_OPTION_ECMASCRIPT, ( CONTEXT_INDEP_ANCHORS | CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS | ALLOW_INVALID_INTERVAL | BACKSLASH_ESCAPE_IN_CC | ALLOW_DOUBLE_RANGE_OP_IN_CC | DIFFERENT_LEN_ALT_LOOK_BEHIND ), Option.NONE, new MetaCharTable( '\\', /* esc */ INEFFECTIVE_META_CHAR, /* anychar '.' */ INEFFECTIVE_META_CHAR, /* anytime '*' */ INEFFECTIVE_META_CHAR, /* zero or one time '?' */ INEFFECTIVE_META_CHAR, /* one or more time '+' */ INEFFECTIVE_META_CHAR /* anychar anytime */ ) ); } jruby-joni-2.1.41/src/org/joni/Token.java000066400000000000000000000077471400407002500201360ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; import org.joni.constants.internal.TokenType; final class Token { TokenType type; boolean escaped; int base; /* is number: 8, 16 (used in [....]) */ int backP; // union fields private int INT1, INT2, INT3, INT4, INT5; private int []INTA1; // union accessors int getC() { return INT1; } void setC(int c) { INT1 = c; } int getCode() { return INT1; } void setCode(int code) { INT1 = code; } int getAnchorSubtype() { return INT1; } void setAnchorSubtype(int anchor) { INT1 = anchor; } boolean getAnchorASCIIRange() { return INT2 == 1; } void setAnchorASCIIRange(boolean ascii) { INT2 = ascii ? 1 : 0; } // repeat union member int getRepeatLower() { return INT1; } void setRepeatLower(int lower) { INT1 = lower; } int getRepeatUpper() { return INT2; } void setRepeatUpper(int upper) { INT2 = upper; } boolean getRepeatGreedy() { return INT3 != 0; } void setRepeatGreedy(boolean greedy) { INT3 = greedy ? 1 : 0; } boolean getRepeatPossessive() { return INT4 != 0; } void setRepeatPossessive(boolean possessive) { INT4 = possessive ? 1 : 0; } // backref union member int getBackrefNum() { return INT1; } void setBackrefNum(int num) { INT1 = num; } int getBackrefRef1() { return INT2; } void setBackrefRef1(int ref1) { INT2 = ref1; } int[]getBackrefRefs() { return INTA1; } void setBackrefRefs(int[]refs) { INTA1 = refs; } boolean getBackrefByName() { return INT3 != 0; } void setBackrefByName(boolean byName) { INT3 = byName ? 1 : 0; } // USE_BACKREF_AT_LEVEL boolean getBackrefExistLevel() { return INT4 != 0; } void setBackrefExistLevel(boolean existLevel) { INT4 = existLevel ? 
1 : 0; } int getBackrefLevel() { return INT5; } void setBackrefLevel(int level) { INT5 = level; } // call union member int getCallNameP() { return INT1; } void setCallNameP(int nameP) { INT1 = nameP; } int getCallNameEnd() { return INT2; } void setCallNameEnd(int nameEnd) { INT2 = nameEnd; } int getCallGNum() { return INT3; } void setCallGNum(int gnum) { INT3 = gnum; } boolean getCallRel() { return INT4 != 0; } void setCallRel(boolean rel) { INT4 = rel ? 1 : 0; } // prop union member int getPropCType() { return INT1; } void setPropCType(int ctype) { INT1 = ctype; } boolean getPropNot() { return INT2 != 0; } void setPropNot(boolean not) { INT2 = not ? 1 : 0; } } jruby-joni-2.1.41/src/org/joni/UnsetAddrList.java000066400000000000000000000047321400407002500215720ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni; import org.joni.ast.EncloseNode; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; public final class UnsetAddrList { EncloseNode[]targets; int[]offsets; int num; public UnsetAddrList(int size) { targets = new EncloseNode[size]; offsets = new int[size]; } public void add(int offset, EncloseNode node) { if (num >= offsets.length) { EncloseNode []ttmp = new EncloseNode[targets.length << 1]; System.arraycopy(targets, 0, ttmp, 0, num); targets = ttmp; int[]otmp = new int[offsets.length << 1]; System.arraycopy(offsets, 0, otmp, 0, num); offsets = otmp; } targets[num] = node; offsets[num] = offset; num++; } public void fix(Regex regex) { for (int i=0; i 0) { for (int i = 0; i < num; i++) { value.append("offset + ").append(offsets[i]).append(" target: ").append(targets[i].getAddressName()); } } return value.toString(); } } jruby-joni-2.1.41/src/org/joni/WarnCallback.java000066400000000000000000000027171400407002500213720ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni; /** * @author Ola Bini */ public interface WarnCallback { WarnCallback DEFAULT = new WarnCallback() { public void warn(String message) { System.err.println(message); } }; WarnCallback NONE = new WarnCallback() { public void warn(String message) { } }; void warn(String message); } jruby-joni-2.1.41/src/org/joni/ast/000077500000000000000000000000001400407002500167635ustar00rootroot00000000000000jruby-joni-2.1.41/src/org/joni/ast/AnchorNode.java000066400000000000000000000067701400407002500216600ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import org.joni.constants.internal.AnchorType; public final class AnchorNode extends Node { public final int type; public final boolean asciiRange; public Node target; public int charLength; public AnchorNode(int type, boolean asciiRange) { super(ANCHOR); this.type = type; charLength = -1; this.asciiRange = asciiRange; } public AnchorNode(int type) { this(type, false); } @Override protected void setChild(Node child) { target = child; } @Override protected Node getChild() { return target; } public void setTarget(Node tgt) { target = tgt; tgt.parent = this; } @Override public String getName() { return "Anchor"; } @Override public String toString(int level) { StringBuilder value = new StringBuilder(); value.append("\n type: " + typeToString()); value.append(", ascii: " + asciiRange); value.append("\n target: " + pad(target, level + 1)); return value.toString(); } public String typeToString() { StringBuilder type = new StringBuilder(); if (isType(AnchorType.BEGIN_BUF)) type.append("BEGIN_BUF "); if (isType(AnchorType.BEGIN_LINE)) type.append("BEGIN_LINE "); if (isType(AnchorType.BEGIN_POSITION)) type.append("BEGIN_POSITION "); if (isType(AnchorType.END_BUF)) type.append("END_BUF "); if (isType(AnchorType.SEMI_END_BUF)) type.append("SEMI_END_BUF "); if (isType(AnchorType.END_LINE)) type.append("END_LINE "); if (isType(AnchorType.WORD_BOUND)) type.append("WORD_BOUND "); if (isType(AnchorType.NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND "); if (isType(AnchorType.WORD_BEGIN)) type.append("WORD_BEGIN "); if (isType(AnchorType.WORD_END)) type.append("WORD_END "); if (isType(AnchorType.PREC_READ)) type.append("PREC_READ "); if (isType(AnchorType.PREC_READ_NOT)) type.append("PREC_READ_NOT "); if (isType(AnchorType.LOOK_BEHIND)) type.append("LOOK_BEHIND "); if (isType(AnchorType.LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT "); if (isType(AnchorType.ANYCHAR_STAR)) type.append("ANYCHAR_STAR "); if (isType(AnchorType.ANYCHAR_STAR_ML)) 
type.append("ANYCHAR_STAR_ML "); return type.toString(); } private boolean isType(int type) { return (this.type & type) != 0; } } jruby-joni-2.1.41/src/org/joni/ast/AnyCharNode.java000066400000000000000000000025571400407002500217720ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; public final class AnyCharNode extends Node { public AnyCharNode(){ super(CANY); } @Override public String getName() { return "Any Char"; } @Override public String toString(int level) { String value = ""; return value; } } jruby-joni-2.1.41/src/org/joni/ast/BackRefNode.java000066400000000000000000000056571400407002500217460ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import org.joni.Config; import org.joni.ScanEnvironment; import org.joni.exception.ErrorMessages; import org.joni.exception.ValueException; public final class BackRefNode extends StateNode { public final int back[]; public int backNum; public int nestLevel; private BackRefNode(int backNum, int[]backRefs, boolean byName, ScanEnvironment env) { super(BREF); this.backNum = backNum; if (byName) setNameRef(); for (int i=0; i 0) { back[pos] = n; pos++; } } backNum = pos; } @Override public String getName() { return "Back Ref"; } @Override public String toString(int level) { StringBuilder sb = new StringBuilder(super.toString(level)); sb.append("\n backNum: " + backNum); String backs = ""; for (int i=0; i= sbOut) { if (j > CR_FROM(mbr, i)) { addCodeRangeToBuf(env, j, CR_TO(mbr, i)); i++; } // !goto sb_end!, remove duplication! for (; i= sbOut) { // !goto sb_end2!, remove duplication prev = sbOut; for (i=0; i 1) { ccAscii.addCodeRangeToBuf(env, 0x00, 0x7F); } else { ccAscii.bs.setRange(env, 0x00, 0x7F); } ccWork.and(ccAscii, env); } or(ccWork, env); } else { addCTypeByRange(ctype, not, env, sbOut.value, ranges); } return; } int maxCode = asciiRange ? 
0x80 : BitSet.SINGLE_BYTE_SIZE; switch(ctype) { case CharacterType.ALPHA: case CharacterType.BLANK: case CharacterType.CNTRL: case CharacterType.DIGIT: case CharacterType.LOWER: case CharacterType.PUNCT: case CharacterType.SPACE: case CharacterType.UPPER: case CharacterType.XDIGIT: case CharacterType.ASCII: case CharacterType.ALNUM: if (not) { for (int c=0; c= maxCode) bs.set(env, c); } if (asciiRange) addAllMultiByteRange(env); } else { for (int c=0; c 0 && /* check invalid code point */ !(enc.isWord(c) || c >= maxCode)) bs.set(env, c); } if (asciiRange) addAllMultiByteRange(env); } break; default: throw new InternalException(ErrorMessages.PARSER_BUG); } // switch } public static enum CCVALTYPE { SB, CODE_POINT, CLASS } public static enum CCSTATE { VALUE, RANGE, COMPLETE, START } public static final class CCStateArg { public int from; public int to; public boolean fromIsRaw; public boolean toIsRaw; public CCVALTYPE inType; public CCVALTYPE type; public CCSTATE state; } public void nextStateClass(CCStateArg arg, CClassNode ascCC, ScanEnvironment env) { if (arg.state == CCSTATE.RANGE) throw new SyntaxException(ErrorMessages.CHAR_CLASS_VALUE_AT_END_OF_RANGE); if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) { if (arg.type == CCVALTYPE.SB) { bs.set(env, arg.from); if (ascCC != null) ascCC.bs.set(arg.from); } else if (arg.type == CCVALTYPE.CODE_POINT) { addCodeRange(env, arg.from, arg.from); if (ascCC != null) ascCC.addCodeRange(env, arg.from, arg.from, false); } } arg.state = CCSTATE.VALUE; arg.type = CCVALTYPE.CLASS; } public void nextStateValue(CCStateArg arg, CClassNode ascCc, ScanEnvironment env) { switch(arg.state) { case VALUE: if (arg.type == CCVALTYPE.SB) { bs.set(env, arg.from); if (ascCc != null) ascCc.bs.set(arg.from); } else if (arg.type == CCVALTYPE.CODE_POINT) { addCodeRange(env, arg.from, arg.from); if (ascCc != null) ascCc.addCodeRange(env, arg.from, arg.from, false); } break; case RANGE: if (arg.inType == arg.type) { if (arg.inType == 
CCVALTYPE.SB) { if (arg.from > 0xff || arg.to > 0xff) throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); if (arg.from > arg.to) { if (env.syntax.allowEmptyRangeInCC()) { // goto ccs_range_end arg.state = CCSTATE.COMPLETE; break; } else { throw new ValueException(ErrorMessages.EMPTY_RANGE_IN_CHAR_CLASS); } } bs.setRange(env, arg.from, arg.to); if (ascCc != null) ascCc.bs.setRange(null, arg.from, arg.to); } else { addCodeRange(env, arg.from, arg.to); if (ascCc != null) ascCc.addCodeRange(env, arg.from, arg.to, false); } } else { if (arg.from > arg.to) { if (env.syntax.allowEmptyRangeInCC()) { // goto ccs_range_end arg.state = CCSTATE.COMPLETE; break; } else { throw new ValueException(ErrorMessages.EMPTY_RANGE_IN_CHAR_CLASS); } } bs.setRange(env, arg.from, arg.to < 0xff ? arg.to : 0xff); addCodeRange(env, arg.from, arg.to); if (ascCc != null) { ascCc.bs.setRange(null, arg.from, arg.to < 0xff ? arg.to : 0xff); ascCc.addCodeRange(env, arg.from, arg.to, false); } } // ccs_range_end: arg.state = CCSTATE.COMPLETE; break; case COMPLETE: case START: arg.state = CCSTATE.VALUE; break; default: break; } // switch arg.fromIsRaw = arg.toIsRaw; arg.from = arg.to; arg.type = arg.inType; } // onig_is_code_in_cc_len boolean isCodeInCCLength(int encLength, int code) { boolean found; if (encLength > 1 || code >= BitSet.SINGLE_BYTE_SIZE) { if (mbuf == null) { found = false; } else { found = CodeRange.isInCodeRange(mbuf.getCodeRange(), code); } } else { found = bs.at(code); } if (isNot()) { return !found; } else { return found; } } // onig_is_code_in_cc public boolean isCodeInCC(Encoding enc, int code) { int len; if (enc.minLength() > 1) { len = 2; } else { len = enc.codeToMbcLength(code); } return isCodeInCCLength(len, code); } public void setNot() { flags |= FLAG_NCCLASS_NOT; } public void clearNot() { flags &= ~FLAG_NCCLASS_NOT; } public boolean isNot() { return (flags & FLAG_NCCLASS_NOT) != 0; } } 
jruby-joni-2.1.41/src/org/joni/ast/CTypeNode.java000066400000000000000000000033741400407002500214670ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package org.joni.ast;

/**
 * AST node holding a character-type reference: a numeric type id plus a
 * negation flag and an ASCII-range flag. All three values are fixed when the
 * node is constructed; the node is registered with node type {@code CTYPE}.
 */
public final class CTypeNode extends Node {
    /** Numeric character-type id exactly as passed to the constructor. */
    public final int ctype;
    /** Negation flag exactly as passed to the constructor. */
    public final boolean not;
    /** ASCII-range flag exactly as passed to the constructor. */
    public final boolean asciiRange;

    /**
     * @param type       character-type id stored in {@link #ctype}
     * @param not        stored in {@link #not}
     * @param asciiRange stored in {@link #asciiRange}
     */
    public CTypeNode(int type, boolean not, boolean asciiRange) {
        super(CTYPE);
        this.asciiRange = asciiRange;
        this.not = not;
        this.ctype = type;
    }

    /** @return the fixed display name of this node kind */
    @Override
    public String getName() {
        return "Character Type";
    }

    /**
     * Renders the three fields for debug dumps.
     *
     * @param level nesting depth; not used by this node
     */
    @Override
    public String toString(int level) {
        return new StringBuilder()
                .append("\n ctype: ").append(ctype)
                .append(", not: ").append(not)
                .append(", ascii: ").append(asciiRange)
                .toString();
    }
}
jruby-joni-2.1.41/src/org/joni/ast/CallNode.java000066400000000000000000000045671400407002500213230ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.ast;

import org.joni.UnsetAddrList;

/**
 * AST node of type {@code CALL}. It stores the slice of a byte array that
 * names the callee ({@code name[nameP..nameEnd)}), a group number, and the
 * {@link EncloseNode} the call resolves to once {@link #setTarget} (or
 * {@link #setChild}) has been invoked.
 */
public final class CallNode extends StateNode {
    /** Backing array containing the callee name. */
    public final byte[] name;
    /** Inclusive start of the name inside {@link #name}. */
    public final int nameP;
    /** Exclusive end of the name inside {@link #name}. */
    public final int nameEnd;

    public int groupNum;
    /** Resolved callee; {@code null} until a target is attached. */
    public EncloseNode target;
    public UnsetAddrList unsetAddrList;

    /**
     * @param name    array holding the callee name
     * @param nameP   start offset of the name
     * @param nameEnd end offset (exclusive) of the name
     * @param gnum    group number; call by number if gnum != 0
     */
    public CallNode(byte[] name, int nameP, int nameEnd, int gnum) {
        super(CALL);
        this.name = name;
        this.nameP = nameP;
        this.nameEnd = nameEnd;
        this.groupNum = gnum; /* call by number if gnum != 0 */
    }

    /** Stores {@code newChild} as the target; it must be an {@link EncloseNode}. */
    @Override
    protected void setChild(Node newChild) {
        target = (EncloseNode)newChild;
    }

    @Override
    protected Node getChild() {
        return target;
    }

    /** Attaches {@code tgt} as the callee and makes this node its parent. */
    public void setTarget(EncloseNode tgt) {
        target = tgt;
        tgt.parent = this;
    }

    @Override
    public String getName() {
        return "Call";
    }

    /**
     * Renders the node for debug dumps.
     *
     * The target is null between construction and {@link #setTarget}, so it is
     * checked before being dereferenced; {@code pad} prints a null value as
     * "NULL", which keeps the output shape the same as for unsetAddrList.
     */
    @Override
    public String toString(int level) {
        StringBuilder value = new StringBuilder(super.toString(level));
        value.append("\n name: " + new String(name, nameP, nameEnd - nameP));
        value.append(", groupNum: " + groupNum);
        value.append("\n unsetAddrList: " + pad(unsetAddrList, level + 1));
        String targetName = target == null ? null : target.getAddressName();
        value.append("\n target: " + pad(targetName, level + 1));
        return value.toString();
    }
}
jruby-joni-2.1.41/src/org/joni/ast/EncloseNode.java000066400000000000000000000102121400407002500220200ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.ast; import org.joni.Config; import org.joni.Option; import org.joni.constants.internal.EncloseType; public final class EncloseNode extends StateNode implements EncloseType { public final int type; // enclose type public int regNum; public int option; public Node target; /* EncloseNode : ENCLOSE_MEMORY */ public int callAddr; // AbsAddrType public int minLength; // OnigDistance public int maxLength; // OnigDistance public int charLength; public int optCount; // referenced count in optimize_node_left() public Node containingAnchor; // // node_new_enclose / onig_node_new_enclose public EncloseNode(int type) { super(ENCLOSE); this.type = type; callAddr = -1; } public static EncloseNode newMemory(int option, boolean isNamed) { EncloseNode en = new EncloseNode(MEMORY); if (Config.USE_SUBEXP_CALL) en.option = option; if (isNamed) en.setNamedGroup(); return en; } public static EncloseNode newOption(int option) { EncloseNode en = new EncloseNode(OPTION); en.option = option; return en; } @Override protected void setChild(Node child) { target = child; } @Override protected Node getChild() { return target; } public void setTarget(Node tgt) { target = tgt; tgt.parent = this; } @Override public String getName() { return "Enclose"; } @Override public String toString(int level) { StringBuilder value = new StringBuilder(super.toString(level)); value.append("\n type: " + typeToString()); value.append("\n regNum: " + regNum); value.append(", option: " + Option.toString(option)); value.append(", 
callAddr: " + callAddr); value.append(", minLength: " + minLength); value.append(", maxLength: " + maxLength); value.append(", charLength: " + charLength); value.append(", optCount: " + optCount); value.append("\n target: " + pad(target, level + 1)); return value.toString(); } public String typeToString() { StringBuilder types = new StringBuilder(); if (isStopBacktrack()) types.append("STOP_BACKTRACK "); if (isMemory()) types.append("MEMORY "); if (isOption()) types.append("OPTION "); if (isCondition()) types.append("CONDITION "); if (isAbsent()) types.append("ABSENT "); return types.toString(); } public void setEncloseStatus(int flag) { state |= flag; } public void clearEncloseStatus(int flag) { state &= ~flag; } public boolean isMemory() { return (type & MEMORY) != 0; } public boolean isOption() { return (type & OPTION) != 0; } public boolean isCondition() { return (type & CONDITION) != 0; } public boolean isStopBacktrack() { return (type & STOP_BACKTRACK) != 0; } public boolean isAbsent() { return (type & ABSENT) != 0; } } jruby-joni-2.1.41/src/org/joni/ast/ListNode.java000066400000000000000000000055741400407002500213620ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.ast;

import org.joni.exception.ErrorMessages;
import org.joni.exception.InternalException;

/**
 * Singly linked cell used for both node types {@code ALT} and {@code LIST}:
 * each cell holds one {@link #value} and a reference to the next cell
 * ({@link #tail}).
 */
public final class ListNode extends Node {
    public Node value;
    public ListNode tail;

    /**
     * Links the cell and, for each non-null argument, points its parent
     * reference back at the new cell.
     */
    private ListNode(Node value, ListNode tail, int type) {
        super(type);
        this.value = value;
        this.tail = tail;
        if (value != null) {
            value.parent = this;
        }
        if (tail != null) {
            tail.parent = this;
        }
    }

    /** Creates a cell of type {@code ALT}. */
    public static ListNode newAlt(Node value, ListNode tail) {
        return new ListNode(value, tail, ALT);
    }

    /** Creates a cell of type {@code LIST}. */
    public static ListNode newList(Node value, ListNode tail) {
        return new ListNode(value, tail, LIST);
    }

    /**
     * Wraps {@code value} in a fresh LIST cell and, when {@code list} is not
     * null, hooks that cell after the last cell reachable from {@code list}.
     *
     * @return the newly created cell (not the head of the list)
     */
    public static ListNode listAdd(ListNode list, Node value) {
        ListNode appended = newList(value, null);
        if (list == null) {
            return appended;
        }
        ListNode last = list;
        while (last.tail != null) {
            last = last.tail;
        }
        last.setTail(appended);
        return appended;
    }

    /** Re-tags this cell as {@code LIST}. */
    public void toListNode() {
        type = LIST;
    }

    @Override
    protected void setChild(Node child) {
        value = child;
    }

    @Override
    protected Node getChild() {
        return value;
    }

    /** Stores {@code value} and makes this cell its parent. */
    public void setValue(Node value) {
        this.value = value;
        value.parent = this;
    }

    /** Replaces the next-cell reference; the tail's parent is left untouched. */
    public void setTail(ListNode tail) {
        this.tail = tail;
    }

    /**
     * @return "Alt" or "List" according to the current node type
     * @throws InternalException if the type is neither ALT nor LIST
     */
    @Override
    public String getName() {
        if (type == ALT) {
            return "Alt";
        }
        if (type == LIST) {
            return "List";
        }
        throw new InternalException(ErrorMessages.PARSER_BUG);
    }

    /** Renders the padded value followed by the full dump of the tail (or "NULL"). */
    @Override
    public String toString(int level) {
        String tailText = (tail == null) ? "NULL" : tail.toString();
        return new StringBuilder()
                .append("\n value: ").append(pad(value, level + 1))
                .append("\n tail: ").append(tailText)
                .toString();
    }
}
jruby-joni-2.1.41/src/org/joni/ast/Node.java000066400000000000000000000064241400407002500205210ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ package org.joni.ast; import org.joni.constants.internal.NodeType; public abstract class Node implements NodeType { public Node parent; protected int type; Node(int type) { this.type = type; } public final int getType() { return type; } public final int getType2Bit() { return 1 << getType(); } protected void setChild(Node tgt){ // default definition } protected Node getChild(){ // default definition return null; }; public void replaceWith(Node with) { with.parent = parent; parent.setChild(with); parent = null; } public abstract String getName(); protected abstract String toString(int level); public String getAddressName() { return getName() + ":0x" + Integer.toHexString(System.identityHashCode(this)); } public final String toString() { StringBuilder s = new StringBuilder(); s.append("<" + getAddressName() + " (" + (parent == null ? "NULL" : parent.getAddressName()) + ")>"); return s + toString(0); } protected static String pad(Object value, int level) { if (value == null) return "NULL"; StringBuilder pad = new StringBuilder(" "); for (int i=0; i= 0 && targetQNum >= 0 && env.syntax.warnReduntantNestedRepeat()) { switch(REDUCE_TABLE[targetQNum][nestQNum]) { case ASIS: break; case DEL: env.warnings.warn("regular expression has redundant nested repeat operator " + PopularQStr[targetQNum] + " /" + new String(bytes, p, end) + "/"); break; default: env.warnings.warn("nested repeat operator '" + PopularQStr[targetQNum] + "' and '" + PopularQStr[nestQNum] + "' was replaced with '" + ReduceQStr[REDUCE_TABLE[targetQNum][nestQNum].ordinal()] + "' in regular expression " + "/" + new String(bytes, p, end) + "/"); } } } // USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR if (targetQNum >= 0) { if (nestQNum >= 0) { reduceNestedQuantifier(qnt); return 0; } else if (targetQNum == 1 || targetQNum == 2) { /* * or + */ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ if (!isRepeatInfinite(upper) && upper > 1 && greedy) { upper = lower == 0 ? 
1 : lower; } } } default: break; } setTarget(tgt); return 0; } public static boolean isRepeatInfinite(int n) { return n == REPEAT_INFINITE; } } jruby-joni-2.1.41/src/org/joni/ast/StateNode.java000066400000000000000000000133251400407002500215200ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.ast; import org.joni.constants.internal.NodeStatus; abstract class StateNode extends Node implements NodeStatus { protected int state; StateNode(int type) { super(type); } public boolean isMinFixed() { return (state & NST_MIN_FIXED) != 0; } public void setMinFixed() { state |= NST_MIN_FIXED; } public void clearMinFixed() { state &= ~NST_MIN_FIXED; } public boolean isMaxFixed() { return (state & NST_MAX_FIXED) != 0; } public void setMaxFixed() { state |= NST_MAX_FIXED; } public void clearMaxFixed() { state &= ~NST_MAX_FIXED; } public boolean isCLenFixed() { return (state & NST_CLEN_FIXED) != 0; } public void setCLenFixed() { state |= NST_CLEN_FIXED; } public void clearCLenFixed() { state &= ~NST_CLEN_FIXED; } public boolean isMark1() { return (state & NST_MARK1) != 0; } public void setMark1() { state |= NST_MARK1; } public void clearMark1() { state &= ~NST_MARK1; } public boolean isMark2() { return (state & NST_MARK2) != 0; } public void setMark2() { state |= NST_MARK2; } public void clearMark2() { state &= ~NST_MARK2; } public boolean isMemBackrefed() { return (state & NST_MEM_BACKREFED) != 0; } public void setMemBackrefed() { state |= NST_MEM_BACKREFED; } public void clearMemBackrefed() { state &= ~NST_MEM_BACKREFED; } public boolean isStopBtSimpleRepeat() { return (state & NST_STOP_BT_SIMPLE_REPEAT) != 0; } public void setStopBtSimpleRepeat() { state |= NST_STOP_BT_SIMPLE_REPEAT; } public void clearStopBtSimpleRepeat() { state &= ~NST_STOP_BT_SIMPLE_REPEAT; } public boolean isRecursion() { return (state & NST_RECURSION) != 0; } public void setRecursion() { state |= NST_RECURSION; } public void clearRecursion() { state &= ~NST_RECURSION; } public boolean isCalled() { return (state & NST_CALLED) != 0; } public void setCalled() { state |= NST_CALLED; } public void clearCAlled() { state &= ~NST_CALLED; } public boolean isAddrFixed() { return (state & NST_ADDR_FIXED) != 0; } public void setAddrFixed() { state |= NST_ADDR_FIXED; } public void 
clearAddrFixed() { state &= ~NST_ADDR_FIXED; } public boolean isNamedGroup() { return (state & NST_NAMED_GROUP) != 0; } public void setNamedGroup() { state |= NST_NAMED_GROUP; } public void clearNamedGroup() { state &= ~NST_NAMED_GROUP; } public boolean isNameRef() { return (state & NST_NAME_REF) != 0; } public void setNameRef() { state |= NST_NAME_REF; } public void clearNameRef() { state &= ~NST_NAME_REF; } public boolean isInRepeat() { return (state & NST_IN_REPEAT) != 0; } public void setInRepeat() { state |= NST_IN_REPEAT; } public void clearInRepeat() { state &= ~NST_IN_REPEAT; } public boolean isNestLevel() { return (state & NST_NEST_LEVEL) != 0; } public void setNestLevel() { state |= NST_NEST_LEVEL; } public void clearNestLevel() { state &= ~NST_NEST_LEVEL; } public boolean isByNumber() { return (state & NST_BY_NUMBER) != 0; } public void setByNumber() { state |= NST_BY_NUMBER; } public void clearByNumber() { state &= ~NST_BY_NUMBER; } @Override public String toString(int level) { return "\n state: " + stateToString(); } public String stateToString() { StringBuilder states = new StringBuilder(); if (isMinFixed()) states.append("MIN_FIXED "); if (isMaxFixed()) states.append("MAX_FIXED "); if (isMark1()) states.append("MARK1 "); if (isMark2()) states.append("MARK2 "); if (isMemBackrefed()) states.append("MEM_BACKREFED "); if (isStopBtSimpleRepeat()) states.append("STOP_BT_SIMPLE_REPEAT "); if (isRecursion()) states.append("RECURSION "); if (isCalled()) states.append("CALLED "); if (isAddrFixed()) states.append("ADDR_FIXED "); if (isNamedGroup()) states.append("NAMED_GROUP "); if (isNameRef()) states.append("NAME_REF "); if (isInRepeat()) states.append("IN_REPEAT "); if (isNestLevel()) states.append("NEST_LEVEL "); if (isByNumber()) states.append("BY_NUMBER "); return states.toString(); } } jruby-joni-2.1.41/src/org/joni/ast/StringNode.java000066400000000000000000000137251400407002500217120ustar00rootroot00000000000000/* * Permission is hereby granted, free 
of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package org.joni.ast;

import org.jcodings.Encoding;
import org.joni.Config;
import org.joni.constants.internal.StringType;

/**
 * AST node of type {@code STR}: a window {@code bytes[p..end)} over a byte
 * array plus a flag word built from the NSTR_* constants of
 * {@link StringType}. A node created over a caller-supplied array is marked
 * shared and is copied into a private buffer before its first modification.
 */
public final class StringNode extends Node implements StringType {
    /** Extra capacity added whenever the buffer is reallocated. */
    private static final int NODE_STR_MARGIN = 16;
    /** Capacity of the buffer created by the no-argument constructor. */
    private static final int NODE_STR_BUF_SIZE = 24;

    public static final StringNode EMPTY = new StringNode(null, Integer.MAX_VALUE, Integer.MAX_VALUE);

    public byte[] bytes;
    public int p;
    public int end;
    public int flag;

    /** Creates an empty, unshared node with a private buffer of {@code size} bytes. */
    public StringNode(int size) {
        super(STR);
        this.bytes = new byte[size];
    }

    /** Creates an empty, unshared node with the default buffer size. */
    public StringNode() {
        this(NODE_STR_BUF_SIZE);
    }

    /** Creates a node holding the encoded form of a single code point. */
    public static StringNode fromCodePoint(int code, Encoding enc) {
        StringNode str = new StringNode(Config.ENC_CODE_TO_MBC_MAXLEN);
        str.end = enc.codeToMbc(code, str.bytes, str.p);
        return str;
    }

    /** Creates a node that views {@code bytes[p..end)} and marks it shared. */
    public StringNode(byte[] bytes, int p, int end) {
        super(STR);
        this.bytes = bytes;
        this.p = p;
        this.end = end;
        setShared();
    }

    /* Ensure there is ahead bytes available in node's buffer
     * (assumes that the node is not shared) */
    private void ensure(int ahead) {
        int used = end - p;
        // Writes land at bytes[end..end+ahead), so the test must use end, not
        // the payload length; for p == 0 both expressions are the same value.
        if (end + ahead >= bytes.length) {
            byte[] tmp = new byte[used + ahead + NODE_STR_MARGIN];
            System.arraycopy(bytes, p, tmp, 0, used);
            bytes = tmp;
            // The payload now starts at offset 0 of the new buffer, so the
            // window has to move with it (no-op when p was already 0).
            end = used;
            p = 0;
        }
    }

    /* COW and/or ensure there is ahead bytes available in node's buffer */
    private void modifyEnsure(int ahead) {
        if (isShared()) {
            int len = (end - p) + ahead;
            byte[] tmp = new byte[len + NODE_STR_MARGIN];
            System.arraycopy(bytes, p, tmp, 0, end - p);
            bytes = tmp;
            end = end - p;
            p = 0;
            clearShared();
        } else {
            ensure(ahead);
        }
    }

    @Override
    public String getName() {
        return "String";
    }

    /** @return the window length in bytes */
    public int length() {
        return end - p;
    }

    /** @return the window length as reported by {@code enc.strLength} */
    public int length(Encoding enc) {
        return enc.strLength(bytes, p, end);
    }

    /**
     * Detaches the last character into a new node when it is not also the
     * first one. The new node views the same array and inherits the raw flag;
     * this node's window is shortened accordingly.
     *
     * @return the node for the last character, or {@code null} if no split was made
     */
    public StringNode splitLastChar(Encoding enc) {
        StringNode n = null;
        if (end > p) {
            int prev = enc.prevCharHead(bytes, p, end, end);
            if (prev != -1 && prev > p) { /* can be split */
                n = new StringNode(bytes, prev, end);
                if (isRaw()) n.setRaw();
                end = prev;
            }
        }
        return n;
    }

    /** @return true when the first character is shorter than the whole window */
    public boolean canBeSplit(Encoding enc) {
        if (end > p) {
            return enc.length(bytes, p, end) < (end - p);
        }
        return false;
    }

    /** Re-points the node at {@code bytes[p..end)} and marks it shared. */
    public void set(byte[] bytes, int p, int end) {
        this.bytes = bytes;
        this.p = p;
        this.end = end;
        setShared();
    }

    /** Appends {@code cat[catP..catEnd)} to the node's content. */
    public void catBytes(byte[] cat, int catP, int catEnd) {
        int len = catEnd - catP;
        modifyEnsure(len);
        System.arraycopy(cat, catP, bytes, end, len);
        end += len;
    }

    /** Appends a single byte. */
    public void catByte(byte c) {
        modifyEnsure(1);
        bytes[end++] = c;
    }

    /** Appends the encoded form of {@code code}. */
    public void catCode(int code, Encoding enc) {
        modifyEnsure(Config.ENC_CODE_TO_MBC_MAXLEN);
        end += enc.codeToMbc(code, bytes, end);
    }

    public void setRaw() {
        flag |= NSTR_RAW;
    }

    public void clearRaw() {
        flag &= ~NSTR_RAW;
    }

    public boolean isRaw() {
        return (flag & NSTR_RAW) != 0;
    }

    public void setAmbig() {
        flag |= NSTR_AMBIG;
    }

    public void clearAmbig() {
        flag &= ~NSTR_AMBIG;
    }

    public boolean isAmbig() {
        return (flag & NSTR_AMBIG) != 0;
    }

    public void setDontGetOptInfo() {
        flag |= NSTR_DONT_GET_OPT_INFO;
    }

    public void clearDontGetOptInfo() {
        flag &= ~NSTR_DONT_GET_OPT_INFO;
    }

    public boolean isDontGetOptInfo() {
        return (flag & NSTR_DONT_GET_OPT_INFO) != 0;
    }

    public void setShared() {
        flag |= NSTR_SHARED;
    }

    public void clearShared() {
        flag &= ~NSTR_SHARED;
    }

    public boolean isShared() {
        return (flag & NSTR_SHARED) != 0;
    }

    /** Lists the name of every flag that is set, each followed by a space. */
    public String flagsToString() {
        StringBuilder flags = new StringBuilder();
        if (isRaw()) flags.append("RAW ");
        if (isAmbig()) flags.append("AMBIG ");
        if (isDontGetOptInfo()) flags.append("DONT_GET_OPT_INFO ");
        if (isShared()) flags.append("SHARED ");
        return flags.toString();
    }

    /**
     * Renders flags and content for debug dumps. Bytes in the range
     * 0x20..0x7e are printed as characters, every other byte as
     * {@code [0xNN]}.
     */
    @Override
    public String toString(int level) {
        StringBuilder sb = new StringBuilder();
        sb.append("\n flags: " + flagsToString());
        sb.append("\n bytes: '");
        for (int i = p; i < end; i++) {
            if ((bytes[i] & 0xff) >= 0x20 && (bytes[i] & 0xff) < 0x7f) {
                sb.append((char)bytes[i]);
            } else {
                sb.append(String.format("[0x%02x]", bytes[i]));
            }
        }
        sb.append("'");
        return sb.toString();
    }
}
jruby-joni-2.1.41/src/org/joni/bench/000077500000000000000000000000001400407002500172535ustar00rootroot00000000000000jruby-joni-2.1.41/src/org/joni/bench/AbstractBench.java000066400000000000000000000034231400407002500226230ustar00rootroot00000000000000package org.joni.bench; import org.jcodings.specific.ASCIIEncoding; import org.joni.Option; import org.joni.Regex; import org.joni.Syntax; public abstract class AbstractBench { protected void bench(String _reg, String _str, int warmup, int times) throws Exception { byte[] reg = _reg.getBytes(); byte[] str = _str.getBytes(); Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT); System.err.println("::: /" + _reg + "/ =~ \"" + _str + "\", " + warmup + " * " + times + " times"); for(int j=0;j */ final int OP_ESC_B_WORD_BOUND = (1<<20); /* \b, \B */ final int OP_ESC_S_WHITE_SPACE = (1<<21); /* \s, \S */ final int OP_ESC_D_DIGIT = (1<<22); /* \d, \D */ final int OP_LINE_ANCHOR = (1<<23); /* ^, $ */ final int OP_POSIX_BRACKET = (1<<24); /* [:xxxx:] */ final int OP_QMARK_NON_GREEDY = (1<<25); /* ??,*?,+?,{n,m}? */ final int OP_ESC_CONTROL_CHARS = (1<<26); /* \n,\r,\t,\a ... */ final int OP_ESC_C_CONTROL = (1<<27); /* \cx */ final int OP_ESC_OCTAL3 = (1<<28); /* \OOO */ final int OP_ESC_X_HEX2 = (1<<29); /* \xHH */ final int OP_ESC_X_BRACE_HEX8 = (1<<30); /* \x{7HHHHHHH} */ final int OP_ESC_O_BRACE_OCTAL = (1<<31); /* \o{OOO} */ final int OP2_ESC_CAPITAL_Q_QUOTE = (1<<0); /* \Q...\E */ final int OP2_QMARK_GROUP_EFFECT = (1<<1); /* (?...); */ final int OP2_OPTION_PERL = (1<<2); /* (?imsxadlu), (?-imsx), (?^imsxalu) */ final int OP2_OPTION_RUBY = (1<<3); /* (?imxadu);, (?-imx); */ final int OP2_PLUS_POSSESSIVE_REPEAT = (1<<4); /* ?+,*+,++ */ final int OP2_PLUS_POSSESSIVE_INTERVAL = (1<<5); /* {n,m}+ */ final int OP2_CCLASS_SET_OP = (1<<6); /* [...&&..[..]..] 
*/ final int OP2_QMARK_LT_NAMED_GROUP = (1<<7); /* (?...); */ final int OP2_ESC_K_NAMED_BACKREF = (1<<8); /* \k */ final int OP2_ESC_G_SUBEXP_CALL = (1<<9); /* \g, \g */ final int OP2_ATMARK_CAPTURE_HISTORY = (1<<10); /* (?@..);,(?@..); */ final int OP2_ESC_CAPITAL_C_BAR_CONTROL = (1<<11); /* \C-x */ final int OP2_ESC_CAPITAL_M_BAR_META = (1<<12); /* \M-x */ final int OP2_ESC_V_VTAB = (1<<13); /* \v as VTAB */ final int OP2_ESC_U_HEX4 = (1<<14); /* \\uHHHH */ final int OP2_ESC_GNU_BUF_ANCHOR = (1<<15); /* \`, \' */ final int OP2_ESC_P_BRACE_CHAR_PROPERTY = (1<<16); /* \p{...}, \P{...} */ final int OP2_ESC_P_BRACE_CIRCUMFLEX_NOT = (1<<17); /* \p{^..}, \P{^..} */ /* final int OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18); */ final int OP2_ESC_H_XDIGIT = (1<<19); /* \h, \H */ final int OP2_INEFFECTIVE_ESCAPE = (1<<20); /* \ */ final int OP2_ESC_CAPITAL_R_LINEBREAK = (1<<21); /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */ final int OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER = (1<<22); /* \X as (?:\P{M}\p{M}*) */ final int OP2_ESC_V_VERTICAL_WHITESPACE = (1<<23); /* \v, \V -- Perl */ final int OP2_ESC_H_HORIZONTAL_WHITESPACE= (1<<24); /* \h, \H -- Perl */ final int OP2_ESC_CAPITAL_K_KEEP = (1<<25); /* \K */ final int OP2_ESC_G_BRACE_BACKREF = (1<<26); /* \g{name}, \g{n} */ final int OP2_QMARK_SUBEXP_CALL = (1<<27); /* (?&name), (?n), (?R), (?0) */ final int OP2_QMARK_BAR_BRANCH_RESET = (1<<28); /* (?|...) */ final int OP2_QMARK_LPAREN_CONDITION = (1<<29); /* (?(cond)yes...|no...) */ final int OP2_QMARK_CAPITAL_P_NAMED_GROUP= (1<<30); /* (?P...), (?P=name), (?P>name) -- Python/PCRE */ final int OP2_QMARK_TILDE_ABSENT = (1<<31); /* (?~...) 
*/ final int OP3_OPTION_JAVA = (1<<0); /* (?idmsux), (?-idmsux) */ final int OP3_OPTION_ECMASCRIPT = (1<<1); /* EcmaScript quirks */ /* syntax (behavior); */ final int CONTEXT_INDEP_ANCHORS = (1<<31); /* not implemented */ final int CONTEXT_INDEP_REPEAT_OPS = (1<<0); /* ?, *, +, {n,m} */ final int CONTEXT_INVALID_REPEAT_OPS = (1<<1); /* error or ignore */ final int ALLOW_UNMATCHED_CLOSE_SUBEXP = (1<<2); /* ...);... */ final int ALLOW_INVALID_INTERVAL = (1<<3); /* {??? */ final int ALLOW_INTERVAL_LOW_ABBREV = (1<<4); /* {,n} => {0,n} */ final int STRICT_CHECK_BACKREF = (1<<5); /* /(\1);/,/\1();/ ..*/ final int DIFFERENT_LEN_ALT_LOOK_BEHIND = (1<<6); /* (?<=a|bc); */ final int CAPTURE_ONLY_NAMED_GROUP = (1<<7); /* see doc/RE */ final int ALLOW_MULTIPLEX_DEFINITION_NAME = (1<<8); /* (?);(?); */ final int FIXED_INTERVAL_IS_GREEDY_ONLY = (1<<9); /* a{n}?=(?:a{n});? */ final int ALLOW_MULTIPLEX_DEFINITION_NAME_CALL = (1<<10); /* (?)(?)(?&x) */ /* syntax (behavior); in char class [...] */ final int NOT_NEWLINE_IN_NEGATIVE_CC = (1<<20); /* [^...] */ final int BACKSLASH_ESCAPE_IN_CC = (1<<21); /* [..\w..] etc.. 
*/ final int ALLOW_EMPTY_RANGE_IN_CC = (1<<22); final int ALLOW_DOUBLE_RANGE_OP_IN_CC = (1<<23); /* [0-9-a]=[0-9\-a] */ /* syntax (behavior); warning */ final int WARN_CC_OP_NOT_ESCAPED = (1<<24); /* [,-,] */ final int WARN_REDUNDANT_NESTED_REPEAT = (1<<25); /* (?:a*);+ */ final int WARN_CC_DUP = (1<<26); /* [aa] */ final int POSIX_COMMON_OP = OP_DOT_ANYCHAR | OP_POSIX_BRACKET | OP_DECIMAL_BACKREF | OP_BRACKET_CC | OP_ASTERISK_ZERO_INF | OP_LINE_ANCHOR | OP_ESC_CONTROL_CHARS; final int GNU_REGEX_OP = OP_DOT_ANYCHAR | OP_BRACKET_CC | OP_POSIX_BRACKET | OP_DECIMAL_BACKREF | OP_BRACE_INTERVAL | OP_LPAREN_SUBEXP | OP_VBAR_ALT | OP_ASTERISK_ZERO_INF | OP_PLUS_ONE_INF | OP_QMARK_ZERO_ONE | OP_ESC_AZ_BUF_ANCHOR | OP_ESC_CAPITAL_G_BEGIN_ANCHOR | OP_ESC_W_WORD | OP_ESC_B_WORD_BOUND | OP_ESC_LTGT_WORD_BEGIN_END | OP_ESC_S_WHITE_SPACE | OP_ESC_D_DIGIT | OP_LINE_ANCHOR; final int GNU_REGEX_BV = CONTEXT_INDEP_ANCHORS | CONTEXT_INDEP_REPEAT_OPS | CONTEXT_INVALID_REPEAT_OPS | ALLOW_INVALID_INTERVAL | BACKSLASH_ESCAPE_IN_CC | ALLOW_DOUBLE_RANGE_OP_IN_CC; } jruby-joni-2.1.41/src/org/joni/constants/internal/000077500000000000000000000000001400407002500220245ustar00rootroot00000000000000jruby-joni-2.1.41/src/org/joni/constants/internal/AnchorType.java000066400000000000000000000061261400407002500247500ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.constants.internal;

/**
 * Bit flags identifying anchor kinds, one bit per kind, together with a few
 * combined masks. Interface fields are implicitly {@code public static final},
 * so the modifier is not repeated on each line.
 */
public interface AnchorType {
    int BEGIN_BUF       = 1 << 0;
    int BEGIN_LINE      = 1 << 1;
    int BEGIN_POSITION  = 1 << 2;
    int END_BUF         = 1 << 3;
    int SEMI_END_BUF    = 1 << 4;
    int END_LINE        = 1 << 5;

    int WORD_BOUND      = 1 << 6;
    int NOT_WORD_BOUND  = 1 << 7;
    int WORD_BEGIN      = 1 << 8;
    int WORD_END        = 1 << 9;
    int PREC_READ       = 1 << 10;
    int PREC_READ_NOT   = 1 << 11;
    int LOOK_BEHIND     = 1 << 12;
    int LOOK_BEHIND_NOT = 1 << 13;

    int ANYCHAR_STAR    = 1 << 14;  /* ".*" optimize info */
    int ANYCHAR_STAR_ML = 1 << 15;  /* ".*" optimize info (multi-line) */

    /** Both ".*" optimize-info bits. */
    int ANYCHAR_STAR_MASK = ANYCHAR_STAR_ML | ANYCHAR_STAR;
    /** Both end-of-buffer bits. */
    int END_BUF_MASK      = SEMI_END_BUF | END_BUF;

    int KEEP            = 1 << 16;

    /** Mask of anchor bits listed as allowed in a look-behind. */
    int ALLOWED_IN_LB =
            BEGIN_BUF | BEGIN_LINE | BEGIN_POSITION | END_LINE
          | WORD_BOUND | NOT_WORD_BOUND | WORD_BEGIN | WORD_END
          | LOOK_BEHIND | LOOK_BEHIND_NOT
          | KEEP;

    /** Mask for the negated look-behind case; currently the same bit set as {@link #ALLOWED_IN_LB}. */
    int ALLOWED_IN_LB_NOT =
            BEGIN_BUF | BEGIN_LINE | BEGIN_POSITION | END_LINE
          | WORD_BOUND | NOT_WORD_BOUND | WORD_BEGIN | WORD_END
          | LOOK_BEHIND | LOOK_BEHIND_NOT
          | KEEP;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/Arguments.java000066400000000000000000000026051400407002500246370ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation
the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.constants.internal;

/**
 * Tags describing what kind of operand an opcode carries. These values are
 * the entries of {@code OPCode.OpCodeArgTypes}.
 */
public interface Arguments {
    // Interface fields are implicitly public, static and final.
    int SPECIAL     = -1;
    int NON         =  0;
    int RELADDR     =  1;
    int ABSADDR     =  2;
    int LENGTH      =  3;
    int MEMNUM      =  4;
    int OPTION      =  5;
    int STATE_CHECK =  6;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/EncloseType.java000066400000000000000000000027001400407002500251200ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.constants.internal;

/**
 * Bit flags for the kinds of enclosing group, plus two masks built from them.
 */
public interface EncloseType {
    // Interface fields are implicitly public, static and final.
    int MEMORY         = 0x01;
    int OPTION         = 0x02;
    int STOP_BACKTRACK = 0x04;
    int CONDITION      = 0x08;
    int ABSENT         = 0x10;

    /* enclose kinds listed for look-behind */
    int ALLOWED_IN_LB = OPTION | MEMORY;
    /* only OPTION here; presumably the negative look-behind variant - confirm against callers */
    int ALLOWED_IN_LB_NOT = OPTION;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/NodeStatus.java000066400000000000000000000036541400407002500247700ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.constants.internal;

/**
 * Status bits for a node. Each constant is a single bit, so several can be
 * combined in one int.
 */
public interface NodeStatus {
    // Interface fields are implicitly public, static and final.
    /* status bits */
    int NST_MIN_FIXED             = 0x0001;
    int NST_MAX_FIXED             = 0x0002;
    int NST_CLEN_FIXED            = 0x0004;
    int NST_MARK1                 = 0x0008;
    int NST_MARK2                 = 0x0010;
    int NST_MEM_BACKREFED         = 0x0020;
    int NST_STOP_BT_SIMPLE_REPEAT = 0x0040;
    int NST_RECURSION             = 0x0080;
    int NST_CALLED                = 0x0100;
    int NST_ADDR_FIXED            = 0x0200;
    int NST_NAMED_GROUP           = 0x0400;
    int NST_NAME_REF              = 0x0800;
    int NST_IN_REPEAT             = 0x1000; /* STK_REPEAT is nested in stack. */
    int NST_NEST_LEVEL            = 0x2000;
    int NST_BY_NUMBER             = 0x4000; /* {n,m} */
}
jruby-joni-2.1.41/src/org/joni/constants/internal/NodeType.java000066400000000000000000000051671400407002500244270ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.constants.internal;

/**
 * Node type ids (0..10), a one-bit mask per id, and two masks combining
 * several of those bits.
 */
public interface NodeType {
    // Interface fields are implicitly public, static and final.
    /* node type */
    int STR     = 0;
    int CCLASS  = 1;
    int CTYPE   = 2;
    int CANY    = 3;
    int BREF    = 4;
    int QTFR    = 5;
    int ENCLOSE = 6;
    int ANCHOR  = 7;
    int LIST    = 8;
    int ALT     = 9;
    int CALL    = 10;

    /* one bit per node type: bit position == type id */
    int BIT_STR     = 1 << STR;
    int BIT_CCLASS  = 1 << CCLASS;
    int BIT_CTYPE   = 1 << CTYPE;
    int BIT_CANY    = 1 << CANY;
    int BIT_BREF    = 1 << BREF;
    int BIT_QTFR    = 1 << QTFR;
    int BIT_ENCLOSE = 1 << ENCLOSE;
    int BIT_ANCHOR  = 1 << ANCHOR;
    int BIT_LIST    = 1 << LIST;
    int BIT_ALT     = 1 << ALT;
    int BIT_CALL    = 1 << CALL;

    /* allowed node types in look-behind (every type except BREF) */
    int ALLOWED_IN_LB =
            BIT_STR | BIT_CCLASS | BIT_CTYPE | BIT_CANY
            | BIT_QTFR | BIT_ENCLOSE | BIT_ANCHOR
            | BIT_LIST | BIT_ALT | BIT_CALL;

    int SIMPLE = BIT_STR | BIT_CCLASS | BIT_CTYPE | BIT_CANY | BIT_BREF;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/OPCode.java000066400000000000000000000416611400407002500240100ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.constants.internal;

import org.joni.Config;

/**
 * Numeric opcodes (0..99) plus two debug lookup tables indexed by opcode.
 *
 * <p>The opcode value is the index into {@link #OpCodeNames} and
 * {@link #OpCodeArgTypes}, so the constants and both tables must stay in the
 * same order. Both tables are {@code null} unless
 * {@code Config.DEBUG_COMPILE} is set.
 */
public interface OPCode {
    final int FINISH                        = 0;            /* matching process terminator (no more alternative) */
    final int END                           = 1;            /* pattern code terminator (success end) */

    final int EXACT1                        = 2;            /* single byte, N = 1 */
    final int EXACT2                        = 3;            /* single byte, N = 2 */
    final int EXACT3                        = 4;            /* single byte, N = 3 */
    final int EXACT4                        = 5;            /* single byte, N = 4 */
    final int EXACT5                        = 6;            /* single byte, N = 5 */
    final int EXACTN                        = 7;            /* single byte */
    final int EXACTMB2N1                    = 8;            /* mb-length = 2 N = 1 */
    final int EXACTMB2N2                    = 9;            /* mb-length = 2 N = 2 */
    final int EXACTMB2N3                    = 10;           /* mb-length = 2 N = 3 */
    final int EXACTMB2N                     = 11;           /* mb-length = 2 */
    final int EXACTMB3N                     = 12;           /* mb-length = 3 */
    final int EXACTMBN                      = 13;           /* other length */

    final int EXACT1_IC                     = 14;           /* single byte, N = 1, ignore case */
    final int EXACTN_IC                     = 15;           /* single byte, ignore case */

    final int CCLASS                        = 16;
    final int CCLASS_MB                     = 17;
    final int CCLASS_MIX                    = 18;
    final int CCLASS_NOT                    = 19;
    final int CCLASS_MB_NOT                 = 20;
    final int CCLASS_MIX_NOT                = 21;

    final int ANYCHAR                       = 22;           /* "." */
    final int ANYCHAR_ML                    = 23;           /* "." multi-line */
    final int ANYCHAR_STAR                  = 24;           /* ".*" */
    final int ANYCHAR_ML_STAR               = 25;           /* ".*" multi-line */
    final int ANYCHAR_STAR_PEEK_NEXT        = 26;
    final int ANYCHAR_ML_STAR_PEEK_NEXT     = 27;

    final int WORD                          = 28;
    final int NOT_WORD                      = 29;
    final int WORD_BOUND                    = 30;
    final int NOT_WORD_BOUND                = 31;
    final int WORD_BEGIN                    = 32;
    final int WORD_END                      = 33;

    final int ASCII_WORD                    = 34;
    final int ASCII_NOT_WORD                = 35;
    final int ASCII_WORD_BOUND              = 36;
    final int ASCII_NOT_WORD_BOUND          = 37;
    final int ASCII_WORD_BEGIN              = 38;
    final int ASCII_WORD_END                = 39;

    final int BEGIN_BUF                     = 40;
    final int END_BUF                       = 41;
    final int BEGIN_LINE                    = 42;
    final int END_LINE                      = 43;
    final int SEMI_END_BUF                  = 44;
    final int BEGIN_POSITION                = 45;

    final int BACKREF1                      = 46;
    final int BACKREF2                      = 47;
    final int BACKREFN                      = 48;
    final int BACKREFN_IC                   = 49;
    final int BACKREF_MULTI                 = 50;
    final int BACKREF_MULTI_IC              = 51;
    final int BACKREF_WITH_LEVEL            = 52;           /* \k<name+level>, \k<name-level> */

    final int MEMORY_START                  = 53;
    final int MEMORY_START_PUSH             = 54;           /* push back-tracker to stack */
    final int MEMORY_END_PUSH               = 55;           /* push back-tracker to stack */
    final int MEMORY_END_PUSH_REC           = 56;           /* push back-tracker to stack */
    final int MEMORY_END                    = 57;
    final int MEMORY_END_REC                = 58;           /* push marker to stack */

    final int KEEP                          = 59;
    final int FAIL                          = 60;           /* pop stack and move */
    final int JUMP                          = 61;
    final int PUSH                          = 62;
    final int POP                           = 63;
    final int PUSH_OR_JUMP_EXACT1           = 64;           /* if match exact then push, else jump. */
    final int PUSH_IF_PEEK_NEXT             = 65;           /* if match exact then push, else none. */

    final int REPEAT                        = 66;           /* {n,m} */
    final int REPEAT_NG                     = 67;           /* {n,m}? (non greedy) */
    final int REPEAT_INC                    = 68;
    final int REPEAT_INC_NG                 = 69;           /* non greedy */
    final int REPEAT_INC_SG                 = 70;           /* search and get in stack */
    final int REPEAT_INC_NG_SG              = 71;           /* search and get in stack (non greedy) */

    final int NULL_CHECK_START              = 72;           /* null loop checker start */
    final int NULL_CHECK_END                = 73;           /* null loop checker end */
    final int NULL_CHECK_END_MEMST          = 74;           /* null loop checker end (with capture status) */
    final int NULL_CHECK_END_MEMST_PUSH     = 75;           /* with capture status and push check-end */

    final int PUSH_POS                      = 76;           /* (?=...) start */
    final int POP_POS                       = 77;           /* (?=...) end */
    final int PUSH_POS_NOT                  = 78;           /* (?!...) start */
    final int FAIL_POS                      = 79;           /* (?!...) end */
    final int PUSH_STOP_BT                  = 80;           /* (?>...) start */
    final int POP_STOP_BT                   = 81;           /* (?>...) end */
    final int LOOK_BEHIND                   = 82;           /* (?<=...) start (no needs end opcode) */
    final int PUSH_LOOK_BEHIND_NOT          = 83;           /* (?<!...) start */
    // Values 84..88 follow the order of the matching entries in OpCodeNames
    // ("fail-look-behind-not", "push-absent-pos", "absent", "absent-end", "call").
    final int FAIL_LOOK_BEHIND_NOT          = 84;           /* (?<!...) end */
    final int PUSH_ABSENT_POS               = 85;           /* (?~...) start */
    final int ABSENT                        = 86;           /* (?~...) inner loop */
    final int ABSENT_END                    = 87;           /* (?~...) end */

    final int CALL                          = 88;           /* \g<name> */
    final int RETURN                        = 89;
    final int CONDITION                     = 90;

    final int STATE_CHECK_PUSH              = 91;           /* combination explosion check and push */
    final int STATE_CHECK_PUSH_OR_JUMP      = 92;           /* check ok -> push, else jump */
    final int STATE_CHECK                   = 93;           /* check only */
    final int STATE_CHECK_ANYCHAR_STAR      = 94;
    final int STATE_CHECK_ANYCHAR_ML_STAR   = 95;

    /* no need: IS_DYNAMIC_OPTION() == 0 */
    final int SET_OPTION_PUSH               = 96;           /* set option and push recover option */
    final int SET_OPTION                    = 97;           /* set option */

    final int EXACT1_IC_SB                  = 98;           /* single byte, N = 1, ignore case */
    final int EXACTN_IC_SB                  = 99;           /* single byte, ignore case */

    /**
     * Printable name for each opcode, indexed by opcode value; {@code null}
     * when {@code Config.DEBUG_COMPILE} is off.
     */
    public final String[] OpCodeNames = Config.DEBUG_COMPILE ? new String[] {
        "finish",                       /* 0  FINISH */
        "end",                          /* 1  END */
        "exact1",                       /* 2  EXACT1 */
        "exact2",                       /* 3  EXACT2 */
        "exact3",                       /* 4  EXACT3 */
        "exact4",                       /* 5  EXACT4 */
        "exact5",                       /* 6  EXACT5 */
        "exactn",                       /* 7  EXACTN */
        "exactmb2-n1",                  /* 8  EXACTMB2N1 */
        "exactmb2-n2",                  /* 9  EXACTMB2N2 */
        "exactmb2-n3",                  /* 10 EXACTMB2N3 */
        "exactmb2-n",                   /* 11 EXACTMB2N */
        "exactmb3n",                    /* 12 EXACTMB3N */
        "exactmbn",                     /* 13 EXACTMBN */
        "exact1-ic",                    /* 14 EXACT1_IC */
        "exactn-ic",                    /* 15 EXACTN_IC */
        "cclass",                       /* 16 CCLASS */
        "cclass-mb",                    /* 17 CCLASS_MB */
        "cclass-mix",                   /* 18 CCLASS_MIX */
        "cclass-not",                   /* 19 CCLASS_NOT */
        "cclass-mb-not",                /* 20 CCLASS_MB_NOT */
        "cclass-mix-not",               /* 21 CCLASS_MIX_NOT */
        "anychar",                      /* 22 ANYCHAR */
        "anychar-ml",                   /* 23 ANYCHAR_ML */
        "anychar*",                     /* 24 ANYCHAR_STAR */
        "anychar-ml*",                  /* 25 ANYCHAR_ML_STAR */
        "anychar*-peek-next",           /* 26 ANYCHAR_STAR_PEEK_NEXT */
        "anychar-ml*-peek-next",        /* 27 ANYCHAR_ML_STAR_PEEK_NEXT */
        "word",                         /* 28 WORD */
        "not-word",                     /* 29 NOT_WORD */
        "word-bound",                   /* 30 WORD_BOUND */
        "not-word-bound",               /* 31 NOT_WORD_BOUND */
        "word-begin",                   /* 32 WORD_BEGIN */
        "word-end",                     /* 33 WORD_END */
        "ascii-word",                   /* 34 ASCII_WORD */
        "not-ascii-word",               /* 35 ASCII_NOT_WORD */
        "ascii-word-bound",             /* 36 ASCII_WORD_BOUND */
        "not-ascii-word-bound",         /* 37 ASCII_NOT_WORD_BOUND */
        "ascii-word-begin",             /* 38 ASCII_WORD_BEGIN */
        "ascii-word-end",               /* 39 ASCII_WORD_END */
        "begin-buf",                    /* 40 BEGIN_BUF */
        "end-buf",                      /* 41 END_BUF */
        "begin-line",                   /* 42 BEGIN_LINE */
        "end-line",                     /* 43 END_LINE */
        "semi-end-buf",                 /* 44 SEMI_END_BUF */
        "begin-position",               /* 45 BEGIN_POSITION */
        "backref1",                     /* 46 BACKREF1 */
        "backref2",                     /* 47 BACKREF2 */
        "backrefn",                     /* 48 BACKREFN */
        "backrefn-ic",                  /* 49 BACKREFN_IC */
        "backref_multi",                /* 50 BACKREF_MULTI */
        "backref_multi-ic",             /* 51 BACKREF_MULTI_IC */
        "backref_at_level",             /* 52 BACKREF_WITH_LEVEL */
        "mem-start",                    /* 53 MEMORY_START */
        "mem-start-push",               /* 54 MEMORY_START_PUSH */
        "mem-end-push",                 /* 55 MEMORY_END_PUSH */
        "mem-end-push-rec",             /* 56 MEMORY_END_PUSH_REC */
        "mem-end",                      /* 57 MEMORY_END */
        "mem-end-rec",                  /* 58 MEMORY_END_REC */
        "keep",                         /* 59 KEEP */
        "fail",                         /* 60 FAIL */
        "jump",                         /* 61 JUMP */
        "push",                         /* 62 PUSH */
        "pop",                          /* 63 POP */
        "push-or-jump-e1",              /* 64 PUSH_OR_JUMP_EXACT1 */
        "push-if-peek-next",            /* 65 PUSH_IF_PEEK_NEXT */
        "repeat",                       /* 66 REPEAT */
        "repeat-ng",                    /* 67 REPEAT_NG */
        "repeat-inc",                   /* 68 REPEAT_INC */
        "repeat-inc-ng",                /* 69 REPEAT_INC_NG */
        "repeat-inc-sg",                /* 70 REPEAT_INC_SG */
        "repeat-inc-ng-sg",             /* 71 REPEAT_INC_NG_SG */
        "null-check-start",             /* 72 NULL_CHECK_START */
        "null-check-end",               /* 73 NULL_CHECK_END */
        "null-check-end-memst",         /* 74 NULL_CHECK_END_MEMST */
        "null-check-end-memst-push",    /* 75 NULL_CHECK_END_MEMST_PUSH */
        "push-pos",                     /* 76 PUSH_POS */
        "pop-pos",                      /* 77 POP_POS */
        "push-pos-not",                 /* 78 PUSH_POS_NOT */
        "fail-pos",                     /* 79 FAIL_POS */
        "push-stop-bt",                 /* 80 PUSH_STOP_BT */
        "pop-stop-bt",                  /* 81 POP_STOP_BT */
        "look-behind",                  /* 82 LOOK_BEHIND */
        "push-look-behind-not",         /* 83 PUSH_LOOK_BEHIND_NOT */
        "fail-look-behind-not",         /* 84 FAIL_LOOK_BEHIND_NOT */
        "push-absent-pos",              /* 85 PUSH_ABSENT_POS */
        "absent",                       /* 86 ABSENT */
        "absent-end",                   /* 87 ABSENT_END */
        "call",                         /* 88 CALL */
        "return",                       /* 89 RETURN */
        "condition",                    /* 90 CONDITION */
        "state-check-push",             /* 91 STATE_CHECK_PUSH */
        "state-check-push-or-jump",     /* 92 STATE_CHECK_PUSH_OR_JUMP */
        "state-check",                  /* 93 STATE_CHECK */
        "state-check-anychar*",         /* 94 STATE_CHECK_ANYCHAR_STAR */
        "state-check-anychar-ml*",      /* 95 STATE_CHECK_ANYCHAR_ML_STAR */
        "set-option-push",              /* 96 SET_OPTION_PUSH */
        "set-option",                   /* 97 SET_OPTION */
        "exact1-ic-sb",                 /* 98 EXACT1_IC_SB */
        "exactn-ic-sb",                 /* 99 EXACTN_IC_SB */
    } : null;

    /**
     * Operand kind (one of the {@code Arguments} constants) for each opcode,
     * indexed by opcode value; {@code null} when
     * {@code Config.DEBUG_COMPILE} is off.
     */
    public final int[] OpCodeArgTypes = Config.DEBUG_COMPILE ? new int[] {
        Arguments.NON,          /* 0  FINISH */
        Arguments.NON,          /* 1  END */
        Arguments.SPECIAL,      /* 2  EXACT1 */
        Arguments.SPECIAL,      /* 3  EXACT2 */
        Arguments.SPECIAL,      /* 4  EXACT3 */
        Arguments.SPECIAL,      /* 5  EXACT4 */
        Arguments.SPECIAL,      /* 6  EXACT5 */
        Arguments.SPECIAL,      /* 7  EXACTN */
        Arguments.SPECIAL,      /* 8  EXACTMB2N1 */
        Arguments.SPECIAL,      /* 9  EXACTMB2N2 */
        Arguments.SPECIAL,      /* 10 EXACTMB2N3 */
        Arguments.SPECIAL,      /* 11 EXACTMB2N */
        Arguments.SPECIAL,      /* 12 EXACTMB3N */
        Arguments.SPECIAL,      /* 13 EXACTMBN */
        Arguments.SPECIAL,      /* 14 EXACT1_IC */
        Arguments.SPECIAL,      /* 15 EXACTN_IC */
        Arguments.SPECIAL,      /* 16 CCLASS */
        Arguments.SPECIAL,      /* 17 CCLASS_MB */
        Arguments.SPECIAL,      /* 18 CCLASS_MIX */
        Arguments.SPECIAL,      /* 19 CCLASS_NOT */
        Arguments.SPECIAL,      /* 20 CCLASS_MB_NOT */
        Arguments.SPECIAL,      /* 21 CCLASS_MIX_NOT */
        Arguments.NON,          /* 22 ANYCHAR */
        Arguments.NON,          /* 23 ANYCHAR_ML */
        Arguments.NON,          /* 24 ANYCHAR_STAR */
        Arguments.NON,          /* 25 ANYCHAR_ML_STAR */
        Arguments.SPECIAL,      /* 26 ANYCHAR_STAR_PEEK_NEXT */
        Arguments.SPECIAL,      /* 27 ANYCHAR_ML_STAR_PEEK_NEXT */
        Arguments.NON,          /* 28 WORD */
        Arguments.NON,          /* 29 NOT_WORD */
        Arguments.NON,          /* 30 WORD_BOUND */
        Arguments.NON,          /* 31 NOT_WORD_BOUND */
        Arguments.NON,          /* 32 WORD_BEGIN */
        Arguments.NON,          /* 33 WORD_END */
        Arguments.NON,          /* 34 ASCII_WORD */
        Arguments.NON,          /* 35 ASCII_NOT_WORD */
        Arguments.NON,          /* 36 ASCII_WORD_BOUND */
        Arguments.NON,          /* 37 ASCII_NOT_WORD_BOUND */
        Arguments.NON,          /* 38 ASCII_WORD_BEGIN */
        Arguments.NON,          /* 39 ASCII_WORD_END */
        Arguments.NON,          /* 40 BEGIN_BUF */
        Arguments.NON,          /* 41 END_BUF */
        Arguments.NON,          /* 42 BEGIN_LINE */
        Arguments.NON,          /* 43 END_LINE */
        Arguments.NON,          /* 44 SEMI_END_BUF */
        Arguments.NON,          /* 45 BEGIN_POSITION */
        Arguments.NON,          /* 46 BACKREF1 */
        Arguments.NON,          /* 47 BACKREF2 */
        Arguments.MEMNUM,       /* 48 BACKREFN */
        Arguments.SPECIAL,      /* 49 BACKREFN_IC */
        Arguments.SPECIAL,      /* 50 BACKREF_MULTI */
        Arguments.SPECIAL,      /* 51 BACKREF_MULTI_IC */
        Arguments.SPECIAL,      /* 52 BACKREF_WITH_LEVEL */
        Arguments.MEMNUM,       /* 53 MEMORY_START */
        Arguments.MEMNUM,       /* 54 MEMORY_START_PUSH */
        Arguments.MEMNUM,       /* 55 MEMORY_END_PUSH */
        Arguments.MEMNUM,       /* 56 MEMORY_END_PUSH_REC */
        Arguments.MEMNUM,       /* 57 MEMORY_END */
        Arguments.MEMNUM,       /* 58 MEMORY_END_REC */
        Arguments.NON,          /* 59 KEEP */
        Arguments.NON,          /* 60 FAIL */
        Arguments.RELADDR,      /* 61 JUMP */
        Arguments.RELADDR,      /* 62 PUSH */
        Arguments.NON,          /* 63 POP */
        Arguments.SPECIAL,      /* 64 PUSH_OR_JUMP_EXACT1 */
        Arguments.SPECIAL,      /* 65 PUSH_IF_PEEK_NEXT */
        Arguments.SPECIAL,      /* 66 REPEAT */
        Arguments.SPECIAL,      /* 67 REPEAT_NG */
        Arguments.MEMNUM,       /* 68 REPEAT_INC */
        Arguments.MEMNUM,       /* 69 REPEAT_INC_NG */
        Arguments.MEMNUM,       /* 70 REPEAT_INC_SG */
        Arguments.MEMNUM,       /* 71 REPEAT_INC_NG_SG */
        Arguments.MEMNUM,       /* 72 NULL_CHECK_START */
        Arguments.MEMNUM,       /* 73 NULL_CHECK_END */
        Arguments.MEMNUM,       /* 74 NULL_CHECK_END_MEMST */
        Arguments.MEMNUM,       /* 75 NULL_CHECK_END_MEMST_PUSH */
        Arguments.NON,          /* 76 PUSH_POS */
        Arguments.NON,          /* 77 POP_POS */
        Arguments.RELADDR,      /* 78 PUSH_POS_NOT */
        Arguments.NON,          /* 79 FAIL_POS */
        Arguments.NON,          /* 80 PUSH_STOP_BT */
        Arguments.NON,          /* 81 POP_STOP_BT */
        Arguments.SPECIAL,      /* 82 LOOK_BEHIND */
        Arguments.SPECIAL,      /* 83 PUSH_LOOK_BEHIND_NOT */
        Arguments.NON,          /* 84 FAIL_LOOK_BEHIND_NOT */
        Arguments.NON,          /* 85 PUSH_ABSENT_POS */
        Arguments.RELADDR,      /* 86 ABSENT */
        Arguments.NON,          /* 87 ABSENT_END */
        Arguments.ABSADDR,      /* 88 CALL */
        Arguments.NON,          /* 89 RETURN */
        Arguments.SPECIAL,      /* 90 CONDITION */
        Arguments.SPECIAL,      /* 91 STATE_CHECK_PUSH */
        Arguments.SPECIAL,      /* 92 STATE_CHECK_PUSH_OR_JUMP */
        Arguments.STATE_CHECK,  /* 93 STATE_CHECK */
        Arguments.STATE_CHECK,  /* 94 STATE_CHECK_ANYCHAR_STAR */
        Arguments.STATE_CHECK,  /* 95 STATE_CHECK_ANYCHAR_ML_STAR */
        Arguments.OPTION,       /* 96 SET_OPTION_PUSH */
        Arguments.OPTION,       /* 97 SET_OPTION */
        Arguments.SPECIAL,      /* 98 EXACT1_IC_SB */
        Arguments.SPECIAL,      /* 99 EXACTN_IC_SB */
    } : null;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/OPSize.java000066400000000000000000000102001400407002500240310ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the
"Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.constants.internal; public interface OPSize { // this might be helpful for potential byte[] migration final int OPCODE = 1; final int RELADDR = 1; final int ABSADDR = 1; final int LENGTH = 1; final int MEMNUM = 1; final int STATE_CHECK_NUM = 1; final int REPEATNUM = 1; final int OPTION = 1; final int CODE_POINT = 1; final int POINTER = 1; final int INDEX = 1; /* op-code + arg size */ final int ANYCHAR_STAR = OPCODE; final int ANYCHAR_STAR_PEEK_NEXT = (OPCODE + 1); final int JUMP = (OPCODE + RELADDR); final int PUSH = (OPCODE + RELADDR); final int POP = OPCODE; final int PUSH_OR_JUMP_EXACT1 = (OPCODE + RELADDR + 1); final int PUSH_IF_PEEK_NEXT = (OPCODE + RELADDR + 1); final int REPEAT_INC = (OPCODE + MEMNUM); final int REPEAT_INC_NG = (OPCODE + MEMNUM); final int PUSH_POS = OPCODE; final int PUSH_POS_NOT = (OPCODE + RELADDR); final int POP_POS = OPCODE; final int FAIL_POS = OPCODE; final int SET_OPTION = (OPCODE + OPTION); final int SET_OPTION_PUSH = (OPCODE + OPTION); final int FAIL = OPCODE; final int MEMORY_START = (OPCODE + MEMNUM); final int 
MEMORY_START_PUSH = (OPCODE + MEMNUM); final int MEMORY_END_PUSH = (OPCODE + MEMNUM); final int MEMORY_END_PUSH_REC = (OPCODE + MEMNUM); final int MEMORY_END = (OPCODE + MEMNUM); final int MEMORY_END_REC = (OPCODE + MEMNUM); final int PUSH_STOP_BT = OPCODE; final int POP_STOP_BT = OPCODE; final int NULL_CHECK_START = (OPCODE + MEMNUM); final int NULL_CHECK_END = (OPCODE + MEMNUM); final int LOOK_BEHIND = (OPCODE + LENGTH); final int PUSH_LOOK_BEHIND_NOT = (OPCODE + RELADDR + LENGTH); final int FAIL_LOOK_BEHIND_NOT = OPCODE; final int CALL = (OPCODE + ABSADDR); final int RETURN = OPCODE; final int CONDITION = (OPCODE + MEMNUM + RELADDR); final int PUSH_ABSENT_POS = OPCODE; final int ABSENT = (OPCODE + RELADDR); final int ABSENT_END = OPCODE; // #ifdef USE_COMBINATION_EXPLOSION_CHECK final int STATE_CHECK = (OPCODE + STATE_CHECK_NUM); final int STATE_CHECK_PUSH = (OPCODE + STATE_CHECK_NUM + RELADDR); final int STATE_CHECK_PUSH_OR_JUMP = (OPCODE + STATE_CHECK_NUM + RELADDR); final int STATE_CHECK_ANYCHAR_STAR = (OPCODE + STATE_CHECK_NUM); } jruby-joni-2.1.41/src/org/joni/constants/internal/StackPopLevel.java000066400000000000000000000023301400407002500254010ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.constants.internal;

/**
 * The three stack pop levels, numbered 0 to 2.
 */
public interface StackPopLevel {
    // Interface fields are implicitly public, static and final.
    int FREE      = 0;
    int MEM_START = 1;
    int ALL       = 2;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/StackType.java000066400000000000000000000051701400407002500246010ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.constants.internal;

/**
 * Type tags for stack entries, the invalid-index sentinel, and the masks
 * used to test groups of those tags.
 */
public interface StackType {
    // Interface fields are implicitly public, static and final.

    /** stack **/
    int INVALID_STACK_INDEX = -1;

    /* stack type */

    /* used by normal-POP */
    int ALT              = 0x1;
    int LOOK_BEHIND_NOT  = 0x2;
    int POS_NOT          = 0x3;

    /* handled by normal-POP */
    int MEM_START        = 0x100;
    int MEM_END          = 0x8200;
    int REPEAT_INC       = 0x300;
    int STATE_CHECK_MARK = 0x1000;

    /* avoided by normal-POP */
    int NULL_CHECK_START = 0x3000;
    int NULL_CHECK_END   = 0x5000; /* for recursive call */
    int MEM_END_MARK     = 0x8400;
    int POS              = 0x500;  /* used when POP-POS */
    int STOP_BT          = 0x600;  /* mark for "(?>...)" */
    int REPEAT           = 0x700;
    int CALL_FRAME       = 0x800;
    int RETURN           = 0x900;
    int VOID             = 0xa00;  /* for fill a blank */
    int ABSENT_POS       = 0xb00;  /* for absent */
    int ABSENT           = 0xc00;  /* absent inner loop marker */

    /* stack type check mask */
    int MASK_POP_USED        = 0xff;
    int MASK_TO_VOID_TARGET  = 0x10ff;
    int MASK_MEM_END_OR_MARK = 0x8000; /* MEM_END or MEM_END_MARK */
}
jruby-joni-2.1.41/src/org/joni/constants/internal/StringType.java000066400000000000000000000024611400407002500250020ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.constants.internal;

/**
 * Bit flags for string nodes; each constant is a single bit.
 */
public interface StringType {
    // Interface fields are implicitly public, static and final.
    int NSTR_RAW               = 0x1;
    int NSTR_AMBIG             = 0x2;
    int NSTR_DONT_GET_OPT_INFO = 0x4;
    int NSTR_SHARED            = 0x8;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/TargetInfo.java000066400000000000000000000024011400407002500247260ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.constants.internal;

/**
 * Four target-emptiness codes, numbered 0 to 3.
 */
public interface TargetInfo {
    // Interface fields are implicitly public, static and final.
    int ISNOT_EMPTY  = 0;
    int IS_EMPTY     = 1;
    int IS_EMPTY_MEM = 2;
    int IS_EMPTY_REC = 3;
}
jruby-joni-2.1.41/src/org/joni/constants/internal/TokenType.java000066400000000000000000000033031400407002500246100ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.constants.internal;

/**
 * Token kinds. Declaration order is kept exactly as-is so that ordinals do
 * not change; the constants after the "in cc" marker are the ones the
 * original groups under that heading.
 */
public enum TokenType {
    EOT,                        /* end of token */
    RAW_BYTE,
    CHAR,
    STRING,
    CODE_POINT,
    ANYCHAR,
    CHAR_TYPE,
    BACKREF,
    CALL,
    ANCHOR,
    OP_REPEAT,
    INTERVAL,
    ANYCHAR_ANYTIME,            /* SQL '%' == .* */
    ALT,
    SUBEXP_OPEN,
    SUBEXP_CLOSE,
    CC_OPEN,
    QUOTE_OPEN,
    CHAR_PROPERTY,              /* \p{...}, \P{...} */
    LINEBREAK,
    EXTENDED_GRAPHEME_CLUSTER,
    KEEP,

    /* in cc */
    CC_CLOSE,
    CC_RANGE,
    POSIX_BRACKET_OPEN,
    CC_AND,                     /* && */
    CC_CC_OPEN                  /* [ */
}
jruby-joni-2.1.41/src/org/joni/constants/internal/Traverse.java000066400000000000000000000024701400407002500244650ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.constants.internal;

/**
 * Flags selecting when a traversal callback fires: at first, at last, or
 * both (the OR of the two).
 */
public interface Traverse {
    // Interface fields are implicitly public, static and final.
    int TRAVERSE_CALLBACK_AT_FIRST = 0x1;
    int TRAVERSE_CALLBACK_AT_LAST  = 0x2;
    int TRAVERSE_CALLBACK_AT_BOTH  = TRAVERSE_CALLBACK_AT_LAST | TRAVERSE_CALLBACK_AT_FIRST;
}
jruby-joni-2.1.41/src/org/joni/exception/000077500000000000000000000000001400407002500201725ustar00rootroot00000000000000jruby-joni-2.1.41/src/org/joni/exception/ErrorMessages.java000066400000000000000000000131471400407002500236240ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
package org.joni.exception;

import org.joni.Config;

/**
 * Error message texts, added on top of those inherited from
 * {@code org.jcodings.exception.ErrorMessages}.
 *
 * Fields of an interface are implicitly {@code public static final}, so the
 * modifiers are omitted. Messages containing {@code %n} carry a placeholder
 * that {@code ValueException(String, String)} substitutes.
 */
public interface ErrorMessages extends org.jcodings.exception.ErrorMessages {

    /* internal error */
    String PARSER_BUG = "internal parser error (bug)";
    String UNDEFINED_BYTECODE = "undefined bytecode (bug)";
    String UNEXPECTED_BYTECODE = "unexpected bytecode (bug)";
    String TOO_MANY_CAPTURE_GROUPS = "too many capture groups are specified";

    /* general error */
    String INVALID_ARGUMENT = "invalid argument";

    /* syntax error */
    String END_PATTERN_AT_LEFT_BRACE = "end pattern at left brace";
    String END_PATTERN_AT_LEFT_BRACKET = "end pattern at left bracket";
    String EMPTY_CHAR_CLASS = "empty char-class";
    String PREMATURE_END_OF_CHAR_CLASS = "premature end of char-class";
    String END_PATTERN_AT_ESCAPE = "end pattern at escape";
    String END_PATTERN_AT_META = "end pattern at meta";
    String END_PATTERN_AT_CONTROL = "end pattern at control";
    String META_CODE_SYNTAX = "invalid meta-code syntax";
    String CONTROL_CODE_SYNTAX = "invalid control-code syntax";
    String CHAR_CLASS_VALUE_AT_END_OF_RANGE = "char-class value at end of range";
    String CHAR_CLASS_VALUE_AT_START_OF_RANGE = "char-class value at start of range";
    String UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS = "unmatched range specifier in char-class";
    String TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED = "target of repeat operator is not specified";
    String TARGET_OF_REPEAT_OPERATOR_INVALID = "target of repeat operator is invalid";
    String NESTED_REPEAT_NOT_ALLOWED = "nested repeat is not allowed";
    String NESTED_REPEAT_OPERATOR = "nested repeat operator";
    String UNMATCHED_CLOSE_PARENTHESIS = "unmatched close parenthesis";
    String END_PATTERN_WITH_UNMATCHED_PARENTHESIS = "end pattern with unmatched parenthesis";
    String END_PATTERN_IN_GROUP = "end pattern in group";
    String UNDEFINED_GROUP_OPTION = "undefined group option";
    String INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type";
    String INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind";
    String INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}";
    String INVALID_CONDITION_PATTERN = "invalid conditional pattern";

    /* values error (syntax error) */
    String TOO_BIG_NUMBER = "too big number";
    String TOO_BIG_NUMBER_FOR_REPEAT_RANGE = "too big number for repeat range";
    String UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE = "upper is smaller than lower in repeat range";
    String EMPTY_RANGE_IN_CHAR_CLASS = "empty range in char class";
    String MISMATCH_CODE_LENGTH_IN_CLASS_RANGE = "mismatch multibyte code length in char-class range";
    String TOO_MANY_MULTI_BYTE_RANGES = "too many multibyte code ranges are specified";
    String TOO_SHORT_MULTI_BYTE_STRING = "too short multibyte code string";
    String TOO_BIG_BACKREF_NUMBER = "too big backref number";
    /* wording depends on Config.USE_NAMED_GROUP */
    String INVALID_BACKREF = Config.USE_NAMED_GROUP
            ? "invalid backref number/name"
            : "invalid backref number";
    String NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED = "numbered backref/call is not allowed. (use name)";
    String TOO_SHORT_DIGITS = "too short digits";
    String INVALID_WIDE_CHAR_VALUE = "invalid wide-char value";
    String EMPTY_GROUP_NAME = "group name is empty";
    String INVALID_GROUP_NAME = "invalid group name <%n>";
    /* wording depends on Config.USE_NAMED_GROUP */
    String INVALID_CHAR_IN_GROUP_NAME = Config.USE_NAMED_GROUP
            ? "invalid char in group name <%n>"
            : "invalid char in group number <%n>";
    String UNDEFINED_NAME_REFERENCE = "undefined name <%n> reference";
    String UNDEFINED_GROUP_REFERENCE = "undefined group <%n> reference";
    String MULTIPLEX_DEFINED_NAME = "multiplex defined name <%n>";
    String MULTIPLEX_DEFINITION_NAME_CALL = "multiplex definition name <%n> call";
    String PROPERTY_NAME_NEVER_TERMINATED = "property name never terminated \\p{%n";
    String NEVER_ENDING_RECURSION = "never ending recursion";
    String GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY = "group number is too big for capture history";
    String NOT_SUPPORTED_ENCODING_COMBINATION = "not supported encoding combination";
    String INVALID_COMBINATION_OF_OPTIONS = "invalid combination of options";
    String OVER_THREAD_PASS_LIMIT_COUNT = "over thread pass limit count";
    String TOO_BIG_SB_CHAR_VALUE = "too big singlebyte char value";
}
 jruby-joni-2.1.41/src/org/joni/exception/InternalException.java000066400000000000000000000024421400407002500244720ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */
package org.joni.exception;

/**
 * A {@link JOniException} subtype that only forwards its message to the
 * superclass.
 */
public class InternalException extends JOniException {

    private static final long serialVersionUID = -3871816465397927992L;

    /**
     * @param message detail message passed unchanged to the superclass
     */
    public InternalException(String message) {
        super(message);
    }
}
 jruby-joni-2.1.41/src/org/joni/exception/JOniException.java000066400000000000000000000024351400407002500235570ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package org.joni.exception;

/**
 * Unchecked exception (it extends {@link RuntimeException}) that the other
 * exception classes in this package derive from.
 */
public class JOniException extends RuntimeException {

    private static final long serialVersionUID = -6027192180014164667L;

    /**
     * @param message detail message, available later through {@link #getMessage()}
     */
    public JOniException(String message) {
        super(message);
    }
}
 jruby-joni-2.1.41/src/org/joni/exception/SyntaxException.java000066400000000000000000000024351400407002500242060ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package org.joni.exception;

/**
 * A {@link JOniException} subtype that only forwards its message to the
 * superclass; {@code ValueException} extends it.
 */
public class SyntaxException extends JOniException {

    private static final long serialVersionUID = 7862720128961874288L;

    /**
     * @param message detail message passed unchanged to the superclass
     */
    public SyntaxException(String message) {
        super(message);
    }
}
 jruby-joni-2.1.41/src/org/joni/exception/ValueException.java000066400000000000000000000030211400407002500237640ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/
package org.joni.exception;

/**
 * A {@link SyntaxException} whose message may embed an offending value in
 * place of the {@code %n} placeholder of a message template.
 */
public class ValueException extends SyntaxException {

    private static final long serialVersionUID = -196013852479929134L;

    /**
     * @param message detail message, used as is
     */
    public ValueException(String message) {
        super(message);
    }

    /**
     * Builds the message by replacing every {@code %n} in the template with
     * {@code str}.
     *
     * The substitution is literal. The previous {@code replaceAll} call
     * treated {@code str} as a regex replacement template, so a value
     * containing {@code $} or a backslash either threw
     * (IllegalArgumentException / IndexOutOfBoundsException) or was silently
     * altered instead of being reported verbatim.
     *
     * @param message template containing zero or more {@code %n} placeholders
     * @param str     text inserted for each placeholder
     */
    public ValueException(String message, String str) {
        super(message.replace("%n", str));
    }

    /**
     * Same as {@link #ValueException(String, String)}, taking the inserted
     * text from {@code bytes[p .. end)}.
     *
     * NOTE(review): the bytes are decoded with the platform default charset,
     * which may not be the pattern's encoding - confirm whether that is intended.
     *
     * @param message template containing zero or more {@code %n} placeholders
     * @param bytes   buffer holding the value
     * @param p       start offset (inclusive)
     * @param end     end offset (exclusive)
     */
    public ValueException(String message, byte[] bytes, int p, int end) {
        this(message, new String(bytes, p, end - p));
    }
}
 jruby-joni-2.1.41/test/000077500000000000000000000000001400407002500146365ustar00rootroot00000000000000jruby-joni-2.1.41/test/org/000077500000000000000000000000001400407002500154255ustar00rootroot00000000000000jruby-joni-2.1.41/test/org/joni/000077500000000000000000000000001400407002500163645ustar00rootroot00000000000000jruby-joni-2.1.41/test/org/joni/test/000077500000000000000000000000001400407002500173435ustar00rootroot00000000000000jruby-joni-2.1.41/test/org/joni/test/Test.java000066400000000000000000000264551400407002500211410ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import static org.junit.Assert.assertEquals; import java.io.UnsupportedEncodingException; import org.jcodings.Encoding; import org.jcodings.exception.CharacterPropertyException; import org.joni.Config; import org.joni.Matcher; import org.joni.Option; import org.joni.Regex; import org.joni.Region; import org.joni.Syntax; import org.joni.WarnCallback; import org.joni.exception.JOniException; public abstract class Test { static final boolean VERBOSE = false; int nsucc; int nerror; int nfail; public abstract int option(); public abstract Encoding encoding(); public abstract String testEncoding(); public abstract Syntax syntax(); protected String repr(byte[] bytes) { return new String(bytes); } protected int length(byte[] bytes) { return bytes.length; } protected String reprTest(byte[] pattern, byte[]str, int option) { StringBuilder sb = new StringBuilder(); sb.append("Pattern: [/").append(repr(pattern)).append("/]"); sb.append(" Str: [\"").append(repr(str)).append("\"]"); sb.append(" Encoding: [" + encoding() + "]"); sb.append(" Option: [" + Option.toString(option) + "]"); sb.append(" Syntax: [" + syntax().name + "]"); return sb.toString(); } protected void assertTrue(boolean expression, String failMessage) { if (expression) { nsucc++; } else { Config.err.println(failMessage); nfail++; } } public void xerrs(String pattern, String msg) throws Exception { xerr(pattern.getBytes(testEncoding()), msg, option()); } public void xerrs(String pattern, String msg, int option) throws Exception { xerr(pattern.getBytes(testEncoding()), msg, option); } public void xerr(byte[] pattern, String msg, int option) throws Exception { try { new Regex(pattern, 0, length(pattern), option, encoding(), 
syntax(), WarnCallback.NONE); nfail++; } catch (JOniException je) { nsucc++; assertEquals(je.getMessage(), msg); } catch (CharacterPropertyException cpe) { nsucc++; assertEquals(cpe.getMessage(), msg); } } public void xx(byte[] pattern, byte[] str, int from, int to, int mem, boolean not) throws InterruptedException { xx(pattern, str, from, to, mem, not, option()); } public void xx(byte[] pattern, byte[] str, int gpos, int from, int to, int mem, boolean not) throws InterruptedException { xx(pattern, str, gpos, 0, from, to, mem, not, option()); } static boolean is7bit(byte[]bytes, int p, int end) { for (int i = p; i < end; i++) { if ((bytes[i] & 0xff) >= 0x80) return false; } return true; } public int xx(byte[] pattern, byte[] str, int from, int to, int mem, boolean not, int option) throws InterruptedException { return xx(pattern, str, 0, 0, from, to, mem, not, option); } public int xx(byte[] pattern, byte[] str, int gpos, int searchStart, int from, int to, int mem, boolean not, int option) throws InterruptedException { Regex reg; try { reg = new Regex(pattern, 0, length(pattern), option, encoding(), syntax(), WarnCallback.NONE); } catch (JOniException je) { Config.err.println(reprTest(pattern, str, option)); je.printStackTrace(Config.err); Config.err.println("ERROR: " + je.getMessage()); nerror++; return Matcher.FAILED; } catch (Exception e) { Config.err.println(reprTest(pattern, str, option)); e.printStackTrace(Config.err); Config.err.println("SEVERE ERROR: " + e.getMessage()); nerror++; return Matcher.FAILED; } if ((!encoding().isSingleByte()) && encoding().isAsciiCompatible() && is7bit(str, 0, str.length)) { check(reg, pattern, str, option | Option.CR_7_BIT, gpos, searchStart, from, to, mem, not); } return check(reg, pattern, str, option, gpos, searchStart, from, to, mem, not); } private int check(Regex reg, byte[]pattern, byte[]str, int option, int gpos, int searchStart, int from, int to, int mem, boolean not) throws InterruptedException { Matcher m = 
reg.matcher(str, 0, length(str)); final Region region; final int result; try { result = m.searchInterruptible(gpos, searchStart, length(str), option); region = m.getEagerRegion(); } catch (JOniException je) { Config.err.println("Pattern: " + reprTest(pattern, str, option)); je.printStackTrace(Config.err); Config.err.println("ERROR: " + je.getMessage()); nerror++; return Matcher.FAILED; } catch (InterruptedException e) { throw e; } catch (Exception e) { Config.err.println("Pattern: " + reprTest(pattern, str, option)); e.printStackTrace(Config.err); Config.err.println("SEVERE ERROR: " + e.getMessage()); nerror++; return Matcher.FAILED; } if (result == -1) { if (not) { if (VERBOSE) Config.log.println("OK(NOT): " + reprTest(pattern, str, option)); nsucc++; } else { Config.log.println("FAIL: " + reprTest(pattern, str, option) + " GPOS: " + gpos + " Start: " + searchStart); nfail++; } } else { if (not) { Config.log.println("FAIL(NOT): " + reprTest(pattern, str, option)); nfail++; } else { if (region.beg[mem] == from && region.end[mem] == to) { if (VERBOSE) Config.log.println("OK: " + reprTest(pattern, str, option)); nsucc++; } else { Config.log.println("FAIL: " + reprTest(pattern, str, option) + " GPOS: " + gpos + " Start: " + searchStart + " Groups: [Exp " + from + "-" + to + ", Act " + region.beg[mem] + "-" + region.end[mem] + "]"); nfail++; } } } return result; } protected void x2(byte[] pattern, byte[] str, int from, int to) throws InterruptedException { xx(pattern, str, from, to, 0, false); } protected void x2(byte[] pattern, byte[] str, int from, int to, int option) throws InterruptedException { xx(pattern, str, from, to, 0, false, option); } protected void x3(byte[] pattern, byte[] str, int from, int to, int mem) throws InterruptedException { xx(pattern, str, from, to, mem, false); } protected void n(byte[] pattern, byte[] str) throws InterruptedException { xx(pattern, str, 0, 0, 0, true); } protected void n(byte[] pattern, byte[] str, int option) throws 
InterruptedException { xx(pattern, str, 0, 0, 0, true, option); } protected void n(byte[] pattern, byte[] str, int gpos, int option) throws InterruptedException { xx(pattern, str, gpos, 0, 0, 0, 0, true, option); } public void xxs(String pattern, String str, int from, int to, int mem, boolean not) throws InterruptedException { xxs(pattern, str, from, to, mem, not, option()); } public void xxs(String pattern, String str, int from, int to, int mem, boolean not, int option) throws InterruptedException { try { xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), from, to, mem, not, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } public int x2s(String pattern, String str, int from, int to) throws InterruptedException { return x2s(pattern, str, from, to, option()); } public int x2s(String pattern, String str, int from, int to, int option) throws InterruptedException { try { return xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), from, to, 0, false, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); return Matcher.FAILED; } } public int x2s(String pattern, String str, int gpos, int searchStart, int from, int to) throws InterruptedException { return x2s(pattern, str, gpos, searchStart, from, to, option()); } public int x2s(String pattern, String str, int gpos, int searchStart, int from, int to, int option) throws InterruptedException { try { return xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), gpos, searchStart, from, to, 0, false, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); return Matcher.FAILED; } } public void x3s(String pattern, String str, int from, int to, int mem) throws InterruptedException { x3s(pattern, str, from, to, mem, option()); } public void x3s(String pattern, String str, int from, int to, int mem, int option) throws InterruptedException { try { xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), 
from, to, mem, false, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } public void ns(String pattern, String str) throws InterruptedException { ns(pattern, str, option()); } public void ns(String pattern, String str, int option) throws InterruptedException { ns(pattern, str, 0, option); } public void ns(String pattern, String str, int gpos, int option) throws InterruptedException { try { xx(pattern.getBytes(testEncoding()), str.getBytes(testEncoding()), gpos, 0, 0, 0, 0, true, option); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } } @org.junit.Test public void testRegexp() throws Exception { test(); Config.log.println("RESULT SUCC: " + nsucc + ", FAIL: " + nfail + ", ERROR: " + nerror + " Test: " + getClass().getSimpleName() + ", Encoding: " + encoding()); assertEquals(0, nfail + nerror); } public abstract void test() throws Exception; } jruby-joni-2.1.41/test/org/joni/test/TestA.java000066400000000000000000000534701400407002500212370ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; import org.joni.Option; import org.joni.Syntax; import org.joni.exception.ErrorMessages; /** Regex test cases run with ASCIIEncoding, Option.DEFAULT and Syntax.TEST; String arguments are converted to bytes with the "iso-8859-2" charset. NOTE(review): several patterns in test() that involve named groups or g/k references look truncated (for example "(?a)"), presumably because angle-bracketed names were lost when this text was extracted - verify against the upstream sources before relying on them. */ public class TestA extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return ASCIIEncoding.INSTANCE; } @Override public String testEncoding() { return "iso-8859-2"; } @Override public Syntax syntax() { return Syntax.TEST; } /** Lists the cases. x2s(pattern, str, from, to) expects a match whose group 0 spans from..to; x3s takes a further argument naming the capture group whose span is checked; ns expects no match; xerrs expects compiling the pattern to fail with the given message. */ @Override public void test() throws Exception { x2s("", "", 0, 0); x2s("^", "", 0, 0); x2s("$", "", 0, 0); x2s("\\G", "", 0, 0); x2s("\\A", "", 0, 0); x2s("\\Z", "", 0, 0); x2s("\\z", "", 0, 0); x2s("^$", "", 0, 0); x2s("\\ca", "\001", 0, 1); x2s("\\C-b", "\002", 0, 1); x2s("\\c\\\\", "\034", 0, 1); x2s("q[\\c\\\\]", "q\034", 0, 2); x2s("", "a", 0, 0); x2s("a", "a", 0, 1); x2s("\\x61", "a", 0, 1); x2s("aa", "aa", 0, 2); x2s("aaa", "aaa", 0, 3); x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); x2s("ab", "ab", 0, 2); x2s("b", "ab", 1, 2); x2s("bc", "abc", 1, 3); x2s("(?i:#RET#)", "#INS##RET#", 5, 10); x2s("\\17", "\017", 0, 1); x2s("\\x1f", "\u001f", 0, 1); x2s("\\xED\\xF2", "\u00ed\u0148", 0, 2); x2s("a(?#....\\\\JJJJ)b", "ab", 0, 2); x2s("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); x2s(".", "a", 0, 1); ns(".", ""); x2s("..", "ab", 0, 2); x2s("\\w", "e", 0, 1); ns("\\W", "e"); x2s("\\s", " ", 0, 1); x2s("\\S", "b", 0, 1); x2s("\\d", "4", 0, 1); ns("\\D", "4"); x2s("\\b", "z ", 0, 0); x2s("\\b", " z", 1, 1); x2s("\\B", "zz ", 1, 1); x2s("\\B", "z ", 2, 2); x2s("\\B", " z", 0, 0); x2s("[ab]", "b", 0, 1); ns("[ab]", "c"); x2s("[a-z]", "t", 0, 1); ns("[^a]", "a"); x2s("[^a]", "\n", 0, 1); x2s("[]]", 
"]", 0, 1); ns("[^]]", "]"); x2s("[\\^]+", "0^^1", 1, 3); x2s("[b-]", "b", 0, 1); x2s("[b-]", "-", 0, 1); x2s("[\\w]", "z", 0, 1); ns("[\\w]", " "); x2s("[\\W]", "b$", 1, 2); x2s("[\\d]", "5", 0, 1); ns("[\\d]", "e"); x2s("[\\D]", "t", 0, 1); ns("[\\D]", "3"); x2s("[\\s]", " ", 0, 1); ns("[\\s]", "a"); x2s("[\\S]", "b", 0, 1); ns("[\\S]", " "); x2s("[\\w\\d]", "2", 0, 1); ns("[\\w\\d]", " "); x2s("[[:upper:]]", "B", 0, 1); x2s("[*[:xdigit:]+]", "+", 0, 1); x2s("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); x2s("[*[:xdigit:]+]", "-@^+", 3, 4); ns("[[:upper]]", "A"); x2s("[[:upper]]", ":", 0, 1); x2s("[\\044-\\047]", "\046", 0, 1); x2s("[\\x5a-\\x5c]", "\u005b", 0, 1); x2s("[\\x6A-\\x6D]", "\u006c", 0, 1); ns("[\\x6A-\\x6D]", "\u006e"); ns("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); x2s("[\\[]", "[", 0, 1); x2s("[\\]]", "]", 0, 1); x2s("[&]", "&", 0, 1); x2s("[[ab]]", "b", 0, 1); x2s("[[ab]c]", "c", 0, 1); ns("[[^a]]", "a"); ns("[^[a]]", "a"); x2s("[[ab]&&bc]", "b", 0, 1); ns("[[ab]&&bc]", "a"); ns("[[ab]&&bc]", "c"); x2s("[a-z&&b-y&&c-x]", "w", 0, 1); ns("[^a-z&&b-y&&c-x]", "w"); x2s("[[^a&&a]&&a-z]", "b", 0, 1); ns("[[^a&&a]&&a-z]", "a"); x2s("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); ns("[[^a-z&&bcdef]&&[^c-g]]", "c"); x2s("[^[^abc]&&[^cde]]", "c", 0, 1); x2s("[^[^abc]&&[^cde]]", "e", 0, 1); ns("[^[^abc]&&[^cde]]", "f"); x2s("[a-&&-a]", "-", 0, 1); ns("[a\\-&&\\-a]", "&"); ns("\\wabc", " abc"); x2s("a\\Wbc", "a bc", 0, 4); x2s("a.b.c", "aabbc", 0, 5); x2s(".\\wb\\W..c", "abb bcc", 0, 7); x2s("\\s\\wzzz", " zzzz", 0, 5); x2s("aa.b", "aabb", 0, 4); ns(".a", "ab"); x2s(".a", "aa", 0, 2); x2s("^a", "a", 0, 1); x2s("^a$", "a", 0, 1); x2s("^\\w$", "a", 0, 1); ns("^\\w$", " "); x2s("^\\wab$", "zab", 0, 3); x2s("^\\wabcdef$", "zabcdef", 0, 7); x2s("^\\w...def$", "zabcdef", 0, 7); x2s("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); x2s("\\A\\Z", "", 0, 0); x2s("\\Axyz", "xyz", 0, 3); x2s("xyz\\Z", "xyz", 0, 3); x2s("xyz\\z", "xyz", 0, 3); x2s("a\\Z", "a", 0, 
1); x2s("\\Gaz", "az", 0, 2); ns("\\Gz", "bza"); ns("az\\G", "az"); ns("az\\A", "az"); ns("a\\Az", "az"); x2s("\\^\\$", "^$", 0, 2); x2s("^x?y", "xy", 0, 2); x2s("^(x?y)", "xy", 0, 2); x2s("\\w", "_", 0, 1); ns("\\W", "_"); x2s("(?=z)z", "z", 0, 1); ns("(?=z).", "a"); x2s("(?!z)a", "a", 0, 1); ns("(?!z)a", "z"); x2s("(?i:a)", "a", 0, 1); x2s("(?i:a)", "A", 0, 1); x2s("(?i:A)", "a", 0, 1); ns("(?i:A)", "b"); x2s("(?i:[A-Z])", "a", 0, 1); x2s("(?i:[f-m])", "H", 0, 1); x2s("(?i:[f-m])", "h", 0, 1); ns("(?i:[f-m])", "e"); x2s("(?i:[A-c])", "D", 0, 1); x2s("(?i:[!-k])", "Z", 0, 1); x2s("(?i:[!-k])", "7", 0, 1); x2s("(?i:[T-}])", "b", 0, 1); x2s("(?i:[T-}])", "{", 0, 1); x2s("(?i:\\?a)", "?A", 0, 2); x2s("(?i:\\*A)", "*a", 0, 2); ns(".", "\n"); x2s("(?m:.)", "\n", 0, 1); x2s("(?m:a.)", "a\n", 0, 2); x2s("(?m:.b)", "a\nb", 1, 3); x2s(".*abc", "dddabdd\nddabc", 8, 13); x2s("(?m:.*abc)", "dddabddabc", 0, 10); ns("(?i)(?-i)a", "A"); ns("(?i)(?-i:a)", "A"); x2s("a?", "", 0, 0); x2s("a?", "b", 0, 0); x2s("a?", "a", 0, 1); x2s("a*", "", 0, 0); x2s("a*", "a", 0, 1); x2s("a*", "aaa", 0, 3); x2s("a*", "baaaa", 0, 0); ns("a+", ""); x2s("a+", "a", 0, 1); x2s("a+", "aaaa", 0, 4); x2s("a+", "aabbb", 0, 2); x2s("a+", "baaaa", 1, 5); x2s(".?", "", 0, 0); x2s(".?", "f", 0, 1); x2s(".?", "\n", 0, 0); x2s(".*", "", 0, 0); x2s(".*", "abcde", 0, 5); x2s(".+", "z", 0, 1); x2s(".+", "zdswer\n", 0, 6); x2s("(.*)a\\1f", "babfbac", 0, 4); x2s("(.*)a\\1f", "bacbabf", 3, 7); x2s("((.*)a\\2f)", "bacbabf", 3, 7); x2s("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); x2s("a|b", "a", 0, 1); x2s("a|b", "b", 0, 1); x2s("|a", "a", 0, 0); x2s("(|a)", "a", 0, 0); x2s("ab|bc", "ab", 0, 2); x2s("ab|bc", "bc", 0, 2); x2s("z(?:ab|bc)", "zbc", 0, 3); x2s("a(?:ab|bc)c", "aabc", 0, 4); x2s("ab|(?:ac|az)", "az", 0, 2); x2s("a|b|c", "dc", 1, 2); x2s("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); ns("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); x2s("a|^z", "ba", 1, 2); x2s("a|^z", "za", 0, 1); x2s("a|\\Gz", 
"bza", 2, 3); x2s("a|\\Gz", "za", 0, 1); x2s("a|\\Az", "bza", 2, 3); x2s("a|\\Az", "za", 0, 1); x2s("a|b\\Z", "ba", 1, 2); x2s("a|b\\Z", "b", 0, 1); x2s("a|b\\z", "ba", 1, 2); x2s("a|b\\z", "b", 0, 1); x2s("\\w|\\s", " ", 0, 1); ns("\\w|\\w", " "); x2s("\\w|%", "%", 0, 1); x2s("\\w|[&$]", "&", 0, 1); x2s("[b-d]|[^e-z]", "a", 0, 1); x2s("(?:a|[c-f])|bz", "dz", 0, 1); x2s("(?:a|[c-f])|bz", "bz", 0, 2); x2s("abc|(?=zz)..f", "zzf", 0, 3); x2s("abc|(?!zz)..f", "abf", 0, 3); x2s("(?=za)..a|(?=zz)..a", "zza", 0, 3); ns("(?>a|abd)c", "abdc"); x2s("(?>abd|a)c", "abdc", 0, 4); x2s("a?|b", "a", 0, 1); x2s("a?|b", "b", 0, 0); x2s("a?|b", "", 0, 0); x2s("a*|b", "aa", 0, 2); x2s("a*|b*", "ba", 0, 0); x2s("a*|b*", "ab", 0, 1); x2s("a+|b*", "", 0, 0); x2s("a+|b*", "bbb", 0, 3); x2s("a+|b*", "abbb", 0, 1); ns("a+|b+", ""); x2s("(a|b)?", "b", 0, 1); x2s("(a|b)*", "ba", 0, 2); x2s("(a|b)+", "bab", 0, 3); x2s("(ab|ca)+", "caabbc", 0, 4); x2s("(ab|ca)+", "aabca", 1, 5); x2s("(ab|ca)+", "abzca", 0, 2); x2s("(a|bab)+", "ababa", 0, 5); x2s("(a|bab)+", "ba", 1, 2); x2s("(a|bab)+", "baaaba", 1, 4); x2s("(?:a|b)(?:a|b)", "ab", 0, 2); x2s("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); x2s("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); x2s("(?:a+|b+){2}", "aaabbb", 0, 6); x2s("h{0,}", "hhhh", 0, 4); x2s("(?:a+|b+){1,2}", "aaabbb", 0, 6); ns("ax{2}*a", "0axxxa1"); ns("a.{0,2}a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXXa0"); x2s("^a{2,}?a$", "aaa", 0, 3); x2s("^[a-z]{2,}?$", "aaa", 0, 3); x2s("(?:a+|\\Ab*)cc", "cc", 0, 2); ns("(?:a+|\\Ab*)cc", "abcc"); x2s("(?:^a+|b+)*c", "aabbbabc", 6, 8); x2s("(?:^a+|b+)*c", "aabbbbc", 0, 7); x2s("a|(?i)c", "C", 0, 1); x2s("(?i)c|a", "C", 0, 1); x2s("(?i)c|a", "A", 0, 1); x2s("(?i:c)|a", "C", 0, 1); ns("(?i:c)|a", "A"); x2s("[abc]?", "abc", 0, 1); x2s("[abc]*", "abc", 0, 3); x2s("[^abc]*", "abc", 0, 0); ns("[^abc]+", "abc"); x2s("a??", "aaa", 0, 0); x2s("ba??b", "bab", 0, 3); x2s("a*?", "aaa", 0, 0); x2s("ba*?", "baa", 0, 1); x2s("ba*?b", "baab", 0, 4); 
x2s("a+?", "aaa", 0, 1); x2s("ba+?", "baa", 0, 2); x2s("ba+?b", "baab", 0, 4); x2s("(?:a?)??", "a", 0, 0); x2s("(?:a??)?", "a", 0, 0); x2s("(?:a?)+?", "aaa", 0, 1); x2s("(?:a+)??", "aaa", 0, 0); x2s("(?:a+)??b", "aaab", 0, 4); x2s("(?:ab)?{2}", "", 0, 0); x2s("(?:ab)?{2}", "ababa", 0, 4); x2s("(?:ab)*{0}", "ababa", 0, 0); x2s("(?:ab){3,}", "abababab", 0, 8); ns("(?:ab){3,}", "abab"); x2s("(?:ab){2,4}", "ababab", 0, 6); x2s("(?:ab){2,4}", "ababababab", 0, 8); x2s("(?:ab){2,4}?", "ababababab", 0, 4); x2s("(?:ab){,}", "ab{,}", 0, 5); x2s("(?:abc)+?{2}", "abcabcabc", 0, 6); x2s("(?:X*)(?i:xa)", "XXXa", 0, 4); x2s("(d+)([^abc]z)", "dddz", 0, 4); x2s("([^abc]*)([^abc]z)", "dddz", 0, 4); x2s("(\\w+)(\\wz)", "dddz", 0, 4); x3s("(a)", "a", 0, 1, 1); x3s("(ab)", "ab", 0, 2, 1); x2s("((ab))", "ab", 0, 2); x3s("((ab))", "ab", 0, 2, 1); x3s("((ab))", "ab", 0, 2, 2); x3s("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); x3s("(ab)(cd)", "abcd", 0, 2, 1); x3s("(ab)(cd)", "abcd", 2, 4, 2); x3s("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); x3s("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); x2s("(^a)", "a", 0, 1); x3s("(a)|(a)", "ba", 1, 2, 1); x3s("(^a)|(a)", "ba", 1, 2, 2); x3s("(a?)", "aaa", 0, 1, 1); x3s("(a*)", "aaa", 0, 3, 1); x3s("(a*)", "", 0, 0, 1); x3s("(a+)", "aaaaaaa", 0, 7, 1); x3s("(a+|b*)", "bbbaa", 0, 3, 1); x3s("(a+|b?)", "bbbaa", 0, 1, 1); x3s("(abc)?", "abc", 0, 3, 1); x3s("(abc)*", "abc", 0, 3, 1); x3s("(abc)+", "abc", 0, 3, 1); x3s("(xyz|abc)+", "abc", 0, 3, 1); x3s("([xyz][abc]|abc)+", "abc", 0, 3, 1); x3s("((?i:abc))", "AbC", 0, 3, 1); x2s("(abc)(?i:\\1)", "abcABC", 0, 6); x3s("((?m:a.c))", "a\nc", 0, 3, 1); x3s("((?=az)a)", "azb", 0, 1, 1); x3s("abc|(.abd)", "zabd", 0, 4, 1); x2s("(?:abc)|(ABC)", "abc", 0, 3); x3s("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); x3s("a*(.)", "aaaaz", 4, 5, 1); x3s("a*?(.)", "aaaaz", 0, 1, 1); x3s("a*?(c)", "aaaac", 4, 5, 1); x3s("[bcd]a*(.)", "caaaaz", 5, 6, 1); x3s("(\\Abb)cc", "bbcc", 0, 2, 1); ns("(\\Abb)cc", "zbbcc"); 
x3s("(^bb)cc", "bbcc", 0, 2, 1); ns("(^bb)cc", "zbbcc"); x3s("cc(bb$)", "ccbb", 2, 4, 1); ns("cc(bb$)", "ccbbb"); ns("(\\1)", ""); ns("\\1(a)", "aa"); ns("(a(b)\\1)\\2+", "ababb"); ns("(?:(?:\\1|z)(a))+$", "zaa"); x2s("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); x2s("(a)(?=\\1)", "aa", 0, 1); ns("(a)$|\\1", "az"); x2s("(a)\\1", "aa", 0, 2); ns("(a)\\1", "ab"); x2s("(a?)\\1", "aa", 0, 2); x2s("(a??)\\1", "aa", 0, 0); x2s("(a*)\\1", "aaaaa", 0, 4); x3s("(a*)\\1", "aaaaa", 0, 2, 1); x2s("a(b*)\\1", "abbbb", 0, 5); x2s("a(b*)\\1", "ab", 0, 1); x2s("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); x2s("(a*)(b*)\\2", "aaabbbb", 0, 7); x2s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); x3s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); x2s("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); x2s("([a-d])\\1", "cc", 0, 2); x2s("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); ns("(\\w\\d\\s)\\1", "f5 f5"); x2s("(who|[a-c]{3})\\1", "whowho", 0, 6); x2s("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); x2s("(who|[a-c]{3})\\1", "cbccbc", 0, 6); x2s("(^a)\\1", "aa", 0, 2); ns("(^a)\\1", "baa"); ns("(a$)\\1", "aa"); ns("(ab\\Z)\\1", "ab"); x2s("(a*\\Z)\\1", "a", 1, 1); x2s(".(a*\\Z)\\1", "ba", 1, 2); x3s("(.(abc)\\2)", "zabcabc", 0, 7, 1); x3s("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); x2s("((?i:az))\\1", "AzAz", 0, 4); ns("((?i:az))\\1", "Azaz"); x2s("(?<=a)b", "ab", 1, 2); ns("(?<=a)b", "bb"); x2s("(?<=a|b)b", "bb", 1, 2); x2s("(?<=a|bc)b", "bcb", 2, 3); x2s("(?<=a|bc)b", "ab", 1, 2); x2s("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); x2s("(a)\\g<1>", "aa", 0, 2); x2s("(?a)", "a", 0, 1); x2s("(?ab)\\g", "abab", 0, 4); x2s("(?.zv.)\\k", "azvbazvb", 0, 8); x2s("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); x2s("(?|a\\g)+", "", 0, 0); x2s("(?|\\(\\g\\))+$", "()(())", 0, 6); x3s("\\g(?.){0}", "X", 0, 1, 1); x2s("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); x2s("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); x2s("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); x2s("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); x3s("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 
1); x2s("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); x2s("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); x2s("(?:(?)|(?efg))\\k", "", 0, 0); x2s("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); ns("(?:(?abc)|(?efg))\\k", "abcefg"); x2s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); x3s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); x3s("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); x2s("(?a|\\(\\g\\))", "a", 0, 1); x2s("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); x3s("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); x2s("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); x2s("\\g<1>|\\zEND(.a.)", "bac", 0, 3); x3s("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); x2s("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); x2s("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); x2s("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); x2s("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); x2s("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); x2s("()*\\1", "", 0, 0); x2s("(?:()|())*\\1\\2", "", 0, 0); x3s("(?:\\1a|())*", "a", 0, 0, 1); x2s("x((.)*)*x", "0x1x2x3", 1, 6); x2s("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); x2s("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); x2s("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); x3s("\\A(?|.|(?:(?.)\\g\\k))\\z", "reer", 0, 4, 1); x3s("(?-i:\\g)(?i:(?a)){0}", "A", 0, 1, 1); String pat = "(? \\g \\g* \\g ){0}" + "(? < \\g \\s* > ){0}" + "(? [a-zA-Z_:]+ ){0}" + "(? [^<&]+ (\\g | [^<&]+)* ){0}" + "(? 
>){0}" + "\\g"; String str = "fbbbf"; x3s(pat, str, 0, 27, 0, Option.EXTEND); x3s(pat, str, 0, 27, 1, Option.EXTEND); x3s(pat, str, 6, 11, 2, Option.EXTEND); x3s(pat, str, 7, 10, 3, Option.EXTEND); x3s(pat, str, 5, 21, 4, Option.EXTEND); x3s(pat, str, 21, 27, 5, Option.EXTEND); x2s("(a)b\\k<1>", "aba", 0, 3); x2s("^(?>(?=a)(a|))++$", "a", 0, 1); x2s("\\k", "k", 0, 1); x2s("\\kx", "kx", 0, 2); x2s("\\g", "g", 0, 1); x2s("\\gx", "gx", 0, 2); x2s("\\k\\g", "kg", 0, 2); ns("\\00", "00"); ns("\\70", "70"); x2s("\\80", "80", 0, 2); x2s("\\90", "90", 0, 2); ns("(?a)|(?b))(?:(?()cd|x)e|fg)", "bxe", 0, 3); ns("(?:(?a)|(?b))(?:(?()cd|x)e|fg)", "bxe"); x2s("((?<=a))?(?(1)b|c)", "abc", 1, 2); x2s("((?<=a))?(?(1)b|c)", "bc", 1, 2); x2s("((?x)|(?y))(?()y|x)", "xy", 0, 2); x2s("((?x)|(?y))(?()y|x)", "yx", 0, 2); ns("((?x)|(?y))(?()y|x)", "xx"); ns("((?x)|(?y))(?()y|x)", "yy"); xerrs("(a)?(?b)?(?(1)a)(?()b)", ErrorMessages.NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); xerrs("()(?(2))", ErrorMessages.INVALID_BACKREF); xerrs("(?(700000))", ErrorMessages.INVALID_BACKREF); x2s("\\R", "\n", 0, 1); x2s("\\R", "\r", 0, 1); x2s("\\R{3}", "\r\r\n\n", 0, 4); x2s("\\X{5}", "あいab\n", 0, 5); x2s("\\X{5}", "あいab\n", 0, 5); x2s("(?<=a).*b", "aab", 1, 3); x2s("([.])", ".", 0, 1); x2s("([a])", "a", 0, 1); x2s("([\\w])", "a", 0, 1); // gpos ns("\\Gabc", "123abcdef", 2, Option.DEFAULT); x2s("\\Gabc", "123abcdef", 3, 0, 3, 6); x2s("\\Gabc", "123abcdef", 3, 3, 3, 6); ns("\\Gabc", "123abcdef", 0, 3); x2s("(?!\\G)", "abcd", 2, 3, 3, 3); x2s("(?!\\G)", "abcd", 3, 3, 4, 4); } } jruby-joni-2.1.41/test/org/joni/test/TestC.java000077500000000000000000001323721400407002500212430ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import org.jcodings.Encoding; import org.jcodings.specific.EUCJPEncoding; import org.joni.Option; import org.joni.Syntax; public class TestC extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return EUCJPEncoding.INSTANCE; } @Override public String testEncoding() { return "cp1250"; } @Override public Syntax syntax() { return Syntax.TEST; } @Override public void test() throws Exception { x2s("", "", 0, 0); x2s("^", "", 0, 0); x2s("$", "", 0, 0); x2s("\\G", "", 0, 0); x2s("\\A", "", 0, 0); x2s("\\Z", "", 0, 0); x2s("\\z", "", 0, 0); x2s("^$", "", 0, 0); x2s("\\ca", "\001", 0, 1); x2s("\\C-b", "\002", 0, 1); x2s("\\c\\\\", "\034", 0, 1); x2s("q[\\c\\\\]", "q\034", 0, 2); x2s("", "a", 0, 0); x2s("a", "a", 0, 1); x2s("\\x61", "a", 0, 1); x2s("aa", "aa", 0, 2); x2s("aaa", "aaa", 0, 3); x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35); x2s("ab", "ab", 0, 2); x2s("b", "ab", 1, 2); x2s("bc", "abc", 1, 3); x2s("(?i:#RET#)", "#INS##RET#", 5, 10); x2s("\\17", "\017", 0, 1); x2s("\\x1f", "\u001f", 0, 1); x2s("a(?#....\\\\JJJJ)b", "ab", 0, 2); x2s("(?x) G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7); x2s(".", "a", 0, 1); ns(".", 
""); x2s("..", "ab", 0, 2); x2s("\\w", "e", 0, 1); ns("\\W", "e"); x2s("\\s", " ", 0, 1); x2s("\\S", "b", 0, 1); x2s("\\d", "4", 0, 1); ns("\\D", "4"); x2s("\\b", "z ", 0, 0); x2s("\\b", " z", 1, 1); x2s("\\B", "zz ", 1, 1); x2s("\\B", "z ", 2, 2); x2s("\\B", " z", 0, 0); x2s("[ab]", "b", 0, 1); ns("[ab]", "c"); x2s("[a-z]", "t", 0, 1); ns("[^a]", "a"); x2s("[^a]", "\n", 0, 1); x2s("[]]", "]", 0, 1); ns("[^]]", "]"); x2s("[\\^]+", "0^^1", 1, 3); x2s("[b-]", "b", 0, 1); x2s("[b-]", "-", 0, 1); x2s("[\\w]", "z", 0, 1); ns("[\\w]", " "); x2s("[\\W]", "b$", 1, 2); x2s("[\\d]", "5", 0, 1); ns("[\\d]", "e"); x2s("[\\D]", "t", 0, 1); ns("[\\D]", "3"); x2s("[\\s]", " ", 0, 1); ns("[\\s]", "a"); x2s("[\\S]", "b", 0, 1); ns("[\\S]", " "); x2s("[\\w\\d]", "2", 0, 1); ns("[\\w\\d]", " "); x2s("[[:upper:]]", "B", 0, 1); x2s("[*[:xdigit:]+]", "+", 0, 1); x2s("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7); x2s("[*[:xdigit:]+]", "-@^+", 3, 4); ns("[[:upper]]", "A"); x2s("[[:upper]]", ":", 0, 1); x2s("[\\044-\\047]", "\046", 0, 1); x2s("[\\x5a-\\x5c]", "\u005b", 0, 1); x2s("[\\x6A-\\x6D]", "\u006c", 0, 1); ns("[\\x6A-\\x6D]", "\u006e"); ns("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype () External | _rb_apply"); x2s("[\\[]", "[", 0, 1); x2s("[\\]]", "]", 0, 1); x2s("[&]", "&", 0, 1); x2s("[[ab]]", "b", 0, 1); x2s("[[ab]c]", "c", 0, 1); ns("[[^a]]", "a"); ns("[^[a]]", "a"); x2s("[[ab]&&bc]", "b", 0, 1); ns("[[ab]&&bc]", "a"); ns("[[ab]&&bc]", "c"); x2s("[a-z&&b-y&&c-x]", "w", 0, 1); ns("[^a-z&&b-y&&c-x]", "w"); x2s("[[^a&&a]&&a-z]", "b", 0, 1); ns("[[^a&&a]&&a-z]", "a"); x2s("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1); ns("[[^a-z&&bcdef]&&[^c-g]]", "c"); x2s("[^[^abc]&&[^cde]]", "c", 0, 1); x2s("[^[^abc]&&[^cde]]", "e", 0, 1); ns("[^[^abc]&&[^cde]]", "f"); x2s("[a-&&-a]", "-", 0, 1); ns("[a\\-&&\\-a]", "&"); ns("\\wabc", " abc"); x2s("a\\Wbc", "a bc", 0, 4); x2s("a.b.c", "aabbc", 0, 5); x2s(".\\wb\\W..c", "abb bcc", 0, 7); x2s("\\s\\wzzz", " zzzz", 0, 5); x2s("aa.b", "aabb", 0, 4); ns(".a", 
"ab"); x2s(".a", "aa", 0, 2); x2s("^a", "a", 0, 1); x2s("^a$", "a", 0, 1); x2s("^\\w$", "a", 0, 1); ns("^\\w$", " "); x2s("^\\wab$", "zab", 0, 3); x2s("^\\wabcdef$", "zabcdef", 0, 7); x2s("^\\w...def$", "zabcdef", 0, 7); x2s("\\w\\w\\s\\Waaa\\d", "aa aaa4", 0, 8); x2s("\\A\\Z", "", 0, 0); x2s("\\Axyz", "xyz", 0, 3); x2s("xyz\\Z", "xyz", 0, 3); x2s("xyz\\z", "xyz", 0, 3); x2s("a\\Z", "a", 0, 1); x2s("\\Gaz", "az", 0, 2); ns("\\Gz", "bza"); ns("az\\G", "az"); ns("az\\A", "az"); ns("a\\Az", "az"); x2s("\\^\\$", "^$", 0, 2); x2s("^x?y", "xy", 0, 2); x2s("^(x?y)", "xy", 0, 2); x2s("\\w", "_", 0, 1); ns("\\W", "_"); x2s("(?=z)z", "z", 0, 1); ns("(?=z).", "a"); x2s("(?!z)a", "a", 0, 1); ns("(?!z)a", "z"); x2s("(?i:a)", "a", 0, 1); x2s("(?i:a)", "A", 0, 1); x2s("(?i:A)", "a", 0, 1); ns("(?i:A)", "b"); x2s("(?i:[A-Z])", "a", 0, 1); x2s("(?i:[f-m])", "H", 0, 1); x2s("(?i:[f-m])", "h", 0, 1); ns("(?i:[f-m])", "e"); x2s("(?i:[A-c])", "D", 0, 1); ns("(?i:[^a-z])", "A"); ns("(?i:[^a-z])", "a"); x2s("(?i:[!-k])", "Z", 0, 1); x2s("(?i:[!-k])", "7", 0, 1); x2s("(?i:[T-}])", "b", 0, 1); x2s("(?i:[T-}])", "{", 0, 1); x2s("(?i:\\?a)", "?A", 0, 2); x2s("(?i:\\*A)", "*a", 0, 2); ns(".", "\n"); x2s("(?m:.)", "\n", 0, 1); x2s("(?m:a.)", "a\n", 0, 2); x2s("(?m:.b)", "a\nb", 1, 3); x2s(".*abc", "dddabdd\nddabc", 8, 13); x2s("(?m:.*abc)", "dddabddabc", 0, 10); ns("(?i)(?-i)a", "A"); ns("(?i)(?-i:a)", "A"); x2s("a?", "", 0, 0); x2s("a?", "b", 0, 0); x2s("a?", "a", 0, 1); x2s("a*", "", 0, 0); x2s("a*", "a", 0, 1); x2s("a*", "aaa", 0, 3); x2s("a*", "baaaa", 0, 0); ns("a+", ""); x2s("a+", "a", 0, 1); x2s("a+", "aaaa", 0, 4); x2s("a+", "aabbb", 0, 2); x2s("a+", "baaaa", 1, 5); x2s(".?", "", 0, 0); x2s(".?", "f", 0, 1); x2s(".?", "\n", 0, 0); x2s(".*", "", 0, 0); x2s(".*", "abcde", 0, 5); x2s(".+", "z", 0, 1); x2s(".+", "zdswer\n", 0, 6); x2s("(.*)a\\1f", "babfbac", 0, 4); x2s("(.*)a\\1f", "bacbabf", 3, 7); x2s("((.*)a\\2f)", "bacbabf", 3, 7); x2s("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23); 
x2s("a|b", "a", 0, 1); x2s("a|b", "b", 0, 1); x2s("|a", "a", 0, 0); x2s("(|a)", "a", 0, 0); x2s("ab|bc", "ab", 0, 2); x2s("ab|bc", "bc", 0, 2); x2s("z(?:ab|bc)", "zbc", 0, 3); x2s("a(?:ab|bc)c", "aabc", 0, 4); x2s("ab|(?:ac|az)", "az", 0, 2); x2s("a|b|c", "dc", 1, 2); x2s("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2); ns("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn"); x2s("a|^z", "ba", 1, 2); x2s("a|^z", "za", 0, 1); x2s("a|\\Gz", "bza", 2, 3); x2s("a|\\Gz", "za", 0, 1); x2s("a|\\Az", "bza", 2, 3); x2s("a|\\Az", "za", 0, 1); x2s("a|b\\Z", "ba", 1, 2); x2s("a|b\\Z", "b", 0, 1); x2s("a|b\\z", "ba", 1, 2); x2s("a|b\\z", "b", 0, 1); x2s("\\w|\\s", " ", 0, 1); ns("\\w|\\w", " "); x2s("\\w|%", "%", 0, 1); x2s("\\w|[&$]", "&", 0, 1); x2s("[b-d]|[^e-z]", "a", 0, 1); x2s("(?:a|[c-f])|bz", "dz", 0, 1); x2s("(?:a|[c-f])|bz", "bz", 0, 2); x2s("abc|(?=zz)..f", "zzf", 0, 3); x2s("abc|(?!zz)..f", "abf", 0, 3); x2s("(?=za)..a|(?=zz)..a", "zza", 0, 3); ns("(?>a|abd)c", "abdc"); x2s("(?>abd|a)c", "abdc", 0, 4); x2s("a?|b", "a", 0, 1); x2s("a?|b", "b", 0, 0); x2s("a?|b", "", 0, 0); x2s("a*|b", "aa", 0, 2); x2s("a*|b*", "ba", 0, 0); x2s("a*|b*", "ab", 0, 1); x2s("a+|b*", "", 0, 0); x2s("a+|b*", "bbb", 0, 3); x2s("a+|b*", "abbb", 0, 1); ns("a+|b+", ""); x2s("(a|b)?", "b", 0, 1); x2s("(a|b)*", "ba", 0, 2); x2s("(a|b)+", "bab", 0, 3); x2s("(ab|ca)+", "caabbc", 0, 4); x2s("(ab|ca)+", "aabca", 1, 5); x2s("(ab|ca)+", "abzca", 0, 2); x2s("(a|bab)+", "ababa", 0, 5); x2s("(a|bab)+", "ba", 1, 2); x2s("(a|bab)+", "baaaba", 1, 4); x2s("(?:a|b)(?:a|b)", "ab", 0, 2); x2s("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3); x2s("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6); x2s("(?:a+|b+){2}", "aaabbb", 0, 6); x2s("h{0,}", "hhhh", 0, 4); x2s("(?:a+|b+){1,2}", "aaabbb", 0, 6); ns("ax{2}*a", "0axxxa1"); ns("a.{0,2}a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXa0"); ns("a.{0,2}?a", "0aXXXXa0"); x2s("^a{2,}?a$", "aaa", 0, 3); x2s("^[a-z]{2,}?$", "aaa", 0, 3); x2s("(?:a+|\\Ab*)cc", "cc", 0, 2); ns("(?:a+|\\Ab*)cc", "abcc"); 
x2s("(?:^a+|b+)*c", "aabbbabc", 6, 8); x2s("(?:^a+|b+)*c", "aabbbbc", 0, 7); x2s("a|(?i)c", "C", 0, 1); x2s("(?i)c|a", "C", 0, 1); x2s("(?i)c|a", "A", 0, 1); x2s("(?i:c)|a", "C", 0, 1); ns("(?i:c)|a", "A"); x2s("[abc]?", "abc", 0, 1); x2s("[abc]*", "abc", 0, 3); x2s("[^abc]*", "abc", 0, 0); ns("[^abc]+", "abc"); x2s("a??", "aaa", 0, 0); x2s("ba??b", "bab", 0, 3); x2s("a*?", "aaa", 0, 0); x2s("ba*?", "baa", 0, 1); x2s("ba*?b", "baab", 0, 4); x2s("a+?", "aaa", 0, 1); x2s("ba+?", "baa", 0, 2); x2s("ba+?b", "baab", 0, 4); x2s("(?:a?)??", "a", 0, 0); x2s("(?:a??)?", "a", 0, 0); x2s("(?:a?)+?", "aaa", 0, 1); x2s("(?:a+)??", "aaa", 0, 0); x2s("(?:a+)??b", "aaab", 0, 4); x2s("(?:ab)?{2}", "", 0, 0); x2s("(?:ab)?{2}", "ababa", 0, 4); x2s("(?:ab)*{0}", "ababa", 0, 0); x2s("(?:ab){3,}", "abababab", 0, 8); ns("(?:ab){3,}", "abab"); x2s("(?:ab){2,4}", "ababab", 0, 6); x2s("(?:ab){2,4}", "ababababab", 0, 8); x2s("(?:ab){2,4}?", "ababababab", 0, 4); x2s("(?:ab){,}", "ab{,}", 0, 5); x2s("(?:abc)+?{2}", "abcabcabc", 0, 6); x2s("(?:X*)(?i:xa)", "XXXa", 0, 4); x2s("(d+)([^abc]z)", "dddz", 0, 4); x2s("([^abc]*)([^abc]z)", "dddz", 0, 4); x2s("(\\w+)(\\wz)", "dddz", 0, 4); x3s("(a)", "a", 0, 1, 1); x3s("(ab)", "ab", 0, 2, 1); x2s("((ab))", "ab", 0, 2); x3s("((ab))", "ab", 0, 2, 1); x3s("((ab))", "ab", 0, 2, 2); x3s("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20); x3s("(ab)(cd)", "abcd", 0, 2, 1); x3s("(ab)(cd)", "abcd", 2, 4, 2); x3s("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3); x3s("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4); x2s("(^a)", "a", 0, 1); x3s("(a)|(a)", "ba", 1, 2, 1); x3s("(^a)|(a)", "ba", 1, 2, 2); x3s("(a?)", "aaa", 0, 1, 1); x3s("(a*)", "aaa", 0, 3, 1); x3s("(a*)", "", 0, 0, 1); x3s("(a+)", "aaaaaaa", 0, 7, 1); x3s("(a+|b*)", "bbbaa", 0, 3, 1); x3s("(a+|b?)", "bbbaa", 0, 1, 1); x3s("(abc)?", "abc", 0, 3, 1); x3s("(abc)*", "abc", 0, 3, 1); x3s("(abc)+", "abc", 0, 3, 1); x3s("(xyz|abc)+", "abc", 0, 3, 1); x3s("([xyz][abc]|abc)+", "abc", 0, 3, 1); 
x3s("((?i:abc))", "AbC", 0, 3, 1); x2s("(abc)(?i:\\1)", "abcABC", 0, 6); x3s("((?m:a.c))", "a\nc", 0, 3, 1); x3s("((?=az)a)", "azb", 0, 1, 1); x3s("abc|(.abd)", "zabd", 0, 4, 1); x2s("(?:abc)|(ABC)", "abc", 0, 3); x3s("(?i:(abc))|(zzz)", "ABC", 0, 3, 1); x3s("a*(.)", "aaaaz", 4, 5, 1); x3s("a*?(.)", "aaaaz", 0, 1, 1); x3s("a*?(c)", "aaaac", 4, 5, 1); x3s("[bcd]a*(.)", "caaaaz", 5, 6, 1); x3s("(\\Abb)cc", "bbcc", 0, 2, 1); ns("(\\Abb)cc", "zbbcc"); x3s("(^bb)cc", "bbcc", 0, 2, 1); ns("(^bb)cc", "zbbcc"); x3s("cc(bb$)", "ccbb", 2, 4, 1); ns("cc(bb$)", "ccbbb"); ns("(\\1)", ""); ns("\\1(a)", "aa"); ns("(a(b)\\1)\\2+", "ababb"); ns("(?:(?:\\1|z)(a))+$", "zaa"); x2s("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4); x2s("(a)(?=\\1)", "aa", 0, 1); ns("(a)$|\\1", "az"); x2s("(a)\\1", "aa", 0, 2); ns("(a)\\1", "ab"); x2s("(a?)\\1", "aa", 0, 2); x2s("(a??)\\1", "aa", 0, 0); x2s("(a*)\\1", "aaaaa", 0, 4); x3s("(a*)\\1", "aaaaa", 0, 2, 1); x2s("a(b*)\\1", "abbbb", 0, 5); x2s("a(b*)\\1", "ab", 0, 1); x2s("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10); x2s("(a*)(b*)\\2", "aaabbbb", 0, 7); x2s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8); x3s("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7); x2s("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6); x2s("([a-d])\\1", "cc", 0, 2); x2s("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6); ns("(\\w\\d\\s)\\1", "f5 f5"); x2s("(who|[a-c]{3})\\1", "whowho", 0, 6); x2s("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9); x2s("(who|[a-c]{3})\\1", "cbccbc", 0, 6); x2s("(^a)\\1", "aa", 0, 2); ns("(^a)\\1", "baa"); ns("(a$)\\1", "aa"); ns("(ab\\Z)\\1", "ab"); x2s("(a*\\Z)\\1", "a", 1, 1); x2s(".(a*\\Z)\\1", "ba", 1, 2); x3s("(.(abc)\\2)", "zabcabc", 0, 7, 1); x3s("(.(..\\d.)\\2)", "z12341234", 0, 9, 1); x2s("((?i:az))\\1", "AzAz", 0, 4); ns("((?i:az))\\1", "Azaz"); x2s("(?<=a)b", "ab", 1, 2); ns("(?<=a)b", "bb"); x2s("(?<=a|b)b", "bb", 1, 2); x2s("(?<=a|bc)b", "bcb", 2, 3); x2s("(?<=a|bc)b", "ab", 1, 2); x2s("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2); x2s("(a)\\g<1>", "aa", 0, 2); x2s("(?a)", "a", 0, 1); 
x2s("(?ab)\\g", "abab", 0, 4); x2s("(?.zv.)\\k", "azvbazvb", 0, 8); x2s("(?<=\\g)|-\\zEND (?XyZ)", "XyZ", 3, 3); x2s("(?|a\\g)+", "", 0, 0); x2s("(?|\\(\\g\\))+$", "()(())", 0, 6); x3s("\\g(?.){0}", "X", 0, 1, 1); x2s("\\g(abc|df(?.YZ){2,8}){0}", "XYZ", 0, 3); x2s("\\A(?(a\\g)|)\\z", "aaaa", 0, 4); x2s("(?|\\g\\g)\\z|\\zEND (?a|(b)\\g)", "bbbbabba", 0, 8); x2s("(?\\w+\\sx)a+\\k", " fg xaaaaaaaafg x", 2, 18); x3s("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1); x2s("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3); x2s("((?\\d)|(?\\w))(\\k|\\k)", "ff", 0, 2); x2s("(?:(?)|(?efg))\\k", "", 0, 0); x2s("(?:(?abc)|(?efg))\\k", "abcefgefg", 3, 9); ns("(?:(?abc)|(?efg))\\k", "abcefg"); x2s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "a-pyumpyum", 2, 10); x3s("(?:(?.)|(?..)|(?...)|(?....)|(?.....)|(?......)|(?.......)|(?........)|(?.........)|(?..........)|(?...........)|(?............)|(?.............)|(?..............))\\k$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14); x3s("(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?)(?aaa)(?)$", "aaa", 0, 3, 16); x2s("(?a|\\(\\g\\))", "a", 0, 1); x2s("(?a|\\(\\g\\))", "((((((a))))))", 0, 13); x3s("(?a|\\(\\g\\))", "((((((((a))))))))", 0, 17, 1); x2s("\\g|\\zEND(?.*abc$)", "abcxxxabc", 0, 9); x2s("\\g<1>|\\zEND(.a.)", "bac", 0, 3); x3s("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1); x2s("\\A(?:\\g|\\g|\\zEND (?a|c\\gc)(?b|d\\gd))$", "cdcbcdc", 0, 7); x2s("\\A(?|a\\g)\\z|\\zEND (?\\g)", "aaaa", 0, 4); x2s("(?(a|b\\gc){3,5})", "baaaaca", 1, 5); x2s("(?(a|b\\gc){3,5})", "baaaacaaaaa", 0, 10); x2s("(?\\(([^\\(\\)]++|\\g)*+\\))", "((a))", 0, 5); x2s("()*\\1", "", 0, 0); x2s("(?:()|())*\\1\\2", "", 0, 0); x3s("(?:\\1a|())*", "a", 0, 0, 1); x2s("x((.)*)*x", "0x1x2x3", 1, 6); x2s("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); x2s("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0); x2s("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1); 
x2s("\\xED\\xF2", "\u00ed\u0148", 0, 2); x2s("", "\u00a4\u02d8", 0, 0); x2s("\u00a4\u02d8", "\u00a4\u02d8", 0, 2); ns("\u00a4\u00a4", "\u00a4\u02d8"); x2s("\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a6\u00a4\u00a6", 0, 4); x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6); x2s("\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", "\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", 0, 70); x2s("\u00a4\u02d8", "\u00a4\u00a4\u00a4\u02d8", 2, 4); x2s("\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 2, 6); x2s("\\xca\\xb8", "\u0118\u00b8", 0, 2); x2s(".", "\u00a4\u02d8", 0, 2); x2s("..", "\u00a4\u00ab\u00a4\u00ad", 0, 4); x2s("\\w", "\u00a4\u015e", 0, 2); ns("\\W", "\u00a4\u02d8"); x2s("[\\W]", "\u00a4\u00a6$", 2, 3); x2s("\\S", "\u00a4\u02dd", 0, 2); x2s("\\S", "\u00b4\u00c1", 0, 2); x2s("\\b", "\u00b5\u00a4 ", 0, 0); x2s("\\b", " \u00a4\u0170", 1, 1); x2s("\\B", "\u00a4\u00bb\u00a4\u02dd ", 2, 2); x2s("\\B", "\u00a4\u00a6 ", 3, 3); x2s("\\B", " \u00a4\u00a4", 0, 0); x2s("[\u00a4\u017c\u00a4\u00c1]", "\u00a4\u00c1", 0, 2); ns("[\u00a4\u0118\u00a4\u00cb]", "\u00a4\u011a"); x2s("[\u00a4\u00a6-\u00a4\u015e]", "\u00a4\u00a8", 0, 2); ns("[^\u00a4\u00b1]", "\u00a4\u00b1"); x2s("[\\w]", 
"\u00a4\u00cd", 0, 2); ns("[\\d]", "\u00a4\u0150"); x2s("[\\D]", "\u00a4\u010e", 0, 2); ns("[\\s]", "\u00a4\u017b"); x2s("[\\S]", "\u00a4\u0158", 0, 2); x2s("[\\w\\d]", "\u00a4\u010d", 0, 2); x2s("[\\w\\d]", " \u00a4\u010d", 3, 5); ns("\\w\u00b5\u00b4\u013d\u00d6", " \u00b5\u00b4\u013d\u00d6"); x2s("\u00b5\u00b4\\W\u013d\u00d6", "\u00b5\u00b4 \u013d\u00d6", 0, 5); x2s("\u00a4\u02d8.\u00a4\u00a4.\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6", 0, 10); x2s(".\\w\u00a4\u00a6\\W..\u00a4\u013e", "\u00a4\u00a8\u00a4\u00a6\u00a4\u00a6 \u00a4\u00a6\u00a4\u013e\u00a4\u013e", 0, 13); x2s("\\s\\w\u00a4\u0142\u00a4\u0142\u00a4\u0142", " \u00a4\u0142\u00a4\u0142\u00a4\u0142\u00a4\u0142", 0, 9); x2s("\u00a4\u02d8\u00a4\u02d8.\u00a4\u00b1", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00b1\u00a4\u00b1", 0, 8); ns(".\u00a4\u00a4", "\u00a4\u00a4\u00a4\u00a8"); x2s(".\u00a4\u015e", "\u00a4\u015e\u00a4\u015e", 0, 4); x2s("^\u00a4\u02d8", "\u00a4\u02d8", 0, 2); x2s("^\u00a4\u0155$", "\u00a4\u0155", 0, 2); x2s("^\\w$", "\u00a4\u00cb", 0, 2); x2s("^\\w\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142$", "z\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 0, 11); x2s("^\\w...\u00a4\u00a6\u00a4\u00a8\u00a4\u015e$", "z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6\u00a4\u00a8\u00a4\u015e", 0, 13); x2s("\\w\\w\\s\\W\u00a4\u015e\u00a4\u015e\u00a4\u015e\\d", "a\u00a4\u015e \u00a4\u015e\u00a4\u015e\u00a4\u015e4", 0, 12); x2s("\\A\u00a4\u017c\u00a4\u00c1\u00a4\u00c4", "\u00a4\u017c\u00a4\u00c1\u00a4\u00c4", 0, 6); x2s("\u00a4\u0155\u00a4\u00e1\u00a4\u00e2\\Z", "\u00a4\u0155\u00a4\u00e1\u00a4\u00e2", 0, 6); x2s("\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\\z", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 6); x2s("\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\\Z", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\n", 0, 6); x2s("\\G\u00a4\u00dd\u00a4\u00d4", "\u00a4\u00dd\u00a4\u00d4", 0, 4); ns("\\G\u00a4\u00a8", "\u00a4\u00a6\u00a4\u00a8\u00a4\u015e"); 
ns("\u00a4\u010c\u00a4\u0106\\G", "\u00a4\u010c\u00a4\u0106"); ns("\u00a4\u0162\u00a4\u00df\\A", "\u00a4\u0162\u00a4\u00df"); ns("\u00a4\u0162\\A\u00a4\u00df", "\u00a4\u0162\u00a4\u00df"); x2s("(?=\u00a4\u00bb)\u00a4\u00bb", "\u00a4\u00bb", 0, 2); ns("(?=\u00a4\u00a6).", "\u00a4\u00a4"); x2s("(?!\u00a4\u00a6)\u00a4\u00ab", "\u00a4\u00ab", 0, 2); ns("(?!\u00a4\u010c)\u00a4\u02d8", "\u00a4\u010c"); x2s("(?i:\u00a4\u02d8)", "\u00a4\u02d8", 0, 2); x2s("(?i:\u00a4\u00d6\u00a4\u016e)", "\u00a4\u00d6\u00a4\u016e", 0, 4); ns("(?i:\u00a4\u00a4)", "\u00a4\u00a6"); x2s("(?m:\u00a4\u010d.)", "\u00a4\u010d\n", 0, 3); x2s("(?m:.\u00a4\u00e1)", "\u00a4\u0162\n\u00a4\u00e1", 2, 5); x2s("\u00a4\u02d8?", "", 0, 0); x2s("\u0118\u0143?", "\u02db\u02dd", 0, 0); x2s("\u0118\u0143?", "\u0118\u0143", 0, 2); x2s("\u00ce\u011a*", "", 0, 0); x2s("\u00ce\u011a*", "\u00ce\u011a", 0, 2); x2s("\u00bb\u0147*", "\u00bb\u0147\u00bb\u0147\u00bb\u0147", 0, 6); x2s("\u00c7\u010e*", "\u013d\u017b\u00c7\u010e\u00c7\u010e\u00c7\u010e\u00c7\u010e", 0, 0); ns("\u00bb\u0142+", ""); x2s("\u02db\u010e+", "\u02db\u010e", 0, 2); x2s("\u00bb\u0163+", "\u00bb\u0163\u00bb\u0163\u00bb\u0163\u00bb\u0163", 0, 8); x2s("\u00a4\u00a8+", "\u00a4\u00a8\u00a4\u00a8\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 0, 4); x2s("\u00a4\u00a6+", "\u00a4\u015e\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 2, 10); x2s(".?", "\u00a4\u017c", 0, 2); x2s(".*", "\u00a4\u0143\u00a4\u00d4\u00a4\u00d7\u00a4\u00da", 0, 8); x2s(".+", "\u00a4\u00ed", 0, 2); x2s(".+", "\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u00ab\n", 0, 8); x2s("\u00a4\u02d8|\u00a4\u00a4", "\u00a4\u02d8", 0, 2); x2s("\u00a4\u02d8|\u00a4\u00a4", "\u00a4\u00a4", 0, 2); x2s("\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a4\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4", 0, 4); x2s("\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a4\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a6", 0, 4); x2s("\u00a4\u0148(?:\u00a4\u00ab\u00a4\u00ad|\u00a4\u00ad\u00a4\u017b)", "\u00a4\u0148\u00a4\u00ab\u00a4\u00ad", 0, 6); 
x2s("\u00a4\u0148(?:\u00a4\u00ab\u00a4\u00ad|\u00a4\u00ad\u00a4\u017b)\u00a4\u00b1", "\u00a4\u0148\u00a4\u00ad\u00a4\u017b\u00a4\u00b1", 0, 8); x2s("\u00a4\u02d8\u00a4\u00a4|(?:\u00a4\u02d8\u00a4\u00a6|\u00a4\u02d8\u00a4\u0148)", "\u00a4\u02d8\u00a4\u0148", 0, 4); x2s("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6", "\u00a4\u00a8\u00a4\u00a6", 2, 4); x2s("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8|\u00a4\u015e\u00a4\u00ab\u00a4\u00ad|\u00a4\u017b|\u00a4\u00b1\u00a4\u0142\u00a4\u00b5|\u00a4\u00b7\u00a4\u0105\u00a4\u00bb|\u00a4\u02dd|\u00a4\u017c\u00a4\u00c1|\u00a4\u00c4\u00a4\u0106\u00a4\u010c\u00a4\u0118\u00a4\u00cb|\u00a4\u011a\u00a4\u00cd", "\u00a4\u00b7\u00a4\u0105\u00a4\u00bb", 0, 6); ns("\u00a4\u02d8|\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8|\u00a4\u015e\u00a4\u00ab\u00a4\u00ad|\u00a4\u017b|\u00a4\u00b1\u00a4\u0142\u00a4\u00b5|\u00a4\u00b7\u00a4\u0105\u00a4\u00bb|\u00a4\u02dd|\u00a4\u017c\u00a4\u00c1|\u00a4\u00c4\u00a4\u0106\u00a4\u010c\u00a4\u0118\u00a4\u00cb|\u00a4\u011a\u00a4\u00cd", "\u00a4\u0105\u00a4\u00bb"); x2s("\u00a4\u02d8|^\u00a4\u010f", "\u00a4\u00d6\u00a4\u02d8", 2, 4); x2s("\u00a4\u02d8|^\u00a4\u0148", "\u00a4\u0148\u00a4\u02d8", 0, 2); x2s("\u00b5\u00b4|\\G\u013d\u00d6", "\u00a4\u00b1\u013d\u00d6\u00b5\u00b4", 4, 6); x2s("\u00b5\u00b4|\\G\u013d\u00d6", "\u013d\u00d6\u00b5\u00b4", 0, 2); x2s("\u00b5\u00b4|\\A\u013d\u00d6", "b\u013d\u00d6\u00b5\u00b4", 3, 5); x2s("\u00b5\u00b4|\\A\u013d\u00d6", "\u013d\u00d6", 0, 2); x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6\u00b5\u00b4", 2, 4); x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6", 0, 2); x2s("\u00b5\u00b4|\u013d\u00d6\\Z", "\u013d\u00d6\n", 0, 2); x2s("\u00b5\u00b4|\u013d\u00d6\\z", "\u013d\u00d6\u00b5\u00b4", 2, 4); x2s("\u00b5\u00b4|\u013d\u00d6\\z", "\u013d\u00d6", 0, 2); x2s("\\w|\\s", "\u00a4\u015e", 0, 2); x2s("\\w|%", "%\u00a4\u015e", 0, 1); x2s("\\w|[&$]", "\u00a4\u00a6&", 0, 2); x2s("[\u00a4\u00a4-\u00a4\u00b1]", "\u00a4\u00a6", 0, 2); 
x2s("[\u00a4\u00a4-\u00a4\u00b1]|[^\u00a4\u00ab-\u00a4\u0142]", "\u00a4\u02d8", 0, 2); x2s("[\u00a4\u00a4-\u00a4\u00b1]|[^\u00a4\u00ab-\u00a4\u0142]", "\u00a4\u00ab", 0, 2); x2s("[^\u00a4\u02d8]", "\n", 0, 1); x2s("(?:\u00a4\u02d8|[\u00a4\u00a6-\u00a4\u00ad])|\u00a4\u00a4\u00a4\u0148", "\u00a4\u00a6\u00a4\u0148", 0, 2); x2s("(?:\u00a4\u02d8|[\u00a4\u00a6-\u00a4\u00ad])|\u00a4\u00a4\u00a4\u0148", "\u00a4\u00a4\u00a4\u0148", 0, 4); x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(?=\u00a4\u00b1\u00a4\u00b1)..\u00a4\u0170", "\u00a4\u00b1\u00a4\u00b1\u00a4\u0170", 0, 6); x2s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(?!\u00a4\u00b1\u00a4\u00b1)..\u00a4\u0170", "\u00a4\u02d8\u00a4\u00a4\u00a4\u0170", 0, 6); // x2s("(?=\u00a4\u0148\u00a4\u02d8)..\u00a4\u02d8|(?=\u00a4\u0148\u00a4\u0148)..\u00a4\u02d8", "\u00a4\u0148\u00a4\u0148\u00a4\u02d8", 0, 6); x2s("(?<=\u00a4\u02d8|\u00a4\u00a4\u00a4\u00a6)\u00a4\u00a4", "\u00a4\u00a4\u00a4\u00a6\u00a4\u00a4", 4, 6); ns("(?>\u00a4\u02d8|\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8)\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8\u00a4\u00a6"); x2s("(?>\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8|\u00a4\u02d8)\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8\u00a4\u00a6", 0, 8); x2s("\u00a4\u02d8?|\u00a4\u00a4", "\u00a4\u02d8", 0, 2); x2s("\u00a4\u02d8?|\u00a4\u00a4", "\u00a4\u00a4", 0, 0); x2s("\u00a4\u02d8?|\u00a4\u00a4", "", 0, 0); x2s("\u00a4\u02d8*|\u00a4\u00a4", "\u00a4\u02d8\u00a4\u02d8", 0, 4); x2s("\u00a4\u02d8*|\u00a4\u00a4*", "\u00a4\u00a4\u00a4\u02d8", 0, 0); x2s("\u00a4\u02d8*|\u00a4\u00a4*", "\u00a4\u02d8\u00a4\u00a4", 0, 2); x2s("[a\u00a4\u02d8]*|\u00a4\u00a4*", "a\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 3); x2s("\u00a4\u02d8+|\u00a4\u00a4*", "", 0, 0); x2s("\u00a4\u02d8+|\u00a4\u00a4*", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 6); x2s("\u00a4\u02d8+|\u00a4\u00a4*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 2); x2s("\u00a4\u02d8+|\u00a4\u00a4*", "a\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 0); 
ns("\u00a4\u02d8+|\u00a4\u00a4+", ""); x2s("(\u00a4\u02d8|\u00a4\u00a4)?", "\u00a4\u00a4", 0, 2); x2s("(\u00a4\u02d8|\u00a4\u00a4)*", "\u00a4\u00a4\u00a4\u02d8", 0, 4); x2s("(\u00a4\u02d8|\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4", 0, 6); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u00a6\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 0, 8); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u00a8)+", "\u00a4\u00a6\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 4, 12); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u02d8", 2, 10); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u0148\u00a4\u00a6\u00a4\u02d8", 0, 4); x2s("(\u00a4\u02d8\u00a4\u00a4|\u00a4\u00a6\u00a4\u02d8)+", "$$zzzz\u00a4\u02d8\u00a4\u00a4\u00a4\u0148\u00a4\u00a6\u00a4\u02d8", 6, 10); x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8", 0, 10); x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8", 2, 4); x2s("(\u00a4\u02d8|\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4)+", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u02d8", 2, 8); x2s("(?:\u00a4\u02d8|\u00a4\u00a4)(?:\u00a4\u02d8|\u00a4\u00a4)", "\u00a4\u02d8\u00a4\u00a4", 0, 4); x2s("(?:\u00a4\u02d8*|\u00a4\u00a4*)(?:\u00a4\u02d8*|\u00a4\u00a4*)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 6); x2s("(?:\u00a4\u02d8*|\u00a4\u00a4*)(?:\u00a4\u02d8+|\u00a4\u00a4+)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12); x2s("(?:\u00a4\u02d8+|\u00a4\u00a4+){2}", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12); x2s("(?:\u00a4\u02d8+|\u00a4\u00a4+){1,2}", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 12); 
x2s("(?:\u00a4\u02d8+|\\A\u00a4\u00a4*)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a6\u00a4\u00a6", 0, 4); ns("(?:\u00a4\u02d8+|\\A\u00a4\u00a4*)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6"); x2s("(?:^\u00a4\u02d8+|\u00a4\u00a4+)*\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 12, 16); x2s("(?:^\u00a4\u02d8+|\u00a4\u00a4+)*\u00a4\u00a6", "\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6", 0, 14); x2s("\u00a4\u00a6{0,}", "\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6\u00a4\u00a6", 0, 8); x2s("\u00a4\u02d8|(?i)c", "C", 0, 1); x2s("(?i)c|\u00a4\u02d8", "C", 0, 1); x2s("(?i:\u00a4\u02d8)|a", "a", 0, 1); ns("(?i:\u00a4\u02d8)|a", "A"); x2s("[\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]?", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 2); x2s("[\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6); x2s("[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 0); ns("[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6"); x2s("\u00a4\u02d8??", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 0); x2s("\u00a4\u00a4\u00a4\u02d8??\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u00a4", 0, 6); x2s("\u00a4\u02d8*?", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 0); x2s("\u00a4\u00a4\u00a4\u02d8*?", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 2); x2s("\u00a4\u00a4\u00a4\u02d8*?\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4", 0, 8); x2s("\u00a4\u02d8+?", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 2); x2s("\u00a4\u00a4\u00a4\u02d8+?", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 4); x2s("\u00a4\u00a4\u00a4\u02d8+?\u00a4\u00a4", "\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4", 0, 8); x2s("(?:\u0139\u00b7?)??", "\u0139\u00b7", 0, 0); x2s("(?:\u0139\u00b7??)?", "\u0139\u00b7", 0, 0); x2s("(?:\u011a\u00b4?)+?", "\u011a\u00b4\u011a\u00b4\u011a\u00b4", 0, 2); 
x2s("(?:\u00c9\u00f7+)??", "\u00c9\u00f7\u00c9\u00f7\u00c9\u00f7", 0, 0); x2s("(?:\u0154\u0103+)??\u00c1\u00fa", "\u0154\u0103\u0154\u0103\u0154\u0103\u00c1\u00fa", 0, 8); x2s("(?:\u00a4\u02d8\u00a4\u00a4)?{2}", "", 0, 0); x2s("(?:\u00b5\u00b4\u013d\u00d6)?{2}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4", 0, 8); x2s("(?:\u00b5\u00b4\u013d\u00d6)*{0}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4", 0, 0); x2s("(?:\u00b5\u00b4\u013d\u00d6){3,}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 16); ns("(?:\u00b5\u00b4\u013d\u00d6){3,}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6"); x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 12); x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 16); x2s("(?:\u00b5\u00b4\u013d\u00d6){2,4}?", "\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6\u00b5\u00b4\u013d\u00d6", 0, 8); x2s("(?:\u00b5\u00b4\u013d\u00d6){,}", "\u00b5\u00b4\u013d\u00d6{,}", 0, 7); x2s("(?:\u00a4\u00ab\u00a4\u00ad\u00a4\u017b)+?{2}", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 12); x3s("(\u02db\u0110)", "\u02db\u0110", 0, 2, 1); x3s("(\u02db\u0110\u017c\u013a)", "\u02db\u0110\u017c\u013a", 0, 4, 1); x2s("((\u00bb\u0163\u00b4\u00d6))", "\u00bb\u0163\u00b4\u00d6", 0, 4); x3s("((\u00c9\u00f7\u017c\u013a))", "\u00c9\u00f7\u017c\u013a", 0, 4, 1); x3s("((\u015f\u0148\u0106\u00fc))", "\u015f\u0148\u0106\u00fc", 0, 4, 2); x3s("((((((((((((((((((((\u00ce\u011a\u00bb\u0147))))))))))))))))))))", "\u00ce\u011a\u00bb\u0147", 0, 4, 20); x3s("(\u00a4\u02d8\u00a4\u00a4)(\u00a4\u00a6\u00a4\u00a8)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 0, 4, 1); 
x3s("(\u00a4\u02d8\u00a4\u00a4)(\u00a4\u00a6\u00a4\u00a8)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8", 4, 8, 2); x3s("()(\u00a4\u02d8)\u00a4\u00a4\u00a4\u00a6(\u00a4\u00a8\u00a4\u015e\u00a4\u00ab)\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 6, 12, 3); x3s("(()(\u00a4\u02d8)\u00a4\u00a4\u00a4\u00a6(\u00a4\u00a8\u00a4\u015e\u00a4\u00ab)\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142)", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e\u00a4\u00ab\u00a4\u00ad\u00a4\u017b\u00a4\u00b1\u00a4\u0142", 6, 12, 4); x3s(".*(\u0104\u0150\u0104\u00a9)\u0104\u00f3\u02c7\u00a6\u0104\u0162(\u0104\u00f3()\u0104\u00b7\u0104\u013a\u0104\u017c)\u0104\u00a4\u0104\u00f3", "\u0104\u0150\u0104\u00a9\u0104\u00f3\u02c7\u00a6\u0104\u0162\u0104\u00f3\u0104\u00b7\u0104\u013a\u0104\u017c\u0104\u00a4\u0104\u00f3", 10, 18, 2); x2s("(^\u00a4\u02d8)", "\u00a4\u02d8", 0, 2); x3s("(\u00a4\u02d8)|(\u00a4\u02d8)", "\u00a4\u00a4\u00a4\u02d8", 2, 4, 1); x3s("(^\u00a4\u02d8)|(\u00a4\u02d8)", "\u00a4\u00a4\u00a4\u02d8", 2, 4, 2); x3s("(\u00a4\u02d8?)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8", 0, 2, 1); x3s("(\u00a4\u0162*)", "\u00a4\u0162\u00a4\u0162\u00a4\u0162", 0, 6, 1); x3s("(\u00a4\u010c*)", "", 0, 0, 1); x3s("(\u00a4\u00eb+)", "\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb", 0, 14, 1); x3s("(\u00a4\u0150+|\u00a4\u0158*)", "\u00a4\u0150\u00a4\u0150\u00a4\u0150\u00a4\u0158\u00a4\u0158", 0, 6, 1); x3s("(\u00a4\u02d8+|\u00a4\u00a4?)", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8", 0, 2, 1); x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)?", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)*", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("(\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); 
x3s("(\u00a4\u00b5\u00a4\u00b7\u00a4\u0105|\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6)+", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("([\u00a4\u0118\u00a4\u00cb\u00a4\u011a][\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]|\u00a4\u00ab\u00a4\u00ad\u00a4\u017b)+", "\u00a4\u00ab\u00a4\u00ad\u00a4\u017b", 0, 6, 1); x3s("((?i:\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6))", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6", 0, 6, 1); x3s("((?m:\u00a4\u02d8.\u00a4\u00a6))", "\u00a4\u02d8\n\u00a4\u00a6", 0, 5, 1); x3s("((?=\u00a4\u02d8\u00a4\u00f3)\u00a4\u02d8)", "\u00a4\u02d8\u00a4\u00f3\u00a4\u00a4", 0, 2, 1); x3s("\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6|(.\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8)", "\u00a4\u00f3\u00a4\u02d8\u00a4\u00a4\u00a4\u00a8", 0, 8, 1); x3s("\u00a4\u02d8*(.)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 8, 10, 1); x3s("\u00a4\u02d8*?(.)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 0, 2, 1); x3s("\u00a4\u02d8*?(\u00a4\u00f3)", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 8, 10, 1); x3s("[\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8]\u00a4\u02d8*(.)", "\u00a4\u00a8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00f3", 10, 12, 1); x3s("(\\A\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6", 0, 4, 1); ns("(\\A\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00f3\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6"); x3s("(^\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6", 0, 4, 1); ns("(^\u00a4\u00a4\u00a4\u00a4)\u00a4\u00a6\u00a4\u00a6", "\u00a4\u00f3\u00a4\u00a4\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6"); x3s("\u00a4\u00ed\u00a4\u00ed(\u00a4\u00eb\u00a4\u00eb$)", "\u00a4\u00ed\u00a4\u00ed\u00a4\u00eb\u00a4\u00eb", 4, 8, 1); ns("\u00a4\u00ed\u00a4\u00ed(\u00a4\u00eb\u00a4\u00eb$)", "\u00a4\u00ed\u00a4\u00ed\u00a4\u00eb\u00a4\u00eb\u00a4\u00eb"); x2s("(\u011a\u00b5)\\1", "\u011a\u00b5\u011a\u00b5", 0, 4); 
ns("(\u011a\u00b5)\\1", "\u011a\u00b5\u00c9\u0111"); x2s("(\u00b6\u0151?)\\1", "\u00b6\u0151\u00b6\u0151", 0, 4); x2s("(\u00b6\u0151??)\\1", "\u00b6\u0151\u00b6\u0151", 0, 0); x2s("(\u00b6\u0151*)\\1", "\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151", 0, 8); x3s("(\u00b6\u0151*)\\1", "\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151\u00b6\u0151", 0, 4, 1); x2s("\u00a4\u02d8(\u00a4\u00a4*)\\1", "\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 10); x2s("\u00a4\u02d8(\u00a4\u00a4*)\\1", "\u00a4\u02d8\u00a4\u00a4", 0, 2); x2s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\1\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4", 0, 20); x2s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 0, 14); x3s("(\u00a4\u02d8*)(\u00a4\u00a4*)\\2", "\u00a4\u02d8\u00a4\u02d8\u00a4\u02d8\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4\u00a4", 6, 10, 2); x2s("(((((((\u00a4\u00dd*)\u00a4\u00da))))))\u00a4\u00d4\\7", "\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd\u00a4\u00da\u00a4\u00d4\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd", 0, 16); x3s("(((((((\u00a4\u00dd*)\u00a4\u00da))))))\u00a4\u00d4\\7", "\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd\u00a4\u00da\u00a4\u00d4\u00a4\u00dd\u00a4\u00dd\u00a4\u00dd", 0, 6, 7); x2s("(\u00a4\u010e)(\u00a4\u0147)(\u00a4\u0150)\\2\\1\\3", "\u00a4\u010e\u00a4\u0147\u00a4\u0150\u00a4\u0147\u00a4\u010e\u00a4\u0150", 0, 12); x2s("([\u00a4\u00ad-\u00a4\u00b1])\\1", "\u00a4\u017b\u00a4\u017b", 0, 4); x2s("(\\w\\d\\s)\\1", "\u00a4\u02d85 \u00a4\u02d85 ", 0, 8); ns("(\\w\\d\\s)\\1", "\u00a4\u02d85 \u00a4\u02d85"); x2s("(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u0102\u017b\u02c7\u00a9\u0102\u017b\u02c7\u00a9", 0, 8); x2s("...(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u00a4\u02d8a\u00a4\u02d8\u0102\u017b\u02c7\u00a9\u0102\u017b\u02c7\u00a9", 0, 13); 
x2s("(\u0102\u017b\u02c7\u00a9|[\u00a4\u02d8-\u00a4\u00a6]{3})\\1", "\u00a4\u00a6\u00a4\u00a4\u00a4\u00a6\u00a4\u00a6\u00a4\u00a4\u00a4\u00a6", 0, 12); x2s("(^\u00a4\u0142)\\1", "\u00a4\u0142\u00a4\u0142", 0, 4); ns("(^\u00a4\u0155)\\1", "\u00a4\u00e1\u00a4\u0155\u00a4\u0155"); ns("(\u00a4\u02d8$)\\1", "\u00a4\u02d8\u00a4\u02d8"); ns("(\u00a4\u02d8\u00a4\u00a4\\Z)\\1", "\u00a4\u02d8\u00a4\u00a4"); x2s("(\u00a4\u02d8*\\Z)\\1", "\u00a4\u02d8", 2, 2); x2s(".(\u00a4\u02d8*\\Z)\\1", "\u00a4\u00a4\u00a4\u02d8", 2, 4); x3s("(.(\u00a4\u00e4\u00a4\u00a4\u00a4\u0107)\\2)", "z\u00a4\u00e4\u00a4\u00a4\u00a4\u0107\u00a4\u00e4\u00a4\u00a4\u00a4\u0107", 0, 13, 1); x3s("(.(..\\d.)\\2)", "\u00a4\u02d812341234", 0, 10, 1); x2s("((?i:\u00a4\u02d8v\u00a4\u015f))\\1", "\u00a4\u02d8v\u00a4\u015f\u00a4\u02d8v\u00a4\u015f", 0, 10); x2s("(?<\u00b6\u0148\u00a4\u00ab>\u0118\u0143|\\(\\g<\u00b6\u0148\u00a4\u00ab>\\))", "((((((\u0118\u0143))))))", 0, 14); x2s("\\A(?:\\g<\u00b0\u00a4_1>|\\g<\u00b1\u013e_2>|\\z\u02dd\u015e\u00ce\u00bb (?<\u00b0\u00a4_1>\u00b4\u0143|\u013d\u00ab\\g<\u00b1\u013e_2>\u013d\u00ab)(?<\u00b1\u013e_2>\u015f\u00df|\u0118\u00ee\u00bb\u00a7\\g<\u00b0\u00a4_1>\u0118\u00ee\u00bb\u00a7))$", "\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u015f\u00df\u013d\u00ab\u0118\u00ee\u00bb\u00a7\u013d\u00ab\u0118\u00ee\u00bb\u00a7", 0, 26); x2s("[[\u00a4\u0147\u00a4\u0150]]", "\u00a4\u0150", 0, 2); x2s("[[\u00a4\u00a4\u00a4\u015e\u00a4\u00a6]\u00a4\u00ab]", "\u00a4\u00ab", 0, 2); ns("[[^\u00a4\u02d8]]", "\u00a4\u02d8"); ns("[^[\u00a4\u02d8]]", "\u00a4\u02d8"); x2s("[^[^\u00a4\u02d8]]", "\u00a4\u02d8", 0, 2); x2s("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u017b", 0, 2); ns("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u00ab"); ns("[[\u00a4\u00ab\u00a4\u00ad\u00a4\u017b]&&\u00a4\u00ad\u00a4\u017b]", "\u00a4\u00b1"); 
x2s("[\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4-\u00a4\u0148&&\u00a4\u00a6-\u00a4\u0144]", "\u00a4\u0144", 0, 2); ns("[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4-\u00a4\u0148&&\u00a4\u00a6-\u00a4\u0144]", "\u00a4\u0144"); x2s("[[^\u00a4\u02d8&&\u00a4\u02d8]&&\u00a4\u02d8-\u00a4\u00f3]", "\u00a4\u00a4", 0, 2); ns("[[^\u00a4\u02d8&&\u00a4\u02d8]&&\u00a4\u02d8-\u00a4\u00f3]", "\u00a4\u02d8"); x2s("[[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]&&[^\u00a4\u00a6-\u00a4\u00ab]]", "\u00a4\u00ad", 0, 2); ns("[[^\u00a4\u02d8-\u00a4\u00f3&&\u00a4\u00a4\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]&&[^\u00a4\u00a6-\u00a4\u00ab]]", "\u00a4\u00a4"); x2s("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00a6", 0, 2); x2s("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00a8", 0, 2); ns("[^[^\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]]", "\u00a4\u00ab"); x2s("[\u00a4\u02d8-&&-\u00a4\u02d8]", "-", 0, 1); x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]q-w]", "\u00a4\u00a8", 0, 2); x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "f", 0, 1); x2s("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "g", 0, 1); ns("[^[^a-z\u00a4\u02d8\u00a4\u00a4\u00a4\u00a6]&&[^bcdefg\u00a4\u00a6\u00a4\u00a8\u00a4\u015e]g-w]", "2"); x2s("a\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9<\\/b>", "a\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9", 0, 32); x2s(".\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9<\\/b>", 
"a\u0104\u0110\u02c7\u013d\u0104\u00b8\u0104\u00e7\u0104\u00f3\u00a4\u00ce\u0104\u0154\u0104\u00a6\u0104\u00f3\u0104\u00ed\u02c7\u013d\u0104\u00c9", 0, 32); } } jruby-joni-2.1.41/test/org/joni/test/TestCornerCases.java000077500000000000000000000040671400407002500232670ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.test; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; import org.joni.Config; import org.joni.Option; import org.joni.Regex; import org.joni.Syntax; public class TestCornerCases extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return ASCIIEncoding.INSTANCE; } @Override public String testEncoding() { return "cp1250"; } @Override public Syntax syntax() { return Syntax.TEST; } @Override public void test() throws Exception { byte[] reg = "l.".getBytes(); byte[] str = "hello,lo".getBytes(); Regex p = new Regex(reg,0,reg.length,Option.DEFAULT,ASCIIEncoding.INSTANCE,Syntax.DEFAULT); int result = p.matcher(str, 0, str.length).search(3, 0, Option.NONE); if(result != 3) { Config.log.println("FAIL: /l./ 'hello,lo' - with reverse, 3,0"); nfail++; } } } jruby-joni-2.1.41/test/org/joni/test/TestCrnl.java000077500000000000000000000062531400407002500217550ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import org.joni.Config; import org.joni.Option; import org.joni.Syntax; import org.junit.Ignore; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; @Ignore public class TestCrnl extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return ASCIIEncoding.INSTANCE; } @Override public String testEncoding() { return "ascii"; } @Override public Syntax syntax() { return Syntax.TEST; } @Override public void test() throws Exception { x2s("", "\r\n", 0, 0); x2s(".", "\r\n", 0, 1); ns("..", "\r\n"); x2s("^", "\r\n", 0, 0); x2s("\\n^", "\r\nf", 1, 2); x2s("\\n^a", "\r\na", 1, 3); x2s("$", "\r\n", 0, 0); x2s("T$", "T\r\n", 0, 1); x2s("T$", "T\raT\r\n", 3, 4); x2s("\\z", "\r\n", 2, 2); ns("a\\z", "a\r\n"); x2s("\\Z", "\r\n", 0, 0); x2s("\\Z", "\r\na", 3, 3); x2s("\\Z", "\r\n\r\n\n", 4, 4); x2s("\\Z", "\r\n\r\nX", 5, 5); x2s("a\\Z", "a\r\n", 0, 1); x2s("aaaaaaaaaaaaaaa\\Z", "aaaaaaaaaaaaaaa\r\n", 0, 15); x2s("a|$", "b\r\n", 1, 1); x2s("$|b", "\rb", 1, 2); x2s("a$|ab$", "\r\nab\r\n", 2, 4); x2s("a|\\Z", "b\r\n", 1, 1); x2s("\\Z|b", "\rb", 1, 2); x2s("a\\Z|ab\\Z", "\r\nab\r\n", 2, 4); x2s("(?=a$).", "a\r\n", 0, 1); ns("(?=a$).", "a\r"); x2s("(?!a$)..", "a\r", 0, 2); x2s("(?<=a$).\\n", "a\r\n", 1, 3); ns("(? 
0 || nerror > 0) Config.err.println("make sure to enable USE_CRNL_AS_LINE_TERMINATOR"); } } jruby-joni-2.1.41/test/org/joni/test/TestError.java000077500000000000000000000113651400407002500221500ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.test; import org.jcodings.Encoding; import org.jcodings.specific.UTF8Encoding; import org.joni.Option; import org.joni.Syntax; import org.joni.exception.ErrorMessages; public class TestError extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return UTF8Encoding.INSTANCE; } @Override public String testEncoding() { return "iso-8859-2"; } @Override public Syntax syntax() { return Syntax.TEST; } @Override public void test() throws Exception { xerrs("(", ErrorMessages.END_PATTERN_WITH_UNMATCHED_PARENTHESIS); xerrs("[[:WoRd:]]", ErrorMessages.INVALID_POSIX_BRACKET_TYPE); xerrs("(0?0|(?(1)||)|(?(1)||))?", ErrorMessages.INVALID_CONDITION_PATTERN); xerrs("[\\40000000000", ErrorMessages.TOO_BIG_NUMBER); xerrs("[\\40000000000\n", ErrorMessages.TOO_BIG_NUMBER); xerrs("[]", ErrorMessages.EMPTY_CHAR_CLASS); xerrs("[c-a]", ErrorMessages.EMPTY_RANGE_IN_CHAR_CLASS); xerrs("\\x{FFFFFFFF}", ErrorMessages.ERR_TOO_BIG_WIDE_CHAR_VALUE); xerrs("\\x{100000000}", ErrorMessages.ERR_TOO_LONG_WIDE_CHAR_VALUE); xerrs("\\u026x", ErrorMessages.TOO_SHORT_DIGITS); xerrs("()(?\\!(?'a')\\1)", ErrorMessages.UNDEFINED_GROUP_OPTION); xerrs("\\((", ErrorMessages.END_PATTERN_WITH_UNMATCHED_PARENTHESIS); xerrs("(|", ErrorMessages.END_PATTERN_WITH_UNMATCHED_PARENTHESIS); xerrs("'/g\\\u00ff\u00ff\u00ff\u00ff&))", ErrorMessages.UNMATCHED_CLOSE_PARENTHESIS); xerrs("[0-0-\u00ff ", ErrorMessages.PREMATURE_END_OF_CHAR_CLASS); // \xe2 xerrs("\\p{foobarbaz}", ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME.replace("%n", "foobarbaz")); //xerrs("\\p{あ}", ErrorMessages.ERR_INVALID_CHAR_PROPERTY_NAME.replace("%n", "あ")); xerrs("a{100001}", ErrorMessages.TOO_BIG_NUMBER_FOR_REPEAT_RANGE); xerrs("a{0,100001}", ErrorMessages.TOO_BIG_NUMBER_FOR_REPEAT_RANGE); xerrs("a{5,1}", ErrorMessages.UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE); xerrs("[\\6000", ErrorMessages.TOO_BIG_NUMBER); // CVE-2017-9226 xerrs("[\\H- ]", 
ErrorMessages.UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS); // CVE-2017-9228 xerrs("[a-\\d]", ErrorMessages.CHAR_CLASS_VALUE_AT_END_OF_RANGE); xerrs("(?:ab|cd)*\\1", ErrorMessages.INVALID_BACKREF); xerrs("(ab|cd)*\\1", ErrorMessages.INVALID_BACKREF, Option.DONT_CAPTURE_GROUP); xerrs("(.(?=\\g<1>))", ErrorMessages.NEVER_ENDING_RECURSION); xerrs("(a)(?b)\\g<1>\\g", ErrorMessages.NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); // xerrs("(?<", ErrorMessages.ERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS); xerrs("(?<>)", ErrorMessages.EMPTY_GROUP_NAME); //xerrs("(?<.>)", ErrorMessages.ERR_INVALID_CHAR_IN_GROUP_NAME); xerrs("\\g<1->", ErrorMessages.INVALID_CHAR_IN_GROUP_NAME.replace("%n", "1->")); xerrs("\\k<1/>", ErrorMessages.INVALID_GROUP_NAME.replace("%n", "1/")); // xerrs("\\k<1-1/>", ErrorMessages.ERR_INVALID_GROUP_NAME.replace("%n", "1-1/>")); // xerrs("\\k", ErrorMessages.ERR_INVALID_CHAR_IN_GROUP_NAME.replace("%n", "a/")); // xerrs("\\g<1>", ErrorMessages.UNDEFINED_GROUP_REFERENCE); xerrs("*", ErrorMessages.TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); xerrs("{1}", ErrorMessages.TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED); xerrs("(?a)(?b)\\g", ErrorMessages.MULTIPLEX_DEFINITION_NAME_CALL.replace("%n", "a")); xerrs("(a)?(?b)?(?(1)a)(?()b)", ErrorMessages.NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); xerrs("()(?(2))", ErrorMessages.INVALID_BACKREF); xerrs("(?(700000))", ErrorMessages.INVALID_BACKREF); xerrs("(? 
" + acceptableMaximumTime); } } jruby-joni-2.1.41/test/org/joni/test/TestNSU8.java000077500000000000000000000053541400407002500216150ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ package org.joni.test; import org.joni.Option; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.NonStrictUTF8Encoding; public class TestNSU8 extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return NonStrictUTF8Encoding.INSTANCE; } @Override public String testEncoding() { return "utf-8"; } @Override public Syntax syntax() { return Syntax.TEST; } @Override public void test() throws Exception { xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32, (byte)32, (byte)32}, 0, 5, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32, (byte)32}, 0, 4, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32, (byte)32}, 0, 3, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240, (byte)32}, 0, 2, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)240}, 0, 1, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32, (byte)32, (byte)32}, 0, 4, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32, (byte)32}, 0, 3, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224, (byte)32}, 0, 2, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)224}, 0, 1, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192, (byte)32, (byte)32}, 0, 3, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192, (byte)32}, 0, 2, 1, false); xx("([^\\[\\]]+)".getBytes(), new byte[]{(byte)192}, 0, 1, 1, false); } } jruby-joni-2.1.41/test/org/joni/test/TestPerl.java000077500000000000000000000031541400407002500217560ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the 
Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import org.joni.Option; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; public class TestPerl extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return ASCIIEncoding.INSTANCE; } @Override public String testEncoding() { return "iso-8859-2"; } @Override public Syntax syntax() { return Syntax.PerlNG; } @Override public void test() throws Exception { } } jruby-joni-2.1.41/test/org/joni/test/TestU.java000077500000000000000000002251451400407002500212660ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package org.joni.test; import org.joni.Option; import org.joni.Syntax; import org.jcodings.Encoding; import org.jcodings.specific.UTF16BEEncoding; public class TestU extends Test { @Override public int option() { return Option.DEFAULT; } @Override public Encoding encoding() { return UTF16BEEncoding.INSTANCE; } @Override public String testEncoding() { return "iso-8859-1"; } @Override public Syntax syntax() { return Syntax.TEST; } private int ulen(byte[]bytes) { return encoding().strByteLengthNull(bytes, 0, bytes.length); } private String uconv(byte []bytes, int len) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < len; i += 2) { int c = bytes[i] & 0xff; // sb.append(String.format("\\%03o", c)); if (c == 0) { c = bytes[i+1] & 0xff; if (c < 0x20 || c >= 0x7f || c == 0x5c || c == 0x22) { sb.append(String.format("\\%03o", c)); } else { sb.append(new String(new byte[]{(byte)c})); } } else { sb.append(String.format("\\%03o", c)); c = bytes[i+1] & 0xff; sb.append(String.format("\\%03o", c)); } } return sb.toString(); } protected String repr(byte[]bytes) { return uconv(bytes, ulen(bytes)); } protected int length(byte[]bytes) { return ulen(bytes); } @Override public void test() throws Exception { x2s("\000\000", "\000\000", 0, 0); x2s("\000^\000\000", "\000\000", 0, 0); x2s("\000$\000\000", "\000\000", 0, 0); x2s("\000\134\000G\000\000", "\000\000", 0, 0); x2s("\000\134\000A\000\000", "\000\000", 0, 0); x2s("\000\134\000Z\000\000", "\000\000", 0, 0); x2s("\000\134\000z\000\000", "\000\000", 
0, 0); x2s("\000^\000$\000\000", "\000\000", 0, 0); x2s("\000\134\000c\000a\000\000", "\000\001\000\000", 0, 2); x2s("\000\134\000C\000-\000b\000\000", "\000\002\000\000", 0, 2); x2s("\000\134\000c\000\134\000\134\000\000", "\000\034\000\000", 0, 2); x2s("\000q\000[\000\134\000c\000\134\000\134\000]\000\000", "\000q\000\034\000\000", 0, 4); x2s("\000\000", "\000a\000\000", 0, 0); x2s("\000a\000\000", "\000a\000\000", 0, 2); x2s("\000\134\000x\0000\0000\000\134\000x\0006\0001\000\000", "\000a\000\000", 0, 2); x2s("\000a\000a\000\000", "\000a\000a\000\000", 0, 4); x2s("\000a\000a\000a\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", "\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 70); x2s("\000a\000b\000\000", "\000a\000b\000\000", 0, 4); x2s("\000b\000\000", "\000a\000b\000\000", 2, 4); x2s("\000b\000c\000\000", "\000a\000b\000c\000\000", 2, 6); x2s("\000(\000?\000i\000:\000#\000R\000E\000T\000#\000)\000\000", "\000#\000I\000N\000S\000#\000#\000R\000E\000T\000#\000\000", 10, 20); x2s("\000\134\0000\0000\0000\000\134\0001\0007\000\000", "\000\017\000\000", 0, 2); x2s("\000\134\000x\0000\0000\000\134\000x\0001\000f\000\000", "\000\037\000\000", 0, 2); x2s("\000a\000(\000?\000#\000.\000.\000.\000.\000\134\000\134\000J\000J\000J\000J\000)\000b\000\000", "\000a\000b\000\000", 0, 4); x2s("\000(\000?\000x\000)\000 \000 \000G\000 \000(\000o\000 \000O\000(\000?\000-\000x\000)\000o\000O\000)\000 \000g\000 \000L\000\000", "\000G\000o\000O\000o\000O\000g\000L\000e\000\000", 0, 14); x2s("\000.\000\000", "\000a\000\000", 0, 2); ns("\000.\000\000", "\000\000"); x2s("\000.\000.\000\000", "\000a\000b\000\000", 0, 4); x2s("\000\134\000w\000\000", 
"\000e\000\000", 0, 2); ns("\000\134\000W\000\000", "\000e\000\000"); x2s("\000\134\000s\000\000", "\000 \000\000", 0, 2); x2s("\000\134\000S\000\000", "\000b\000\000", 0, 2); x2s("\000\134\000d\000\000", "\0004\000\000", 0, 2); ns("\000\134\000D\000\000", "\0004\000\000"); x2s("\000\134\000b\000\000", "\000z\000 \000\000", 0, 0); x2s("\000\134\000b\000\000", "\000 \000z\000\000", 2, 2); x2s("\000\134\000B\000\000", "\000z\000z\000 \000\000", 2, 2); x2s("\000\134\000B\000\000", "\000z\000 \000\000", 4, 4); x2s("\000\134\000B\000\000", "\000 \000z\000\000", 0, 0); x2s("\000[\000a\000b\000]\000\000", "\000b\000\000", 0, 2); ns("\000[\000a\000b\000]\000\000", "\000c\000\000"); x2s("\000[\000a\000-\000z\000]\000\000", "\000t\000\000", 0, 2); ns("\000[\000^\000a\000]\000\000", "\000a\000\000"); x2s("\000[\000^\000a\000]\000\000", "\000\012\000\000", 0, 2); x2s("\000[\000]\000]\000\000", "\000]\000\000", 0, 2); ns("\000[\000^\000]\000]\000\000", "\000]\000\000"); x2s("\000[\000\134\000^\000]\000+\000\000", "\0000\000^\000^\0001\000\000", 2, 6); x2s("\000[\000b\000-\000]\000\000", "\000b\000\000", 0, 2); x2s("\000[\000b\000-\000]\000\000", "\000-\000\000", 0, 2); x2s("\000[\000\134\000w\000]\000\000", "\000z\000\000", 0, 2); ns("\000[\000\134\000w\000]\000\000", "\000 \000\000"); x2s("\000[\000\134\000W\000]\000\000", "\000b\000$\000\000", 2, 4); x2s("\000[\000\134\000d\000]\000\000", "\0005\000\000", 0, 2); ns("\000[\000\134\000d\000]\000\000", "\000e\000\000"); x2s("\000[\000\134\000D\000]\000\000", "\000t\000\000", 0, 2); ns("\000[\000\134\000D\000]\000\000", "\0003\000\000"); x2s("\000[\000\134\000s\000]\000\000", "\000 \000\000", 0, 2); ns("\000[\000\134\000s\000]\000\000", "\000a\000\000"); x2s("\000[\000\134\000S\000]\000\000", "\000b\000\000", 0, 2); ns("\000[\000\134\000S\000]\000\000", "\000 \000\000"); x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "\0002\000\000", 0, 2); ns("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000\000"); 
x2s("\000[\000[\000:\000u\000p\000p\000e\000r\000:\000]\000]\000\000", "\000B\000\000", 0, 2); x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000+\000\000", 0, 2); x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000G\000H\000I\000K\000K\000-\0009\000+\000*\000\000", 12, 14); x2s("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000-\000@\000^\000+\000\000", 6, 8); ns("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000A\000\000"); x2s("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000:\000\000", 0, 2); x2s("\000[\000\134\0000\0000\0000\000\134\0000\0004\0004\000-\000\134\0000\0000\0000\000\134\0000\0004\0007\000]\000\000", "\000&\000\000", 0, 2); x2s("\000[\000\134\000x\0000\0000\000\134\000x\0005\000a\000-\000\134\000x\0000\0000\000\134\000x\0005\000c\000]\000\000", "\000[\000\000", 0, 2); x2s("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000l\000\000", 0, 2); ns("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000n\000\000"); ns("\000^\000[\0000\000-\0009\000A\000-\000F\000]\000+\000 \0000\000+\000 \000U\000N\000D\000E\000F\000 \000\000", "\0007\0005\000F\000 \0000\0000\0000\0000\0000\0000\0000\0000\000 \000S\000E\000C\000T\0001\0004\000A\000 \000n\000o\000t\000y\000p\000e\000 \000(\000)\000 \000 \000 \000 \000E\000x\000t\000e\000r\000n\000a\000l\000 \000 \000 \000 \000|\000 \000_\000r\000b\000_\000a\000p\000p\000l\000y\000\000"); x2s("\000[\000\134\000[\000]\000\000", "\000[\000\000", 0, 2); x2s("\000[\000\134\000]\000]\000\000", "\000]\000\000", 0, 2); x2s("\000[\000&\000]\000\000", "\000&\000\000", 0, 2); x2s("\000[\000[\000a\000b\000]\000]\000\000", "\000b\000\000", 0, 2); x2s("\000[\000[\000a\000b\000]\000c\000]\000\000", "\000c\000\000", 0, 2); 
ns("\000[\000[\000^\000a\000]\000]\000\000", "\000a\000\000"); ns("\000[\000^\000[\000a\000]\000]\000\000", "\000a\000\000"); x2s("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000b\000\000", 0, 2); ns("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000a\000\000"); ns("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000c\000\000"); x2s("\000[\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000", 0, 2); ns("\000[\000^\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000"); x2s("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000b\000\000", 0, 2); ns("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000a\000\000"); x2s("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000h\000\000", 0, 2); ns("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000c\000\000"); x2s("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000c\000\000", 0, 2); x2s("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000e\000\000", 0, 2); ns("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000f\000\000"); x2s("\000[\000a\000-\000&\000&\000-\000a\000]\000\000", "\000-\000\000", 0, 2); ns("\000[\000a\000\134\000-\000&\000&\000\134\000-\000a\000]\000\000", "\000&\000\000"); ns("\000\134\000w\000a\000b\000c\000\000", "\000 \000a\000b\000c\000\000"); x2s("\000a\000\134\000W\000b\000c\000\000", "\000a\000 \000b\000c\000\000", 0, 8); x2s("\000a\000.\000b\000.\000c\000\000", "\000a\000a\000b\000b\000c\000\000", 0, 10); x2s("\000.\000\134\000w\000b\000\134\000W\000.\000.\000c\000\000", "\000a\000b\000b\000 
\000b\000c\000c\000\000", 0, 14); x2s("\000\134\000s\000\134\000w\000z\000z\000z\000\000", "\000 \000z\000z\000z\000z\000\000", 0, 10); x2s("\000a\000a\000.\000b\000\000", "\000a\000a\000b\000b\000\000", 0, 8); ns("\000.\000a\000\000", "\000a\000b\000\000"); x2s("\000.\000a\000\000", "\000a\000a\000\000", 0, 4); x2s("\000^\000a\000\000", "\000a\000\000", 0, 2); x2s("\000^\000a\000$\000\000", "\000a\000\000", 0, 2); x2s("\000^\000\134\000w\000$\000\000", "\000a\000\000", 0, 2); ns("\000^\000\134\000w\000$\000\000", "\000 \000\000"); x2s("\000^\000\134\000w\000a\000b\000$\000\000", "\000z\000a\000b\000\000", 0, 6); x2s("\000^\000\134\000w\000a\000b\000c\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); x2s("\000^\000\134\000w\000.\000.\000.\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14); x2s("\000\134\000w\000\134\000w\000\134\000s\000\134\000W\000a\000a\000a\000\134\000d\000\000", "\000a\000a\000 \000 \000a\000a\000a\0004\000\000", 0, 16); x2s("\000\134\000A\000\134\000Z\000\000", "\000\000", 0, 0); x2s("\000\134\000A\000x\000y\000z\000\000", "\000x\000y\000z\000\000", 0, 6); x2s("\000x\000y\000z\000\134\000Z\000\000", "\000x\000y\000z\000\000", 0, 6); x2s("\000x\000y\000z\000\134\000z\000\000", "\000x\000y\000z\000\000", 0, 6); x2s("\000a\000\134\000Z\000\000", "\000a\000\000", 0, 2); x2s("\000\134\000G\000a\000z\000\000", "\000a\000z\000\000", 0, 4); ns("\000\134\000G\000z\000\000", "\000b\000z\000a\000\000"); ns("\000a\000z\000\134\000G\000\000", "\000a\000z\000\000"); ns("\000a\000z\000\134\000A\000\000", "\000a\000z\000\000"); ns("\000a\000\134\000A\000z\000\000", "\000a\000z\000\000"); x2s("\000\134\000^\000\134\000$\000\000", "\000^\000$\000\000", 0, 4); x2s("\000^\000x\000?\000y\000\000", "\000x\000y\000\000", 0, 4); x2s("\000^\000(\000x\000?\000y\000)\000\000", "\000x\000y\000\000", 0, 4); x2s("\000\134\000w\000\000", "\000_\000\000", 0, 2); ns("\000\134\000W\000\000", "\000_\000\000"); 
x2s("\000(\000?\000=\000z\000)\000z\000\000", "\000z\000\000", 0, 2); ns("\000(\000?\000=\000z\000)\000.\000\000", "\000a\000\000"); x2s("\000(\000?\000!\000z\000)\000a\000\000", "\000a\000\000", 0, 2); ns("\000(\000?\000!\000z\000)\000a\000\000", "\000z\000\000"); x2s("\000(\000?\000i\000:\000a\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000i\000:\000a\000)\000\000", "\000A\000\000", 0, 2); x2s("\000(\000?\000i\000:\000A\000)\000\000", "\000a\000\000", 0, 2); ns("\000(\000?\000i\000:\000A\000)\000\000", "\000b\000\000"); x2s("\000(\000?\000i\000:\000[\000A\000-\000Z\000]\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000H\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000h\000\000", 0, 2); ns("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000e\000\000"); x2s("\000(\000?\000i\000:\000[\000A\000-\000c\000]\000)\000\000", "\000D\000\000", 0, 2); ns("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000A\000\000"); ns("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000a\000\000"); x2s("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\000Z\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\0007\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000b\000\000", 0, 2); x2s("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000{\000\000", 0, 2); x2s("\000(\000?\000i\000:\000\134\000?\000a\000)\000\000", "\000?\000A\000\000", 0, 4); x2s("\000(\000?\000i\000:\000\134\000*\000A\000)\000\000", "\000*\000a\000\000", 0, 4); ns("\000.\000\000", "\000\012\000\000"); x2s("\000(\000?\000m\000:\000.\000)\000\000", "\000\012\000\000", 0, 2); x2s("\000(\000?\000m\000:\000a\000.\000)\000\000", "\000a\000\012\000\000", 0, 4); x2s("\000(\000?\000m\000:\000.\000b\000)\000\000", "\000a\000\012\000b\000\000", 2, 6); 
x2s("\000.\000*\000a\000b\000c\000\000", "\000d\000d\000d\000a\000b\000d\000d\000\012\000d\000d\000a\000b\000c\000\000", 16, 26); x2s("\000(\000?\000m\000:\000.\000*\000a\000b\000c\000)\000\000", "\000d\000d\000d\000a\000b\000d\000d\000a\000b\000c\000\000", 0, 20); ns("\000(\000?\000i\000)\000(\000?\000-\000i\000)\000a\000\000", "\000A\000\000"); ns("\000(\000?\000i\000)\000(\000?\000-\000i\000:\000a\000)\000\000", "\000A\000\000"); x2s("\000a\000?\000\000", "\000\000", 0, 0); x2s("\000a\000?\000\000", "\000b\000\000", 0, 0); x2s("\000a\000?\000\000", "\000a\000\000", 0, 2); x2s("\000a\000*\000\000", "\000\000", 0, 0); x2s("\000a\000*\000\000", "\000a\000\000", 0, 2); x2s("\000a\000*\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000a\000*\000\000", "\000b\000a\000a\000a\000a\000\000", 0, 0); ns("\000a\000+\000\000", "\000\000"); x2s("\000a\000+\000\000", "\000a\000\000", 0, 2); x2s("\000a\000+\000\000", "\000a\000a\000a\000a\000\000", 0, 8); x2s("\000a\000+\000\000", "\000a\000a\000b\000b\000b\000\000", 0, 4); x2s("\000a\000+\000\000", "\000b\000a\000a\000a\000a\000\000", 2, 10); x2s("\000.\000?\000\000", "\000\000", 0, 0); x2s("\000.\000?\000\000", "\000f\000\000", 0, 2); x2s("\000.\000?\000\000", "\000\012\000\000", 0, 0); x2s("\000.\000*\000\000", "\000\000", 0, 0); x2s("\000.\000*\000\000", "\000a\000b\000c\000d\000e\000\000", 0, 10); x2s("\000.\000+\000\000", "\000z\000\000", 0, 2); x2s("\000.\000+\000\000", "\000z\000d\000s\000w\000e\000r\000\012\000\000", 0, 12); x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000b\000f\000b\000a\000c\000\000", 0, 8); x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); x2s("\000(\000(\000.\000*\000)\000a\000\134\0002\000f\000)\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14); x2s("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", 
"\000b\000a\000c\000z\000z\000z\000z\000z\000z\000\012\000b\000a\000z\000z\000\012\000z\000z\000z\000z\000b\000a\000b\000f\000\000", 38, 46); x2s("\000a\000|\000b\000\000", "\000a\000\000", 0, 2); x2s("\000a\000|\000b\000\000", "\000b\000\000", 0, 2); x2s("\000|\000a\000\000", "\000a\000\000", 0, 0); x2s("\000(\000|\000a\000)\000\000", "\000a\000\000", 0, 0); x2s("\000a\000b\000|\000b\000c\000\000", "\000a\000b\000\000", 0, 4); x2s("\000a\000b\000|\000b\000c\000\000", "\000b\000c\000\000", 0, 4); x2s("\000z\000(\000?\000:\000a\000b\000|\000b\000c\000)\000\000", "\000z\000b\000c\000\000", 0, 6); x2s("\000a\000(\000?\000:\000a\000b\000|\000b\000c\000)\000c\000\000", "\000a\000a\000b\000c\000\000", 0, 8); x2s("\000a\000b\000|\000(\000?\000:\000a\000c\000|\000a\000z\000)\000\000", "\000a\000z\000\000", 0, 4); x2s("\000a\000|\000b\000|\000c\000\000", "\000d\000c\000\000", 2, 4); x2s("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000p\000q\000r\000\000", 0, 4); ns("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000m\000n\000\000"); x2s("\000a\000|\000^\000z\000\000", "\000b\000a\000\000", 2, 4); x2s("\000a\000|\000^\000z\000\000", "\000z\000a\000\000", 0, 2); x2s("\000a\000|\000\134\000G\000z\000\000", "\000b\000z\000a\000\000", 4, 6); x2s("\000a\000|\000\134\000G\000z\000\000", "\000z\000a\000\000", 0, 2); x2s("\000a\000|\000\134\000A\000z\000\000", "\000b\000z\000a\000\000", 4, 6); x2s("\000a\000|\000\134\000A\000z\000\000", "\000z\000a\000\000", 0, 2); x2s("\000a\000|\000b\000\134\000Z\000\000", "\000b\000a\000\000", 2, 4); x2s("\000a\000|\000b\000\134\000Z\000\000", "\000b\000\000", 0, 2); x2s("\000a\000|\000b\000\134\000z\000\000", "\000b\000a\000\000", 2, 4); 
x2s("\000a\000|\000b\000\134\000z\000\000", "\000b\000\000", 0, 2); x2s("\000\134\000w\000|\000\134\000s\000\000", "\000 \000\000", 0, 2); ns("\000\134\000w\000|\000\134\000w\000\000", "\000 \000\000"); x2s("\000\134\000w\000|\000%\000\000", "\000%\000\000", 0, 2); x2s("\000\134\000w\000|\000[\000&\000$\000]\000\000", "\000&\000\000", 0, 2); x2s("\000[\000b\000-\000d\000]\000|\000[\000^\000e\000-\000z\000]\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000d\000z\000\000", 0, 2); x2s("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000b\000z\000\000", 0, 4); x2s("\000a\000b\000c\000|\000(\000?\000=\000z\000z\000)\000.\000.\000f\000\000", "\000z\000z\000f\000\000", 0, 6); x2s("\000a\000b\000c\000|\000(\000?\000!\000z\000z\000)\000.\000.\000f\000\000", "\000a\000b\000f\000\000", 0, 6); x2s("\000(\000?\000=\000z\000a\000)\000.\000.\000a\000|\000(\000?\000=\000z\000z\000)\000.\000.\000a\000\000", "\000z\000z\000a\000\000", 0, 6); ns("\000(\000?\000>\000a\000|\000a\000b\000d\000)\000c\000\000", "\000a\000b\000d\000c\000\000"); x2s("\000(\000?\000>\000a\000b\000d\000|\000a\000)\000c\000\000", "\000a\000b\000d\000c\000\000", 0, 8); x2s("\000a\000?\000|\000b\000\000", "\000a\000\000", 0, 2); x2s("\000a\000?\000|\000b\000\000", "\000b\000\000", 0, 0); x2s("\000a\000?\000|\000b\000\000", "\000\000", 0, 0); x2s("\000a\000*\000|\000b\000\000", "\000a\000a\000\000", 0, 4); x2s("\000a\000*\000|\000b\000*\000\000", "\000b\000a\000\000", 0, 0); x2s("\000a\000*\000|\000b\000*\000\000", "\000a\000b\000\000", 0, 2); x2s("\000a\000+\000|\000b\000*\000\000", "\000\000", 0, 0); x2s("\000a\000+\000|\000b\000*\000\000", "\000b\000b\000b\000\000", 0, 6); x2s("\000a\000+\000|\000b\000*\000\000", "\000a\000b\000b\000b\000\000", 0, 2); ns("\000a\000+\000|\000b\000+\000\000", "\000\000"); x2s("\000(\000a\000|\000b\000)\000?\000\000", "\000b\000\000", 0, 2); 
x2s("\000(\000a\000|\000b\000)\000*\000\000", "\000b\000a\000\000", 0, 4); x2s("\000(\000a\000|\000b\000)\000+\000\000", "\000b\000a\000b\000\000", 0, 6); x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000c\000a\000a\000b\000b\000c\000\000", 0, 8); x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000a\000b\000c\000a\000\000", 2, 10); x2s("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000b\000z\000c\000a\000\000", 0, 4); x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 10); x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000\000", 2, 4); x2s("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000a\000a\000b\000a\000\000", 2, 8); x2s("\000(\000?\000:\000a\000|\000b\000)\000(\000?\000:\000a\000|\000b\000)\000\000", "\000a\000b\000\000", 0, 4); x2s("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000*\000|\000b\000*\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 6); x2s("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000+\000|\000b\000+\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); x2s("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); x2s("\000h\000{\0000\000,\000}\000\000", "\000h\000h\000h\000h\000\000", 0, 8); x2s("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0001\000,\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12); ns("\000a\000x\000{\0002\000}\000*\000a\000\000", "\0000\000a\000x\000x\000x\000a\0001\000\000"); ns("\000a\000.\000{\0000\000,\0002\000}\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); ns("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000"); ns("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000X\000a\0000\000\000"); 
x2s("\000^\000a\000{\0002\000,\000}\000?\000a\000$\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000^\000[\000a\000-\000z\000]\000{\0002\000,\000}\000?\000$\000\000", "\000a\000a\000a\000\000", 0, 6); x2s("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000c\000c\000\000", 0, 4); ns("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000a\000b\000c\000c\000\000"); x2s("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000a\000b\000c\000\000", 12, 16); x2s("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000b\000c\000\000", 0, 14); x2s("\000a\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); x2s("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000C\000\000", 0, 2); x2s("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000A\000\000", 0, 2); x2s("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000C\000\000", 0, 2); ns("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000A\000\000"); x2s("\000[\000a\000b\000c\000]\000?\000\000", "\000a\000b\000c\000\000", 0, 2); x2s("\000[\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 6); x2s("\000[\000^\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 0); ns("\000[\000^\000a\000b\000c\000]\000+\000\000", "\000a\000b\000c\000\000"); x2s("\000a\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); x2s("\000b\000a\000?\000?\000b\000\000", "\000b\000a\000b\000\000", 0, 6); x2s("\000a\000*\000?\000\000", "\000a\000a\000a\000\000", 0, 0); x2s("\000b\000a\000*\000?\000\000", "\000b\000a\000a\000\000", 0, 2); x2s("\000b\000a\000*\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); x2s("\000a\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); x2s("\000b\000a\000+\000?\000\000", "\000b\000a\000a\000\000", 0, 4); x2s("\000b\000a\000+\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8); 
x2s("\000(\000?\000:\000a\000?\000)\000?\000?\000\000", "\000a\000\000", 0, 0); x2s("\000(\000?\000:\000a\000?\000?\000)\000?\000\000", "\000a\000\000", 0, 0); x2s("\000(\000?\000:\000a\000?\000)\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2); x2s("\000(\000?\000:\000a\000+\000)\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0); x2s("\000(\000?\000:\000a\000+\000)\000?\000?\000b\000\000", "\000a\000a\000a\000b\000\000", 0, 8); x2s("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 8); x2s("\000(\000?\000:\000a\000b\000)\000*\000{\0000\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 0); x2s("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); ns("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000\000"); x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000\000", 0, 12); x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16); x2s("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000?\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 8); x2s("\000(\000?\000:\000a\000b\000)\000{\000,\000}\000\000", "\000a\000b\000{\000,\000}\000\000", 0, 10); x2s("\000(\000?\000:\000a\000b\000c\000)\000+\000?\000{\0002\000}\000\000", "\000a\000b\000c\000a\000b\000c\000a\000b\000c\000\000", 0, 12); x2s("\000(\000?\000:\000X\000*\000)\000(\000?\000i\000:\000x\000a\000)\000\000", "\000X\000X\000X\000a\000\000", 0, 8); x2s("\000(\000d\000+\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); x2s("\000(\000[\000^\000a\000b\000c\000]\000*\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", 
"\000d\000d\000d\000z\000\000", 0, 8); x2s("\000(\000\134\000w\000+\000)\000(\000\134\000w\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8); x3s("\000(\000a\000)\000\000", "\000a\000\000", 0, 2, 1); x3s("\000(\000a\000b\000)\000\000", "\000a\000b\000\000", 0, 4, 1); x2s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4); x3s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 1); x3s("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 2); x3s("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000a\000b\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 20); x3s("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 0, 4, 1); x3s("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 4, 8, 2); x3s("\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 3); x3s("\000(\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 4); x2s("\000(\000^\000a\000)\000\000", "\000a\000\000", 0, 2); x3s("\000(\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 1); x3s("\000(\000^\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 2); x3s("\000(\000a\000?\000)\000\000", "\000a\000a\000a\000\000", 0, 2, 1); x3s("\000(\000a\000*\000)\000\000", "\000a\000a\000a\000\000", 0, 6, 1); x3s("\000(\000a\000*\000)\000\000", "\000\000", 0, 0, 1); x3s("\000(\000a\000+\000)\000\000", "\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 14, 1); x3s("\000(\000a\000+\000|\000b\000*\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 6, 1); 
x3s("\000(\000a\000+\000|\000b\000?\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 2, 1); x3s("\000(\000a\000b\000c\000)\000?\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000a\000b\000c\000)\000*\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000x\000y\000z\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000[\000x\000y\000z\000]\000[\000a\000b\000c\000]\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1); x3s("\000(\000(\000?\000i\000:\000a\000b\000c\000)\000)\000\000", "\000A\000b\000C\000\000", 0, 6, 1); x2s("\000(\000a\000b\000c\000)\000(\000?\000i\000:\000\134\0001\000)\000\000", "\000a\000b\000c\000A\000B\000C\000\000", 0, 12); x3s("\000(\000(\000?\000m\000:\000a\000.\000c\000)\000)\000\000", "\000a\000\012\000c\000\000", 0, 6, 1); x3s("\000(\000(\000?\000=\000a\000z\000)\000a\000)\000\000", "\000a\000z\000b\000\000", 0, 2, 1); x3s("\000a\000b\000c\000|\000(\000.\000a\000b\000d\000)\000\000", "\000z\000a\000b\000d\000\000", 0, 8, 1); x2s("\000(\000?\000:\000a\000b\000c\000)\000|\000(\000A\000B\000C\000)\000\000", "\000a\000b\000c\000\000", 0, 6); x3s("\000(\000?\000i\000:\000(\000a\000b\000c\000)\000)\000|\000(\000z\000z\000z\000)\000\000", "\000A\000B\000C\000\000", 0, 6, 1); x3s("\000a\000*\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 8, 10, 1); x3s("\000a\000*\000?\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 0, 2, 1); x3s("\000a\000*\000?\000(\000c\000)\000\000", "\000a\000a\000a\000a\000c\000\000", 8, 10, 1); x3s("\000[\000b\000c\000d\000]\000a\000*\000(\000.\000)\000\000", "\000c\000a\000a\000a\000a\000z\000\000", 10, 12, 1); x3s("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); ns("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); 
x3s("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1); ns("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000"); x3s("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000\000", 4, 8, 1); ns("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000b\000\000"); ns("\000(\000\134\0001\000)\000\000", "\000\000"); ns("\000\134\0001\000(\000a\000)\000\000", "\000a\000a\000\000"); ns("\000(\000a\000(\000b\000)\000\134\0001\000)\000\134\0002\000+\000\000", "\000a\000b\000a\000b\000b\000\000"); ns("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000\000"); x2s("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000a\000\000", 0, 8); x2s("\000(\000a\000)\000(\000?\000=\000\134\0001\000)\000\000", "\000a\000a\000\000", 0, 2); ns("\000(\000a\000)\000$\000|\000\134\0001\000\000", "\000a\000z\000\000"); x2s("\000(\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); ns("\000(\000a\000)\000\134\0001\000\000", "\000a\000b\000\000"); x2s("\000(\000a\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); x2s("\000(\000a\000?\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 0); x2s("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 8); x3s("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 4, 1); x2s("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000b\000b\000b\000\000", 0, 10); x2s("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000\000", 0, 2); x2s("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0001\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000a\000a\000a\000b\000b\000\000", 0, 20); x2s("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000b\000b\000\000", 0, 14); 
x2s("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 16); x3s("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 6, 7); x2s("\000(\000a\000)\000(\000b\000)\000(\000c\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "\000a\000b\000c\000b\000a\000c\000\000", 0, 12); x2s("\000(\000[\000a\000-\000d\000]\000)\000\134\0001\000\000", "\000c\000c\000\000", 0, 4); x2s("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000 \000\000", 0, 12); ns("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000\000"); x2s("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000w\000h\000o\000w\000h\000o\000\000", 0, 12); x2s("\000.\000.\000.\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000a\000b\000c\000w\000h\000o\000w\000h\000o\000\000", 0, 18); x2s("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000c\000b\000c\000c\000b\000c\000\000", 0, 12); x2s("\000(\000^\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4); ns("\000(\000^\000a\000)\000\134\0001\000\000", "\000b\000a\000a\000\000"); ns("\000(\000a\000$\000)\000\134\0001\000\000", "\000a\000a\000\000"); ns("\000(\000a\000b\000\134\000Z\000)\000\134\0001\000\000", "\000a\000b\000\000"); x2s("\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000a\000\000", 2, 2); x2s("\000.\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000b\000a\000\000", 2, 4); x3s("\000(\000.\000(\000a\000b\000c\000)\000\134\0002\000)\000\000", "\000z\000a\000b\000c\000a\000b\000c\000\000", 0, 14, 1); 
x3s("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "\000z\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); x2s("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000A\000z\000\000", 0, 8); ns("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000a\000z\000\000"); x2s("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000a\000b\000\000", 2, 4); ns("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000b\000b\000\000"); x2s("\000(\000?\000<\000=\000a\000|\000b\000)\000b\000\000", "\000b\000b\000\000", 2, 4); x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000b\000c\000b\000\000", 4, 6); x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000a\000b\000\000", 2, 4); x2s("\000(\000?\000<\000=\000a\000|\000b\000c\000|\000|\000d\000e\000f\000g\000h\000i\000j\000|\000k\000l\000m\000n\000o\000p\000q\000|\000r\000)\000z\000\000", "\000r\000z\000\000", 2, 4); x2s("\000(\000a\000)\000\134\000g\000<\0001\000>\000\000", "\000a\000a\000\000", 0, 4); x2s("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000c\000b\000\000", 2, 4); ns("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000a\000b\000\000"); x2s("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000b\000\000", "\000b\000b\000b\000\000", 0, 2); ns("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000z\000\000", "\000b\000c\000z\000\000"); x2s("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000a\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000<\000n\000a\000m\000e\000_\0002\000>\000a\000b\000)\000\134\000g\000<\000n\000a\000m\000e\000_\0002\000>\000\000", "\000a\000b\000a\000b\000\000", 0, 8); x2s("\000(\000?\000<\000n\000a\000m\000e\000_\0003\000>\000.\000z\000v\000.\000)\000\134\000k\000<\000n\000a\000m\000e\000_\0003\000>\000\000", "\000a\000z\000v\000b\000a\000z\000v\000b\000\000", 0, 16); 
x2s("\000(\000?\000<\000=\000\134\000g\000<\000a\000b\000>\000)\000|\000-\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000a\000b\000>\000X\000y\000Z\000)\000\000", "\000X\000y\000Z\000\000", 6, 6); x2s("\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000n\000>\000)\000+\000\000", "\000\000", 0, 0); x2s("\000(\000?\000<\000n\000>\000|\000\134\000(\000\134\000g\000<\000n\000>\000\134\000)\000)\000+\000$\000\000", "\000(\000)\000(\000(\000)\000)\000\000", 0, 12); x3s("\000\134\000g\000<\000n\000>\000(\000?\000<\000n\000>\000.\000)\000{\0000\000}\000\000", "\000X\000\000", 0, 2, 1); x2s("\000\134\000g\000<\000n\000>\000(\000a\000b\000c\000|\000d\000f\000(\000?\000<\000n\000>\000.\000Y\000Z\000)\000{\0002\000,\0008\000}\000)\000{\0000\000}\000\000", "\000X\000Y\000Z\000\000", 0, 6); x2s("\000\134\000A\000(\000?\000<\000n\000>\000(\000a\000\134\000g\000<\000n\000>\000)\000|\000)\000\134\000z\000\000", "\000a\000a\000a\000a\000\000", 0, 8); x2s("\000(\000?\000<\000n\000>\000|\000\134\000g\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000a\000|\000(\000b\000)\000\134\000g\000<\000m\000>\000)\000\000", "\000b\000b\000b\000b\000a\000b\000b\000a\000\000", 0, 16); x2s("\000(\000?\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\134\000w\000+\000\134\000s\000x\000)\000a\000+\000\134\000k\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\000", "\000 \000 \000f\000g\000 \000x\000a\000a\000a\000a\000a\000a\000a\000a\000f\000g\000 \000x\000\000", 4, 36); x3s("\000(\000z\000)\000(\000)\000(\000)\000(\000?\000<\000_\0009\000>\000a\000)\000\134\000g\000<\000_\0009\000>\000\000", "\000z\000a\000a\000\000", 4, 6, 1); x2s("\000(\000.\000)\000(\000(\000(\000?\000<\000_\000>\000a\000)\000)\000)\000\134\000k\000<\000_\000>\000\000", "\000z\000a\000a\000\000", 0, 6); 
x2s("\000(\000(\000?\000<\000n\000a\000m\000e\0001\000>\000\134\000d\000)\000|\000(\000?\000<\000n\000a\000m\000e\0002\000>\000\134\000w\000)\000)\000(\000\134\000k\000<\000n\000a\000m\000e\0001\000>\000|\000\134\000k\000<\000n\000a\000m\000e\0002\000>\000)\000\000", "\000f\000f\000\000", 0, 4); x2s("\000(\000?\000:\000(\000?\000<\000x\000>\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000e\000f\000g\000\000", 6, 18); ns("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000\000"); x2s("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\0
00k\000<\000n\0001\000>\000$\000\000", "\000a\000-\000p\000y\000u\000m\000p\000y\000u\000m\000\000", 4, 20); x3s("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000x\000x\000x\000x\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000\000", 8, 36, 14); 
x3s("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0006\000>\000)\000(\000?\000<\000n\000a\000m\000e\0007\000>\000)\000(\000?\000<\000n\000a\000m\000e\0008\000>\000)\000(\000?\000<\000n\000a\000m\000e\0009\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0000\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0006\000>\000a\000a\000a\000)\000(\000?\000<\000n\000a\000m\000e\0001\0007\000>\000)\000$\000\000", "\000a\000a\000a\000\000", 0, 6, 16); x2s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000a\000\000", 0, 2); x2s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000\000", 0, 26); x3s("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 0, 34, 1); x2s("\000\134\000g\000<\000b\000a\000r\000>\000|\000\134\000z\000E\000N\000D\000(\000?\000<\000b\000a\000r\000>\000.\000*\000a\000b\000c\000$\000)\000\000", "\000a\000b\000c\000x\000x\000x\000a\000b\000c\000\000", 0, 18); x2s("\000\134\000g\000<\0001\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000\000", "\000b\000a\000c\000\000", 0, 6); 
x3s("\000\134\000g\000<\000_\000A\000>\000\134\000g\000<\000_\000A\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000(\000?\000<\000_\000A\000>\000.\000b\000.\000)\000\000", "\000x\000b\000x\000y\000b\000y\000\000", 6, 12, 1); x2s("\000\134\000A\000(\000?\000:\000\134\000g\000<\000p\000o\000n\000>\000|\000\134\000g\000<\000p\000a\000n\000>\000|\000\134\000z\000E\000N\000D\000 \000 \000(\000?\000<\000p\000a\000n\000>\000a\000|\000c\000\134\000g\000<\000p\000o\000n\000>\000c\000)\000(\000?\000<\000p\000o\000n\000>\000b\000|\000d\000\134\000g\000<\000p\000a\000n\000>\000d\000)\000)\000$\000\000", "\000c\000d\000c\000b\000c\000d\000c\000\000", 0, 14); x2s("\000\134\000A\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000m\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\000", "\000a\000a\000a\000a\000\000", 0, 8); x2s("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000\000", 2, 10); x2s("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000a\000a\000a\000a\000\000", 0, 20); x2s("\000(\000?\000<\000p\000a\000r\000e\000>\000\134\000(\000(\000[\000^\000\134\000(\000\134\000)\000]\000+\000+\000|\000\134\000g\000<\000p\000a\000r\000e\000>\000)\000*\000+\000\134\000)\000)\000\000", "\000(\000(\000a\000)\000)\000\000", 0, 10); x2s("\000(\000)\000*\000\134\0001\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000)\000*\000\134\0001\000\134\0002\000\000", "\000\000", 0, 0); x3s("\000(\000?\000:\000\134\0001\000a\000|\000(\000)\000)\000*\000\000", "\000a\000\000", 0, 0, 1); x2s("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000\000", "\0000\000x\0001\000x\0002\000x\0003\000\000", 2, 12); 
x2s("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000(\000?\000i\000:\000\134\0001\000)\000\134\000Z\000\000", "\0000\000x\0001\000x\0002\000x\0001\000X\0002\000\000", 2, 18); x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000\134\0005\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000x\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000b\000\134\0005\000\000", "\000b\000\000", 0, 2); x2s("\217\372\000\000", "\217\372\000\000", 0, 2); x2s("\000\000", "0B\000\000", 0, 0); x2s("0B\000\000", "0B\000\000", 0, 2); ns("0D\000\000", "0B\000\000"); x2s("0F0F\000\000", "0F0F\000\000", 0, 4); x2s("0B0D0F\000\000", "0B0D0F\000\000", 0, 6); x2s("0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", "0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", 0, 70); x2s("0B\000\000", "0D0B\000\000", 2, 4); x2s("0D0F\000\000", "0B0D0F\000\000", 2, 6); x2s("e\207\000\000", "e\207\000\000", 0, 2); x2s("\000.\000\000", "0B\000\000", 0, 2); x2s("\000.\000.\000\000", "0K0M\000\000", 0, 4); x2s("\000\134\000w\000\000", "0J\000\000", 0, 2); ns("\000\134\000W\000\000", "0B\000\000"); x2s("\000[\000\134\000W\000]\000\000", "0F\000$\000\000", 2, 4); x2s("\000\134\000S\000\000", "0]\000\000", 0, 2); x2s("\000\134\000S\000\000", "o\042\000\000", 0, 2); x2s("\000\134\000b\000\000", "l\027\000 \000\000", 0, 0); x2s("\000\134\000b\000\000", "\000 0{\000\000", 2, 2); x2s("\000\134\000B\000\000", "0[0]\000 \000\000", 2, 2); x2s("\000\134\000B\000\000", "0F\000 \000\000", 4, 4); x2s("\000\134\000B\000\000", "\000 0D\000\000", 0, 0); x2s("\000[0_0a\000]\000\000", "0a\000\000", 0, 2); ns("\000[0j0k\000]\000\000", "0l\000\000"); x2s("\000[0F\000-0J\000]\000\000", "0H\000\000", 0, 2); ns("\000[\000^0Q\000]\000\000", "0Q\000\000"); x2s("\000[\000\134\000w\000]\000\000", "0m\000\000", 0, 2); 
ns("\000[\000\134\000d\000]\000\000", "0u\000\000"); x2s("\000[\000\134\000D\000]\000\000", "0o\000\000", 0, 2); ns("\000[\000\134\000s\000]\000\000", "0O\000\000"); x2s("\000[\000\134\000S\000]\000\000", "0x\000\000", 0, 2); x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "0\210\000\000", 0, 2); x2s("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000 \000 0\210\000\000", 6, 8); ns("\000\134\000w\233<\216\312\000\000", "\000 \233<\216\312\000\000"); x2s("\233<\000\134\000W\216\312\000\000", "\233<\000 \216\312\000\000", 0, 6); x2s("0B\000.0D\000.0F\000\000", "0B0B0D0D0F\000\000", 0, 10); x2s("\000.\000\134\000w0F\000\134\000W\000.\000.0^\000\000", "0H0F0F\000 0F0^0^\000\000", 0, 14); x2s("\000\134\000s\000\134\000w0S0S0S\000\000", "\000 0S0S0S0S\000\000", 0, 10); x2s("0B0B\000.0Q\000\000", "0B0B0Q0Q\000\000", 0, 8); ns("\000.0D\000\000", "0D0H\000\000"); x2s("\000.0J\000\000", "0J0J\000\000", 0, 4); x2s("\000^0B\000\000", "0B\000\000", 0, 2); x2s("\000^0\200\000$\000\000", "0\200\000\000", 0, 2); x2s("\000^\000\134\000w\000$\000\000", "0k\000\000", 0, 2); x2s("\000^\000\134\000w0K0M0O0Q0S\000$\000\000", "\000z0K0M0O0Q0S\000\000", 0, 12); x2s("\000^\000\134\000w\000.\000.\000.0F0H0J\000$\000\000", "\000z0B0D0F0F0H0J\000\000", 0, 14); x2s("\000\134\000w\000\134\000w\000\134\000s\000\134\000W0J0J0J\000\134\000d\000\000", "\000a0J\000 \000 0J0J0J\0004\000\000", 0, 16); x2s("\000\134\000A0_0a0d\000\000", "0_0a0d\000\000", 0, 6); x2s("0\2000\2010\202\000\134\000Z\000\000", "0\2000\2010\202\000\000", 0, 6); x2s("0K0M0O\000\134\000z\000\000", "0K0M0O\000\000", 0, 6); x2s("0K0M0O\000\134\000Z\000\000", "0K0M0O\000\012\000\000", 0, 6); x2s("\000\134\000G0}0t\000\000", "0}0t\000\000", 0, 4); ns("\000\134\000G0H\000\000", "0F0H0J\000\000"); ns("0h0f\000\134\000G\000\000", "0h0f\000\000"); ns("0~0\177\000\134\000A\000\000", "0~0\177\000\000"); ns("0~\000\134\000A0\177\000\000", "0~0\177\000\000"); x2s("\000(\000?\000=0[\000)0[\000\000", "0[\000\000", 0, 2); 
ns("\000(\000?\000=0F\000)\000.\000\000", "0D\000\000"); x2s("\000(\000?\000!0F\000)0K\000\000", "0K\000\000", 0, 2); ns("\000(\000?\000!0h\000)0B\000\000", "0h\000\000"); x2s("\000(\000?\000i\000:0B\000)\000\000", "0B\000\000", 0, 2); x2s("\000(\000?\000i\000:0v0y\000)\000\000", "0v0y\000\000", 0, 4); ns("\000(\000?\000i\000:0D\000)\000\000", "0F\000\000"); x2s("\000(\000?\000m\000:0\210\000.\000)\000\000", "0\210\000\012\000\000", 0, 4); x2s("\000(\000?\000m\000:\000.0\201\000)\000\000", "0~\000\0120\201\000\000", 2, 6); x2s("0B\000?\000\000", "\000\000", 0, 0); x2s("Y\011\000?\000\000", "S\026\000\000", 0, 0); x2s("Y\011\000?\000\000", "Y\011\000\000", 0, 2); x2s("\221\317\000*\000\000", "\000\000", 0, 0); x2s("\221\317\000*\000\000", "\221\317\000\000", 0, 2); x2s("[P\000*\000\000", "[P[P[P\000\000", 0, 6); x2s("\231\254\000*\000\000", "\236\177\231\254\231\254\231\254\231\254\000\000", 0, 0); ns("\134q\000+\000\000", "\000\000"); x2s("l\263\000+\000\000", "l\263\000\000", 0, 2); x2s("fB\000+\000\000", "fBfBfBfB\000\000", 0, 8); x2s("0H\000+\000\000", "0H0H0F0F0F\000\000", 0, 4); x2s("0F\000+\000\000", "0J0F0F0F0F\000\000", 2, 10); x2s("\000.\000?\000\000", "0_\000\000", 0, 2); x2s("\000.\000*\000\000", "0q0t0w0z\000\000", 0, 8); x2s("\000.\000+\000\000", "0\215\000\000", 0, 2); x2s("\000.\000+\000\000", "0D0F0H0K\000\012\000\000", 0, 8); x2s("0B\000|0D\000\000", "0B\000\000", 0, 2); x2s("0B\000|0D\000\000", "0D\000\000", 0, 2); x2s("0B0D\000|0D0F\000\000", "0B0D\000\000", 0, 4); x2s("0B0D\000|0D0F\000\000", "0D0F\000\000", 0, 4); x2s("0\222\000(\000?\000:0K0M\000|0M0O\000)\000\000", "0\2220K0M\000\000", 0, 6); x2s("0\222\000(\000?\000:0K0M\000|0M0O\000)0Q\000\000", "0\2220M0O0Q\000\000", 0, 8); x2s("0B0D\000|\000(\000?\000:0B0F\000|0B0\222\000)\000\000", "0B0\222\000\000", 0, 4); x2s("0B\000|0D\000|0F\000\000", "0H0F\000\000", 2, 4); x2s("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", 
"0W0Y0[\000\000", 0, 6); ns("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0Y0[\000\000"); x2s("0B\000|\000^0\217\000\000", "0v0B\000\000", 2, 4); x2s("0B\000|\000^0\222\000\000", "0\2220B\000\000", 0, 2); x2s("\233<\000|\000\134\000G\216\312\000\000", "0Q\216\312\233<\000\000", 4, 6); x2s("\233<\000|\000\134\000G\216\312\000\000", "\216\312\233<\000\000", 0, 2); x2s("\233<\000|\000\134\000A\216\312\000\000", "\000b\216\312\233<\000\000", 4, 6); x2s("\233<\000|\000\134\000A\216\312\000\000", "\216\312\000\000", 0, 2); x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\233<\000\000", 2, 4); x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\000", 0, 2); x2s("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\012\000\000", 0, 2); x2s("\233<\000|\216\312\000\134\000z\000\000", "\216\312\233<\000\000", 2, 4); x2s("\233<\000|\216\312\000\134\000z\000\000", "\216\312\000\000", 0, 2); x2s("\000\134\000w\000|\000\134\000s\000\000", "0J\000\000", 0, 2); x2s("\000\134\000w\000|\000%\000\000", "\000%0J\000\000", 0, 2); x2s("\000\134\000w\000|\000[\000&\000$\000]\000\000", "0F\000&\000\000", 0, 2); x2s("\000[0D\000-0Q\000]\000\000", "0F\000\000", 0, 2); x2s("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0B\000\000", 0, 2); x2s("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0K\000\000", 0, 2); x2s("\000[\000^0B\000]\000\000", "\000\012\000\000", 0, 2); x2s("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0F0\222\000\000", 0, 2); x2s("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0D0\222\000\000", 0, 4); x2s("0B0D0F\000|\000(\000?\000=0Q0Q\000)\000.\000.0{\000\000", "0Q0Q0{\000\000", 0, 6); x2s("0B0D0F\000|\000(\000?\000!0Q0Q\000)\000.\000.0{\000\000", "0B0D0{\000\000", 0, 6); x2s("\000(\000?\000=0\2220B\000)\000.\000.0B\000|\000(\000?\000=0\2220\222\000)\000.\000.0B\000\000", "0\2220\2220B\000\000", 0, 6); 
x2s("\000(\000?\000<\000=0B\000|0D0F\000)0D\000\000", "0D0F0D\000\000", 4, 6); ns("\000(\000?\000>0B\000|0B0D0H\000)0F\000\000", "0B0D0H0F\000\000"); x2s("\000(\000?\000>0B0D0H\000|0B\000)0F\000\000", "0B0D0H0F\000\000", 0, 8); x2s("0B\000?\000|0D\000\000", "0B\000\000", 0, 2); x2s("0B\000?\000|0D\000\000", "0D\000\000", 0, 0); x2s("0B\000?\000|0D\000\000", "\000\000", 0, 0); x2s("0B\000*\000|0D\000\000", "0B0B\000\000", 0, 4); x2s("0B\000*\000|0D\000*\000\000", "0D0B\000\000", 0, 0); x2s("0B\000*\000|0D\000*\000\000", "0B0D\000\000", 0, 2); x2s("\000[\000a0B\000]\000*\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 4); x2s("0B\000+\000|0D\000*\000\000", "\000\000", 0, 0); x2s("0B\000+\000|0D\000*\000\000", "0D0D0D\000\000", 0, 6); x2s("0B\000+\000|0D\000*\000\000", "0B0D0D0D\000\000", 0, 2); x2s("0B\000+\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 0); ns("0B\000+\000|0D\000+\000\000", "\000\000"); x2s("\000(0B\000|0D\000)\000?\000\000", "0D\000\000", 0, 2); x2s("\000(0B\000|0D\000)\000*\000\000", "0D0B\000\000", 0, 4); x2s("\000(0B\000|0D\000)\000+\000\000", "0D0B0D\000\000", 0, 6); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 0, 8); x2s("\000(0B0D\000|0F0H\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 4, 12); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0B0D0F0B\000\000", 2, 10); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0D0\2220F0B\000\000", 0, 4); x2s("\000(0B0D\000|0F0B\000)\000+\000\000", "\000$\000$\000z\000z\000z\000z0B0D0\2220F0B\000\000", 12, 16); x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0B0D0B0D0B\000\000", 0, 10); x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B\000\000", 2, 4); x2s("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B0B0B0D0B\000\000", 2, 8); x2s("\000(\000?\000:0B\000|0D\000)\000(\000?\000:0B\000|0D\000)\000\000", "0B0D\000\000", 0, 4); x2s("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000*\000|0D\000*\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 6); 
x2s("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000+\000|0D\000+\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 12); x2s("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); x2s("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0001\000,\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12); x2s("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0F0F\000\000", 0, 4); ns("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0B0D0F0F\000\000"); x2s("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0B0D0F\000\000", 12, 16); x2s("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0D0F\000\000", 0, 14); x2s("0F\000{\0000\000,\000}\000\000", "0F0F0F0F\000\000", 0, 8); x2s("0B\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2); x2s("\000(\000?\000i\000)\000c\000|0B\000\000", "\000C\000\000", 0, 2); x2s("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000a\000\000", 0, 2); ns("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000A\000\000"); x2s("\000[0B0D0F\000]\000?\000\000", "0B0D0F\000\000", 0, 2); x2s("\000[0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 6); x2s("\000[\000^0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 0); ns("\000[\000^0B0D0F\000]\000+\000\000", "0B0D0F\000\000"); x2s("0B\000?\000?\000\000", "0B0B0B\000\000", 0, 0); x2s("0D0B\000?\000?0D\000\000", "0D0B0D\000\000", 0, 6); x2s("0B\000*\000?\000\000", "0B0B0B\000\000", 0, 0); x2s("0D0B\000*\000?\000\000", "0D0B0B\000\000", 0, 2); x2s("0D0B\000*\000?0D\000\000", "0D0B0B0D\000\000", 0, 8); x2s("0B\000+\000?\000\000", "0B0B0B\000\000", 0, 2); x2s("0D0B\000+\000?\000\000", "0D0B0B\000\000", 0, 4); x2s("0D0B\000+\000?0D\000\000", "0D0B0B0D\000\000", 0, 8); x2s("\000(\000?\000:Y)\000?\000)\000?\000?\000\000", "Y)\000\000", 0, 0); x2s("\000(\000?\000:Y)\000?\000?\000)\000?\000\000", "Y)\000\000", 0, 0); x2s("\000(\000?\000:Y\042\000?\000)\000+\000?\000\000", 
"Y\042Y\042Y\042\000\000", 0, 2); x2s("\000(\000?\000:\230\250\000+\000)\000?\000?\000\000", "\230\250\230\250\230\250\000\000", 0, 0); x2s("\000(\000?\000:\226\352\000+\000)\000?\000?\227\034\000\000", "\226\352\226\352\226\352\227\034\000\000", 0, 8); x2s("\000(\000?\000:0B0D\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0); x2s("\000(\000?\000:\233<\216\312\000)\000?\000{\0002\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 8); x2s("\000(\000?\000:\233<\216\312\000)\000*\000{\0000\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 0); x2s("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); ns("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\000\000"); x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 12); x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16); x2s("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000?\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 8); x2s("\000(\000?\000:\233<\216\312\000)\000{\000,\000}\000\000", "\233<\216\312\000{\000,\000}\000\000", 0, 10); x2s("\000(\000?\000:0K0M0O\000)\000+\000?\000{\0002\000}\000\000", "0K0M0O0K0M0O0K0M0O\000\000", 0, 12); x3s("\000(pk\000)\000\000", "pk\000\000", 0, 2, 1); x3s("\000(pkl4\000)\000\000", "pkl4\000\000", 0, 4, 1); x2s("\000(\000(fB\225\223\000)\000)\000\000", "fB\225\223\000\000", 0, 4); x3s("\000(\000(\230\250l4\000)\000)\000\000", "\230\250l4\000\000", 0, 4, 1); x3s("\000(\000(f(e\345\000)\000)\000\000", "f(e\345\000\000", 0, 4, 2); 
x3s("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\221\317[P\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\221\317[P\000\000", 0, 4, 20); x3s("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 0, 4, 1); x3s("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 4, 8, 2); x3s("\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 3); x3s("\000(\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000)\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 4); x3s("\000.\000*\000(0\3250\251\000)0\3630\3730\336\000(0\363\000(\000)0\2670\3450\277\000)0\2440\363\000\000", "0\3250\2510\3630\3730\3360\3630\2670\3450\2770\2440\363\000\000", 10, 18, 2); x2s("\000(\000^0B\000)\000\000", "0B\000\000", 0, 2); x3s("\000(0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 1); x3s("\000(\000^0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 2); x3s("\000(0B\000?\000)\000\000", "0B0B0B\000\000", 0, 2, 1); x3s("\000(0~\000*\000)\000\000", "0~0~0~\000\000", 0, 6, 1); x3s("\000(0h\000*\000)\000\000", "\000\000", 0, 0, 1); x3s("\000(0\213\000+\000)\000\000", "0\2130\2130\2130\2130\2130\2130\213\000\000", 0, 14, 1); x3s("\000(0u\000+\000|0x\000*\000)\000\000", "0u0u0u0x0x\000\000", 0, 6, 1); x3s("\000(0B\000+\000|0D\000?\000)\000\000", "0D0D0D0B0B\000\000", 0, 2, 1); x3s("\000(0B0D0F\000)\000?\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(0B0D0F\000)\000*\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(0U0W0Y\000|0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(\000[0j0k0l\000]\000[0K0M0O\000]\000|0K0M0O\000)\000+\000\000", "0K0M0O\000\000", 0, 6, 1); x3s("\000(\000(\000?\000i\000:0B0D0F\000)\000)\000\000", "0B0D0F\000\000", 0, 6, 1); x3s("\000(\000(\000?\000m\000:0B\000.0F\000)\000)\000\000", "0B\000\0120F\000\000", 0, 6, 
1); x3s("\000(\000(\000?\000=0B0\223\000)0B\000)\000\000", "0B0\2230D\000\000", 0, 2, 1); x3s("0B0D0F\000|\000(\000.0B0D0H\000)\000\000", "0\2230B0D0H\000\000", 0, 8, 1); x3s("0B\000*\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); x3s("0B\000*\000?\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 0, 2, 1); x3s("0B\000*\000?\000(0\223\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1); x3s("\000[0D0F0H\000]0B\000*\000(\000.\000)\000\000", "0H0B0B0B0B0\223\000\000", 10, 12, 1); x3s("\000(\000\134\000A0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); ns("\000(\000\134\000A0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); x3s("\000(\000^0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1); ns("\000(\000^0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000"); x3s("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\213\000\000", 4, 8, 1); ns("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\2130\213\000\000"); x2s("\000(q!\000)\000\134\0001\000\000", "q!q!\000\000", 0, 4); ns("\000(q!\000)\000\134\0001\000\000", "q!kf\000\000"); x2s("\000(zz\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 4); x2s("\000(zz\000?\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 0); x2s("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 8); x3s("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 4, 1); x2s("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D0D0D0D\000\000", 0, 10); x2s("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D\000\000", 0, 2); x2s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0001\000\134\0002\000\000", "0B0B0B0D0D0B0B0B0D0D\000\000", 0, 20); x2s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 0, 14); x3s("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 6, 10, 2); x2s("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 16); 
x3s("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 6, 7); x2s("\000(0o\000)\000(0r\000)\000(0u\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "0o0r0u0r0o0u\000\000", 0, 12); x2s("\000(\000[0M\000-0Q\000]\000)\000\134\0001\000\000", "0O0O\000\000", 0, 4); x2s("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000 \000\000", 0, 12); ns("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000\000"); x2s("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "\212\260\377\037\212\260\377\037\000\000", 0, 8); x2s("\000.\000.\000.\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0B\000a0B\212\260\377\037\212\260\377\037\000\000", 0, 14); x2s("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0F0D0F0F0D0F\000\000", 0, 12); x2s("\000(\000^0S\000)\000\134\0001\000\000", "0S0S\000\000", 0, 4); ns("\000(\000^0\200\000)\000\134\0001\000\000", "0\2010\2000\200\000\000"); ns("\000(0B\000$\000)\000\134\0001\000\000", "0B0B\000\000"); ns("\000(0B0D\000\134\000Z\000)\000\134\0001\000\000", "0B0D\000\000"); x2s("\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0B\000\000", 2, 2); x2s("\000.\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0D0B\000\000", 2, 4); x3s("\000(\000.\000(0\2040D0\206\000)\000\134\0002\000)\000\000", "\000z0\2040D0\2060\2040D0\206\000\000", 0, 14, 1); x3s("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "0B\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1); x2s("\000(\000(\000?\000i\000:0B\000v0Z\000)\000)\000\134\0001\000\000", "0B\000v0Z0B\000v0Z\000\000", 0, 12); x2s("\000(\000?\000Y\011\000|\000\134\000(\000\134\000g\000\000\134\000)\000)\000\000", 
"\000(\000(\000(\000(\000(\000(Y\011\000)\000)\000)\000)\000)\000)\000\000", 0, 26); x2s("\000\134\000A\000(\000?\000:\000\134\000g\000<\226?\000_\0001\000>\000|\000\134\000g\000\000|\000\134\000z}BN\206\000 \000 \000(\000?\000<\226?\000_\0001\000>\211\263\000|\201\352\000\134\000g\000\201\352\000)\000(\000?\000W(\000|\203\351\205\251\000\134\000g\000<\226?\000_\0001\000>\203\351\205\251\000)\000)\000$\000\000", "\203\351\205\251\201\352\203\351\205\251\201\352W(\201\352\203\351\205\251\201\352\203\351\205\251\000\000", 0, 26); x2s("\000[\000[0r0u\000]\000]\000\000", "0u\000\000", 0, 2); x2s("\000[\000[0D0J0F\000]0K\000]\000\000", "0K\000\000", 0, 2); ns("\000[\000[\000^0B\000]\000]\000\000", "0B\000\000"); ns("\000[\000^\000[0B\000]\000]\000\000", "0B\000\000"); x2s("\000[\000^\000[\000^0B\000]\000]\000\000", "0B\000\000", 0, 2); x2s("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0O\000\000", 0, 2); ns("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0K\000\000"); ns("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0Q\000\000"); x2s("\000[0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000", 0, 2); ns("\000[\000^0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000"); x2s("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0D\000\000", 0, 2); ns("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0B\000\000"); x2s("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0M\000\000", 0, 2); ns("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0D\000\000"); x2s("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0F\000\000", 0, 2); x2s("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0H\000\000", 0, 2); ns("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", 
"0K\000\000"); x2s("\000[0B\000-\000&\000&\000-0B\000]\000\000", "\000-\000\000", 0, 2); x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000q\000-\000w\000]\000\000", "0H\000\000", 0, 2); x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000f\000\000", 0, 2); x2s("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000g\000\000", 0, 2); ns("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\0002\000\000"); x2s("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); x2s("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40); // Unicode case folding tests // common case folding: u+0041 and u+0061 x2s("\u0000\u0041\000\000", "\u0000\u0061\000\000", 0, 2, Option.IGNORECASE); x2s("\u0000\u0061\000\000", "\u0000\u0041\000\000", 0, 2, Option.IGNORECASE); // common case folding: u+00C0 and u+00E0 x2s("\u0000\u00C0\000\000", "\u0000\u00E0\000\000", 0, 2, Option.IGNORECASE); x2s("\u0000\u00E0\000\000", "\u0000\u00C0\000\000", 0, 2, Option.IGNORECASE); // common case folding: u+00B5 and u+03BC x2s("\u0000\u00B5\000\000", "\u0003\u00BC\000\000", 0, 2, Option.IGNORECASE); x2s("\u0003\u00BC\000\000", "\u0000\u00B5\000\000", 0, 2, Option.IGNORECASE); // common case folding: u+0073 and u+017F x2s("\u0000\u0073\000\000", "\u0001\u007F\000\000", 0, 2, Option.IGNORECASE); x2s("\u0001\u007F\000\000", 
"\u0000\u0073\000\000", 0, 2, Option.IGNORECASE); // full case folding: u+1FA0 and u+1F60 u+03B9 x2s("\u001F\u00A0\000\000", "\u001F\u0060\u0003\u00B9\000\000", 0, 4, Option.IGNORECASE); x2s("\u001F\u0060\u0003\u00B9\000\000", "\u001F\u00A0\000\000", 0, 2, Option.IGNORECASE); // full case folding: u+1FA8 and u+1F60 u+03B9 x2s("\u001F\u00A8\000\000", "\u001F\u0060\u0003\u00B9\000\000", 0, 4, Option.IGNORECASE); x2s("\u001F\u0060\u0003\u00B9\000\000", "\u001F\u00A8\000\000", 0, 2, Option.IGNORECASE); // simple case folding: u+1FA8 and u+1FA0 x2s("\u001F\u00A8\000\000", "\u001F\u00A0\000\000", 0, 2, Option.IGNORECASE); x2s("\u001F\u00A0\000\000", "\u001F\u00A8\000\000", 0, 2, Option.IGNORECASE); // FIXME: Case folding for 'LATIN CAPITAL LETTER SHARP S' not supported // full case folding: u+1E9E and u+0073 u+0073 x2s("\u001E\u009E\000\000", "\u0000\u0073\u0000\u0073\000\000", 0, 4, Option.IGNORECASE); // simple case folding: u+1E9E and u+00DF x2s("\u001E\u009E\000\000", "\u0000\u00DF\000\000", 0, 2, Option.IGNORECASE); // Case fold exceeding Analyser#THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION (= 8) x2s("\u0000\u0041\u0000\u0041\u0000\u0041\u0000\u0041\000\000", "\u0000\u0061\u0000\u0061\u0000\u0061\u0000\u0061\000\000", 0, 8, Option.IGNORECASE); x2s("\000[\000\134\000x\000{\0000\000}\000-\000X\000]\000\000", "\0000\000\000", 0, 2, Option.IGNORECASE); } } jruby-joni-2.1.41/test/org/joni/test/TestU8.java000077500000000000000000000403711400407002500213520ustar00rootroot00000000000000/* * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is furnished to do * so, subject to the following conditions: * * The above copyright notice and this permission 
notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni.test;

import org.jcodings.Encoding;
import org.jcodings.specific.UTF8Encoding;
import org.joni.Option;
import org.joni.Syntax;

/**
 * Regex regression cases run against UTF-8 encoded patterns and subjects.
 *
 * <p>The assertion helpers used in {@link #test()} ({@code x2s}, {@code x3s},
 * {@code ns}, {@code x2}, {@code n}, {@code xx}) are not defined in this class;
 * presumably they are inherited from {@code Test} -- confirm there for the exact
 * meaning of each argument. The integer arguments look like expected match
 * start/end positions counted in encoded bytes (for example a single two-byte
 * character is expected at {@code 0, 2}).
 */
public class TestU8 extends Test {

    /** @return the default option set, {@link Option#DEFAULT}. */
    @Override
    public int option() {
        return Option.DEFAULT;
    }

    /** @return the UTF-8 encoding singleton used for every case in this class. */
    @Override
    public Encoding encoding() {
        return UTF8Encoding.INSTANCE;
    }

    /** @return the charset name {@code "utf-8"}. */
    @Override
    public String testEncoding() {
        return "utf-8";
    }

    /** @return the syntax under test, {@link Syntax#TEST}. */
    @Override
    public Syntax syntax() {
        return Syntax.TEST;
    }

    /**
     * Runs every UTF-8 regression case in order.
     *
     * @throws Exception if one of the assertion helpers throws
     */
    @Override
    public void test() throws Exception {
        // Raw-byte subject that starts with a multi-byte character.
        xx("^\\d\\d\\d-".getBytes(), new byte []{-30, -126, -84, 48, 45}, 0, 0, 0, true);

        // Interval quantifiers combined with case-insensitive matching.
        x2s("x{2}", "xx", 0, 2, Option.IGNORECASE);
        x2s("x{2}", "XX", 0, 2, Option.IGNORECASE);
        x2s("x{3}", "XxX", 0, 3, Option.IGNORECASE);
        ns("x{2}", "x", Option.IGNORECASE);
        ns("x{2}", "X", Option.IGNORECASE);

        // Pattern and subject supplied as pre-encoded UTF-8 bytes.
        byte[] pat = new byte[] {(byte)227, (byte)131, (byte)160, (byte)40, (byte)46, (byte)41};
        byte[] str = new byte[]{(byte)227, (byte)130, (byte)185, (byte)227, (byte)131, (byte)145, (byte)227, (byte)131, (byte)160, (byte)227, (byte)131, (byte)143, (byte)227, (byte)131, (byte)179, (byte)227, (byte)130, (byte)175};
        x2(pat, str, 6, 12);

        // Long literal patterns, case-insensitive.
        x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35, Option.IGNORECASE);
        x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 0, 35, Option.IGNORECASE);
        x2s("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAA", 0, 35, Option.IGNORECASE);

        pat = new byte[]{94, 40, (byte)239, (byte)188, (byte)161, 41, 92, 49, 36};
        str = new byte[]{(byte)239, (byte)188, (byte)161, 65};
        n(pat, str, Option.IGNORECASE);

        pat = new byte[]{94, (byte)195, (byte)159, 123, 50, 125, 36};
        str = new byte[]{(byte)195, (byte)159, 115, 115};
        x2(pat, str, 0, 4, Option.IGNORECASE);

        // str2 / pat2 are referenced only by the two disabled assertions below.
        String str2 = new String(new byte[]{-61, -123, -61, -123});
        String pat2 = new String(new byte[]{'^', -61, -123, '{', '2', '}', '$'});
        // x2s(pat2, str2, 4, 4);
        // x2s(pat2, str2, 4, 4, Option.IGNORECASE);

        ns("(?i-mx:ak)a", "ema");

        x2s("(?i:!\\[CDAT)", "![CDAT", 0, 6);
        x2s("(?i:\\!\\[CDAa)", "\\![CDAa", 1, 7);
        x2s("(?i:\\!\\[CDAb)", "\\![CDAb", 1, 7);

        // \R (generic line break).
        x2s("\\R", "\u0085", 0, 2);
        x2s("\\R", "\u2028", 0, 3);
        x2s("\\R", "\u2029", 0, 3);
        x2s("\\A\\R\\z", "\r", 0, 1);
        x2s("\\A\\R\\z", "\n", 0, 1);
        x2s("\\A\\R\\z", "\r\n", 0, 2);

        x2s("foo\\b", "foo", 0, 3);

        // Back-references (numbered, relative-level and named) and subexpression calls.
        x2s("(x?)x*\\1", "x", 0, 1, Option.IGNORECASE);
        x2s("(x?)x*\\k<1+0>", "x", 0, 1, Option.IGNORECASE);
        // NOTE(review): the group/back-reference names were missing from the two
        // patterns below, leaving invalid "(?x?)" / "(?)" groups and a bare \k.
        // Restored with named-group syntax; the names themselves are arbitrary --
        // confirm against upstream.
        x2s("(?<n>x?)(?<n>x?)\\k<n>", "x", 0, 1, Option.IGNORECASE);
        x2s("(?=((?<x>)(\\k<x>)))", "", 0, 0);
        x2s("a\\g<0>*z", "aaazzz", 0, 6);

        // \K (keep).
        x2s("ab\\Kcd", "abcd", 2, 4);
        x2s("ab\\Kc(\\Kd|z)", "abcd", 3, 4);
        x2s("ab\\Kc(\\Kz|d)", "abcd", 2, 4);
        x2s("(a\\K)*", "aaab", 3, 3);
        x3s("(a\\K)*", "aaab", 2, 3, 1);
        // x2s("a\\K?a", "aa", 0, 2);       // error: differ from perl
        x2s("ab(?=c\\Kd)", "abcd", 2, 2);          // This behaviour is currently not well defined. (see: perlre)
        x2s("(?<=a\\Kb|aa)cd", "abcd", 1, 4);      // ...
        x2s("(?<=ab|a\\Ka)cd", "abcd", 2, 4);      // ...

        // \X (extended grapheme cluster).
        x2s("\\X", "\n", 0, 1);
        x2s("\\X", "\r", 0, 1);
        x2s("\\X{3}", "\r\r\n\n", 0, 4);
        x2s("\\X", "\u306F\u309A\n", 0, 6);
        x2s("\\A\\X\\z", "\u0020\u200d", 0, 4);
        x2s("\\A\\X\\z", "\u0600\u0600", 0, 4);
        x2s("\\A\\X\\z", "\u0600\u0020", 0, 3);

        x2s("\\A\\X\\z", " ‍", 0, 4);
        x2s("\\A\\X\\z", "؀؀", 0, 4);
        x2s("\\A\\X\\z", "؀", 0, 2);
        x2s("\\A\\X\\z", "☝🏻", 0, 7);
        x2s("\\A\\X\\z", "😀", 0, 4);
        x2s("\\A\\X\\z", " ̈", 0, 3);

        // u{1f600}
        // u{20 200d}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)32, (byte)226, (byte)128, (byte)141}, 0, 4);
        // u{600 600}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)216, (byte)128, (byte)216, (byte)128}, 0, 4);
        // u{600 20}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)216, (byte)128, (byte)32}, 0, 3);
        // u{261d 1F3FB}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)226, (byte)152, (byte)157, (byte)240, (byte)159, (byte)143, (byte)187}, 0, 7);
        // u{1f600}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)240, (byte)159, (byte)152, (byte)128}, 0, 4);
        // u{20 308}
        x2s("\\A\\X\\z", " \u0308", 0, 3);
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)32, (byte)204, (byte)136}, 0, 3);
        // u{a 308}
        x2s("\\A\\X\\z", "a\u0308", 0, 3);
        x2("\\A\\X\\X\\z".getBytes(), new byte[] {(byte)10, (byte)204, (byte)136}, 0, 3);
        // u{d 308}
        x2s("\\A\\X\\z", "d\u0308", 0, 3);
        x2("\\A\\X\\X\\z".getBytes(), new byte[] {(byte)13, (byte)204, (byte)136}, 0, 3);
        // u{1F477 1F3FF 200D 2640 FE0F}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)240, (byte)159, (byte)145, (byte)183, (byte)240, (byte)159, (byte)143, (byte)191, (byte)226, (byte)128, (byte)141, (byte)226, (byte)153, (byte)128, (byte)239, (byte)184, (byte)143}, 0, 17);
        // u{1F468 200D 1F393}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)240, (byte)159, (byte)145, (byte)168, (byte)226, (byte)128, (byte)141, (byte)240, (byte)159, (byte)142, (byte)147}, 0, 11);
        // u{1F46F 200D 2642 FE0F}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)240, (byte)159, (byte)145, (byte)175, (byte)226, (byte)128, (byte)141, (byte)226, (byte)153, (byte)130, (byte)239, (byte)184, (byte)143}, 0, 13);
        // u{1f469 200d 2764 fe0f 200d 1f469}
        x2("\\A\\X\\z".getBytes(), new byte[] {(byte)240, (byte)159, (byte)145, (byte)169, (byte)226, (byte)128, (byte)141, (byte)226, (byte)157, (byte)164, (byte)239, (byte)184, (byte)143, (byte)226, (byte)128, (byte)141, (byte)240, (byte)159, (byte)145, (byte)169}, 0, 20);

        x2s("\\A\\X\\X\\z", "\r\u0308", 0, 3);
        x2s("\\A\\X\\X\\z", "\n\u0308", 0, 3);

        // Character classes.
        x2s("[0-9-a]+", " 0123456789-a ", 1, 13);
        x2s("[0-9-\\s]+", " 0123456789-a ", 0, 12);
        x2s("[0-9-あ\\\\/\u0001]+", " 0123456789-あ\\/\u0001 ", 1, 18);
        x2s("[a-b-]+", "ab-", 0, 3);
        x2s("[a-b-&&-]+", "ab-", 2, 3);
        x2s("(?i)[a[b-あ]]+", "abあ", 0, 5);
        x2s("(?i)[\\d[:^graph:]]+", "0あ", 0, 1);
        x2s("(?ia)[\\d[:^print:]]+", "0あ", 0, 4);

        // Scoped case-insensitivity next to literal text.
        x2s("(?i:a) B", "a B", 0, 3);
        x2s("(?i:a )B", "a B", 0, 3);
        x2s("B (?i:a)", "B a", 0, 3);
        x2s("B(?i: a)", "B a", 0, 3);

        // (?a) / (?d) option letters applied to space and digit classes.
        x2s("(?a)[\\p{Space}\\d]", "\u00a0", 0, 2);
        x2s("(?a)[\\d\\p{Space}]", "\u00a0", 0, 2);
        ns("(?a)[^\\p{Space}\\d]", "\u00a0");
        ns("(?a)[^\\d\\p{Space}]", "\u00a0");
        x2s("(?d)[[:space:]\\d]", "\u00a0", 0, 2);
        ns("(?d)[^\\d[:space:]]", "\u00a0");

        x2s("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 6);

        // Case folding where one character corresponds to several.
        x2s("(?i)\u1ffc", "\u2126\u1fbe", 0, 6);
        x2s("(?i)\u1ffc", "\u1ff3", 0, 3);
        x2s("(?i)\u0390", "\u03b9\u0308\u0301", 0, 6);
        x2s("(?i)\u03b9\u0308\u0301", "\u0390", 0, 2);
        x2s("(?i)ff", "\ufb00", 0, 3);
        x2s("(?i)\ufb01", "fi", 0, 2);
        x2s("(?i)\u0149\u0149", "\u0149\u0149", 0, 4);
        x2s("(?i)(?<=\u0149)a", "\u02bcna", 3, 4);

        // (?m) / (?-m) option on subjects containing a newline.
        x2s("(?m:.*abc)", "dddabdd\nddabc", 0, 13);
        x2s("(?m:.+abc)", "dddabdd\nddabc", 0, 13);
        x2s("(?-m:.*abc)", "dddabdd\nddabc", 8, 13);
        ns("(?-m:.*ab[x-z])", "dddabdd\nddabc");
        x2s("(?-m:.*(?:abc|\\Gabc))", "dddabdd\nddabc", 8, 13);
        x2s("(?-m:.+abc)", "dddabdd\nddabc", 8, 13);
        x2s("(?-m:.*abc)", "dddabdd\nabc", 8, 11);
        ns("(?-m:.+abc)", "dddabdd\nabc");
        x2s("(?m:.*\\Z)", "dddabdd\nddabc", 0, 13);
        x2s("(?-m:.*\\Z)", "dddabdd\nddabc", 8, 13);

        x2s("(.*)X\\1", "1234X2345", 1, 8);

        // Look-behind containing an inline case-insensitive option.
        x2s("(?<=(?i)ab)cd", "ABcd", 2, 4);
        x2s("(?<=(?i:ab))cd", "ABcd", 2, 4);
        ns("(?<=(?i)ab)cd", "ABCD");
        ns("(?<=(?i:ab))cd", "ABCD");

        // Absent operator (?~...).
        // NOTE(review): this pattern read "(?)->" (an empty, invalid group). It was
        // restored to the "<-(?~->)->" form used by the next three assertions, which
        // fits the subject and the expected 0..5 span -- confirm against upstream.
        x2s("<-(?~->)->", "<- ->->", 0, 5);
        x2s("<-(?~->)->\n", "<-1->2<-3->\n", 6, 12);
        x2s("<-(?~->)->.*<-(?~->)->", "<-1->2<-3->4<-5->", 0, 17);
        x2s("<-(?~->)->.*?<-(?~->)->", "<-1->2<-3->4<-5->", 0, 11);
        x2s("(?~abc)c", "abc", 0, 3);
        x2s("(?~abc)bc", "abc", 0, 3);
        x2s("(?~abc)abc", "abc", 0, 3);
        // ns("(?~)", " ");
        ns("(?~)", "");
        ns(" (?~)", " ");
        ns(" (?~)", " ");
        // x2s("(?~(?~))", "abc", 0, 3);
        x2s("(?~a)", "", 0, 0);
        x2s("(?~a)a", "a", 0, 1);
        x2s("(?~a)", "x", 0, 1);
        x2s("(?~a)a", "xa", 0, 2);
        x2s("(?~.)", "", 0, 0);
        x2s("(?~.)a", "a", 0, 1);
        x2s("(?~.)", "x", 0, 0);
        x2s("(?~.)a", "xa", 1, 2);
        x2s("(?~abc)", "abc", 0, 2);
        x2s("(?~b)", "abc", 0, 1);
        x2s("(?~abc|b)", "abc", 0, 1);
        // ns("(?~|abc)", "abc");                 // ?
        x2s("(?~abc|)", "abc", 0, 1);              // ?
        x2s("(?~abc|def)x", "abcx", 1, 4);
        x2s("(?~abc|def)x", "defx", 1, 4);
        x2s("^(?~\\S+)TEST", "TEST", 0, 4);
        x3s("(?~(a)c)", "aab", -1, -1, 1);         // # $1 should not match.

        // Four-byte characters as literals and under quantifiers.
        x2s("𠜎𠜱", "𠜎𠜱", 0, 8);
        x2s("𠜎?𠜱", "𠜎𠜱", 0, 8);
        x2s("𠜎*𠜱", "𠜎𠜱", 0, 8);
        x2s("𠜎{3}", "𠜎𠜎𠜎", 0, 12);

        // Character classes containing \x{80}, with and without Option.CR_7_BIT.
        x2s("[^a\\x{80}]", "x", 0, 1);
        ns("[^a\\x{80}]", "a");
        ns("[a\\x{80}]", "x", Option.CR_7_BIT);
        x2s("[a\\x{80}]", "a", 0, 1, Option.CR_7_BIT);
        x2s("[^a\\x{80}]", "x", 0, 1, Option.CR_7_BIT);
        ns("[^a\\x{80}]", "a", Option.CR_7_BIT);

        ns("(\\2)(\\1)", "");

        x2s("(?<=fo).*", "foo", 2, 3);
        x2s("(?m)(?<=fo).*", "foo", 2, 3);
        x2s("(?m)(?<=fo).+", "foo", 2, 3);

        // Recursive whole-pattern calls via \g<0>.
        x3s("\\(((?:[^(]|\\g<0>)*)\\)", "(abc)(abc)", 1, 4, 1);
        x3s("\\(((?:[^(]|\\g<0>)*)\\)", "((abc)(abc))", 1, 11, 1);
        x3s("\\(((?:[^(]|(\\g<0>))*)\\)", "((abc)(abc))", 6, 11, 2);

        x2s("^.+$", "a\n", 0, 1);
        x2s("^.+$", "\na\n", 1, 2);
        ns("^.+$", "\n");

        ns("💌", "aa");
        ns("aa", "💌");

        x2s("\\P{In_Supplemental_Symbols_and_Pictographs}?", "", 0, 0);
        x2s("\\P{In_Transport_and_Map_Symbols}?", "", 0, 0);

        x2s("^(\"|)(.*)\\1$", "X6", 0, 2);
    }
}