jericho-html-3.1/0000755000175000017500000000000011214132410013665 5ustar twernertwernerjericho-html-3.1/licence.txt0000644000175000017500000000076110624407666016062 0ustar twernertwernerThis library is free software; you can redistribute it and/or modify it under the terms of either one of the following licences: 1. The Eclipse Public License (EPL) version 1.0, included in this distribution in the file licence-epl-1.0.html or available at http://www.eclipse.org/legal/epl-v10.html 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, included in this distribution in the file licence-lgpl-2.1.txt or available at http://www.gnu.org/licenses/lgpl.txt jericho-html-3.1/test.bat0000644000175000017500000000155211172062670015354 0ustar twernertwerner@if defined JERICHO_JDK_HOME echo JERICHO_JDK_HOME is set to %JERICHO_JDK_HOME% @set dependencies_test=classes;samples\console\classes;test\lib\junit-4.5.jar @if defined JERICHO_JDK_HOME ( set JERICHO_JAVAC_EXE=%JERICHO_JDK_HOME%\bin\javac set JERICHO_JAVA_EXE=%JERICHO_JDK_HOME%\bin\java ) else ( set JERICHO_JAVAC_EXE=javac set JERICHO_JAVA_EXE=java ) rem ----- Compile tests: @if exist test\classes rd /s/q test\classes @md test\classes "%JERICHO_JAVAC_EXE%" -Xlint:unchecked -g -classpath %dependencies_test% -d test\classes test\src\*.java test\src\samples\*.java test\src\net\htmlparser\jericho\*.java @if errorlevel 1 goto end rem ----- Run tests: "%JERICHO_JAVA_EXE%" -classpath %dependencies_test%;test\classes -Djava.util.logging.config.file=test\logging.properties org.junit.runner.JUnitCore TestSuite @if errorlevel 1 goto end :end jericho-html-3.1/test/0000755000175000017500000000000011214015050014644 5ustar twernertwernerjericho-html-3.1/test/README.txt0000644000175000017500000000011711214015446016352 0ustar twernertwernerNote that the unit tests only cover recently developed portions of the library.jericho-html-3.1/test/data/0000755000175000017500000000000011204513626015570 5ustar 
twernertwernerjericho-html-3.1/test/data/StAXTest.html0000644000175000017500000012201211171615466020143 0ustar twernertwerner Jericho HTML Parser Test & Document 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789

Test HTML Document

The following text demonstrates the use of a CDATA section which has limited browser compatability

example of markup that is not to write with < and such. ]]>
jericho-html-3.1/test/data/SegmentTest.html0000644000175000017500000000067711204612412020723 0ustar twernertwerner SegmentTest

SegmentTest

p1

p2

p3

p4

p5

p6

p7

jericho-html-3.1/test/data/StreamedSourceTest.html0000644000175000017500000012310511173171456022253 0ustar twernertwerner <%@ page language="java" %><%@ taglib uri="/WEB-INF/struts-i18n.tld" prefix="i18n" %> "> ]> Jericho HTML Parser Test Document

Test HTML Document

This paragraph contains character references: € and ©.

The following text demonstrates the use of a CDATA section which has limited browser compatability

example of markup that is not to write with < and such. ]]>
<*abc def="ghi"> This is an example of an element from a hypothetical server language whose tag formats have not been registered with the TagTypeRegister class

This a very long text segment 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
9 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789

jericho-html-3.1/test/lib/0000755000175000017500000000000011167436712015435 5ustar twernertwernerjericho-html-3.1/test/lib/junit-4.5.jar0000644000175000017500000060443411052534240017567 0ustar twernertwernerPK y9 META-INF/PK y9=䛌Q_META-INF/MANIFEST.MFMLK-. K-*ϳR03rCq,HLHU%AE%)N zƺ 9 yz\\PK y9junit/PK y9junit/extensions/PK y9junit/framework/PK y9 junit/runner/PK y9 junit/textui/PK yp6org/PK y9 org/hamcrest/PK y9org/hamcrest/core/PK y9org/hamcrest/internal/PK y9 org/junit/PK y9org/junit/experimental/PK y9org/junit/experimental/results/PK y9org/junit/experimental/runners/PK y9 org/junit/experimental/theories/PK y9)org/junit/experimental/theories/internal/PK y9*org/junit/experimental/theories/suppliers/PK y9org/junit/internal/PK y9org/junit/internal/builders/PK y9org/junit/internal/matchers/PK y9org/junit/internal/requests/PK y9org/junit/internal/runners/PK y9!org/junit/internal/runners/model/PK y9&org/junit/internal/runners/statements/PK y9org/junit/matchers/PK y9org/junit/runner/PK y9org/junit/runner/manipulation/PK y9org/junit/runner/notification/PK y9org/junit/runners/PK y9org/junit/runners/model/PK yp6s!! LICENSE.txtTM8G(iZʲ9adC,f8mĭ$44lV }Uʨ&GqѾ}o{ByhyƗ8b#Ҍ[xi:;ͣ}> &Cvɝքg;4O+1|GGH l:zݻp_(ǣ;Z7t7M7_JnZZian5ݗnQ nFB jv#G\CIcc{3.3;Xa;* hSoODPof38L7$݉. M84Zj@Bb? \lN@#3to4wKh {E.'w~]ʹ~Wd ~K8,4D\(RJVVO"-sNiK5UqnK-Ų.0/}[ɕJl 4KVjUBL::! 
PYi*Fhv%^\.9Te/L -O?v'WPE؆ T0|>B^*-t9* Y+.DTT>Պ',0a\8ZJDV|wH*zpUȨ{c'91>-QH-M*k^W+?`^Blӎr[$ĊX$c,e y]2 PK y9P(junit/extensions/ActiveTestSuite$1.classSMo@}q8$-Q@ip!uښ8*JHMŵ%J pG!ZPD$lyfvΌ@0H[2JTpVwmཱྀ[yx.r/0z *PVHƩO\ԛJl1xPD|xSǩH~s >s"c)Px|^hԬ‘럸ιRW @|2:8X iWe.i4G N]"agd2 &v`↉eMT`egb&b$PWi Μ$XO-N<_]s0"1\YU+rv#-~59J< 7wg({3hzH4MR3|PS_G+w+FKPQ {?.3\5dLkX"uh3#Zk;yf 2 ޥ7xgNPK y9`ǫ&junit/extensions/ActiveTestSuite.classUmSU=7 ,,+ DZk-j P Kt񋟙03zeIB0g}ssxd#zT/ \[eւ6p듊X/-|eaU TSs)C@^iF_|O- $ ӏRHW=_E϶dn5x2Zmc7s|Ru/V݋R/C釥6F)Y+TnJ ò&PuiF8NӾ?v^U3T7E" nxsgf;c6T<3@̭9GAQA \-|$è_`ly X11 ӕ۬ Skp>úܦ3˜p^ h_58y vь\Ќusp cz˔^;_4 Lefk<Ԁ;핿V`Wi  A Ï)=YEJ~Sy; L󕁮}]|ZY>}̝%F_F96w0 Ői9Nxmx]g,cr!w#h]\&ҕ>uըXb'K[8Y&3)rb?RaUyT+ 4`cK֖MtN<OYhf_h&*w\CT$}{X2f_U֚u_(VwZFؚ5RZ%Mj}x0vs0xC3xW-A`.Փ6]G[\]\P%#I a8 *W0!s3W7ܖ㦂E }@4D!`_wt}d"'wK I dj,y9\SM|:5R7t|>VҩQL?NEMhMJ愧u-ٰ<8DC{" ب9M-4i⑃0l_m_& J:&1?)vzX`tSw)nAk();Dv(}uu)nu"4hlTiju:f5~Gipr s>^ `*7|*" x ×g/r.b~M8В wp겚.d\Q1 54VcI50.i[bZ`[{z*症 bxGuuPK y9x@Xn"junit/extensions/TestSetup$1.classRmOA~-rm) jr  /IUc`?8{%4X23{3<3?}`c5$͌T'=͎=DV ># ©u'bhqI?(ywtN~Ĕ>72sI5+~](,%֧Qj6ݸY.b&^-mDW`J7U*CtP}H*yH2.Y `ר8CRSnQ>GZTgq>e xTX$f邠x~PK y9pv  junit/extensions/TestSetup.classRMo@}v1&%m)4~@Dp  ) DKǭ״ H QYJH=μyfv :!Zhӳ 2RZ KVG7B9Es}z[*G) Oo 9v<#kw =( uZFQ~{J]Wyi?Zپˉ(iL]"2`˸ř.ͧ-ҁ}`+Y8 dw~Wy_MњIfob; *F/PK y9R, :junit/framework/Assert.classit/#e'qb ر% [Kbl+ Tixlm9H#-%AB ..JXB. 
O]>ߺi$MF'99~}-t/ `Ћ4܌pr?'T< c|\CxPj nUO3*ʣY 7^rO&<ƕ5ėtJ6I|YW4\yJ5|]36Y|8yor-Nw|xAi [ٳxчr=xƇerlC^ᬊU'JX * *iCD.L}dZ)=_O'.+DFA|DgqLdCҾtޝSq+HY#g4Y(O鄕0S q6(FQ=a=T1l]wf\MfE-ƔR>E*ӥ҂BOY4J^l laDK(6NAeF}de0$٠] Ys/~Vvj`@X ]DžU첗U,&blek3bZ#٤{b)ʗ\!|غST<]3[bpcT+km^m[*yf6=eD|g `'n `D;=حv%ZczW:my(zp7pVsa=<6x~@ɏ~`U=bfbsly-ϳ+En՛luB߰ uH~ɕWE+50r0oql93-.R1k5.L'G9bYNpWh{?HNo[-i7ZKʴZ2b+tڸǼCrDϰR K["oͭd3؎k_OK@r''4yLxJǤ'<=u{r<9$xbZ<ʡ$wfsXTؓX>UcXy]:D.MN.MҾsi9ͻ&1R(ƄaSJ|@  /_l̡^/_G•Bv)|?MHְm39gX29o;i4If/]ϐgp~yǩT!p>(qiM ={ ]>9Oڛgrie\y+@$  4݄Xʕ-9w>D v'$Bm9 AB:$>$HkI|HO]tV֓q"="rHYvbkzƱe\=OQl([É!gEO ؐij-L9z?Gϗ%s$q“x[">"s$qsNgx,a$Nz_}_qvKL9wxNo%N9)Ilzݲoq!N E/zZo?oK 3+ye3iMn?=,/DTUyN](zqJH#-eMe&]n?/6Yro#\cb~oqE1)BTAԥ2׸P ZKqKrú:m3r%gpl춒G8+**AѬhئ1\+]]t )5wam[FJ=Bn*KEVV*Zqq 2&܊؄K泣@Ϧ^T,]f vc҃\AXOhl(rs{ >$>?PK y9kWN.*junit/framework/AssertionFailedError.classP]KA=WWM,M)0$z4bQ'av~WOB~Ttg]>sg{a_^aۆj9 @j%ܡԁۋ3] = BCF2"BDy*!IXs'H1rSv1 {Ҵ{Y";-Ӏc/+'ZC4xmWx ʛvkn.@Lc_=#=}e<i}LqIfdtC%#:ZVC7҅7-:g 3;apsZOv^ֱ];D4"(.`?E wjҟPŁ32.:I¡CdmYK1,A :9IqA']X;pE4L|* E( MD.;㯐l D$?t_> Szb۴#b6"Gu{!\].Pk%{O\j/|/u#^D uh* \%>Mݚ'ۻ[&y8-}e%pJFt3±#! g)CnwͱEk憕0|M)_6sG$y9TTq7R^PS4]$Ϝ7Z3'^&KѼ{JU&))PK y95.T'junit/framework/ComparisonFailure.classRmoA~\B(Zh{ߌƄ ~jǂWٻS&$Qƹ3?}`Rx6塊0lVEŦ-A٪KϰTs@)m! i~[Hv7 *я1OgE27Nn3}i;' jj3$ڎ?cx\^%Rn3(5# `p"dE(ZT'j1I]+ߛ5wpFPu+=-IOI>^ +"ed %찪ŒvLG:X֑2#]Lv*4ј}{dϙS8RiRQ ͆F7EoKMhXN^ K_CzgOWٹhW,P\=cT`b_" Zy kIc>HV3MYd!+YwɊSn~XL!j;qN0H~y"HNr4IB,&W"'&sɱi\r1BPK y9n(z'junit/framework/JUnit4TestAdapter.classVi[W~&jP,E(j! 
0fDŮvMv>ЧsھB0w9_WD z1TUB Qè؎Ea`>)VI!1.؎-T%q*<1U1!𠊇p((yOxRS*Vg7 ם]WAM Oh\TCX'YԑCi2 }3?L/HZ?"WH#Jc09{6aHװ Gr 'X`Zn{ LOΠ>$N#c}@ yc&\[Z?UkִMHMX&"(y p΢ RUN[tFw :SˑR0ᦓpZ8DF j|Ӟ8; 'ef]!կ*J2܄c<ӦYP97+|O :g(?3 L2s]ɀ2gڬ4ݬ5gf<{sLkT\ܐTy9ͤ'npSq2S.=Q*>09k;t -Uui i'a^QDA^Z] /ihF^^^S7Ѯ-h {OAܩ"z5'*>>VOlI`TO195QP+,=pS?0tH0 ]B$Kg!mv~[CƸnZْBl #f!bd.)|݌Si>GMoN{i3\y<>4r=]Vme_)>m1Irv?ADOI h"W|Fv muPdO-xd»<,ipn;5G[vh"d6P3h9oN~S-hLw v%7ӻŏOy47wAϨͧw]7XMQ,k|J "Q<,=Jҁō KKkϻ!7`-֡%C_\_sx֊ǹDXP*71* u>#&@ڤOi̪&m\yi:c-5_CX@Ȕ:9.[+Utn +V(Y+Vt,Ǟ칅h(YDb?!Ae#WHZlj[^Z'Q#cD3w5 piTkנ;+6B& e0Ji-9iucW\ +9$< d7i|?{HfWq5_ǂ+yNne8Y Fmcſ4pWyG\iΠv?,o\.XCw,h-S֬gY0⿣je2з2 t$, [ii eVYg{QÊJD0G(U7/ 5t ɉQu:Vp>+Z8$+O܊<@3\A!QCw Wxpq^)sPK y9.junit/framework/JUnit4TestAdapterCache$1.classTkS@=ۖB䡂h D<t*}i6&I ?Qw s{w4:"%Nkc+]F3 fePOpW|Q6}q{w_2Cߧk? %!1&Yflt_}ȺcX޴v@{"KW)pk׌4MYsܴ.qy-U[Yz3W9dĉ4z (@2镄0@Cx 0lqm`x~?v]Pw[9Sfpq^&)2dx],c(x=5ȷh|bEJ`V5*iqrNM-;g v*KOT(2P ֛L\T>ZPǣaya8q$.e4n`Xi3 R'Qc2S5G}9޵wq *k|,5ghl؞r$PcBlig2P.Hoz@ungcp:&y[GX:,"nb\xNI >2nঅ[gc-3KzL -PᕰRYxX'ӧɺ2q/ZzLUc5V6וXB%v>{ᷣBlo.%I ,TǺಫZ_.]]R^[}r;fg3+Ue*OKoPb8Q'EB9]_p{ݬ,ӟM9:z}?+հ]WԊ+Lb⳨@ʴplpmg Y(/A D- + p$P/ѬrSDAт~ӬҭaˊEֶEDn,>Po (}.B?ꉻ{h,4=z 'rF$Cea-k[B21m[B틨lA}TEy`  ŕ8Hhp@wnOy(ei#k38@AB@g-D-vl u5'E4@Y}<9uxgnf),#\B7WӿT5Uc}.;C#ik"yɧʝEVvH/ӵ_o Z:dy Ykf 稧Y5e'J1ȋdhhf {Eui[EWj?~M _PK y9W@}*junit/framework/JUnit4TestCaseFacade.classS]oA=|,lWZZhe1Aӗ*h"2"a׏&&?xwY)R a{s=3;]jHz)hp[*nP֐@EEUŶ#1=lahg}L;CJC|g-d6\Z\A%+{̰՘pv%|m>}A{|,AR:S{I1ʕF;=s%|r}CCbZI @,p fL\_z VE .:T(KQ~%pJ>d}y7T#?Na>2LpkQNT?Ug*ieW=Rl9‚)q-J)2#g(#g$BU PK y92̏ݘ!junit/framework/Protectable.class;o>CvNv.FFf 0F.׊Ԃbvn ?8(9-3'A 81)'U/+,A:4/D?(17<([I PH~Nb^RaPHFQ~9H!##0]y فl&PK y9>V9junit/framework/Test.classmM; @cb"mҸX[ *ĸ,<w{0 `q,ENԽSƮaCֳv0/vS-M恍<ct͛cBE+07$LC*:b_\ۄ@1u;4PK y9<99 junit/framework/TestCase.classVYsUn24}%L2 @$kgI:icw.I}qRJ@I >XZo,iʇ99|}ຊ: ] D>t~9: 9pG𨊅xE*t()UQ %/)lĐ +|Xk*ÑSQ+'PHคeBI+xC ס3jq=cLU̼mL5ؽ5FGXt=ltkr.Ƽ,$ tע J7[NMم7&~ h9=c,O`q`V*BqB^zܾN!6ݡO)jdLH+E@vg!7nxvoA"賌i'RKw XHͲ~ښ[,TM7S>q {vJ !c 3%)3JWA)H Naʬ}vthU .j 
>.ӡ;`^p2s3H4\UH 0$0v 0w.0tk}f9G+7ӯ%^N} Tli|cJӏ$n@6k^ UƮʖ2ƧL.T>ݴd'fڽfi8 wS,p̕!r>5>vʄsSϴ|熺=G9Ĺ\ߒKA_|j'sv9g.ثw OY2l!nTc?Vp|WHBԪeHIV? ɟ%v3ªWrET1YB͕PybJiG/G6GQUrqh!V 6`? l9DJVy_}ZTEXϑ %p^,z ޔlfw+چŲҿ#~p.Z[sQL' a^V bD% *)P5Gu_1u{a6ͨ9Yr]l6ss%b'-'\c/X8E䣽iK$a]w2aÑO42}LO)e>|_S6oq7{rt L%nvdտgV"TqN8_`s;ow ^b}2BlwxCS%duK> 4Vd>* "LXR忊JFBo3A/:{PK y90q2Q8!junit/framework/TestFailure.classU[SPN[6Fʵ""^[(UB9`m4gK>茔qWn#r?~0u=J nqS-1L)V3 }[k1wWpOEhX )xaVVQSNloe)}F\|m[{Յ%VU[1Q.Nc" 5bN`<})omInVrЭQyi&1YM¢6K9*z2i>îP Zsl-ȳr6岴!Lc%\m:"L{vbNhu-Z&Y[!d_Q?֬]&HƙTC?j8szGՄO90+[֠cI 2=e+VՐC3Y@U\s #a E /@2P> 66U3B!bhCJHt$PSj@kz?Z]Y$nƴgdO<\hqm!7X59@^+YdDY Whz4I7iΣ R9㔬T l-n!QNLLp^G8aߑ34 PK y9lV"junit/framework/TestListener.classuON@ &vC&'jT(_l̝wFQ;G -vVϯ8Oq⌐麾BX]moE`EmlV6Oj,vл-!޵n^psu7D+q):ZYQ5s^KCGlgò7hZ@ Vp)g2}oPK y9wC"junit/framework/TestResult$1.class}RN1=NlXl(MD ЈJE,lw[ Q "%d}2xXE3Dq\V{IxGb!aEc%alveتsj!WWgmR$ݖM yP>Oi `pI)t=Q$Ab%# KbS/U>Qq~ ٓ~@^싸,,;bŁQ8pP9XEz7LCB./jNcyQOa~@u-BD2̼8OPtK="/\ ӟS-dES,@*51m͒u0o7?SGVCtQL8&ɾ#(=nx_"gԒD e!)c*Ŧ14QYxPK y9hF junit/framework/TestResult.classWWw~fwae)($ҸeBD(0-ldMgfsZk5=U{M9JĜc=C3Ypxw}?HT@f>K"ahӎ .'rEFWq ψc7!Yd-=y] %,^*+gU|OeH.UpR/˰잿_ֳhf狶3p_Aa{rBTtK_5%v|)V ',l:QFɐm^p g*%YueVAKa)_IEޞRM8 6SΓm|d,TV֒~J|9gu(s1/î↸ϻ(Kl2ҊU".Cϭ(H∂:;eً9<7\5= zrXS ӶmXN\gkUN)W/q38aF~G,$Jؚ1T]]ڌ-v}Q/o?0r丹4m5S{TYgD/0M|BPQRR0NwFhͤb} n9p*;F+N5rC+xhV (F> =Нf{\1^ɬ6-NjOK#b%3lP1y{\rɢBgDxPAw5Ӗ_Lp) c"b\!|Rmb$*0S j n?a.4L`R Sy>T\/K iY82 l3Κޯ4<]Ś_#;n6l:bI"kΔ*c.07X5гFr설DNv][WY+`\ **F#6M33eAoNk8Zm75l3 R^w-[St%oF?H[zD1o$Zaƥ9`L|f5YV|vqHgS |?"VT) UD)!wSO?:1xaڇ0w5O>|7iQ S?ږӣ@{] 2PXt*+vң+ʦ<t8(& S8b'q/Ӵ}V2g\Sq"KaLBft̲dvc2s xL[êބ yVzgl k8AmF=A{O zQ5|qMS{|8vaMDVA }7*W fFnl!C#DW2$2\.)7D(1K%~C,)KHoW=IxG ŸA5}P&P&_ e$$ }&E{Su}˩m JG׽}`xdy`8{= {/QI'I|H|5p@֧BjjT%4TT:{ְk!7KE=>IQ$iKѳs\{q7/K̯W\FS<9'"8s׎ b(p@Fx-NU9 Z'=WE5$2>#{Fuiax7)≷wh ܻɛA<PߒhD.^4z g V,.ݤ+-ƃ?eDl;~; H`m ,,2d[ m0Y{^vj끖}` `!junit/framework/TestSuite.classX |Wy?v+ʲuXv%+vb'9l' b"aG6ЬWckծ#v(`CC) @"ۘ@Ӹ-BB/qߛj$O3owc>sFF7>0|x2SaB%?/%|9gl_ _ k!|=o_ &  !|' 
p^0VA0Ux2XcDv(ΐퟆц- ?.?_!"_q9ǿ ?+oyP5xPARaU|$Q~ GP]՞.Ԩ*,t50Bj!we!L]jU"Bd퉮t"skKelS>*&Xvt9QHe3µUsCGiV63B"SH%䞛A(gN*oJB01 n2y$ޑL(T3fJgt kYj+J Vz(7MkKuDY*e=Зț:=s˚-30K@I7ݮ4xz+QH_Ac\v2.8q8ˆ֙%凳-v7\DLU(D'TsTiT־mKI9fS48{IP&Iь^_[>Iű]iPxgG(宙H)4{yɚD.I63$R U).ℙs1 y"W ܓҮ_<1 ʞ /w$\zF$2sMB\0rXLvr [=$y5< CIڼT̖}]]Wz d)w`ϬJX+y/W̰ux+{/-szl.J $?gq|bYK^ 5|eY(XLSf]8g;c ZЎU7zf/*3]ԸBwgYG.DRCTm9h?@nPKS2I: < @gSXZiuTz L(h{Z L!z<:G36 ꚅ%+,^Q)QAx8TK T5W΀jRͬIy'Bt1QaE_")܌ JI鎲D ܁W K)5MezΜL[Ä>(Wn8W̃ \`ϝݨb?۬gJ1W({ r#T Z)BFp~bAPȍNO%9Ɏ\.q Wଡ:CuLvZ>zUhT"!}=;\mwjPWAPW UuP= kx, Y%ƲP!;An'dq3#CLlN%g^c/$Rf ,+(7b.#YbeSٻU|E1caZ7XHYUwP_F, Ěs?Bul A"!> §:Ps$Vt1G t>.Ģ?F|!9Ľ: M3O.)'ҹcLOOK6 =hGn%n񹅴[I}=v5)zp9N/v;x\ϝ Gq=p !$}+"νWSzoL)xzF!޸A&Z&(~&5wbD1 vd%V[Qj6;,<^ϡ߮+ZW#nT]qԪՒԆ*WXߩ?`w?M_ #vT>*V3nYEڏb$ҿ6J|UN}^u*N\wՓ0b\M^t9v`!ClZe-BXX8 e(N%vV?e ![FtO:ţ\u,~T.pT;\ؼy8xV_7ciP.p\a 9a|诸Bo{{,|ȎS7L3EA0Tq3,b?&dw4*TDp]}sNo*CAGZj@Zj)H 7_`H-^ida&(XWҡ|t5X:M>mg!h[XGr9~_EJ\mݪ]lmέb~D]0t^2} Zju}N?PftnPK y9Ȳ.#!junit/runner/BaseTestRunner.classZ |\e?'̽ii6mC%I&WZ#mS)"Mni'3̝> (`yD4]M[F .>p}뺋@˲ݛVrw>s֛O>MDKq7:z OL]njWиҠ s@*՞1'O`]Jf+9ږ쌝LOoˤvƉY Xzl䰳phxc<ؙc .;e B Spe<w:*[d K Bx͍3;= [M XV&.bRr`Ҟ%v}wYgzn0(SCGjT-KFThe=vo>&?*!vaI5A]:L?%~O'M?kK1|I_\$[Qm&}?Gx6MbOӀ1ب&ߧr ) >̌oȨQLnOQ<,i4L m^@LT`˂Ỳ.$.ԅщȊU?mҧp/DbƏjEcpZ6:QexWĹ$d1FU}I/&?F1|]U'1^ו݄GG' okIhیu7|ϝiكG'aNlP[Al` )Q9s^E .I'%Jƈuh8f+q[L ;uluBwR+eLLE+K.[J R2D&J?,0֧$93>g&=e S\{+ qܦ ZZ>G@j0>IOve< tItO|`7T2N9%yQ:gy%KM|$2̛`;Rf!YYl2aۮR8lArΔ10YFoN[")_ݛ".W–AP.'[p5C o+p/+OF5`qX?4Lw鳁-IV2Y4r$?oVӮ=Hnh~+kx&z%AW⎴Z܍Sn%CR<6u_Z~o STq\NRNܛ P mu8<ʙ@%IK1O]`zETU$Uj>mET^5"{ R1o;AU?E=d\!58xVQq]JuCŴ\}yJ5#*}ji)u¡y 4snGv54  ԽHXvWb*v V`^o/]P:ٻS1k@(=Q?C*grv40}c*K@p ҚܶF݊[KW{< 4:o/>>J:Ji2W^E \$Xo+;yc#^3JmyhEt^( NSK)Z28Ep=hc9 a3 {4Fi}GUARѦ*OtAD EiiǩiQ T+"Ivlio?xivyJwT=Ki޷~*}89OlJAF+毭pkEp0 r6͢:-?-9b^@6s3mMXz/A{TaCAJY@+ՋO¶V@kz928QEg2Pd1W{j'ͯ#SGѮNkbP&_!'֯r~=t _ U uW^@pdT8vsϊ t _Hk"Z-|IĖ@$X@|I nz`gHeyd7+i6_)~PK y9a "junit/runner/TestRunListener.classmQJ@mcV ^ =I ($iKIl||J_ Vfk7f 8'ՋEs!+ťZ]g σpyvDZƊJlfNw\nX>+=uzrC>y!/v;{$MOQ\mMg9 'NwqQk 
$GVJHQ>P.eܲZsx[& DV[tahv0Ss?ig4op-^>MvrPK y9%;vrjunit/runner/Version.classmQJ@=ӦM}QAP n\MۡNi&.T\~xD,=se>?aǀX,cE*CD?eH7 -]q8]ywDHz|Jy j3a@" \W(#Xz1CJ*=qw`}%4Kz7]N(3m/P=q.Cf<M䐧mZ:LcDE&M~2\vc_8TN![A ͨ6v%Y 6l"}{G StA Mz1C1n ԣ ɖRXԞ@(# Q7h/W? Q4 PK y9<Z junit/textui/ResultPrinter.classVNN6%GMtI h\dB/& LY=@[jCEJ jkmk/{AEgy{91܂{ yGBNOB]xPC(=:ǣ2zo |@eC2xLIa{\Gu|Lu|O'O Ѕrs"2B _ė:2|J_uguQq^Dž3 YME(S]۳\ m]fvRv\˜mN0ӠhhsId(}"ۜIKQ;g.LXnڜr%>d>ӵe,F#v^Ct):Ԙ/d=lԢq.PaV4$)לT{G&PVZri{h;ߴ)ݙ59)jD;+C븴aeu?l`vkx Zlj(W\ӳ5sS;4O8V鴌SEgU.A~qphc8`i4nΙFkXJ(5 o򛆮pPFT2nҰWAj-fp*J;J%V֜[i[-:QX4[^2;kԦnNNӰ.\*יUq[z{¼bXᖚn-W.G-_qJϕ2׭dZPk{|qfa[:a-j`/؏8 dF,;ޱݛ89,{ɭ;2Qޟ0=|@Q,SK4p73Y,GJX3 R_?s-./ /5~aer//^T ~grr -yr\k(kX9( Øcd&1TUTx[E N_NA3V3o))C=WEJha2+*Rtٮ%1SgqgLn4SH=)'\r r3m!ćͨ㣎J=ؼHs}är³|DHOp|f47gn"2 6]ޝƸD^}&E/Y?q0CxE!? 24Dp~|NO >-?/<#gT_9 -D2wʧVef.6 zoGGgoytiк4ȤsdtѪ܆3v*94tѹksOyJkpVhpwd⦆9݉ٓO4}$g31#9`d2'H"H}fښOㅡ]ƅFs"ӬkeM#&cل:tC3YC |%ùnHHI 5=JBCÑ;Z:§GlJCY#eeq;NpڰYn^{w SBm+iGp$pLSg?493f$(x,e$ )wr`$f\CsyűF1e T+O'hVĨQtZF%Fdd$ /R=T+yt\0jc (˜Xl:6~<)k3oɡr&mLNj#VyF|N2/]GڱsO0H !ق6t< #q0Y\V$iid@3A{ 4r97[ [_arL\y|6fnHHϙ բ̇/ ztt[âc%,m8IDWKx5,8FՑ 4V#؍k:oM}^Ƿmcأ;@;čqG:~gxчtt*ODKJCUSLǯt{A?܅ri>[O$bx~+OWaIpf 7cF:&b#T7Gܡ,EU3k5b3C_iq˰!?4,`:cDaD)%AU([u ᄢy=++im=,3Ŝ-)xw3Jls) DgTh"+9+|cU %7%*ʖl&5MURvh~N0:O\͘bLӡ|RͲ=RPݡ.2æ-e|)GWyo!>j#Ҽ o$s.;w1\jY#V|R/NY嚸yx.)9[{Cqg}yk8؉V;Bl@R)G+'d$3F\F8)S׻_3]wURkěΊwϬ縣bVԱ,δp!h:6 at g<Sf'L=t5X6!@n!7PF xÑ«KwcqZfQ;f+3}rٓ0UA4Ϩ8l#fݦT N]3Yл#ffnZ1ZA۱C֋B\&?tPmAU <ُ;m.GWqZ 6S$=!\zqʮCaOUhU0AB1u|،nyc'{shӐA*6/qӷ*D|0J@V@0) |ƼuHØ_{ ns_ԇ4wd9c).r>F2)jQq^D0/ 8'"˥XH<'\DhfHLj*YQÒ=q q{.|ET?n?WQFaA8RiFx¸*$=ʘ{LQo/n-^Szժ[~U<60ʻѾtF i*i"Ȩ̂b=|ۨӯ} =[-cyk\dPK yp65R"org/hamcrest/BaseDescription.classWkwG~Vky[חӸʱ imJ@*$NSZRUR--p+V(RN9>po<3^kmEi{}DVZч>ₘI(VJ k4TI<+4<*J_bO'E|IxFųIlWV|ϋ/ ׄ[ᚊ)h-%oLASf >^MlxL3eRYΙNI|ĸW, vLTpѼw,t,7R> i[uS08o^6˦]]84{R.QL_s93Q}Vyge\oP[N̜^c3 {ΉK:z?'zC{-5o:WqX`ѩ`[.ژ~d`^!<:uFcEo܇T\C9Ypz G z{Woi(nh6Ƨʾ5OT"O|;SPCA=bm#ŒZCg-8<ʙХhXnKZC(ZFecs=xf\t0'lϼ^*E,jg'+U'o-6ֱ6tŐ3}j}= cs>^d_~(α+c?",浽] 
ub,"q\1Yc:~e:D\<`w$%0l<|q̅՜~>W a~`_}%+V咎Bot/^S :FpPğ>1bp@{=<";;nTPoܚܸD)ӉeʋR^南MWܮZI<p\.i̺_[u]6lO`MB7&D̥ 3;{g[Uӧo-H0·Fsٲ ^QΣ,RpDHe w񽛅A>7q8̯a 焱\͒v@Q|3{#>Gg.#f'%B)b8 -eGe-Y-4ŰAVmQk8$ՍⰯN)[p]+ldEbKxuۍeh.eIkdh]D>*ބ.|Mo ]n&wn{ݔ=e]F#5tήtZB:1=D/:_;Џ; .Fa7]x{0] w2p13,(qe Uofe(9_01F*kĴ'D]7ѽJuMr*&W\5x RP"2(Hc;`6[UVz k@B m!H~*DP&B K"Re% )l?_ܹʑ(J* B()>"(|7 ޲L@;94O!&^DVP/!Sc]m5>~~0M1iFv !O£1fHHHOmk?Sƚobﳡo'hdM{] md{٘lYǰs;NjVJT!nrs\X<'<ZXzj}{ Ev( !/.h4!~|NFP8x#h乞$ KCF0)kK^qdtlrm3r2r=Ch]psYry xeVl 'YO5+Č9RPK yp6[l5org/hamcrest/BaseMatcher.classRN1=d/T;t!aFG\. l'eP3tH+]i\~a@3{s9=;"r)İ:6,lZZb7b^K0,TnS:o,W= e1ӑ>Cvû)q_\qt.1or^nyزsEWjTm[*޲ŃemjFKfw\څ!Jɚl+n|c=7;Re y}K80I#vakrNK3v.C.GeHO1a5-r1=D0ЋR&;(=IH0K+`.S^ AmJjiLMIuN:{EiBn_"Xdy PK yp6iY~org/hamcrest/CoreMatchers.class][E[vmKK)^JjMj4h"hu [X t[yzwWxCxf l̜ϙٳ0_5\gVs6_hl5hj0a&ؔ5fcHJ^Ue[ެ@{rtUc4-\nn(S2ʫks0m[nݒkVԼ㚋W6JZL&kzX [ E- K:;5Yr.X[U]Z)Fl[gbk#XcQm 4ZhQD(w=MXfw<H|T5EzΞfϰKCʑQ.sJǼADReLf< .mtӊL+QucZi|hz5siRq hhJ%П|&Bm7๴KtrOokycS`䜵HW;5K~o4oU]-S]kNcBe/ktB[zr[2[iDM2IG?ٌH{>lFxc7x&t?9~I^g#>H:Ot,aY` S*pNChyr\TK\pT3 \r XʳjVU!T4(LeyU(]%*=v:m$v`^[˸Bm7_Zٺ!ڞSL*Qzh j|ǎ(2?MbT&1Vǎ C^ acR^La&2 2v4fI蒱310&dl:iHܞwc)[B[xW{umRK%Id슄4a߯cgIRiSR.cR%X}En٦nU;䯠q3L T'{,!g#\m?$HNy<9~T~Lؼ*9+SȢJY BBze!?v8&'qQ?õ%yqK{+PǛ%"(>^PK yp6X)org/hamcrest/Description.class;o>CQvavFĂԼԊF= IJDļt̼tkMt}̂ /d YBPK yp6n(|org/hamcrest/Factory.classOMK@}Smj/"xų')xچ!nfS_GI7"p`fx_q a!d+Di& &2WK2^J|k+kJ}Z\?B$4]I]?GUVC"j4?"w/N# \S&qa bbˠS.z5~PK yp6 l!org/hamcrest/SelfDescribing.class;o>CNvvvF̤Ԑ|FY t}\AIf~fPqp~iQr[fN*#ppjNDERf^^VbY"#4J@sRK@b`L lPK yp6aWn$org/hamcrest/StringDescription.classSN@=C'N ,!4-]PB.,] T)-RAT<:@ %H{?'Z3w'%L`bZFa fqʘSa=9^H¢n, K |m1$Jʱ)^~\ %M,l3F3JennD*mľ[Uΐ*^&[MU^WM hg2% 5(^m)q4K:pl3vc)(KzG8R!ȿX˳)͟.:*}Exa*_DOoJ0 ]\ZQ1(d`6x7EN,cV8qB+niL VI醕:15*n8Vh]y..m+}+{ ôҋUG6U1p&1'.V4N>LS {~#K".# f/vjwJ. 
*ӟbГXN.1`Lp)S7Pz^I@J4[Hi^Z#b4){_A'<8?T&O dC^<*PDjU|D;>@.|ػ tL6uGt4r͐,sҡJ\"iA݂$1a }O?MYn(nϐL*z-VBޣ S1d= PK yp6Э,oorg/hamcrest/core/AllOf.classVrT]DZ-[ii.%))ԗ.pM m(uҘȶYf' /}:s֑Ūv`<}.{M|S1G9\F5>eƧdp%9⚊5|RsץB ̞vM'0Y`|kT:SU억cn1.E]gkV}RQ]O ]l]G!Wm?Qls4ݖ6KIw"0p똕Ng>1vGj_(/Q$6̖[- 4];'A ]y'2#^DN@mc5ʫݾkum/\@T@ʐiESr:ƀoXnGS``޳kVbWl2H֌Kq@N ˌ?:텱,Em=9;uwZ%'Dơ,h8W+XװM75|[`FدyȂa庠+ ۸k wpW"e+LDc.0f4k05`=M)'tm߹wNl6GNs77̡oգ:oeTI\'r81i8}+ FUq ~_ Yv׸K`)YyҤLP? ߑ19N &i,wS6Nu[Bwd|+'$)ߐ8Ę!?`S7.$#RcÀ=sw Hc1oR[-eC}^Ay~C\ z PS'S$~^$*o]#K?=߾}XeʹIE.QfPQJcaO<sq&y&Lbp=PK yp6org/hamcrest/core/AnyOf.classV]sT=ױ-[ii>JRR6-ҦƤEvDDH2C~?W^x 3?+9UKe`r\-R [2~۾onYN/۶2EoOF8BMNKL?nssYLƉ0<?u=sՒPa4F[ËiXdž;>] & 9W.+ >Ӱ{ >p4, ބaS3i+ X$>޺Df`3y$>v3iݶ;&#\:)q&KYs\~G~JJ\w9$6<@̸!/'#5u5]I<0T+=ɬ6(35_ayV>/|J;/N94ɚο5ʢ-Ji_S\)M)=y3hV PK yp6@; #org/hamcrest/core/DescribedAs.classV[WU& LHpK)-K`(%)Z'dh2LzW_|Er?Ɵ33&˙ٗoϯ! b QR@2a|n%Up |ap!\v/x"Ox=|&B[`bUKյT˲DV9Ub*kV VdK)1C@nS%̴R9teբ&[5|x*riܑ5fXL:&`QYj9e"M˖ ;S5J Hǎ1) UckJ99_+ryS6T.GC(RvI0oj~Z=pdӃJ-j^A⾘}~^ .iƎ^<'(ŌV`0C}ld&vYlZa5ە_ofj̳8q26jVئjēyM-<6xKbS2 )ew1އioGo&k5CaK*ghO'3 o⺄! Kk4ICpc" &0)#E %XDYB]BO$0EXjؑ9v%{ǑI{IW\dICJ+xNv8 }>iA8N apy@ su7AB"Z}$?%!\n8g/py=3sqǺ^򰔓Ԑ0%$f "eLH72i&Sby1&LTnU7ws0K7<-u { Ri;FҰ[Ze|ǻ;Rd=WAJ[s6|nw0tAyܰ PK yp6.9=org/hamcrest/core/Is.classkS@ 4P"w-EҊ-0JGg K SLRFtFG?lK%gsnw7/Р~*ƃN+7Hհ2ЅS=~3en2=axѷ m)ɡ ڶp ,0B-ϴ->q0\h̠H)&Zis)gOUc|U0Kj]zqEujp #jgVDtMݚe\a7<9%Puh8QGaFngG#{!֖k^6T˹u6z`Clr{Z;/h4m ]! 5% adh3Z֑.>{ ׿^ L6n8+UIC,mbi#}Denwhhn>ݔuPK yp6f y$org/hamcrest/core/IsInstanceOf.class}S[OA,- `[/JŻDIH*$B6vٝO>H5g;7E,9a! 
nq [Pd]AUU }+kޖeF {JXT+Odvb~G ZQ;X%XwyuR-gLU$qIP]?p \G}c>bոr+':sUK^*Q NIn bgMw3\3gZK 3'lŐ9aů  yOZLpMS&ve(i/HM0kux.Ҏh!Vʽg込UjJgkqM~H "BtyG2B~!rHV& LRaMlf5'HOlzMR=3ݞٯltS|nP"e46[PXE!0`m\d|v,eʊW$5/4if Ts^v'92&1knz㆑7PK yp6Corg/hamcrest/core/IsNot.classTkOP~ vUq7QTƦ9Ʉ-Lmel:"??h4in&}yos ` [H Bx#PsȆ0-,&Ojcn4mRł+mq0 .L|,??x %مB4Fa6Jw ܳPK yp6Фorg/hamcrest/core/IsNull.classTN@= [.~)kJAHH)}8V0rvE}}R_+:8NCmig={̙ *6cK,㑌ŒWbDCi6C:+1HN`(qب Wet [dM0ȉEd>,PR?YZS70,\18jz/ %LNH2Dg-G".E"<63<%C_<gHއ6PK yp6,dporg/hamcrest/core/IsSame.classuS]OP~VVڕ(CD2QD%*&ѻdlvFxƠ?e|O[F;~>~`thAEiyQϝiUCm +`?x˝Nu{ 5Ã'I\v0_: Xo2(6eێx5资gVWH&liN%8}ugy=A iQ:U^)+@~OqàoyvK.J)M:a|qfd2SW{pǢH_ {ݮϦbN_z3p'ož:pِ.5VzGϖN}1B7܁g][^&aC1Py)ࡁ-l3uIxWX|i#}7v:[L_.;#K±F?F?$}r$ 2B:F9HfH`dyz $AB54\XɺZT}>[f̗H[׌y wd>AU>C~r(OV<, 9 (90s2,rЎ1 $12˅I([N0EJ3jXU"_17w# GpqHu!^ŸyoP )oPK yp6s^zorg/hamcrest/core/package.html=A B0TAH1"O͌Q2 @g30R QNV `rpbT-eߣ~ѓ7PK yp6;o)org/hamcrest/internal/ArrayIterator.class[OAӖn,V[.D"Toܴ!4i%'2ltJ/>h"H"~(mbCgv9;gs~ ` ܋ WD917хI2=Lx#ݘ40cు' ]\J!Y#r)$ՌoQnN@ڗҜ:7"(G0ϡR4)D͐g{=i礍0x`!:Gq7PK yp6@G/org/hamcrest/internal/SelfDescribingValue.classRMO@}$5q@j @[ $kҏSb絳Ef9? G!f4Rq؝ݙ=#Y(}yl&LlP8P0,N9wzoTWb[rKW ddXN?0俅]BϷg\@##g>>z+z ?#1b)8m?TΔrX2]A4u=U:輭n nȰQ2 U.u'` tӮ,I6e0O }g4'6lZuڗYJE ͐|%Ja9h4;6EnmYr~ߢ6\g뉡oZnMa{*]XN-z-J ˋ$tj!Q1^SNجђJR9O ܱH:6wzαM8C<9tG^WZR3ؕtylqY\e0-2ySu )ޅ/!??9GV+q6h`$9 %zC rHGet*d Xu/J`Kd)C+K<EZMvc<ބ^֧ܴy'}eC,$>M릃L.OvJ¤!/$_M)?ϡWfЅ0EvU2i%h<-_rBnTgp.nF`oPK yp6"eLorg/hamcrest/package.html5A0}~Q%np@ .HГeY.sJ!O3x=SL]$'OєX5KdCPF\3h[y'~Jk8,%!1A INo$؛Ԇ:M0!GvK}P^UL6DycWd2H6eΩ'6GB'`v 4 .ث+PK y91iQ@$org/junit/Assert.classX |T73sL $@a23IB$$`\Xl;܄Lu]lX> V/[nuwmֶǾ:6_ss˿g\*M &nܼ/ ?/0ϿgΣ7[=7;ܼL4FW ƿ/}|:', ?OM3O2/M o q7q[3{ hCԘvA3W a"h ZCxYb8O&zZs 1 1WM/HgNh󀀯;7l Mg!+%9po. 
$iE_qG m;'Q[W`+̤mOacwgrW-̎m.ѕX>f Q)`@0\V@l#Qy.)0d:#!C6&\MC;TcEJedٰӖ5\^ &SEJBI茄m1%5{KrVz98s 4#Ns'5.)tebR]2NSgVz˰y ^sLv>r"'U21lNYB9 %{=8ˏ%rujV$R`7HyY%T GDyWJ9 ӕHf8d_}S ^l/ X@m*vq/IRv$RyPlKS;J;c0Drgu+-V"=NUjZZ"7JiKP9#LrR&ޤF"UJ&q+K9ū0^+=ιQ_H0"8jKgFs!o1 %{dEe&tTAeX\$I]K&]= U()-jCcwD9.ot9jƟs1OۉyWZp.0Z&./kҝ#vPXNjJQTiԳ*,o_| fSO gha&D&p S4i 6dj8 $$FoCR b)b,+IԸA W Hr@#JcTcxpAB5A5Aeyym"T7xAq {^[c/^?p H2Na9HC2(2DO9G8 MHg'&as҄. ֖L ԞДKI 0AEh^\=|q@j IXX{=nEFjc4H*v@,~uO,ur4&9kI*S",C1fÌ 0cf+Kv3{"6Nv+.heyn'HdZf v~}*ڽڽоm[Iw\h:h* 폈UsOa_߸sShQWE{A{`wxPO.;gv11[RAihD;-,w===3YWU8h<6~{,/3EYST-Shzǎc^OI{yµs$Ź7"ZNx!5 (ᑏF@al7&]OؽѤOb,n2d??֤p1DۃMq\&tK;(CNFs>WhnPߎ_['9,yNEgÜ%)k;gCx7eZu:UQ57ve=Ww*pںO:Ht:e͓h.ֲI.)ķ.;WFB Yl"]4E?=3Sg˞yN2rk$S_titmn/3.i툜ݯ\h+^tQ:8(C{/iԘĥnQ? LTQ${GQ<8HjZ,~:lET]t^HOܼ~KF]E!uBbUT:MP=r7FOT8*թ)Fah%h)2h q.lFGTWq^A| Q DMFf+\f:A2c @} ԫPo"PsA51lre@/EvUkq^IPmE 6-T^2Z }cx1D'诂<۶SC`V;h(yhC*Eѭ=QADZCHthK9=h;ɩVq.͙i6T\{1} nM&*<=ru(bY7۩oψ=4xG~){>(B˒1s8Omiy, h׊=RN[ GC҇þK6F)Gi56zE[w.>?P/f!w^_V?=}cg(xsb9Rd"D*+R5ʞνBp%u<[B}RBI[0L4V*|ÁҔ[ltܳ组Ig/jXH?CI3 Ɓg:6@cE$TI8"TMw Ɗ)WbJ@5t"੆E,)X  WpUU<k ^jb]AN^1Maڞܪve:S✫^c|?Q|}=8ɽr-(zj{"{ANtqZ]ZD;'hYTuI\EgfuCCaKh_i#5ujYB!TI7 ^* ]}"k}C.Z42M}I Q[Ћ>y"%LO uF`K4Pp 4~ jTUg "(L:yj`0.KQL%%6>|EOD&'!II#!*Iq #Tqu:ƙnI$!隙62`=|hц$&id-wPK y9Iworg/junit/Before.classOMK@}SmW[?QD=xQVbiưa+п' p`f>`>?aOaGaWaH?:& .5T|Z!짵vvn/Dp|h3)G :'t:G͗bP}v;NTGQdf7Qod7+0T.koON+dZr GGBGz Mu3>`lA_@-lPK y9Yorg/junit/BeforeClass.classOMK@}SmWW=xCϞFxچ1lJ)yĉ!?aWa[aGa@?X& FKN =ׄvTfj9t0UFYHQ] ݹ$>xkTtގca%eA'|ly&E䊰<*jgBT^LgpԚl>$:+h՟EO %c}l47PK y9Iu}p 5org/junit/ComparisonFailure$ComparisonCompactor.classVsUm K/ii`%MЂP$rnM%ٔͦTPPdƿ_t@g2C[y~ٓ[q]Qob"nL6wi6\ ">e2؁yq7ܭئqs),e FQA{_"K` .e,j4u+ӊE' VvhDg5#W 2pRJ>\y Pp0ҠFbQ(kPRdYzL|,wNu3]g. 
Pe,z}ҎHC-;.; 骂JQi\v_a&¤Ys%(⤊8 zTb2~* XR129c ^|d:-rMX*"8GQ^Ndb1ȀT} xҏ fhUu^j1Z{TLkĜo)h--x]Yh) yBiˡΗZЬ1تۤҳ+ҠbZ=wӻEIra7jvߩwi yHgDG=tvD7X /)hv`Q'CxG{%sOG92}K} }>ֲ.@MQVn 8ɾv톟%ejUj5x ݠf Xp FAmIr9VyۢN?^0(.~DcfٍO*_C (I\J u/[JdΟ9Nwoֆ`Ѣ9߄7"T쟠F?PK y9>V!org/junit/ComparisonFailure.classS[OAݶtYbՊr٢>h4&)ZL.Yv4hG,%tܾX i~;v()s˽JfHa 7o^Rj1h_0LO4Gm! ET'uQ 8rˎu%zk8 ])TG"xcbM;":KLSy$]i G3}}U%30,rc0b*xh3,\av:= jRǒ!uͽXtE:mW\.3|*^7QzD=y-b$=}4Hn|3s}ktg̥|r1OT俋q@ G|5dȗJZ$)IIvgP&5LM^p8-\5sLm1l`~L;b/h+7PK y9^org/junit/Test$None.classmPN@MX6΃( !A B `_)Iw6|G! W=?>a/D[>}NZ%LZ~ LlaоVZ7qFIzrZ>ϤM,ge9 g+zR9̢ҎsduW%ǩta"sy9]!@c0gB/tiݿ^/><kT'0nrHw[m^]63[ECPK y9sp|org/junit/Test.classQmKA~6S{͈ED%aW_d'Y>QѬD -,;33̼8@†M [u_Rx2 )HKZ2b#jCFY`X)_KeiN\m;=#ԵVRG)U;,8O,=*q.k j_G }GA\5Sؾ6#ene/u7`7I2Ld﹭a\\VƧ82hfxQ5Aόj7NfR`~ݸnrcG-,$/x~|c<ɉ8$)b-1e˜f`LU1a-"[K /V5cAX$M PK y9_J@0org/junit/experimental/results/FailureList.classTRA=a׸YET (:bX6 45NRU*T/z& 5},oOŽvtaH-,\`d`,7pEab@dQnŗen{YsVeh_P˞/pmv.jS+'}+j VEϚON-xlJpj?I@xH.[yr2/'Ne.`]O~^z)=T\]PһUi._Rm5:cW7I/Ȕlͬd)Q&B@A>#ҡdJ ϮB jP \7q=}a71nMYhʷ{&p}t@%f)^!6O% 7y*&A%WR2K(* L,)AbwH>?/X>{ϟY֊S5i/6HzB)Ӳg;7ju$R W28OY7?mtaBKecc1ـd? ℉N~zkqǀꬆ~s$Nq ؾqf -70M ̏lxAN"_>D &Z`>@7~d4˯b$ AccevB(})'01EdZy*gi8홺 :pCHU PK y9E"?n4org/junit/experimental/results/PrintableResult.classTkSI=M" Y_y>|$bָFQVE?Xq0ClɿҪ]b?ޞ! }_۷O?+~5!p5 \k&&QJ7q 6P61M^*lki/f߰CQem* Jj9v,g߶մ݆UnAPכw6%CզLR`A* LT5ʤڤnc&KOלk' 6%C4b .>q4V>֟Yz4xΚϵ*Q"_3}͓{ n8YRʫ)\Wʗldug9pv8@ iy7HL6F}c-*1^ě>r"ȳԠnsq)YM]{FYw3FG9T)*'_>PK y9>@$5org/junit/experimental/results/ResultMatchers$1.classTYo@6 q=R -3-PZ )!Z$d+ǎlj5 u ((f7X YLCaRWr5)_K:nnᶂJ faoXnX3D%5!aī  Z޲3gI2Y lo׹#j Sw0=WKWr1eQ6HGxWmdahe]36p5VAߔa+ JUx0Ѯ-CiliX}K-iMT,k8D= ;9;u. 
n7;5_U2mWX%UaVqh}D'y*mM>4ݲ/hi !jz#XB+$QiЪg'0$0% Op.9D4 'F&⟡lZMqD/Op0ٛ dgh (nn<#2f w, pwt4yoTTq#0id|/2Ti _PK y9 N 5org/junit/experimental/results/ResultMatchers$2.classTR@D!B\E--AQ)(EM6L\^'Q|(Ǔ2t/w.`/q :QńIL) 0-cFLnl0Ltʸ'>1s гŷfrBYuΰ o/}Z= `Z5,ʯ΋l1}$2A],M$ӫSֶ|rp"_NA|􅥋5;7-o+grL4䵚JQ\c!Or.v(SOGޣNzX<t#Zߍ𽴚E>@deB4 F5\+`m6B%l.w\5]'H-GBpLF_W"D:JfB~=bRq5wĕsWOHHRP->8s c|7hNc aPK y9h5org/junit/experimental/results/ResultMatchers$3.classTNAnYV XൈHĤ۱]ݭ{!<LDM|xfٹs̟?XFiBCFE9y,*XR1L9rȍW pk]Ӯ3-nJxVdH3\/1KNM0M[U%$cpkGqaz `^ٶpKtWx{pBܔfȽ /f?;")^U0eB 4ȡoo:vhateYnWq{"`xR}NuI!MʉތdTa9UgG `Ut(&PB:y X_|5p¥GQ-zRkQ bJ2}:w=q[a'P)#=v'p6ScFݪKA  ]%S${XQ8Pq[8G:qא&n5U(|_M+%f&zÞv7WdkqwmZئ}]T^'&n 3=,@k&b|na+[ _p ]ſVc5 COs`ϐ:Y83ulMoeLm^ ?Dwɿ7DIG3މDHTF\,+fbPK y9?>-org/junit/experimental/runners/Enclosed.classRN@=j"ظư ]a 0)eJr ?2ނ6{Ν8f^^b<Lh`2ȞT22TK~m+>pWjvːm*%owp>Rj0![Jݦgg浸ZFݙ :g(}k n{BˮPw?]aH_Er1 FsasH ?u>7+W)\-t_c'a]PK y9yn90org/junit/experimental/theories/DataPoints.classONA B  Gt G5H4e':l;8 ֥\j9ϒFk+^2ahLv00Ds*ØRaĬGJNL¸͜ESca(F%,EhN <ti1yF=ۂ| ^yIYҙyCYq=鑜^,V|؜mC=TiBI/|Q JzrKl;v$*vXƩ)'L!^Z(egb#,ycIÚO(2znbsҚ+{R0}8 tzv!oy2|IϥK3F2bnmX.cu#!}%k!S*XU[2uݫPCƦj*V-ȫc<1ɼhPLD݉h1&5\74ijj!{(Hxsu,GE1IΫAV[ICC#=?IY ۼ!,f\9 _jJ(XSr,^+}ly=Jժ:yQԧg ؁Z"ʨU[| EqT:Ҭ+a?R/vu1N]ZW`s5+j` Bs *^Jk~4MkK̿4 993c ͼJ[){EHKPk rGmPGzHm5%m[a_嫰Ve&op!,.c+ZOP'|{aCW'}h^BYrRe`l\K66P-$iUډ-]Bn.m59ZC(1({a5y-ޟHM'D~pdH[n# D Iކa):)F*Hk܃5pۜZלGJcjv5cT+;{8ڋ1I3Njg{Ł{)c!t9FB4vvV UٙlCNĝv"bIk[?]tt:| bYp9 g OQSIH;tfȌ`i" ^D[9rE\gBHMc*hDʅA@7lh=7%/""~CDždz Е; .TB %F 'A&Z7S-|cP>EvDl8|#CL;Qzq06-fg|3;`68jı@8љvS`F:ޫذҏD-u!p6+ IOzMSF%VS!Ukrd~1{x&Ĝ`N3弥0ZG%FFpnd:;V2WE2=,f\E\}:F^5QY*o%.8C?<56H-nBT&۰Cl B+堪=d[y/PK y9Oo: :org/junit/experimental/theories/ParametersSuppliedBy.classJ1'uklZIOE AEPt,iOĩH[PGf7C?^9lrfPH᠔)4.F #CtDu(QAKNExFz1N1bUi{FFd;g7t1=E,&jdPxK@dft@V|J On7W N}Ҷ ڋQy`k5 K$PnpWՀ&- h}PK y9"{5;org/junit/experimental/theories/PotentialAssignment$1.classSjA&&n [6Ik(""H"*F1IdfNJK>xfB,9u~ @ 2plqFfwel9^X0C~]}Mg(z6>MHO'җ)Vy> Ru#\Cp<=x^'”Ȑ +Aa(H򸡂;{W@PViH= 
CT39nZ:{pGZ*?P%Vg :svAW&9*ÁTH,sA8KUhoX-hj0<\nou5zJGEԃi6g`Rqlf;YYdFɲ|;E0T9Mz6=D>o П:MeЬ贇UO;HT!@} (Dt7иt3t+9NOQ$ i]I47Vb*IC8eea5"'3$Gl*$`>ll#PK y9컳yDTXorg/junit/experimental/theories/PotentialAssignment$CouldNotGenerateValueException.classQ=KA}\r$&? XX*DB7ɐl=߲,(qba!ov 0oUFk>6|l ,JF=2u) nIz2J3; I`4uiGf9"=7VV`'4I4wCb(3hxJKs!ݶG$a3xJ(pc2" qa鴼ɩ@$U:Z|P)GI= '.r|YY>D3A笥XHUʢX qLerX JPK y9 ;9org/junit/experimental/theories/PotentialAssignment.classS]kA=lZ֯bbROPŖ4^63evg铠GwK dvf=s3~ `#4p7F q?)=ސ&+dVЫӄ2Z` yN@ Ի@00$2Tg M"ʟ`s<Ɠb:=&bwD*qDETt_Rp"OdI*Eލ'~oFr3v0%W)\.BD3kوe<2Yh6ʛ~槾h mD3om o ,P/;eQ /iW ƃ]u oF 2VxmcW*]FxlOe,%3:Zʈk ƭWҚ[#\y伃592sjPK y9uG)?org/junit/experimental/theories/Theories$TheoryAnchor$1$1.classUYSA a!ryFMn*(ZdqKn8 Z-(MRdvggЈ0"H[V 4$Yk` Ok4h2IvU&w,?#̹N.:jE5 @@Ď3y/Ð;RY̰~Vʒ^rDew10E Ur3d-G.skRk6YZҮiثx^2VsJYمvL[CP>OB1&$H/÷_*P ҬO:e4>H8 -W)VuҀdC=x6#{Q| >)6ԣWig2O&!Ι_lk Z "4-PK y94 =org/junit/experimental/theories/Theories$TheoryAnchor$1.classVmSW~nlYbѱK%JJ)-DV6Y2w7[CWEpƿ鹛BYfr}sN9 ދ>Ls L _ƌ~S鬠+9"(FAA26ms[%WTc:ªq7lݯrWX-!ˌYG{yjy,KcV]AE`=ۧJMGjLYtitDҺeT|p3+ vd9|^[ld/a’1.;\O( smA0 hOe |ӷ\Sptm~ ŐgB>n}K qa4F*m]^zһLY5 g:']gjܯٮ\. k 0j||Do\~ '[)oJG2} npgєŵu:,sϗev1δc|Ke.L>gۯL1똶YNᨆﰤpI X0qꅈIX+xaO4))04Q!ݞѠ<$dQK]ʧt)#փve"?Y~ùD<}.8msEF4:F=/usȓ㶉ͦU/)H(`tmT50Q/:n//+iH z$2w&<0% &fD W3OL7pv ǤMCr0;K*Abɤc=i>> =Lf^eD,; ] CЯw\ $pv# ^ p2Bi'ZSݗчҋ&zv@ Fi=4(K;̴J%j XgZjEہp5\'BRÍ'/hrވ&s"d4~q+LKl(+k{8U@Pd?㷉w[?E:<PK y9H*#'=org/junit/experimental/theories/Theories$TheoryAnchor$2.classU[OA"7AD*m,w"IE" $,cmv1}'?xf[ dcsw~ `ua@lb5 ЂF18&$LIx(aA.FX9a2iie'osֳtUCq3ܲu(r$Iq)>;B'݌=bMێ,jY *=l,YќQ bfZy5.ָ H ^N s⦖l:.  
Cu0L]D̄:=c/~WWe` ]fhH&_gR /몭sYdô?'ã" Ϲe:fj9%2" DQ5,t2ؕ.qޮ'' K \\qQkV.Q!a%Eq t)2cNF3ZeȨq 2$ 2੄gHHE K T>sQGHx0K%4ũmJNi"$̪*jJmﯱ*B'jw0Ìadc7un%v:_F66SHV7 .TNx4"稉yG3DOtCF}Qj@RzZnTj-&/ ƾ{A{ Bn.{|:ELnZ=-c!iwCJ?FX'ԕUzF'>}H=@.CS GSaN(b ~7V_&uPK y9%t;org/junit/experimental/theories/Theories$TheoryAnchor.classWwF^F LFn%%`Bk811ƩSfxFCi}I$ ݷ&i,niӦsΌlNt{o޻75;O ݰyphI؁Y^1'{89 $xE|9y xD|QBW,+W%lD|]7D\`R)նU[0$ 25ɘiæ5<34'i C䌙V䠥̨gLG+yjȘUt-=CO)J2hzrXkӦ YyA, EOvn&h1{R{;V粪ͨCjZj':AʯU HaPGr3'Uk\9NR ߬e+Ȯ}F*cZ8R*V#ȇ͝CV)H9򢀚)iudك!S}7Q0\=uCKA{c(v_!#edud[o9+EySC;!`} -H q}(iE"#6ZApKx1(|1X,*ί癥+tSj])k,&ێb^ƈ\QW+-jas$V/ hԬi9Ejܲ<{|ǐ)܌pQht>z-(m06gQFM}]tꡦ'jjnhn'xMaP㖻`f.ASFV0 # rJKYr_U2vcL`e|񒌗x2}64s,͘izZl|`DU蔙3bNFq6%=5J1h!2X᲌+*bQ#تq1ㆈ ~*g2~ǿkTUMįTUtP| uۯeo+wC.fğ TuKZ\32׿:WeZn,]ڭDK^ع:R]W)nG$J$:SY1|i\CJj>W hؼ|@FY$lRA4Hgn+!תc]\ bR' a2lYbSF#8u3EXlrp]?@J*W+b%.)@b:Blܽ2B^^j`u7{b˂LjNnLj %m1XUA;Bsަ&u`AOc/=PC`k<1j5/~rh/JlxQLY12Htag8]_ȋ蘌w%𖗊2=W9n* %$E{% 9mWp/Dp!`EP8AH?{YQhvu>\Z@h]$K3/ݢ6v$U_(5!8WI y$n >x u5ȴ.,W>Z1B_4/+1l C)Mf>.*iE?p:ewy4b (+dEػ ?+0#fEqyXx;ͷN7ۈޯÛ PK y9o~.org/junit/experimental/theories/Theories.classW[G $Y, (5^ 5EZA,ےdaK7{oE{o%}~}C}=I67X9s\~̙9_~6"c=N؀'0&2NIOi x&dD%` 11kbs b#Ss~XHфYkFYiN̋ᜄe%ĠĸiiѲ0իO05C1,LjHWɐneQΰt@3P2>uB'N݀QQ:1 dh*IC|n[ZlEH3x3e% d-/nG5HHxzX4T;iʊqUu 9UX8dYE3zI[CZvt|<][Rs<̳"Vl{1Qr,a6?qS3>Qj$Дl3"#B}/$*4"|g2ǹeX_JFlQP3Jk^ XIQrPJIYj5l=L' 6TK캈I٭ &o8U6%9.8%Lwf xmʶQdޗ6OF"a3iEx&cMV+UѢ`'%5㼄7Dѭs o+wS>>G)We+ ۖfLHjz[ˇtW] > >%'v`_-S\I\\R5Q- _*W6u6 ~ ~5qagg1!,W@4SGOkS'>Cb-3m6˫uJ>JYgomA;YZĢCjyDW-;z%W,Ei;13KK㨮h(N2hF5*Q`* 7z0Tiaf ;JI33 dH7Je!Ar$PeinW0h)$,Wi~J5&M<(qD&xrB"wQlXT[R,u]7!ZQRx!XG_ ~ēJiu P+QhQv+O6RhNJJBD+49Dm#Cv'e%n%m9q()zI"/LxpaD%/uJJr_ [( 70&V0\F#+@PGe{ ]|t(?(mdj FXMY8kqᵔk)G]:"P)l'pyowѸ k%Y]0A%G4cDD47 /pxC8F@[[#;y$yVq ,4"b3tʼT`dG2tQqQvSPK y9 ^'Pq,org/junit/experimental/theories/Theory.classP=KA}K>c'vA!8yN ñs>`;'mHDIe27yf_ Ԅ,`^Tffq͕%TƁ]NQLGzkzrqu&_U _\JUgR?9 fVp{^kڄ6:2ɔL . 
tIm/PK y9pCorg/junit/experimental/theories/internal/AllMembersSupplier$1.class=A_7(hf"+'-꡻G<C-֣xţ `q,CN%e- a65>iWJ낑j2WnD熽9DmU<2K/ 7"WV0}ДR]Qs _o@Vorg/junit/experimental/theories/internal/AllMembersSupplier$MethodParameterValue.classUmSU~nCoZ" j-""S:tlɅnf.hghF{sss^_'E|W@N#Xhf/,,|eNw-,YW 恅-X?>6fat@i'J@Fw#-~${ pzaU*I{WFO]z)S薊ƻ\PhZUߍcIԓ>"##Ֆv}Gd);*s@͊?x;t|%YG0ᆫݭV)ߔG`:[s\w4}M>Æ˦D0zxɎVa@3`'4zڊ8A̾DZk2"jN֑ o ]i3 fYXd4&&'ו+A&"1a"F1&P>& kE<:[p~&=IM}ƙ$0xZ6ne-[Ŷג޾l>:.p? DlGR'Q *pS7Y۽;cPx1ΫVQdx+a76CHr6\-ӑ8"Ō }+nAB/5z٪7#*8Y@? ܩ`pZugez>F'dg ,]"C|F͝4GԼǷuejG~E%2µ,׫xhj>R#FS6gԍ/Ȋxw^l4]ݞ8YEr~>ͧf?=!T'DLE )i.h55J)MS"JZ x2F4 ojR|zN9 VvW5r#'Lq.⸚9f0s#SӛRy|}22PΓcPK y9JAorg/junit/experimental/theories/internal/AllMembersSupplier.classW֖B,|mDl@&8v۸ֻji6ýKzDhHڤwofeYeLM~̛7oF/mO[.C;*qќPq2Sxr<ǗcO-* !l6sA|^KBXe_Q BgU|=oo(m;-=6 j oI>ax{P2Q)qG3݌XO8IÊ0ҞxIn"7!|PA`'T18b#G: q`v(7N ל0lO޸Ḧ6}9kYPʤRiBͩ0:fx!'vG՜ghu/}Gz!b,)XZ]̮ʳ3CN\[gmbTt4`uQRF$u7 2JBJu] qw`24X-w(5hRM*D%uax2\٥Xm +ᐧ{d%Wcik.`0aYmcay#噎M;J B~Mmf~SЅh#C(HphOAt1 =Sq^OkxY Ӹ`MV qh.].0f-ƪY WpUÏpM-Mk ~g9 *~~_k i-~„㚊?h#^R?ό̾/cZIfXta % ?E/&Ž>+?0 &؂HveD/\8i{6N2}Žvr¿K\֯Va`)dXæU&[5;n%Kxɱa[_\zma}1#12$*qZY< h^^"ͥ q==`$EqL%Y{iw1R6k=KR9ص$35_dwW[w7ɣg,/:%X4zԩ7R>|}S~'iri1#>##.ퟏLHB mS _v߅le4h3(;ϯ2v%ٶ#@ ˰ =ыI<#q?vqN|uQ~hhK4*d[et4R Jª¾uH^j9'5+ *(A0eRzLa3;;ˊh,-1YTejϢ#piTl'> HT[e;1tHz$uZ؈5؏!]uw'%? {O%O53> r>mhɢ!PEj[/PC@ƚj^|>$[h"(!.~{$JcDsaJvZ"V"4ћ #_ǰ M(|osxUAO Whb@ʶ@*tT6TfOa辪CmiPx64Tʰb.46nbwLV9O]@ Yz6q3NFF}:ބ{j?qVPrC{-ǣԩ 8WoD1o2O||.SQS-P]@lcl\aSȢ:^-2 2PM8=|K3^}<*ż="{gr]yAyq/Bu:WTַ(Y_FPkv%Rwjlɕ2؜qW3.HKQbUY9_Qsye$Gꂇ2v`UEPK y9>bL!J:org/junit/experimental/theories/internal/Assignments.classW_W>؝UuY6]H֐)4ˣ-)-i f3$j}DZJmCXؿÿC=ν=so5X!C/P;k2.#L\gOЄOL - <*1tK2r^ _37x&|KBmKP?那Ivh{T`iwlYK镂;ir^UtT#,k9K9(j2x{hPϘ/㪖9k'`G p<`LSj. &f?0)ix=s"DP7tauA. 
Q&s՘S-1,&jFֿX!Eohw@R #: C5Җhh'=9˹ +u}]ʑpxo.ZIш=v9=g2Mkͭ p碄ZSMLe1MxVbUٹ!NW$jhIsF2N(Z{8{fa|M52+) &Q-rEvry Y0 {%$WyR h!jDMwd|W.ْ'g k[{IAO7({"&N9eK6TFut#DuGqrֈTKM3G5"S #@؎LG:c[foSěu,\1+J>Pn $~ W\S0#?Vҳ¦(ee뜚,:gXRs\p31~_CTh7Nq.@ш.k?eqhQ)x/(~E aa0),g7r'' `'s*XO 7!Ñur^eG,-Q 2Ͷ,T68όij[{:B_9d)˞ _.tv; ]?j6K| F#iW@IhNLkQ{$O>]Xpi$xνhe9VeuE.&DlX옦G@ۯD)')ڸ$bM풛%/ivҽ-( kacCBV!۸55sZ-hu6%oCJn!콅p-D4 Jcq&yAR;qIk0h)~sJĦ?$UX3#>Et^xG{h<i6G;clz7mLU[ջ뼆PL[[9ڞmnH1ZPTS0vSNPNS6Fb'ݔO`´HqߨqM'i4)_M@IPL=9|!_Dd~K<(r:Ox,A9{upt{\ V>;;wY qЦ }"T ٭*:|+<]w5zQi##6q.`Y:OРנ N514X@!0uY$/EEmlcQ9%E}}2?ד {%]c3ɫy&'{G:niܐB[ǩn@"5nV]%m8}MTS;ёdrg7{%LTj.)ЌKktwIezqz:dhgVK"jiM,jPK y9DQC Jorg/junit/experimental/theories/internal/ParameterizedAssertionError.classU]WUݗLBS@,MUhW TR>,5|P*ׄLL:3Aꛏ/>ouIWZ{ν9/xXTr(a%aܗO蒏{F5i[WO{l|lI#L>E vU/6n;e>\Yz,q5Ҫ Kd+ھjf)Yo|U9e̹afvO}ϝ.X]/kZM Z{$mc-rֲKJAkjմ[-НamҲZMN/d2%۶u5K{z]}l&Ke(*V{0Ď@W^ V,n%^NV?v^5jiYQM/X*q(?T\&/k |X<Ei -ЋE~\IߘŸCS#9djnfW^i(0W ~ DsVӗ )@? cNb̙`s!ЕH㦂/U|*^*4UL"`OE䢢2 ߠb5a^ KE Z-xQcZDGk \= aD9gXEˮiT&yᮟ9> Nn\Idl[{~ iNpj,qqz+4b2ր_鑂ѮhnSb¿bJYsּN&N-^͂@G)w[o9bW9(=+uZⴔ h2&Cv SMsMtO>C詷JMQ8u$٣,vyzOMA ' uzW's L|أcW$ß+u+B0aj ôI(ɜ-ΘZ 'i(tf{dÜ>> >nwf8囥oΓwgC[U[z51p z*|%.rHXG}W!ƀBQjvF={Ѕ.ӎcVGIOS̳QyMAWCN@mLf ך'׷gNQeR)1sFge$oNI9sʢޟ*\KQwƨϞ`U3a6>PK y9u)W@org/junit/experimental/theories/suppliers/TestedOnSupplier.classUKSA&d²˨EUT@Q XX/qeڝXz/U/ZZԞ$BS*ӽ{ǯ\´=8e#aa"GM=6.㊍^ٸk~A bH3 H *9=s>IgDsMhg*ḟQ[,#OXZ0R2d{eTPI(XIe!,GzXZFU .Gr3( U5SDEbH dJ WfQa3VގM@یUǶݽӡ&pLEEjX+\*Sbvt0оr8|=5 Cnq ;;bc=swy b.=k61 MYAjUhѭAE6"k޺F,Ni>""-np45(N ٲU,KXБ hөJɨu]6hY2xƘzJ;oi:ƐyFۈnf*w=H,,ػ;xh{`fl'(#:}u銰 t:UE(eh 8RaM^4͹SԷtgy6ݹTweɜUXA+h'G9{g;rtfd(v A^ Dg)5J4 GzGя!q=8LUd#M#oy8I;p;xFU i撬PK y9䇙@/org/junit/internal/ArrayComparisonFailure.classTmS[U~nIIZ[! %oR(55JZ+!9  sM_WT'?OQs"0cfgw{_D-9s]#XS܋K=cΈ1|h. 
=XƊ>2Xa ~ Z e@׽GE@B BEYk.~B V{e]RC [6B]ttՓaWcn5Uy/TUfrqY2$ye-W=ZYCu}5=ܘ8rz`>U,OV(7jzH2D} 75~5`֬f;'kyߗ,ԷwԽ%k9ۯz*^Y='yWˋz[yf0aˊWUOMD$r"J6:U"<ςdhq6VV1-BXo]1ZԒ6ݎҔArgZg3lq5b.;.s.O)W *±Tc2S5E1(9(PD1[uҕcn $O[% 35ycV+ ;/{`$onlR3QzRvEE0MAVXdU=K[TBQSCNiOuy"&7Ͽ@UB&(ަ!kWטܷaHTa'Z|GGwqLEbdЮ  AGF&(|FլS27:27hs-PK y9.$org/junit/internal/JUnitSystem.classM @Dg5&;;A!h%\H༈G+1|Xa` %D|7A,guYM+YkBZΗ1 f5x\˾OX8_dFdh[å2<!BHzPK y9_SM#org/junit/internal/RealSystem.class}PMO@BVAP7ƳLFR7MiWE2*/oޛ73ݏϷw{hȡf"M[::SC2hd(Yʓgd$+1rX-ܡT<ު)C1w" e _JӢW!A؉ 듫_t-G |~ yg߷A璸 lᬔlsdXIdˬIxDyy^2{L _lǗe _j5_PسBd`ҰJe61`;U ("RXUZϖӯeGfpƴ/~ӘXf*0N;s)@$2CLbݡ–}Ӕ;RX ! ᩲWؾeLwM`z 7dɰrLe/gVtNYp%2w 육Q;XsfA|`§m5 t3d"lr`z^\ #g&vl _db|UI˶f4) =r%B -x|\M?=;tJa-׹lH"GLgCQJ;1 x8&+hX{C/a&qn }t.ĜHug*W&τ Z. o `[NB?mp)f|p70 pM=l7D q|l@N|R9YS|t:+`+l A#Rف,SwsƳ,y7Уmb/!A/-'lOq nY%WqwH/v*\ARxGپxbv_jUt^߿% , gdl.6KeԞglE2 (cS~64>j-;W#"HOӴfgyiT3 94浿mܾtyYv,p8֜;AZ+)ebq'G[}/DsYKxjP6uk( 5܀u ,)6^nA !ϐ K[Z*p*o!|Ck\G 5PK y9eٶ2 @org/junit/internal/builders/AllDefaultPossibilitiesBuilder.classVmWE~&deBJZ[J^XT RCZY@mB=mJ_m=~Pl$$sws?W0xD$C>>>PJx0kS.!e\nJHJ.$r!Y O_սy2!$d EKGY*&iREcy2䋶 I1,Ow,T*uWY4e}WCuaw Cnn nYS%SsiLq_FSLS||mۆ Œ6 FUk ]&`-!2pH3Le<=C]\bG aH! 
ףՑ}(ÝP==ö\sس4vJW~2}W_6`d{mMm'5˲=wW_A( JaLc=!md3ݩ%vݩR[3͋$<>qo2ܑ{2Ǣ9xE|!K|%P+ؑcWj2 \Xj?ONx KCeP^i6-c8ݺi5qއ8SQ7jn*zգ+S5Ԃ]QdOncn-Xܒѫ!Xd ڮѿs>/Bv cIPtӗHqƷhV@),wGYoӘ=!a^C]&Tz1+d1"(/ùԲ@"#$j9JBT"⫓/!1^b.҇sBZxSstwDӑ|'y`h(iTIeI/bGJ"!&0!mc B8 4fIwinট[4.w4d 4c>?a0x+OPDjJ,Hħ;MjmNk'zC!ΒLU&[m,]z_PK y9`4] 2org/junit/internal/builders/AnnotatedBuilder.classVYWF $"& ąljR'i{@8ZH_EiI6O} f_>w2Y'qO´ 3 D1+澄9\2@BN—vQKx$c e+ Ws|i$V(~P,sP\_cH.fֶVڞaٖ閷>V7<7M0L,ζ[9eqj&7b;RY2wI3rd`Y6,kY7L.+;Xו1j S- ㎥F-to6{Scîm'g.%Lpfm SN^=-A b>5Së(_!8v63'2/P{"+a:)ۖhm Dy8g~; bEvڞz c-yW+ܫڛ 93z F̪ (Ir _4Ķ <\S"!\Uv[A FQ-Lљ?шhL{x(XS+gt0i&|2C,%RiDZzvM ]Պ[}sS\=4*jE7_lG}I T@g `[Ӌ=C&)qtmsoeLv45͉֚Qt:ٟJrtS\0Ýs8|ˤ-Z(ȹ4?\jeX +RWZv7.-۩ýT{[7^}00%U/hHn hHkLD~ ӳ|A1Ir0 JY&"r$ۏ!bo]=8D;ZeT@$NP='JaFT(AILV!MhK&9F Ic};H T:%Zt$o8g k9Ft xݧȑttO@O9 ɉ5|]Z%Yn&U"ϑ}hdÇ'F+M gA.ImR;I$HPK y9 D2p0org/junit/internal/builders/IgnoredBuilder.classR]KA=l]O8|SBC->NaYglʾTh}*s9̹w?F<O< Q}m; T6q:P6]~S%\Y}t~?,Vݙ;yBB:(I=B({Df9HEQGE`SRo1 V|ۜީteec&s yϵv,zyK$:9x 2N7h1ro34OC:<_]a,L1t[sAG*1r,DEDleU[ 2&¼PCeXo2'ҍ-@ Z+x4VKL mD-Mg0Zأˉ6XؖyZp#4T`<~25ݧ!IP$=ekyG@(UDIPf4ࡼP $>BnJ){fsΜ_lt`ᖅeyܶp]Xg>Cq]2`; v+]x>e;\IϒhOjV7TCo?&!OP~,]5?Ok %PP}Zy0Z/Ŵ{#Zb2:tpdzs a-I[Ŋ(ޔ5;G-%#uq U鋳j :2=4p\\U39D;f N8 {ms-,4]n/aery몧07XH2\ZQm_ s\+9iJ(8*$bȜ0GXm!ߴQ8bGI'#U" UHHRZ&iLE5 XJ]n"c`(ȖqbrwY5'э?PK y9 {Փ/org/junit/internal/builders/JUnit4Builder.classRJ@=} *o>HQp)8C;5Nd> ~%LZ{sN3U̹ L:r0͐PZ Rʐ$C_Ei]Ԥ95*.0*b.laoEdĕHyg|k[Ik"Ͷ'/*-q%'tzcB6 LmJNo/߻IX$sSu`TC02$mŚs宲Cvتy8 y"_J,c,CfXh[&?xCG;n:/Qqbؽ.КE^;!Y+>XxBt;J_ zA1!1C?|ǕABCs1b[)h PK y9lk-org/junit/internal/builders/NullBuilder.classQKO1ʊɃ x p/K%K>L<Q邈{773m?> NÁ 6H6A@RZވvp6φBs0-L`"|Վtb#*TbC!G\JyH2:T붧[(tQԘFX}߂N"z3wB %"P{&Qk"߹yt<m8&ꉱbAQB2sz^]fbnҗQ,H8C8ۜ~\\?k'{07Sb&MbF+lMF` vЦvG[bFCהHY^TJAUW0cl/@1BKGQ PK y9i`4org/junit/internal/builders/SuiteMethodBuilder.classS]sP= JZ_t;A:O$4$~_KK;?7clvϞݻOxA# i,Xj*hhXU!a9-wQ=gg {|9ߴ<+a@D\8pMw<r"p.]eq:;C gv}6 _nMLCҏj7Ǿ:Ck 3d߄LÛ蘪i5#ė](i,>ьuۖ<;"lwqae΍E\03$<颵*Cayzž~Di=+^3~tsfN˝rZMt}a99+x1QK|`rt^7,#h}%CdԐ3: dBHdA6Fh!jC9ea%I# 
O*,Ei#JqKrS&tBn $|"n^l.$?G9BzgD_rٗ^L~ȐEIaJh2aM}a=`a;Cfe-z֛ ќQ1ӲuIT>ަp0 ga61my!kj qAh2h.^Ea:mV?EmjV`v1Y'Q {boy*=n{*\qzI/ G rIom-nR)!>y,xx W{jJ (M{0ٍŨrC9T,q7<ԧG @*wccDslq-'aܗ0%Mn%tKuw3PK y9\> d(org/junit/internal/matchers/Each$1.classSmOA~Z8\E+V rK LH`ݦ]?/&(H4vvggyf?E3Yd 3PǪ><4fa_澿^: l;sCݎ7洩T<ހ [Rx\szĐ * 㔳zYNٶ7}. _"/+_?EӼ5 E(Sk>uVj4uz9rz7AK7QMtJ[ģ[KK~b撧Rx*bIV:efhb* :^@ǽ:@{2Qdri:Nä)ɘVIK q]Uu$>F,/S,HF\7J=v2LT6-H1B'c1¦ʲCW)C] T2&'pҿoofX?~v}g3&;N}ЏؗTDa(߉N.29"(ed`?ܱNZ< YR Y Q\OLˑ(DwΜy՜y?0$?\(cqF]Q^PK y9&k2org/junit/internal/matchers/SubstringMatcher.classS[OA=]vXEx)-]DCӴ!m%|1Q >QoK+Mf绝ߟa݂G&RZt,"BJx<%8I*y-'I8~ c%ߠ`T~V2{NhG[ j [䙉f.1F6{&^(xPK y918q1org/junit/internal/matchers/TypeSafeMatcher.classTrE=cz}ۉ1mCLb qb! jlKZ DQE%PN T#Eb*/;3=Ow?05D&*nJ˚[X* 6~Lkxy(ʈMyᮊ{*cWty| {ƗFf;ٵ8 %k6ܦ ۟K)dn.wJ60Ēmqw+{a5!,Mβ-7JFG 7 eR#xa-CXwr kPֲ].l'+tHNvb?[zU KݶzG=KH*LSAeC9T6 >KnL׶hԽ\ Fr^=!,>b@3L!09<,:_s,ct}Oʏd }iUkt;YZtXhդb YfGh.d&3LjOe%Ŀå?z1CK?|S@B?>o-fdJe則d yTK)1`T"K;IoU^BYj1Fq'ʷ7P#(GmYGĻlޤ^ XXPoIeN/R7ҤH|]\͵|r)F5I&2v4mńڊF!ޢkam7⭫PK y9'4.org/junit/internal/requests/ClassRequest.classRn@=11n\Z8P*APMD6&bl $>B̮"ٙ׷`D\MnncbwUg6d0""RK ]9yw١W&2E| C~[2aX琏 v0 bGE'R}P%t:A8rOv R32k>5$+Zuuލ*/|Cu #'/BU9?U7m\$a_J%4eKa[Xce K(k%0rx31 H'c)i_!,l>k;T9C gؼ`}\ 6-OAYo (R'7tf,:c0uvg=XXF"%.R\F*kXA Y]өAOW(&:!L\V''CrcΈM] Ot!4&g495ʮ:EEX&Xx|Av<0>qE# MBV&t 7oPK y9@/org/junit/internal/requests/FilterRequest.classTR@^H\"`/HDE :8Cf:JCZ¤Iݤ^GFr<(Rݳ|/o?b]A &'y2%ܗ0#A@ƬHA7<Ƽ(T‚g ru|4=at,_M1-2HuM0u1;Vi:Z+RK,32WZ6C ɢz[z&OsKCgߵ<|[}ˡRnkH{ī!D72lEpa_7hqӬ떳0MJmräs fwXTяRKx0? 
Z' =ms.-vFW _֝7}H6t]]*$w2왆b+*^`EK(x%C*`L bLo4 ]BwLV,sAPWr"Ͱf'l {Q,M>,@YRBQѻSZ#\]D>AZQ?c eit [Âm0/:DHk$X~x rJ}Gw9"nVW!yC!Fv]bn&i2aiOVi&\PK y9I[A0org/junit/internal/requests/SortingRequest.classS[OAzaB (jl/^0>5P%quvv4H4gElh<3}_x" * ;5}cȡɐ?>1;{DӉRB{.A8qj$wBW-,ڻT|-Xjw,I3IFs=zRVCQ&Ϥa\yǐt A\s W&QpCj'im9h!R3Ӎ!0",: {/zǔ=f)f/0,h>FWl4%m"OD4*=Cvl/`Uc[#;"xLab2YCIYw?#uL+Ocu\mXJlҨ'4D&nh۶o'PK y9TǏ ,org/junit/internal/runners/ClassRoadie.classVVg~& @U !,ڂbY+UĪ݆daN&.]m{Noz朂zZj$w{-044!1̉Xnঊy *niXmX|gW:k?p21T1V%ǂ iٙkrJ1>50Tt nڜƊ>efڐicz֪`-5n JKG2:NcP8Nx::qJŪ N߂~ E5_1"˴}Qᡠ⁎xⱎo-KuQel-Ew# B`BAS.҃N㇭MVCUFk&^9\Z)feyoԩL`,c^ XVygCCb8Drb,9'']f[KJ* FX,=|oUU%Q›=_[wa2۟w@ёDSJʾ$$]\yzS(M}76Q!ueQzZj6Z{EM>*rO_r7@or?!}oΑ ~-jVuMwr9}k!3$xi)ޭLfy_3TR FKo!BH}/6P~f ʵ∤E'4b1Fo%=N%Iej@LLtPi{MBn> R}ШWs󖂿Wi ͷ_pw -/ <ő8K <80aJ0C 3O;JOhGl1TGwlP YG?zAcb90pK3|̷fN*!&ɑ m! Z+(I- A"| vͤ,˲ZfiU]APK y9 5org/junit/internal/runners/ErrorReportingRunner.classVkwUݓt:x cEPV1ECQRA46SLLEK~wBf(I[X]{>~s5%*UZqL0$z(I3x+R$_GD8nce{vƟ@Up{+x1m~ AA{>:/AAkaxN$(xon``}X%YaёB|VVA$k%&N1xD\ ay~: A7byp}^GVHO Hqw&6H67>NUAuudvD[Zd[7+pVefϳPK y9F5-org/junit/internal/runners/FailedBefore.classQN1})BB.GEBHf6v"~ K$(q17PV*Z 9\y kК+CTSm?(CNIhC7bB^MbF:XV8+t"yBROΨXBizA@vgSXgs0Hy ަ:q^XԱ?G\D䱏o7=Pf_ᬀ*G:_HϐkN`lɾVjAwPK y9AD^4org/junit/internal/runners/InitializationError.classS[OAWv-TPoH/ʂBBLFP2̒]Ghj4Q3B)ι/ x"ewp%k. /yY043BKެ H_Xc`W}\5ތD1߮k뀐={Q(^E"S H پRor6wo72RKRpaOY1$VH%GBo(CשsN &]IW|"" VH)mP4ˏ<$ViCtO.w&d PKհT`mT*Ag R<<4ܪx*3Hڷ&Pؗ 徨@ъR~hvR=bcxFx#<6݂CnwaΛ!^0eEk~Hx`&0s_l::=.90d7QGg $=N״huK-R+bGőyZA_$/ލ LR=~ H[o h7V:՜R-$즼DCWr*ј;tۨR'[Hm[HW[g 1\9k4>|7;c$K_ܥJiMvhjofuAPK y95org/junit/internal/runners/JUnit38ClassRunner$1.classA 0Eh5Z s­ƽ((xXCM $s<֥+g{O)⓭\&7JKxw6\,WZxn^ &+eyW[ L)&/i@HhF5kar~2+ h jFP:FjPK y9n|a4 Porg/junit/internal/runners/JUnit38ClassRunner$OldTestClassAdaptingListener.classVmSU~nl,ib!XˠV`*jK]MXL7__fr"EV17 f *p)(I*7~E_A#(oIu)RB2+[6we%GF?<-YN&)7$([Π^A:/@`O3quY ply"Hw4ּ/)5SԲ *.] 
A5zkq^9/=Stv5YwiGE\ c709 _xS*7tx X3h`rbZ͸2/7Ұ/p5D;gfKqF]1Uɑ6Ґ6%sl"PZ ]:N*ltoV~mLΌ \H6.uhAږ~M:uzt.j;b~.0Sa}4KF>1D)IrvYE詧Ig~AO-ZGe. q(4$ޮ+}%.ǨKg7:|Q4)CALẢq&e$5BI P^E9sX !~2 )zNqZHUC~`FMC/0z@{f ? #: (K lgRBcԤ$GԾdK_`Ix80|M[l e2dOVZ:3syYFwC ia4 DAЮ6d*hC,^Vw8 6zb8PK y95=n3org/junit/internal/runners/JUnit38ClassRunner.classWsEfl& 9 Ql+r(@M9'dlFDo4 RZU#׽ͱf~{=6=*b05(հ}_Ȗb@CB<(Z" V԰ B< )f֫8+Dx]݋*.UW5WxuMUf6Ầ=2]O ƍaϲZ,3W *X2-/C٤3] -(( gh°ڮ G7%㦂6RC=e$()mIƌñD{$hѲ=ӱDI o!!:n YNg D+;Vmx)*V%tª¹C"ɠeV^DNzV3<+iG6J4,a*1T59]됣" 14<3{-՜ ṉd ipN1hΘ}8/d*Td&5O$sVLYPͷ)bC0x+OR""s 6X1$k}sKstrߤ@7!öS^qE_]s1SU6c&ouRwl1[AWN[?:^C7tG .)ϗх؇*>)>9P1K*X3EpOlo+||;|Ȅ3u<&2}~급틥 M/%:ḂK 'e}r$٬`%ELsȰaZUH^Lҝ7G! 3UdS).wzͤq1<{_fSK'K%B*{ A* J6gg\l7"]>&oޅl -83|W8ڱxҔLٞ$aPHzӜsZ,jqIy1>YS\44x[ǻm@W^Cl3ﲿ(` (܃PƑ?;aߚTiB)sc)xzB[lck;+&Pzre(W/Jo1"lmX3U +7QYi>ZP?U\(iA]@jF<$@&rA8=D%NN4alc\T{ar4_/.`W1H2,%@if߃8D6GapZv3YқA n'$F2?˨Z>ͪņ=Q}!6egg)Π 2,+/I b5m(l]k/ۗ}e,%NK+vxY%!~cPK y9w4org/junit/internal/runners/JUnit4ClassRunner$1.classS[KA=ĬnM&U{uMZЗBKOL1N^_BC?f!>|;ef=f om [XG~#|GR2da$Znh-CwlD&8#2k9-)7cjg7:2G/ |9q}H tNoRP7aޢpʳ]2ޮ O >à >|_2`-5ά{ y|byMll|?C?cKPGGƣ#1 UGcZ(nP6H(7"7u miuXL}4[!ֆ.BU#MQs,t$1t3Cf<Xȅ#tIXO[%#$Keus@;LΦ7{G.:a")Gm3dPd٢tAaLJIˈFb2~"aqb Ǻ-ӭZ2]ґǴ1Y$bqTNA{;jkshO*4Q,#ăe"Q55V7Dk)-8+ @%k[z ͭ"0TivޮdžMCEd y<M.Wnd0wktzVcGp= ͂d&8iD&æN>xITcѣlLI&0NkB(IjOz#^,UM6ˣMsƃQ OOvGƁq3:ŵ@6 )0ejIs`JY9;Z:rGv:CgRèJaiow"WH2aFKMhc2v @0JSaVg)y*/FTTvT%11/Cs+87i 7.7:iHQqun]4u{Nz: }Ј)ncg^VP GnnYf|XXkE d݆}Zd9p0Mڍ0Z<e@&j}eL1joiv}u4ˁ|@ *݈q=O]xY}m_K4rH(G}<ϛgT,JL}"z, >nVҌEZK' ]< _ s/-MfӹP$Y 1i( A=R!,(v!bG: ijR2C̎,`CAh4eudso#De1E DJv$܁:Jxu}vRoK2[@F"|%TYtct 0͎ͮ? !&h ՟e,B";"-mOk ֓'&ԥ[O62n7~;(rSZ(VY KBc&u"En;>~F8_VZ \]=U0s!1 ,Ot,g6XdyT:dI@F }Klcum|YZ:3mӠ3Ŀ8VRJS %J)PIZM麌n9\3%G!DѺfСT S8!< -¿q ZLl< y[o2G09%:!V&jYc?Ns)I@4KP$fSx 6x5Ahbl2{Ys:78/8\0r2:nM*m6q&cR-PK y9X`ER /org/junit/internal/runners/MethodRoadie$1.classVVW "R5A"D*B(UCR!ކ̐L:s*}Uj$ d9o_߅~La*QL 3QndF,?Sx%7f^<1gNv}˩4 XŚPĐoU1Jĸ`}{?G? 
9?б CMUXZ K']B"0 .Ɉ7\|Ȉ-۶|" fk}7) yKůX-ʢy'xf[ kVP]9rQ`4;% VL8 IH&SX˭E,J"u$)ST}[K~5 wvjSvLѭ`e5frk@ 8o3bTxÎW7Sݞ:6>^(ahG#_ F)lڜ$Eo{'2>'\5P_{sPK y9t/org/junit/internal/runners/MethodRoadie$2.classRJ@=FT])؋E(UmvK@_ٴ>63sΞ 0faVabĖm٨'1C]?P2rDDw";R\RRaqjV`T`XI%/-4x#$Wk@450طZ0T=pJfTXGō֣>dZ=?;,ر )i!Okj v=C/]x Չ5 & Pf dՠ)ӄG{'M"t3k*W>_H=6EbQ|B摅A9*}"9[r}H4X;LA?*EFPK y9cV Y-org/junit/internal/runners/MethodRoadie.classX_\>x "ib&1 CPhlM$hkyK78Kvquij%i^l.fh$Jmmf7?{fl3w9s{y/ :LJM-A ni>$͇}>=W~A;!<1*qyD)2'uf DȚp4}=flJHydZ`ΆLؑ"b&%zVE@v>aM +^u>,8|˅=n0FZ c|)֝iRpv^}d,d\J FbVH"P#q;SnjN61n l;KNjp^K^ƴJ gcĽ,\}됥3iax`oL! I R2/}˜&Bh2;a+OiMoe_n'q5GyX,Jr,;v$,j!լ6[~)3?/ R6NH,5+Z+Ko5ձgZV2?b[ okWwYGŸu_q[9`v-nxKÚ+7ũ}q62EJ&/;1萀aUbn#%(\K֬־S-bt"#H5^vyUe0x"^9RέG"\lf +"PZ<']͑I2)7ۤHS7ޢ\%uQx)V'wtv6T&1AODyM@5L]pV^t  2Hn}-ϟV[Ydr*vY5dƻQyrTWuzp$Vfu_A<$XPw uc؞m6 =Eu_Cp땭LZkUKN($F(a* 3e\$1JqI~e`.'ib@ç>⚎:0AI̫ݗi,+iԱcYұcE`hqpA`4W޵VvbRmIʵhS5{IH٫͇sǨ;N +{~ڍrPݴuX eE;VI`pO8ӗ]`x^}<_X ޸c]:n dNõȧKOQ φ2}o@>嶢mޗO=Ϫѓy}ݔZuPS TЛm2[qBdm'uRQ=K&Xe_ k(`MǺ{Ž$]k5q3r[Wf؀\,0ڏpjSN!V7divo<&6Q0 \#N\u?NmGx"5{Foou,=N͓g;SemR<>`NP{n5(v"g֨D=$~|h%( ӡ6 Kre!c뽀Vǔ71,Q?0͗x 30&x<5S(>8o<0o#y@w@l{k2k\?Gq ooi\EYT # h- - =3#CN`ů>ˢ/ȬͶ0rW&԰I(>@ ^%%Z!ۤX!1^y9cJ@w8]`X4DqoŻPK y98T*org/junit/internal/runners/TestClass.classW[wU*ݝn* $$Bd-PBQ$DD.ҩ$P]8fxq.a֬fZd/@ΩtpRu>|]&]Oa+Ω8;b^ŽC)D p:GA8b$. +b8^/TRྫྷ!|U(.+,VWkx,)h cz&[)گ 6aZ[p W&8hZ;ޓ #4o1^2I}*GI똝s'u{_vLcvf3 DΘk882|fȻ%#=;./J-k5\upibze̼K;k13n/'Acv BƸ\eٮӁuVE\.eyf8X (輽̸8 6]S)gXTR(h0PKDŽ!A+? 
uvx줯29X,uǀE?NZb܍Ɇ6?i/Lm׵ ?ոӧ ֺεc\9r ]`?|Gz'b$ق M@& #S$[-;~٧T|`5?2ֈ꺘+#wB ~-h+TD5DQ*~ t͛=A5?mUJcVVy{\JeuLkV86a1jaޝB &+&ĉI3ory*s|gG c#_Ѱ{XҲԛYS[ N5Lb?ix xCÛ-iUV4wCQixK4w9V{XRvH5L!_4untZFr9+eYLr"[˧D5`5m?+hzJɥ9=?n\r%-Ts^@ylNw*]=k~P~=:i򵽧/w{9;'8̯EVMV3JFBAU#ut'yuz4^պ@aGmAyv Pr(hzlH NİuxH"UZq-=Iw`o'W<[[Dm@[REp g]'Rgۚ8ic E'GbQ93+K&?B b"`SS\h D@8.y 4&oUC{v:L4GbXx":XzAK'F[,o2%OЖ ^?}$k5:zLSy/Pe>^b,0#&ìQc` d?3JQFh15xÒEcqP2&+!fPp>jy7y:hRc4T6~ M)Ɉv#pF*XhY,$??~ |1w:]]ޮRy;T"{ Xm2!aXi} 3ۃܥ?"Wi'y"<qtHME$!}͔x'.)Oɧ,Ho7.A^yjMkI2c?!F+$ڎ1vh(NQydM'Ko)Vj+lH/4h%J8񚉵O^ d} <>v0'i-NfzZ݁֩@^O ^xן,/TDrbh?gi>s<^2]f Hո)>HTe5N aZjiPK y9tJc +org/junit/internal/runners/TestMethod.classUkSU~6 YB.H R-)%a~3u3:~3(.Be2s{ypFXM`kb= |fؔ-&ܒw 6J*ؑU|@`x?5ݬmQuK pܵ8 抖]M+lSi 7_6h`-l`5kO((ت (*Zeۆ|1w`1~T"rMfմlAy4yRT[2Uw=-tӴ\5,S`PYuZ`K ֟ő7΄t\ĶQ5un3\r.$)w2zjstGKQBhŬ|$',Ī6qluר勆bpȘ,T$" i#{@dd۴⁊*Qm0| (!lQֽzMܭm;cfX峤u|a#F1Ƣ&"_i(H s-2-˔_ڦ\ѐFF] eBhʌ[*4>]: ™vFf&z`WtCs2nT. 'TP"^ijܯP5AKB]$?E e=sfIcv ]|&D@Kݠ:@B+{+ E`ouӮ2PRR,·D^b] >>*GIPK y9y@P7org/junit/internal/runners/model/EachTestNotifier.classUmWG~&DV6S1h[*V4 Bm FےLp]ϾTxl_C3ll6 );Ͻsg0:`~9|U _H a!E\W[ 7t2 nĒJհaaQqِc(]o ^83X:i;/2ǏxG1 {EO&njd=R{oZ@K3'+W~@2F_ƣ0A@bDs Z+ESNo$oC*'%1F+k$ŞQvxw[Wa*MD^r E+$rj,ia,sI}f7`F5RkB?$Oa  3Ҝos JâyJ=e?`'/1SzNE?Ce-4IwXHg{WƯH&}7{o TA2OUq%.Rd%QeO)fJVP$?zh(/2G>ԥ QVL9ILE0c䅨SW u{)B-pN)g0qg'Εs_PK y9$ZE?org/junit/internal/runners/model/MultipleFailureException.classRn1=γ$MBx( 1bRuSZ 4iغ\O+@H,> q+U64s}}9 C Pv Ujvw64SO &^Z?&Uuh>p.v +3F&t'#=ҧN [-v<1jǪƮ l )GS~|H[<=?Q8+G%ɚ?47b7Rz4Ke9 M,5^/:Zx sJ_/[]^A{|Bptqr ԡΌljAV:v:xlu8bƻQuHt;9c`%c1G*6]gyhO4(Nc r>XdpuJ(;SBC~ j^.PKNْ|u'PK y9d 9org/junit/internal/runners/model/ReflectiveCallable.classQ[OA=nwq[[+q N фd 6OXYft[Y 2*I6z]ϛ[XKc9E + VST,/ƚ@ucD /hVf>unBVT~$f(.+\_F$ƖYK7V<#a.\J3*}[# X\پ<cR範 옓xeM:u ^ ~gb>#ӟ_`߹ $k`-Wx-?Ω+.&o3ʽ3N{:=~/G$m}F2Acj&ZD#z5*W5NIK!|!r(g5[PK y93+];org/junit/internal/runners/statements/ExpectException.classT[sY` 5xBDUw lHTV|#NfAK?UJ6U `Eqtg>#xc 1_rQ4\~q3:Jj⸮fu$񧆹8upS_n zZk - ^q dC:ӹrE`_y|alөfmj3hyR 
_,>ܗ-gJ*Z. \@GYH-GVZe-[3Gg)} Y>,'c=X :jXÊcNضc[W.pn׸|a- D3ʠ5VX5,b;chROIG-&-Ecd[Ϊ| k`ࠁ wp55PJo,{Ezn_ӲOwmEpTYc) /MH%WA`UC-uߗry(^I0OM{VI ˫ed-,OB [%ͦt8әq6 5 sj{T<q߻1~#qƸK1z9J9Is(e"H.w4Ec\p݋a$& 0 ܝ$wiEwJ8C~/̬si#+T6`fu 6v+Ow=\:Bu/6 $?8R7~7Cb88"܏33=q|f>ʨ0#9sjk&W&m,RdQJKGآ WXǣsjA,-Z@sɆmvH\A u嚼@9x+W)k$dviňPK y9MH;org/junit/internal/runners/statements/FailOnTimeout$1.class]oA)ȖuiXԪh.6  Yf~#o5Qk4{J~M ٙ9y̙_PvX3FDM<ªu%) T T >mG( BE)9w4= _b^C'x+ W𵊇*h],ZkWˑky?r]Ul9RhVr1] bFMqg2w Rl*F˟3R i8w ߖD%\KMZk8f ?eά[w*n6NZQ^%-V]R >d U#7=׌|:i@fk5ˌHXmNZ_ZfFLZg.@6 "Am-:/nF/l:1J~va2ñqdnm#1o{|cE`(ݤ0{8̖TmaV  O}2{-h%/Mkʎyy頎xGGu*HYއc#Ч؄cH K"*:0ֱG:TuT=7mVHSOt؋ Y-Tm9elIKQXj쇾m+m鷧 lfiE֐'vQ3p;E-p82_}bPݍ: [—5"*=Ӭ"c&Ti[A5u؃fޖRBW6IMQ@{[oTApg-7O;'<_h!8}D6 "zj*V>Btpo4#J~i]!2Ey8@{D!qq>L\~ *e{!T͌CqBL<$01[ 5Q]]"rVWHƬ׫We#?A~Q$ddqiқkbu+ruQ$p) ʔ7pm1P &!>ᩑ?PK y9׿8org/junit/internal/runners/statements/InvokeMethod.classR[OA],](ոb4` 1@!AA4*k9/PK y9ߛk 5org/junit/internal/runners/statements/RunAfters.classVMP[UnBxl*? X>}0//a3n:ȸpǺ3R;k;tܹrk}T9s=߽/?=]Hq=hD$}+x;*` .J2`Dh*U0&㒼'$$hK . T-L7_YsJf^7)GsrI^i_XִYKszZpxRTAs 30RY2-ٺ@uFFƾԝ|= ENL'6%c 'r+s=͙,4)6(9K:ްX< +9&~bNWK'ih5ѽ7"\䒝!3EZzIQ<9i]vFvX]L뫎x,>jUOIE3S +ΐmkyL*HvIRT)LhG>|\ {W1 >U4Z!h2g:ƪj[V,xN XhWر%mMyّs;.lM"آoiZ\gf&kf=Zޕyy)\LB1#+czN77>riB 3|⽨$W'讄#' NK|ǵ! 
mm߂7ڱ]YF~hC0'#Xsh<^+˵2p#{\xi ~DJ:K!e fOvD\;H (5bdj~:Aϻ!4P/)7/Xnު^SrJrI (9ocY1]ruC{0Fx 1q-Quo#tYЇ9N nBU2.^'zd/-ov 6q%d0 1^wa_\Ar PK y9X^96org/junit/internal/runners/statements/RunBefores.classT[OQNweDQ (m, M &B0ѧm9mlw^ L"L4>+%V93gf7ߜ|00!7#m۸ b@ %2f,{p53Mn;ZZㆶ./qӝbP+]Ϭe]3t̮gy޲Z{0p|otYLl0u= ^[ݢ65Cӂg'4zUy1dɗR+z~VN7Vu[Qv;V^.M88+Ϭ2Ce3`\ָ UR;O٬sϋPz6%^ 3HI@fH;7CH1tx]&_l~;7]alyv/#ֆ*:ѫ-*hWъ6*aFcOTE Gt*L-2\t =CQ͕mmUf|r|DE#:x^VQw*_5 _}kmfۛ27v&ox&_G fDZܟP i\ pu$IdG3Xz` _h2ɔ)J)O;Y5]kZ@AQ@IX$t|,T=dW1HD40df) wD+K_G?Gn 1AІ8i>nJ p"cTP&dh|PK y9WG{O &org/junit/matchers/JUnitMatchers.classSAǿ+Ᏼ4CK1ߊ5L_np5w3ލUS3^TӳAq0|??~(.a55D.hGQDF«(ؔ%a!R5J3  ~V+yn|fFzA)(*d*&lV7q@rE %n95;0$cDˊVyZ[%R0iɎpe^%x1e~K r:5Ū4MV[j&:M_儿䶑 :83vI9 M z0d28"ga'Rr9&S"r2 o`H˧vD 7Nkg)H:Rz4X5KQ5s2T99ռ81/LK }n;jcW5pӥTb;| qղUTZv]BI 1 Hx(&4fD3'a']%a2RWb;0}}0NK\&;U2r+bӦR%;Y֪vpQ񉀉vF27Ї!jC p#;%F;!؀wa'1NF{_D`^-vn,n7wlߋ]ra`&i}7!>:w6KC`&t {|!+ydO?RD@R'f{L'G ~zPK y9WQ"org/junit/runner/Describable.class;o>CNvvvF̂IEiur mȶ%ͮ]Arb_Y[VxTMצ-gfc&[r⃏]}I}T1n'&`cfvdM*"H!W%^f၅e-g@qavʹE6#vaTz!\Q4VY󣾨Ytvl!֦:5ZSS'SE ?ǿ*tp8)̜vDYIm 'zR0T4V$]06njूQܣ|U`O(cPnUpR f0=RpSV~yU#dGǀ(qAwie/ zhZЭ`uz$:~EgJvtl5u0>_dv;nMx@SyvYs TJ~.Ddjq̌ԐSb~ֆ]7爞\'t}N%#ϩj`~ 439|"yMK;hm&fInG wI,Mht鎱Vm}| &eyȅxSQ!I^iHj8t-„q‘&/Hp6E]{U++#+keHcvIG->vLJ[5'U 9,nZÌ$P,#6e&іTҴx|`u.jnڃiY;*x8V9|J\B[}`B.1*Ge'˸ᡪQy5oii${iwf9:{qji%:#`bؓJd'* #.H#+cg9~O_'ivb@ځ5;:6E_B _ooR^{X| WY,^<ף͇L8o]ۛW?K\bCG(p1I12ڐD@';$޻%)wІw=;\a?!m8>KV==IpK§WFo= b;g^89,%Q֟6;} uGBݡp5MhMvᏟ _WTǏ3. Q&v.р"O.쁆3$"^ `S}RT=˼V|\~s'갹>A'KPkPK y9  org/junit/runner/JUnitCore.classXix=cy<6`Me2Ik .1uh Ghd K)-MBBht mҤ[ m"۸ҽ龷i}H%lyo޻=7ߋǿl1VPcฌ((1/qBxדGNQ~1+WC0>"-?#zxQ18/f.("$v,zi11"zY1*=a}nIhhKX)Ӱ#V4u+b:16f#ef%IBi]}D.aNaեs-ݚe̠0MոvlOX:U5ÔP[6E-ovtjV?QyLK"o1{o>b&i3L[L-<"jIgZBh&Zz2ԭu8SR8=%:o۝3\?b9]Y=u!CYL ;~SSeMEMkN/N]ѫ'c1$'aT’>͈;۸n"\#QzyIDJ&g}d`ihdM6. 
%;]ٔm# Ns3wp4Ib8sB7gy(2DZ]+"[ebүݺt2'+wtn{zRB- a}];giu(5vpkWJeL wϽz{[ +i»C]dTAU>'Dʊ魆8UdVHś *nbcݡkqT؍[\Уb e2.H6ٖ2аKL_)*oLŲ!*ȸL؞H{CtG0{CN$,Xv=ՒxU fuD$~HXvGrb?Rq/aa-0_ŏ?d\/pQ/+odVE^nR;^Yk<f&p]w+Կa?*RgE_7*OX>w[c<}dg*ӓɾTܹB ?}I)侺]$&R|Y?F"I"]sJ64z՝"yeX;g9kH)_㥶[ԩάC`XWjnI@fEJ҅q5Ie3Ic6E5%#̛5j)XOdbeJYal6 @ g/wc%(,sk7=V5*1P( ,E~V 2 GNrpX38}* Ny! eOxMg 7_v6q7`)6;!m8=Ў[$6B ]iT{w݁N]y#vڇNd!=r? c091MiTbiOf_Оi5Ѳ`&i\#`Y!h Fqt;  pKX4QeKzJ|KA_8!Kxf,ei,7r݊1,f=rPu`IX ix-?O줛ڌ3wy.aW3<h'EcHza:8|OpiξH^~' gIXu(x;$zHw('^a8@('rwR*-IDY-Cud S ^Y׽|8HWOPФ6'('Ae+}]D?_Ʀ⚚8b1M Fh(pC1l ^Ys@h&m"O6L [ MxRa1*EޝvsbY[pGhn[2)kj3Y'o)̢"p3=If(\s!s{~5PK y9@Sn org/junit/runner/Request$1.class}QMK1}iv]Z%x衭RГ(CM~w)(?J+MHf22"1aWGEGXrG+|jD߂VzhׇHid2 d#V_Vq/N&lN藖fcgϼCud0F?ai` 99_r`1x,pCʔc'ɊRK٫.E~{qu!d3}Ɔ[QMr??z_.ӛFbJbLB%¬PK y9T*ӆGorg/junit/runner/Request.classWi[W~ :VXRBQdr C'3qQ/_;)~s &$/|+=Ut#/4 *zQTL*PR` Wb@3)6i2Cb)pRKz"Icֲw$Lk0Em= l"pϟ$%9[uY]f-&k95h+N77Ā=$ƀġgf7և`4̢K*3>ҥ%xZ^n꽥1C=4)i Lߍ'h\J?FS5͓-sS ϺMPL|"Ψmˎ7+Tm\gCM#~֩bj AM9.7tS^ZdO,o除|۶_f-\0\K7KF(r1?n]v1͒nhwr="2&nkAKw./0t{ZaLW"iiNﴉA5*60\o`u0b,6ֲ5R'#%'3QsH2&]tδninh" ͩ4ӊ.VAܶ9ḧ́]s2cfyWd^>7x gbʴSzK]ݴo$Aw?⅔󌏧< =aq 3h͒QV1ff?JxnTy! 'aUmTSZ9"4~! 7p|7`ϯsDZ3 i4 ȼN>?K_>5?YiLgNevA܈Ô{=HTaYfg)M(Bd}uq8Ko!>i$5E4Zљn,]po5*Mo8"EqWg"mD82tD-wZQ mxs/ CM3bR 6"jPwcA=LT6Ipo׋{Ի>,,*a w1,%WHrtuʻeX݀?PK y9<לorg/junit/runner/Result$1.classuLK 0h[;qµg" b %$s<ҵox1|7%c F'աՊP\:l;Hm}k {g(+E1! 
^ioZoSu08ea­ ZI#ۼ[J[|2r{r?U<ݚuƁwd;}~,M߳Hy-w*pw*myǷmTeݱ|OP@rߕe'ўtY&[}Иdiz3 ו0w.[֓mp<zFu階tccSa 57,kM9*0MmMJ1*{־eU *bmʅ(iu#IPwvhj*qH*b;ׇܰφ)|<&y:x1Sxm<װ:~@[_"0?r;šUz}POdhcs\|ȎΛHxlz<BG2LSt3 741:`kP]؈7Y<RoL;7fh# ./%\&S1zw$Npy`]LؐwWiU_0W8GV"U/"(td蜮eE9̣t )%K"KW48?(d# |E tKqDĤ)5*{d""A4SupR<+LE<)TU3yWÂJ\ħ; Jhyȏ PK y93Norg/junit/runner/Result.classUkOP~VVuQAu(BevdvI/~/h$jGvl7?=y}sN~P47xòn-Aĭ VpG]% ',>ir]mf/S!U]nJTV-̆tuE3!핦QW9HkFZgY^(7㚆Z.o Ǩ15׍DVꚣYH-DȐ`);P6Lr[ֵ:eʍV,?IyaѸ'25ݡ 0@@_*_;C3qFfnUآk>su.V94r2e8:ӫUě<6PU;wm];yQ7mkUߊ:!2‌agqDGqaeio92bJ)L1L5S=bQGפ5^h/Rr>^;ץZ,g6p9!MYBw>(o>9Z#(S$i+H)&<0']7PM_< H A&CDxDO8 TJ)_h/ X"p; Q)d8}"l/|Owm?#]g(w,*BW_ Yb..tdCE'2 ۅ݋wqf"K(*OÔyl# !R@$+tNMJp{jY;2+rqmt]G~͋?AdHPK y99!' org/junit/runner/RunWith.classQKAkڥYYC=HlUDO ^ ~}h· a0 ;<=l *JI~t TH@^E*IZreB&O dM!+Ժ N툮uUf{:=aDN9D;[}rd 8ң^ܴG|erL&^kanjjG/L]&;@m1|7 i~4 Kȋ2wx+ S Ua=+!aPK y9ګJN<org/junit/runner/Runner.classuQN@} B("F/8UcbB0Qý+.Y~/x(lˁHۙ}f7S:aǁ]4mطb_)%C1X3TBapSQOG&_-,f A$&!ORr%pPs}gZDY/1'KU^{'#-D1Lrdv/j>(:qsG 0˖ҝzwaࢌ5l14֘`o˱wLxH5a]'cs,Bڅb2溟`tOu()p`TgBl!VΑ#OdIy;aPK y9T~s,org/junit/runner/manipulation/Filter$1.classQMK1}mmժ EZA/^/UAX=X[چ6%͖쮿ʃ8]PDą$oޛy8BElYCTj18vCP7/+Bj~0ǍFc2r0P3&1LlSޔk9dK"aG' 86 E80rfe'{ `[PL}rj=FF1J 6ASNZ]Å $޵92Wj'aDoP؁Cd?u-P,LAE y+)%,>"Μո?DYOFKXNְ8 8ӏ"o희ՄFA ed@PK y9Ns0 *org/junit/runner/manipulation/Filter.classR]kA=l&6[[nR\՗AЖM6df7__ ($bdΜ3wlaB u rgb͂z&6L<0a3{Æ}w2u, Ex(YS+"n3w$<#tݨ'<l,yσZ@#Ȅ ,d!C>_HXU@Vl xwX}rUqf;~V6 @=6u7؜*W1_~pcFY J'ʔP*=o*h͓k$IeX ёH@ OAx&\p-c4aPViV9Kr)26O&42E .hn0\X5Kk}1i@noWI xDm>!X}*0-gWqM{-:nMB Bnrk7oOPD;;2&>EPCJPK y9l.org/junit/runner/manipulation/Filterable.classA 0ETm zvcܹ֕(O()54x(1Qp) 0`A^TidY(0lIjunJiU =!_s>mjLΎ`ʬq!/sORXg0^U;mTWH EmB m;}PK y9zJ:org/junit/runner/manipulation/NoTestsRemainException.classN0m#BJK LlX@,$PJv7XUbWNx-&$Bܤ >GGߵclGbbb0(2O+57%֔4U*ZX"?H >*B?FMbTrN+'']NbQ3HD!^ֹbb;UHm^2l3Btokkv-䳌p9Q4sb; 5X97k[Y6 cWlmkPK y9$Ar,org/junit/runner/manipulation/Sortable.classA 0Dت x k%7p.<8a=}l4X#J"sQlҒC0o={N6Mc{%e0' a]?Sl¦swfH5PaUSWPK 
y9",org/junit/runner/manipulation/Sorter$1.classRMo1}l6!R (HԻRG;z? 8 (؉jJԃ=yy?t1B܉p7½-ʞT3;G aO T]1 34:7ҝОȜ e<U}mFɸ J&\iq+JXaZ.CɔR{vS#]0ðu=exGd\ñHmwE?=q0Vr-7,̒7ʭ6{˯O@&{{=vZs^4ӹTCaOqUDXa\&[Egꧽ0leuI4i4*-1V)K( %b+Yuy#+ssN.(}>ż:gAeA]n_}#Z_ GW+Qy7}-!h@+sWPK y90/g*org/junit/runner/manipulation/Sorter.class[SP'-MSnDҦPTV|@yq&ԈaBIG> 8rMjR>={痯 b ȸB S  9oʸ%cNAE^8gx| ñmk޼@ղS5:1Lѭ&k妙Y2iDKÝ#9%dF6eCyHJs4%YyG:{\iqbN)jUPEEWp4g]!E}/- 0MSIw"W%KZT>se^g'#tV,@ID`-,?Dbɍ}(H}ʭGč;=1PK y9Wtx]S+org/junit/runner/notification/Failure.classUkSa~^Mo0+PʼTVY ͏+.F̲tTL?tβNw9˯?~=Q̇0|ܓq `^Ce[؅f紊%hJ5k[Q$n kibF0MJC-M f]_-8h[ˎ9eKS;ڵtAunhDlSsCT"Yj5;b+Pp  㢄M['elv^N+qx)J [RusFU@GWNAI`onkIh渵mw&.^)O;"Te8/Q^X8u1Rdq/;W!\6em5]hs 88DN/(}h!Wn^ zJ4:ENC$RK$ՁXQ:"n%K5:ysAB>2d_0 :4;éc\N8&\|o`Z5,'8ܠ:}\ {$O %!T颃kPVÅw5*u;0yGq*)IL9ڟ ;AIfNKoS5 d ,zf4G4Qk^oX/ywaМ3#]*l_xo։PK y9tTA/org/junit/runner/notification/RunListener.classSN1=!KCZ 4J]PFBZq}s6&8Z>!Џ:6!쮴x93ݿ~zW ]c٬1?%-0ِJ|M/BfLFk`^HvI(ԩRB*\FDǩjHc0LXa,N,V񟅉;V?6Pr 16f5N¤z%}iTcQq?snj {w-MNü;0&m}mhWT$Iu$Mo\uO^A $ cfGDtcPo @_{ %څdѭk_Z J*7 x2Y"X.y1A$ESNc&l{,gW1we dEV@V ';JF_vss7kPK y931org/junit/runner/notification/RunNotifier$1.classS]O@=[ZeY AWe[ `L bbRxp /> !52M|w +j6u3޹oѩa wtdIJge>Z=|d3,I:X*?X&2k=b%2ǻZ$g g0@HADH3HBT4x084VijLV^Tk±߇PYxP/?2S:%<"x8,S7M4/!?┖8LƃIŠ6\5,\r0 ª5tQ޺dA?j4 t}=)BJrϛ+sD;IX'k44Bmi*Ǩ´NI< N1Es4^fry\S>QRO|1ǤgD7?2ᄸM*XDfM¦0LBPK y931org/junit/runner/notification/RunNotifier$2.classSKo@6qc&mR@n"$Zzh^8mMY#?*YH$~bN^33k}@&k*c.h`8 n TċRDgG d( ٗ>OdTeIO^Owa#C+KY>x$|f+D3CޑuDpPnQFH >d_r*˾0N$Ħ&UX6LܰQÖm-8t e`xYH+.lw]Wf'#q,3mgZSdctx;t,ѩ%8fHkt+(뺹QWM&q( ,:u(Li%8h3~u,EA۴0|C;q(+8|p+0.c5!aUڙCTH(ſPK y9;F1org/junit/runner/notification/RunNotifier$5.classT[O@f)ۭ e-/B 1Yyŧ;; џe"GL69w?U$eن'6bB*C휇aN_ɨ nܥD;2ىHIHN'RFCe y lҧ;2Z%B  Jڍ Fa'ؗ* [SY S߈lw-;huP-5lXtЂKQч'꜊ydhfgXD4IL)]wTgdgx'fں|OB֎Ѥ6?`R~S󕅾 I]/:EIP 4gP`PhvE]ܯ|1$G Y($JZ]"^Ԕ (%}!a%:Dd}fTPK y9~'1org/junit/runner/notification/RunNotifier$6.classS[OA]vv E.t /TӰCYo2M|gBmn39gQihĒXP0uzG$A,_2R (ǙgJ:b0ӾL 
ޯ*JeW\/2"a$9>fhw0Cc;*-^diGy,>4<]n;I"Ȳ^T}ʱw"GXx0{m$,P~(Lj@6 zEڏ:V4:v`ኃ*,;h¥hfq}g u:!W= ٩ЮŁHݞbaX._ >%nL5AHۤ^L4`mUC*I[(L)v/(]'6w"4j%~ADpc q}Hs?(o'<tsa:--J8J\|}wr],`yڙ3(?PK y9.B= 1org/junit/runner/notification/RunNotifier$7.classSn@6qm6m@Bn"$TZ @^8mMY#T.H$<ӆDۙofv}@5L&+6Lڸ;2̞iיr'=8W2\)O[ f֗icSřʐkWE ]$:$ quA<org/junit/runner/notification/RunNotifier$SafeNotifier.classU[SPN[(VN UDB aJ$)O :?! j;9gwϷ_vO~@/"RA"@-npJŐ$^F>*a0"`BBL$YC8aZjTt%=9i9#KY`w&<[[VZ[̒.affB}ҖQRXVmNC^?nyUmCSNmfH lp7Y5pKsL4#EGm%vwlc8+p\-a oJyk%ulF2KPSfi],Dh@:ԋUH´GO$$d")cOz.̒t2y2!\ӚY\¼s x0I՟[\F2/Ra-6zs%tA >EǞ0ާQ)>Cug Ҋf'Nk Wi8JR,fnP4?팥oR鷳Sd<5:[oAe,HGeNB͒n4|0(瑛p_ ]t`P4=| n"m4=J3<;yp|V\&)w|=KϘ /!އTVb,T9#zP_;n6tA^B+I .w=-R0 huVTї& ȱ<|{`>YtPA(&)(˯rqLԻ~FAm=h/UjpPK y96hj /org/junit/runner/notification/RunNotifier.classSUǿ7 lPbƖVTJhK[J* `. ,tW|O Q\6I0{=sν z0e װXajqn|f[<dž5Y#o5 ;nh(O<5X-Ɠm qı~B=/J7IbQݲ%ӆeK$ӟɲm,;E.R^tI4ˌMnbr\§f̯nɌ]:[ig#R\*oKsM.$.e†n{נZGQ]=ǠrՠG:XG]pm rD\G)פe^M[5灴qLB0W\9@`OveG&VU.ʇTcؙjtLU4+cuJ7 *vT!UڴXu9kY}hw%%a7>wW S Ӳkg٬቉ 6rMӕyJ-S*Y28u;x71OX̚)!"N0B؃AivP7B(Loݾ&27lݒyՑ$ՙiH.LI`mu\ޓY;{7idP!C6"2ufңd#!e ('*l\cZFeq2! fEB-fSK\nDHȟZfȸBUFτS&Iğ𝨩cst׺Z򻵴XZ{)z22?܍Ϩ܏I#?\_ #ut!0|% S?G S/apǜ>~PK y9͜5w:org/junit/runner/notification/StoppedByUserException.class1OAAN-ܘb!rrؽYV&q0VVnv7zp7)e)ik&!tFd^$W5hFG`dg Qrܽ&C3V23:# Cb3H儱^?XzBMS5;pϱJ Jc $hE8O @DˍK]Tx} w +;赌YD `+SPK y9x+%D org/junit/runners/AllTests.classuQN1=D7nb5]¾@űC:ąGoQb{鹽+SxbEE!ߑJ.C0;\M^}4dpzD0Ur;zG!1 pȵ阙HOyBξN:p br7֞ґqm]+badb *nMW+#;v*%z,.u.}?ĪpceQ*#ᏪH#.Eڲ`{; Ѧn)&>C1:~{"K 5TUuɩDOfPCqBT RTPK y9׮[0org/junit/runners/BlockJUnit4ClassRunner$1.classR[OAnYP^/UzYQj|DS!t)3fv ,M$>kg`M朳|:? X1k!p=7Jn f~<ib;Z,~L-Akmųl-z. 
|>[c6{a#=aY蘄]neChgRDw, 'd~ ᇼi ?Ǽ'S ?}0g%<'seKen~/7K[NVGHXW%UF;^w%?%%?G+ HBZ|@{}!,ִC ZXI mTtGT㹹h1Ow$ۓ ʾh"V1_{(۵Ztmν<_`R:CIFUWxV~y큀:"$ ja<#3,1s@$|Au$w۲6‘[ $$3aѰ?[%u;HW$Q hd?OE: a, (?0c^Ktk65) @TSM]wl!}ktbI5qlT m %Z׭yg۪Dq2h"w [Fҝ -H5?Aɂ'X[e>$ ψGx.):)O !USz9L'j:(%7 ̋Z3/}ҳ\Y`P$BST\?O%Cp;#Y$mگ6Gtv6FtL'G7#Q+ehS_rW@USσxI0sH[؞42owu4xߵ;fpt:ߘme6AH2an?UV}!-P?6))YLTELqD"f9D/u3EJ;WIRlHI~JR=,)bpݔ \A&!=z6Vj`ve"b"<\-fÀ"b'Ou5)b> b{k!>VZ\^ m2?Rkk8\Kq#n.d*S^@f󋨴()|\/sK`(9Zp[%&ݥf͝Rʃh ގK=[g9h! Tm%NR;zVm%R(,֢zlנﰭSzø* w' Zl)TA6K5d-xlҫwG)v3Dk[b]֜Vs(eiDfvq/SSş*zl+.b?nQK Z1m^V!U$K;؞̆(+ \ iUwsm!θJS"LN!lNu[D+CH?diYwD`̖jggvg 1퐪;q'aupޕ+QhJ$Q-Vܜ;FUH{VSIdM UrN>'xy4nw6K3{ jIv؈`:J(qF!.fZ/8NBlVWFI.l@]^++;i/[iTьUZ!~w z(4&4yB2 iX_(> O1c:ŜSɃZljlfs`q5a/*_N/EB!сj!qN: &ŅDg|khzV*j%!ҟ}$z> ЈԷAڶ`7 Dz<(F9EE1K"vEUYLN0Hߘ|IT+3w47&"byO CD03I Hd$gp7YК9V$@!ٔ.KZ%idM>oɷY2˺2AGtHC)v%,v%Y[L3)wbF(#L? pq,PK!#2ޜR.kJ5&7c9}B6chSj*էQ{%2j9 IfyĆi!vZj2")Q r0|g{GXCEfb~IO1B4lxNq̘Xꯪ Eg Z!w5j7Yv: YtLIӤW} .Cg]XJZZf#[j-5ȺP1[ަ{{ ].<>NmĀXDuT7TR3]bhOz) ƿ .PK y9?LG#org/junit/runners/JUnit4.classuQN0=d~Bb &Q\06mF+L|(&{zz~|8ƾ"Z:,mP"1]g`a}E6<#2Y=y>1M't?R{18\˄m'#▲CiߦOcxޘ<&a0Tb8ڣdLDޙPլ#AC_ |UVOONy~^s3WO %}K~E FL^P:FʚkaP RTE 1;5h*Ix/EPK y94T0org/junit/runners/Parameterized$Parameters.classJ1kתAD=x0x$bRW/:lVB}4>%^ZBIf'/`W | 7;mHWNz8^tͼ:CHX laJ6U`hό͸ 1*R& zOt@q s:>8n#!_/ |%{ X6GQZ(X0,ްU!^``9핼 ekTBHMa_)yVXaiyNUZ^)30;I{b aYxN"D.|Q'ZdN튮s7ZAmcЦEbۓw<^ wZpIY‘5&2AT}e3'jnT^d.+n󾖈6Q 525&Ҳƕ6_  t,3\L*-rLW+!WqRRm RNb('[-.`6nS15뱗wQp/+\ 9L?N$!)!G !'z{!YxC$Dk]>KzMtMh?+r]nҳ_hMCS1"R:M:4N4dQCP'udY6Iyz3zеX-ğ(S˷% QϰIH5W'O)ާ~I7]{ꮧ0ɯB_9pO}_ѭGȐ( laIJҐZRDV@c@lw.U X5CdGI*LFi[TգD}Z.R FX3" UKf"R35d k/F+$yEbDcZ@Nbi)t PK y94z %org/junit/runners/Parameterized.classW[W]J\E  VT؀T(6$#3.vjmk7`j_Aև>_Ԟ;lׇ̹sν9|'b3FD3"1*Yn{?/E^c"DED5 FϜ($"1%@3tQ$€i8V\ճ|tRD ^WjȦy8iѫ!O)b RWv^t`+hTeR#.Ðgi3 l -ߜn'bjo~F{'-wwdd:#670y^P5e 65X4=(GFdC %SGJrBTvI%D&y,ONxUT)x& ei &1Mbv orq,mޡ %hم{fJRuÚpDV#!CѨ.ަ\9wC.rS)Nğ_ybo^2RȀ߯Xa=0KNƤo}2Ux9f7 .S#ܵDDgp^AeT4{Gbmmm zbdZ3m 1#9'(:Jh VXU _`N% _⌀઄k, 8$"!P}1"`h룱7-"H~׵ YJ 
|-h Iw8l'ZnP9P֬S܋#8KzS"f^D#N+Yy!Ӧ\î].QBInڜν řq'q_yH0,ʌe7riK7/ڌ)ro#.esؿHLXyb}l#$͠}#GF-@̨qƙ$p!퐝hb)tg4зg#}R]9S[QDIo`FQ#>_< ;Z1ҖyHqGoyoCJ:r̵/n^WÎ]X ̣bPG   d|֟eȜOayZ kQ_U8b*6iѹm (T+Ԝg7rW"sq/tE`#}tjSN| k?L=Ftn*+<IWxB!k$,+PcubR" WI%H)M> ɟQ*narIZ"rʝry8p'yL=8ybPK y9.d @!$org/junit/runners/ParentRunner.classX xTo2yy$! !&3DVHIuyILfҙ ۽ҍڪLڂJ".VZj]jW+=7o,!I}}s{ιﭣwX!xw8bܭcroC?2?-p?~*K+~Ϗ_+x@l̏3GxU'xi ϞU/Ǘg/X ^Qц*}M_T_yox7? Tщ*xKqE@B@֓P2)(Ow Ō?1鉤 ѣ~{jdDSzB`I~@(f EC)#$IW<1%sg@Ms`WhO &L"`N=@<n% ~]~l,PB7Ie13O@m6œ^ݽz-1e}a}m@=]F_,Jy3OO7Bz7RF0L1#' c.9t9g.=Zݒ J$&ۤJiՒrf)x5fDZ`ӤG$k3qܶ1'͹cn}7ɹFuhݟ吐fh+EB)}Ψ7"xgoGr=3kR (}7%Bxb MvJDRq>j(F#.:iEϱaﴧXl5UL}QA\Ajٮû6O' 'TMP F2DY)j^#տN'ti+vVFSD)YaACw_iR؞nV,+dN[6-sUe)o0JA .,-m] mp+쬊DH{Tk $ƒ"Ny*9Rac#BQg294 n7TV]ZI,ڕu&pVsA㶭lm#~ݽVKRz $5 S z ԓVRNdfRM3yLV`InBFi^m8`:аUDZOgB՗SݓE#8H H .P-y|Za=lZ|@P"jy-NJ,`ʒS 7/[ݑo*P L(]DX'Z ˙N=12 @@ÕJܠa3VD&X0ib,  h؍(L1|\.Q&Q&TQ M̠h[BA\2\BîpIvU)uZqDmŪ[lnv"#&f*ETkF\Z1igA 'Y: 0zMa/-^\j<I^ $ fo-xO!)yg; :z51OQ3Qh4|b>np+nSDM,MXDE^ɖ_LyF5ABhjF[R@TUแYìVؗE.WYq_? RRqKvewe'0A͔\KBEYo{hea1'FIq\~[8 }us{t~2Ҋ8"]%3V-9E|7_C؏MNՊVAR y#_=iwPpXDr5GXadcjr`bKnMv,Pl@##/eb~R]%e{y26bq80\v:ؕ+ -v#VQZh4A. ֬RVX4rO dmhV* `ɋvXoZu Z/O:=>Bd 'o$p-# i@ .=le &yr\L%`% xV }{v?xiORK%dԧՎV+f Yyhey6k(]::DNJ1#tW:~Kd%tK- $: j:J[zeJ[z%=7S]3UZ`㓲F*5;, 6$'KtHߢ߭Bx񌣳h]NÓW96YA\wdԒugvN}*&٤˘ɦ؂srP`PK y9n3 *org/junit/runners/Suite$SuiteClasses.classJ1hZj" ׊ Rॴ ]0I }5>%E+2"s[>?\`æm;ʼnԎj'H_KZ&ɥ@B# WgL{t4)K)(uƪ1D 4YiUdwɒI-DZ zǠ88$qi11T^;-uFeCqg5=q/_3Y|hhsχG榀ELQbQdj.gV e>ZaoP*RAXG5Hm`YPK y9#ɺborg/junit/runners/Suite.classWksF='EB PZjZ8@yyjMRmAvDA$@_fڄi˷Guzw%+kws(~I㦄q+/vsuG]|)!`Z} *Hc^B4 {ȢA,2?4i r;ڴڶfSijܶpT.+jFN!SMi[=^mY嶩;eK`]ܹ9rTGن .e ӭyUuSm7uSqKt|Î$IuE-b$$. 
w=Y7<~ء/J5d(EAuSS:ə`$c 詶n̓c1gI'uwsŗK!",_ 9_Bη Ӿwwrn_3BL aFI)661'ݱfpM1QM;I}ݚƅA1|(k/~ÎuI}AI32A%hP70cj\LzdWx&p)I"JANSɓ*AJkH.R,<˞1)>M6q~|,^"?CU5.KE y.CTGxΊ~ hlg!D|3w$?PK y9rPiC/org/junit/runners/model/FrameworkMethod$1.classSmoP~.*lVpĨK a jT/~tW(-l+MD(2Lh{ny9~ +pGYuձ{:6#SCAp !ZܒmYaOLjy7`HY N-1߲:} -w(O^*g:|N<Duݦ)Jѳ<ྣ̄ FMH8E#˖U'腎VI ).g *\JL.6h ;dm蘒23.vTum5XJ(V8@ ⁆LY8 qC\(֨I^HlU\Mi6&蚔 A0IO l-!GzS5In17.GqfK%;mo}Na2ZQؠE^펧RQuD$їD*&f,3ߐ\ HiD G*ٹcpi w#/Kf̦&s܌(5m\NR PK y9-襼U-org/junit/runners/model/FrameworkMethod.classV{V~;jn^%)svtnKI36@fM.-P[:+ix؅6qIp ЎiW(:0#Ɋ-!K}~w߷n؏6a7>>AO}Q.L_ }6W"hW7y|Mpza|+|9n/OeqrCF?+!DxU,%P$gbs"e 㪕3چgy%S3)CΩi+HЫ5OŽ;%&$U¦a-Tc\ʑJnB14VV3%$uc&5[)ϫ\jP qnɜiF;:ko9u`Mm^]P!'f)s%]Uﴠ5\GBZ-X7C%#@M#R–*+,CPu^iRGS9-=kߘRÚi1k春XZZtxT i3KVrdx{+g `飊1m&XER]5$'pj hZZ2-*9n1U%Р*,P,ݠŜE]o* ޏݹn.i|DHoł1O2u 2Й\0L1""*th٧-5?WiIE.t0GE=մLݭuȎ(Hj]լwZe]Urֱwj߶wС[|+f]0>cذ@e1hAMBZ|PAMy/L!c[Bůe\^18!cٮz!#E-Q VsQ\oU\&Ux^{WD)7@n"[2>[,Gpu0(vqIOqn2~߅p[m),&gnZ %j2GT̳_d2ŧC K$2ep<5c|vG֟j뛜\;+ jcvHBaKwHX%hu&H%Ûd9giM8 sێ"t)bgCֶGg@KT9@]evFbp|rt@ |juItK=,ʳih~ÝY2hs/Mă\?ķA4$WАzu5\^Zv~ ߍmÍpw=^vuw1k 6[0jB&&U\d{Z遉ϣ88}>li..X+0]>!ec_%|{[h 6ro.R¦4DKN G[c"@-|xWcG~:N4#NA9Aʨm#,㕞 ^ I lncǶ,1ud-۸V8PSB!x̖]s+x눿;<0F= 8$VءHҁmL1HR9(ܹAR 7^{&p&6n6v$E4O`ٗ/!#e p!c4r%p 39 ?ŇʭWG`W {|JdV8aS^q i%$mK {.{uv? 
h v ΠтY6sfu`'gm[=[mKE-sO }3vtdV8ѽ p20+:\wI;"]]4 DN\/ck%uz@l+zn˰X曮'^D'!!䜓£dn˞ӏHؑQ(XQ(U_a{UBuJz:{}aG jmvSZ~$♫/]zS1c#9/PX+&6pXE !YU DJ~FQHYLw3jӗ,FH?{HMi<5|u7jǟlM %tݔ鉦l-70 0*:rh n xvz^U׸2l.҄Wm[f3kdWKN-fp.Ћ}[okC&ׅr߯ikVup]ǝ167Эpa46E&(2uV7?@p,Ot*xdmL{tom ʎL3TbFtGqgiV V4bMaOAK ypkez2"9o4 % 4ިcHãuӜӚUP}x 12#,G.gq7t:iXln(( ?o?E_\O-wєD+.%h8݁Ela-?C~h^dq;-{8 Ȓh\jLI">/_kG6ۻPr;h4KPAk@e2D t΃]ji>a4*DOsH ?.ԅ6wh -w wpUn(1ԉJ%?,TDPˍ'=WE098pƫVBK]GQ$εWԡc>tN}vA6=+*%E.$6 x:jmX%A쮰ZH7JHxViԌao\ f_&o禇^^[.œ#% lON= ..=fE=!=?b+ThV[g9z~@>kNG\c5nb ~8aus/PK y9jF'org/junit/runners/model/TestClass.classW[~ ,((p4 nK !AB0h/.,dvVm{ӋaoMK/imk<3<ԾՂ懙.;}w~݇XqʰRn|B>C5^qW})?]V|5zYe|]x}!|E*&^EB7z%oX ֪2/@uaGOpSOEP\M$]P/ḚCG$TL5 4̍iF49NꃉDPX2A}5|ڈc2E/R#2KI}>NČN$4=^Jj.iev霒b-K3>9R;)U=H/)GŵmSBq-A!iDBY ~}CU#%F4ٶaRK,Z,g)rpK:;;=5uuw! rpV;߁ ;w I.- -/ NՕ𙄧|˚ /Y%aWAbcEȑcډAHV$WU;blGw_Bं!o`QL(H㢂 w )^2Rpx[9<` ГBwTKꚂ? mqӟȃ5,!!/ Iؿ͞2Z4NVFW(* Erk~ eGq2 u1RQ&jTMk-\+N~Z=1\s[h^L1c:]@]#8 ˏI.6QdhB%~U8~A'WxVdasH؃6]wY=z7>ˇ%}:)7ƺ\n&u=}`g<7&6µ KIUʽܭSaƸt1ZLbb}!8N[[ krXk|)jDg2&x>]vWQb 4(W־2nl 2u #eٹf}&rW kA8㦀0`#hiFca88uc ʘ4>Sw`4dc-0)F&?Ƈgmm)Tu@[Z^cيr69V6qʯ۵cw[D̡|=:v?]YMf<\"t ian˖qT"1.`FfO0W&3&z&M,%q`1[IPxk;n::Ȋ1#i1GsYB6,Վ:KwzMC-ZKDN9*"=FRlRf[D]o#dS*\Ϋ9T*LPK y9 AMETA-INF/PK y9=䛌Q_+META-INF/MANIFEST.MFPK y9Ajunit/PK y9Ajunit/extensions/PK y9Ajunit/framework/PK y9 A/junit/runner/PK y9 AZjunit/textui/PK yp6Aorg/PK y9 Aorg/hamcrest/PK y9Aorg/hamcrest/core/PK y9Aorg/hamcrest/internal/PK y9 A6org/junit/PK y9A^org/junit/experimental/PK y9Aorg/junit/experimental/results/PK y9Aorg/junit/experimental/runners/PK y9 A org/junit/experimental/theories/PK y9)AKorg/junit/experimental/theories/internal/PK y9*Aorg/junit/experimental/theories/suppliers/PK y9Aorg/junit/internal/PK y9A org/junit/internal/builders/PK y9AEorg/junit/internal/matchers/PK y9Aorg/junit/internal/requests/PK y9Aorg/junit/internal/runners/PK y9!Aorg/junit/internal/runners/model/PK y9&A1org/junit/internal/runners/statements/PK y9Auorg/junit/matchers/PK y9Aorg/junit/runner/PK 
y9Aorg/junit/runner/manipulation/PK y9Aorg/junit/runner/notification/PK y9AMorg/junit/runners/PK y9A}org/junit/runners/model/PK yp6s!! LICENSE.txtPK y9P( junit/extensions/ActiveTestSuite$1.classPK y9`ǫ&: junit/extensions/ActiveTestSuite.classPK y9d<#junit/extensions/RepeatedTest.classPK y9n $junit/extensions/TestDecorator.classPK y9x@Xn"Wjunit/extensions/TestSetup$1.classPK y9pv  {junit/extensions/TestSetup.classPK y9R, :junit/framework/Assert.classPK y9kWN.* #junit/framework/AssertionFailedError.classPK y9|5*G )$junit/framework/ComparisonCompactor.classPK y95.T'*junit/framework/ComparisonFailure.classPK y9n(z',junit/framework/JUnit4TestAdapter.classPK y9.B3junit/framework/JUnit4TestAdapterCache$1.classPK y9D_s ,]6junit/framework/JUnit4TestAdapterCache.classPK y9W@}*<junit/framework/JUnit4TestCaseFacade.classPK y92̏ݘ!>junit/framework/Protectable.classPK y9>V9y?junit/framework/Test.classPK y9<99 F@junit/framework/TestCase.classPK y90q2Q8!Fjunit/framework/TestFailure.classPK y9lV"KJjunit/framework/TestListener.classPK y9wC"^Kjunit/framework/TestResult$1.classPK y9hF _Mjunit/framework/TestResult.classPK y9Q̈!Tjunit/framework/TestSuite$1.classPK y9>` `!Vjunit/framework/TestSuite.classPK y9Ȳ.#!djunit/runner/BaseTestRunner.classPK y9a "^vjunit/runner/TestRunListener.classPK y9%;vrwjunit/runner/Version.classPK y9<Z Wyjunit/textui/ResultPrinter.classPK y9dh сjunit/textui/TestRunner.classPK yp65R"torg/hamcrest/BaseDescription.classPK yp6[l5org/hamcrest/BaseMatcher.classPK yp6iY~ѕorg/hamcrest/CoreMatchers.classPK yp6X)org/hamcrest/Description.classPK yp6n(|org/hamcrest/Factory.classPK yp6=[org/hamcrest/Matcher.classPK yp6 l!/org/hamcrest/SelfDescribing.classPK yp6aWn$org/hamcrest/StringDescription.classPK yp6Э,o1org/hamcrest/core/AllOf.classPK yp6"org/hamcrest/core/AnyOf.classPK yp6@; #org/hamcrest/core/DescribedAs.classPK yp6.9= org/hamcrest/core/Is.classPK yp6:"~org/hamcrest/core/IsAnything.classPK yp6:org/hamcrest/core/IsEqual.classPK yp6f 
y$ܹorg/hamcrest/core/IsInstanceOf.classPK yp6Corg/hamcrest/core/IsNot.classPK yp6Фorg/hamcrest/core/IsNull.classPK yp6,dporg/hamcrest/core/IsSame.classPK yp6s^zorg/hamcrest/core/package.htmlPK yp6;o)%org/hamcrest/internal/ArrayIterator.classPK yp6@G/!org/hamcrest/internal/SelfDescribingValue.classPK yp6'M<072org/hamcrest/internal/SelfDescribingValueIterator.classPK yp6"eLorg/hamcrest/package.htmlPK y9ΧVuorg/junit/After.classPK y9horg/junit/AfterClass.classPK y91iQ@$org/junit/Assert.classPK y9Borg/junit/Assume.classPK y9Iworg/junit/Before.classPK y9Yorg/junit/BeforeClass.classPK y9Iu}p 5org/junit/ComparisonFailure$ComparisonCompactor.classPK y9>V!org/junit/ComparisonFailure.classPK y9org/junit/Ignore.classPK y9^org/junit/Test$None.classPK y9sp|*org/junit/Test.classPK y9_J@0org/junit/experimental/results/FailureList.classPK y9E"?n4org/junit/experimental/results/PrintableResult.classPK y9>@$5org/junit/experimental/results/ResultMatchers$1.classPK y9 N 5Iorg/junit/experimental/results/ResultMatchers$2.classPK y9h5org/junit/experimental/results/ResultMatchers$3.classPK y9Ǎ<.3org/junit/experimental/results/ResultMatchers.classPK y9?>-org/junit/experimental/runners/Enclosed.classPK y94J7/org/junit/experimental/theories/DataPoint.classPK y9yn90org/junit/experimental/theories/DataPoints.classPK y9,)8 org/junit/experimental/theories/ParameterSignature.classPK y9<,K7Worg/junit/experimental/theories/ParameterSupplier.classPK y9Oo: :org/junit/experimental/theories/ParametersSuppliedBy.classPK y9"{5;=org/junit/experimental/theories/PotentialAssignment$1.classPK y9컳yDTXorg/junit/experimental/theories/PotentialAssignment$CouldNotGenerateValueException.classPK y9 ;9org/junit/experimental/theories/PotentialAssignment.classPK y9uG)?org/junit/experimental/theories/Theories$TheoryAnchor$1$1.classPK y94 =+org/junit/experimental/theories/Theories$TheoryAnchor$1.classPK y9H*#'=!org/junit/experimental/theories/Theories$TheoryAnchor$2.classPK 
y9%t;8%org/junit/experimental/theories/Theories$TheoryAnchor.classPK y9o~..org/junit/experimental/theories/Theories.classPK y9 ^'Pq,4org/junit/experimental/theories/Theory.classPK y9pC#6org/junit/experimental/theories/internal/AllMembersSupplier$1.classPK y9Z#>V@7org/junit/experimental/theories/internal/AllMembersSupplier$MethodParameterValue.classPK y9JA4;org/junit/experimental/theories/internal/AllMembersSupplier.classPK y9>bL!J:VDorg/junit/experimental/theories/internal/Assignments.classPK y9DQC JLorg/junit/experimental/theories/internal/ParameterizedAssertionError.classPK y9B-:8Qorg/junit/experimental/theories/suppliers/TestedOn.classPK y9u)W@Rorg/junit/experimental/theories/suppliers/TestedOnSupplier.classPK y9䇙@/sVorg/junit/internal/ArrayComparisonFailure.classPK y9N9Ě4Zorg/junit/internal/AssumptionViolatedException.classPK y9.$^org/junit/internal/JUnitSystem.classPK y9_SM#w_org/junit/internal/RealSystem.classPK y9!e% aorg/junit/internal/TextListener.classPK y9eٶ2 @4horg/junit/internal/builders/AllDefaultPossibilitiesBuilder.classPK y9`4] 2lorg/junit/internal/builders/AnnotatedBuilder.classPK y9 D2p0Hqorg/junit/internal/builders/IgnoredBuilder.classPK y9w `4Qsorg/junit/internal/builders/IgnoredClassRunner.classPK y9TLK</uorg/junit/internal/builders/JUnit3Builder.classPK y9 {Փ/xorg/junit/internal/builders/JUnit4Builder.classPK y9lk-yorg/junit/internal/builders/NullBuilder.classPK y9i`4{org/junit/internal/builders/SuiteMethodBuilder.classPK y9/ƭ3\~org/junit/internal/matchers/CombinableMatcher.classPK y9\> d(Zorg/junit/internal/matchers/Each$1.classPK y9;*&org/junit/internal/matchers/Each.classPK y9#!Ό8<org/junit/internal/matchers/IsCollectionContaining.classPK y9̜0org/junit/internal/matchers/StringContains.classPK y9&k2gorg/junit/internal/matchers/SubstringMatcher.classPK y918q1"org/junit/internal/matchers/TypeSafeMatcher.classPK y9'4.org/junit/internal/requests/ClassRequest.classPK y9@/org/junit/internal/requests/FilterRequest.classPK 
y9I[A0%org/junit/internal/requests/SortingRequest.classPK y9TǏ ,org/junit/internal/runners/ClassRoadie.classPK y9 5forg/junit/internal/runners/ErrorReportingRunner.classPK y9F5-ͨorg/junit/internal/runners/FailedBefore.classPK y9AD^4Morg/junit/internal/runners/InitializationError.classPK y95)org/junit/internal/runners/JUnit38ClassRunner$1.classPK y9n|a4 P(org/junit/internal/runners/JUnit38ClassRunner$OldTestClassAdaptingListener.classPK y95=n3ʲorg/junit/internal/runners/JUnit38ClassRunner.classPK y9w4org/junit/internal/runners/JUnit4ClassRunner$1.classPK y97Qw4org/junit/internal/runners/JUnit4ClassRunner$2.classPK y9sf "2Uorg/junit/internal/runners/JUnit4ClassRunner.classPK y91 org/junit/internal/runners/MethodRoadie$1$1.classPK y9X`ER /<org/junit/internal/runners/MethodRoadie$1.classPK y9t/org/junit/internal/runners/MethodRoadie$2.classPK y9cV Y-org/junit/internal/runners/MethodRoadie.classPK y9DTyJm00org/junit/internal/runners/MethodValidator.classPK y9Lhmo,org/junit/internal/runners/SuiteMethod.classPK y98T*org/junit/internal/runners/TestClass.classPK y9tJc +org/junit/internal/runners/TestMethod.classPK y9y@P7org/junit/internal/runners/model/EachTestNotifier.classPK y9$ZE?org/junit/internal/runners/model/MultipleFailureException.classPK y9d 9org/junit/internal/runners/model/ReflectiveCallable.classPK y93+];org/junit/internal/runners/statements/ExpectException.classPK y9Q+0aorg/junit/internal/runners/statements/Fail.classPK y9MH;org/junit/internal/runners/statements/FailOnTimeout$1.classPK y9/ 9org/junit/internal/runners/statements/FailOnTimeout.classPK y9׿8org/junit/internal/runners/statements/InvokeMethod.classPK y9ߛk 5 org/junit/internal/runners/statements/RunAfters.classPK y9X^96org/junit/internal/runners/statements/RunBefores.classPK y9WG{O &org/junit/matchers/JUnitMatchers.classPK y9WQ"*org/junit/runner/Describable.classPK y9~G"org/junit/runner/Description.classPK y9  yorg/junit/runner/JUnitCore.classPK y9@Sn &org/junit/runner/Request$1.classPK y9 /( 
<(org/junit/runner/Request$2.classPK y9T*ӆG+org/junit/runner/Request.classPK y9<לd1org/junit/runner/Result$1.classPK y9ޓ~'&=2org/junit/runner/Result$Listener.classPK y93N&6org/junit/runner/Result.classPK y99!' 9org/junit/runner/RunWith.classPK y9ګJN<I;org/junit/runner/Runner.classPK y9T~s,<org/junit/runner/manipulation/Filter$1.classPK y9Ns0 *>org/junit/runner/manipulation/Filter.classPK y9l.@org/junit/runner/manipulation/Filterable.classPK y9zJ:Aorg/junit/runner/manipulation/NoTestsRemainException.classPK y9$Ar,FCorg/junit/runner/manipulation/Sortable.classPK y9",Dorg/junit/runner/manipulation/Sorter$1.classPK y90/g*/Forg/junit/runner/manipulation/Sorter.classPK y9Wtx]S+KIorg/junit/runner/notification/Failure.classPK y9tTA/Lorg/junit/runner/notification/RunListener.classPK y931Oorg/junit/runner/notification/RunNotifier$1.classPK y931QQorg/junit/runner/notification/RunNotifier$2.classPK y9τ31Sorg/junit/runner/notification/RunNotifier$3.classPK y9+ ky1Uorg/junit/runner/notification/RunNotifier$4.classPK y9;F1HXorg/junit/runner/notification/RunNotifier$5.classPK y9~'1Zorg/junit/runner/notification/RunNotifier$6.classPK y9.B= 1\org/junit/runner/notification/RunNotifier$7.classPK y9\>uA<M_org/junit/runner/notification/RunNotifier$SafeNotifier.classPK y96hj /borg/junit/runner/notification/RunNotifier.classPK y9͜5w:gorg/junit/runner/notification/StoppedByUserException.classPK y9x+%D iorg/junit/runners/AllTests.classPK y9׮[0jorg/junit/runners/BlockJUnit4ClassRunner$1.classPK y9G K ,#.lorg/junit/runners/BlockJUnit4ClassRunner.classPK y9?LG#zorg/junit/runners/JUnit4.classPK y94T0{org/junit/runners/Parameterized$Parameters.classPK y9Ϋ< B|org/junit/runners/Parameterized$TestClassRunnerForParameters.classPK y94z %org/junit/runners/Parameterized.classPK y9)CG&Ĉorg/junit/runners/ParentRunner$1.classPK y93L&֊org/junit/runners/ParentRunner$2.classPK y9.d @!$forg/junit/runners/ParentRunner.classPK y9n3 *norg/junit/runners/Suite$SuiteClasses.classPK 
y9#ɺborg/junit/runners/Suite.classPK y9rPiC/org/junit/runners/model/FrameworkMethod$1.classPK y9-襼U-Eorg/junit/runners/model/FrameworkMethod.classPK y9;Q031zorg/junit/runners/model/InitializationError.classPK y9<  +org/junit/runners/model/RunnerBuilder.classPK y9Oo'Norg/junit/runners/model/Statement.classPK y9jF'org/junit/runners/model/TestClass.classPKJjericho-html-3.1/test/src/0000755000175000017500000000000011172062432015444 5ustar twernertwernerjericho-html-3.1/test/src/TestSuite.java0000644000175000017500000000056611214015462020245 0ustar twernertwernerimport org.junit.runner.RunWith; import org.junit.runners.Suite; import net.htmlparser.jericho.*; @RunWith(Suite.class) @Suite.SuiteClasses({ SegmentTest.class, NodeIteratorTest.class, StreamedTextTest.class, StreamedParseTextTest.class, StreamedSourceTest.class, StreamedSourceHugeFileTest.class, HTMLSanitiserTest.class }) public class TestSuite {}jericho-html-3.1/test/src/StAXTest.java0000644000175000017500000000474311173323530017776 0ustar twernertwerner/*// comment out entire class as it requires JDK 1.6 import org.junit.Test; import static org.junit.Assert.*; import java.io.*; import java.net.*; import java.util.*; import javax.xml.stream.*; public class StAXTest { private static final String sourceUrlString="file:test/data/StAXTest.html"; @Test public void test1() throws Exception { InputStream in=null; try { in=new URL(sourceUrlString).openStream(); XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES,Boolean.FALSE); factory.setProperty("http://java.sun.com/xml/stream/properties/report-cdata-event",Boolean.TRUE); assertTrue(factory.isPropertySupported(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)); //factory.setProperty(XMLInputFactory.IS_COALESCING,Boolean.TRUE); XMLStreamReader parser = factory.createXMLStreamReader(in); while (true) { int event = parser.next(); if (event == XMLStreamConstants.START_ELEMENT) { 
System.out.println(parser.getLocalName()); } else if (event == XMLStreamConstants.ATTRIBUTE) { System.out.println("attribute"); } else if (event == XMLStreamConstants.END_ELEMENT) { System.out.println("/"+parser.getLocalName()); } else if (event == XMLStreamConstants.CHARACTERS) { System.out.println("length="+parser.getTextLength()); System.out.println("buffer size="+parser.getTextCharacters().length); System.out.println("CHARACTERS*"+parser.getText()+"*"); } else if (event == XMLStreamConstants.CDATA) { System.out.println("CDATA*"+parser.getText()+"*"); } else if (event == XMLStreamConstants.COMMENT) { System.out.println("length="+parser.getTextLength()); System.out.println("buffer size="+parser.getTextCharacters().length); System.out.println("COMMENT*"+parser.getText()+"*"); } else if (event == XMLStreamConstants.SPACE) { System.out.println("space*"+parser.getText()+"*"); } else if (event == XMLStreamConstants.END_DOCUMENT) { parser.close(); break; } else if (event == XMLStreamConstants.PROCESSING_INSTRUCTION) { System.out.println(parser.getPITarget()); } else if (event == XMLStreamConstants.ENTITY_REFERENCE) { System.out.println("ENTITY_REFERENCE*"+parser.getText()+"*"); // can't get this to happen } else if (event == XMLStreamConstants.DTD) { System.out.println("DTD"); } } } finally { if (in!=null) in.close(); } } } */jericho-html-3.1/test/src/net/0000755000175000017500000000000011167436712016244 5ustar twernertwernerjericho-html-3.1/test/src/net/htmlparser/0000755000175000017500000000000011167436712020425 5ustar twernertwernerjericho-html-3.1/test/src/net/htmlparser/jericho/0000755000175000017500000000000011204513400022026 5ustar twernertwernerjericho-html-3.1/test/src/net/htmlparser/jericho/SegmentTest.java0000644000175000017500000000417711204612704025153 0ustar twernertwernerpackage net.htmlparser.jericho; import org.junit.Test; import static org.junit.Assert.*; import java.io.*; import java.net.*; import java.util.*; public class SegmentTest { private 
static final String sourceUrlString="file:test/data/SegmentTest.html"; @Test public void test() throws Exception { Source source=new Source(new URL(sourceUrlString)); assertEquals("",source.getFirstStartTag().toString()); assertEquals("SegmentTest",source.getFirstElement("title").getContent().toString()); assertEquals("document keywords",source.getFirstStartTag("name","keywords",false).getAttributeValue("content")); assertEquals("",source.getFirstStartTag(StartTagType.COMMENT).toString()); Segment outerDiv=source.getElementById("OuterDiv"); List elements=outerDiv.getAllElements(); assertEquals(4,elements.size()); // outerDiv itself plus 3 contained elements assertEquals(StartTagType.COMMENT,elements.get(2).getStartTag().getTagType()); assertEquals(1,outerDiv.getAllElements(StartTagType.COMMENT).size()); assertEquals(2,outerDiv.getAllElements(HTMLElementName.P).size()); Segment testSegment=new Segment(source,outerDiv.getBegin(),source.getElementById("p2").getStartTag().getEnd()+1); // this segment ends in the middle of the content of p2. 
assertEquals(4,testSegment.getAllStartTags().size()); // outerDiv start tag plus 3 contained start tags assertEquals(2,testSegment.getAllElements().size()); // only p1 and comment, as outerDiv and p2 are not enclosed by testSegment assertNull(source.getFirstStartTag(StartTagType.COMMENT).getFirstStartTag(HTMLElementName.P)); assertNull(testSegment.getFirstElement("id","p2",true)); assertEquals(0,source.getAllElementsByClass("de").size()); List defElements=source.getAllElementsByClass("def"); assertEquals(4,defElements.size()); assertEquals("p4",defElements.get(0).getContent().toString()); assertEquals("p5",defElements.get(1).getContent().toString()); assertEquals("p6",defElements.get(2).getContent().toString()); assertEquals("p7",defElements.get(3).getContent().toString()); } } jericho-html-3.1/test/src/net/htmlparser/jericho/NodeIteratorTest.java0000644000175000017500000002216111174037616026152 0ustar twernertwernerpackage net.htmlparser.jericho; import org.junit.Test; import static org.junit.Assert.*; import java.io.*; import java.net.*; import java.util.*; import java.nio.CharBuffer; public class NodeIteratorTest { private static final String sourceUrlString="file:test/data/StreamedSourceTest.html"; @Test public void test() throws Exception { Source source=new Source(new URL(sourceUrlString)); Segment segment; StartTag startTag; assertEquals("UTF-8",source.getEncoding()); Iterator i=source.iterator(); assertTrue(i.hasNext()); segment=i.next(); assertEquals(StartTagType.XML_DECLARATION,((Tag)segment).getTagType()); assertEquals("",segment.toString()); assertEquals(" (XML declaration) ((r1,c1,p0)-(r1,c40,p39))",segment.getDebugInfo()); // note row and column information is included assertTrue(i.hasNext()); segment=i.next(); assertEquals("\r\n",segment.toString()); assertTrue(i.hasNext()); segment=i.next(); assertEquals(StartTagType.SERVER_COMMON,((Tag)segment).getTagType()); assertEquals("<%@ page language=\"java\" %>",segment.toString()); segment=i.next(); 
assertEquals(StartTagType.SERVER_COMMON,((Tag)segment).getTagType()); assertEquals("<%@ taglib uri=\"/WEB-INF/struts-i18n.tld\" prefix=\"i18n\" %>",segment.toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); segment=i.next(); assertEquals(StartTagType.XML_PROCESSING_INSTRUCTION,((Tag)segment).getTagType()); assertEquals("",segment.toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); segment=i.next(); assertEquals(StartTagType.DOCTYPE_DECLARATION,((Tag)segment).getTagType()); assertEquals("\r\n \">\r\n]>",segment.toString()); segment=i.next(); assertEquals(StartTagType.MARKUP_DECLARATION,((Tag)segment).getTagType()); assertEquals("",segment.toString()); segment=i.next(); assertEquals(StartTagType.MARKUP_DECLARATION,((Tag)segment).getTagType()); assertEquals("\">",segment.toString()); for (int x=0; x<7; x++) segment=i.next(); assertEquals("Jericho HTML Parser Test Document",segment.toString()); for (int x=0; x<5; x++) segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.NORMAL,startTag.getTagType()); assertEquals("",segment.toString()); assertEquals("HTML parser,test document,R&D",startTag.getAttributeValue("content")); // note that character reference inside attribute value is not handled as a separate segment for (int x=0; x<8; x++) segment=i.next(); StringBuilder sb=new StringBuilder(); segment=i.next(); assertEquals("This paragraph contains character references: ",segment.toString()); sb.append(segment); segment=i.next(); assertEquals("€",segment.toString()); CharacterEntityReference characterEntityReference=(CharacterEntityReference)segment; characterEntityReference.appendCharTo(sb); segment=i.next(); assertEquals(" and ",segment.toString()); sb.append(segment); segment=i.next(); assertEquals("©",segment.toString()); NumericCharacterReference numericCharacterReference=(NumericCharacterReference)segment; numericCharacterReference.appendCharTo(sb); segment=i.next(); assertEquals(".",segment.toString()); 
sb.append(segment); assertEquals("This paragraph contains character references: \u20AC and \u00A9.",sb.toString()); for (int x=0; x<3; x++) segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); assertEquals("",segment.toString()); assertEquals("

This paragraph is commented out

",startTag.getTagContent().toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); // note that

tag isn't found inside comment segment=i.next(); assertEquals("This element is defined inside an onclick attribute')\"/>",segment.toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); // note that

')\"/>",segment.toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); // note that

tag isn't found inside the attribute value for (int x=0; x<5; x++) segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.CDATA_SECTION,startTag.getTagType()); assertEquals(" example of markup that is not to write with < and such.\r\n]]>",segment.toString()); assertEquals("\r\n example of markup that is not to write with < and such.\r\n",startTag.getTagContent().toString()); segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); assertEquals(240,streamedSource.getBufferSize()); segment=i.next(); assertEquals(480,streamedSource.getBufferSize()); // last next() call fetches the next text segment as well as the following comment, which totals > 240 characters segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); segment=i.next(); assertEquals("<% server tag %>",segment.toString()); // server tag is recognised inside comment segment=i.next(); assertEquals("\r\n",segment.toString()); // processing instruction isn't recognised inside comment segment=i.next(); assertEquals("
",segment.toString()); segment=i.next(); assertEquals("\r\n<*abc def=\"ghi\">\r\n This is an example of an element from a hypothetical server language \r\n whose tag formats have not been registered with the TagTypeRegister class \r\n\r\n",segment.toString()); segment=i.next(); assertEquals("

",segment.toString()); assertEquals(480,streamedSource.getBufferSize()); // coming up is the very long text segment of 20071 characters for (int x=0; x<41; x++) { // because coalescing is false, it is handled as 41 separate segments of 480 characters (filling the available buffer), plus one extra segment of 391 characters to make up the entire 20071 characters. segment=i.next(); assertEquals(480,segment.length()); } segment=i.next(); assertEquals(391,segment.length()); // last chunk of the large text assertEquals(480,streamedSource.getBufferSize()); // buffer hasn't been expanded because we are not coalescing segment=i.next(); assertEquals("

",segment.toString()); assertEquals(480,streamedSource.getBufferSize()); segment=i.next(); // fetches whitespace and pre-fetches very long comment assertEquals(30720,streamedSource.getBufferSize()); segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); assertEquals(20077,segment.length()); segment=i.next(); assertTrue(i.hasNext()); segment=i.next(); assertEquals("",segment.toString()); assertTrue(i.hasNext()); segment=i.next(); assertEquals("\r\n",segment.toString()); assertEquals(30720,streamedSource.getBufferSize()); assertFalse(i.hasNext()); try { segment=i.next(); fail("Should throw NoSuchElementException"); } catch (NoSuchElementException ex) {} } finally { if (streamedSource!=null) streamedSource.close(); } } @Test public void testCoalescing() throws Exception { StreamedSource streamedSource=null; Segment segment; StartTag startTag; try { int originalInitialExpandableBufferSize=StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE; StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=120; streamedSource=new StreamedSource(new URL(sourceUrlString)).setCoalescing(true); StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=originalInitialExpandableBufferSize; // behaviour is identical to non-coalescing until we hit a large non-tag text segment Iterator i=streamedSource.iterator(); segment=i.next(); for (int x=0; x<66; x++) segment=i.next(); assertEquals("

",segment.toString()); assertEquals(480,streamedSource.getBufferSize()); segment=i.next(); // fetches very long text segment of 20071 characters assertEquals(20071,segment.length()); assertEquals(30720,streamedSource.getBufferSize()); // buffer has been expanded because we are coalescing assertEquals(20071,segment.toString().length()); // all plain text up to the next tag is returned in one segment segment=i.next(); assertEquals("

",segment.toString()); segment=i.next(); // fetches whitespace and pre-fetches very long comment segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); assertEquals(20077,segment.length()); assertEquals(30720,streamedSource.getBufferSize()); // buffer was already big enough to fit it so didn't change segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); assertEquals(30720,streamedSource.getBufferSize()); assertFalse(i.hasNext()); } finally { if (streamedSource!=null) streamedSource.close(); } } @Test public void testFixedBuffer() throws Exception { char[] buffer=new char[250]; StreamedSource streamedSource=null; Segment segment; StartTag startTag; try { streamedSource=new StreamedSource(new URL(sourceUrlString)).setBuffer(buffer); Iterator i=streamedSource.iterator(); assertTrue(i.hasNext()); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); for (int x=0; x<14; x++) segment=i.next(); segment=i.next(); assertEquals("Jericho HTML Parser Test Document",segment.toString()); for (int x=0; x<50; x++) segment=i.next(); assertEquals("

",segment.toString()); // coming up is the very long text segment of 20071 characters for (int x=0; x<80; x++) { // because coalescing is false, it is handled as 80 separate segments of 250 characters (filling the available buffer), plus one extra segment of 71 characters to make up the entire 20071 characters. segment=i.next(); assertEquals(250,segment.length()); } segment=i.next(); assertEquals(71,segment.length()); // last chunk of the large text segment=i.next(); assertEquals("

",segment.toString()); assertEquals(250,streamedSource.getBufferSize()); try { i.hasNext(); // This call to hasNext() attempts to fetch the large comment. // This causes a BufferOverflowException because the comment is a tag, which can't be chunked like plain text and must fit entirely in the buffer. // Because we are using a fixed buffer, it is not able to expand to accommodate the large tag and throws the exception. fail("Should throw BufferOverflowException"); } catch (BufferOverflowException ex) {} } finally { if (streamedSource!=null) streamedSource.close(); } } @Test public void testCharSequence() throws Exception { String sourceText=new Source(new URL(sourceUrlString)).toString(); // convenient way of getting text from URL StreamedSource streamedSource=null; Segment segment; StartTag startTag; try { streamedSource=new StreamedSource(sourceText); assertEquals(42565,streamedSource.getBufferSize()); // covers entire document Iterator i=streamedSource.iterator(); segment=i.next(); for (int x=0; x<11; x++) segment=i.next(); assertEquals("",segment.toString()); StartTag htmlStartTag=(StartTag)segment; for (int x=0; x<55; x++) segment=i.next(); assertEquals("

",segment.toString()); segment=i.next(); // fetches very long text segment assertEquals(20071,segment.toString().length()); // can get text using normal segment.toString() for (int x=0; x<3; x++) segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); assertEquals(20077,segment.toString().length()); segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); assertFalse(i.hasNext()); try { htmlStartTag.getElement(); fail("Should throw UnsupportedOperationException"); } catch (UnsupportedOperationException ex) { assertEquals("Elements are not supported when using StreamedSource",ex.getMessage()); } } finally { if (streamedSource!=null) streamedSource.close(); } } @Test public void exampleFetchElementContent() throws Exception { List paragraphTextList=new ArrayList(); StreamedSource streamedSource=null; try { streamedSource=new StreamedSource(new URL(sourceUrlString)); StringBuilder sb=new StringBuilder(); boolean insideParagraphElement=false; for (Segment segment : streamedSource) { if (segment instanceof Tag) { Tag tag=(Tag)segment; if (tag.getName().equals("p")) { if (tag instanceof StartTag) { insideParagraphElement=true; sb.setLength(0); } else { insideParagraphElement=false; paragraphTextList.add(sb.toString()); } } } else if (insideParagraphElement) { if (segment instanceof CharacterReference) { ((CharacterReference)segment).appendCharTo(sb); } else { sb.append(segment); } } } } finally { if (streamedSource!=null) streamedSource.close(); } assertEquals(3,paragraphTextList.size()); assertEquals("This paragraph contains character references: \u20AC and \u00A9.",paragraphTextList.get(0)); assertEquals("The following text demonstrates the use of a CDATA section which has limited browser compatability",paragraphTextList.get(1)); assertEquals(20071,paragraphTextList.get(2).length()); } /* @Test public void benchmark() throws Exception { for (int 
i=0; i<5000; i++) { for (Segment segment : new StreamedSource(new URL(sourceUrlString))) {} } } */ } jericho-html-3.1/test/src/net/htmlparser/jericho/StreamedTextTest.java0000644000175000017500000001634011172550220026153 0ustar twernertwernerpackage net.htmlparser.jericho; import org.junit.Test; import static org.junit.Assert.*; import java.io.*; import java.nio.*; public class StreamedTextTest { private static final String text="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @Test public void testFixedBuffer() { Reader reader=new StringReader(text); char[] buffer=new char[10]; StreamedText streamedText=new StreamedText(reader,buffer); try { streamedText.length(); fail("Should throw IllegalStateException"); } catch (IllegalStateException ex) { assertEquals("Length of streamed text cannot be determined until end of file has been reached",ex.getMessage()); } assertEquals("",streamedText.getCurrentBufferContent()); assertEquals('C',streamedText.charAt(2)); assertEquals("ABCDEFGHIJ",streamedText.getCurrentBufferContent()); assertEquals('B',streamedText.charAt(1)); assertEquals('A',streamedText.charAt(0)); assertEquals('J',streamedText.charAt(9)); try { streamedText.charAt(10); fail("Should throw BufferOverflowException"); } catch (BufferOverflowException ex) {} streamedText.setMinRequiredBufferBegin(8); streamedText.setMinRequiredBufferBegin(5); // allowed to do this because position 5 is still in the buffer assertEquals('A',streamedText.charAt(0)); // only really gets discarded when we need to read more text into the buffer assertEquals('J',streamedText.charAt(9)); assertEquals("ABCDEFGHIJ",streamedText.getCurrentBufferContent()); assertEquals('K',streamedText.charAt(10)); assertEquals("FGHIJKLMNO",streamedText.getCurrentBufferContent()); assertEquals('F',streamedText.charAt(5)); try { streamedText.charAt(4); fail("Should throw IllegalStateException"); } catch (IllegalStateException ex) { assertEquals("StreamedText position 4 has been discarded",ex.getMessage()); } 
assertEquals('O',streamedText.charAt(14)); try { streamedText.charAt(15); fail("Should throw BufferOverflowException"); } catch (BufferOverflowException ex) {} assertEquals("IJKL",streamedText.subSequence(8,12).toString()); CharBuffer charBuffer=streamedText.getCharBuffer(8,12); char[] charBufferArray=charBuffer.array(); assertEquals("IJKL",new String(charBufferArray,charBuffer.position(),charBuffer.length())); assertEquals("FGHIJKLMNO",streamedText.substring(5,15)); try { streamedText.setMinRequiredBufferBegin(3); fail("Should throw IllegalArgumentException"); } catch (IllegalArgumentException ex) { assertEquals("Cannot set minimum required buffer begin to already discarded position 3",ex.getMessage()); } streamedText.setMinRequiredBufferBegin(20); assertEquals("FGHIJKLMNO",streamedText.getCurrentBufferContent()); assertEquals('O',streamedText.charAt(14)); try { streamedText.charAt(16); // causes reader to skip to MinRequiredBufferBegin and empty buffer fail("Should throw IllegalStateException"); } catch (IllegalStateException ex) { assertEquals("StreamedText position 16 has been discarded",ex.getMessage()); } assertEquals("",streamedText.getCurrentBufferContent()); assertEquals('U',streamedText.charAt(20)); assertEquals("UVWXYZ0123",streamedText.getCurrentBufferContent()); streamedText.setMinRequiredBufferBegin(30); try { streamedText.length(); fail("Should throw IllegalStateException"); } catch (IllegalStateException ex) {} assertEquals('4',streamedText.charAt(30)); assertEquals("456789",streamedText.getCurrentBufferContent()); assertEquals('9',streamedText.charAt(35)); try { streamedText.length(); fail("Should throw IllegalStateException"); // although we are at the end of the stream, the StreamedText object doesn't know that yet because the Reader class doesn't have a method to check for EOF. 
} catch (IllegalStateException ex) {} try { streamedText.charAt(36); fail("Should throw IndexOutOfBoundsException"); } catch (IndexOutOfBoundsException ex) {} assertEquals(36,streamedText.length()); } @Test public void testExpandableBuffer() { Reader reader=new StringReader(text); int originalInitialExpandableBufferSize=StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE; StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=2; StreamedText streamedText=new StreamedText(reader); StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=originalInitialExpandableBufferSize; assertEquals(2,streamedText.getBuffer().length); assertEquals('B',streamedText.charAt(1)); assertEquals(2,streamedText.getBuffer().length); assertEquals("AB",streamedText.getCurrentBufferContent()); assertEquals('E',streamedText.charAt(4)); assertEquals(5,streamedText.getBuffer().length); assertEquals("ABCDE",streamedText.getCurrentBufferContent()); assertEquals('B',streamedText.charAt(1)); assertEquals('A',streamedText.charAt(0)); assertEquals('F',streamedText.charAt(5)); assertEquals(10,streamedText.getBuffer().length); assertEquals("ABCDEFGHIJ",streamedText.getCurrentBufferContent()); streamedText.setMinRequiredBufferBegin(8); assertEquals('A',streamedText.charAt(0)); // only really gets discarded when we need to read more text into the buffer assertEquals('M',streamedText.charAt(12)); assertEquals("IJKLMNOPQR",streamedText.getCurrentBufferContent()); streamedText.setMinRequiredBufferBegin(20); assertEquals(10,streamedText.getBuffer().length); assertEquals('5',streamedText.charAt(31)); assertEquals(20,streamedText.getBuffer().length); assertEquals("UVWXYZ0123456789",streamedText.getCurrentBufferContent()); assertEquals('9',streamedText.charAt(35)); try { streamedText.length(); fail("Should throw IllegalStateException"); // although we are at the end of the stream, the StreamedText object doesn't know that yet because the Reader class doesn't have a method to check for EOF. 
} catch (IllegalStateException ex) {} try { streamedText.charAt(36); fail("Should throw IndexOutOfBoundsException"); } catch (IndexOutOfBoundsException ex) {} assertEquals(36,streamedText.length()); } @Test public void testEndOfFileFountWhileSkipping() { Reader reader=new StringReader(text); char[] buffer=new char[10]; StreamedText streamedText=new StreamedText(reader,buffer); streamedText.setMinRequiredBufferBegin(40); // past end of stream try { streamedText.length(); fail("Should throw IllegalStateException"); } catch (IllegalStateException ex) {} try { streamedText.charAt(40); fail("Should throw IndexOutOfBoundsException"); } catch (IndexOutOfBoundsException ex) {} assertEquals(36,streamedText.length()); } @Test public void testCharBuffer() { char[] charArray=text.toCharArray(); CharBuffer charBuffer=CharBuffer.wrap(charArray,0,26); StreamedText streamedText=new StreamedText(charBuffer); assertEquals(26,streamedText.length()); assertEquals(36,streamedText.getBuffer().length); assertEquals("ABCDEFGHIJKLMNOPQRSTUVWXYZ",streamedText.getCurrentBufferContent()); assertEquals('A',streamedText.charAt(0)); assertEquals('Z',streamedText.charAt(25)); streamedText.setMinRequiredBufferBegin(20); assertEquals('A',streamedText.charAt(0)); try { streamedText.charAt(26); fail("Should throw IndexOutOfBoundsException"); } catch (IndexOutOfBoundsException ex) {} } } jericho-html-3.1/test/src/samples/0000755000175000017500000000000011167436712017122 5ustar twernertwernerjericho-html-3.1/test/src/samples/HTMLSanitiserTest.java0000644000175000017500000001553511166741456023267 0ustar twernertwernerimport org.junit.Test; import static org.junit.Assert.*; public class HTMLSanitiserTest { @Test public void testEncodeInvalidMarkup() { assertEquals("abc",HTMLSanitiser.encodeInvalidMarkup("abc")); // return text verbatim without markup assertEquals("ab & c",HTMLSanitiser.encodeInvalidMarkup("ab & c")); // encode text assertEquals("abc

def

geh",HTMLSanitiser.encodeInvalidMarkup("abc

def

geh")); // keep

element assertEquals("abc def geh",HTMLSanitiser.encodeInvalidMarkup("abc def geh")); // keep element assertEquals("abc def geh",HTMLSanitiser.encodeInvalidMarkup("abc def geh")); // keep element assertEquals("abc def geh",HTMLSanitiser.encodeInvalidMarkup("abc def geh")); // keep element assertEquals("abc def geh",HTMLSanitiser.encodeInvalidMarkup("abc def geh")); // keep href, target and title attributes assertEquals("abc def geh",HTMLSanitiser.encodeInvalidMarkup("abc def geh")); // encode parameter values assertEquals("abc <u>def</u> geh",HTMLSanitiser.encodeInvalidMarkup("abc def geh")); // element not allowed assertEquals("

abc

",HTMLSanitiser.encodeInvalidMarkup("

abc")); // add optional end tag assertEquals("abc
def",HTMLSanitiser.encodeInvalidMarkup("abc
def")); // convert to XHTML empty element tag assertEquals("<script>abc</script>",HTMLSanitiser.encodeInvalidMarkup("")); // remove potentially dangerous script assertEquals("

abc

",HTMLSanitiser.encodeInvalidMarkup("

abc

")); // keep approved attributes but strip non-approved attributes assertEquals("

abc

",HTMLSanitiser.encodeInvalidMarkup("

abc

")); // keep id and class attributes assertEquals("

abc

",HTMLSanitiser.encodeInvalidMarkup("

abc

")); // tidy up attributes to make them XHTML compliant assertEquals("List:
  • A
  • B
  • C
",HTMLSanitiser.encodeInvalidMarkup("List:
  • A
  • B
  • C
")); // inserts optional end tags assertEquals("List:<li>A</li><li>B<li>C",HTMLSanitiser.encodeInvalidMarkup("List:
  • A
  • B
  • C")); // missing required
      or
        element assertEquals("List:<ul><li>A</li><li>B<li>C",HTMLSanitiser.encodeInvalidMarkup("List:
        • A
        • B
        • C")); // missing required
        end tag assertEquals("List:
        • A
        • B
        • C
        ",HTMLSanitiser.encodeInvalidMarkup("List:
        • A
        • B
        • C
        ")); // inserts optional end tags assertEquals("List:
        • A
        • <li>B
        • C
        ",HTMLSanitiser.encodeInvalidMarkup("List:
        • A
        • B
        • C
        ")); //
      1. is invalid as it is not directly under
          or
            } @Test public void testStripInvalidMarkup() { assertEquals("abc",HTMLSanitiser.stripInvalidMarkup("abc")); // return text verbatim without markup assertEquals("ab & c",HTMLSanitiser.stripInvalidMarkup("ab & c")); // encode text assertEquals("abc

            def

            geh",HTMLSanitiser.stripInvalidMarkup("abc

            def

            geh")); // keep

            element assertEquals("abc def geh",HTMLSanitiser.stripInvalidMarkup("abc def geh")); // keep element assertEquals("abc def geh",HTMLSanitiser.stripInvalidMarkup("abc def geh")); // keep element assertEquals("abc def geh",HTMLSanitiser.stripInvalidMarkup("abc def geh")); // keep element assertEquals("abc def geh",HTMLSanitiser.stripInvalidMarkup("abc def geh")); // keep href, target and title attributes assertEquals("abc def geh",HTMLSanitiser.stripInvalidMarkup("abc def geh")); // encode parameter values assertEquals("abc def geh",HTMLSanitiser.stripInvalidMarkup("abc def geh")); // element not allowed assertEquals("

            abc

            ",HTMLSanitiser.stripInvalidMarkup("

            abc")); // add optional end tag assertEquals("abc
            def",HTMLSanitiser.stripInvalidMarkup("abc
            def")); // convert to XHTML empty element tag assertEquals("abc",HTMLSanitiser.stripInvalidMarkup("")); // remove potentially dangerous script assertEquals("

            abc

            ",HTMLSanitiser.stripInvalidMarkup("

            abc

            ")); // keep approved attributes but strip non-approved attributes assertEquals("

            abc

            ",HTMLSanitiser.stripInvalidMarkup("

            abc

            ")); // keep id and class attributes assertEquals("

            abc

            ",HTMLSanitiser.stripInvalidMarkup("

            abc

            ")); // tidy up attributes to make them XHTML compliant assertEquals("List:
            • A
            • B
            • C
            ",HTMLSanitiser.stripInvalidMarkup("List:
            • A
            • B
            • C
            ")); // inserts optional end tags assertEquals("List:ABC",HTMLSanitiser.stripInvalidMarkup("List:
          1. A
          2. B
          3. C")); // missing required
              or
                element assertEquals("List:ABC",HTMLSanitiser.stripInvalidMarkup("List:
                • A
                • B
                • C")); // missing required
                end tag assertEquals("List:
                • A
                • B
                • C
                ",HTMLSanitiser.stripInvalidMarkup("List:
                • A
                • B
                • C
                ")); // inserts optional end tags assertEquals("List:
                • A
                • B
                • C
                ",HTMLSanitiser.stripInvalidMarkup("List:
                • A
                • B
                • C
                ")); //
              1. is invalid as it is not directly under
                  or
                    } @Test public void testStripInvalidMarkupWithFormatting() { assertEquals("abc\n def",HTMLSanitiser.stripInvalidMarkup("abc\n def",false)); // no conversion of formatting characters assertEquals("abc
                    def",HTMLSanitiser.stripInvalidMarkup("abc\ndef",true)); // convert LF to
                    assertEquals("abc
                    def",HTMLSanitiser.stripInvalidMarkup("abc\rdef",true)); // convert CR to
                    assertEquals("abc
                    def",HTMLSanitiser.stripInvalidMarkup("abc\r\ndef",true)); // convert CRLF to
                    assertEquals("    abc",HTMLSanitiser.stripInvalidMarkup(" abc",true)); // ensure consecutive spaces are rendered assertEquals("    abc",HTMLSanitiser.stripInvalidMarkup("\tabc",true)); // convert TAB to equivalent of four spaces } } jericho-html-3.1/test/logging.properties0000644000175000017500000000021511166737762020441 0ustar twernertwernerhandlers=java.util.logging.ConsoleHandler .level=WARNING java.util.logging.ConsoleHandler.formatter=net.htmlparser.jericho.BasicLogFormatter jericho-html-3.1/dist/0000755000175000017500000000000011214132416014636 5ustar twernertwernerjericho-html-3.1/dist/jericho-html-3.1.jar0000644000175000017500000053775511214132416020246 0ustar twernertwernerPK!M: META-INF/PKPK!M:META-INF/MANIFEST.MFMLK-. K-*ϳR03r.JM,IMu *h)f&W+x%irrPK 6OGGPK M:net/PK M:net/htmlparser/PK !M:net/htmlparser/jericho/PK M:&net/htmlparser/jericho/Attribute.classVSW]HKX#>PAj&>E!MB,a1l0 A;я@==s=;1& r8'GH'8yAqcq.&P1, F F88`Tq0%b+uNU .($'b '1YE'Mqg*S Ȩ2=52lg֕15&T`h6bƕLVʹ->nAPRKŰO`.mY!/f <3 D"^`@W$-E0/p$D  飡H(WM46!$6G<& nM׌NҹL\m>K *\Io۲ d`XfsūmK8d+. ؊q涆`%c Znu( i2>ꉘcyKN0 PJ-[l/{,n$ɸ:nh԰ޤcų\9-?ȩ:]=;}ñ;tPn$AP[x[;`‰{ x_PGXi S5+j;LЇu aI44$4+.YVk3X&kL2xl!Njثlk#; Dn#y{{J$["G%$?X"?D%rɝ%tT pA[[돦 ʾ<*ꝿML4z'p8z 9rЍjV|˶6dvQU@ž fƶ6אY 1 G%gUê8q ȂgAX0mn4rڦKUsTȫWUЪ@,*\E--7+ň,<+-`kU&V>lٗe! 
L)0'!6]G}xXW~<-Wp<{,+,a436@%8껁MF \OmEk(8IH+ih_ϙbus1lWlo$J*J~$ǻ2/]ulWPFo 3?PK28 PK M:)net/htmlparser/jericho/Attributes$1.classn@NbZrXbh¡n@ !-.h*vE/jGƮMy$Pٴg>hw~yNp*.ɻ,RI.冂 n3<3o\{阧ƈ$%DN{gmw~ơxɐo +ɐ6g؏"H wf4p#3#/33ՐGC;MTPʢS M55<]aT ;y=I,xD&zjo-sngZ۴~ZIa{ַzbj˶ζ;˛0TŻ֛ܢh^*=N<'_o}_; (bBXP%T2,V/(a:(& [ x'dk66u>Y isj*IW)jPK iPK M:4net/htmlparser/jericho/Attributes$ParsingState.classTmOP=B (L׹ tso00luiT!߱1k2-[칷ɢ%9s{s}?R""aYœ$d^1cH3~*c3S˘f1ø ᅀAeKWwjVU^ovi74t,e۶ڛ-ӱ:+'Yܡns㴛] );U])vuU z/'5]k{JeL&ksqU*WJNoy>H5W_?\luV3L7?P$^eʡybZi4n7UhMg]@z잀lئm  ?3IG&XG1\ؐPtQ܅EYHE1 `i3Ha[@:ilϖCmo$k8Oty|qnH\g >O<3c)A b1wH%q!|C_S$?q;=S$!3 -,:nZ^/=B4F93\s r1`!.b\\Ĺytsi.c#b@3B.4Cr1fhhL;poD]"*PK@pPK M:'net/htmlparser/jericho/Attributes.class9 X[uHW+qy,l9!F0q*1`'`G Ž8؉t9 KW~eZצoNMvNGh" dǺefnݺsuIFq[O99|͗[ILvyxQC_f4I0+/Kp / *¯;/o8hr |_3o ;No;u;"1/HMH?v& g9b7_0%#Io,w% U?$O oῘ3C2~OoB9"Ak9 h% EQ"#]B!`+R*ư3xZFwXb"j&b=,v!o(JDK Sj|Hߗo, 8D#Ԇ1%Ճʙx<%5e-Bc=KCjW PX-Bo,L̎ 6IOHVVt_W:Ymڕ(h'v*Nި$[Qzu5J2N)QWЄ^oqEu˗!Bim$N)HorJ?U'U%G,neڭ¢XURq%cInfi-!崉L[-J= m9sHt(1QJ,@˲lIw-]Tl* *S9hъ#-u]yJD# VGm,F[mTP(# Lg(؏pJ1+PT-a cC.%HpGߙ:GX WY"^$(,517@|ݑXrX1JRo /N(AdR g*#dVm~,AEͦ-GT!"$#|j|Ya>;= &lᬭp{FhD+VmR,?Iet)znvS 'e 87X\C{B$QU'ҼQ$z3k0Jd &Ef4luH=݋ϲPGi9R\v*Vs(c=0%HpPρc=)⣕`MTB|ꚈN&NSш(a^dHWAi/qHotwlI?4F3J >b{ϱU }\,kG_cǦ\N^%Aɫeܤ2%ZȠr}7yfQɀ #*h*}*ejʫJ49m$g!ل{S(:u=8?qs8}fL˯YEh +6TRK`>V4Iފ^'zLicq@=MœwK*AtÛԵLcuo7bEo~ gph;kYto,Ԇ3WY6o)\{Hz]` NbUgo 6ki:g< @͵ֆY@[x ,>% .cm;8}Nal-(qas&Fi^sJ3`_6r'``K cc2g s-;~[| |.r*n1 q.#-Ay=e~%נrd:˭~ٗ Z;k,~տu UJ)π+{F%m]"33Pߖ[>j ],5wG{& nYu je5j񹜮YX?YY\kCe{xDDaϢBƄ{siCaEf;Z{rA?B%e@6b`}en1~m mFAxȩ J&c;e:Wyӈ! p¡2 Fh5YI3Joi48wj<,L{ILx2D|9Q ~s6[S A:2I갡Z]( *?PKf6}&PK!M:4net/htmlparser/jericho/AttributesOutputSegment.classUmSU~nlXZRզ-ƪ -DmCe6]?oGf7G9] &gys{ݿw7`xwtLc^KXn :G[l?uB y. 
R(3eEVuL}aMú O@ҫذ/wbXY* GG Ϟ,Ђ9jQ`,{R6% _\RJ淳5^, =9;n=eK0ifCX*QBپRQr`F/n/WȦr|EfSfRmU]ɹ̚[iXJ*6VC UYzw)?@>jho} (ࡁ)p$ҶܥSǺIOWZFRx7煮\"geb e2b`  ^ˡd 0W<|MstRoMQeK]OB%,o[TuiΟVwfѓyK 0DQߨgt)'(ً9gNSO/ _ۨIKH"D_4OgUJk䙤q\>AaP!F=Vz}xއ+c$b5z;^rfA`~˟֏Z#{]\'wǰ 4+H "^nFYo6XX'.1l<֙(GbcZ\Cj)>1<@:}!uN!& m\|J=bI&]PK.S PK!M:.net/htmlparser/jericho/BasicLogFormatter.classSmOA~-8 (E*] wii-Tc,Ǒ㮹^/b$Q;Knfggggf,Ve`J5\q74 21#CB1Ib%pG¸$0Jx$6N ' ^94mΐ5.6K -}G׺iicmhˎ{wK i*Tʂe Oye[.LkzƖnKYjrKV\ܐ M)}A~ 1]\"3[+3iU2Ck;9VŮKLPC0%L\nlIƙZeӕUSw3.VέlU.э}|肀FL2[/ ,5 _ŲF5Đ LHiXaT*ekt>Nmkqa6iG8a)*gdB'ʌ)^̿52A IVu[1Wn޺\#Դ~Tcz1fGB;ҖK>Hq0Gt م 0pL!ߛjVw=]G^N|;;طw<8QzTZE\DmE&zH{,Ř-"SlVĻ??\E"Nz8Io>x@Uf!l"_6LeiEk=&4әȩ͜HG{1uPK5cPK M:"net/htmlparser/jericho/Cache.classVweMtt(%4-)} Qi)R0hZ4ҁ|ǂݸ#"G.\txfq﾿&57wa-v {x3&eneSu؋}Lߍ:`A!b4s."3,1Las:"Q>8&"ɒLRL"2ntb7O3q:r"NpFψ8)6i1E@{$}dFֲwTl/j( X.'rbTFFs3#rl:Y*dZ|V@a65512:=6;(?q ƝetZ%!SOGuMMu5S>SJu/&sQR }V@+K*J1])/)`,iЃ Wú$ ;ЇuUi 9 +ˇ"{!/IxHxJxy 1 a&u!M&CxKxG»"=  #  [͖ I!wzkNMMK_IJ p hXtk2\,L\BiHpwt(gʠ R] i҇B:觓r\y,,E|Q˂m` Ÿx# G p?F8"p<¿"p"I'#p*i#p&Yg#p.y#,EB.FXp rK.C W"\p5JkV!F:5k!Ga 7"܄p3-"܆p;w"lD n{E~DxaGEx q'Dx igExy^Dx eW^Ex u7Dx mwEx}>DcO>E s/D koE{~D؄6@5hE[m ZD[ m1]vCm9h{텶7 }n/~h+n?h mAhF5mA-ڡhA;vhGhwDڑhGhcvWXvO{xv"Z$uhG{hGۀv*ߡv:hg=,vߣ=\6mBی6v>hhA{(6ihCEہm .B.od;M./]o{Dp{4Xzq%0x[" zN>$ >"OޙM[ƆAtcG{ L3<0%AdbQS\)dq!ggrfVAܦ\6H:X811ey 7ґhFs\~i_2l|sWa +N:#WpU\lWqxx5g4kJL{*&cFkbU$FX}ƛr2ײjK6eeqx6b ؖlnMӺɝX\. liYP2r2yX.!=r|K6@zo~7%MndY2o)˂՝jSIQބmŲ" ]Uk]}F}% YWW|+EgX;cZxeCGE|lȷd>]4D]<%;mVOIɎT,g:yFdg|'q3V璳 %_3&5Lg"%$E>^Y.ȨW%p#qץdA!*񦼧D˷E|&]}$>ޓ5XBdW+&Ltʤ~̵҅e" m~mfm~._ȫu%5'$5+)Zx_zFdo\\\rq\Z3YH~䒋XOgx O)ʻY$:EH~6Q'Hi)ask; wr9Qm鲢Z3ɨ8:?9,0Sr<<%9yA6=5UrZy 8gF\ںqViNOдI_*k[EZLn~dnntgNv]'6w,]^0RY'[YzTa6TMɴ$o"xsw3gK-bS:wr9s\=efƵ-4hNV]Oz<|i ӨҴ- w[=TZ\*tN5NJ\X=MpR3)vQ+6=Via&?y2Qd1V9#%͜Sлof68JFqt8FV*NDt"]ˤv❃9eTf}~*#35e 貕;Q^ Ӈȗ,tW,/ng$]^fu^< dU?\#U)%#%{N6h̫M[Z$XN,ArHddT(# vUPk%g$ڤwO)sg4̝0vԆsؖ1%NgRt3E03Cҝ =>N?ԫ. 
NthlKF֑Ȏ8$M6f69[G}f[KȶkmM[WW7@z2t/%-z9:C' Eޛ䦧fhs7i+0\"ԙNĶ=,ݔT+݌LI0v)%FLL4Z3a0K:bDǿx)Lw AYD{g2U_ 0.z80J6pLK vb_$ag]bMf=f { aO'dokYbz̞dO09 LuSbTY^R9IoKCa rsriB&jψALػĞUbfsc;-R{A^h/T] &V#;2? 6wGmc%"Cه|7\?/bCQ2i_LCUQhC-LjH4(֏3T}^(,s]%&̟_~x!.GZ-F4}맕|bdD?PB?p"ߗ|6a F*/T| Qŵ&jMjVj EZ\uXܐ/ߐ/!?/K3UgF]ԱsAWA|xD<44ƣ *77:Oh|d@2*ԉHtMɗg1echzLusVYr'X/1$IʁrT'ӊT`Z)&Gr~r N{ǔAIj%:hJpMXMTdf#C4N_:8s?h:֖hX.ѱjhIXS0ǚ)G1Q4Nt0vnˠ3:hK $]!'>|Ԥ*M1 ->Tq$)K^ZRQ>rQdҬVPw%ތW>UztS·y|u*Ga{5\=pQΎHVX#N1z-k;ŏEx16+7G<6XO葁g#l|@-aocR_e{E̮Wثbj2fqI>ݚB,K av$:bv5(irUٵ@ڰtRl]$mhf g$+uݯ}3Zs3:tà^_;}CNDٿZe54wr@^7l=)m3к}7TОSϟo\#Dt^eXoILw+^q甜5Q/,EiXo{" C~ oK>lϨ1{}kOApt5'~nv ǒ=HHq]Kz_GGzHHOvn =ѿ#=IpL:z=G?:z.ynttfG'Hwt VG'ICIF: vGF:ҝΑ^腤9z1%?;/I'}$}AhGC>q>oqIIISII3IIsII/u/tE/v2җ8z9K}}UJG_CzWב^赤9z= M-mFGEnGC^GG~G?@AG?DaG?BQG?FqG?AIG?EiG?CYG?GyG@EGDeGBUGFuGAMGEmGC]GG}G@CGDcGBSGFsGAKGEkGC[GG{G@GGo `6Tf H;4:t9+nN=Htt/ҽ]A HW:zK=@GoEуHW;z0 !ޖPG#]ᤷswp;:z'#=ΎMzw!ٍ&7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!p p p p p p QA!3a]H QBHnDTbOR[E/z ?® š .} =AQ͵RJj`s>QvH6 l嚠(_bLJ9^׬JLpKT֬ JlW,(6gm ʆ ʖ֬[[nh<]twX[еg(\#V< Ѽqri3,DrDm>noe҆A˫g ױ޳ǚ^޳]lnm˫pڠY[Ivà yCj}MPr26[w\/M:ԔYli&(+Bon *P CC AqЅ+ hW1cJk*pM՘KHeKhy1_L([ :+(cJXQTQV os%HMm.ȡq.e%oC0xv͖%ekqV_ţ;VdK. 
zUϹ8(,_XZ aLz9zmW$7a`VhavC{KC ŰFBn"nbpG(ă;*\^ /({pgWxpJ*jpwWx*\q W{pzpOyp/k<µpQރ^ 7xV WOMf)Ń+Ճ(̓) `;=ؠp*˃Sx)ǃ L{@xp=x‡<8[^ _`=ت%&CC<ئu7=خ-mfzS{)|߃ ~ ?"yp=D'O=gy/ ?_~~)ʃWPTR{GS&+0<Ƀ =xB+<ŃQx4t L,l\<|ЏRKg+Csѹ <~htB?4Z.PF*㐋q 8dB?D,W!*q 8 ~B\ЏCR!W+㐕 8~JV!*q~VS!q 8dBoP }׾I`UoV8ă([m 'zzN=?TG){T=ЏXx ƃO*>ȃO+,3 у*,s xy=B_R_VXW z5U|]|CPpRX; w y=|_$~p?TG =„?Qx?Ux?Sx?WxPxT!_)CBV`~_+, BoR=^a t{} 2ϧFiOAi§V|1-a!>,8·(pOj:ͧeJҟWLiܧ].i7fO+}ާ.Ƨ=O{*-i/1VէJ>Bi/Uۧi~>RiO+Jtҁ>JiO ;Zn>twntOŷyMm^M|}Xap@p083؋ۇ-Gpj)ۭ w0(|6;2/k|MThp(0Y\PK?]WJPK M:Dnet/htmlparser/jericho/CharacterReference$EncodingFilterWriter.classTR@=KbcbDEiEdvqtabZ>O8᷎Gg|Իp3sO{￾|09 ) FTtl8'yQ.@c*2ȊAWW11:MmmxeDo^ywYdr+ZF>dJ3/Eˌ)XzjVPV2bel IR(*l@CNj&4kn!aj8%bwpW>YGF9zU 䁱)W lCZOy}ZS)G| i<0=; yHUT%[tbwN Gjqu}cٚ%XV1"C<"2vj%{y3۞{MX&KαNĐ8s(BԨuPAnˠAo?'g&wrԿ =T|e24h5XO7H# y#X bN&@ ǤH7j wbjȺn?дO,=TL:iz2- N)9%C>2`a0r!)jjy8aVi?e PK8.gPK M:/net/htmlparser/jericho/CharacterReference.classX \_%SC4J( "~Ucňm6N@+1|BlHhԾlm>6έZĺvl=ݺ]~}_B+ss9<ڣX%Xm4:j{$y)n>tp;qLg 9" .j Zk#hwmQPHKI%L)j7 &.MR'Y WuVܦ^O$%c-#hpU/ '{vCX7L Ru59vf|AntIX$Kpq50}0D]ݧƙ΄*ˈ&5 Fr$KCaG48C,[3W1/@4'ãb%~U9ͭ'HGq5 u r3E$"{/38cT]'&by.r;fpʄQCWRpdO.l2ɶz 2c:3 Or؎h28&ψ"'Xt_bv?W&U-igag id5y388U{X7Y#7#2zIeea inc8ҥ%!Šn#l,ˢOEkvih2ܫ,cIEd܉dD,戹A $BX:2E(nwvo;d#mE1MvŠle>Hf.4jwMʢ|un/CM5Q.,_QY&73[K2IˢB,EXNW~e!2Je9ӵXCT O?ڥڟܺoc?ڕ8$Rx%Q-qJ.ɸU&,Y+:JղX#J^Eb=(3l\1r]U48ƓXt#)}%1ײ8I0HG6m}}O6:ئ} e:~=p9$4̠!y"h$|aD5ڝU[W9ZTf,L=tZdTY ֪)唑grͩXj3T UcGG| hRnVu9gTlIba?{IhZnnmܿF}@QNNO4.zYC^5jYFXk` ' {רavOkU9ˈ41)΃^I=m\kopj"eD`I "Ud|޾`27!["0ܪcʖ-ڣfvi_ՙ[isTh]=Ȓi${빴M C=s+UOjn+G0Ӓpos,9XO0պsznn4/EoO nz+hݰrEb3B>.QU3򩟫xKJO"Oq!Ƞ+^P阱L4-|^H$]0)0D.!i@5;gl A,Cyi|aPFao*- bQZ++/"/F18< Uc U)O!_])̣Ds9z vyO8I)np3LS#)PEq8ưrzޮһTuz,Ƈ_UuZedRheg*(xNzBUC}jS bY'yG],Qߍw&kxɒb a!zY'/8VNJ{&i٥Z5xtGdMD׷NvכO` ʼg!ߝ?-.K7O`}+a/y^2) ) Sjf oqp?AC׏MGJMS盎M9l=Qli7[N#oiJF-b{m=6AQm{(tΠ 1*ܶQ0\p)l&'b=F%f yYqiAc4lma *иp4!:Fb_ƐP?PKrM {PK!M:.net/htmlparser/jericho/CharOutputSegment.class}SkS@=ۖ QTy[)PPeGƏia4ʏcrPh'{ι&66T$cs)1'N?➊x IX1,iG )rV߷4 5H31fqSxॆ 
'VV]i0;.+jx!ےT2g4P7]JA, f``u[_O(Mc>$0#!gN`B4yqʏ#Evv!B>9ˌ I 8@+O',*9c)R9>B՞p7} ש҄wh&ہ! z-#R˒R;b!lH/ZdpqOџ: o%`/PKtPK M:2net/htmlparser/jericho/CharSequenceParseText.classTMoU=xNz&5{`h@]X"מ&¸%/$0 HHl] ½8vH,?7^P'VM5B[ǻR:,\DZ d,"!;? j{{UIXol!AYpBy 5 %%\ ~l߬W'z|hs돶*2S&HT  R#C_H]ϲ2'? yf)LgHn'~fXH%G҇cXk`N> ;.p"zPbCumgmkROey 7mLjL*Ȗ>ۇѡVP9_C2O)W7}MJBzD58<,0٥Ss\O[Up-Ԭp^zp)v`uP6]Оb:.xQ a ](Їj]XmZ{&^[Qi&ƈ7I>))Bq:,2S鵕f#ݾ#D;[b۱`3-%_<۫zI6 ]e҄vQk^\b +hh&u\{m&F7C'/Ha螈0s<}}X!$}"'SY} "cfȇPKMvPK M:-net/htmlparser/jericho/CharStreamSource.class]N@ECxEZQB(Q 7^޵6cȷQ|bb=3 G1 7p+qg~>FMέg1NCݶlWp%zvQLb-tMIɜm!%a7Ig᜸xc YI'KˢJiV5{U7yM}xĺYLٲ\BFO~(Q{} {H?QPKʻFPK!M:1net/htmlparser/jericho/CharStreamSourceUtil.classSKsP=GCSRjG@X_:SBGV-܁0!a'ܺv[7beF(cE|9_~ :8K2B,Ã+3k_ aU.|CBB"cI&&Ab+,[-j|E+Z~^ʾ.zjb~fspu!z:JUd-wm2 M6=jv7ږkSmF˱,,v:%("xB5[x<DQø= "V\BZ­ T&ƈzwp`HP7XI|2\c'X;e\7ND{K߷t3HQ .bRΚ%wn=Q/EJ^M"Ύt`#iLkc(#!۴EE]oWboZ7g,"YW7PKK-PK M:5net/htmlparser/jericho/Config$CompatibilityMode.classV]We~dIXZCh |H4$(%ydJz7xs?Jl(BC.fvvf^, aɇpKMtSOYw>c9/҇f,Xc Xf[`[^2\h=< uNƋFC E2Y 9&s#W"NPPT3E0aꖾC9ҪѢaBV74K[ZAK 6DA)(,Lb"%Lԕ,A(D3i 2g↰"[V6 (DEAOm"ќovK8V_ɢUhrjzm>K,-M4fj´4rZG%i!$4Ħ%EsټfzFҔ0Ow*;\Og%ϸn GܳLU^9_ VPO5^͙39 ko:;1` 圶cUN&& IvVc\zD)ҳZ>d+mDYL5ϝP)^܌9 +Zb4x*0De/"1t㎂zѧ. ,+(A'>Y3iA[W`X0xTU ,+x;tN4{=[̪):<I*:TU^o(WVBPU-Y-du!73k ,py"g{B- |  C&1mm}DOt݀ ɦ;z#nX:?d݄p>o JMtGx=Ý{  s$染FZ"WUܞ_$OAO8H*Sv펿),5||\~}ٚ0*00*0ׄQ*0ԆQ*0-P dto[Jh;d&\^.;%;wtU9);qwh HmtW1NK",~8@`iá?"2u^wu:/mÎaǩ6^냚a7[x*c|aDcn?@PK_-( PK M:Jnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings.classTRPNZP#Z.R(^Jcg 2ˉ N;iT|/PGf|JӦ:dfwϞowob98 "K.U)1AH#+}CM붊*=̇ ø0aUL״<. 
?+Q--<V`,q6lwSb#9X^{*W,2Lܶuy(5^}\Vmfufk[j֨MCOM Vh >zcΐ{5{mQQP> ;Eѿ׍(4~9T\鴞QF,*g Jo/PKAb#PK M:#net/htmlparser/jericho/Config.classSRA=(P!\d^eZym DOGI-зTI8_&@'PKdaPK M:$net/htmlparser/jericho/Element.classW[~C+Y;)㤭q(`S4dAdUV+۴i#ML[Ii$ =M^i;=~_vfw%A<1;732wql<ʰ>ļײS Ŧ5]1R$ ec3{ֹ%S(%!=6qLlUɮ_$=mа:b>nZ3Sԙ58├LRqctCnԝ!"/a H{D\p/J8JD\Boc@o,x@֞0Lƴl59Q-L#vvL9ȆiE|&r.K*(>H6% 2 1G~6u5$FHZf*7>+glVUYUKV䩜Y)VN+YyB%IWMJ k~h3TIgHhQШ3ɅM:)NٜV'm ½<'~{]Rf K9JX/DR+x@¯pJ¯;XjQt,U^Bzr(n5Lk14 %'YAm/_$43lf30$Ȉo^*u ӌ}K*M 5QTTZʖ6-87Rb\N-[x7*$ħF˭/뱕Z F w2q*d+V GjEM:w]@rI%&ćV#ls0]cYRCNSӪ af*i\C U:4 OB*3 e>%qR_ڦKͩuv]9%y}Wōi浟U LB~Ri){\-ZRirWZ87QsUۜ=7gzpx>GH$(TT?0i߷cA+!v,r UaՑ[X qWܷ9#Eڳ_O3q]x Ccjqwws`A}C͡=*zEu( Kb"CyYH4V`EAXnu o,g$q 颗)80u$(:`{&Bj9GCGfth;0?z_NޝY7rjLw2bX ,(Y|nPKfpUBPK M:-net/htmlparser/jericho/EncodingDetector.classW[ , QWP!Ny&ɚd`p̪6m4}Mbn0i>c?Jsfvgs{^O¿˜@Y:O jfVEB".؜u &do1v=>;,z`&ߋY<#؆Y<"~Ə"؁ w^~ x/xY/W̜grIįE"U=%5xCě"e]oN ǻ,^M/TYM3yS3:gO "yEuz>RPڤUl!`CTڬ+\܀۔:qΫ'DO?ɌO 4e+}ZZP ԍLI O,Dˁ"bTTgcC-⌀9>#[Uu|認:za(YʵqgN1y n2eer^V/ZVs,z&%bӭb`vŽr$XLOQ튄dHV,9KY.lˊ9* X$ Oq$,5dS< i&9a Q STM5` (tj[WIMAvc\@aYU|*aeF$:ng a+WH]Fg-["y.VB&F^_տ1hgDSDg=sC U] gxHV95WLd N<[AP\f|ba+ |7XCR.Z,]$k J -E3I hcYS \I9[6Me,| 7੢huSǩbe~þmqEbgY~F|53NβF)P%T`_a. 
[Ҭ> (]jb4pOͫ~owUb99 4TiIzИ=V+"CU]烪%QSͼ@3]SYT$m:LR8DO/n7j=G_57agr[uty|7ؐ#{]4?雯Z Gӊ}eD 5%~_ǜ mU0^p udN(aM/a223%DKhJDX}7%_ԲҍfʲeKo6&.!BæKf:@qh qa[lcN,a틐ERZDK5gh 9Xlab0q;te(XGvWA-DVofoY \n"[FI\2Ͼؑa+9^;0q=sDؤ f@"҉(B3OpDˎ| ?d\넗 ϳ"[d/ɤy죱j }Rd!V%Ia Nb%ƱU$c_.ʉ`)Wa\ H&6Ӱ{]|Fݠ::z8*Vh+(E.G(Pġm]5jM5^}VXq,ydm[ QtvR•UgPKvF[PK M:#net/htmlparser/jericho/EndTag.classV[wUN$a `Hj&TE)ho6ZTpڜL&\A]B苮7LBtC3g_n(spU,hsB: vBxޅᢃN'ItЅnE-2.AD,rHK C^ ∌"*D4$"2,t2FB!$Q%)cŵhDEqm7ꇍĘ^?p- 5ǵ߷~8yS(#ZPX}Z5%D7|kV_:LX,iGYH9Zj:SyW{qJrq_f Jk\W `˓2v~>$*zxh4ϕˢpr|G6{bi7*o#$Ur]&}]GOiE,oiFP k( BFcf w4 ] gjY_'C+qa-v;y*Y<~ SLJrq% t8I hEgxҰx()(MY81L8R8'l/yBRIuxKRۅOFF0;$/܁%&YC+Q4|vB)Y!V zo;m)ٞE?[.C ;FGp>$MuRv/("ID{Gt&{67@Z6]V6a01$ʤ##C'eٟSdY*e'6_@BڟɶJ G0Q(mZˎOd ҋa^ 0>ad ?~PKr$PK!M:<net/htmlparser/jericho/EndTagTypeGenericImplementation.classU[wUNd&a%4@AҤ1Ph RNNd&Lx|/>ZZ]G??PΤˬgg_WIs$Ʊ$-ȨxS:IL>˫ftvbJ8ɨYS:N'Q:d윎3 :ˠsg I\ļvIe o ~`5xSH0o ^Q >m;vp\yIv/ DKnU ?)0_`֩VYIC}˨)GyfNW l\@xfQA4-W<۪yYX EpE`烞OFUy|`*1.8g #k=~cBu&" *%>ᚆ7 ]òHwI喲UK4I$4P>(Q鸁'vKڸ&?,e}%@ھlھOȠpUհv-˸mC|dc|SsҡJө>lP:mir|R b:VoU |)09Ga VC5rUfFirT'"n>_=p7n$ӗ<<=d/ y$磊Z&N $=ygʩZPXFvihF;^)}q4i;>>5+o[7(.8*s= ö_vY\l(S"*vӹDc(|<&QBS=:H8pxY6S'oyv+dwԦHG%{ԙefrFe/2.SɍaAS54_摝ewo˽86c/rye3KMbyv2v$dz|:MVDahhV?"7x(IAr8{ߠE 9}FD6F2BrkmI:!swz=$#G6dwz6DYB8M$IN:HKm>S1#Zhnbyul7&?:Hq 5lOP 8 t;R;y]7h#wqu\z;ĝ\:;D>XǞ~$Nտ K;w_t3GGcb-olXZcaUPKY PK!M:Fnet/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent.classR]OA=#[X?Vkhї/@K  L`,Y&>hGהּy;s{go|GDĂ SA+iھ9-Uh,"BD^oZUw9 >P l"Đ,{S a}:H)AKHMHw^ ћ?)P&+'ߋH"QAY:6۫"iWNX-"lP2 ~-dJoA:= eccFtvqt۵+zۮ?u\أEo{SSzmnr øS0? V+d(C?0)PKPK!M:6net/htmlparser/jericho/EndTagTypeMasonNamedBlock.classRN1=2>Ee! 
p!$b"R&&&~el7D좷sz;S 6lVۈv#Pjz]+U:g6t-sL ss֪ߦØ5:Xlڽ<`p-x3k_3䀹qdl-:8];Q+9'P5KOJ~ZA#8!AJd}4_2.}5kl2q Sͤ| 8Bw~O#[wv9 (zW/5rY"H/MԃoHG UqK86zգz})|Od 4ie+H\@~f o|PKwPK!M:-net/htmlparser/jericho/EndTagTypeNormal.classQ[KA5wW.M.ҋ"HAJoA˃PGE3&K<̙9|99qh6n`# &EES^;:v 5fL8#1M&,q,iazQҠJu\%H7!#(yL4$a8C{]A1,;QA0/;|V UsH݂= 8 6`y Y.\2|0Xȸ8A{{DJ.Ք/G^\5: gr8AA/ )%Vz(DM씭i'`Yܾ{Ͽ\F`ulY\MR(Ims .OkήX4Vmgk6Jfn`Z0Mc W76k [Bm݉~}(0h+" +XDX4K CŐ.`8(h QuՋQFAgi=غ68ᚅ2uT8> rxsr|M9hXñ86CGߍ|r:eZ/"6w+2P=90tU# Z{pyVp(ZH4Huv|Yγ s VW=rj2-]Ss/,(o }\5c GQgHX_Q ),ӤzϨ#![=F#stqn0Xl709"EᶈwxޑNO0 c;L42oCV3jLh4j=0U>uvsH{[=*ב["Њ=&iBS׌ݮDrDbgqoHYbhEl6T:d0rv =K7Ü:R33H'C$/Xn,J ̗{ eӚú^!Χw1jO3}dƗ 3yOY?w1 F..q gNfq*wzwqf ֯gl,`'zPKfr8PK M:*net/htmlparser/jericho/FormControl$1.classmOPwOf'26ATx1[' :7֮]J;gc w~!9`ҽϯsOソ<J1U%n0NKp6,3,Xf%I 4n)(nr(v"eDJ7aD?r2{@{rp@Z`a='qÁ u ܗ1E,xCR}+W7Cv( #*=bi#9 @H-N0UPKwhPK M:9net/htmlparser/jericho/FormControl$ElementContainer.classV[WWN3arLfBRAڢئU;$0a&LX?>`.'\>̅sٳogO*p &*nfS*nᶴ|!W*1#/U&q_)+{2'r@?ۯV%EXbS>-iU n~Cfq?< wժ/EгaloZy2GײY _ge2V,qT\Qk-J+U%t#C|ƴMa4sBv!6甈,He-ju,? ZE!WHƧ“ge#嵂/w[榏BsK vIhλw=+*QY~wUyAkN\3e´#oC998  'd"FLXFyOD!%T"N#q8F󸾏PHj9bHƶ~G#dt; N'1B =EN8;!TBAt`7`魳p2g{OObZ CbzIhNRl JCQNDOX 2r"]aTP$H.1HahGcU)8PKÄp PK M:?net/htmlparser/jericho/FormControl$ImageSubmitFormControl.classSN@=I)Rh) }@A x!W^GgP"Q1AH هz̙ٹ]X>?n DbPP 9}GW4LhfH: 1L;JqTe`{[ CDѷ%CMGv!34d9fap|DOXPek? U^?u\[;b9,,hXHsaaE_Nnd06kg -z #5F=Qr,ͨ'#{ jBɰZ'Ҧ/iuCuZ{6mldlʝф^{rU-ժŖl9ڒ`xI4 c 2Iwȣ"SQB* s|R|=?p8~x@xGU9q죮C!/908L49 İ>B9a[s!ڶ@. 
U`1rlo`h=RU(%9 ypQ6Kur(Aңdȣzr1jUv*CO{e`0R 9iRru"1ZL܉lXR(P#2PN&,0վQ(.dr|J*66f3 L*( C&rLvZNT_ܥ[2D}lj^{ _M>P)Y*|юtaԝJp$w2O/ӹ|Nuc[BvphI> < I$~"mCy]E[ou"1厐 ~S!vG6}`N1oc98_,T+m(y4iȰzVFmGN}" , PKi PK M:;net/htmlparser/jericho/FormControl$PositionComparator.classRMO@}8)B 1 (RZ)**HHa9PJ= ($R+>myzf\]7\,x(3ϱbɄ_xb*aj7nJ+ncAp;=x/Ot?@.'sO*뷔ԙJ"MxgqƲ|뉎nGW"kqS߉飤+c}jL|ﯧ:IkU[G3ᑏ5\5ppD'JF4F^r+TjྊL8a BDm27wՆeiZoOp6~ s)sdq}svsky5`&OQؼScX$X 4`anΣX]Ȩهe,PK ޫuPK M:Anet/htmlparser/jericho/FormControl$RadioCheckboxFormControl.classWWW"gŸ[C mZ!@MKę J]ǶTOK%{s~޻;"&q;pЍ;=8fZF+DKݻd‹w)nt cn#ݢ 7z'8Nx/^< l‹$ -GU!eXhmSMQIR=մz ji>`1|O>%1|K _K ck1 e0|'Ïg~ Mn%0`nBS,܌jpN#?IWekwe /rQE%imrg5IeM)Lw+*aޢe93AK/kҌtZVXaXZ1=eʀ&"˒Vu R֙vqbS.Qx2H ՗: L]}+O@DlIG Nh/9C)2L{!hAKW2хPn(IX <*D!US*sA/d{H>==ux\;+^%UQS+_A|-qEo,(=Rb}lR[HC iqtnHhw[gW_yx-@%/TȕL*L1I֒^) :Ok%Њji([U#SQrj s*םbڝ!NHs`!vُXH)UՖHOOF;wKPIp(@ Mb"%Z>Ϩ Ob$BEq銊<<{b9;'\x!zN@]xRPuOҲ ,ŋX_PKltB~PK M::net/htmlparser/jericho/FormControl$SelectFormControl.classXcWMv7L6MH$G0ViCe.A6 &Uqؽlgfjkj->ZժaZW}/~ϝn]vs{p[B#Uq {^ qJ"!"Yd* wӠa ESSB ,6dp\% BCKgx ˏ ۏ'(>g4? g,=΅?^q?/ O XgK szm]g栦X mЇ46t;b3S Sjxs+*G=IZZ1-fcH=EWw-PKU_@9 .W_pLwH2)f4YM䘢eh@0zL9h>MUӒ my⌭j=a"s>N銝1Ifw]o'6 $g{Ƀ]Yvo ,(8A$J2Ok NӐF p7>+o$#w}*Ӓ\rXW,) ч3v:c5nPY)1+^$eVx fi/K",i<#+| _'ōo"Șny!ce_ol+ XA6`G36v)ǔ+QBYWHdS/o*AWl4ƜGFG"C:x.seR!'G7EE('J6Tl |îB}\ڮF&cFRTy*. z_kG56N"}Y,mYl>ʹYw#[y@Ь~Wy6Q܏?PKa PK M::net/htmlparser/jericho/FormControl$SubmitFormControl.classT[OA-&xRqE/E)4@,6CYmvgIC }GlנBmM܇9gg}"1(C7b[Q:ݎ&n*iH1)Y̅1FE7u2U2PEhjmzR7 uߴF2m'MKWB"p8*iǚjhf]- [7C\b2Սn$'Su % XUp+-$၄/PS6|o|+$ᑂ)x -Mh1p2=mym&'/pCT)Yw溜puTPuWT[[/L JY `p7&5]箛37ǐQ]چT-^"*z[px&@mT x)LY/ՋFH3uGT0ȅw=~ت&=W(UsrS`å{wR>>~N-ͩ)][ 'Co,ҡG=8ɟo@? G'>|TSY=y=e57GͿ|~샿O"3´? o6OO/?OE?~ W_y8*Z}pk >؅A'HqɌ+`"6`/y*Ws?kE1yc np"7'@qs5n˛ElBĭ ؎:06`nvw;} oc>N} {b/#cbVf{k1S"̒nq@^# x x, 8( E4M1Ri*&B@JQ4O,YAT-UlȖNMJJ[tcI =62#'Z<ѷnDNzߴdR?Q4*z E.mhL1ʩVl6 G5ꚶfRĩ]GCO]) h G䮔%F-CՒ4֨h ۖ+E|Z)Z 4ށ>2t(=?pP4vd7zhcZfɩ"A:}Ã{Gzc#%tKMu-m֏IM21IS{1Ty02q @ pUXڝ>=AjC^ҝcv=XO\aȶ4Æ|DZ:? 
1E,=nksk؛9wxݻbo+xB^\#XHZɌr|F`EݾT=,O5USL) `K}5]ԫ풸m䠡$): o*$>=žV,3}?/UM6-%e)E6rX$3'.:šiQNdmU͘-"ㆽ<9c J*aXW3ʞzSr\hsƂe!)mܯLfmJwgm FIJ蛖Qьىxے)/225')>ڑU,=b~5 { Cy'RtV!Kr8"!N:.ܗt6PC^0}owUlbm;m[jOٗ7%6|GG%| 8*I8Pr8&a hy$;+`cy4'>(ku>Y Nb\ <'&%FrϨb0Q03v"Cqgq՚d4CIM5DeJ!AKk"b:ԯ&wԢȔHpJGѐDA!@1 f{DwViϨsn%R`чaN28BKs5&S,ȿ\ǩ-l7F OI.x2 \3Λ%xS3[O| IV|%}L:NsŪ;W.a-5δ+Z``,2<p &>_ %ܨD·bӪH:d3A(P&e YKFz#u%|xN  ߉)|7]|Η9  Coy/'eaV2OJpzc>-i}~XHQxFIq>'Ic,'𰀟S'gsLzWd@ F2(AVz$$pY%<~A8^X>_x2}NҪ l&Ϥ3)S7f/r#` O({ CLeZH_ړKm?Vs}ʆI 9"l7]٢ KdD#q7zE[*u ! 83TCIYSFҵbk#NЛM@7\Rz2%e6U. hJR%Ĉ)hctpj2b~a \.鴢%8BUnaIҍ,(kKEKų5xfo$Q lӔn[|$ P59eg>|[QRS$HR O60ņVA[1;zKUG{Wob2 J&p:eZPջcW.J%ȃLښU"ŏ| ;FkKJfSyZM}W^&;P5pumduMŒμWg'N"vnsvi⼢OidӲ9Dq)?E񃟫=ڙ+e/p뉓/+8/K=ޯ0s,z( C2,Q9F$2FJZ~:|,C bӆ~QCa}&z?jΛkʹ"(~v*z2_Tɼjζ-]I{f8~.)kK~Mz|Ri "|>D=OjBMj!i1+"&gaEa(|T]qjq>xV Єpp\ϟUaDkcuL,-U@Cgp6-`_?7ѮmZmZDpn/$k-B-A[}>ﰛAny6pIhBA n8j{ž`@??-v< ^] py<CY=§b)BM{'K6c9g=ۙz>iBk0DD],=_i\c3Ŕ,E"XI9ÆtZtҹߟ\@sw;QmS5*J뵫dBhj `Vx&¿5$ P'9jQj[u_߲%!o5cMd^z#3'g]TfXTdJJ囨 PfBH[Ty PK_' $PK M:Fnet/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValue.classSmOP~`k) 8/b\2ĤJ]],܍ئ9>Y\ļpDA㢁%wM,X1q/ `U{x^^xnĐN;RrOtxU1}~ɃE-)"Poy[09%3L rKNJ2F<pD?w]:ĺNjwT*0j-NKܖA؇~ZϔW C]a7lf.v 7q!Sm& az+cb]D?kv!{dj0jCՆ7aRI EZ9< LPK'܁-CPK M:3net/htmlparser/jericho/FormControlOutputStyle.classSn@= ܸ%5!mP*!Ab r!FFfjTZUu!iygsfx6ϢxM(Vny^Hs㴗 )" "bU1yG3:ܰ,ݩdOǭJ@iZ:UNkDwS[yg;#:ٞԋ,jw˵Y~T-f܍J3);G U ;&i P;SJsE ˠGAD5F/@5d:hX} J zr_OGElx{elP<;.$_RHܕHPU#Fzmv}2{uv{[\K 㝥Y.GʞPN 8m1Fi6@pY)|Dg W յ#S=.k)! 
sRGM@n_#|, vl yP '_8߼pBa< 0 |*7OsS ӇyE/qQg 3q|!Eq/1D80 w_e8:$7[[; { 3|!G 3$>2|㳊J  Z6 UTJC+ZI.R!R4R1\ZՆȔ99:-%RMLH%“D,JL5lQ2ٸY6#ǒ<;)H Ц嵂fI WP^5dKU`Y-OZnNW(R'rebY%CERR(i)O ^ӫz~hJ-/'U餚#F'%T2M$2TRIke&Fw"edIiLL=̶Wͅ㳒,`/}9#}+Z0lj jktt,iqvZM#8@clN7%FKBXxz9Y46.աH6o P;7IF01D|/o.Q<c"N"."M)-Ίߋ,HװP1͢1GMKţ"Np3Mqn"<]W NID.jISEL ԴANe-{nxQ&oQm\N3xuQ%5)+?\ǭA|h +pAUZ14xVejı6wӲA[Wu|8sh]XvE 8"jj.7v ӿXp6qƈ6J6Fmq#ѵ„|!`5Mpc{!ߋiIӪֱ&Xk/=3vL7p&-EA}pM34:ܡC u::]beԚy28ko#UxjMm~Zn{k FvUBh[h!3V^먋-b$܄*VGt^Ƕ5,b3鰈Nrv8jDEt7PJMcEf=qPk1w={lfg1 ~953o lf/MvV)]Utiw=rS.9ͪ#D% n}* h57puZn#pYǑSܿd T?D=t?Fk~{G,8wؓSPKgi PK M:&net/htmlparser/jericho/FormField.classVWó>|w|_/%~l$+|؂ooX$S~ld e)͘;<ׇ||Qq pه,1962a5#9 ~`Fli ]B 6Mjmf )OBm&hfFNf,[PfT/cƄiP7aqw؆3z|m0F-Τ~Jz{;AgՑd*ѝT$3+tmNZiSXp>X7bvѮ9+f8ifVԊ%cX!!#dVFu?Ps#ޒ 70F"DԚ]_]BV 5k֓eD# QZFcȔjqJƌ--\fԵE.R5P^keYEKsqW ͧBECѨ5$t?Wk LZR*)j XlӰfgLֻ*6Vr{JoBORnkfQqqا*~ -T~}Wх* ?U&NEN*9ઊ<-u.aPţ8*Ẋ_ຂ_~M1<" y}yf7T,ປf\OQ?g=]`靾BK/S.7=^,kA kζOrvM@J'/\ ~ߏ5%Nѕ2l=_~w)/cZ_?mKJ۽χK xxUuNLKetSg8R`y-GQv _3tYe^?kA>=1aKH.ɰPi,;]ڷ)&&t+^躥 &#CћLiPG J~.n'=X`PGj}уL@f(aI߹-ZUq3=H%Ecx{`\Sg hu<4 =f!=65P/E< 4K4{ozF>nXqĎkGia(iGzX5_UHZӵ9<3κ.,fv,eh}K*H%:aw$7q\QSXhta]ϴTWGVlG^neq#ǡE[籁+ACQ"9| H1K$kC&0%۲q9cDH3acmRdht`Cnu#7Pe7+KK^/Kc˱@p_w\C@[>xpu]n]ֲylmki.&If<]FZ^4e0$a qc b? ~r̊6f2Eto MLE^W@\Sq1ow?v2‡i`rZ~~sobg6+\>-wCTĒ]h,&J9H0vмFZuxM A 7yY.$ qIe ʏS\Boy3Dpd*ϊryW=Pd"gکCص}QﻼI%y?mL,?1r*hRO򉫲} h &n$qe##^!K|hg O?=SI~Dh[e#7\Ϧz]'堣Mf߮PK,pWPK M:.net/htmlparser/jericho/FormFields$Column.classPMK@״Z=TP#  A衷ٶ[I__ Ń\fރfp.UTC#]{ Qbg]%uL8El¦FSipD?1I@Bmne,Gȸ/BxfdV1ʨp.vWʝ$pF*<EE'ыvCd-TT3Ν V]x<ߊ9⊉o3xk2s3WK%z;?`+hb/PK kPK M:'net/htmlparser/jericho/FormFields.classW x\Uo23/3s$46.idi M$)5yI^ 3oZ Z@) ¸N ZDE W܍%}Ӽ6|_s9?=: `h2ܦ!T }  >Çĝ!,] GPǘLO|)>B>8>󏉟}Rx'Qp+:*30J8`$9<|c 6%^=!46FyQ6+XX7}Y7x&rKO|gބwA==}>ydV?Տ)~O3?W@5* d{:ZSj>*լ@cKm*zCO_:`ޟVBN|2!_ӑ֌c0Usy-״S=LnMRk֓tGˤȜKCĶkT2M%Jyy>Y *:Xuc=s'rJ1Lz͊K|^oiȂ55SH؊vׯ!ɕ/ 1&T#[2'O)m? 
w*p85xZӝgO#X[j7R]fg!hpOJSs] Pޥ&v:X{iLmmtH2cõCݡ[y_/zœQN:Qme&.A-O]:{MX%ߞ:Bm22֚|M.F>BÄ́%I[Ӳ)GkOo.قq^0q9TO9)f9ǘf=sjD/vA'y,$r9%]ux( ~)+<#,X25`.(ɟN?J_Mu7Ef&%Y\K>-Ŀ$HnIF2}E9KQ礨 H#)|/qH\۰[ ETI!)B:軎m(^-' 1Y7l!5RRD-1h@7-R#D/ Ħ@b D=k](vzHq:هw2")E,b)nX<&S+%׳Lf{GP] L֗6?;ITeDt1@rۺHlQ$7 ө"k-rj;n4[j'KK^ä#dyD4=?q7yrywCtdvSI^O]4:-Eׁ7f JȘ4C9 M3^Sأ=rv'@AtjW=-Guq5\úNϒ-7Mq3f;ךou7] B)kXnV.):dRtiSB:]K%%" YT>/c`"t%BѨ#!):iCiU)$FV0(}i(1*ql,W T f$Rz>Ke(42==RFVgQE\P9Mxi/Р _A^JLyz*z72dlD[hTDěR{ZuE4FJC%ޥ~Q?MPC5͠i̡Op%>Edڛ}39\( /4Ǜ}˔2>[0۟DޯՒt SVe;l_4\?F#/N7FϩϑJb~AZ#9SOa+ң`wh8DY 8X?J ,i71YLv G)eJvRpA+וL|]lb;J夒D0EiFb8Vr,c@.N=>%TrIfL4VJNqKe46а<;n v=M?ݵ歺-Ÿ\"[J緪-ӨV f`5RL`#TV0XM$S ;*5ZgׯkM~m!*J+ }٫V#th4kU' 5tUE~CjZ 5ۥvУ v>-ԠՠcfThu,TGLf*5ӨN |7l[5St8vѢ;p>P0#b>>X8OOb>>T43τb>>\< //bR2+bZ:oobV6ъPKJP#L PK!M:/net/htmlparser/jericho/HTMLElementNameSet.classSnP=7/aiB Jh0)J)Ұ"Ls8rl8bOe-l"b͏(sCHĖϜ3gEC* 7H;ɰuɐ!*{t#O˹)9Z[T(!Bk7PKu.PK!M:)net/htmlparser/jericho/HTMLElements.classw\ǟ!t`ن3`&`̲18 1l!a$S=81vY."` q̓6W3tj2N0䧝o G|_,2ppH ߥ"'XNCzXLI"L{K+ffcG?G}pq!}p/B]UfL p_bv<Z FQ"K":FҲ .3+XeYa{uCo{3{+f!už3Q!C@>dSR ccgl`ldlb;,dj c DtT5ǔI'icISA!BaS!3 ,1% NJ(T0e5<2ڐM|5:yK65FYḩH0̍ T0#9.xU^^ v vx#,shʕh7R̢OZ2 -̔Ȝ197s24 dhoPi_(jB|?k+x*OnQrOy.@qM _Jr4[ռ_t##ViJm@HG-LfHU+f;RS aeY'pa]Ҧg JoVb, uR@x2TWêxyg+^/N}?KOuzq?/U_4CTˣVMr@18q-WudA˜ ȆL+K$YNg9tVVOʚnu&mGYVOʞn |4n2غ48+>^I,عFاF̝>b{|>B ߂1_5B9U?U"x1c |q X3M?/fl)Y@awn}Eg wO".IX"&`TmMUrEv J&"aUcRjScΎYIcA9%˓Wf*yE\Qerز"^̦,NLVǠS4S5S5e6ݬugZ_V&"9h9'ۜv\kOVvP9'O;nKtM6>~NыS# *)ʃk1;IY$v+,mHC^GUM |՗v ga~;͏]r ̏F8#σ/$+]e 4u~-ӝ)nltgVOMyltfDCI̖"Αn]N<8_n .b}|X ˈr?q<@,ŲHU%qb'52@Ljk NqH eI0^'n2J* 6#n;d q%#y Q 6Qb'-r U^Fl)nyqW;D,%&b\G+!vD#ze=q_n!!y4aH7XEZjY-=Y-jUj+ܠSj92rRKRKRKRKT%Vj92Ԣ+*VjiL›0vs`s`)Kv Wv ,QQ9T`-*f؀ MfJƝJ'J'R2Zr%%~%〒%k*S2o&k&Ήĝx''H%TJ<?1fkctɍCj(Knet/htmlparser/jericho/HTMLElementTerminatingTagNameSets.classQMO@} Q&^j7cP}MYnͲ<qƃ̾y3o("Z;K~"6l2F7\!<0 ?soad葡Pe@ -6 \o4(*p&'XH(CRIch7F M<zAGl p& ̈́i 
?NMh=9Cu]q&4l5BfQKB("v60[.+d9%m`b{PKj/MPK!M:3net/htmlparser/jericho/IntStringHashMap$Entry.classJ@46Fm7`Bp?֡I2> |(&.\8 zp&lUҮdî=A0YI2ƙX 57Fjz`+lhf VJa.B |yBRLtx̏ť(+1?=OUj.F%8a%@~U{0 2d29[?pB!ADEJnHI7[A bX#:A7PK٣PK!M:-net/htmlparser/jericho/IntStringHashMap.classVSemM†!,iE) K@ҦXun f<8NEA%pȌN9Œ'?Ѓxf|&H 9<}'?!Qw53j7ԲckZ_-2#rK5VgՍ+qQD1W˚Z*.sKrZ͛29P0U!DA/TT8O2 ӜaRb^0w,}{E33]GI_8HYSXQWb xsq9oQ-nݸCR΍M y9o3h/ZJPVf!_TꎯȚ+ ^àW!Kns$! f^pI %0iY s# b 򾄫qWBT G5d?`MSÜYzgu׵mB=鳆qN/l_vB>TK#E#U?,xݠKIA{!7uXI~n}hs-lh<0}c_A+um Re¹>m j {\Px㳑#ȑ}|71ioR{hmmmoȞzi[+3k=:U=#xƕxr? >aPV~ƥ#UHAa_j Gh) ԶE*`o!ۛ]mN C3f{8" X_5dcsL)nFB< 0t\D*at1À;^HRЕ WUjuXKX7Sf\̢nLј+ctX'VGrHQKbYX{ԳݵC1f+ZU.WT䨑[x9=ueR=l]Q } F/[ wUoN= =: +G/FzPK8PK M:#net/htmlparser/jericho/Logger.classEο 0/Z.:gW1TҪPUBr<_| Z2Lfa*g(+6j93xa )DF70źʭ204g#C]7kȒKv0WJUM(qf`hzȖt}PKe PK!M:+net/htmlparser/jericho/LoggerDisabled.classMO@-ԖOAEďTH0X˒5۪ʋ@?eZO%n2;yffߝϯ'CGV9 0l1 $Ypʚs%&3xUWJ[H2d َ7 :WS `n?ږkKJH=J֣)?8 e¾G3oJ¿$Kqں$a'n LLʟO8%U5!p6e U3a(~Q9J:b-FXyRHPxv@QPKGPK!M:*net/htmlparser/jericho/LoggerFactory.classT[S`=&K 7ViDKTQQT644?? gkR.̴wvn~ 2t!@'C80o<.".#~B혈k^1@< Lᦈ[<)bH -Nf5mfO,9ycY 4[O/YNoT7ug6cKXռ áTN]UC5ʬcfv$~;=*a7n'n[3?"]nj42A*<86ʪII  +A$ǬGB! ܋ khXz~و:]2IT$`MiDzICn ab*#ΘHUsXjzI|2 Qp=M3hG x*3'-y]/xu(KҍrvB ir4C {d[oٷyl,䴴<14V ;@xGSV]s \Iu\>53KMMr$EWQD47sB|Ezkё;sRM͊jVJ4բeZV]|GHONL| ~zzGPS>P71=LBl C(M?W"|w#_ 2|13evelu. 
(7EY.?[Jh@G6FMcqGLǣl )TZu!6u劍PKP9K#PK!M:+net/htmlparser/jericho/LoggerProvider.classKO@UVENcFҀ}I-A0\Qƙt6ߙɹ=.60-P0ja?T 0* OH^)ڳ5wwPӇŝ'gfwsMr ĪV0Y-)pU4{Ifq7,%\fj}s`i:?'q۽-rWvc$U#Dł䩊䙊EIMEZ\L#${N#f4RPT?U1 !+9d7[4sq*͉ecM!Ԛ #`j1c."T╰,5A1aGÔG1aƇXPKPDxPK!M:3net/htmlparser/jericho/LoggerProviderDisabled.classJ0ƿӮϪT@ La"xٵKdaϥx1P|(S|_zpZ.,|^:.]b4"F4bjXϤ*V"L[]U*u1P2ۻ'4U* ~&(<E\fbl*~>sAR>Z]Y<)d…Gh=|3ebkqpq㹿qlvs`9!6-Vբ:W'@++PK3SPPK!M::net/htmlparser/jericho/LoggerProviderJava$JavaLogger.class]SP!-UZhQb-ZSRzhO'&I/(]b/xswU \uY`r%C^yr厁e ]g׽v[jLaC5K{AGebݕAw>ڗԕZwV{=ՒzⶲrU&&}x-)0.S>؄>.xDd-P~tdK ˿+*5;ɩo0(XB <i Y2,O6*as^`YGK3[9X5וؾ/}_/],?]0iIMl)!WKSug[ݮ ̞6Lfa*`Ks$O[nZccFA!SI@`  H&yMŨ>%戈R* ),!|k#=SF1Ɔl(2>D2 E!22C8;F#Е0hrZ I1YE~PKSM!})PK!M:/net/htmlparser/jericho/LoggerProviderJava.class[KAmu--KPE)AH!ʶH[K39s\||8Ǝ9 + VWAABQA$k%l0(WhZw.jycyvp{ woDr,!Z I('ƀzcVĕ];$X"\ NK"͠P"͐&~}7]ao.C!t}-[鉊' 9!#ȼ2qD v1'JB-&MnYd<{F)~9#)":B)4KɇF@jxg +EB2BМ~PK{CfPK!M:8net/htmlparser/jericho/LoggerProviderJCL$JCLLogger.class]o0_wm+i-J ! ;75iԞlSL~Դs b}]_^n .6(c' hqg;3}D*HzD}^!Q !D PK_AqPK!M:<net/htmlparser/jericho/LoggerProviderLog4J$Log4JLogger.classn@MӚ$[h m90%ZMQPTAP;'YGƮ&wE3S[fW7x4S8y\p%K)\]QCC5׷W6|VZ`k۴NO7KkS拱T@r*IO[f5LlYbo-QrW>]~c wqPj cBnj%*OPK =PK!M:0net/htmlparser/jericho/LoggerProviderLog4J.class]KAku+-K-(R#(Q. aeەUta$Qq"Á993gޙWQ #+aUr "K((#$.aCB!+2& ju;sAnڷ%~.|.4 { UDr!Z*wbu&>5GwM5ĕkU,blMlM( )*T,0z0ayn֦)ɌrX,wpD/,DYtM_"J_tHV|2{d0 .a9<#:B! 
Ʌ5@jxw)*EV!N(PKȾbPK!M:<net/htmlparser/jericho/LoggerProviderSLF4J$SLF4JLogger.class]O@߳|V\eTTݮXI7oH0h&u١tSfZOqC"1xG)e55vg:ϴӷ6Y*lܷkGǂuS3 m -l$:v P0ӍMe%0:ah}0aBLg:z upFs꣘0ٓy{Gj@k?GGUј:^8] Y9hM, +X𒰭U'g$xCe8&> [o3fe6rVf'D%zP(ngW58.>g$"U^VRܯ s% -wc_P\c[Q*|4QHRiMd,]"\R3U8eNpVʜZtʽ\~PK8PK!M:0net/htmlparser/jericho/LoggerProviderSLF4J.classRJ@=ӋiV^K EA)*T>vMSb"P*?JVEE3ٳ3gV5V0ficzͼDrT8x%$A?v]!ˎ'ǧՋ!ah7έ)}!vWϥ׶B #%۵}B4$^CF- ^VZf4ӵj m*g^TwBf UPڷ&,HaDzBϢC!UUKP$'-w ~Ȭ#l)O8.3U2ɟ Cg8S=;qp Ehb$bDZ#PT WA{PKA@G9ePK!M:1net/htmlparser/jericho/LoggerProviderSTDERR.classR]KA=GfڇI[Q=EJúпAH~$3{{cPQUG *܈p!`CM[ |WRx~p˅ªmMMayն);c57AN2zZhZI mPmX2C ’\Lj%,[~GIոA$÷,)\*K XdM\?4xU2buԕEqA>iwM3,=y47җU0h3S: n"=샽W$*+< @`҇ #PK 1xPK!M:*net/htmlparser/jericho/MasonTagTypes.classT]O`~^֭ta7LA@` U l dzTVa8:5&'%QnKԘx/P;&`Л9o?1 A/qGH`'J=~ J?$$?MIxi~z(⑈Za=D|%Z2c1e-{'kf!m;il&vM FKEjI1 PV(]W2g]}'|Z8N$XBYfh;at2 }/.?f5< (P_07E,0U24p5 M f"{qlv#Wn"~]q ~2"}"(2nED;Qцv*w,D;O0pIAyJټm19 &^n6^`h5uxBcUwviي [dXSTZ.E+zRMBWU'TWJ9Bn;ts 53mB5x)}<R:hʖRwqu$|d>}$.Jz~uˁY\O|6:vɹ M`9F!)[8w} %0%@xo3o! 
*?p<u)AtP_[BFJhˁr;hSԛ"T^B" PKʹ'PK!M:.net/htmlparser/jericho/MicrosoftTagTypes.classR]OP~nVpL!YR>ekb)k0ngKגx/BnLLWoaD8xsϱLG1"x,$<(aaP~k궪նhij/녢Q,9楒 ߱L˽l{V}V| e|k #fV+ 5~dr,AHļ SV7: ˷\Ǵn$\jZCl6.d=û T/zݾȫ` *H!MUIr*lèQg e(Yu~)X7鴲m^SuC&TZ&W|rZtU>h0mԮ^1rz^exss5zcB KTGh  *|>bǣ3&O#q`P8ye&H5q9x&a Ď~yG=Zha DӖ7Dˉbf@=O~9N B!#@"|i?PK]e;%PK!M:)net/htmlparser/jericho/NodeIterator.classUSU,YVj(hBcVbJ(l%$ n o}Q}2TfQ9 ̈=s{9g\Ŷp#^|$>YA1ų)6nz;&xIZ 3 I^sYs g2Ha>Sh>oܖBe,) 6]2 D [B!X.䋚UVl]Xz:gR.bLBdum,% RN0Ė=e%tyY.\I4KKº+ք%efYm4$ y9&=y Zlӊfkz^g ]EZ{nOH ޓ;rN+%nt yzd~$[4 y}D҆^78H ,Q07Cbmjfdc"ewܕcpc31"0,cECb+SUhXUFDXSxs*;DfM6ENulȫ(lelJb+ X0JbѴl-錓TkN pIhgT% Wm_+)L7ҵ!̿*:vV=s.yR #Ca{ WJ;;5E'PFFlͮQDTo [nzQ35=۷`N#2g٦mFʱ@5aŠRILSs+ny6B=m%*s~܎N|}uY9ͫZ\jA륏,5^}mzH6pq$uGRrI.hJ D!E {hGy( Y>+P@4Sq>^"??]:V"fs⺐}OkjDݨj m$5y;h懸f]89%ѺCdIލ1оxw6bnUn{Y[ ɑcQG'9ucxq~+>\w2u/Bc\h(ڇG?Dop"9>8y7ŽEõ'[{QU:tn h'"ZI2OECgERxPKtAD PK M:net/htmlparser/jericho/nodoc/PK M:Bnet/htmlparser/jericho/nodoc/SequentialListSegment$SubList$1.classU[S@ mV(rjZ/) b*Zo/ݡaBR/3}G9FP Bf{/_ i f4}я܌%p <125j ؏{&4E&܈# n)yciS} JmuW n+1vѰҦ]K()-Ϙ#m[yO׫U3\Otrm|^oKrnF iFmcv3"NT oZ.Qa-vc2[=Lw$|`{e|#[ 11Ll3lV a^ C|0Q^w9QO J;۲kЫC*NYqO QnInk {I+A`aryjGQLTѫU\TzAԒeqg] 7Žwd6uN;FZvͮf+Pkn~g$6]ѫ^_sWqMŢEom/.õ>(v[n}nY5w^7=öVce5NBǾ*j"@]BC]&.j(inn8(;OQST@@SVFFE gwq"QC!3 WhArJt77NlrVV9>OM<<ͤwwIѳ|H]gqiqHW?Rt'2hiбE^95;Du~l*q?gژH+D:ڐc'P':sKq "}P"yLgX,ƞ}qfc1X&#'ħFUP[Ʃ]/f"x19Q8ЕÇQWo>\%K9zIwas$t0'¾Ʈv oyLP$q Mq9 ~~t_19LX8zzEpvׯiZ;1bQ_#${T|sum`=/./PKslPK M:8net/htmlparser/jericho/nodoc/SequentialListSegment.classV[WU $C/@Z=$ZM Rh mx4`0̤3 /׶rP_e[O>Ľ'09oog~[wA3A³!v Nl bj+% /36>?,hd@jLg9۲djJ,YLXKҏ<Lj1/%Ƕ7{s~odldN@&ʴ>Cd4E\3:mb[99Zb/Z]Pni(؛dd$jiӅj\F%-C&rj&$嚝=;<+=$CiU%%|>'">ϟK_ Y{Nif. KIg98dqt_IxG$|f|ë+*.K{qF@gkzZO/ma;atbiV;ijʬ޽UfBH \VUn*K/lP ETkKiR6{m+Mݸj~ʦF*+Pƹ}ĝ!)iZp{"k4xQu^H@MPfT=g5Dp)D:RҲ#? 
d}4LMc~h,u_Rgj<@uCC=hK݄OET."0*j-@MPc+PG{1ihQ3ϖп#nr5, ~_F Xu>}=4q*Z,|Bh'/Qv0F4'v+=I[P#櫨__?txJ<ޚ ~B>.$s(󿑏q$\H_6u m/ZvZzx 9}QBvBUYaEAc c땥aE1GK'(g~[HVVouѷOr* ԇ'$!u6rƁ:*ܸhg\#&+-bh%nVO#Bs.PK2c PK!M:6net/htmlparser/jericho/NumericCharacterReference.classV[WWN2$ A[U 6hTXAKEh/ᐌ N&{ eeZcWS*gf (t-_>sgG WЉ0/ha@oqrBFZƐ'Xb +8u8aby7LgbDE.YY1*=7&PpU"e| g\2Tfسlyj[֔nI&%"s[_)fT,0:gT|+xm|ΰ{żU(ܠ m:zcJ!8~uYFlkX&狆I`TĐ-9v8TzQ3P]J3)kVtսϳu3z%z-6KwhfG4=ܫGg2)'#fgK!df4>U_nxiЎ:Wm횴 xő,I2݅,U!̺kfY@x= H[xX$@~ a6G @2YN VI5Su$Xu"(AFy-cۂׁWKyEfFX? zJKMkxU㐱dg|;ζMz>D*s^RYg$TCB&є R]DJ֤OhR55?&MqMҔE Ua=($̣S@&iGh$1x-$NshRu3 "HnmZ7')O⇅ʝ" _Jm0H@DYF Gw@ԨZW/NcZKxieuDG,Hџ#鿥U1K3t޺jH]zhbE;HWFF3R%xр~ xqa)אB?PK]m M| PK M:+net/htmlparser/jericho/OutputDocument.classW XT/#q05@ڒX1RLt'Ζ7jtOӅM%]ҊUFm&Mtߒ6%% w=9,w.? ME(x)B7ިV'6>ɉ7A[VoSi}Jw0"vTQw1x?S~KϝLuT܍{T= >?'|D*|cT| 'IψO8UuWqa1g r'F,τ}Z9S*@1exЉ .J~~^C_dȗxGْGqى/+xя()$bn!s)7c =<,$hՇCz؈ TX #ljt~#F֍#cq=ִ_G"M=Z6,Gk 06 b5o#`k ҕih&^j'9&]ɰnh>w1=9H6TyR#KG\4=9দS@4F Mnj`[l83p[m(w(7D0z%%_ t ZFyeZ#pʄR?ǻW La&'ŭDN;]+kruޕw4|ޗBGڸc@c'Ps2E飍u_0SqDʂr0)PԸ%ysQZR`'S"t1ӵmWiCfDEd~M{掹8UXuד\ҷdmhV ZݞA, qt\xb .,-:1*@וɎ;o?B}?ov߃KO&X靄GA$ {%aw9&\9c潒ܔ}gvyHF9Qj |D3(=%. Жb9|^.hڽg<%7θQ)$&i)T) t5LB`HM?m1[ c@ꪖ!t-鯮nmH.&PE&P=K i+p_"Dm$t8| \9ɆdܤW3f\S)or\`l3YA!<YU$VxCO]IW)Um5}Iʲ9WѿgR׆ &}5+0L9 )g#.j($zlREl7J5]=K^_!$s< aGp䝴Ǽ5ShofQxk-f{]feޙQz_pM?1s<7̠ړtmqI :%Y6 KX帄UX-%lR*l |)<}3c(rP9n,@66V{BN[{F.sdJ f&Gt7جk f dd26X]wA G팆tO6$ѢXy\C e )MdYhvPM2 \[x]B("(1K=Wek4]@;>_i x}*cXsm'IZ'vEEW zGlUe*hWXZ>z1}i"I_W20#IE5aiYU^PK PK M:*net/htmlparser/jericho/OutputSegment.classR[kA=lnդ&/.>'bC eKZ8ٌ 3tV[⃠GI^"b!Y9{wx %q;=O3o'&V@G17ja(MeM`w;VºKD\ 㮄R^bNE e;=2+¾T [@P-FZ1 _v/RtvZ]9xANūݭl/ͪ(mK8r(3TPoqib&Oy?L{l#Er|Cz*?.# [_3PKKhPK!M:4net/htmlparser/jericho/OutputSegmentComparator.classKK@ѱmGK0n)(Ҕ6)qݸQt!QP̹/G! q3Haccc!|OJ8  ԭt|!I=v7H;l&sU*scwHMI l%i7-PB6 I-X1'.?3,B +H88qF"|ҩ@;ݬ==@}Lqί/ ggh f߅{X/!v3?y~U~VѠs6X! 
PK`ݑNPK M:&net/htmlparser/jericho/ParseText.classmMO0eSe vZ``!C+dTw<-E<*zC#ʤ,g]r_r9*Otz v"bPwУQ/ǐ4S^pJjYx)ñK!*qNP~9 3'QXЭۿaLDˇX_ѢEff5ԉC6Z5Zۆ;mMxxx]ivEPK:U-PK!M:(net/htmlparser/jericho/PHPTagTypes.classS]OQ=vˢ-"Zhh-,F6݆ЬZ`!ۍ _I^&jLW|s׊/33w~ <#AG LF(M~0-ns/-#{Y 9 3fz|uazc:ڎr횶tǰѮK G[/Wk /T7*ȵF^) .'QhX*l3ͶuLAj/eH.aVi!T=Qp cD`QqJW; .#*.)>(Ba .>: m贵]Ps>ӋbQ(omK >XTN˰[t%>BaϨ"Ӱ>;64SOxӶ(~ @Ӕɋ{vȳ|9k2W3&]gJb15Py(N~|MU4ɦ=[Vcd E5.EP<=U8D撆UDԏ$@H+\9$dB^]z=b@!K0~PK5QjPK!M:0net/htmlparser/jericho/RemoveOutputSegment.class}RO@~e6;_ IcC"F;K.~(M4hC[oj@hr{>\X/bðe(șrP5p<spe^a9{2ߗa <7ݷ^M? "蕽FhRP'x1;NG;bGD+ F?zi`3}lbMbUx"+&p g6;A+G|unXEM5[>5[  4.?I'f.)l}ޚnivrxbKB-re򚞵%c$SrNh $^SIiN42iSVat8>)a`Vm!7:.: iHW RYV ]u+E+S+](Z\yPTPKB?-PK M:@net/htmlparser/jericho/Renderer$Processor$A_ElementHandler.classU[OA.,lG,7TRʶ "j-(Z&l_' 4j2)"adϙ9s͜3~s mЉR Kp4F@i J1*EH1) )*"Z1BbB b:W1вl3\. 1шdʻ+J;.Sob6Go=hWYQO2ҍ#`{u~Erp`PH+hGS: }֔B]O=h}x׃ zP!S? ZR7q ĮtEvju7W~PK4PK M:Anet/htmlparser/jericho/Renderer$Processor$BR_ElementHandler.classSkKA=׍dZchZ F E'na K*GD сwvsw{ ` E F$R(f9LL L2z*0CF8vMkU7ULBdՍ@Vs\-7U4U{stCE**uJeBz~MNJPѺcݫ_W- Su,0/F sr=:{Jٜ, M|o*/!l@yRlB]AB uu--vЧ_cQc"j =x&_yĹwQ`Ylq+WY`a;bEF%z isc)ixQ_<&q[PKhcrPK M:>net/htmlparser/jericho/Renderer$Processor$ElementHandler.classNNBA''DΆ !Ā?x/݅m~e\Pd|}|ÕB&:cTϿB{kbH'L+[9CY,~a7'[^K+!\;ɾ$m<7,U;neF[zVq CdIphl2)sAܦp- 8í0ZPKFTPK M:Gnet/htmlparser/jericho/Renderer$Processor$FontStyleElementHandler.classTmoP~.0 ^ؘ :T^ ̗9V10Nl_Znҵm&?5M(C4Ysι<=?~~2&"k2.L 7E֒U@Q@IBY2\o?߬kՃ- Ok; JͲrajCnyuir_}bqOmyfWw\>V; Y[& /xX%&7lGڶteCxm PV&gnj UNK]•p!f} lnG<!.V5+@a {d[޶w=qʌpQA f$b!_목[7<: rgf۶Zk :H ) KUnwuYT"e)-7B0kAhXtQLiJ'pcthW鳟D8vdOE3 /`|.#E3"N6HvC+dBGbv »'p8qj0x4=!bBbd7Db$'﫾|_w?~"x0Iq^6IMc*,~nq,BPKsXOPPK M:Anet/htmlparser/jericho/Renderer$Processor$HR_ElementHandler.classSn@=S'qbJ uiҨ8Uy@HRTO묨#w^xF<4"B$39svz.}`UE,0̊27u2Rmk: vǝFS]Bi"I`q#D$f(΅L[솂㏞HgyDnOāGBvE,bgT['\yA5 +SzoCֺb0o|OH&:6~ 6-Ö2*wk.{ \'=熞|: фݵ8+M߷qWq49nYmbWMi[MX['#i 32ZHri0vLJo"xoo:'a!R3ILP l(XBR(Ş(+x$ VTPD8硈Rp 
c!骥oQ>C#ƐښaatT}V2*nNȠҜQcȾ0}Vd=s]"sQvp 9 hmJthB|>D$.n z9w [ŰU B>8D0?Dh?W: w[CHE:ѡY7Dn"uou+Sl`*RH.jE\#nđPK@}z&PK M:Anet/htmlparser/jericho/Renderer$Processor$LI_ElementHandler.classTjPn5mstnjfK͵.CR00:Q$%% + >%ueS?~P4paa6 e{*kаT 0T WO;5(tenZ2 gN%wɑ?pXFf_F{/dГ`\K ^D@' CS<g]9H0U@ yFۮm;0h|#~ZTt?N9}/g &rZY]DkGOXAA̓6KRY= LnTH6Cd2't!ݪ2 9Z*'ie) qsyBW1_濜zET"_\_@PKowPK M:Bnet/htmlparser/jericho/Renderer$Processor$PRE_ElementHandler.classSmOA~"/"E@pAmCb&61s^7rq]?0S c&;33<7?M y-(,/⁈%-dQ̳X1j!j{h({q$bj$:!L5}vWoC:QsDNWǾw9:XǥaMl2ш:`.vy쵿^քjxQx{*<32* k1O~4Bӄ{:~uvt^o3Ml*# sr?rZÞp\OJ*-io R+7Ǥ?uUa_+<y0wQ^' dfXɵ37"<{z6e|.wy`9r,pY~:y5E.I1n%{;+ë PKYoPK M:Dnet/htmlparser/jericho/Renderer$Processor$RemoveElementHandler.classR[KQۮj-˂zA{Yz!D( MTֳrvEBA?Z Et̙ong^ߞ^bBy 1B6  l̳I2P"/ 5>SusBUxꪞb?>W:p]vݾtXX./bkT}aToc1@ڕ~sZ27Ki#|~' 6auN\sսsq7nH8 K {b_? Q#q\D"5$#0p/$ a&AYvcxaᑆ UY[_Ujjx,{5jy>[]W%ndK}/lH/te`N[7ߨz֥/t'Yl҅jᴜag L_$ qOLc` &tYI뙃PK S PK M:Anet/htmlparser/jericho/Renderer$Processor$TD_ElementHandler.classTaO@~^6֭됉Nĩ+E퓱DcAH60~`27Pq{={^}@M:*:J`YmwhXաR`XpOCtw[m (DH)VƱ s@ te/d[>IN""~%dOD"NĪsBn˗~򔐩 Vmp tkӫ;32$~(c [^0zW`:,xd`: Oʗ (;\yl /!<_?54l(ڝ- f^'l/?KE߾pc/|y\5FzTqILb8$i2)'+ c>/8.g0ʒgS#q]?}jXMj1qb cf8{`m ;g-q2j,|Պ_! 
q+/>xT|NEJ PKzPK M:Anet/htmlparser/jericho/Renderer$Processor$TR_ElementHandler.classS]kA=UG7Dc[kbZCPDP4$SٮC\Ygew [J Q%w m {Μsaϯ+E&P4D1aK`[`i/ڍ@= WFQR2lNɈPzJv?#'dhd>'CVb߹uBS^`T/ FГ11ul޺r{O] ֑{*({OWYb87;|H7&/ K is/[j@V#xkj^(`y%|G $C [ ιCc i5C>dcnd!Yd+ߐ#_M42\1 PKH'FPK M:/net/htmlparser/jericho/Renderer$Processor.class9 XTYޛK4cEb8j$Iuy0C .I,jmH$P6Kۘ4iӴYMӦKҴ=Λ13 -|sz9wy\~{`4ףI\]M W|n0,bfs%,fss n]FW9 'xYͥܬe /iqRFn:l㦝nb͸xn-N+dAnֱqq4}NB15GƠ1ĝˁz/ZX4,c }\Ţsb?f{ڧL_r!'^V#Q#~'~ ?.'Љ(I܉vS ~3 ~V9 W L_rP:WUR_WPno*x7(x#soR𐂇<2RVfE[(xw8fT Sq{BAp T𔂧KpS8{x{OeckhDGPᰦ7|Ѩ;ڶyBZ!MGL^E~dc'~[YI%?Dk1#ʢ>P#̧B?A&̟"T&P0%(BY=P4Z8ܝb=POjzNM{"mr=klaZ> CH$ZBѴhXz䰶i<;}}!_=ݤR Cm>ML%Nhi<c?ޭHI|J{Ϭq]<^ăWӈE!LBC!-F 4JҚP]|1^N__92^sF<^}Omؑ⏄hdբֈ4.A)B)Zӻa-}#b1-@Ҷ5-=&¦UGtƂD{]q~@)6*LM(v$٘RΗ`ՙؔk~-je;QMXH (¼1:L |޴kWka:t,;sgGxycԮc$c15ƜGC@N/b?Ǥn/kƼ 22Vcf/#gK2ZHAzdbbAr7Bj躱 Mq,uxyRbL^=Xbgai&4'ubNVmi* n@u7 *<ͽ_}*<Lw-ecܜT!k7.UI=͋7cwd|L=l?CV,l|P% XqfU*tÊ8|1}cRb8gHLSLtq">q[#TRV?TThT=K H.Y4&*;Z( .|Veg*MVzY"n.榞${4_ɑ-. R0+a? q+uu~=GB4@=ݞdڣZHNLi\u.+,x8.{ѯJ~bzQ=^Nvq~f=M{B赳 4xKŘ~YczAX6z푯(+ayAX3v_r= 4PR3ZTLl](7,D(JlDWR=gdG %KQ9Ï6/FJECy/PIO총_ÄFMSn643נӨ%T mËNǰeՙCr_bReܱNʌQQJݚ.Rx[;V6{h//uL~ҁ?2C)ϬtP9SEPSLbLϥ E5'U*U?:,JhvP);?)ͼ%nggo'bʾnJQ#/e7j0nnx:n r^*0? s=tD4KHxEIrU{s6!2o hѠRNN(_)KrQi5Eh mS8,_ojOkGQiO =Jd0Tnjśi%7$7|nJ(z%\HuTH%TAC!Ry##BO"GG_F 7ܓ|ʀO!7~IWkc\;?[<'sjHeS OD!h&/!Bwu*.]!p]T]T]!.wTwyqOdG+f(xn L `ay.L(d/FJAX ,7y5{E܌FvKCP=m)cԞ0+qjYٷ0g5P@~̪< YU(rGu+Xk#֚PK"[4О8Gk>[fwY탰0>@@I~ ӰP GgpY++ItKt-% @%ðqXv,ˉpVH[c3 {V3FUfFpqE#4j.Yf- ʭ *&͍gtWM{9ez2BqEQ(<[Oš{+8G@aP!&+bb2A{`Zxzr1GN;$H[)*nJ٩z=zZ3 @G: -6y$8` a͘j^owYtzN*,no|I([j2Ԉf'uq$O UB)'e 9 `ڒB.ƞR,tXWqhn@l Fi (rPI  5%~%!Nl]րu; ÅZڴ7nIP@J3ymK8'-.u|$Lar*nɄUDJM [jXoіXl ҇2f 8߲{2L6~ENSkspٴ|-u KcmDN]--'oj"ߟmSvkᝤF4 2cz*nO+v:+<;p% gk)T( SSӷH$`%[:Zʑ椤"F 4 >}9Mŝn|fg]8kd~eWƴFqV-VaXʅ#5`+y^ChK3 UX5[tYO X{^ +(0^bz{I-iEA6$=',ɖQNcY8i*5&+hLTI˅Xddl!1l|޸R '${1]#]..L]#V#sMgAv'a!>&'td V BIQU95%0%>ksq|I&K Jƾf¾oq8ƾ/YDEp=7]Sv%m7+vace=X2GGFzFUUWW3?w\ (vX,J1<|Eʼn(N>F! 
X< Ox,a,4y/j4DW (^:YC?5l7XaqVśQoxGŻ*S;y@:2W鹮̉BQ8RArr`Fq a)JVꈓ< 1-ϯ'L^_ax2Cz<5Jj]XlOmr P\y&LKq nqv؅*ܬc/1Hɘ!;t*>Ww lip#ÈqL踏,&yaE3:f$!:cS#TOL8 |IOLIl1Uø'X^lD5shc'Q6\:~[:·籡$h9{Ò4[+$H=CmoI'6V`OlbYUl;rZǛk'C)VZ?x)?",Σt#"ƆYwgqKS2$%%9ښa8'Q$iܣnc< F#Lҏ"@Vg\=q5Mt^WMy2鸾G!mȏg~vrz A@"-?|܁?`ZPNwZwcSē3&O"}c56oxLv4D,=p3e?} 1b=8!קv_AbQ*dd7I9 zSD0DdҩMr0"r_ Y(2/ϳ>F#GO>.jw܉I ^MW{/\&tԯrF8C *OG,[-G>pɧp(~,Q'8)'yPK=열6PK M:$net/htmlparser/jericho/Segment.classY xTo2Y C $Dv45 H1$y&c'3qfP.Zk[b7k6*N(`m}{k}Oy7|_ws{N,*'Bny)j"zJ0 gFbS,K\Q2Xȿ.,E=7 fylk6$`PABw!Hh<< \T#/Q7aHGlp]ޱggk[@m$G7 h#`i *KV\ϭ^s i U |J]z/w9 7AnlK#ȈnJI@ GGO BĖf:<usEC=Ѱ?8,0{RkZ a&:5D2.٭} gLBtIw:jا WGfp/.ڔ94Hck 7L+OG(3+1h!vS7fгQO' G-|2ƴ>{dIXi=?̪O 6\V`?1,̬QXV4Eig39h"_)`[mj-kMQ*4uq˺'2AF"uuMPssU'w5KN S[ _tyM+v85ܟx2PB2?ڌU w~s[Sjg1"WZ`Iv;˔GqEq?$;(m q Z|ϒdj`eu6yGܒqIΡj*VFNiec04J2PA%U?7⏪=c>6~(4mpGz",QK#=T:a8d՜m@7Quv/q:]:Ɨ^j+,cD)ϡROR p*CuڝA+M9/NސΪ M#@`9Y.TMyHG$|kE@S0)": Y: ǰ6t]Oܣ㑨;Twp< ZB7}aE41q+zP8Y`BLv4M9I͔43ܴTߨ:d܇8Ux5]\.yQ_UUXRm&V*bXsx&(b-nZ /EAWgYEl dl30v"6M6Ѣb S\JҢAN3 §n&kS6nqSMtK ދϰ]F\r';w)<`WNnM >ʊv(bR`qG$3*v)8*z(N(8 ^UYR-G"v^E}6q"X᩿&} E\^ 61ASr1ug ˴xϼsS8# :$Idaଧq*'9"|ghSPGǢZ!m.DNU691*)|B>a7{rHPҽcWWk'Pnɿ\h!]:3џbGvY8`aĝ1YJi԰+);)Ppѡ/ȋu\iA L&}eIenh-J,jEK˵I^-W^_C7U/9.LCH/FnMqȈeQb[?U&6=]"o~x/ON#-A?lbaE \\OT鹐+z@6Do(&DM1{M(=)< QXg0{K`;> CQ=!))Eϔb&YI7JdS\<g7sX͌5(L9 *$](1bYS}&8n"s"V7gpPwJ.xr{N(#S(j$ZS<ebYeJbfNN`o`nɈ2,|9^!1rIjxKQpi/I;KRJ-R,U}e,st ziaΤ vGy?\:ܚǐ z KT2dy#M4tnraA"k;bǘwUSXxiTOG y1\)-D|i@+a${ ;ɼ]LH.чg~טHVmvw;=1,L5rپ̄*kzzTJ~S_W0LLnJֶRei1(,3WYUCSNTг+6ZLZeڪ,$WYսKSwTw%͓gڞ.ocg~Vsq̗3ϋ86$U2}J_J"ڞU^Β&f&h'i}Ud51LOH%;eږ!WFsYLONB-[RW4XvMwS| tFY'f.s{/jC0mG{?-)\fţi%@J3 !´DvڧE銏/pgQƈ%1<&iWJeLISZZKqV3*Od!Zşx<>N͵5[feab~,7˳2m;~ת)twz]1瘝lWy]M xww]k ɾP6Ȫӌ +NY[d-o⸭!Kg]')'c G3=EМFǜCOY+4x:X*,t<bTX<14?OKy-boM|O }}WQC.pѯGa>[K<jk׵Mǰ^q VMM *+3& >k&l=RrMڣG6~OgO_H;(ޟM|)j}Ѭu_b#g͸ZK-rWbr[]zK\~Kq>NcPK>}*PK M:#net/htmlparser/jericho/Source.class tXJ:e;c;QnG>rH#;`; %(V%#r-@P M!;$%#Io'wgWxe+)͋ϿϬ%\gvlu'v p Dfٻ٭7x6꾝z>@7԰>$a&z8=Ը~vfes!A=J<{XbxAj~a/{>JSNx 
+٧V~Ɇ)z=dy0nuH08dO83Z I'{9gsy;EoK,a _%5RDu'F.&|f/%} ].">D~~~·~B 沟W~QIz.57$&!dI?#?3 _7z;'7dgPəs}A rM"$rr7u.K?uRbjbj4Z@B3X|4" G{{H5M, '#[.5X,ӑ&B>]D`F:1"'J]8ܾo@uɠ5G MIú)UUT r9{B#- tB7{B 54W[7T1] Ҧc~Y$42"X)A SLrl8cޖ@s=PhkǶ?5N8"hr[!ooаI v J,H@[ C꾦h7rp+$J7Vk֯l C;J]$UAX.EC[Ե J+E­$6_UEZWTGݰK"П`7\@Dץ$UcJ_NXΙeig1i@s.6T H-Z_> 50/HrZ֫Y&URZ/lRtޜ.+gۛdU,ČFT32 NJ8IJ޽tP4Wơ6 :K=NBhFڔn%N:zdhWLgQ)-u6э̀UszVĔ^e/r䚑Ld+u[u%4|qdz"@zX[k9)Bx*& t_l֘qY.!ҭ-ex džFqץgEAg8y۹,3e[F%Q mѪ(1*|Gc$dLX͡Lz:3*hq "јOP)4c%gĊ"l)jj+'܅^8:N[YFЛ"~6Oa[:ꕝ Qi%P'F[?GM?$O4eIi\ؔPRTaӖLNJWbٞ`|؊Ώ<:*e!ChFUبU@6߶dEd\s#;P8`%R9Vq/SdB2C72 /">eXW2^~ + o2 ޔx@fel ߆Ȭulێ‚T2(6Y>a%x˟.%#+d y#oBV2of%ނfXgrXxKXv|e2oEE6xU2)7؜t$K6f ]$My5reVɼ_.q\XN| ˨j nj\kVq/U<&dv5 cNiz 8o82zCq'IޝJ-eKd]r/ $.-swKG潼Of= e3Y繱'b2 jsJ%~̦#֨ot+{[jKϺh"7od2^0Ҁ7n%iQ*&9oƿWDҵLpO!]?I >o4T|DoG$Hx\*9h^csdVfޛJ5: Z~_/" e6S7f~o2;QGf pz^RL}2?q)2[H`E&F)C>AoKсbKobubeVM~[!ߪeHKv/p Q/7y?U(Uw28>ʸe7qnzf'7U;o$C4$2 Kamb(>&1 ?)ͬU⟖.12bE1E oĐ1ʠ6(qT"%փ~A{μiO|K)Rո1/qD<0 Nu1 | %?*$m $>,>"ci#(7Xrq2K%fQZC_:jF^gyN\xFsǍY5YL!gd ?)gej1ұVwx]WYk< Fb+Rf=GK4mTHWʩ"ӻaQ;]Likomkjֲ߰uSCk(KPANhnkk7h4 }2E17bcӑ8ΑeǻKҎ|fp M;uu큖fAF !:0w-;uurhH){ΜKa``Ҭ#=FsQRmb.rVR螷vB0rB㷅\Ek9-MuJ:$W6F(1>];ؒrdw>bN9DDm6s0;0ogMx#jB9pZ|?^-ڝAcHp'czw:^gz'M E3\!M"[yi9+2v)Bj_Ev5 c=/ .hہ/xZ e;uN;9K""iɖpп/]W{ZuIN c:a <`:x8]Bk%x~_<?Ԟ?{~_3kKx]{ ~=߀7QhE;KKQm7mt >U,ln?,fF?[?Xg slܪi-$f$}VDRuEiV}G l̇gJC4_TgV56 g+h]D8hr b; (:A 44 Dwod{ e%2rH(`,oCFDwxCː/+ +OA 8s} p!Y NJ( ê!g 4y)$7% a,4DF- D. g6ÅH _ ! 
50TB/PO,Y<뫎x h+[lN{r%qV2T- eՖ҇-*=8)w?i8 M CeyBf:!;=̓1 a8fj@/\fZcv!h:B|T6M#=]$&XbIZve:+(JG!8l$6ݧ1^1slpfl7(3E/#a 70 ܎Ni$cˏη#w%]d;2#TrZ%ȿ@&Id^)K]@m?em6#D0bG lɧ5g޹ 3@jGjq@A4UAXԛ< jFz[PXy1LU"겊9ŜfV:E^c|rQ㻕k,4IO&$5jĝ!N;XW <|Y\ܯM_I_:g4gv"Zi g׈\K%T"% Cģl2u*Vm!s]# \ IPb#'LJ ,A` ,gpUbR + 3-G(ӡLP.7)JIx=G7Rle/M+TÒps 0GDT䐄[hEXQmdXv gAI!_({zE{>& ʦ#vu1U%:${*A3SښF4Ix*GàpX(V齶2ղ5v_j|",¹q8I-s)!V;9D u"ݑ"R]z>™H;SLvٚܺFC):PӄPshO[GRYF@TwNjGQ~⏥'L0'Υf+N8&3(G)[<>kMkZ֣ ^U9%9= 9%9uqtǍi7ֱYV>Om]`\VdO çrƤyzvę 'WiYzE5(/8wCݱ7c:2xuV"mlL(3߇T99xN; lӦDǴNiR/gW \fTw6GEcٖHu4K¶ E!֨c&͵TO0oT} db 8iXF>hzO±ax8Vn$3 i&,tʎ,бW&ccM{¬3bl=WLMyAB&#]d"Puy*8yz.9Ь%IoO!cպ/3]6s$W&5t[CP0!&FNs=E$5<´ah`Zc S0aO4,`QS Y }_:Q)%vjWɅFj)ҴYeOiLZfgHoNl# Uq9 .k3CRQ'1|i( a&LƏ٘Q?Hvd;@/g/+'^A=JPH6, Sh?Ds fŸ! 5v@HUT\!d t : 6»P"U42TE*Q ."`4Ǥ{ڇAL R4_pSp! 1>yxHiyK<|F4q#7b́ܟqL(Dza  =iq;㫼5zځl3<x~ëy$?x{ Oyxs~,ɫ7s+WaF{%-o;;OT'F3og&TӁ*bR@]=z(*01G=>8v@GwoUZw/i C]UnE_uB B])أνh'`ۺQߡ zc vA.,:hr[ 0*CA(J.ZDLS2=$w@b PdC ¤:lk@VtVjvMݮ\@v vC^W0~O'ESIZhPTzY` G@$[]!?LΛ&FWךT` d|QF3đ*n2Z ɘBpgTCC{GW-[jk!wA0')¦NwhJEhnH!e+zNNGwr@nHJ@.@K%])$PODٰ2,gqt.Tt'|d+-t;{=h!De,=N-äZ n{A)EfP0qBPpl[dtp5>x*&Ej5CC:5<䡋n| WQeJxY;5a&DphGC?sxD=ثdיp7Y-T1G@)PvA\CAp&3%R@DdVhaE&\b*J5M *ib> +X$b*j Q pp3i JL][5qb߫b;tWY8s1 -nAGq&eOӪy| fD&VkXʵ%ʍXNPBM\ z '5#׊ ^j"Q .6CT']A+fxSCDzÔ5)k!þ _5yɠhޱK$)[ުD -;f\dהuo>/,F/ ņp"Nsл$dXϣ?5rlAH's~\&0+}GټEi >!Oݥ6iW](T4H7n*X)"'~ HgT/DسSpZ| w i}[7_XI` yؚ7ok֍Iϰ:ۄkoDs72~l߹R礹Egm{I3;M)1l:q}e4n?Mawtrd=0QCK#xnT4,!-XzE:7nP&}cND,<%K3:T_ƍMoC]kݕ->#2@TP᷂a(QvB$H;*!dRx >(e%V()1;E.z$;<%Y?􆻂׹瞄V܉ș9S?"gݜws9ws@gq"g:LW}#jitM% oCջl 6g ש @UbP,c{1d{18Nu 9^} 'E?fPh41\(¬q6/1cͼ1BL೛ q8PGTi~ {JNc+Pc(Գ[8<5Vue6tn&lISy]VO.7]q,Q]jpдAI$EHޒĞAhA8eʓ ,&xKx:lkOjL?їBsg8GeGP aZxê ¦l4TsˏaYug|Lmo%I>,6Ԍk>eV8,ŊJ1|cJ@Y1R76 sff, DbD-bTʁ$MPrMYces%T$)3i9c6S zri%S3(Q{y mNee{^"^)'&|TaĄ ,dAejkȥ:KE!Tv˳;x] A`fJN1?fh/A?ݙP 4wO5LE wKٴNhi;!oqA%)<Պ/:]zQl6F.B)Q6X+epWe|^dSZ }tj@H."@PaL@r@@iu9? 
ճld$q٨c:]Ul,ʚUgdLAߊ)h93{pU4s8.2^enʲb3lQ&ߚJf7B{+215*`Ĝ}:NRMFzƵj8-4-7s's9/aѻ&pqQ|,4nRDsZ&Eu=(zw4:hކ/PK6 0PK M:,net/htmlparser/jericho/SourceFormatter.classUkSF=By`phHRG:#<@ i Ч7,δ3<:L13Qk 3{=w޼W%UgQd5CC9oX?ZgMG\iڊ^'}oEtұ eYyT|IqIG1ʐe1qt|TxΚ+GKͥN;鸁17qK) Å~Qpa熋#,әGթG#[-SLȌ̊ȸ #ȋRECHz( w[Yez >"Z ]asܦ"9ğ9_ :Í{"{"mG}?^&{0Ç"8w(GS"Taӧ,}y(;P*QBsvhΣ \8M^L1=}A]4m(Fk1#n8Cj(HM$| ĩЄf䷠mq&Uԋ!:&+K8N4‘F?osr YShe[[Hǫx c!cfR pg3xI1/%T8VRTe\/wxY ֶVl r+xl [\_Up-1LZäZ~ԒX$΁ؤ I p 7f^[(U\FN z?n.܆JUvU906,emj{hѲrTinUXD+Ę*E&i.aA*$2b:_ѴD±*%&A܊`_u5%)eEfyEM̈́vKe!)JQ%ܺbnBUFRf pjuHn*BqIi* 8EUR#d&\,TbªԬ*RhdR8 Eʹ1s#8*¾ 5ӰOI$pahKGd Kj&7G2VŢQn5b1FX]J$:# )J0(rͱ -q%ʙ;Im GV%ڀMU~1V9,G%Uax#qU IV-٨%5ܘVT#S"*yG&! ]pPѲyhkJ((GH s!o=9tk(ѰD;ZH\mņ6pC؛Q+$FLR(T&Q[UrHiWXl[w+8%i װ"Ty_):Q/[֎koFLEUyozjJyʍۺŰ$mLϥȱ95s:q߄o8 'ElƠ2=,0{*b 6w E cD]xQ5#EYanJ:9QVq͏=T=p4Uj,}9iDe&=M{n"ކp.{2'WD*<'~c">'|Pćh |$5ģ"<O"t 1$f8:S>. z$D| :EtmzΣ7;5ÛBT}P l Cb—28> nͥL'g;Pż-Gx]w9cC3xz| `[¦tx8(-6zs\2!ke7ٺO em;F?f$>X 93@haQ!33yA:_:QUPd?B{/xM/B%\/Y3\ EL,0#e\Ae%@1N3- 5D}D̿BCEK쁓eAi`˺11erïXޡAwy7^Ο2WnR~_ɴ]p9Ljgz\K"3\mEC0oˡ  XI2ra@/\u1OG21?o [@ߴ9bp7 +'B!E>e΀*`rMp؆ Ub*̤R*N_LcˌUXj՛}cfUu,ܧ "j-(W'D}ULCV\2Kgj9ciYlh1prKXv-&fxoԩY^WкyZ ׀+tQ( @^Ϣ{- ߤGmnyQo@F4p*o-MFG٘QaV48lfp}7P#@ߪv2/}6]s)vzN*Ex/ػ9lQp$j)KcnI:l _,ӶD+T`(QTH鱀J?[Zz$T-HxӚT; /,g{~`m Z-ZOR[ Pyql E;aM<,R),vwVp ver!_8wy"usqfmPcN_M' JҪXpm]ܙ-&; |v{Y.pP1P ҶiSovr`1QP@Be8Eʾƺ nmҭce)ef&N/Zs Qk&!pdilۇ~h9Tv=*˯l^,9~uh==2 s,pϕA6u p;htnޤpn=}J݄٭}9`<K|\D +^ziyAM˥fAd\Xo& ,x?,MC$`=߮#r;mcD:"4a"bgD"v AG-٪!L{󰀋0e< &kػcV 6\i8 }Ƀg"Z6x@/`j]^v  ~P}+eh9uv晁-6cZ+Un ߖŭ8܂ So#N6lD$:Cf4d>gk{YNvm8'PKLOi-PK M:)net/htmlparser/jericho/StartTagType.classWWW]y"AEF RT6MVW>5,a5ݍbk}o~~Tl9Q=JRV̽3sܻǟFAEHy7gZL甅|h,ÆDOb(&x:O2BsHN*dG䯐jA71-J0ǂ 2 0$X =}!?$Cp|q_20l#x/&ȱt0HƧ?9¾NiڙӔ(r29-&O}R)(ʥ4zR4C_ܜ ZݒAѹsiyԒ'iX+eUcizK1M4M&4%UOĪѬ )R]4 Lά: >p/Fm  s{)> W\ _ W\Z8·p;.|/ ~aZpW[ []C e/CgA AN5Ntj&Y-~I?a m 0ƧsM}K^ϻJN .Pj 5CR t= O撅`VA}U5ǭpqqgZk1#O49T(~EEj'VB&! &UK95e챠W%2 [qmjQ @șIf r#BfԌS4RĔ愾߂y@:nw!!`321++,n-!kjiכXDI9"hYF7!4Tg킙`7}Ԭ? 
swAW&=% ~ >~W^3~tܤLzФԎVzHLad':/ms"Z]kv7>]/O_58<ڮnnhW8<`9=p6ÃP@w;ֶCL~JoڮcUSSQSrvogoiq jD2\/26TM-eW*Ve T-PK?*2PK!M:5net/htmlparser/jericho/StartTagTypeCDATASection.classKk@>;nAA$⠑88JB Q7tWw`sߜ.'82 q^K>{3v ݑZud+k%0Xn,ODb?ҽs|Ǔ7PC>Cݙ0\ҥvfCžsŔ٬_C )V^TH"K3kVIXP OBa9Pa3A?04ӨhQrRtB:~c23gY~ 8kVŇJ(gCPK6#PK!M:0net/htmlparser/jericho/StartTagTypeComment.classPMK@}ۏdۦ~ď֣7! HRPKlB\x((QoBga}޼a>ܠqqqm⤉:NL3t6#]4E, w-8\eLBE4eTc0XzP [J7-zqZ0 * {*q' g7Y&BE/by{!Xrg=r:9C\ =ڜ6.`P6+vU7 ?ZDe>Qx%PKNɅPK!M:;net/htmlparser/jericho/StartTagTypeDoctypeDeclaration.classRkPn&i]fsӮ·U!P!QhH.mF{׿a>P??J=+:v˹s׷Slypy\‚ؕ4,r>W4tmu w7#0 ԶyxIu_TnA j# [G J;9y<݈|ek*sxv)ixfޏ!ҲUc6# i,4T ,cEUp9X k8c{'`#u Ϭm[ ݨOIyFN ڽ^Oty=e0E_ +Vuq7pm˩CwJ/*O/3N$ZI((T,ˇP>A,V$kݛ_uEgnBPWM- Z 1Տc y a$"net/htmlparser/jericho/StartTagTypeGenericImplementation.classVSg}d.qEW J.bzTHmZ %H&qQ 7^`glvЗN u/|s~_"~`y +DU|% g5뼼8)A,vao2yoE` m0Lif 2}%2{qQ%z,3E_7%{yTrȼXiݶR%)|v Q駊~TR7gnj[^_P2n^ \a@9ש^-ø/0k%<ּLgHCbT> l)c W}઎k :☊<%ta븉UTu8KTx:Y0NWQq`Yӱ:[Ky!w:|P:>ǔM DMMO܂cWe޴O4t|CY4m,ҳ㚹[%(3/Kv ,_{Sb;/f˕,KǷ|u(;M:(o}42[N/ b,]s)T2J,<r䨀fW'8*LB̪~ˤ}0ֲqQ^=ѹVu^7m&Ԁ&?ns;p-mW/_ & ݩ(}ҾXeU`OCIʥmg-cX1rx[&β[(pR9`Hd\:Kc-8YN-E]bJZOy*ybBnNc$lgHTMς$ojγ+n"`le=h*7{WUEY*Z M+!v9èyl'/y6CS0}b?tR7Qt녆0흉bBk? N4|ħ~g u mD:Dܔ|UYjSep'Ц# JHN+L:"#vq?i(Z.bVG«MC jGgK$Z!ܘ7sJ0%a:^a܁*!&aF¬aq 9|$M0z.ϐ,86OJfسݒ[MlpY4j龬Vll&/U/WWfl:ErKN;sUg4u³Q6(k<ϫUjd[zky\I%o]:<ҾMT%nVh߆k쟽c `;2TiX\DU14?֮z(Z,W zw0U!c 7HA${-=t5|CIyF)QS oZ `~f6r;"$  >dF36N9['A+I!هd` 3ye@x Q8@gG#or^vwDx {3=/PKFUPK!M:;net/htmlparser/jericho/StartTagTypeMasonComponentCall.classQJ@=#Vࢫv]BKABBbH4%t,qQP(;x.wq|}pc]=8l %P+R;pz.GJjg0JR [GIҮ. 
T V{Ɛw$nZޏᑐ55QgW~&TPDj 2t%C V R< ''~(G@7 g✾4[&mNHn'0iنc5ȊcV'QV70_`}PK'PK!M:Hnet/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent.class]KAQswղJ!f.P$F1 tЕe?U]~Tt>f`>99g vt,Xё3 kH!A˦-&iK e<=ZO] QMԡ_m`US%kKd`nSdJO<7ɳAFz2701̏+PbVV;sO5ӵ&IM8+WdތPK BiPK!M:/net/htmlparser/jericho/StartTagTypeNormal.classRMo@}㘏6@K `!E Y ) .+ǎ-\ Q dx<{۸cYNJ*`.XCKc]0Yǖ {z7Q< E0uD~0^oFI$Z g(b'}" >aزY4I3gdڅ5nif:lghㆆ&neȹ-,*83 @=zqG<҄ac4i.<VA2p Y  eN&}_ZU 怋GS>gqͽ~R )Mj{NSR2J&2='c 0J(RQƢ*tRbVR>AUTK)ʳһ]V}}F\w(Wߢ\Q-RoÔCv+\^0F PK#PK!M:2net/htmlparser/jericho/StartTagTypePHPScript.classSmOP~.++B6slY,IB⾘Zn_l&o&()N1JO<{?(c[U\OcK*fq#ޖe(XU!㦂5U2r*2ȧi+`PNn0l\.J}1p|#yP:mRGVuEaj1r}psCDfL Z.Q؋WR j:c'Wꯈdlj(bKE4\P~pe2q ׊ ˸x!1l\.iΰ}.wc9>0 `L>{Ro1%$":4^Q*!]r.l^߀E]e$xȐ%IZ%?7^?AUd8ar7OѭS=-:MRy[B0WЙ>C/)2Q%f #L}.~$;ҕi]bzm  vZ xMB3p~PK  PK!M:1net/htmlparser/jericho/StartTagTypePHPShort.classP]KQ=WW}l&좄H ""p˺]Kϊ~@?*%-fN8v98Mp8%(L娄c7N᢯vz%"]]4cwE"?'rx7T{ E?Tn1 щghD2vM!緩[*24aI, e [n2R*-t+z! ? UmaUИ g٩(rBlNQ<30^/J,*(ܯQ|E釄oPK;8PK!M:4net/htmlparser/jericho/StartTagTypePHPStandard.classQ]KQ=^ʶ zQ{h-P)ADP.e]ٽ.%gEBA?͒\f|~}©@E;28s: fC0axbgbX9#c39R1ڕ"d۾M7frlσ-^a~mԌv7ZDTSBU|U< J' *mWX.t=ǾZ;!X5l=4fh l{C~GPKAn,PK!M:5net/htmlparser/jericho/StartTagTypeUnregistered.classTkS@=K %TyI-+PV+ ]Kl:V͟Sr) Ivs}=w0^i8ᜆ N|p  d62b8f\pGF[;x#}Teғda8#-ɕbt\Ėcج45 ُ%>'^\34Y’ @82Ϡ$),b'3EPC8SY<4[nZS$Y]Ho%$"~#h'D{5i+bMa4Bә73:_<^$J~;Bfca4io^11qP J7i}h@d飿h4ZC,T @  G׀рz_G+oM*o6@3`OyRY% 2|B̛G *змІ8jP:D{j` n4mcas;;PK5, ~PK!M:7net/htmlparser/jericho/StartTagTypeXMLDeclaration.classQ]KA=]uCMz %LB0_)_bAWvGن=~Ttz ǹ9||,!fG98$n0;Vƛ0X^EBEdFFf(gç]I #i2 _I*n/mLwXr<ҍcRf;b9 n TIUz @#_!Xv41҂P.Ń3ŧ#ΉKnʵȲ5g%C,,ҷkd^& PKc?$PK!M:Anet/htmlparser/jericho/StartTagTypeXMLProcessingInstruction.classQ]KA=;~}()!IH::NA?+z .=Bw̹s~}@ıDZe sL3n0; W=%= ?Q,#{.#ϝ@Ht]2=5XGO0dtG v0$p5~BjaW_:CѢjQ@dD [̹*AЗ X9zYؾPSH C&kAg<G8-pZC%1KNN.YJY3W$i,-2Kd`PKtL. 
PK M:.net/htmlparser/jericho/StreamedParseText.classN@H, z0&Lg⁄ (VE}(hM<>q94F#ogz),lͦ<x)rp!AדZ wz1Ő:q=W2RKdH*Ju  aȄ+ %өwV8 u ʷ8,ls1Qac y5p@#Lz}ëvl83WtOA Yw;EH)ڿIP)2(%UF.m4_$ORoh33A[jkiZ8PK1R)FHPK!M:Bnet/htmlparser/jericho/StreamedSource$StreamedSourceIterator.classW%,Bqib[CIeAe شM.J:#߉SH(Zސ@i0`ӖBl <;tbavgn;܌уd0̳{01> &L?=PA 7͏8@ ifL[,ɱ S dI>@i&M&&aA1#x88 09p؏_L~-xŋ<пV{~RoN)SK;Y1IfrL+ yii8 t|AliuMIKO.WW`m!Xժ_fuTNȌ@1nuKj2=j$|_n^pG6 xcfZ (9<TQbtt.0%#dJkfa6H_Jڌn\I|LiGfddQzМE.W(KJ8[.Խ*~;TDPы>c?xϨ؍[U<˳xv7kRW^T#Q𒊗i竁 T.-~4% n*^k*O  ƛEvP0-G^ ܃To8A믾Xo^*f1b{L)83 >R1*bILj|3|w%Xz4'or-g'9\~הgy^("\p]qj9u|I[BWY%8P-!tuuAw6 s./LsO Bq,TWKpg*\(GB3->b|+Q,kmg˷-bMm'͍q[=;.vKq7_{jЧ7Nb,a[Ym"]ǎ=!<:kAO "R#fQd~SgҺc-%$y~Rmu`K,˺#+3q3)DĿKZYG`q-U4r4S5,WR*mMQFZPKt/]PK!M:+net/htmlparser/jericho/StreamedSource.classWxS$#Y~6،`0ASdČ(vH@vRc(r 6IӳdRFnҽ#M$[Mwҽ;9oIXI}:{>sC>`V ap/ >h> 18lj f~?Nь ^Qbf0Gh}<( A91?>~|74>ӇXg ADE^R_nv<̒O3dׂXqQO&fbo3|<>%?Q=q=~L~Γ_0xį~N`^4TyLM&29CQjf⺮ͪYfu)OWQs,=.Yܫ2Lu 4dͦLG[%3^RYZ3YUN쾽mr錹$̘lj* JFNYES4dZSds뚩i-&\ 55F@N6;@';,E:-{>mnLY\f e)N9St@M;$z?@^H7Pjt iYOCɪZV:v b$9%ػ_QMXfcdDڣB)512c!9}bVQ鴚 S6b!XLщbiXj-|e=V_ [ FleM2qGyi B.WY$K/?jLjIjf\GTQ9Kjzф^KB/}K `1wAZ*:! 
H^ϸh~!IQ4ћ&' ̺:N>Ʀ캢c;Ԫ+|چyz03RS]7Gz@"b HN16$fF 5}7FNW@ft5ֺNsx&}PR (?1e?^hvDx?9Dx+k>PK ,PK!M:)net/htmlparser/jericho/StreamedText.classV wFGllp$ LB6&1zJa,m<8BB閦mB7 4 0IӤm/;Fzst{w}nIx|1xΝ; s ǯ(*e>~7M}e"; q&Wy.1UxMW2~ s-]Flk47#g<_͞JWd\Uo3F|,u nȸ)cV- 5iL[Š)+t1ۈ6VgZp/19JcGC<s"m&!S?4͓mzКؗNt{v]=}G{Fv<:8G.lO[ig8 !ԛMX~7pǽFY;i؞K.oT25҆mJ.6o:~2Ig*3iw7tr25PSfj؜v( FoQނ$Yv(D%lKh; )dLk™ qsIN  I v1|q6CN9'XޱUTlSu'OBlۼJXFnV*AVIB ڗ4]Xa"aSL;Q"m䤙<6ks6pʴ3ӄNs$M= T:IINڻ8ɘLuN\ql۴7k9t 'Qw۶mD'3 Q*FB0gЃ^pYTBx2~6N8*>1zH^,P{|HtazU@FG +_岶crm6 - Cq$5J^i:juZAߩ-=vjNǦ1W)ͻJ哠USj1-*I?U[I iʬRVWUTP1InmjC8L?Fj 7jZ&ΟW4c ml_<1qiRGe9NCޛ>Py..vj`xΞWu"?ӱW5u Nu_}Q!Vְ.dY [D^h%3h @j/)r#u:Uh-~ukE4qo 9=J 4.YȷpLKiiS;Qo l 7CnIΆ;bknV^YsQsES>>RO <^|܇O^|J>3Yn+ E/ ,AzHkV$6uEDVMiZ*ijg :隩]T%Hg$4₪;cF>dHrjr\G]7y*Y$p<쥹z{:A>ⱁĀĔCJeс)K]ۮ:y#~ !;jM<]MjOTFPRjy/^P*X= wBww*]J%F>CaC9YUWi5G1#rx~,x~$yN1#>lٻŊI^#!bZB_?<99[rfʣj~o+~쬚SӮ0~,^][3[3&z$/ZMlș_G*%~wx4 Ybb%z?e{G\(V.2ELPB!%SbCa{Gą2EU=%vCqeh݈M=P2if톔x\io/Y83Ĉۜ9r_B)a PVΘ9tk#Z6BVAK60QaA8*|.bTz3H$)c!KGRglK!wy%csaƊHUG+wY_K̼fSZΒ. EFE](aQ!m)AGwvm0HRdFe\#Y}RIKx=H zEwn=DO~']t)!n!qJiiM}>{h^<*: )"-gn*_CU_E5x+>Fy#R繅%AS$ _d Uӫ˻VC FnoF+ oP3]M}!,ljWF`Fm͎{: Zv\CD}ݰ%.!.+Ւxs%Ğx(pYy .0s5fr``-<jx3-QÐvј5ҌT:*i8Pe\m0{4i(+*"Uvu3Q{#L5V4P,_>fYW;?;wZJ r/`)ĿPKm/0PK!M:0net/htmlparser/jericho/StringOutputSegment.class}TmSQ~.oje,j%fVQ%n,K-}Qje3`{}s='9#.  2n } ²?mq7%X="ݘ@U!dM!VC $<`fx01x,~`1{;=V|lWmYg-a0D;XQs!&Mn VXѫU#[(<_D"繵f'VÔ "S}հN_rlWت OAW,lKxBLzBWS!Wf=SN}(mFVJլJJr3oy:gPçl+J!  +7XLੂa(a\BRL*R+xi7c:dfs敢KjExn fpE+$“b3hѠt_Tfg4g%I#whP 2̪mgug[;ikT=ujNjtv:a^h6FP}bB&iFHN. <Ǵc0m.mn- r_!~6LNn=Z>zcML;>4jp}r-RGͮ!TrrJwa<~Q'E 0|섧 8TA̞ M_'x^q:A{~{M,u\l͵PKwPK M:0net/htmlparser/jericho/SubCache$CacheEntry.classS]SP=&M R|( iDVfđ@/mvҴ ^|:c[Gg|u_/(/ݛ={ǧ/Ʊ! 
] 0\pI@BeI * cWqEU4\DŽ8Ljšִ[ 0y'wX!P,=+[tyvrKMnq7+$Zi+B6rKd `P`lZh 3Vd&jͰTC 5(?y 5Gq+D])P*ffK_P y&}Ph CR;jUr(Qǩ ~cDlBv)CwBROF?cI59%(ٙ̄ߠ#"iW_{սV#ү 5.`?PK͖?PK M:Dnet/htmlparser/jericho/SubCache$CacheEntryMissingInternalError.classS]OA=cK.["!ˆ l|\!in6S~/K?eS&瞝s=wf;:Xt𴈲Kls+Xlͅu #c*iBRN? Z׺A9jӧbzAH_aҕ +O(J~֪5NNy8o|uITF*y#E]>+o^$tD$7a֌cέ%B#Z0 {j3~|DN&lh_画񘴋3T,[\sd&t\/{aLN3GIU/%YJ5GSL$)9WZ +pS#~8Y1VRͭ)L2#,iZmu??gx؏_⠆5' Zh{lU=<=;W4^Qw+ PKC!|PK M:1net/htmlparser/jericho/SubCache$TagIterator.classS]OA=mtR(n-- M&C;i)0)b47M|wʒFMv9s?B&HaFyykʻc@3 (J pS-s:CAwŞ| vCo7TMl8A0/Fg{WBבK @tCt٫ a -Zv{Bq6_xFlɖkk'RvrݺUI9 r)iJG:-"ϥ/O҂dMbyML)o>s7۞/Emx]٫rt6qwMM ÃNfHh#0%[ I3JHځD- 32g!K[ER[U̸DM*KllU{]?@<f(?g?s+~G)vq'-O8ф'xC˨*_w)|>XJa^.GQ&1|ItSxi?ͷ|1_y9x!b0/nюt&Ҩ+@k{o[ֶ@g%-n$S!#u(K6 Tֺf-!)lujL%ĉ0ӱcP7@b .`-  'DRF"qV!8pƵ9 x4&ؔ )"h5ǔC]AH4N vI~"%+L3bU]}H1-8F(ND/;&8B`  eC?EĮ֍9P̀ 2'NdLJCa Z+\Z(FD?q``BZcTII8'6mDe ] f3\͒*ެaA Хxb Mô/j؋%j5|/k^v~Sv$4-PÏ?TC/ӐFJC~ư>C$_qT}TUbXӟ+#Q 8BEB6j~˫Jï 49 dc{Y?5 f¶UmM]Lg3E p'4aLxƅ\[Z; 'ybO/O&`iӇ1HڣvTQ@|punĦ_TBPR v$._ ԚTɠB'9ȎeQ#KGA$|dqŅKh69fYrЏG j\i@ Usg.#,cۛ%&m)422(L'Xy%eV͌C3R#ri%ט3Ķ q>p7 򩿈|&DCu1r.Wq>q_}5ނv(/`8HC<Sy+wu7aq(6uݶ"fg%A&QqaR*$"8Ծě8'4%Mxs)p|C8,o"wz>iP{ԴD52Vz$ߠRn|+%ˍ$AvN: r}n 8{ [T:a&b+8|LgWެPFyʹ5E6j{]vTwe'Ej-4P#֙\4wX!$E(ylƹh*01OʹiqnڥZgIJwh"4ѕ3x,Ejf #B)ϊT(5ݔ`kM]IJ얨vOIz@K""g_ՆI/F:5{HL\)LE<.Jd 24eÜ,zNX#o= /AKLgq%ح>Ӄ3^_wy=3ԣ~GCa=Ԅ G/rnxv^pоx%DKK zUdvxJ^^,^ nt(CBu5l^| Owe(ΠVQ;XJv`1_?y|L7NWgj.$oO˯cs7+@ %1Ԟd2|IfIx \zsfuxS,*J)3ףZnFqӵ,מ|k9.Z+=f9FehCM`_TwM;K f_XҏH2@'n (6La{h'+}lOk~ bK`%ۧDI'lͬkKzjVs *&^vY2Hr h/Z5Z2S3HN<'D`EX#Yj%\Kd׿,1跣xHFlofĽ!\9'F ՘Sl6-*/hd}dHAKvdXaSّ-^h]1;KpO`QYBr ;]»]/oG PiEڛidJ{ ;&w PKCM fPK M: net/htmlparser/jericho/Tag.classX \K> QC+46㰢m|B\Hhnzް]ʺuX{ok޺=wTB^pem.'2iV)~&}Ѿħe*/_c |SƷ\Xvߋoie1.?ӏx1 S^~m7NMMǝRB#̤bᴖjQ3 e- j@u$e2!5EDTo Y60h(]K~.2然7#ڻzC͡EmHwK#|ykgW#nmCD:&2[f)e 4rILbrT(%G2a͗MwuSYk(B~7Iap;3F(=8BEs 7 <3JbDJ돥3ZJ꾥IXt@a,uSpo5թ]7%(ZLP̨ ÉK İ,̸.5U1u-jpDϛ4d;OJcpTkMtjjo`.V~a9L7w'Ծut^+Q+.3my-f.ry߶r{ Xf>>5\ElH=X]?5ҪZ`3^2~ xU W_AkoTU;^b2O3 E_ )Cw?\= 
b)7|f_K2^}ioa-ב8lWѵ܈O7I؅C"Pު͠bT3]ZQmop}ץD4ۧ ebIri4)ɷT*I OQ9֢ձUф,Rv"\$ToBz~Ŭ߭_1w"Jr\J XQFOW`87RlEy ށw*Bx*s"X(TOQ%)b1/.Q%xIKeY/<Ֆّ{-uL2ܣBjKM6EL.iZykY}':hͭuW9[xl0Fq+UtWPʜ4Qӗ FgԸ^Bj<|TORueJ,JIꌥ#ZF?ZTK?Qf|5msY+?,뚜[ ^v+N!3ExD N kQ wuVȉ?]i,χCo!3FE YQf1ʦ%(d|'Uʋxݽ <# y%;x> vI`;)aSC6vhRrT6:L,zr\;g{ ̎\`Wczy&YriVQ`,f3jY>X 5=ճ9E;ZHt[{Ǧ戴~鷙Lhh#Mo h:wݕ|LWdϸ)%na16KL&GLxda᫉I&RJI;&Q vy8ђ3SG`5 ,|*YFITdB#_ 45< o$h;h;Ă|}&hbl~]E/&fXGi +ݍ!Kcl,Ʌ&r"_{x*ݫ$YB7N} v1^S/>I$UI$I!BN$xQ,: m|:nBA TK=2|%o(\t!O>Kb<+fpB;Qж7c( &b$I#yEHh7IRHmi#O"(AHٚGMF>Q8i#5JNauOy \=ovX*\ X,)/:D>2IA#pQ|oᤑw;S, j69߻vQei`fuY A8LPKJ1> PK M:Cnet/htmlparser/jericho/TagType$TagTypesIgnoringEnclosedMarkup.classQMK@gZzB=l{HY%MID?z(qSxi݅7zp<4,hXQD) K:XPְFPlHؽ yb`D̅w{0![C? d u9!Wu1o yl 6P1t /z wL?%0 XP'f,] ao섉d"OЬvYo9iv-˚Shp"dczuS&CJIz zF:r(@ses/P 2%f *PK? PK M:$net/htmlparser/jericho/TagType.classW_;,+$dYlPk%WMe8,Vڇ-5ؖZaIF[~Dsfw>ù3{y|9_p;> m8!x$o,4~`!bE6DsS'\b&z1^`pq\vbΆ8;;!D=gzZcOxJyG xg8x.|/ ^ e~)W\/$aӺ+l MMS 3*Yay ҸM[LsZ48-a ̌oj;-A2BBYy>SY*L6 VКsucp~TϚI[01N3㜰Q4d|fZ7z7\)("zaCnԝӚgFhyСx%F])ִ4J񶳵%MNmM5mO[ڹ4+q%t׌^rJ8PMuDMе Dؕ{kꌊѣ8+, ~WxUů]<}̫XoUǫTuox$'Ot2MT U?ƈ㴊o`0DV3zlnZn\Zxb SG =ՏTXGl˙iWE(EӚkQ 21*Ń*.=bC*F1Fk3#sI+gfMn籪ಊ+}\; l ܘqMG*>?$DblN)ӜKSMsDLwcSVLXT*TP{N '7`Bk>fj0BRݜ܄SZ㩲m;pRe2jsql1mR wR¶~3NR`h`Yddc5jҕHGs.75jzbG;-*ǗzJ`MgrЯٶvH{⩚'Xz&zDQvN cT$+KY/\hWO䡬 WP.FF2B%dYGЋ>ypeS?>H%P\* JWr k _ ,,¯ '6 Iq4q=ps >̣>Kt[Ҟ7ȋ}K)[ڛ8[wt [GXDc$ͣ KkKEä:c~ ^Ŀp;ϬbG;-Y/)nIR2 l=*g(MR 5o-t汧.|g;{t.\ELb*v9tb`{'M~y D"#. 3*iƿWA^N+ڬLv/;BKMB3.vPKQn)PK!M:Gnet/htmlparser/jericho/TagTypeRegister$ProspectiveTagTypeIterator.classT[OA²\ ʥB-`&6"%& kK 6[&rQ|້?B<.B$КI;w̜!{+˖A5i8X,Ơ}P^wg0DѬvR8I +)QK 'KD^cA=_9u@ϩ=QCV .SfyH5=0ݿ_.>QIy=('|؇2&ʇ2 G? 
dG!zi.ѯHSTqU$'ƭ^2(¨<,ԀiOeieY~b$_bxqn!T}A'k RwQ["z0YBPKe\4PK!M:,net/htmlparser/jericho/TagTypeRegister.classVS[Jĵ-0-+$(8*BvT At vlk+KTNM~iS'OiN;/}L%@W@uI4bʁO(,i&<91.Wy|Fgch>_/T_d ̰P`J/Wd|_י|7|ɷ|w|?ď zYOe"P7TT)sQK$'HI+$KB(,}*Yr%_4&K)jZ!̷DHQ,IHMfih>} r#p4:z2犥dt"֬ Gc౱PX iCFB2h=094>3]_VR5dt)mcf&rtLeT}Лr17r}0zmg?jNODrednM-2 }ƷԔKo)4:((Bq+kub3}wfris嶶?"X:F5ّLb]{ʞSvYӷrNq|-~CǷ!6'/N|DAz'X _G1]VpO*2~UBR)x Ns3LNb.  K|0(c3^ػiW4u&o(ct¯eFoqM)<#heNy& L\gL,37eN w( 8ס=O/@p{jTM\-% 4d oJU*(MÛ=5, 4slpm9hL'ֻR';1YL2SL"~3ݢ]PrL{6є׌m]ڀQc𱈚@(=6F Dhp` K`X8 ƃ`x``"8C\@tl(> j$vl,Sasq/nvWfٵI ӰE-mܪun_ Â,<tNA4?סu 9Na(\^71@?iXuO)׻xي=<oeȌeuniwq@JSrIYZ\R(6#Q-lH97B~8QCnak]ƾ% gi![鿗GiG ~홥-~Do2u<DF#,ZW:%kE[=.[Gm8Ж(?%wtn9JFf߁r9hi61+󇵃* ykax.wJ!s~U.It)7*n~nϢ ^jA"7?Ts8e$'s]ϒtbWQ54)k5פzBL '׀!6vTb,@K!PN:qSgr3=YY{=o j\PXN+짋hrCF0F-F-zc,W򎱰_Vݭ}{yͫtSc:)obJcf7-f4:P5 wa}(܄cثBx Ǒ]"ؼX)<59d@fC^axuiT)݃z~0B7-O屐uPK8$oPK M:Bnet/htmlparser/jericho/TextExtractor$AttributeIncludeChecker.classOP=c~B6"Yٽ<_d~pu9 &E#t ̻繪dE5Z{V8[XLΒ4jE0mLǻ1a UW봔˶?f;>{EaXSX0$g;!@( <:uIh# PK7PK M:4net/htmlparser/jericho/TextExtractor$Processor.classVWW=dxj6@n-VZiCJ03vv푸c??o{޹s6 qvIhw{%!O{O*}ۯ"nxPIy(HညxHBT+^pѧ Q**x F)Ո C ) 5'M*1$cR`#/5p\Ƞ";,LaMN" Ͳ G'`X2kc&oI=Ўf6]2f` u3cnDZ#ĨK#=ɽfv&^f{ŨwԱ;uSwaHFoMd[3~ɧo3˰4ڒFDq'&lV4±8c1,- )ag)FKg8sCWZ%%+xUkKoं79Tc8帊'2^P@cp|8> J3ѯEQ@լ=QsJTy:CRiaS&J@ai3)-~JBo`O8Գ;ɰ35;e.a dosVԔUeh[t#)r(Stt#mԅ3)4h:n@C49o]vwn2y:hwrB-ai77~PVDn Y)a(WezfrNj¤}7ܶk"4tS$ň0fy񌼥LÂ/Ԟ:$ KzD xPdhEt C+آ/G%r70Cghf6soZhzw($5`:IVtGc` 'PouTb ucT4$j{Σ W]XXbZjcmWE|e,.#\BTK:_De,q+c.X~g"h쨋#næ t6Y+ci4(aEt_ªvzRq)%5en\:Jh=U4w( %,o F^FEƯ-}l ư"Z]ҨZA$ƯJ*YNvh#PK * PK M:*net/htmlparser/jericho/TextExtractor.classVSW.!lXVwjľT(mXQV,dq77}}OؙV+2o&Œӝs{^sOIE1F+*k\QLx\4 9i)HbJ2BUIf" r 5my=,p37ޒV}=34yebaKya[Dל) 0ˇ3ܸIQtۂ'nsΈMDG m!}{~ؙz(7\S,t))sjjt9a:v^ =4 V/R3/Ns 3 >;/Bv rvZd貂ɝpJegh>z y.kHopwCŕҏDPSأ-מ*pw2]|"n/2.$Zѻҵ$ H; ш#rZT1`òe8]% Rp[Ç8!,I$%9"8# #! 
>)>S/% eb8!.ɸ$𕆧𴂯5|##|;|0$0QjQL~a kx'4EnI-d{VZ@Iϸ|NC7\?6{m#44163 P Rp=p AV 01es&AKgo^yT9'|jm%c8V~D ȰLέUp1kNZh[$&;Ni8SBAPy9'"uzU7{$H rNx{xrHY(Of;utw>|!}KGï"Ui3RC}P t^4_"!0x)u6MCaM TTcdHM6ӻ,%ڼDP?/PKKV PK!M:!net/htmlparser/jericho/Util.classuTmSF~ٖ,cҼَWphcZ%!V5W#$|nt23Nͤڙ~(ݓPl3ݻ}vo//\C4dUt(tqIj)ޓ$`Ru|H >ND(JqUÌOk*fzgW_ f8U p|xu8a`~)x]XoO3$ŜXc-:yٌs2wx8= H/WUR*JTOPպ/ì NYp30FVq|^c8 UOݨ8*80tL3 &]@+V7FavuDue_qMdT Qh6X6p )+` CK5^uBG~&3\vĞG;5vyVvªp=qu_`8_yCRTsdDSr\cMWGZk"xŔA+3/2S=JW g$H$TL6bi%ݾI-v";W$MD3B_\D<ʹmC;~M=0PD.z%zo`u&ΒM =A1v"V"!3'$6b?9"~W([r 9o{!)# $7NŬéx#Pޣ"1;Q ]k%~@k%hc\ώ^Y+Ӣ^`J7`X8TaI+#):-.VVҋ2i}1m =ce#Z_P.luqz 'YrJYE:s&+wAix=h,483RlPK?>PK!M:)net/htmlparser/jericho/WriterLogger.classTRP]iӖRDRR/@An- J8i/uw?qcvk}%=܏8сLކF c{8 (NGcb&G(19 ((ю )W\Bd1]ÑpxjMM;];dK_7$$z*UNϺiɧc;YK_y c4֊R2rǴLaPa{s ce-zi$}QلE6ڒdN Dl&B(GuhnSjDMD xm՘PaVG ɱwBHPj ^x*zdXPqgUcbf8*N1 RB;r1Sȩ%sy9z<`Q+ TpQ2~ٷ%kT厽{6,\7v|CbeUwZw^37|.D_Z3J}AY{)Q`u&3<.2Yojkѐ^5Kt뮘Ȋx0_ X6<]|dW*U jͭ:& g_ ]QMOnet/htmlparser/jericho/HTMLElementTerminatingTagNameSets.classPK!M:٣3net/htmlparser/jericho/IntStringHashMap$Entry.classPK!M:8-net/htmlparser/jericho/IntStringHashMap.classPK M:e # net/htmlparser/jericho/Logger.classPK!M:G+ net/htmlparser/jericho/LoggerDisabled.classPK!M:P9K#*net/htmlparser/jericho/LoggerFactory.classPK!M:PDx+=net/htmlparser/jericho/LoggerProvider.classPK!M:3SP3net/htmlparser/jericho/LoggerProviderDisabled.classPK!M:SM!}):unet/htmlparser/jericho/LoggerProviderJava$JavaLogger.classPK!M:{Cf/net/htmlparser/jericho/LoggerProviderJava.classPK!M:h <8net/htmlparser/jericho/LoggerProviderJCL$JCLLogger.classPK!M:_Aq.net/htmlparser/jericho/LoggerProviderJCL.classPK!M: =<net/htmlparser/jericho/LoggerProviderLog4J$Log4JLogger.classPK!M:Ⱦb02!net/htmlparser/jericho/LoggerProviderLog4J.classPK!M:8<"net/htmlparser/jericho/LoggerProviderSLF4J$SLF4JLogger.classPK!M:A@G9e0 %net/htmlparser/jericho/LoggerProviderSLF4J.classPK!M: 
1x1&net/htmlparser/jericho/LoggerProviderSTDERR.classPK!M:ʹ'*(net/htmlparser/jericho/MasonTagTypes.classPK!M:]e;%.+net/htmlparser/jericho/MicrosoftTagTypes.classPK!M:tAD )q.net/htmlparser/jericho/NodeIterator.classPK M:p3net/htmlparser/jericho/nodoc/PK M:0xB3net/htmlparser/jericho/nodoc/SequentialListSegment$SubList$1.classPK M:sl@K7net/htmlparser/jericho/nodoc/SequentialListSegment$SubList.classPK M:2c 8;net/htmlparser/jericho/nodoc/SequentialListSegment.classPK!M:]m M| 6Anet/htmlparser/jericho/NumericCharacterReference.classPK M: +Gnet/htmlparser/jericho/OutputDocument.classPK M:Kh*{Qnet/htmlparser/jericho/OutputSegment.classPK!M:`ݑN4aSnet/htmlparser/jericho/OutputSegmentComparator.classPK M::U-&Unet/htmlparser/jericho/ParseText.classPK!M:5Qj(Vnet/htmlparser/jericho/PHPTagTypes.classPK!M:B?-09Ynet/htmlparser/jericho/RemoveOutputSegment.classPK M:4@[net/htmlparser/jericho/Renderer$Processor$A_ElementHandler.classPK M:hcrA^net/htmlparser/jericho/Renderer$Processor$BR_ElementHandler.classPK M:FT>`net/htmlparser/jericho/Renderer$Processor$ElementHandler.classPK M:sXOPG?bnet/htmlparser/jericho/Renderer$Processor$FontStyleElementHandler.classPK M:ڰAenet/htmlparser/jericho/Renderer$Processor$HR_ElementHandler.classPK M:@}z&BAgnet/htmlparser/jericho/Renderer$Processor$ListElementHandler.classPK M:owAinet/htmlparser/jericho/Renderer$Processor$LI_ElementHandler.classPK M:YoBElnet/htmlparser/jericho/Renderer$Processor$PRE_ElementHandler.classPK M:8FFD~nnet/htmlparser/jericho/Renderer$Processor$RemoveElementHandler.classPK M:nfp#K6pnet/htmlparser/jericho/Renderer$Processor$StandardBlockElementHandler.classPK M: S Lsnet/htmlparser/jericho/Renderer$Processor$StandardInlineElementHandler.classPK M:zAunet/htmlparser/jericho/Renderer$Processor$TD_ElementHandler.classPK M:H'FAmwnet/htmlparser/jericho/Renderer$Processor$TR_ElementHandler.classPK M:S8#,/wynet/htmlparser/jericho/Renderer$Processor.classPK M:}*$net/htmlparser/jericho/Segment.classPK 
M::iNx@#net/htmlparser/jericho/Source.classPK!M: ,net/htmlparser/jericho/SourceCompactor.classPK M:6 06net/htmlparser/jericho/SourceFormatter$Processor.classPK M:g7  ,net/htmlparser/jericho/SourceFormatter.classPK M:LOi-%tnet/htmlparser/jericho/StartTag.classPK M:?*2)Gnet/htmlparser/jericho/StartTagType.classPK!M:6#5net/htmlparser/jericho/StartTagTypeCDATASection.classPK!M:NɅ0net/htmlparser/jericho/StartTagTypeComment.classPK!M:w;net/htmlparser/jericho/StartTagTypeDoctypeDeclaration.classPK!M:0 >znet/htmlparser/jericho/StartTagTypeGenericImplementation.classPK!M:FU:net/htmlparser/jericho/StartTagTypeMarkupDeclaration.classPK!M:';net/htmlparser/jericho/StartTagTypeMasonComponentCall.classPK!M:K̢UH$net/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent.classPK!M:0s8net/htmlparser/jericho/StartTagTypeMasonNamedBlock.classPK!M: BiUmnet/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalComment.classPK!M:#/net/htmlparser/jericho/StartTagTypeNormal.classPK!M:  2!net/htmlparser/jericho/StartTagTypePHPScript.classPK!M:;81net/htmlparser/jericho/StartTagTypePHPShort.classPK!M:6"4net/htmlparser/jericho/StartTagTypePHPStandard.classPK!M:g!5 net/htmlparser/jericho/StartTagTypeServerCommon.classPK!M:An,< net/htmlparser/jericho/StartTagTypeServerCommonEscaped.classPK!M:5, ~5 net/htmlparser/jericho/StartTagTypeUnregistered.classPK!M:c?$7net/htmlparser/jericho/StartTagTypeXMLDeclaration.classPK!M:tL. 
Anet/htmlparser/jericho/StartTagTypeXMLProcessingInstruction.classPK M:1R)FH.7net/htmlparser/jericho/StreamedParseText.classPK!M:t/]Bnet/htmlparser/jericho/StreamedSource$StreamedSourceIterator.classPK!M: ,+net/htmlparser/jericho/StreamedSource.classPK!M:٘)&net/htmlparser/jericho/StreamedText.classPK!M:m/03/net/htmlparser/jericho/StreamEncodingDetector.classPK!M:w07net/htmlparser/jericho/StringOutputSegment.classPK M:͖?0:net/htmlparser/jericho/SubCache$CacheEntry.classPK M:jUD>net/htmlparser/jericho/SubCache$CacheEntryMissingInternalError.classPK M:Cu uI.@net/htmlparser/jericho/SubCache$FoundCacheEntryMissingInternalError.classPK M:C!|JAnet/htmlparser/jericho/SubCache$SourceCacheEntryMissingInternalError.classPK M:PY15Cnet/htmlparser/jericho/SubCache$TagIterator.classPK M:CM f%Enet/htmlparser/jericho/SubCache.classPK M:J1>  Pnet/htmlparser/jericho/Tag.classPK M:? C\net/htmlparser/jericho/TagType$TagTypesIgnoringEnclosedMarkup.classPK M:Qn)$]net/htmlparser/jericho/TagType.classPK!M:e\4Gdnet/htmlparser/jericho/TagTypeRegister$ProspectiveTagTypeIterator.classPK!M:6EXQ,Rhnet/htmlparser/jericho/TagTypeRegister.classPK M:Nf,qnet/htmlparser/jericho/TextExtractor$1.classPK M:8$o,srnet/htmlparser/jericho/TextExtractor$2.classPK M:7B!net/htmlparser/jericho/Util.classPK!M:L/:D)onet/htmlparser/jericho/WriterLogger.classPK7jericho-html-3.1/project-description.txt0000644000175000017500000000040211031444346020424 0ustar twernertwernerJericho HTML Parser is a java library allowing analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions. 
jericho-html-3.1/release.txt0000644000175000017500000005513711214016506016070 0ustar twernertwernerJericho HTML Parser Release Notes 3.1 (2009-06-11) - Bug Fixes: - [2793556] Infinite loop on Segment.getAllStartTags() - Infinite loop on Segment.getAllElements() - Segment.getFirst* methods returned segments outside the bounding segment. - Segment.getAllElements methods did not return all enclosed elements in some circumstances. - Fixed documentation errors in Segment.getAllElements methods. - Added StreamedSource class. - CHANGES THAT COULD AFFECT THE BEHAVIOUR OF EXISTING PROGRAMS: - Changed ParseText from class to interface. - Segment.getNodeIterator() now returns character references as separate nodes. - Added tag search methods based on attribute value regular expressions. - Added tag search methods based on HTML class attribute. - Added static Source.LegacyNodeIteratorCompatabilityMode property temporarily to restore Segment.getNodeIterator() functionality to that of previous versions. - Removed char[] based search methods in ParseText. - Added CharacterReference.appendCharTo(Appendable) method. - Added OutputDocument(Segment) constructor. - Added StreamedSourceCopy sample program. 3.0 (2009-04-09) - Requires runtime Java 5 or later - Bug Fixes: - Character references representing unicode supplementary characters were not decoded correctly to UTF-16 code unit pairs. - [2188446] Element.getDepth() and Element.getParentElement() returned incorrect results if called in parse on demand mode. - Comments are now recognised inside tags - [1576991] Bug in ConvertStyleSheets sample program - [1597587] various NPEs in findFormFields() - [1599700] Segment.findAllStartTags(attributeName...) infinite loop - Overlapping elements resulted in some elements being listed as a child of more than one parent element. - OutputDocument.writeTo(Writer) closed the writer. - Server tags no longer interfere with parsing of start tag attributes. 
- Added Renderer class and Segment.getRenderer() method. - Added TextExtractor class and Segment.getTextExtractor() method. - Deprecated segment.extractText methods. - Added SourceFormatter class and Source.getSourceFormatter() method. - Deprecated Source.indent method. - Added Logger interface along with the related LoggerProvider interface and BasicLoggerProvider and WriterLogger classes. - Added Source.setLogger(Logger) and Source.getLogger() methods. - Deprecated Source.setLogWriter(Writer) and Source.getLogWriter() methods. - Added Source.findNextElement(int pos, String attributeName, String value, boolean valueCaseSensitive) method. - Added Segment.findAllElements(String attributeName, String value, boolean valueCaseSensitive) method. - Calling the ignoreWhenParsing methods on overlapping segments no longer results in an OverlappingOutputSegmentsException. - Added CharacterReference.getEncodingFilterWriter(Writer) method. - Added CharacterReference.encode(char) method. - Added Source.getNewLine() method. - Added static Config.NewLine parameter. - All text output now uses Config.NewLine instead of hard-coded '\n'. - Source.fullSequentialParse() method no longer parses the source again if it has already been called. - Some methods that require the parsing of the entire source now call Source.fullSequentialParse() automatically. - Some changes to the output of various getDebugInfo() methods. - Added categorised class list in javadoc. - Removed all methods/constants deprecated in 2.0. 2.3 (2006-09-11) - Bug Fixes: - [1510438] NullPointerException in Source.indent. - [1511480] Incorrect detection of non-html element with nested empty-element tag of same name. - [1547562] Fault in caching mechanism. - Source.fullSequentialParse() sometimes resulted in unregistered tags being returned in tag searches. 
- Invalid Empty-element tags whose name is in either of the sets HTMLElements.getEndTagOptionalElementNames() or HTMLElements.getEndTagRequiredElementNames() were rejected by the parser if the slash immediately follows the tag name. - StartTag.tidy() only included a slash before the closing delimiter of the tag if the tag name was in the set of HTMLElements.getEndTagForbiddenElementNames(). It now includes the slash for all tag names not in getEndTagOptionalElementNames(). - Source.fullSequentialParse() now clears the cache automatically instead of throwing an IllegalStateException if the cache is not empty. - Changes to behaviour of Source.indent: - preserves indenting in SCRIPT elements, server elements, HTML comments and CDATA sections. - keeps SCRIPT elements, HTML comments, XML declarations, XML processing instructions and markup declarations inline. - Minor documentation improvements. 2.2 (2006-06-20) - Bug Fixes: - Fault in caching mechanism resulted in missed tags in rare circumstances. (SubCache.findNextTag method) - [1407179] Segment.extractText() threw NullPointerException if the last character position was part of a tag. - Segment.extractText() now converts some tags to whitespace and ignores text inside SCRIPT and STYLE elements. - Added Segment.extractText(boolean includeAttributes) option. - Added Source.fullSequentialParse() method. - Added CharStreamSource interface for dealing with char output. - Added Source.indent(String indentText, boolean tidyTags, boolean collapseWhiteSpace, boolean indentAllElements) method. - Added Segment.getChildElements() method. - Added Element.getParentElement() method. - Added Element.getDepth() method. - Named tag search methods now only return unregistered tags if the specified name is not a valid XML tag name. - Changed Attributes.DefaultMaxErrorCount system default from 1 to 2. - Added EndTag.getElement() method. - Added Tag.getElement() abstract method. - Added Tag.getNameSegment() method. 
- Added Tag.getUserData() and Tag.setUserData(Object) methods. - Added Tag.findNextTag() method. - Added Tag.findPreviousTag() method. - Added Tag.tidy() and Tag.tidy(boolean toXHTML) methods. - Added and renamed many methods in OutputDocument class to make the interface more intuitive. - Added HTMLElements.getNestingForbiddenElementNames() method. - Illegally nested elements with required end tags now terminate at start of illegally nested start tag, avoiding possible stack overflow in the common case of multiple unterminated elements. - Tag search methods called with a pos argument that is out of range now return null or empty results rather than throwing an exception. - Renamed output(Writer) method in OutputSegment to writeTo(Writer). - Deprecated Tag.regenerateHTML() method. - Deprecated Source.getNextTagIterator() method. - Deprecated AttributesOutputSegment class. - Deprecated StringOutputSegment class. - Removed BlankOutputSegment class from public API. - Removed CharOutputSegment class from public API. - Removed IOutputSegment which was deprecated in 2.0. 2.1 (2005-12-24) - Added Source(InputStream) constructor. - Added Source(Reader) constructor. - Added Source(URL) constructor. - Added Source.getEncoding() method. - Added Source.getEncodingSpecificationInfo() method. - Added Source.isXML() method. - Added Source.findNextElement(pos) method. - Added Source.findNextElement(pos,name) method. - Added Segment.extractText() method. - Added StartTag.getAttributeValue(attributeName) method. - Added Element.getAttributeValue(attributeName) method. - Added ExtractText and SourceEncoding sample programs. 2.0 (2005-11-10) - Complete rewrite of the parsing engine to allow the encapsulation of different tag types into the new TagType class. - Requires Java 1.4 or later. - All programs written for previous versions of the library will have to be recompiled with the new version, regardless of whether any changes are required. 
This is because several methods, including the Source constructor, now expect a CharSequence as an argument instead of a String. - Changes that could require modifications to existing programs: - The toString() method of Segment and all subclasses now returns the source text of the segment instead of a string useful for debugging purposes. This change was necessary because Segment now implements CharSequence. - For consistency, the toString() methods of all IOutputSegment implementations now return the output string instead of a string useful for debugging purposes. - The return type of the OutputDocument.getSourceText() method is now CharSequence instead of String. - Character references in Attribute.getValue() are now decoded - StartTag.isEmptyElementTag() no longer checks whether the end tag is required. - Element.getContent() now returns zero-length segment instead of null in case of an empty element. - FormField.getPredefinedValues() now returns an empty collection instead of null if the form field has no predefined values. - Segment.findAllStartTags() now returns server tags that are found inside other tags. - Attributes segment now ends immediately after the last attribute instead of immediatley before the end-of-tag delimiter. - Modified Segment.isWhiteSpace(char) to match HTML specification - CharacterReference.encode(CharSequence) no longer encodes apostrophes by default - Tags of type SERVER_COMMON now always have the name "%" regardless of whether an identifier immediately follows it. - Modified and enhanced aspects of StartTag searches relating to special tags - P elements are now terminated by TABLE elements. See the HTMLElementName.P documentation for more information. 
- removed public fields in Attribute class that were deprecated in 1.2 - removed Source.getSourceTextLowerCase() method deprecated in 1.3 - removed Source.findEnd(int pos, SpecialTag) method which was accidentally added as a public method in 1.4 - Deprecated numerous methods (details in javadoc) - Deprecated IOutputSegment interface and replaced with OutputSegment - Improved caching system - Added recognition of markup declarations - Added recognition of CDATA sections - Added recognition of SGML marked sections - Doctype declarations containing markup declarations now supported - Segment class now implements CharSequence and Comparable - Added getDebugInfo() to Segment and all subclasses to replace the previous functionality of the toString() method - OutputSegment interface now implements CharSequence - Added getDebugInfo() to the OutputSegment interface to replace the previous functionality of the toString() method - Attributes class now implements List - FormFields class now implements Collection - Added HTMLElementName interface and HTMLElements class - Added RowColumnVector class and associated methods in Source class - Added FormControl class - Added various methods to the FormField, FormFields and OutputDocument classes related to FormControl objects and the manipulation and output of form submission values. - Added Config and related classes - Added TagType class and subclasses - Added various tag search methods to the Source and Segment classes including searches by TagType, attribute values, and other criteria. 
- Added AttributesOutputSegment class - Added Util class - Added OverlappingOutputSegmentsException class - Added many other methods to existing classes - Documentation improvements 1.4.1 (2005-11-10) - Bug Fixes: - [1065861] Named StartTag search did not find a tag immediately following a comment - Unnamed StartTag search did not find a comment if the search starts at the first character of the comment - Character references in FormField.getPredefinedValues() items were not decoded - FormControlType.SELECT_SINGLE.allowsMultipleValues() returned false instead of the correct value of true, resulting in the same incorrect value from FormField.allowMultipleValues() when multiple SELECT_SINGLE controls with the same name were present in the form 1.4 (2004-09-02) - Added CharacterEntityReference and NumericCharacterReference classes - Added CharOutputSegment class - Attributes allow whitespace around '=' sign - Added convenience method Element.getAttributes() - Some documentation improvements 1.3 (2004-07-25) - Deprecated Source.getSourceTextLowerCase() - Added ignoreWhenParsing methods to Source and Segment classes (See sample called JSPTest) - Added parseAttributes methods to Source, Segment and StartTag classes - Added ability to search for tags in a specified namespace - Added BlankOutputSegment class - Fixed bug relating to HTML comments with alphabetic characters immediately following the opening Deprecated List (Jericho HTML Parser 3.1)

                    Deprecated API


                    Contents
                    Deprecated Fields
                    net.htmlparser.jericho.Source.LegacyIteratorCompatabilityMode
                              Modify existing code to explicitly handle CharacterReference segments. 
                     



                    jericho-html-3.1/docs/javadoc/allclasses-noframe.html0000644000175000017500000001361211214132424022675 0ustar twernertwerner All Classes (Jericho HTML Parser 3.1) All Classes
                    Attribute
                    Attributes
                    BasicLogFormatter
                    CharacterEntityReference
                    CharacterReference
                    CharStreamSource
                    CharStreamSourceUtil
                    Config
                    Config.CompatibilityMode
                    Element
                    EndTag
                    EndTagType
                    EndTagTypeGenericImplementation
                    FormControl
                    FormControlOutputStyle
                    FormControlOutputStyle.ConfigDisplayValue
                    FormControlType
                    FormField
                    FormFields
                    HTMLElementName
                    HTMLElements
                    Logger
                    LoggerProvider
                    MasonTagTypes
                    MicrosoftTagTypes
                    NumericCharacterReference
                    OutputDocument
                    OutputSegment
                    ParseText
                    PHPTagTypes
                    Renderer
                    RowColumnVector
                    Segment
                    Source
                    SourceCompactor
                    SourceFormatter
                    StartTag
                    StartTagType
                    StartTagTypeGenericImplementation
                    StreamedSource
                    Tag
                    TagType
                    TextExtractor
                    Util
                    WriterLogger
                    jericho-html-3.1/docs/javadoc/allclasses-frame-alphabetical.html0000644000175000017500000001247011172063262024756 0ustar twernertwerner All Classes (Jericho HTML Parser)
                    All Classes (alphabetical) » categorised

                    jericho-html-3.1/docs/javadoc/index.html0000644000175000017500000000246411214132424020234 0ustar twernertwerner Jericho HTML Parser 3.1 <H2> Frame Alert</H2> <P> This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. <BR> Link to<A HREF="net/htmlparser/jericho/package-summary.html">Non-frame version.</A> jericho-html-3.1/docs/javadoc/overview-tree.html0000644000175000017500000003124411214132424021726 0ustar twernertwerner Class Hierarchy (Jericho HTML Parser 3.1)

                    Hierarchy For All Packages

                    Package Hierarchies:
                    net.htmlparser.jericho

                    Class Hierarchy

                    Interface Hierarchy

                    Enum Hierarchy



                    jericho-html-3.1/docs/javadoc/constant-values.html0000644000175000017500000042367711214132424022270 0ustar twernertwerner Constant Field Values (Jericho HTML Parser 3.1)

                    Constant Field Values


                    Contents
                    net.htmlparser.*

                    net.htmlparser.jericho.CharacterEntityReference
                    public static final char _aacute 225
                    public static final char _Aacute 193
                    public static final char _acirc 226
                    public static final char _Acirc 194
                    public static final char _acute 180
                    public static final char _aelig 230
                    public static final char _AElig 198
                    public static final char _agrave 224
                    public static final char _Agrave 192
                    public static final char _alefsym 8501
                    public static final char _alpha 945
                    public static final char _Alpha 913
                    public static final char _amp 38
                    public static final char _and 8743
                    public static final char _ang 8736
                    public static final char _apos 39
                    public static final char _aring 229
                    public static final char _Aring 197
                    public static final char _asymp 8776
                    public static final char _atilde 227
                    public static final char _Atilde 195
                    public static final char _auml 228
                    public static final char _Auml 196
                    public static final char _bdquo 8222
                    public static final char _beta 946
                    public static final char _Beta 914
                    public static final char _brvbar 166
                    public static final char _bull 8226
                    public static final char _cap 8745
                    public static final char _ccedil 231
                    public static final char _Ccedil 199
                    public static final char _cedil 184
                    public static final char _cent 162
                    public static final char _chi 967
                    public static final char _Chi 935
                    public static final char _circ 710
                    public static final char _clubs 9827
                    public static final char _cong 8773
                    public static final char _copy 169
                    public static final char _crarr 8629
                    public static final char _cup 8746
                    public static final char _curren 164
                    public static final char _dagger 8224
                    public static final char _Dagger 8225
                    public static final char _darr 8595
                    public static final char _dArr 8659
                    public static final char _deg 176
                    public static final char _delta 948
                    public static final char _Delta 916
                    public static final char _diams 9830
                    public static final char _divide 247
                    public static final char _eacute 233
                    public static final char _Eacute 201
                    public static final char _ecirc 234
                    public static final char _Ecirc 202
                    public static final char _egrave 232
                    public static final char _Egrave 200
                    public static final char _empty 8709
                    public static final char _emsp 8195
                    public static final char _ensp 8194
                    public static final char _epsilon 949
                    public static final char _Epsilon 917
                    public static final char _equiv 8801
                    public static final char _eta 951
                    public static final char _Eta 919
                    public static final char _eth 240
                    public static final char _ETH 208
                    public static final char _euml 235
                    public static final char _Euml 203
                    public static final char _euro 8364
                    public static final char _exist 8707
                    public static final char _fnof 402
                    public static final char _forall 8704
                    public static final char _frac12 189
                    public static final char _frac14 188
                    public static final char _frac34 190
                    public static final char _frasl 8260
                    public static final char _gamma 947
                    public static final char _Gamma 915
                    public static final char _ge 8805
                    public static final char _gt 62
                    public static final char _harr 8596
                    public static final char _hArr 8660
                    public static final char _hearts 9829
                    public static final char _hellip 8230
                    public static final char _iacute 237
                    public static final char _Iacute 205
                    public static final char _icirc 238
                    public static final char _Icirc 206
                    public static final char _iexcl 161
                    public static final char _igrave 236
                    public static final char _Igrave 204
                    public static final char _image 8465
                    public static final char _infin 8734
                    public static final char _int 8747
                    public static final char _iota 953
                    public static final char _Iota 921
                    public static final char _iquest 191
                    public static final char _isin 8712
                    public static final char _iuml 239
                    public static final char _Iuml 207
                    public static final char _kappa 954
                    public static final char _Kappa 922
                    public static final char _lambda 955
                    public static final char _Lambda 923
                    public static final char _lang 9001
                    public static final char _laquo 171
                    public static final char _larr 8592
                    public static final char _lArr 8656
                    public static final char _lceil 8968
                    public static final char _ldquo 8220
                    public static final char _le 8804
                    public static final char _lfloor 8970
                    public static final char _lowast 8727
                    public static final char _loz 9674
                    public static final char _lrm 8206
                    public static final char _lsaquo 8249
                    public static final char _lsquo 8216
                    public static final char _lt 60
                    public static final char _macr 175
                    public static final char _mdash 8212
                    public static final char _micro 181
                    public static final char _middot 183
                    public static final char _minus 8722
                    public static final char _mu 956
                    public static final char _Mu 924
                    public static final char _nabla 8711
                    public static final char _nbsp 160
                    public static final char _ndash 8211
                    public static final char _ne 8800
                    public static final char _ni 8715
                    public static final char _not 172
                    public static final char _notin 8713
                    public static final char _nsub 8836
                    public static final char _ntilde 241
                    public static final char _Ntilde 209
                    public static final char _nu 957
                    public static final char _Nu 925
                    public static final char _oacute 243
                    public static final char _Oacute 211
                    public static final char _ocirc 244
                    public static final char _Ocirc 212
                    public static final char _oelig 339
                    public static final char _OElig 338
                    public static final char _ograve 242
                    public static final char _Ograve 210
                    public static final char _oline 8254
                    public static final char _omega 969
                    public static final char _Omega 937
                    public static final char _omicron 959
                    public static final char _Omicron 927
                    public static final char _oplus 8853
                    public static final char _or 8744
                    public static final char _ordf 170
                    public static final char _ordm 186
                    public static final char _oslash 248
                    public static final char _Oslash 216
                    public static final char _otilde 245
                    public static final char _Otilde 213
                    public static final char _otimes 8855
                    public static final char _ouml 246
                    public static final char _Ouml 214
                    public static final char _para 182
                    public static final char _part 8706
                    public static final char _permil 8240
                    public static final char _perp 8869
                    public static final char _phi 966
                    public static final char _Phi 934
                    public static final char _pi 960
                    public static final char _Pi 928
                    public static final char _piv 982
                    public static final char _plusmn 177
                    public static final char _pound 163
                    public static final char _prime 8242
                    public static final char _Prime 8243
                    public static final char _prod 8719
                    public static final char _prop 8733
                    public static final char _psi 968
                    public static final char _Psi 936
                    public static final char _quot 34
                    public static final char _radic 8730
                    public static final char _rang 9002
                    public static final char _raquo 187
                    public static final char _rarr 8594
                    public static final char _rArr 8658
                    public static final char _rceil 8969
                    public static final char _rdquo 8221
                    public static final char _real 8476
                    public static final char _reg 174
                    public static final char _rfloor 8971
                    public static final char _rho 961
                    public static final char _Rho 929
                    public static final char _rlm 8207
                    public static final char _rsaquo 8250
                    public static final char _rsquo 8217
                    public static final char _sbquo 8218
                    public static final char _scaron 353
                    public static final char _Scaron 352
                    public static final char _sdot 8901
                    public static final char _sect 167
                    public static final char _shy 173
                    public static final char _sigma 963
                    public static final char _Sigma 931
                    public static final char _sigmaf 962
                    public static final char _sim 8764
                    public static final char _spades 9824
                    public static final char _sub 8834
                    public static final char _sube 8838
                    public static final char _sum 8721
                    public static final char _sup 8835
                    public static final char _sup1 185
                    public static final char _sup2 178
                    public static final char _sup3 179
                    public static final char _supe 8839
                    public static final char _szlig 223
                    public static final char _tau 964
                    public static final char _Tau 932
                    public static final char _there4 8756
                    public static final char _theta 952
                    public static final char _Theta 920
                    public static final char _thetasym 977
                    public static final char _thinsp 8201
                    public static final char _thorn 254
                    public static final char _THORN 222
                    public static final char _tilde 732
                    public static final char _times 215
                    public static final char _trade 8482
                    public static final char _uacute 250
                    public static final char _Uacute 218
                    public static final char _uarr 8593
                    public static final char _uArr 8657
                    public static final char _ucirc 251
                    public static final char _Ucirc 219
                    public static final char _ugrave 249
                    public static final char _Ugrave 217
                    public static final char _uml 168
                    public static final char _upsih 978
                    public static final char _upsilon 965
                    public static final char _Upsilon 933
                    public static final char _uuml 252
                    public static final char _Uuml 220
                    public static final char _weierp 8472
                    public static final char _xi 958
                    public static final char _Xi 926
                    public static final char _yacute 253
                    public static final char _Yacute 221
                    public static final char _yen 165
                    public static final char _yuml 255
                    public static final char _Yuml 376
                    public static final char _zeta 950
                    public static final char _Zeta 918
                    public static final char _zwj 8205
                    public static final char _zwnj 8204

                    net.htmlparser.jericho.CharacterReference
                    public static final int INVALID_CODE_POINT -1

                    net.htmlparser.jericho.Config.CompatibilityMode
                    public static final int CODE_POINTS_ALL 1114111
                    public static final int CODE_POINTS_NONE -1

                    net.htmlparser.jericho.HTMLElementName
                    public static final java.lang.String A "a"
                    public static final java.lang.String ABBR "abbr"
                    public static final java.lang.String ACRONYM "acronym"
                    public static final java.lang.String ADDRESS "address"
                    public static final java.lang.String APPLET "applet"
                    public static final java.lang.String AREA "area"
                    public static final java.lang.String B "b"
                    public static final java.lang.String BASE "base"
                    public static final java.lang.String BASEFONT "basefont"
                    public static final java.lang.String BDO "bdo"
                    public static final java.lang.String BIG "big"
                    public static final java.lang.String BLOCKQUOTE "blockquote"
                    public static final java.lang.String BODY "body"
                    public static final java.lang.String BR "br"
                    public static final java.lang.String BUTTON "button"
                    public static final java.lang.String CAPTION "caption"
                    public static final java.lang.String CENTER "center"
                    public static final java.lang.String CITE "cite"
                    public static final java.lang.String CODE "code"
                    public static final java.lang.String COL "col"
                    public static final java.lang.String COLGROUP "colgroup"
                    public static final java.lang.String DD "dd"
                    public static final java.lang.String DEL "del"
                    public static final java.lang.String DFN "dfn"
                    public static final java.lang.String DIR "dir"
                    public static final java.lang.String DIV "div"
                    public static final java.lang.String DL "dl"
                    public static final java.lang.String DT "dt"
                    public static final java.lang.String EM "em"
                    public static final java.lang.String FIELDSET "fieldset"
                    public static final java.lang.String FONT "font"
                    public static final java.lang.String FORM "form"
                    public static final java.lang.String FRAME "frame"
                    public static final java.lang.String FRAMESET "frameset"
                    public static final java.lang.String H1 "h1"
                    public static final java.lang.String H2 "h2"
                    public static final java.lang.String H3 "h3"
                    public static final java.lang.String H4 "h4"
                    public static final java.lang.String H5 "h5"
                    public static final java.lang.String H6 "h6"
                    public static final java.lang.String HEAD "head"
                    public static final java.lang.String HR "hr"
                    public static final java.lang.String HTML "html"
                    public static final java.lang.String I "i"
                    public static final java.lang.String IFRAME "iframe"
                    public static final java.lang.String IMG "img"
                    public static final java.lang.String INPUT "input"
                    public static final java.lang.String INS "ins"
                    public static final java.lang.String ISINDEX "isindex"
                    public static final java.lang.String KBD "kbd"
                    public static final java.lang.String LABEL "label"
                    public static final java.lang.String LEGEND "legend"
                    public static final java.lang.String LI "li"
                    public static final java.lang.String LINK "link"
                    public static final java.lang.String MAP "map"
                    public static final java.lang.String MENU "menu"
                    public static final java.lang.String META "meta"
                    public static final java.lang.String NOFRAMES "noframes"
                    public static final java.lang.String NOSCRIPT "noscript"
                    public static final java.lang.String OBJECT "object"
                    public static final java.lang.String OL "ol"
                    public static final java.lang.String OPTGROUP "optgroup"
                    public static final java.lang.String OPTION "option"
                    public static final java.lang.String P "p"
                    public static final java.lang.String PARAM "param"
                    public static final java.lang.String PRE "pre"
                    public static final java.lang.String Q "q"
                    public static final java.lang.String S "s"
                    public static final java.lang.String SAMP "samp"
                    public static final java.lang.String SCRIPT "script"
                    public static final java.lang.String SELECT "select"
                    public static final java.lang.String SMALL "small"
                    public static final java.lang.String SPAN "span"
                    public static final java.lang.String STRIKE "strike"
                    public static final java.lang.String STRONG "strong"
                    public static final java.lang.String STYLE "style"
                    public static final java.lang.String SUB "sub"
                    public static final java.lang.String SUP "sup"
                    public static final java.lang.String TABLE "table"
                    public static final java.lang.String TBODY "tbody"
                    public static final java.lang.String TD "td"
                    public static final java.lang.String TEXTAREA "textarea"
                    public static final java.lang.String TFOOT "tfoot"
                    public static final java.lang.String TH "th"
                    public static final java.lang.String THEAD "thead"
                    public static final java.lang.String TITLE "title"
                    public static final java.lang.String TR "tr"
                    public static final java.lang.String TT "tt"
                    public static final java.lang.String U "u"
                    public static final java.lang.String UL "ul"
                    public static final java.lang.String VAR "var"

                    net.htmlparser.jericho.ParseText
                    public static final int NO_BREAK -1



                    jericho-html-3.1/docs/javadoc/package-list0000644000175000017500000000003011214132422020507 0ustar twernertwernernet.htmlparser.jericho jericho-html-3.1/docs/javadoc/allclasses-frame.html0000644000175000017500000001315011172063330022337 0ustar twernertwerner All Classes (Jericho HTML Parser)
                    All Classes (categorised) » alphabetical

                    Document Segments:

                    Tag Types:

                    HTML:

                    Output:

                    Document Manipulation/Output:

                    Form Data:

                    Extended Tag Types:

                    Configuration:

                    Logging:

                    Utilities:

                    Custom Tag Type Creation:

                    jericho-html-3.1/docs/javadoc/stylesheet.css0000644000175000017500000000502111214132424021132 0ustar twernertwerner/* Javadoc style sheet */ /* Define colors, fonts and other style attributes here to override the defaults */ /* Page background color */ body { background-color: #FFFFFF } /* Headings */ h1 { font-size: 145% } /* Table colors */ .TableHeadingColor { background: #CCCCFF } /* Dark mauve */ .TableSubHeadingColor { background: #EEEEFF } /* Light mauve */ .TableRowColor { background: #FFFFFF } /* White */ /* Font used in left-hand frame lists */ .FrameTitleFont { font-size: 100%; font-family: Helvetica, Arial, sans-serif } .FrameHeadingFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif } .FrameItemFont { font-size: 90%; font-family: Helvetica, Arial, sans-serif } /* Navigation bar fonts and colors */ .NavBarCell1 { background-color:#EEEEFF;} /* Light mauve */ .NavBarCell1Rev { background-color:#00008B;} /* Dark Blue */ .NavBarFont1 { font-family: Arial, Helvetica, sans-serif; color:#000000;} .NavBarFont1Rev { font-family: Arial, Helvetica, sans-serif; color:#FFFFFF;} .NavBarCell2 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF;} .NavBarCell3 { font-family: Arial, Helvetica, sans-serif; background-color:#FFFFFF;} body, code var {font-family: Arial,sans-serif; font-size: 10pt} var {font-style: italic} samp {font-family: sans-serif; font-size: 20px} code {white-space: nowrap} th {text-align: left} th,td {vertical-align: top; border-width: 1px} table {border-width: 1px; border-style: none} table.bordered {border-collapse: collapse} table.bordered td, table.bordered th {border-style: solid; border-color: black; padding: 0px 15px 0px 15px; vertical-align: middle} table.CompactDL {border-collapse: collapse; border-style: solid; margin-top: 0.5em; margin-bottom: 0.5em} table.CompactDL td {border-bottom-style: solid; padding-left: 0.5em; padding-right: 0.5em} dt {font-weight: bold} .Separated dd, .Separated li 
{margin-bottom: 1.6em; margin-top: 1.6em} .HalfSeparated li {margin-bottom: 0.5em; margin-top: 0.5em} .Unseparated li {margin-bottom: 0; margin-top: 0} blockquote {margin-bottom: 0; margin-top: 0} blockquote.code {margin-bottom: 10px; margin-top: 10px} .SmallVerticalMargin {margin-bottom: 0.5em; margin-top: 0.5em} dl.compact dt {display: compact} dl.compact dd {margin-left: 4em; margin-bottom: 0; margin-top: 0} ol ol {list-style-type: lower-roman} pre {margin: 0} .AllClassesBody h4 {font-size: 9pt; font-weight: bold; margin-top: 7px; padding-top: 2px; margin-bottom: 0; border-top-style: inset; border-width: 1px} jericho-html-3.1/docs/javadoc/net/0000755000175000017500000000000011214132420017013 5ustar twernertwernerjericho-html-3.1/docs/javadoc/net/htmlparser/0000755000175000017500000000000011214132420021174 5ustar twernertwernerjericho-html-3.1/docs/javadoc/net/htmlparser/jericho/0000755000175000017500000000000011214132424022623 5ustar twernertwernerjericho-html-3.1/docs/javadoc/net/htmlparser/jericho/SourceCompactor.html0000644000175000017500000005104311214132422026622 0ustar twernertwerner SourceCompactor (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class SourceCompactor

                    java.lang.Object
                      extended by SourceCompactor
                    
                    All Implemented Interfaces:
                    CharStreamSource

                    public final class SourceCompactor
                    extends java.lang.Object
                    implements CharStreamSource

                    Compacts HTML source by removing all unnecessary white space.

                    Use one of the following methods to obtain the output:

                    The output text is functionally equivalent to the original source and should be rendered identically.

                    Compacting an entire Source object performs a full sequential parse automatically.


                    Constructor Summary
                    SourceCompactor(Segment segment)
                              Constructs a new SourceCompactor based on the specified Segment.
                     
                    Method Summary
                     void appendTo(java.lang.Appendable appendable)
                              Appends the output to the specified Appendable object.
                     long getEstimatedMaximumOutputLength()
                              Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
                     java.lang.String getNewLine()
                              Returns the string to be used to represent a newline in the output.
                     SourceCompactor setNewLine(java.lang.String newLine)
                              Sets the string to be used to represent a newline in the output.
                     java.lang.String toString()
                              Returns the output as a string.
                     void writeTo(java.io.Writer writer)
                              Writes the output to the specified Writer.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    SourceCompactor

                    public SourceCompactor(Segment segment)
                    Constructs a new SourceCompactor based on the specified Segment.

                    Parameters:
                    segment - the segment containing the HTML to be compacted.
                    Method Detail

                    writeTo

                    public void writeTo(java.io.Writer writer)
                                 throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Writes the output to the specified Writer.

                    Specified by:
                    writeTo in interface CharStreamSource
                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    appendTo

                    public void appendTo(java.lang.Appendable appendable)
                                  throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Appends the output to the specified Appendable object.

                    Specified by:
                    appendTo in interface CharStreamSource
                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    getEstimatedMaximumOutputLength

                    public long getEstimatedMaximumOutputLength()
                    Description copied from interface: CharStreamSource
                    Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    The returned value should be used as a guide for efficiency purposes only, for example to set an initial StringBuilder capacity. There is no guarantee that the length of the output is indeed less than this value, as classes implementing this method often use assumptions based on typical usage to calculate the estimate.

                    Although implementations of this method should never return a value less than -1, users of this method must not assume that this will always be the case. Standard practice is to interpret any negative value as meaning that no estimate is available.

                    Specified by:
                    getEstimatedMaximumOutputLength in interface CharStreamSource
                    Returns:
                    the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    toString

                    public java.lang.String toString()
                    Description copied from interface: CharStreamSource
                    Returns the output as a string.

                    Specified by:
                    toString in interface CharStreamSource
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the output as a string.

                    setNewLine

                    public SourceCompactor setNewLine(java.lang.String newLine)
                    Sets the string to be used to represent a newline in the output.

                    The default is to use the same new line string as is used in the source document, which is determined via the Source.getNewLine() method. If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document, or using the value from the static Config.NewLine property.

                    Specifying a null argument resets the property to its default value, which is to use the same new line string as is used in the source document.

                    Parameters:
                    newLine - the string to be used to represent a newline in the output, may be null.
                    Returns:
                    this SourceCompactor instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getNewLine()

                    getNewLine

                    public java.lang.String getNewLine()
                    Returns the string to be used to represent a newline in the output.

                    See the setNewLine(String) method for a full description of this property.

                    Returns:
                    the string to be used to represent a newline in the output.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/MicrosoftTagTypes.html0000644000175000017500000004530011214132422027137 0ustar twernertwerner MicrosoftTagTypes (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class MicrosoftTagTypes

                    java.lang.Object
                      extended by MicrosoftTagTypes
                    

                    public final class MicrosoftTagTypes
                    extends java.lang.Object

                    Contains tag types recognised exclusively by Microsoft® Internet Explorer.

                    The tag type defined in this class is not registered by default.


                    Field Summary
                    static StartTagType DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT
                              The tag type given to a Microsoft® downlevel-revealed conditional comment (<![if ... ]> | <![endif]>).
                     
                    Method Summary
                    static boolean defines(TagType tagType)
                              Indicates whether the specified tag type is defined in this class.
                    static boolean isConditionalCommentEndifTag(Tag tag)
                              Indicates whether the specified tag is a downlevel-revealed conditional comment "endif" tag (<![endif]>).
                    static boolean isConditionalCommentIfTag(Tag tag)
                              Indicates whether the specified tag is a downlevel-revealed conditional comment "if" tag (<![if ... ]>).
                    static void register()
                              Registers all of the tag types defined in this class at once.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Field Detail

                    DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT

                    public static final StartTagType DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT
                    The tag type given to a Microsoft® downlevel-revealed conditional comment (<![if ... ]> | <![endif]>).

                    The only valid names for tags of this type are "![if" and "![endif".

                    This start tag type is used to represent both the "if" and "endif" tags. Because the "endif" tag cannot be represented by an end tag type (it doesn't start with "</"), the parser makes no attempt to match if-endif tag pairs to form elements.

                    The isConditionalCommentIfTag(Tag) and isConditionalCommentEndifTag(Tag) methods provide an efficient means of determining whether a given tag is of the "if" or "endif" variety.

                    The expression constituting the condition of an "if" tag can be extracted using the StartTag.getTagContent() method. For example, if the variable conditionalCommentIfTag represents the tag <![if !IE]>, then the expression conditionalCommentIfTag.getTagContent().toString().trim() yields the string "!IE".

                    Properties:
                    Property: Value
                    Description: Microsoft downlevel-revealed conditional comment
                    StartDelimiter: <![
                    ClosingDelimiter: ]>
                    IsServerTag: false
                    NamePrefix: ![
                    CorrespondingEndTagType: null
                    HasAttributes: false
                    IsNameAfterPrefixRequired: true
                    Example:
                    <![if !IE]>

                    Method Detail

                    isConditionalCommentIfTag

                    public static boolean isConditionalCommentIfTag(Tag tag)
                    Indicates whether the specified tag is a downlevel-revealed conditional comment "if" tag (<![if ... ]>).

                    Parameters:
                    tag - the Tag to test.
                    Returns:
                    true if the specified tag is a conditional comment "if" tag, otherwise false.

                    isConditionalCommentEndifTag

                    public static boolean isConditionalCommentEndifTag(Tag tag)
                    Indicates whether the specified tag is a downlevel-revealed conditional comment "endif" tag (<![endif]>).

                    Parameters:
                    tag - the Tag to test.
                    Returns:
                    true if the specified tag is a conditional comment "endif" tag, otherwise false.

                    register

                    public static void register()
                    Registers all of the tag types defined in this class at once.

                    The tag types must be registered before the parser will recognise them.


                    defines

                    public static boolean defines(TagType tagType)
                    Indicates whether the specified tag type is defined in this class.

                    Parameters:
                    tagType - the TagType to test.
                    Returns:
                    true if the specified tag type is defined in this class, otherwise false.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/TextExtractor.html0000644000175000017500000011520011214132422026326 0ustar twernertwerner TextExtractor (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class TextExtractor

                    java.lang.Object
                      extended by TextExtractor
                    
                    All Implemented Interfaces:
                    CharStreamSource

                    public class TextExtractor
                    extends java.lang.Object
                    implements CharStreamSource

                    Extracts the textual content from HTML markup.

                    The output is ideal for feeding into a text search engine such as Apache Lucene, especially when the IncludeAttributes property has been set to true.

                    Use one of the following methods to obtain the output:

                    The process removes all of the tags and decodes the result, collapsing all white space. A space character is included in the output where a normal tag is present in the source, unless the tag belongs to an inline-level element. An exception to this is the BR element, which is also converted to a space despite being an inline-level element.

                    Text inside SCRIPT and STYLE elements contained within this segment is ignored.

                    Setting the ExcludeNonHTMLElements property results in the exclusion of any content within a non-HTML element.

                    See the excludeElement(StartTag) method for details on how to implement a more complex mechanism to determine whether the content of each Element is to be excluded from the output.

                    All tags that are not normal tags, such as server tags, comments etc., are removed from the output without adding white space to the output.

                    Note that segments on which the Segment.ignoreWhenParsing() method has been called are treated as text rather than markup, resulting in their inclusion in the output. To remove specific segments before extracting the text, create an OutputDocument and call its remove(Segment) or replaceWithSpaces(int begin, int end) method for each segment to be removed. Then create a new source document using new Source(outputDocument.toString()) and perform the text extraction on this new source object.

                    Extracting the text from an entire Source object performs a full sequential parse automatically.

                    To perform a simple rendering of HTML markup into text, which is more readable than the output of this class, use the Renderer class instead.

                    Example:
                    Using the default settings, the source segment:
                    "<div><b>O</b>ne</div><div title="Two"><b>Th</b><script>//a script </script>ree</div>"
                    produces the text "One Two Three".


                    Constructor Summary
                    TextExtractor(Segment segment)
                              Constructs a new TextExtractor based on the specified Segment.
                     
                    Method Summary
                     void appendTo(java.lang.Appendable appendable)
                              Appends the output to the specified Appendable object.
                     boolean excludeElement(StartTag startTag)
                              Indicates whether the text inside the Element of the specified start tag should be excluded from the output.
                     boolean getConvertNonBreakingSpaces()
                              Indicates whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                     long getEstimatedMaximumOutputLength()
                              Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
                     boolean getExcludeNonHTMLElements()
                              Indicates whether the content of non-HTML elements is excluded from the output.
                     boolean getIncludeAttributes()
                              Indicates whether any attribute values are included in the output.
                     boolean includeAttribute(StartTag startTag, Attribute attribute)
                              Indicates whether the value of the specified attribute in the specified start tag is included in the output.
                     TextExtractor setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces)
                              Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                     TextExtractor setExcludeNonHTMLElements(boolean excludeNonHTMLElements)
                              Sets whether the content of non-HTML elements is excluded from the output.
                     TextExtractor setIncludeAttributes(boolean includeAttributes)
                              Sets whether any attribute values are included in the output.
                     java.lang.String toString()
                              Returns the output as a string.
                     void writeTo(java.io.Writer writer)
                              Writes the output to the specified Writer.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    TextExtractor

                    public TextExtractor(Segment segment)
                    Constructs a new TextExtractor based on the specified Segment.

                    Parameters:
                    segment - the segment from which the text will be extracted.
                    See Also:
                    Segment.getTextExtractor()
                    Method Detail

                    writeTo

                    public void writeTo(java.io.Writer writer)
                                 throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Writes the output to the specified Writer.

                    Specified by:
                    writeTo in interface CharStreamSource
                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    appendTo

                    public void appendTo(java.lang.Appendable appendable)
                                  throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Appends the output to the specified Appendable object.

                    Specified by:
                    appendTo in interface CharStreamSource
                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    getEstimatedMaximumOutputLength

                    public long getEstimatedMaximumOutputLength()
                    Description copied from interface: CharStreamSource
                    Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    The returned value should be used as a guide for efficiency purposes only, for example to set an initial StringBuilder capacity. There is no guarantee that the length of the output is indeed less than this value, as classes implementing this method often use assumptions based on typical usage to calculate the estimate.

                    Although implementations of this method should never return a value less than -1, users of this method must not assume that this will always be the case. Standard practice is to interpret any negative value as meaning that no estimate is available.

                    Specified by:
                    getEstimatedMaximumOutputLength in interface CharStreamSource
                    Returns:
                    the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    toString

                    public java.lang.String toString()
                    Description copied from interface: CharStreamSource
                    Returns the output as a string.

                    Specified by:
                    toString in interface CharStreamSource
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the output as a string.

                    setConvertNonBreakingSpaces

                    public TextExtractor setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces)
                    Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.

                    The default value is that of the static Config.ConvertNonBreakingSpaces property at the time the TextExtractor is instantiated.

                    Parameters:
                    convertNonBreakingSpaces - specifies whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                    Returns:
                    this TextExtractor instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getConvertNonBreakingSpaces()

                    getConvertNonBreakingSpaces

                    public boolean getConvertNonBreakingSpaces()
                    Indicates whether non-breaking space (&nbsp;) character entity references are converted to spaces.

                    See the setConvertNonBreakingSpaces(boolean) method for a full description of this property.

                    Returns:
                    true if non-breaking space (&nbsp;) character entity references are converted to spaces, otherwise false.

                    setIncludeAttributes

                    public TextExtractor setIncludeAttributes(boolean includeAttributes)
                    Sets whether any attribute values are included in the output.

                    If the value of this property is true, then each attribute still has to match the conditions implemented in the includeAttribute(StartTag,Attribute) method in order for its value to be included in the output.

                    The default value is false.

                    Parameters:
                    includeAttributes - specifies whether any attribute values are included in the output.
                    Returns:
                    this TextExtractor instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getIncludeAttributes()

                    getIncludeAttributes

                    public boolean getIncludeAttributes()
                    Indicates whether any attribute values are included in the output.

                    See the setIncludeAttributes(boolean) method for a full description of this property.

                    Returns:
                    true if any attribute values are included in the output, otherwise false.

                    includeAttribute

                    public boolean includeAttribute(StartTag startTag,
                                                    Attribute attribute)
                    Indicates whether the value of the specified attribute in the specified start tag is included in the output.

                    This method is ignored if the IncludeAttributes property is set to false, in which case no attribute values are included in the output.

                    If the IncludeAttributes property is set to true, every attribute of every start tag encountered in the segment is checked using this method to determine whether the value of the attribute should be included in the output.

                    The default implementation of this method returns true if the name of the specified attribute is one of title, alt, label, summary, content*, or href, but the method can be overridden in a subclass to perform a check of arbitrary complexity on each attribute.

                    * The value of a content attribute is only included if a name attribute is also present in the specified start tag, as the content attribute of a META tag only contains human readable text if the name attribute is used as opposed to an http-equiv attribute.

                    Example:
                    To include only the value of title and alt attributes:

                    final Set includeAttributeNames=new HashSet(Arrays.asList(new String[] {"title","alt"}));
                    TextExtractor textExtractor=new TextExtractor(segment) {
                        public boolean includeAttribute(StartTag startTag, Attribute attribute) {
                            return includeAttributeNames.contains(attribute.getKey());
                        }
                    };
                    textExtractor.setIncludeAttributes(true);
                    String extractedText=textExtractor.toString();

                    Parameters:
                    startTag - the start tag containing the specified attribute.
                    attribute - the attribute to check for inclusion.
                    Returns:
                    true if the value of the specified attribute in the specified start tag is included in the output, otherwise false.

                    setExcludeNonHTMLElements

                    public TextExtractor setExcludeNonHTMLElements(boolean excludeNonHTMLElements)
                    Sets whether the content of non-HTML elements is excluded from the output.

                    The default value is false, meaning that content from all elements meeting the other criteria is included.

                    Parameters:
                    excludeNonHTMLElements - specifies whether the content of non-HTML elements is excluded from the output.
                    Returns:
                    this TextExtractor instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getExcludeNonHTMLElements()

                    getExcludeNonHTMLElements

                    public boolean getExcludeNonHTMLElements()
                    Indicates whether the content of non-HTML elements is excluded from the output.

                    See the setExcludeNonHTMLElements(boolean) method for a full description of this property.

                    Returns:
                    true if the content of non-HTML elements is excluded from the output, otherwise false.

                    excludeElement

                    public boolean excludeElement(StartTag startTag)
                    Indicates whether the text inside the Element of the specified start tag should be excluded from the output.

                    During the text extraction process, every start tag encountered in the segment is checked using this method to determine whether the text inside its associated element should be excluded from the output.

                    The default implementation of this method is to always return false, so that every element is included, but the method can be overridden in a subclass to perform a check of arbitrary complexity on each start tag.

                    All elements nested inside an excluded element are also implicitly excluded, as are all SCRIPT and STYLE elements. Such elements are skipped over without calling this method, so there is no way to include them by overriding the method.

                    Example:
                    To extract the text from a segment, excluding any text inside elements with the attribute class="NotIndexed":

                    TextExtractor textExtractor=new TextExtractor(segment) {
                        public boolean excludeElement(StartTag startTag) {
                            return "NotIndexed".equalsIgnoreCase(startTag.getAttributeValue("class"));
                        }
                    };
                    String extractedText=textExtractor.toString();

                    Parameters:
                    startTag - the start tag of the element to check for inclusion.
                    Returns:
                    true if the text inside the Element of the specified start tag should be excluded from the output, otherwise false.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Segment.html0000644000175000017500000036406411214132422025126 0ustar twernertwerner Segment (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Segment

                    java.lang.Object
                      extended by Segment
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>
                    Direct Known Subclasses:
                    Attribute, CharacterReference, Element, FormControl, net.htmlparser.jericho.nodoc.SequentialListSegment, Source, Tag

                    public class Segment
                    extends java.lang.Object
                    implements java.lang.Comparable<Segment>, java.lang.CharSequence

                    Represents a segment of a Source document.

                    Many of the tag search methods are defined in this class.

                    The span of a segment is defined by the combination of its begin and end character positions.


                    Constructor Summary
                    Segment(Source source, int begin, int end)
                              Constructs a new Segment within the specified source document with the specified begin and end character positions.
                     
                    Method Summary
                     char charAt(int index)
                              Returns the character at the specified index.
                     int compareTo(Segment segment)
                              Compares this Segment object to another object.
                     boolean encloses(int pos)
                              Indicates whether this segment encloses the specified character position in the source document.
                     boolean encloses(Segment segment)
                              Indicates whether this Segment encloses the specified Segment.
                     boolean equals(java.lang.Object object)
                              Compares the specified object with this Segment for equality.
                     java.util.List<CharacterReference> getAllCharacterReferences()
                              Returns a list of all CharacterReference objects that are enclosed by this segment.
                     java.util.List<Element> getAllElements()
                              Returns a list of all Element objects that are enclosed by this segment.
                     java.util.List<Element> getAllElements(StartTagType startTagType)
                              Returns a list of all Element objects with start tags of the specified type that are enclosed by this segment.
                     java.util.List<Element> getAllElements(java.lang.String name)
                              Returns a list of all Element objects with the specified name that are enclosed by this segment.
                     java.util.List<Element> getAllElements(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns a list of all Element objects with the specified attribute name and value pattern that are enclosed by this segment.
                     java.util.List<Element> getAllElements(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns a list of all Element objects with the specified attribute name/value pair that are enclosed by this segment.
                     java.util.List<Element> getAllElementsByClass(java.lang.String className)
                              Returns a list of all Element objects with the specified class that are enclosed by this segment.
                     java.util.List<StartTag> getAllStartTags()
                              Returns a list of all StartTag objects that are enclosed by this segment.
                     java.util.List<StartTag> getAllStartTags(StartTagType startTagType)
                              Returns a list of all StartTag objects of the specified type that are enclosed by this segment.
                     java.util.List<StartTag> getAllStartTags(java.lang.String name)
                              Returns a list of all normal StartTag objects with the specified name that are enclosed by this segment.
                     java.util.List<StartTag> getAllStartTags(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns a list of all StartTag objects with the specified attribute name and value pattern that are enclosed by this segment.
                     java.util.List<StartTag> getAllStartTags(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns a list of all StartTag objects with the specified attribute name/value pair that are enclosed by this segment.
                     java.util.List<StartTag> getAllStartTagsByClass(java.lang.String className)
                              Returns a list of all StartTag objects with the specified class that are enclosed by this segment.
                     java.util.List<Tag> getAllTags()
                              Returns a list of all Tag objects that are enclosed by this segment.
                     java.util.List<Tag> getAllTags(TagType tagType)
                              Returns a list of all Tag objects of the specified type that are enclosed by this segment.
                     int getBegin()
                              Returns the character position in the Source document at which this segment begins, inclusive.
                     java.util.List<Element> getChildElements()
                              Returns a list of the immediate children of this segment in the document element hierarchy.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     int getEnd()
                              Returns the character position in the Source document immediately after the end of this segment.
                     Element getFirstElement()
                              Returns the first Element enclosed by this segment.
                     Element getFirstElement(java.lang.String name)
                              Returns the first normal Element with the specified name enclosed by this segment.
                     Element getFirstElement(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the first Element with the specified attribute name and value pattern that is enclosed by this segment.
                     Element getFirstElement(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the first Element with the specified attribute name/value pair enclosed by this segment.
                     Element getFirstElementByClass(java.lang.String className)
                              Returns the first Element with the specified class that is enclosed by this segment.
                     StartTag getFirstStartTag()
                              Returns the first StartTag enclosed by this segment.
                     StartTag getFirstStartTag(StartTagType startTagType)
                              Returns the first StartTag of the specified type enclosed by this segment.
                     StartTag getFirstStartTag(java.lang.String name)
                              Returns the first normal StartTag with the specified name enclosed by this segment.
                     StartTag getFirstStartTag(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the first StartTag with the specified attribute name and value pattern that is enclosed by this segment.
                     StartTag getFirstStartTag(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the first StartTag with the specified attribute name/value pair enclosed by this segment.
                     StartTag getFirstStartTagByClass(java.lang.String className)
                              Returns the first StartTag with the specified class that is enclosed by this segment.
                     java.util.List<FormControl> getFormControls()
                              Returns a list of the FormControl objects that are enclosed by this segment.
                     FormFields getFormFields()
                              Returns the FormFields object representing all form fields that are enclosed by this segment.
                     java.util.Iterator<Segment> getNodeIterator()
                              Returns an iterator over every tag, character reference and plain text segment contained within this segment.
                     Renderer getRenderer()
                              Performs a simple rendering of the HTML markup in this segment into text.
                     Source getSource()
                              Returns the Source document containing this segment.
                     TextExtractor getTextExtractor()
                              Extracts the textual content from the HTML markup of this segment.
                     int hashCode()
                              Returns a hash code value for the segment.
                     void ignoreWhenParsing()
                              Causes this segment to be ignored when parsing.
                     boolean isWhiteSpace()
                              Indicates whether this segment consists entirely of white space.
                    static boolean isWhiteSpace(char ch)
                              Indicates whether the specified character is white space.
                     int length()
                              Returns the length of the segment.
                     Attributes parseAttributes()
                              Parses any Attributes within this segment.
                     java.lang.CharSequence subSequence(int beginIndex, int endIndex)
                              Returns a new character sequence that is a subsequence of this sequence.
                     java.lang.String toString()
                              Returns the source text of this segment as a String.
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    Segment

                    public Segment(Source source,
                                   int begin,
                                   int end)
                    Constructs a new Segment within the specified source document with the specified begin and end character positions.

                    Parameters:
                    source - the Source document, must not be null.
                    begin - the character position in the source where this segment begins, inclusive.
                    end - the character position in the source where this segment ends, exclusive.
                    Method Detail

                    getSource

                    public final Source getSource()
                    Returns the Source document containing this segment.

                    If a StreamedSource is in use, this method throws an UnsupportedOperationException.

                    Returns:
                    the Source document containing this segment.

                    getBegin

                    public final int getBegin()
                    Returns the character position in the Source document at which this segment begins, inclusive.

                    Returns:
                    the character position in the Source document at which this segment begins, inclusive.

                    getEnd

                    public final int getEnd()
                    Returns the character position in the Source document immediately after the end of this segment.

                    The character at the position specified by this property is not included in the segment.

                    Returns:
                    the character position in the Source document immediately after the end of this segment.

                    equals

                    public final boolean equals(java.lang.Object object)
                    Compares the specified object with this Segment for equality.

                    Returns true if and only if the specified object is also a Segment, and both segments have the same Source, and the same begin and end positions.

                    Overrides:
                    equals in class java.lang.Object
                    Parameters:
                    object - the object to be compared for equality with this Segment.
                    Returns:
                    true if the specified object is equal to this Segment, otherwise false.

                    hashCode

                    public int hashCode()
                    Returns a hash code value for the segment.

                    The current implementation returns the sum of the begin and end positions, although this is not guaranteed in future versions.

                    Overrides:
                    hashCode in class java.lang.Object
                    Returns:
                    a hash code value for the segment.

                    length

                    public int length()
                    Returns the length of the segment. This is defined as the number of characters between the begin and end positions.

                    Specified by:
                    length in interface java.lang.CharSequence
                    Returns:
                    the length of the segment.

                    encloses

                    public final boolean encloses(Segment segment)
                    Indicates whether this Segment encloses the specified Segment.

                    This is the case if getBegin()<=segment.getBegin() && getEnd()>=segment.getEnd().

                    Note that a segment encloses itself.

                    Parameters:
                    segment - the segment to be tested for being enclosed by this segment.
                    Returns:
                    true if this Segment encloses the specified Segment, otherwise false.

                    encloses

                    public final boolean encloses(int pos)
                    Indicates whether this segment encloses the specified character position in the source document.

                    This is the case if getBegin() <= pos < getEnd().

                    Parameters:
                    pos - the position in the Source document.
                    Returns:
                    true if this segment encloses the specified character position in the source document, otherwise false.

                    toString

                    public java.lang.String toString()
                    Returns the source text of this segment as a String.

                    The returned String is newly created with every call to this method, unless this segment is itself an instance of Source.

                    Specified by:
                    toString in interface java.lang.CharSequence
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the source text of this segment as a String.

                    getRenderer

                    public Renderer getRenderer()
                    Performs a simple rendering of the HTML markup in this segment into text.

                    The output can be configured by setting any number of properties on the returned Renderer instance before obtaining its output.

                    Returns:
                    an instance of Renderer based on this segment.
                    See Also:
                    getTextExtractor()

                    getTextExtractor

                    public TextExtractor getTextExtractor()
                    Extracts the textual content from the HTML markup of this segment.

                    The output can be configured by setting properties on the returned TextExtractor instance before obtaining its output.

                    Returns:
                    an instance of TextExtractor based on this segment.
                    See Also:
                    getRenderer()

                    getNodeIterator

                    public java.util.Iterator<Segment> getNodeIterator()
                    Returns an iterator over every tag, character reference and plain text segment contained within this segment.

                    See the Source.iterator() method for a detailed description.

                    Example:

                    The following code demonstrates the typical usage of this method to make an exact copy of this segment to writer (assuming no server tags are present):

                     for (Iterator<Segment> nodeIterator=segment.getNodeIterator(); nodeIterator.hasNext();) {
                       Segment nodeSegment=nodeIterator.next();
                       if (nodeSegment instanceof Tag) {
                         Tag tag=(Tag)nodeSegment;
                         // HANDLE TAG
                         // Uncomment the following line to ensure each tag is valid XML:
                         // writer.write(tag.tidy()); continue;
                       } else if (nodeSegment instanceof CharacterReference) {
                         CharacterReference characterReference=(CharacterReference)nodeSegment;
                         // HANDLE CHARACTER REFERENCE
                         // Uncomment the following line to decode all character references instead of copying them verbatim:
                         // characterReference.appendCharTo(writer); continue;
                       } else {
                         // HANDLE PLAIN TEXT
                       }
                       // unless specific handling has prevented getting to here, simply output the segment as is:
                       writer.write(nodeSegment.toString());
                     }

                    Returns:
                    an iterator over every tag, character reference and plain text segment contained within this segment.

                    getAllTags

                    public java.util.List<Tag> getAllTags()
                    Returns a list of all Tag objects that are enclosed by this segment.

                    The Source.fullSequentialParse() method should be called after construction of the Source object if this method is to be used on a large proportion of the source. It is called automatically if this method is called on the Source object itself.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Returns:
                    a list of all Tag objects that are enclosed by this segment.

                    getAllTags

                    public java.util.List<Tag> getAllTags(TagType tagType)
                    Returns a list of all Tag objects of the specified type that are enclosed by this segment.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Specifying a null argument to the tagType parameter is equivalent to getAllTags().

                    Parameters:
                    tagType - the type of tags to get.
                    Returns:
                    a list of all Tag objects of the specified type that are enclosed by this segment.
                    See Also:
                    getAllStartTags(StartTagType)

                    getAllStartTags

                    public java.util.List<StartTag> getAllStartTags()
                    Returns a list of all StartTag objects that are enclosed by this segment.

                    The Source.fullSequentialParse() method should be called after construction of the Source object if this method is to be used on a large proportion of the source. It is called automatically if this method is called on the Source object itself.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Returns:
                    a list of all StartTag objects that are enclosed by this segment.

                    getAllStartTags

                    public java.util.List<StartTag> getAllStartTags(StartTagType startTagType)
                    Returns a list of all StartTag objects of the specified type that are enclosed by this segment.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Specifying a null argument to the startTagType parameter is equivalent to getAllStartTags().

                    Parameters:
                    startTagType - the type of tags to get.
                    Returns:
                    a list of all StartTag objects of the specified type that are enclosed by this segment.

                    getAllStartTags

                    public java.util.List<StartTag> getAllStartTags(java.lang.String name)
                    Returns a list of all normal StartTag objects with the specified name that are enclosed by this segment.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Specifying a null argument to the name parameter is equivalent to getAllStartTags(), which may include non-normal start tags.

                    This method also returns unregistered tags if the specified name is not a valid XML tag name.

                    Parameters:
                    name - the name of the start tags to get.
                    Returns:
                    a list of all normal StartTag objects with the specified name that are enclosed by this segment.

                    getAllStartTags

                    public java.util.List<StartTag> getAllStartTags(java.lang.String attributeName,
                                                                    java.lang.String value,
                                                                    boolean valueCaseSensitive)
                    Returns a list of all StartTag objects with the specified attribute name/value pair that are enclosed by this segment.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    value - the value of the specified attribute to search for, must not be null.
                    valueCaseSensitive - specifies whether the attribute value matching is case sensitive.
                    Returns:
                    a list of all StartTag objects with the specified attribute name/value pair that are enclosed by this segment.
                    See Also:
                    getAllStartTags(String attributeName, Pattern valueRegexPattern)

                    getAllStartTags

                    public java.util.List<StartTag> getAllStartTags(java.lang.String attributeName,
                                                                    java.util.regex.Pattern valueRegexPattern)
                    Returns a list of all StartTag objects with the specified attribute name and value pattern that are enclosed by this segment.

                    Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, without regard to the attribute value. This will also match an attribute that has no value at all.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    valueRegexPattern - the regular expression pattern that must match the attribute value, may be null.
                    Returns:
                    a list of all StartTag objects with the specified attribute name and value pattern that are enclosed by this segment.
                    See Also:
                    getAllStartTags(String attributeName, String value, boolean valueCaseSensitive)

                    getAllStartTagsByClass

                    public java.util.List<StartTag> getAllStartTagsByClass(java.lang.String className)
                    Returns a list of all StartTag objects with the specified class that are enclosed by this segment.

                    This matches start tags with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple class names separated by white space in the attribute value.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    className - the class name (case sensitive) to search for, must not be null.
                    Returns:
                    a list of all StartTag objects with the specified class that are enclosed by this segment.

                    getChildElements

                    public java.util.List<Element> getChildElements()
                    Returns a list of the immediate children of this segment in the document element hierarchy.

                    The returned list may include an element that extends beyond the end of this segment, as long as it begins within this segment.

                    An element found at the start of this segment is included in the list. Note however that if this segment is an Element, the overriding Element.getChildElements() method is called instead, which only returns the children of the element.

                    Calling getChildElements() on an Element is much more efficient than calling it on a Segment.

                    The objects in the list are all of type Element.

                    The Source.fullSequentialParse() method should be called after construction of the Source object if this method is to be used on a large proportion of the source. It is called automatically if this method is called on the Source object itself.

                    See the Source.getChildElements() method for more details.

                    Returns:
                    a list of the immediate children of this segment in the document element hierarchy, guaranteed not null.
                    See Also:
                    Element.getParentElement()

                    getAllElements

                    public java.util.List<Element> getAllElements()
                    Returns a list of all Element objects that are enclosed by this segment.

                    The Source.fullSequentialParse() method should be called after construction of the Source object if this method is to be used on a large proportion of the source. It is called automatically if this method is called on the Source object itself.

                    The elements returned correspond exactly with the start tags returned in the getAllStartTags() method.

                    If this segment is itself an Element, the result includes this element in the list.

                    Returns:
                    a list of all Element objects that are enclosed by this segment.

                    getAllElements

                    public java.util.List<Element> getAllElements(java.lang.String name)
                    Returns a list of all Element objects with the specified name that are enclosed by this segment.

                    The elements returned correspond with the start tags returned in the getAllStartTags(String name) method, except that elements which are not entirely enclosed by this segment are excluded.

                    Specifying a null argument to the name parameter is equivalent to getAllElements(), which may include elements of non-normal tags.

                    This method also returns elements consisting of unregistered tags if the specified name is not a valid XML tag name.

                    If this segment is itself an Element with the specified name, the result includes this element in the list.

                    Parameters:
                    name - the name of the elements to get.
                    Returns:
                    a list of all Element objects with the specified name that are enclosed by this segment.

                    getAllElements

                    public java.util.List<Element> getAllElements(StartTagType startTagType)
                    Returns a list of all Element objects with start tags of the specified type that are enclosed by this segment.

                    The elements returned correspond with the start tags returned in the getAllStartTags(StartTagType) method, except that elements which are not entirely enclosed by this segment are excluded.

                    If this segment is itself an Element with the specified type, the result includes this element in the list.

                    Parameters:
                    startTagType - the type of start tags to get, must not be null.
                    Returns:
                    a list of all Element objects with start tags of the specified type that are enclosed by this segment.

                    getAllElements

                    public java.util.List<Element> getAllElements(java.lang.String attributeName,
                                                                  java.lang.String value,
                                                                  boolean valueCaseSensitive)
                    Returns a list of all Element objects with the specified attribute name/value pair that are enclosed by this segment.

                    The elements returned correspond with the start tags returned in the getAllStartTags(String attributeName, String value, boolean valueCaseSensitive) method, except that elements which are not entirely enclosed by this segment are excluded.

                    If this segment is itself an Element with the specified name/value pair, the result includes this element in the list.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    value - the value of the specified attribute to search for, must not be null.
                    valueCaseSensitive - specifies whether the attribute value matching is case sensitive.
                    Returns:
                    a list of all Element objects with the specified attribute name/value pair that are enclosed by this segment.
                    See Also:
                    getAllElements(String attributeName, Pattern valueRegexPattern)

                    getAllElements

                    public java.util.List<Element> getAllElements(java.lang.String attributeName,
                                                                  java.util.regex.Pattern valueRegexPattern)
                    Returns a list of all Element objects with the specified attribute name and value pattern that are enclosed by this segment.

                    The elements returned correspond with the start tags returned in the getAllStartTags(String attributeName, Pattern valueRegexPattern) method, except that elements which are not entirely enclosed by this segment are excluded.

                    Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, without regard to the attribute value. This will also match an attribute that has no value at all.

                    If this segment is itself an Element with the specified attribute name and value pattern, the result includes this element in the list.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    valueRegexPattern - the regular expression pattern that must match the attribute value, may be null.
                    Returns:
                    a list of all Element objects with the specified attribute name and value pattern that are enclosed by this segment.
                    See Also:
                    getAllElements(String attributeName, String value, boolean valueCaseSensitive)

                    getAllElementsByClass

                    public java.util.List<Element> getAllElementsByClass(java.lang.String className)
                    Returns a list of all Element objects with the specified class that are enclosed by this segment.

                    This matches elements with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple class names separated by white space in the attribute value.

                    The elements returned correspond with the start tags returned in the getAllStartTagsByClass(String className) method, except that elements which are not entirely enclosed by this segment are excluded.

                    If this segment is itself an Element with the specified class, the result includes this element in the list.

                    Parameters:
                    className - the class name (case sensitive) to search for, must not be null.
                    Returns:
                    a list of all Element objects with the specified class that are enclosed by this segment.

                    getAllCharacterReferences

                    public java.util.List<CharacterReference> getAllCharacterReferences()
                    Returns a list of all CharacterReference objects that are enclosed by this segment.

                    Returns:
                    a list of all CharacterReference objects that are enclosed by this segment.

                    getFirstStartTag

                    public final StartTag getFirstStartTag()
                    Returns the first StartTag enclosed by this segment.

                    This is functionally equivalent to getAllStartTags().iterator().next(), but does not search beyond the first start tag and returns null if no such start tag exists.

                    Returns:
                    the first StartTag enclosed by this segment, or null if none exists.

                    getFirstStartTag

                    public final StartTag getFirstStartTag(StartTagType startTagType)
                    Returns the first StartTag of the specified type enclosed by this segment.

                    This is functionally equivalent to getAllStartTags(startTagType).iterator().next(), but does not search beyond the first start tag and returns null if no such start tag exists.

                    Parameters:
                    startTagType - the StartTagType to search for.
                    Returns:
                    the first StartTag of the specified type enclosed by this segment, or null if none exists.

                    getFirstStartTag

                    public final StartTag getFirstStartTag(java.lang.String name)
                    Returns the first normal StartTag with the specified name enclosed by this segment.

                    This is functionally equivalent to getAllStartTags(name).iterator().next(), but does not search beyond the first start tag and returns null if no such start tag exists.

                    Specifying a null argument to the name parameter is equivalent to getFirstStartTag().

                    Parameters:
                    name - the name of the start tag to search for, may be null.
                    Returns:
                    the first normal StartTag with the specified name enclosed by this segment, or null if none exists.

                    getFirstStartTag

                    public final StartTag getFirstStartTag(java.lang.String attributeName,
                                                           java.lang.String value,
                                                           boolean valueCaseSensitive)
                    Returns the first StartTag with the specified attribute name/value pair enclosed by this segment.

                    This is functionally equivalent to getAllStartTags(attributeName,value,valueCaseSensitive).iterator().next(), but does not search beyond the first start tag and returns null if no such start tag exists.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    value - the value of the specified attribute to search for, must not be null.
                    valueCaseSensitive - specifies whether the attribute value matching is case sensitive.
                    Returns:
                    the first StartTag with the specified attribute name/value pair enclosed by this segment, or null if none exists.
                    See Also:
                    getFirstStartTag(String attributeName, Pattern valueRegexPattern)

                    getFirstStartTag

                    public final StartTag getFirstStartTag(java.lang.String attributeName,
                                                           java.util.regex.Pattern valueRegexPattern)
                    Returns the first StartTag with the specified attribute name and value pattern that is enclosed by this segment.

                    This is functionally equivalent to getAllStartTags(attributeName,valueRegexPattern).iterator().next(), but does not search beyond the first start tag and returns null if no such start tag exists.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    valueRegexPattern - the regular expression pattern that must match the attribute value, may be null.
                    Returns:
                    the first StartTag with the specified attribute name and value pattern that is enclosed by this segment, or null if none exists.
                    See Also:
                    getFirstStartTag(String attributeName, String value, boolean valueCaseSensitive)

                    getFirstStartTagByClass

                    public final StartTag getFirstStartTagByClass(java.lang.String className)
                    Returns the first StartTag with the specified class that is enclosed by this segment.

                    This is functionally equivalent to getAllStartTagsByClass(className).iterator().next(), but does not search beyond the first start tag and returns null if no such start tag exists.

                    Parameters:
                    className - the class name (case sensitive) to search for, must not be null.
                    Returns:
                    the first StartTag with the specified class that is enclosed by this segment, or null if none exists.

                    getFirstElement

                    public final Element getFirstElement()
                    Returns the first Element enclosed by this segment.

                    This is functionally equivalent to getAllElements().iterator().next(), but does not search beyond the first enclosed element and returns null if no such element exists.

                    If this segment is itself an Element, this element is returned, not the first child element.

                    Returns:
                    the first Element enclosed by this segment, or null if none exists.

                    getFirstElement

                    public final Element getFirstElement(java.lang.String name)
                    Returns the first normal Element with the specified name enclosed by this segment.

                    This is functionally equivalent to getAllElements(name).iterator().next(), but does not search beyond the first enclosed element and returns null if no such element exists.

                    Specifying a null argument to the name parameter is equivalent to getFirstElement().

                    If this segment is itself an Element with the specified name, this element is returned.

                    Parameters:
                    name - the name of the element to search for.
                    Returns:
                    the first normal Element with the specified name enclosed by this segment, or null if none exists.

                    getFirstElement

                    public final Element getFirstElement(java.lang.String attributeName,
                                                         java.lang.String value,
                                                         boolean valueCaseSensitive)
                    Returns the first Element with the specified attribute name/value pair enclosed by this segment.

                    This is functionally equivalent to getAllElements(attributeName,value,valueCaseSensitive).iterator().next(), but does not search beyond the first enclosed element and returns null if no such element exists.

                    If this segment is itself an Element with the specified attribute name/value pair, this element is returned.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    value - the value of the specified attribute to search for, must not be null.
                    valueCaseSensitive - specifies whether the attribute value matching is case sensitive.
                    Returns:
                    the first Element with the specified attribute name/value pair enclosed by this segment, or null if none exists.
                    See Also:
                    getFirstElement(String attributeName, Pattern valueRegexPattern)

                    getFirstElement

                    public final Element getFirstElement(java.lang.String attributeName,
                                                         java.util.regex.Pattern valueRegexPattern)
                    Returns the first Element with the specified attribute name and value pattern that is enclosed by this segment.

                    This is functionally equivalent to getAllElements(attributeName,valueRegexPattern).iterator().next(), but does not search beyond the first enclosed element and returns null if no such element exists.

                    If this segment is itself an Element with the specified attribute name and value pattern, this element is returned.

                    Parameters:
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    valueRegexPattern - the regular expression pattern that must match the attribute value, may be null.
                    Returns:
                    the first Element with the specified attribute name and value pattern that is enclosed by this segment, or null if none exists.
                    See Also:
                    getFirstElement(String attributeName, String value, boolean valueCaseSensitive)

                    getFirstElementByClass

                    public final Element getFirstElementByClass(java.lang.String className)
                    Returns the first Element with the specified class that is enclosed by this segment.

                    This is functionally equivalent to getAllElementsByClass(className).iterator().next(), but does not search beyond the first enclosed element and returns null if no such element exists.

                    If this segment is itself an Element with the specified class, this element is returned.

                    Parameters:
                    className - the class name (case sensitive) to search for, must not be null.
                    Returns:
                    the first Element with the specified class that is enclosed by this segment, or null if none exists.

                    getFormControls

                    public java.util.List<FormControl> getFormControls()
                    Returns a list of the FormControl objects that are enclosed by this segment.

                    Returns:
                    a list of the FormControl objects that are enclosed by this segment.

                    getFormFields

                    public FormFields getFormFields()
                    Returns the FormFields object representing all form fields that are enclosed by this segment.

                    This is equivalent to new FormFields(getFormControls()).

                    Returns:
                    the FormFields object representing all form fields that are enclosed by this segment.
                    See Also:
                    getFormControls()

                    parseAttributes

                    public Attributes parseAttributes()
                    Parses any Attributes within this segment. This method is only used in the unusual situation where attributes exist outside of a start tag. The StartTag.getAttributes() method should be used in normal situations.

                    This is equivalent to source.parseAttributes(getBegin(),getEnd()).

                    Returns:
                    the Attributes within this segment, or null if too many errors occur while parsing.

                    ignoreWhenParsing

                    public void ignoreWhenParsing()
                    Causes this segment to be ignored when parsing.

                    Ignored segments are treated as blank spaces by the parsing mechanism, but are included as normal text in all other functions.

                    This method was originally the only means of preventing server tags located inside normal tags from interfering with the parsing of the tags (such as where an attribute of a normal tag uses a server tag to dynamically set its value), as well as preventing non-server tags from being recognised inside server tags.

                    It is not necessary to use this method to ignore server tags located inside normal tags, as the attributes parser automatically ignores any server tags.

                    It is not necessary to use this method to ignore non-server tags inside server tags, or the contents of SCRIPT elements, as the parser does this automatically when performing a full sequential parse.

                    This leaves only very few scenarios where calling this method still provides a significant benefit.

                    One such case is where XML-style server tags are used inside normal tags. Here is an example using an XML-style JSP tag:

                    <a href="<i18n:resource path="/Portal"/>?BACK=TRUE">back</a>
                    The first double-quote of "/Portal" will be interpreted as the end quote for the href attribute, as there is no way for the parser to recognise the i18n:resource element as a server tag. Such use of XML-style server tags inside normal tags is generally seen as bad practice, but it is nevertheless valid JSP. The only way to ensure that this library is able to parse the normal tag surrounding it is to find these server tags first and call the ignoreWhenParsing method to ignore them before parsing the rest of the document.

                    It is important to understand the difference between ignoring the segment when parsing and removing the segment completely. Any text inside a segment that is ignored when parsing is treated by most functions as content, and as such is included in the output of tools such as TextExtractor and Renderer.

                    To remove segments completely, create an OutputDocument and call its remove(Segment) or replaceWithSpaces(int begin, int end) method for each segment. Then create a new source document using new Source(outputDocument.toString()) and perform the desired operations on this new source object.

                    Calling this method after the Source.fullSequentialParse() method has been called is not permitted and throws an IllegalStateException.

                    Any tags appearing in this segment that are found before this method is called will remain in the tag cache, and so will continue to be found by the tag search methods. If this is undesirable, the Source.clearCache() method can be called to remove them from the cache. Calling the Source.fullSequentialParse() method after this method clears the cache automatically.

                    For best performance, this method should be called on all segments that need to be ignored without calling any of the tag search methods in between.

                    See Also:
                    Source.ignoreWhenParsing(Collection segments)

                    compareTo

                    public int compareTo(Segment segment)
                    Compares this Segment object to another object.

                    If the argument is not a Segment, a ClassCastException is thrown.

                    A segment is considered to be before another segment if its begin position is earlier, or in the case that both segments begin at the same position, its end position is earlier.

                    Segments that begin and end at the same position are considered equal for the purposes of this comparison, even if they relate to different source documents.

                    Note: this class has a natural ordering that is inconsistent with equals. This means that this method may return zero in some cases where calling the equals(Object) method with the same argument returns false.

                    Specified by:
                    compareTo in interface java.lang.Comparable<Segment>
                    Parameters:
                    segment - the segment to be compared
                    Returns:
                    a negative integer, zero, or a positive integer as this segment is before, equal to, or after the specified segment.
                    Throws:
                    java.lang.ClassCastException - if the argument is not a Segment

                    isWhiteSpace

                    public final boolean isWhiteSpace()
                    Indicates whether this segment consists entirely of white space.

                    Returns:
                    true if this segment consists entirely of white space, otherwise false.

                    isWhiteSpace

                    public static final boolean isWhiteSpace(char ch)
                    Indicates whether the specified character is white space.

                    The HTML 4.01 specification section 9.1 specifies the following white space characters:

                    • space (U+0020)
                    • tab (U+0009)
                    • form feed (U+000C)
                    • line feed (U+000A)
                    • carriage return (U+000D)
                    • zero-width space (U+200B)

                    Despite the explicit inclusion of the zero-width space in the HTML specification, Microsoft IE6 does not recognise it as white space and renders it as an unprintable character (empty square). Even zero-width spaces included using the numeric character reference &#x200B; are rendered this way.

                    Parameters:
                    ch - the character to test.
                    Returns:
                    true if the specified character is white space, otherwise false.

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Returns:
                    a string representation of this object useful for debugging purposes.

                    charAt

                    public char charAt(int index)
                    Returns the character at the specified index.

                    This is logically equivalent to toString().charAt(index) for valid argument values 0 <= index < length().

                    However because this implementation works directly on the underlying document source string, it should not be assumed that an IndexOutOfBoundsException is thrown for an invalid argument value.

                    Specified by:
                    charAt in interface java.lang.CharSequence
                    Parameters:
                    index - the index of the character.
                    Returns:
                    the character at the specified index.

                    subSequence

                    public java.lang.CharSequence subSequence(int beginIndex,
                                                              int endIndex)
                    Returns a new character sequence that is a subsequence of this sequence.

                    This is logically equivalent to toString().subSequence(beginIndex,endIndex) for valid values of beginIndex and endIndex.

                    However because this implementation works directly on the underlying document source text, it should not be assumed that an IndexOutOfBoundsException is thrown for invalid argument values as described in the String.subSequence(int,int) method.

                    Specified by:
                    subSequence in interface java.lang.CharSequence
                    Parameters:
                    beginIndex - the begin index, inclusive.
                    endIndex - the end index, exclusive.
                    Returns:
                    a new character sequence that is a subsequence of this sequence.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Source.html0000644000175000017500000060562411214132422024764 0ustar twernertwerner Source (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Source

                    java.lang.Object
                      extended by Segment
                          extended by Source
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>, java.lang.Iterable<Segment>

                    public final class Source
                    extends Segment
                    implements java.lang.Iterable<Segment>

                    Represents a source HTML document.

                    The first step in parsing an HTML document is always to construct a Source object from the source data, which can be a String, Reader, InputStream, URLConnection or URL. Each constructor uses all the evidence available to determine the original character encoding of the data.

                    Once the Source object has been created, you can immediately start searching for tags or elements within the document using the tag search methods.

                    In certain circumstances you may be able to improve performance by calling the fullSequentialParse() method before calling any tag search methods. See the documentation of the fullSequentialParse() method for details.

                    Any issues encountered while parsing are logged to a Logger object. The setLogger(Logger) method can be used to explicitly set a Logger implementation for a particular Source instance, otherwise the static Config.LoggerProvider property determines how the logger is set by default for all Source instances. See the documentation of the Config.LoggerProvider property for information about how the default logging provider is determined.

                    Note that many of the useful functions which can be performed on the source document are defined in its superclass, Segment. The source object is itself a segment which spans the entire document.

                    Most of the methods defined in this class are useful for determining the elements and tags surrounding or neighbouring a particular character position in the document.

                    For information on how to create a modified version of this source document, see the OutputDocument class.

                    Source objects are not thread safe, and should therefore not be shared between multiple threads unless all access is synchronized using some mechanism external to the library.

                    If memory usage is a major concern, consider using the StreamedSource class instead of the Source class.

                    See Also:
                    Segment, StreamedSource

                    Field Summary
                    static boolean LegacyIteratorCompatabilityMode
                              Deprecated. Modify existing code to explicitly handle CharacterReference segments.
                     
                    Constructor Summary
                    Source(java.lang.CharSequence text)
                              Constructs a new Source object from the specified text.
                    Source(java.io.InputStream inputStream)
                              Constructs a new Source object by loading the content from the specified InputStream.
                    Source(java.io.Reader reader)
                              Constructs a new Source object by loading the content from the specified Reader.
                    Source(java.net.URL url)
                              Constructs a new Source object by loading the content from the specified URL.
                    Source(java.net.URLConnection urlConnection)
                              Constructs a new Source object by loading the content from the specified URLConnection.
                     
                    Method Summary
                     char charAt(int index)
                              Returns the character at the specified index.
                     void clearCache()
                              Clears the tag cache of all tags.
                     Tag[] fullSequentialParse()
                              Parses all of the tags in this source document sequentially from beginning to end.
                     java.util.List<Element> getAllElements()
                              Returns a list of all elements in this source document.
                     java.util.List<StartTag> getAllStartTags()
                              Returns a list of all start tags in this source document.
                     java.util.List<Tag> getAllTags()
                              Returns a list of all tags in this source document.
                     java.lang.String getCacheDebugInfo()
                              Returns a string representation of the tag cache, useful for debugging purposes.
                     java.util.List<Element> getChildElements()
                              Returns a list of the top-level elements in the document element hierarchy.
                     int getColumn(int pos)
                              Returns the column number of the specified character position in the source document.
                     java.lang.String getDocumentSpecifiedEncoding()
                              Returns the document encoding specified within the text of the document.
                     Element getElementById(java.lang.String id)
                              Returns the Element with the specified id attribute value.
                     Element getEnclosingElement(int pos)
                              Returns the most nested normal Element that encloses the specified position in the source document.
                     Element getEnclosingElement(int pos, java.lang.String name)
                              Returns the most nested normal Element with the specified name that encloses the specified position in the source document.
                     Tag getEnclosingTag(int pos)
                              Returns the Tag that encloses the specified position in the source document.
                     Tag getEnclosingTag(int pos, TagType tagType)
                              Returns the Tag of the specified type that encloses the specified position in the source document.
                     java.lang.String getEncoding()
                              Returns the character encoding scheme of the source byte stream used to create this object.
                     java.lang.String getEncodingSpecificationInfo()
                              Returns a concise description of how the encoding of the source document was determined.
                     Logger getLogger()
                              Returns the Logger that handles log messages.
                     int getNameEnd(int pos)
                              Returns the end position of the XML Name that starts at the specified position.
                     java.lang.String getNewLine()
                              Returns the newline character sequence used in the source document.
                     CharacterReference getNextCharacterReference(int pos)
                              Returns the CharacterReference beginning at or immediately following the specified position in the source document.
                     Element getNextElement(int pos)
                              Returns the Element beginning at or immediately following the specified position in the source document.
                     Element getNextElement(int pos, java.lang.String name)
                              Returns the normal Element with the specified name beginning at or immediately following the specified position in the source document.
                     Element getNextElement(int pos, java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the Element with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.
                     Element getNextElement(int pos, java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the Element with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.
                     Element getNextElementByClass(int pos, java.lang.String className)
                              Returns the Element with the specified class beginning at or immediately following the specified position in the source document.
                     EndTag getNextEndTag(int pos)
                              Returns the EndTag beginning at or immediately following the specified position in the source document.
                     EndTag getNextEndTag(int pos, EndTagType endTagType)
                              Returns the EndTag of the specified type beginning at or immediately following the specified position in the source document.
                     EndTag getNextEndTag(int pos, java.lang.String name)
                              Returns the normal EndTag with the specified name beginning at or immediately following the specified position in the source document.
                     EndTag getNextEndTag(int pos, java.lang.String name, EndTagType endTagType)
                              Returns the EndTag with the specified name and type beginning at or immediately following the specified position in the source document.
                     StartTag getNextStartTag(int pos)
                              Returns the StartTag beginning at or immediately following the specified position in the source document.
                     StartTag getNextStartTag(int pos, StartTagType startTagType)
                              Returns the StartTag of the specified type beginning at or immediately following the specified position in the source document.
                     StartTag getNextStartTag(int pos, java.lang.String name)
                              Returns the normal StartTag with the specified name beginning at or immediately following the specified position in the source document.
                     StartTag getNextStartTag(int pos, java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the StartTag with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.
                     StartTag getNextStartTag(int pos, java.lang.String name, StartTagType startTagType)
                              Returns the StartTag with the specified name and type beginning at or immediately following the specified position in the source document.
                     StartTag getNextStartTag(int pos, java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the StartTag with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.
                     StartTag getNextStartTagByClass(int pos, java.lang.String className)
                              Returns the StartTag with the specified class beginning at or immediately following the specified position in the source document.
                     Tag getNextTag(int pos)
                              Returns the Tag beginning at or immediately following the specified position in the source document.
                     Tag getNextTag(int pos, TagType tagType)
                              Returns the Tag of the specified type beginning at or immediately following the specified position in the source document.
                     ParseText getParseText()
                              Returns the parse text of this source document.
                     java.lang.String getPreliminaryEncodingInfo()
                              Returns the preliminary encoding of the source document together with a concise description of how it was determined.
                     CharacterReference getPreviousCharacterReference(int pos)
                              Returns the CharacterReference at or immediately preceding (or enclosing) the specified position in the source document.
                     EndTag getPreviousEndTag(int pos)
                              Returns the EndTag at or immediately preceding (or enclosing) the specified position in the source document.
                     EndTag getPreviousEndTag(int pos, EndTagType endTagType)
                              Returns the EndTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                     EndTag getPreviousEndTag(int pos, java.lang.String name)
                              Returns the normal EndTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag getPreviousStartTag(int pos)
                              Returns the StartTag at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag getPreviousStartTag(int pos, StartTagType startTagType)
                              Returns the StartTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag getPreviousStartTag(int pos, java.lang.String name)
                              Returns the normal StartTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag getPreviousStartTag(int pos, java.lang.String name, StartTagType startTagType)
                              Returns the StartTag with the specified name and type at or immediately preceding (or enclosing) the specified position in the source document.
                     Tag getPreviousTag(int pos)
                              Returns the Tag beginning at or immediately preceding (or enclosing) the specified position in the source document.
                     Tag getPreviousTag(int pos, TagType tagType)
                              Returns the Tag of the specified type beginning at or immediately preceding (or enclosing) the specified position in the source document.
                     int getRow(int pos)
                              Returns the row number of the specified character position in the source document.
                     RowColumnVector getRowColumnVector(int pos)
                              Returns a RowColumnVector object representing the row and column number of the specified character position in the source document.
                     SourceFormatter getSourceFormatter()
                              Formats the HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
                     Tag getTagAt(int pos)
                              Returns the Tag at the specified position in the source document.
                     void ignoreWhenParsing(java.util.Collection<? extends Segment> segments)
                              Causes all of the segments in the specified collection to be ignored when parsing.
                     void ignoreWhenParsing(int begin, int end)
                              Causes the specified range of the source text to be ignored when parsing.
                     boolean isXML()
                              Indicates whether the source document is likely to be XML.
                     java.util.Iterator<Segment> iterator()
                              Returns an iterator over every tag, character reference and plain text segment contained within the source document.
                     int length()
                              Returns the length of the source document.
                     Attributes parseAttributes(int pos, int maxEnd)
                              Parses any Attributes starting at the specified position.
                     Attributes parseAttributes(int pos, int maxEnd, int maxErrorCount)
                              Parses any Attributes starting at the specified position.
                     void setLogger(Logger logger)
                              Sets the Logger that handles log messages.
                     java.lang.CharSequence subSequence(int begin, int end)
                              Returns a new character sequence that is a subsequence of this source document.
                     java.lang.String toString()
                              Returns the source text as a String.
                     
                    Methods inherited from class Segment
                    compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getBegin, getDebugInfo, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, parseAttributes
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Field Detail

                    LegacyIteratorCompatabilityMode

                    @Deprecated
                    public static boolean LegacyIteratorCompatabilityMode
                    Deprecated. Modify existing code to explicitly handle CharacterReference segments.
                    Specifies whether to enable the legacy Segment.getNodeIterator() compatibility mode.

                    Prior to version 3.1, Segment.getNodeIterator() and Source.iterator() did not handle character references as separate segments, and they were instead included unparsed in the plain text segments. This required the use of the CharacterReference.decode(CharSequence) method to retrieve the actual text from each plain text segment.

                    Although it is likely that existing programs based on the previous functionality should still work without modification, this static configuration property is provided on a temporary basis to revert back to the behaviour of previous versions, ensuring that existing programs function as intended without major modification.

                    Setting this configuration property to true restores compatibility with previous versions.

                    This property and compatibility mode will be removed in a future release.

                    Constructor Detail

                    Source

                    public Source(java.lang.CharSequence text)
                    Constructs a new Source object from the specified text.

                    Parameters:
                    text - the source text.

                    Source

                    public Source(java.io.Reader reader)
                           throws java.io.IOException
                    Constructs a new Source object by loading the content from the specified Reader.

                    If the specified reader is an instance of InputStreamReader, the getEncoding() method of the created Source object returns the encoding from InputStreamReader.getEncoding().

                    Parameters:
                    reader - the java.io.Reader from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.

                    Source

                    public Source(java.io.InputStream inputStream)
                           throws java.io.IOException
                    Constructs a new Source object by loading the content from the specified InputStream.

                    The algorithm for detecting the character encoding of the source document from the raw bytes of the specified input stream is the same as that for the Source(URLConnection) constructor, except that the first step is not possible as there is no Content-Type header to check.

                    Parameters:
                    inputStream - the java.io.InputStream from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.
                    See Also:
                    getEncoding()

                    Source

                    public Source(java.net.URL url)
                           throws java.io.IOException
                    Constructs a new Source object by loading the content from the specified URL.

                    This is equivalent to Source(url.openConnection()).

                    Parameters:
                    url - the URL from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.
                    See Also:
                    getEncoding()

                    Source

                    public Source(java.net.URLConnection urlConnection)
                           throws java.io.IOException
                    Constructs a new Source object by loading the content from the specified URLConnection.

                    The algorithm for detecting the character encoding of the source document is as follows:
                    (process termination is marked by ♦)

                    1. If the HTTP headers received from the URL connection include a Content-Type header specifying a charset parameter, then use the encoding specified in the value of the charset parameter. ♦
                    2. Read the first four bytes of the input stream.
                    3. If the input stream is empty, the created source document has zero length and its getEncoding() method returns null. ♦
                    4. If the input stream starts with a unicode Byte Order Mark (BOM), then use the encoding signified by the BOM. ♦
                      BOM Bytes    | Encoding
                      EF BB BF     | UTF-8
                      FF FE 00 00  | UTF-32 (little-endian)
                      00 00 FE FF  | UTF-32 (big-endian)
                      FF FE        | UTF-16 (little-endian)
                      FE FF        | UTF-16 (big-endian)
                      0E FE FF     | SCSU
                      2B 2F 76     | UTF-7
                      DD 73 66 73  | UTF-EBCDIC
                      FB EE 28     | BOCU-1
                    5. If the stream contains less than four bytes, then:
                      1. If the stream contains either one or three bytes, then use the encoding ISO-8859-1. ♦
                      2. If the stream starts with a zero byte, then use the encoding UTF-16BE. ♦
                      3. If the second byte of the stream is zero, then use the encoding UTF-16LE. ♦
                      4. Otherwise use the encoding ISO-8859-1. ♦
                    6. Determine a preliminary encoding by examining the first four bytes of the input stream. See the getPreliminaryEncodingInfo() method for details.
                    7. Read the first 2048 bytes of the input stream and decode it using the preliminary encoding to create a "preview segment". If the detected preliminary encoding is not supported on this platform, create the preview segment using ISO-8859-1 instead (this incident is logged at warn level).
                    8. Search the preview segment for an encoding specification, which should always appear at or near the top of the document.
                    9. If an encoding specification is found:
                      1. If the specified encoding is supported on this platform, use it. ♦
                      2. If the specified encoding is not supported on this platform, use the encoding that was used to create the preview segment, which is normally the detected preliminary encoding. ♦
                    10. If the document looks like XML, then use UTF-8. ♦
                      Section 4.3.3 of the XML 1.0 specification states that an XML file that is not encoded in UTF-8 must contain either a UTF-16 BOM or an encoding declaration in its XML declaration. Since neither of these was detected, we can assume the encoding is UTF-8.
                    11. Use the encoding that was used to create the preview segment, which is normally the detected preliminary encoding. ♦
                      This is the best guess, in the absence of any explicit information about the encoding, based on the first four bytes of the stream. The HTTP protocol section 3.7.1 states that an encoding of ISO-8859-1 can be assumed if no charset parameter was included in the HTTP Content-Type header. This is consistent with the preliminary encoding detected in this scenario.

                    Parameters:
                    urlConnection - the URL connection from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.
                    See Also:
                    getEncoding()
                    Method Detail

                    getDocumentSpecifiedEncoding

                    public java.lang.String getDocumentSpecifiedEncoding()
                    Returns the document encoding specified within the text of the document.

                    The document encoding can be specified within the document text in two ways. They are referred to generically in this library as an encoding specification, and are listed below in order of precedence:

                    1. An encoding declaration within the XML declaration of an XML document, which must be present if it has an encoding other than UTF-8 or UTF-16.
                      <?xml version="1.0" encoding="ISO-8859-1" ?>
                    2. A META declaration, which is in the form of a META tag with attribute http-equiv="Content-Type". The encoding is specified in the charset parameter of a Content-Type HTTP header value, which is placed in the value of the meta tag's content attribute. This META declaration should appear as early as possible in the HEAD element.
                      <META http-equiv=Content-Type content="text/html; charset=iso-8859-1">

                    Both of these tags must only use characters in the range U+0000 to U+007F, and in the case of the META declaration must use ASCII encoding. This, along with the fact that they must occur at or near the beginning of the document, assists in their detection and decoding without the need to know the exact encoding of the full text.

                    Returns:
                    the document encoding specified within the text of the document, or null if no encoding is specified.
                    See Also:
                    getEncoding()

                    getEncoding

                    public java.lang.String getEncoding()
                    Returns the character encoding scheme of the source byte stream used to create this object.

                    The encoding of a document defines how the original byte stream was encoded into characters. The HTTP specification section 3.4 uses the term "character set" to refer to the encoding, and the term "charset" is similarly used in Java (see the class java.nio.charset.Charset). This often causes confusion, as a modern "coded character set" such as Unicode can have several encodings, such as UTF-8, UTF-16, and UTF-32. See the Wikipedia character encoding article for an explanation of the terminology.

                    This method makes the best possible effort to return the name of the encoding used to decode the original source byte stream into character data. This decoding takes place in the constructor when a parameter based on a byte stream such as an InputStream or URLConnection is used to specify the source text. The documentation of the Source(InputStream) and Source(URLConnection) constructors describes how the return value of this method is determined in these cases. It is also possible in some circumstances for the encoding to be determined in the Source(Reader) constructor.

                    If a constructor was used that specifies the source text directly in character form (not requiring the decoding of a byte sequence) then the document itself is searched for an encoding specification. In this case, this method returns the same value as the getDocumentSpecifiedEncoding() method.

                    The getEncodingSpecificationInfo() method returns a simple description of how the value of this method was determined.

                    Returns:
                    the character encoding scheme of the source byte stream used to create this object, or null if the encoding is not known.
                    See Also:
                    getEncodingSpecificationInfo()

                    getEncodingSpecificationInfo

                    public java.lang.String getEncodingSpecificationInfo()
                    Returns a concise description of how the encoding of the source document was determined.

                    The description is intended for informational purposes only. It is not guaranteed to have any particular format and cannot be reliably parsed.

                    Returns:
                    a concise description of how the encoding of the source document was determined.
                    See Also:
                    getEncoding()

                    getPreliminaryEncodingInfo

                    public java.lang.String getPreliminaryEncodingInfo()
                    Returns the preliminary encoding of the source document together with a concise description of how it was determined.

                    It is sometimes necessary for the Source(InputStream) and Source(URLConnection) constructors to search the document for an encoding specification in order to determine the exact encoding of the source byte stream.

                    In order to search for the document specified encoding before the exact encoding is known, a preliminary encoding is determined using the first four bytes of the input stream.

                    Because the encoding specification must only use characters in the range U+0000 to U+007F, the preliminary encoding need only have the following basic properties determined:

                    • Code unit size (8-bit, 16-bit or 32-bit)
                    • Byte order (big-endian or little-endian) if the code unit size is 16-bit or 32-bit
                    • Basic encoding of characters in the range U+0000 to U+007F (the current implementation only distinguishes between ASCII and EBCDIC)

                    The encodings used to represent the most commonly encountered combinations of these basic properties are:

                    • ISO-8859-1: 8-bit ASCII-compatible encoding
                    • Cp037: 8-bit EBCDIC-compatible encoding
                    • UTF-16BE: 16-bit big-endian encoding
                    • UTF-16LE: 16-bit little-endian encoding
                    • UTF-32BE: 32-bit big-endian encoding (not supported on most Java platforms)
                    • UTF-32LE: 32-bit little-endian encoding (not supported on most Java platforms)
                    Note: all encodings with a code unit size greater than 8 bits are assumed to use an ASCII-compatible low-order byte.

                    In some descriptions returned by this method, and the documentation below, a pattern is used to help demonstrate the contents of the first four bytes of the stream. The patterns use the characters "00" to signify a zero byte, "XX" to signify a non-zero byte, and "??" to signify a byte that can be either zero or non-zero.

                    The algorithm for determining the preliminary encoding is as follows:

                    1. Byte pattern "00 00..." : If the stream starts with two zero bytes, the default 32-bit big-endian encoding UTF-32BE is used.
                    2. Byte pattern "00 XX..." : If the stream starts with a single zero byte, the default 16-bit big-endian encoding UTF-16BE is used.
                    3. Byte pattern "XX ?? 00 00..." : If the third and fourth bytes of the stream are zero, the default 32-bit little-endian encoding UTF-32LE is used.
                    4. Byte pattern "XX 00..." or "XX ?? XX 00..." : If the second or fourth byte of the stream is zero, the default 16-bit little-endian encoding UTF-16LE is used.
                    5. Byte pattern "XX XX 00 XX..." : If the third byte of the stream is zero, the default 16-bit big-endian encoding UTF-16BE is used (assumes the first character is > U+00FF).
                    6. Byte pattern "4C XX XX XX..." : If the first four bytes are consistent with the EBCDIC encoding of an XML declaration ("<?xm") or a document type declaration ("<!DO"), or any other string starting with the EBCDIC character '<' followed by three non-ASCII characters (8th bit set), which is consistent with EBCDIC alphanumeric characters, the default EBCDIC-compatible encoding Cp037 is used.
                    7. Byte pattern "XX XX XX XX..." : Otherwise, if all of the first four bytes of the stream are non-zero, the default 8-bit ASCII-compatible encoding ISO-8859-1 is used.

                    If it was not necessary to search for a document specified encoding when determining the encoding of this source document from a byte stream, this method returns null.

                    See the documentation of the Source(InputStream) and Source(URLConnection) constructors for more detailed information about when the detection of a preliminary encoding is required.

                    The description returned by this method is intended for informational purposes only. It is not guaranteed to have any particular format and cannot be reliably parsed.

                    Returns:
                    the preliminary encoding of the source document together with a concise description of how it was determined, or null if no preliminary encoding was required.
                    See Also:
                    getEncoding()

                    isXML

                    public boolean isXML()
                    Indicates whether the source document is likely to be XML.

                    The algorithm used to determine this is designed to be relatively inexpensive and to provide an accurate result in most normal situations. An exact determination of whether the source document is XML would require a much more complex analysis of the text.

                    The algorithm is as follows:

                    1. If the document begins with an XML declaration, it is an XML document.
                    2. If the document contains a document type declaration that contains the text "xhtml", it is an XHTML document, and hence also an XML document.
                    3. If none of the above conditions are met, assume the document is normal HTML, and therefore not an XML document.

                    Returns:
                    true if the source document is likely to be XML, otherwise false.

                    getNewLine

                    public java.lang.String getNewLine()
                    Returns the newline character sequence used in the source document.

                    If the document does not contain any newline characters, this method returns null.

                    The three possible return values (aside from null) are "\n", "\r\n" and "\r".

                    Returns:
                    the newline character sequence used in the source document, or null if none is present.

                    getRow

                    public int getRow(int pos)
                    Returns the row number of the specified character position in the source document.

                    Parameters:
                    pos - the position in the source document.
                    Returns:
                    the row number of the specified character position in the source document.
                    Throws:
                    java.lang.IndexOutOfBoundsException - if the specified position is not within the bounds of the document.
                    See Also:
                    getColumn(int pos), getRowColumnVector(int pos)

                    getColumn

                    public int getColumn(int pos)
                    Returns the column number of the specified character position in the source document.

                    Parameters:
                    pos - the position in the source document.
                    Returns:
                    the column number of the specified character position in the source document.
                    Throws:
                    java.lang.IndexOutOfBoundsException - if the specified position is not within the bounds of the document.
                    See Also:
                    getRow(int pos), getRowColumnVector(int pos)

                    getRowColumnVector

                    public RowColumnVector getRowColumnVector(int pos)
                    Returns a RowColumnVector object representing the row and column number of the specified character position in the source document.

                    Parameters:
                    pos - the position in the source document.
                    Returns:
                    a RowColumnVector object representing the row and column number of the specified character position in the source document.
                    Throws:
                    java.lang.IndexOutOfBoundsException - if the specified position is not within the bounds of the document.
                    See Also:
                    getRow(int pos), getColumn(int pos)

                    toString

                    public java.lang.String toString()
                    Returns the source text as a String.

                    Specified by:
                    toString in interface java.lang.CharSequence
                    Overrides:
                    toString in class Segment
                    Returns:
                    the source text as a String.

                    fullSequentialParse

                    public Tag[] fullSequentialParse()
                    Parses all of the tags in this source document sequentially from beginning to end.

                    Calling this method can greatly improve performance if most or all of the tags in the document need to be parsed.

                    Calling the getAllTags(), getAllStartTags(), getAllElements(), getChildElements(), iterator() or Segment.getNodeIterator() method on the Source object performs a full sequential parse automatically. There are however still circumstances where it should be called manually, such as when it is known that most or all of the tags in the document will need to be parsed, but none of the abovementioned methods are used, or are called only after calling one or more other tag search methods.

                    If this method is called manually, it should be called soon after the Source object is created, before any tag search methods are called.

                    By default, tags are parsed only as needed, which is referred to as parse on demand mode. In this mode, every call to a tag search method that is not returning previously cached tags must perform a relatively complex check to determine whether a potential tag is in a valid position.

                    Generally speaking, a tag is in a valid position if it does not appear inside any other tag. Server tags can appear anywhere in a document, including inside other tags, so this relates only to non-server tags. Theoretically, checking whether a specified position in the document is enclosed in another tag is only possible if every preceding tag has been parsed, otherwise it is impossible to tell whether one of the delimiters of the enclosing tag was in fact enclosed by some other tag before it, thereby invalidating it.

                    When this method is called, each tag is parsed in sequence starting from the beginning of the document, making it easy to check whether each potential tag is in a valid position. In parse on demand mode a compromise technique must be used for this check, since the theoretical requirement of having parsed all preceding tags is no longer practical. This compromise involves only checking whether the position is enclosed by other tags with certain tag types. The added complexity of this technique makes parsing each tag slower compared to when a full sequential parse is performed, but when only a few tags need parsing this is an extremely beneficial trade-off.

                    The documentation of the TagType.isValidPosition(Source, int pos, int[] fullSequentialParseData) method, which is called internally by the parser to perform the valid position check, includes a more detailed explanation of the differences between the two modes of operation.

                    Calling this method a second or subsequent time has no effect.

                    This method returns the same list of tags as the Source.getAllTags() method, but as an array instead of a list.

                    If this method is called after any of the tag search methods are called, the cache is cleared of any previously found tags before being restocked via the full sequential parse. This means that if you still have references to tags or elements from before the full sequential parse, they will not be the same objects as those that are returned by tag search methods after the full sequential parse, which can cause confusion if you are allocating user data to tags. It is also significant if the Segment.ignoreWhenParsing() method has been called since the tags were first found, as any tags inside the ignored segments will no longer be returned by any of the tag search methods.

                    See also the Tag class documentation for more general details about how tags are parsed.

                    Returns:
                    an array of all tags in this source document.

                    iterator

                    public java.util.Iterator<Segment> iterator()
                    Returns an iterator over every tag, character reference and plain text segment contained within the source document.

                    Plain text is defined as all text that is not part of a Tag or CharacterReference.

                    This results in a sequential walk-through of the entire source document. The end position of each segment should correspond with the begin position of the subsequent segment, unless any of the tags are enclosed by other tags. This could happen if there are server tags present in the document, or in rare circumstances where the document type declaration contains markup declarations.

                    Character references that are found inside tags, such as those present inside attribute values, are not included as separate iterated segments.

                    This method is implemented by simply calling the Segment.getNodeIterator() method of the Segment superclass.

                    Prior to version 3.1, character references were not handled as separate segments, and were instead included unparsed in the plain text segments. This required the use of the CharacterReference.decode(CharSequence) method to retrieve the actual text from each plain text segment. Although it is likely that existing programs based on the previous functionality should still work without modification, the static configuration property LegacyIteratorCompatabilityMode is provided on a temporary basis to revert back to the behaviour of previous versions, ensuring that existing programs function as intended without major modification.

                    Example:

                    The following code demonstrates the typical (implied) usage of this method through the Iterable interface to make an exact copy of the document from reader to writer (assuming no server tags are present):

                     Source source=new Source(reader);
                     for (Segment segment : source) {
                       if (segment instanceof Tag) {
                         Tag tag=(Tag)segment;
                         // HANDLE TAG
                         // Uncomment the following line to ensure each tag is valid XML:
                         // writer.write(tag.tidy()); continue;
                       } else if (segment instanceof CharacterReference) {
                         CharacterReference characterReference=(CharacterReference)segment;
                         // HANDLE CHARACTER REFERENCE
                         // Uncomment the following line to decode all character references instead of copying them verbatim:
                         // characterReference.appendCharTo(writer); continue;
                       } else {
                         // HANDLE PLAIN TEXT
                       }
                       // unless specific handling has prevented getting to here, simply output the segment as is:
                       writer.write(segment.toString());
                     }

                    Specified by:
                    iterator in interface java.lang.Iterable<Segment>
                    Returns:
                    an iterator over every tag, character reference and plain text segment contained within the source document.

                    getChildElements

                    public java.util.List<Element> getChildElements()
                    Returns a list of the top-level elements in the document element hierarchy.

                    The objects in the list are all of type Element.

                    The term top-level element refers to an element that is not nested within any other element in the document.

                    The term document element hierarchy refers to the hierarchy of elements that make up this source document. The source document itself is not considered to be part of the hierarchy, meaning there is typically more than one top-level element. Even when the source represents an entire HTML document, the document type declaration and/or an XML declaration often exist as top-level elements along with the HTML element itself.

                    The Element.getChildElements() method can be used to get the children of the top-level elements, with recursive use providing a means to visit every element in the document hierarchy.

                    The document element hierarchy differs from that of the Document Object Model in that it is only a representation of the elements that are physically present in the source text. Unlike the DOM, it does not include any "implied" HTML elements such as TBODY if they are not present in the source text.

                    Elements formed from server tags are not included in the hierarchy at all.

                    Structural errors in this source document such as overlapping elements are reported in the log. When elements are found to overlap, the position of the start tag determines the location of the element in the hierarchy.

                    Calling this method on the Source object performs a full sequential parse automatically.

                    A visual representation of the document element hierarchy can be obtained by calling:
                    getSourceFormatter().setIndentAllElements(true).setCollapseWhiteSpace(true).setTidyTags(true).toString()

                    Overrides:
                    getChildElements in class Segment
                    Returns:
                    a list of the top-level elements in the document element hierarchy, guaranteed not null.
                    See Also:
                    Element.getParentElement(), Element.getChildElements(), Element.getDepth()

                    getSourceFormatter

                    public SourceFormatter getSourceFormatter()
                    Formats the HTML source by laying out each non-inline-level element on a new line with an appropriate indent.

                    The output format can be configured by setting any number of properties on the returned SourceFormatter instance before obtaining its output.

                    To create a SourceFormatter instance based on a Segment rather than an entire Source document, use new SourceFormatter(segment) instead.

                    Returns:
                    an instance of SourceFormatter based on this source document.

                    getAllTags

                    public java.util.List<Tag> getAllTags()
                    Returns a list of all tags in this source document.

                    Calling this method on the Source object performs a full sequential parse automatically.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Overrides:
                    getAllTags in class Segment
                    Returns:
                    a list of all tags in this source document.

                    getAllStartTags

                    public java.util.List<StartTag> getAllStartTags()
                    Returns a list of all start tags in this source document.

                    Calling this method on the Source object performs a full sequential parse automatically.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Overrides:
                    getAllStartTags in class Segment
                    Returns:
                    a list of all start tags in this source document.

                    getAllElements

                    public java.util.List<Element> getAllElements()
                    Returns a list of all elements in this source document.

                    Calling this method on the Source object performs a full sequential parse automatically.

                    The elements returned correspond exactly with the start tags returned by the getAllStartTags() method.

                    Overrides:
                    getAllElements in class Segment
                    Returns:
                    a list of all elements in this source document.

                    getElementById

                    public Element getElementById(java.lang.String id)
                    Returns the Element with the specified id attribute value.

                    This simulates the script method getElementById defined in DOM HTML level 1.

                    This is equivalent to getFirstElement("id",id,true).

                    A well formed HTML document should have no more than one element with any given id attribute value.

                    Parameters:
                    id - the id attribute value (case sensitive) to search for, must not be null.
                    Returns:
                    the Element with the specified id attribute value, or null if no such element exists.

                    getTagAt

                    public final Tag getTagAt(int pos)
                    Returns the Tag at the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    This method also returns unregistered tags.

                    Parameters:
                    pos - the position in the source document, may be out of bounds.
                    Returns:
                    the Tag at the specified position in the source document, or null if no tag exists at the specified position or it is out of bounds.

                    getPreviousTag

                    public Tag getPreviousTag(int pos)
                    Returns the Tag beginning at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the Tag beginning at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousTag

                    public Tag getPreviousTag(int pos,
                                              TagType tagType)
                    Returns the Tag of the specified type beginning at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    tagType - the TagType to search for.
                    Returns:
                    the Tag of the specified type beginning at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextTag

                    public Tag getNextTag(int pos)
                    Returns the Tag beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Use Tag.getNextTag() to get the tag immediately following another tag.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the Tag beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextTag

                    public Tag getNextTag(int pos,
                                          TagType tagType)
                    Returns the Tag of the specified type beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    tagType - the TagType to search for.
                    Returns:
                    the Tag of the specified type beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getEnclosingTag

                    public Tag getEnclosingTag(int pos)
                    Returns the Tag that encloses the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document, may be out of bounds.
                    Returns:
                    the Tag that encloses the specified position in the source document, or null if the position is not within a tag or is out of bounds.

                    getEnclosingTag

                    public Tag getEnclosingTag(int pos,
                                               TagType tagType)
                    Returns the Tag of the specified type that encloses the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document, may be out of bounds.
                    tagType - the TagType to search for.
                    Returns:
                    the Tag of the specified type that encloses the specified position in the source document, or null if the position is not within a tag of the specified type or is out of bounds.

                    getNextElement

                    public Element getNextElement(int pos)
                    Returns the Element beginning at or immediately following the specified position in the source document.

                    This is equivalent to getNextStartTag(pos).getElement(), assuming the result is not null.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the Element beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextElement

                    public Element getNextElement(int pos,
                                                  java.lang.String name)
                    Returns the normal Element with the specified name beginning at or immediately following the specified position in the source document.

                    This is equivalent to getNextStartTag(pos,name).getElement(), assuming the result is not null.

                    Specifying a null argument to the name parameter is equivalent to getNextElement(pos).

                    Specifying an argument to the name parameter that ends in a colon (:) searches for all elements in the specified XML namespace.

                    This method also returns elements consisting of unregistered tags if the specified name is not a valid XML tag name.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the element to search for, may be null.
                    Returns:
                    the normal Element with the specified name beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextElement

                    public Element getNextElement(int pos,
                                                  java.lang.String attributeName,
                                                  java.lang.String value,
                                                  boolean valueCaseSensitive)
                    Returns the Element with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.

                    This is equivalent to getNextStartTag(pos,attributeName,value,valueCaseSensitive).getElement(), assuming the result is not null.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    value - the value of the specified attribute to search for, must not be null.
                    valueCaseSensitive - specifies whether the attribute value matching is case sensitive.
                    Returns:
                    the Element with the specified attribute name/value pair beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.
                    See Also:
                    getNextElement(int pos, String attributeName, Pattern valueRegexPattern)

                    getNextElement

                    public Element getNextElement(int pos,
                                                  java.lang.String attributeName,
                                                  java.util.regex.Pattern valueRegexPattern)
                    Returns the Element with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.

                    Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, without regard to the attribute value. This will also match an attribute that has no value at all.

                    This is equivalent to getNextStartTag(pos,attributeName,valueRegexPattern).getElement(), assuming the result is not null.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    valueRegexPattern - the regular expression pattern that must match the attribute value, may be null.
                    Returns:
                    the Element with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.
                    See Also:
                    getNextElement(int pos, String attributeName, String value, boolean valueCaseSensitive)

                    getNextElementByClass

                    public Element getNextElementByClass(int pos,
                                                         java.lang.String className)
                    Returns the Element with the specified class beginning at or immediately following the specified position in the source document.

                    This matches an element with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple class names separated by white space in the attribute value.

                    This is equivalent to getNextStartTagByClass(pos,className).getElement(), assuming the result is not null.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    className - the class name (case sensitive) to search for, must not be null.
                    Returns:
                    the Element with the specified class beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousStartTag

                    public StartTag getPreviousStartTag(int pos)
                    Returns the StartTag at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the StartTag at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousStartTag

                    public StartTag getPreviousStartTag(int pos,
                                                        StartTagType startTagType)
                    Returns the StartTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    This is exactly equivalent to (StartTag)getPreviousTag(pos,startTagType), but can be used to avoid the explicit cast to a StartTag object.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    startTagType - the StartTagType to search for.
                    Returns:
                    the StartTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousStartTag

                    public StartTag getPreviousStartTag(int pos,
                                                        java.lang.String name)
                    Returns the normal StartTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Specifying a null argument to the name parameter is equivalent to getPreviousStartTag(pos).

                    This method also returns unregistered tags if the specified name is not a valid XML tag name.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the start tag to search for, may be null.
                    Returns:
                    the normal StartTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousStartTag

                    public StartTag getPreviousStartTag(int pos,
                                                        java.lang.String name,
                                                        StartTagType startTagType)
                    Returns the StartTag with the specified name and type at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Specifying StartTagType.NORMAL as the argument to the startTagType parameter is equivalent to getPreviousStartTag(pos,name).

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the start tag to search for, may be null.
                    startTagType - the type of the start tag to search for, must not be null.
                    Returns:
                    the StartTag with the specified name and type at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextStartTag

                    public StartTag getNextStartTag(int pos)
                    Returns the StartTag beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the StartTag beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextStartTag

                    public StartTag getNextStartTag(int pos,
                                                    StartTagType startTagType)
                    Returns the StartTag of the specified type beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    This is exactly equivalent to (StartTag)getNextTag(pos,startTagType), but can be used to avoid the explicit cast to a StartTag object.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    startTagType - the StartTagType to search for.
                    Returns:
                    the StartTag of the specified type beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextStartTag

                    public StartTag getNextStartTag(int pos,
                                                    java.lang.String name)
                    Returns the normal StartTag with the specified name beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Specifying a null argument to the name parameter is equivalent to getNextStartTag(pos).

                    Specifying an argument to the name parameter that ends in a colon (:) searches for all start tags in the specified XML namespace.

                    This method also returns unregistered tags if the specified name is not a valid XML tag name.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the start tag to search for, may be null.
                    Returns:
                    the normal StartTag with the specified name beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextStartTag

                    public StartTag getNextStartTag(int pos,
                                                    java.lang.String name,
                                                    StartTagType startTagType)
                    Returns the StartTag with the specified name and type beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Specifying StartTagType.NORMAL as the argument to the startTagType parameter is equivalent to getNextStartTag(pos,name).

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the start tag to search for, may be null.
                    startTagType - the type of the start tag to search for, must not be null.
                    Returns:
                    the StartTag with the specified name and type beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextStartTag

                    public StartTag getNextStartTag(int pos,
                                                    java.lang.String attributeName,
                                                    java.lang.String value,
                                                    boolean valueCaseSensitive)
                    Returns the StartTag with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    value - the value of the specified attribute to search for, must not be null.
                    valueCaseSensitive - specifies whether the attribute value matching is case sensitive.
                    Returns:
                    the StartTag with the specified attribute name/value pair beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.
                    See Also:
                    getNextStartTag(int pos, String attributeName, Pattern valueRegexPattern)

                    getNextStartTag

                    public StartTag getNextStartTag(int pos,
                                                    java.lang.String attributeName,
                                                    java.util.regex.Pattern valueRegexPattern)
                    Returns the StartTag with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.

                    Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, without regard to the attribute value. This will also match an attribute that has no value at all.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    attributeName - the attribute name (case insensitive) to search for, must not be null.
                    valueRegexPattern - the regular expression pattern that must match the attribute value, may be null.
                    Returns:
                    the StartTag with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.
                    See Also:
                    getNextStartTag(int pos, String attributeName, String value, boolean valueCaseSensitive)

                    getNextStartTagByClass

                    public StartTag getNextStartTagByClass(int pos,
                                                           java.lang.String className)
                    Returns the StartTag with the specified class beginning at or immediately following the specified position in the source document.

                    This matches a start tag with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple class names separated by white space in the attribute value.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    className - the class name (case sensitive) to search for, must not be null.
                    Returns:
                    the StartTag with the specified class beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousEndTag

                    public EndTag getPreviousEndTag(int pos)
                    Returns the EndTag at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the EndTag at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousEndTag

                    public EndTag getPreviousEndTag(int pos,
                                                    EndTagType endTagType)
                    Returns the EndTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    This is exactly equivalent to (EndTag)getPreviousTag(pos,endTagType), but can be used to avoid the explicit cast to an EndTag object.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    endTagType - the EndTagType to search for.
                    Returns:
                    the EndTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousEndTag

                    public EndTag getPreviousEndTag(int pos,
                                                    java.lang.String name)
                    Returns the normal EndTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the end tag to search for, must not be null.
                    Returns:
                    the normal EndTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextEndTag

                    public EndTag getNextEndTag(int pos)
                    Returns the EndTag beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the EndTag beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextEndTag

                    public EndTag getNextEndTag(int pos,
                                                EndTagType endTagType)
                    Returns the EndTag of the specified type beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    This is exactly equivalent to (EndTag)getNextTag(pos,endTagType), but can be used to avoid the explicit cast to an EndTag object.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    endTagType - the EndTagType to search for.
                    Returns:
                    the EndTag of the specified type beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextEndTag

                    public EndTag getNextEndTag(int pos,
                                                java.lang.String name)
                    Returns the normal EndTag with the specified name beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the end tag to search for, must not be null.
                    Returns:
                    the normal EndTag with the specified name beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextEndTag

                    public EndTag getNextEndTag(int pos,
                                                java.lang.String name,
                                                EndTagType endTagType)
                    Returns the EndTag with the specified name and type beginning at or immediately following the specified position in the source document.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    name - the name of the end tag to search for, must not be null.
                    endTagType - the type of the end tag to search for, must not be null.
                    Returns:
                    the EndTag with the specified name and type beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getEnclosingElement

                    public Element getEnclosingElement(int pos)
                    Returns the most nested normal Element that encloses the specified position in the source document.

                    The specified position can be anywhere inside the start tag, end tag, or content of the element. There is no requirement that the returned element has an end tag, and it may be a server tag or HTML comment.

                    See the Tag class documentation for more details about the behaviour of this method.

                    Parameters:
                    pos - the position in the source document, may be out of bounds.
                    Returns:
                    the most nested normal Element that encloses the specified position in the source document, or null if the position is not within an element or is out of bounds.

                    getEnclosingElement

                    public Element getEnclosingElement(int pos,
                                                       java.lang.String name)
                    Returns the most nested normal Element with the specified name that encloses the specified position in the source document.

                    The specified position can be anywhere inside the start tag, end tag, or content of the element. There is no requirement that the returned element has an end tag, and it may be a server tag or HTML comment.

                    See the Tag class documentation for more details about the behaviour of this method.

                    This method also returns elements consisting of unregistered tags if the specified name is not a valid XML tag name.

                    Parameters:
                    pos - the position in the source document, may be out of bounds.
                    name - the name of the element to search for.
                    Returns:
                    the most nested normal Element with the specified name that encloses the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getPreviousCharacterReference

                    public CharacterReference getPreviousCharacterReference(int pos)
                    Returns the CharacterReference at or immediately preceding (or enclosing) the specified position in the source document.

                    Character references positioned within an HTML comment are NOT ignored.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the CharacterReference at or immediately preceding (or enclosing) the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNextCharacterReference

                    public CharacterReference getNextCharacterReference(int pos)
                    Returns the CharacterReference beginning at or immediately following the specified position in the source document.

                    Character references positioned within an HTML comment are NOT ignored.

                    Parameters:
                    pos - the position in the source document from which to start the search, may be out of bounds.
                    Returns:
                    the CharacterReference beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.

                    getNameEnd

                    public int getNameEnd(int pos)
                    Returns the end position of the XML Name that starts at the specified position.

                    This implementation first checks that the character at the specified position is a valid XML Name start character as defined by the Tag.isXMLNameStartChar(char) method. If this is not the case, the value -1 is returned.

                    Once the first character has been checked, subsequent characters are checked using the Tag.isXMLNameChar(char) method until one is found that is not a valid XML Name character or the end of the document is reached. This position is then returned.

                    Parameters:
                    pos - the position in the source document of the first character of the XML Name.
                    Returns:
                    the end position of the XML Name that starts at the specified position, or -1 if the character at the specified position is not a valid XML Name start character.
                    Throws:
                    java.lang.IndexOutOfBoundsException - if the specified position is not within the bounds of the document.

                    parseAttributes

                    public Attributes parseAttributes(int pos,
                                                      int maxEnd)
                    Parses any Attributes starting at the specified position. This method is only used in the unusual situation where attributes exist outside of a start tag. The StartTag.getAttributes() method should be used in normal situations.

                    The returned Attributes segment always begins at pos, and ends at the end of the last attribute before either maxEnd or the first occurrence of "/>" or ">" outside of a quoted attribute value, whichever comes first.

                    Only returns null if the segment contains a major syntactical error or more than the default maximum number of minor syntactical errors.

                    This is equivalent to parseAttributes(pos,maxEnd,Attributes.getDefaultMaxErrorCount()).

                    Parameters:
                    pos - the position in the source document at the beginning of the attribute list, may be out of bounds.
                    maxEnd - the maximum end position of the attribute list, or -1 if no maximum.
                    Returns:
                    the Attributes starting at the specified position, or null if too many errors occur while parsing or the specified position is out of bounds.
                    See Also:
                    StartTag.getAttributes(), Segment.parseAttributes()

                    parseAttributes

                    public Attributes parseAttributes(int pos,
                                                      int maxEnd,
                                                      int maxErrorCount)
                    Parses any Attributes starting at the specified position. This method is only used in the unusual situation where attributes exist outside of a start tag. The StartTag.getAttributes() method should be used in normal situations.

                    Only returns null if the segment contains a major syntactical error or more than the specified number of minor syntactical errors.

                    The maxErrorCount argument overrides the default maximum error count.

                    See parseAttributes(int pos, int maxEnd) for more information.

                    Parameters:
                    pos - the position in the source document at the beginning of the attribute list, may be out of bounds.
                    maxEnd - the maximum end position of the attribute list, or -1 if no maximum.
                    maxErrorCount - the maximum number of minor errors allowed while parsing.
                    Returns:
                    the Attributes starting at the specified position, or null if too many errors occur while parsing or the specified position is out of bounds.
                    See Also:
                    StartTag.getAttributes(), parseAttributes(int pos, int maxEnd)

                    ignoreWhenParsing

                    public void ignoreWhenParsing(int begin,
                                                  int end)
                    Causes the specified range of the source text to be ignored when parsing.

                    See the documentation of the Segment.ignoreWhenParsing() method for more information.

                    Parameters:
                    begin - the beginning character position in the source text.
                    end - the end character position in the source text.

                    ignoreWhenParsing

                    public void ignoreWhenParsing(java.util.Collection<? extends Segment> segments)
                    Causes all of the segments in the specified collection to be ignored when parsing.

                    This is equivalent to calling Segment.ignoreWhenParsing() on each segment in the collection.


                    setLogger

                    public void setLogger(Logger logger)
                    Sets the Logger that handles log messages.

                    Specifying a null argument disables logging completely for operations performed on this Source object.

                    A logger instance is created automatically for each Source object using the LoggerProvider specified by the static Config.LoggerProvider property. The name used for all automatically created logger instances is "net.htmlparser.jericho".

                    Use of this method with a non-null argument is therefore not usually necessary, unless specifying an instance of WriterLogger or a user-defined Logger implementation.

                    Parameters:
                    logger - the logger that will handle log messages, or null to disable logging.
                    See Also:
                    Config.LoggerProvider

                    getLogger

                    public Logger getLogger()
                    Returns the Logger that handles log messages.

                    A logger instance is created automatically for each Source object using the LoggerProvider specified by the static Config.LoggerProvider property. This can be overridden by calling the setLogger(Logger) method. The name used for all automatically created logger instances is "net.htmlparser.jericho".

                    Returns:
                    the Logger that handles log messages, or null if logging is disabled.

                    clearCache

                    public void clearCache()
                    Clears the tag cache of all tags.

                    This method may be useful after calling the Segment.ignoreWhenParsing() method so that any tags previously found within the ignored segments will no longer be returned by the tag search methods.


                    getCacheDebugInfo

                    public java.lang.String getCacheDebugInfo()
                    Returns a string representation of the tag cache, useful for debugging purposes.

                    Returns:
                    a string representation of the tag cache, useful for debugging purposes.

                    getParseText

                    public final ParseText getParseText()
                    Returns the parse text of this source document.

                    This method is normally only of interest to users who wish to create custom tag types.

                    The parse text is defined as the entire text of the source document in lower case, with all ignored segments replaced by space characters.

                    Returns:
                    the parse text of this source document.

                    subSequence

                    public final java.lang.CharSequence subSequence(int begin,
                                                                    int end)
                    Returns a new character sequence that is a subsequence of this source document.

                    Specified by:
                    subSequence in interface java.lang.CharSequence
                    Overrides:
                    subSequence in class Segment
                    Parameters:
                    begin - the begin position, inclusive.
                    end - the end position, exclusive.
                    Returns:
                    a new character sequence that is a subsequence of this source document.

                    charAt

                    public final char charAt(int index)
                    Description copied from class: Segment
                    Returns the character at the specified index.

                    This is logically equivalent to toString().charAt(index) for valid argument values 0 <= index < length().

                    However because this implementation works directly on the underlying document source string, it should not be assumed that an IndexOutOfBoundsException is thrown for an invalid argument value.

                    Specified by:
                    charAt in interface java.lang.CharSequence
                    Overrides:
                    charAt in class Segment
                    Parameters:
                    index - the index of the character.
                    Returns:
                    the character at the specified index.

                    length

                    public final int length()
                    Returns the length of the source document.

                    Specified by:
                    length in interface java.lang.CharSequence
                    Overrides:
                    length in class Segment
                    Returns:
                    the length of the source document.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/EndTagTypeGenericImplementation.html0000644000175000017500000007432711214132420031731 0ustar twernertwerner EndTagTypeGenericImplementation (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class EndTagTypeGenericImplementation

                    java.lang.Object
                      extended by TagType
                          extended by EndTagType
                              extended by EndTagTypeGenericImplementation
                    

                    public class EndTagTypeGenericImplementation
                    extends EndTagType

                    Provides a generic implementation of the abstract EndTagType class based on the most common end tag behaviour.

                    This class is only of interest to users who wish to create custom tag types.

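                    The differences between this class and its abstract superclass EndTagType are:
                    - The introduction of the IsStatic property.
                    - The constructTagAt(Source, int pos) method has a default implementation.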

                    Most of the predefined end tag types are implemented using this class or a subclass of it.

                    See Also:
                    StartTagTypeGenericImplementation

                    Field Summary
                     
                    Fields inherited from class EndTagType
                    NORMAL, UNREGISTERED
                     
                    Constructor Summary
                    protected EndTagTypeGenericImplementation(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, boolean isServerTag, boolean isStatic)
                              Constructs a new EndTagTypeGenericImplementation object based on the specified properties.
                     
                    Method Summary
                    protected  Tag constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                     java.lang.String generateHTML(java.lang.String startTagName)
                              Generates the HTML text of an end tag of this type given the name of a corresponding start tag.
                     java.lang.String getEndTagName(java.lang.String startTagName)
                              Returns the end tag name that is required to match a corresponding start tag with the specified name.
                    protected  boolean isStatic()
                              Indicates whether the end tag text is static.
                     
                    Methods inherited from class EndTagType
                    constructEndTag, getCorrespondingStartTagType
                     
                    Methods inherited from class TagType
                    deregister, getClosingDelimiter, getDescription, getNamePrefix, getRegisteredTagTypes, getStartDelimiter, getTagTypesIgnoringEnclosedMarkup, isServerTag, isValidPosition, register, setTagTypesIgnoringEnclosedMarkup, tagEncloses, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    EndTagTypeGenericImplementation

                    protected EndTagTypeGenericImplementation(java.lang.String description,
                                                              java.lang.String startDelimiter,
                                                              java.lang.String closingDelimiter,
                                                              boolean isServerTag,
                                                              boolean isStatic)
                    Constructs a new EndTagTypeGenericImplementation object based on the specified properties.
                    (implementation assistance method)

                    The purpose of the isStatic parameter is explained in the IsStatic property description.

                    Parameters:
                    description - a description of the new end tag type useful for debugging purposes.
                    startDelimiter - the start delimiter of the new end tag type.
                    closingDelimiter - the closing delimiter of the new end tag type.
                    isServerTag - indicates whether the new end tag type is a server tag.
                    isStatic - determines whether the end tag text is static.
                    Method Detail

                    isStatic

                    protected final boolean isStatic()
                    Indicates whether the end tag text is static.
                    (property and implementation assistance method)

                    The purpose of this property is to determine the behaviour of the generateHTML(String startTagName) method.

                    If this property is true, the end tag text is constant for all tags of this type.

                    If this property is false, the end tag text includes the name of the corresponding start tag.

                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_END is the only predefined end tag for which this property is true. All tags of this type have the constant tag text "</&>".

                    Returns:
                    true if the end tag text is static, otherwise false.

                    getEndTagName

                    public java.lang.String getEndTagName(java.lang.String startTagName)
                    Returns the end tag name that is required to match a corresponding start tag with the specified name.
                    (property method)

                    This implementation overrides the default implementation in EndTagType.getEndTagName(String startTagName).

                    If the value of the IsStatic property is false, it simply returns startTagName, as in the default implementation.

                    If the value of the IsStatic property is true, it returns this end tag type's name prefix.

                    Note that the startTagName parameter should include the start tag's name prefix if it has one.

                    Overrides:
                    getEndTagName in class EndTagType
                    Parameters:
                    startTagName - the name of a corresponding start tag, including its name prefix.
                    Returns:
                    the end tag name that is required to match a corresponding start tag with the specified name.

                    generateHTML

                    public java.lang.String generateHTML(java.lang.String startTagName)
                    Generates the HTML text of an end tag of this type given the name of a corresponding start tag.
                    (property method)

                    This implementation overrides the default implementation in EndTagType.generateHTML(String startTagName) to improve efficiency in the case of a static end tag type, although the functionality is the same.

                    Overrides:
                    generateHTML in class EndTagType
                    Parameters:
                    startTagName - the name of a corresponding start tag, including its name prefix.
                    Returns:
                    the HTML text of an end tag of this type given the name of a corresponding start tag.

                    constructTagAt

                    protected Tag constructTagAt(Source source,
                                                 int pos)
                    Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    (default implementation method)

                    This default implementation checks the source text for a match according to the following criteria:

                    If the value of the IsStatic property is false, this implementation ensures that the source text matches the expression:
                    getStartDelimiter()+"name"+optionalWhiteSpace+getClosingDelimiter()
                    where name is a valid XML tag name, and optionalWhiteSpace is a string of zero or more white space characters. The name of the constructed end tag becomes getNamePrefix()+"name".

                    If the value of the IsStatic property is true, this implementation ensures that the source text matches the static expression:
                    getStartDelimiter()+getClosingDelimiter()
                    The name of the constructed end tag is the value of the getNamePrefix() method.

                    See TagType.constructTagAt(Source, int pos) for more important information about this method.

                    Specified by:
                    constructTagAt in class TagType
                    Parameters:
                    source - the Source document.
                    pos - the position in the source document.
                    Returns:
                    a tag of this type at the specified position in the specified source document if it meets all of the required features, or null if it does not meet the criteria.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/TagType.html0000644000175000017500000020563711214132422025101 0ustar twernertwerner TagType (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class TagType

                    java.lang.Object
                      extended by TagType
                    
                    Direct Known Subclasses:
                    EndTagType, StartTagType

                    public abstract class TagType
                    extends java.lang.Object

                    Defines the syntax for a tag type that can be recognised by the parser.

                    This class is the root abstract class common to all tag types, and contains methods to register and deregister tag types as well as various methods to aid in their implementation.

                    Every tag type is represented by a singleton instance of a class that must be a subclass of either StartTagType or EndTagType. These two abstract classes, the only direct descendants of this class, represent the two major classifications under which every tag type exists.

                    Because all TagType instances must be singletons, the '==' operator can be used to test for a particular tag type instead of the equals(Object) method.

                    The term predefined tag type refers to any of the tag types defined in this library, including both standard and extended tag types.

                    The term standard tag type refers to any of the tag types represented by instances in static fields of the StartTagType and EndTagType subclasses. Standard tag types are registered by default, and define the tags most commonly found in HTML documents.

                    The term extended tag type refers to any predefined tag type that is not a standard tag type. The PHPTagTypes and MasonTagTypes classes contain extended tag types related to their respective server platforms. The tag types defined within them must be registered by the user before they are recognised by the parser.

                    The term custom tag type refers to any user-defined tag type, or any tag type that is not a predefined tag type.

                    The tag recognition process of the parser gives each tag type a precedence level, which is primarily determined by the length of its start delimiter. A tag type with a more specific start delimiter is chosen in preference to one with a less specific start delimiter, assuming they both share the same prefix. If two tag types have exactly the same start delimiter, the one which was registered later has the higher precedence.

                    The two special tag types StartTagType.UNREGISTERED and EndTagType.UNREGISTERED represent tags that do not match the syntax of any other tag type. They have the lowest precedence of all the tag types. The Tag.isUnregistered() method provides a detailed explanation of unregistered tags.

                    See the documentation of the tag parsing process for more information on how each tag is identified by the parser.

                    Note that the standard HTML element names do not represent different tag types. All standard HTML tags have a tag type of StartTagType.NORMAL or EndTagType.NORMAL, and are also referred to as normal tags.

                    Apart from the registration related methods, all of the methods in this class and its subclasses relate to the implementation of custom tag types and are not relevant to the majority of users who just use the predefined tag types.

                    For performance reasons, this library only allows tag types that start with a '<' character. The character following this defines the immediate subclass of the tag type. An EndTagType always has a slash ('/') as the second character, while a StartTagType has any character other than a slash as the second character. This definition means that tag types which are not intuitively classified as either start tag types or end tag types (such as an HTML comment) are mostly classified as start tag types.

                    Every method in this and the StartTagType and EndTagType abstract classes can be categorised as one of the following:

                    Properties:
                    Simple properties (marked final) that were either specified as parameters during construction or are derived from those parameters.
                    Abstract implementation methods:
                    Methods that must be implemented in a subclass.
                    Default implementation methods:
                    Methods (not marked final) that implement common behaviour, but may be overridden in a subclass.
                    Implementation assistance methods:
                    Protected methods that provide low-level functionality and are only of use within other implementation methods.
                    Registration related methods:
                    Utility methods (marked final) relating to the registration of tag type instances.


                    Method Summary
                    protected abstract  Tag constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                     void deregister()
                              Deregisters this tag type.
                     java.lang.String getClosingDelimiter()
                              Returns the character sequence that marks the end of the tag.
                     java.lang.String getDescription()
                              Returns a description of this tag type useful for debugging purposes.
                    protected  java.lang.String getNamePrefix()
                              Returns the name prefix required by this tag type.
                    static java.util.List<TagType> getRegisteredTagTypes()
                              Returns a list of all the currently registered tag types in order of lowest to highest precedence.
                     java.lang.String getStartDelimiter()
                              Returns the character sequence that marks the start of the tag.
                    static TagType[] getTagTypesIgnoringEnclosedMarkup()
                              Returns an array of all the tag types inside which the parser ignores all non-server tags in parse on demand mode.
                     boolean isServerTag()
                              Indicates whether this tag type represents a server tag.
                    protected  boolean isValidPosition(Source source, int pos, int[] fullSequentialParseData)
                              Indicates whether a tag of this type is valid in the specified position of the specified source document.
                     void register()
                              Registers this tag type for recognition by the parser.
                    static void setTagTypesIgnoringEnclosedMarkup(TagType[] tagTypes)
                              Sets the tag types inside which the parser ignores all non-server tags.
                    protected  boolean tagEncloses(Source source, int pos)
                              Indicates whether a tag of this type encloses the specified position of the specified source document.
                     java.lang.String toString()
                              Returns a string representation of this object useful for debugging purposes.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    register

                    public final void register()
                    Registers this tag type for recognition by the parser.
                    (registration related method)

                    The order of registration affects the precedence of the tag type when a potential tag is being parsed.

                    See Also:
                    deregister()

                    deregister

                    public final void deregister()
                    Deregisters this tag type.
                    (registration related method)

                    See Also:
                    register()

                    getRegisteredTagTypes

                    public static final java.util.List<TagType> getRegisteredTagTypes()
                    Returns a list of all the currently registered tag types in order of lowest to highest precedence.
                    (registration related method)

                    Returns:
                    a list of all the currently registered tag types in order of lowest to highest precedence.

                    getDescription

                    public final java.lang.String getDescription()
                    Returns a description of this tag type useful for debugging purposes.
                    (property method)

                    Returns:
                    a description of this tag type useful for debugging purposes.

                    getStartDelimiter

                    public final java.lang.String getStartDelimiter()
                    Returns the character sequence that marks the start of the tag.
                    (property method)

                    The character sequence must be all in lower case.

                    The first character in this property must be '<'. This is a deliberate limitation of the system which is necessary to retain reasonable performance.

                    The second character in this property must be '/' if the implementing class is an EndTagType. It must not be '/' if the implementing class is a StartTagType.

                    Standard Tag Type Values:
                    Tag TypeStart Delimiter
                    StartTagType.UNREGISTERED<
                    StartTagType.NORMAL<
                    StartTagType.COMMENT<!--
                    StartTagType.XML_DECLARATION<?xml
                    StartTagType.XML_PROCESSING_INSTRUCTION<?
                    StartTagType.DOCTYPE_DECLARATION<!doctype
                    StartTagType.MARKUP_DECLARATION<!
                    StartTagType.CDATA_SECTION<![cdata[
                    StartTagType.SERVER_COMMON<%
                    EndTagType.UNREGISTERED</
                    EndTagType.NORMAL</
                    Extended Tag Type Values:
                    Tag TypeStart Delimiter
                    MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT<![
                    PHPTagTypes.PHP_SCRIPT<script
                    PHPTagTypes.PHP_SHORT<?
                    PHPTagTypes.PHP_STANDARD<?php
                    MasonTagTypes.MASON_COMPONENT_CALL<&
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT<&|
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_END</&
                    MasonTagTypes.MASON_NAMED_BLOCK<%
                    MasonTagTypes.MASON_NAMED_BLOCK_END</%

                    Returns:
                    the character sequence that marks the start of the tag.

                    getClosingDelimiter

                    public final java.lang.String getClosingDelimiter()
                    Returns the character sequence that marks the end of the tag.
                    (property method)

                    The character sequence must be all in lower case.

                    In a StartTag of a type that has attributes, characters appearing inside a quoted attribute value are ignored when determining the location of the closing delimiter.

                    Note that the optional '/' character preceding the closing '>' in an empty-element tag is not considered part of the end delimiter. This property must define the closing delimiter common to all instances of the tag type.

                    Standard Tag Type Values:
                    Tag TypeClosing Delimiter
                    StartTagType.UNREGISTERED>
                    StartTagType.NORMAL>
                    StartTagType.COMMENT-->
                    StartTagType.XML_DECLARATION?>
                    StartTagType.XML_PROCESSING_INSTRUCTION?>
                    StartTagType.DOCTYPE_DECLARATION>
                    StartTagType.MARKUP_DECLARATION>
                    StartTagType.CDATA_SECTION]]>
                    StartTagType.SERVER_COMMON%>
                    EndTagType.UNREGISTERED>
                    EndTagType.NORMAL>
                    Extended Tag Type Values:
                    Tag TypeClosing Delimiter
                    MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT]>
                    PHPTagTypes.PHP_SCRIPT>
                    PHPTagTypes.PHP_SHORT?>
                    PHPTagTypes.PHP_STANDARD?>
                    MasonTagTypes.MASON_COMPONENT_CALL&>
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT&>
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_END>
                    MasonTagTypes.MASON_NAMED_BLOCK>
                    MasonTagTypes.MASON_NAMED_BLOCK_END>

                    Returns:
                    the character sequence that marks the end of the tag.

                    isServerTag

                    public final boolean isServerTag()
                    Indicates whether this tag type represents a server tag.
                    (property method)

                    Server tags are typically parsed by some process on the web server and substituted with other text or markup before delivery to the user agent. This parser therefore handles them differently to non-server tags in that they can occur at any position in the document without regard for the HTML document structure. As a result they can occur anywhere inside any other tag, although a non-server tag cannot theoretically occur inside a server tag.

                    The documentation of the tag parsing process explains in detail how the value of this property affects the recognition of server tags, as well as how the presence of server tags affects the recognition of non-server tags in and around them.

                    Most XML-style server tags can not be represented as a distinct tag type because they are generally indistinguishable from non-server XML tags. See the Segment.ignoreWhenParsing() method for information about how to prevent such server tags from interfering with the proper parsing of the rest of the document.

                    Standard Tag Type Values:
                    Tag TypeIs Server Tag
                    StartTagType.UNREGISTEREDfalse
                    StartTagType.NORMALfalse
                    StartTagType.COMMENTfalse
                    StartTagType.XML_DECLARATIONfalse
                    StartTagType.XML_PROCESSING_INSTRUCTIONfalse
                    StartTagType.DOCTYPE_DECLARATIONfalse
                    StartTagType.MARKUP_DECLARATIONfalse
                    StartTagType.CDATA_SECTIONfalse
                    StartTagType.SERVER_COMMONtrue
                    EndTagType.UNREGISTEREDfalse
                    EndTagType.NORMALfalse
                    Extended Tag Type Values:
                    Tag TypeIs Server Tag
                    MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENTfalse
                    PHPTagTypes.PHP_SCRIPTtrue
                    PHPTagTypes.PHP_SHORTtrue
                    PHPTagTypes.PHP_STANDARDtrue
                    MasonTagTypes.MASON_COMPONENT_CALLtrue
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENTtrue
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_ENDtrue
                    MasonTagTypes.MASON_NAMED_BLOCKtrue
                    MasonTagTypes.MASON_NAMED_BLOCK_ENDtrue

                    Returns:
                    true if this tag type represents a server tag, otherwise false.

                    getNamePrefix

                    protected final java.lang.String getNamePrefix()
                    Returns the name prefix required by this tag type.
                    (property method)

                    This string is identical to the start delimiter, except that it does not include the initial "<" or "</" characters that always prefix the start delimiter of a StartTagType or EndTagType respectively.

                    The name of a tag of this type may or may not include extra characters after the prefix. This is determined by properties such as StartTagType.isNameAfterPrefixRequired() or EndTagTypeGenericImplementation.isStatic().

                    Standard Tag Type Values:
                    Tag TypeName Prefix
                    StartTagType.UNREGISTERED(empty string)
                    StartTagType.NORMAL(empty string)
                    StartTagType.COMMENT!--
                    StartTagType.XML_DECLARATION?xml
                    StartTagType.XML_PROCESSING_INSTRUCTION?
                    StartTagType.DOCTYPE_DECLARATION!doctype
                    StartTagType.MARKUP_DECLARATION!
                    StartTagType.CDATA_SECTION![cdata[
                    StartTagType.SERVER_COMMON%
                    EndTagType.UNREGISTERED(empty string)
                    EndTagType.NORMAL(empty string)
                    Extended Tag Type Values:
                    Tag TypeName Prefix
                    MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT![
                    PHPTagTypes.PHP_SCRIPTscript
                    PHPTagTypes.PHP_SHORT?
                    PHPTagTypes.PHP_STANDARD?php
                    MasonTagTypes.MASON_COMPONENT_CALL&
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT&|
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_END&
                    MasonTagTypes.MASON_NAMED_BLOCK%
                    MasonTagTypes.MASON_NAMED_BLOCK_END%

                    Returns:
                    the name prefix required by this tag type.
                    See Also:
                    getStartDelimiter()

                    isValidPosition

                    protected boolean isValidPosition(Source source,
                                                      int pos,
                                                      int[] fullSequentialParseData)
                    Indicates whether a tag of this type is valid in the specified position of the specified source document.
                    (implementation assistance method)

                    This method is called immediately before constructTagAt(Source, int pos) to do a preliminary check on the validity of a tag of this type in the specified position.

                    This check is not performed as part of the constructTagAt(Source, int pos) call because the same validation is used for all the standard tag types, and is likely to be sufficient for all custom tag types. Having this check separated into a different method helps to isolate common code from the code that is unique to each tag type.

                    In theory, a server tag is valid in any position, but a non-server tag is not valid inside any other tag, nor inside elements with CDATA content such as SCRIPT and STYLE elements.

                    The common implementation of this method always returns true for server tags, but for non-server tags it behaves slightly differently depending upon whether or not a full sequential parse is being performed.

                    When this method is called during a full sequential parse, the fullSequentialParseData argument contains information allowing the exact theoretical check to be performed, rejecting a non-server tag if it is inside any other tag. See below for further information about the fullSequentialParseData parameter.

                    When this method is called in parse on demand mode (not during a full sequential parse, fullSequentialParseData==null), practical constraints prevent the exact theoretical check from being carried out, and non-server tags are only rejected if they are found inside HTML comments or CDATA sections.

                    This behaviour is configurable by manipulating the static TagTypesIgnoringEnclosedMarkup array to determine which tag types can not contain non-server tags in parse on demand mode. The documentation of this property contains a more detailed analysis of the subject and explains why only the comment and CDATA section tag types are included by default.

                    See the documentation of the tag parsing process for more information about how this method fits into the whole tag parsing process.

                    This method can be overridden in custom tag types if the default implementation is unsuitable.

                    The fullSequentialParseData parameter:

                    This parameter is used to discard non-server tags that are found inside other tags or inside SCRIPT elements.

                    In the current version of this library, the fullSequentialParseData argument is either null (in parse on demand mode) or an integer array containing only a single entry (if a full sequential parse is being performed).

                    The integer contained in the array is the maximum position in the document at which the end of a tag has been found, indicating that no non-server tags should be recognised before that position. If no tags have yet been encountered, the value of this integer is zero.

                    If the last tag encountered was the start tag of a SCRIPT element, the value of this integer is Integer.MAX_VALUE, indicating that no other non-server elements should be recognised until the end tag of the SCRIPT element is found. According to the HTML 4.01 specification section 6.2, the first occurrence of the character sequence "</" terminates the special handling of CDATA within SCRIPT and STYLE elements. This library however only terminates the CDATA handling of SCRIPT element content when the character sequence "</script" is detected, in line with the behaviour of the major browsers.

                    Note that the implicit treatment of SCRIPT element content as CDATA should theoretically also prevent the recognition of comments and explicit CDATA sections inside script elements. While this is true for explicit CDATA sections, the parser does still recognise comments inside SCRIPT elements in order to maintain compatibility with the major browsers. This prevents the character sequence "</script" from terminating the SCRIPT element if it occurs inside a comment. The end of the comment however also ends the implicit treatment of the SCRIPT element content as CDATA.

                    Although STYLE elements should theoretically be treated in the same way as SCRIPT elements, the syntax of Cascading Style Sheets (CSS) does not contain any constructs that could be misinterpreted as HTML tags, so there is virtually no need to perform any special checks in this case.

                    IMPLEMENTATION NOTE: The rationale behind using an integer array to hold this value, rather than a scalar int value, is to emulate passing the parameter by reference. This value needs to be shared amongst several internal methods during the full sequential parse process, and any one of those methods needs to be able to modify the value and pass it back to the calling method. This would normally be implemented by passing the parameter by reference, but because Java does not support this language construct, a container for a mutable integer must be passed instead. Because the standard Java library does not provide a class for holding a single mutable integer (the java.lang.Integer class is immutable), the easiest container to use, without creating a class especially for this purpose, is an integer array. The use of an array does not imply any intention to use more than a single array entry in subsequent versions.

                    Parameters:
                    source - the Source document.
                    pos - the character position in the source document to check.
                    fullSequentialParseData - an integer array containing data allowing this method to implement a better algorithm when a full sequential parse is being performed, or null in parse on demand mode.
                    Returns:
                    true if a tag of this type is valid in the specified position of the specified source document, otherwise false.

                    getTagTypesIgnoringEnclosedMarkup

                    public static final TagType[] getTagTypesIgnoringEnclosedMarkup()
                    Returns an array of all the tag types inside which the parser ignores all non-server tags in parse on demand mode.
                    (implementation assistance method)

                    The tag types returned by this property (referred to in the following paragraphs as the "listed types") default to StartTagType.COMMENT and StartTagType.CDATA_SECTION.

                    This property is used by the default implementation of the isValidPosition method in parse on demand mode. It is not used at all during a full sequential parse.

                    In the default implementation of the isValidPosition method, in parse on demand mode, every new non-server tag found by the parser (referred to as a "new tag") undergoes a check to see whether it is enclosed by a tag of one of the listed types. This includes new tags of the listed types themselves if they are non-server tags. The recursive nature of this check means that all tags of the listed types occurring before the new tag must be found by the parser before it can determine whether the new tag should be ignored. To mitigate any performance issues arising from this process, the listed types are given special treatment in the tag cache. This dramatically decreases the time taken to search on these tag types, so adding a tag type to this array that is easily recognised and occurs infrequently only results in a small degradation in overall performance.

                    A special exception to the algorithm described above applies to COMMENT tags. The default implementation of the isValidPosition method does not check whether a COMMENT tag is inside another COMMENT tag, as this should never happen in a syntactically correct document (the characters '--' should not occur inside a comment). Skipping this check also avoids the need to recursively check every COMMENT tag back to the start of the document, which has the potential to cause a stack overflow in a large document containing lots of comments.

                    Theoretically, non-server tags appearing inside any other tag should be ignored, which is how the parser behaves during a full sequential parse.

                    Server tags in particular very often contain other "tags" that should not be recognised as tags by the parser. If this behaviour is required in parse on demand, the tag type of each server tag that might be found in the source documents can be added to this property using the static setTagTypesIgnoringEnclosedMarkup(TagType[]) method. For example, the following command would prevent non-server tags from being recognised inside standard PHP tags, as well as the default comment and CDATA section tags:

                    TagType.setTagTypesIgnoringEnclosedMarkup(new TagType[] {PHPTagTypes.PHP_STANDARD, StartTagType.COMMENT, StartTagType.CDATA_SECTION});

                    The only situation where a non-server tag can legitimately contain a sequence of characters that resembles a tag is within an attribute value. The HTML 4.01 specification section 5.3.2 specifically allows the presence of '<' and '>' characters within attribute values. A common occurrence of this is in event attributes containing scripts, such as the onclick attribute. There is no way of preventing such "tags" from being recognised in parse on demand mode, as adding StartTagType.NORMAL to this property as a listed type would be far too inefficient. Performing a full sequential parse of the source document prevents these attribute values from being recognised as tags, but can be very expensive if only a few tags in the document need to be parsed. The penalty of not parsing every tag in the document is that the exactness of this check is compromised, but in practical terms the difference is inconsequential. The default listed types of comments and CDATA sections yield sensible results in the vast majority of practical applications with only a minor impact on performance.

                    In XHTML, '<' and '>' characters must be represented in attribute values as character references (see the XML 1.0 specification section 3.1), so the situation should never arise that a tag is found inside another tag unless one of them is a server tag.

                    Returns:
                    an array of all the tag types inside which the parser ignores all non-server tags.

                    setTagTypesIgnoringEnclosedMarkup

                    public static final void setTagTypesIgnoringEnclosedMarkup(TagType[] tagTypes)
                    Sets the tag types inside which the parser ignores all non-server tags.
                    (implementation assistance method)

                    See getTagTypesIgnoringEnclosedMarkup() for the documentation of this property.

                    Parameters:
                    tagTypes - an array of tag types.

                    constructTagAt

                    protected abstract Tag constructTagAt(Source source,
                                                          int pos)
                    Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    (abstract implementation method)

                    The implementation of this method must check that the text at the specified position meets all of the criteria of this tag type, including such checks as the presence of the correct or well formed closing delimiter, name, attributes, end tag, or any other distinguishing features.

                    It can be assumed that the specified position starts with the start delimiter of this tag type, and that all other tag types with higher precedence (if any) have already been rejected as candidates. Tag types with lower precedence will be considered if this method returns null.

                    This method is only called after a successful check of the tag's position, i.e. isValidPosition(source,pos,fullSequentialParseData)==true.

                    The StartTagTypeGenericImplementation and EndTagTypeGenericImplementation subclasses provide default implementations of this method that allow the use of much simpler properties and implementation assistance methods to carry out the required functions.

                    Parameters:
                    source - the Source document.
                    pos - the position in the source document.
                    Returns:
                    a tag of this type at the specified position in the specified source document if it meets all of the required features, or null if it does not meet the criteria.

                    tagEncloses

                    protected final boolean tagEncloses(Source source,
                                                        int pos)
                    Indicates whether a tag of this type encloses the specified position of the specified source document.
                    (implementation assistance method)

                    This is logically equivalent to source.getEnclosingTag(pos,this)!=null, but is safe to use within other implementation methods without the risk of causing an infinite recursion.

                    This method is called from the default implementation of the isValidPosition(Source, int pos, int[] fullSequentialParseData) method.

                    Parameters:
                    source - the Source document.
                    pos - the character position in the source document to check.
                    Returns:
                    true if a tag of this type encloses the specified position of the specified source document, otherwise false.

                    toString

                    public java.lang.String toString()
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/NumericCharacterReference.html0000644000175000017500000010527311214132422030555 0ustar twernertwerner NumericCharacterReference (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class NumericCharacterReference

                    java.lang.Object
                      extended by Segment
                          extended by CharacterReference
                              extended by NumericCharacterReference
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>

                    public class NumericCharacterReference
                    extends CharacterReference

                    Represents an HTML Numeric Character Reference.

                    A numeric character reference can be one of two types:

                    Decimal Character Reference
                    A numeric character reference specifying the unicode code point in decimal notation.
                    This is signified by the absence of an 'x' character after the '#', (eg "&#62;").
                    Hexadecimal Character Reference
                    A numeric character reference specifying the unicode code point in hexadecimal notation.
                    This is signified by the presence of an 'x' character after the '#', (eg "&#x3e;").

                    Static methods to encode and decode strings and single characters can be found in the CharacterReference superclass.

                    NumericCharacterReference instances are obtained using one of the following methods:

                    See Also:
                    CharacterReference, CharacterEntityReference

                    Field Summary
                     
                    Fields inherited from class CharacterReference
                    INVALID_CODE_POINT
                     
                    Method Summary
                    static java.lang.String encode(java.lang.CharSequence unencodedText)
                              Encodes the specified text, escaping special characters into numeric character references.
                    static java.lang.String encodeDecimal(java.lang.CharSequence unencodedText)
                              Encodes the specified text, escaping special characters into decimal character references.
                    static java.lang.String encodeHexadecimal(java.lang.CharSequence unencodedText)
                              Encodes the specified text, escaping special characters into hexadecimal character references.
                     java.lang.String getCharacterReferenceString()
                              Returns the correct encoded form of this numeric character reference.
                    static java.lang.String getCharacterReferenceString(int codePoint)
                              Returns the numeric character reference encoded form of the specified unicode code point.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     boolean isDecimal()
                              Indicates whether this numeric character reference specifies the unicode code point in decimal format.
                     boolean isHexadecimal()
                              Indicates whether this numeric character reference specifies the unicode code point in hexadecimal format.
                     
                    Methods inherited from class CharacterReference
                    appendCharTo, decode, decode, decodeCollapseWhiteSpace, encode, encodeWithWhiteSpaceFormatting, getChar, getCodePoint, getCodePointFromCharacterReferenceString, getDecimalCharacterReferenceString, getDecimalCharacterReferenceString, getEncodingFilterWriter, getHexadecimalCharacterReferenceString, getHexadecimalCharacterReferenceString, getUnicodeText, getUnicodeText, isTerminated, parse, reencode, requiresEncoding
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    isDecimal

                    public boolean isDecimal()
                    Indicates whether this numeric character reference specifies the unicode code point in decimal format.

                    A numeric character reference in decimal format is referred to in this library as a decimal character reference.

                    Returns:
                    true if this numeric character reference specifies the unicode code point in decimal format, otherwise false.
                    See Also:
                    isHexadecimal()

                    isHexadecimal

                    public boolean isHexadecimal()
                    Indicates whether this numeric character reference specifies the unicode code point in hexadecimal format.

                    A numeric character reference in hexadecimal format is referred to in this library as a hexadecimal character reference.

                    Returns:
                    true if this numeric character reference specifies the unicode code point in hexadecimal format, otherwise false.
                    See Also:
                    isDecimal()

                    encode

                    public static java.lang.String encode(java.lang.CharSequence unencodedText)
                    Encodes the specified text, escaping special characters into numeric character references.

                    Each character is encoded only if the requiresEncoding(char) method would return true for that character.

                    This method encodes all character references in decimal format, and is exactly the same as calling encodeDecimal(CharSequence).

                    To encode text using both character entity references and numeric character references, use the
                    CharacterReference.encode(CharSequence) method instead.

                    To encode text using hexadecimal character references only, use the encodeHexadecimal(CharSequence) method instead.

                    Parameters:
                    unencodedText - the text to encode.
                    Returns:
                    the encoded string.
                    See Also:
                    CharacterReference.decode(CharSequence)

                    encodeDecimal

                    public static java.lang.String encodeDecimal(java.lang.CharSequence unencodedText)
                    Encodes the specified text, escaping special characters into decimal character references.

                    Each character is encoded only if the requiresEncoding(char) method would return true for that character.

                    To encode text using both character entity references and numeric character references, use the
                    CharacterReference.encode(CharSequence) method instead.

                    To encode text using hexadecimal character references only, use the encodeHexadecimal(CharSequence) method instead.

                    Parameters:
                    unencodedText - the text to encode.
                    Returns:
                    the encoded string.
                    See Also:
                    CharacterReference.decode(CharSequence)

                    encodeHexadecimal

                    public static java.lang.String encodeHexadecimal(java.lang.CharSequence unencodedText)
                    Encodes the specified text, escaping special characters into hexadecimal character references.

                    Each character is encoded only if the requiresEncoding(char) method would return true for that character.

                    To encode text using both character entity references and numeric character references, use the
                    CharacterReference.encode(CharSequence) method instead.

                    To encode text using decimal character references only, use the encodeDecimal(CharSequence) method instead.

                    Parameters:
                    unencodedText - the text to encode.
                    Returns:
                    the encoded string.
                    See Also:
                    CharacterReference.decode(CharSequence)

                    getCharacterReferenceString

                    public java.lang.String getCharacterReferenceString()
                    Returns the correct encoded form of this numeric character reference.

                    The returned string uses the same radix as the original character reference in the source document, i.e. decimal format if isDecimal() is true, and hexadecimal format if isHexadecimal() is true.

                    Note that the returned string is not necessarily the same as the original source text used to create this object. This library recognises certain invalid forms of character references, as detailed in the decode(CharSequence) method.

                    To retrieve the original source text, use the toString() method instead.

                    Example:
                    CharacterReference.parse("&#62").getCharacterReferenceString() returns "&#62;"

                    Specified by:
                    getCharacterReferenceString in class CharacterReference
                    Returns:
                    the correct encoded form of this numeric character reference.
                    See Also:
                    CharacterReference.getCharacterReferenceString(int codePoint)

                    getCharacterReferenceString

                    public static java.lang.String getCharacterReferenceString(int codePoint)
                    Returns the numeric character reference encoded form of the specified unicode code point.

                    This method returns the character reference in decimal format, and is exactly the same as calling CharacterReference.getDecimalCharacterReferenceString(int codePoint).

                    To get either the character entity reference or numeric character reference, use the
                    CharacterReference.getCharacterReferenceString(int codePoint) method instead.

                    To get the character reference in hexadecimal format, use the CharacterReference.getHexadecimalCharacterReferenceString(int codePoint) method instead.

                    Examples:
                    NumericCharacterReference.getCharacterReferenceString(62) returns "&#62;"
                    NumericCharacterReference.getCharacterReferenceString('>') returns "&#62;"

                    Returns:
                    the numeric character reference encoded form of the specified unicode code point.
                    See Also:
                    CharacterReference.getCharacterReferenceString(int codePoint)

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Description copied from class: Segment
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Attributes.html0000644000175000017500000011104611214132420025636 0ustar twernertwerner Attributes (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Attributes

                    java.lang.Object
                      extended by Segment
                          extended by net.htmlparser.jericho.nodoc.SequentialListSegment<Attribute>
                              extended by Attributes
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>, java.lang.Iterable<Attribute>, java.util.Collection<Attribute>, java.util.List<Attribute>

                    public final class Attributes
                    extends net.htmlparser.jericho.nodoc.SequentialListSegment<Attribute>

                    Represents the list of Attribute objects present within a particular StartTag.

                    This segment starts at the end of the start tag's name and ends at the end of the last attribute.

                    The attributes in this list are a representation of those found in the source document and are not modifiable. The OutputDocument.replace(Attributes, Map) and OutputDocument.replace(Attributes, boolean convertNamesToLowerCase) methods provide the means to add, delete or modify attributes and their values in an OutputDocument.

                    Any server tags encountered inside the attributes area of a non-server tag do not interfere with the parsing of the attributes.

                    If too many syntax errors are encountered while parsing a start tag's attributes, the parser rejects the entire start tag and generates a log entry. The threshold for the number of errors allowed can be set using the setDefaultMaxErrorCount(int) static method.

                    Obtained using the StartTag.getAttributes() method, or explicitly using the Source.parseAttributes(int pos, int maxEnd) method.

                    It is common for instances of this class to contain no attributes.

                    See also the XML 1.0 specification for attributes.

                    See Also:
                    StartTag, Attribute

                    Method Summary
                    static java.lang.String generateHTML(java.util.Map<java.lang.String,java.lang.String> attributesMap)
                              Returns the contents of the specified attributes map as HTML attribute name/value pairs.
                     Attribute get(java.lang.String name)
                              Returns the Attribute with the specified name (case insensitive).
                     int getCount()
                              Returns the number of attributes.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                    static int getDefaultMaxErrorCount()
                              Returns the default maximum error count allowed when parsing attributes.
                     java.lang.String getValue(java.lang.String name)
                              Returns the decoded value of the attribute with the specified name (case insensitive).
                     java.util.Iterator<Attribute> iterator()
                              Returns an iterator over the Attribute objects in this list in order of appearance.
                     java.util.ListIterator<Attribute> listIterator(int index)
                              Returns a list iterator of the Attribute objects in this list in order of appearance, starting at the specified position in the list.
                     java.util.Map<java.lang.String,java.lang.String> populateMap(java.util.Map<java.lang.String,java.lang.String> attributesMap, boolean convertNamesToLowerCase)
                              Populates the specified Map with the name/value pairs from these attributes.
                    static void setDefaultMaxErrorCount(int value)
                              Sets the default maximum error count allowed when parsing attributes.
                     
                    Methods inherited from class net.htmlparser.jericho.nodoc.SequentialListSegment
                    add, add, addAll, addAll, clear, contains, containsAll, get, indexOf, isEmpty, lastIndexOf, listIterator, remove, remove, removeAll, retainAll, set, size, subList, toArray, toArray
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     
                    Methods inherited from interface java.util.List
                    equals, hashCode
                     

                    Method Detail

                    get

                    public Attribute get(java.lang.String name)
                    Returns the Attribute with the specified name (case insensitive).

                    If more than one attribute exists with the specified name (which is illegal HTML), the first is returned.

                    Parameters:
                    name - the name of the attribute to get.
                    Returns:
                    the attribute with the specified name, or null if no attribute with the specified name exists.
                    See Also:
                    getValue(String name)

                    getValue

                    public java.lang.String getValue(java.lang.String name)
                    Returns the decoded value of the attribute with the specified name (case insensitive).

                    Returns null if no attribute with the specified name exists or the attribute has no value.

                    This is equivalent to get(name).getValue(), except that it returns null if no attribute with the specified name exists instead of throwing a NullPointerException.

                    Parameters:
                    name - the name of the attribute to get.
                    Returns:
                    the decoded value of the attribute with the specified name, or null if the attribute does not exist or has no value.
                    See Also:
                    Attribute.getValue()

                    getCount

                    public int getCount()
                    Returns the number of attributes.

                    This is equivalent to calling the size() method specified in the List interface.

                    Specified by:
                    getCount in class net.htmlparser.jericho.nodoc.SequentialListSegment<Attribute>
                    Returns:
                    the number of attributes.

                    iterator

                    public java.util.Iterator<Attribute> iterator()
                    Returns an iterator over the Attribute objects in this list in order of appearance.

                    Specified by:
                    iterator in interface java.lang.Iterable<Attribute>
                    Specified by:
                    iterator in interface java.util.Collection<Attribute>
                    Specified by:
                    iterator in interface java.util.List<Attribute>
                    Overrides:
                    iterator in class net.htmlparser.jericho.nodoc.SequentialListSegment<Attribute>
                    Returns:
                    an iterator over the Attribute objects in this list in order of appearance.

                    listIterator

                    public java.util.ListIterator<Attribute> listIterator(int index)
                    Returns a list iterator of the Attribute objects in this list in order of appearance, starting at the specified position in the list.

                    The specified index indicates the first item that would be returned by an initial call to the next() method. An initial call to the previous() method would return the item with the specified index minus one.

                    IMPLEMENTATION NOTE: For efficiency reasons this method does not return an immutable list iterator. Calling any of the add(Object), remove() or set(Object) methods on the returned ListIterator does not throw an exception but could result in unexpected behaviour.

                    Specified by:
                    listIterator in interface java.util.List<Attribute>
                    Specified by:
                    listIterator in class net.htmlparser.jericho.nodoc.SequentialListSegment<Attribute>
                    Parameters:
                    index - the index of the first item to be returned from the list iterator (by a call to the next() method).
                    Returns:
                    a list iterator of the items in this list (in proper sequence), starting at the specified position in the list.
                    Throws:
                    java.lang.IndexOutOfBoundsException - if the specified index is out of range (index < 0 || index > size()).

                    populateMap

                    public java.util.Map<java.lang.String,java.lang.String> populateMap(java.util.Map<java.lang.String,java.lang.String> attributesMap,
                                                                                        boolean convertNamesToLowerCase)
                    Populates the specified Map with the name/value pairs from these attributes.

                    Both names and values are stored as String objects.

                    The entries are added in order of appearance in the source document.

                    An attribute with no value is represented by a map entry with a null value.

                    Attribute values are automatically decoded before storage in the map.

                    Parameters:
                    attributesMap - the map to populate, must not be null.
                    convertNamesToLowerCase - specifies whether all attribute names are converted to lower case in the map.
                    Returns:
                    the same map specified as the argument to the attributesMap parameter, populated with the name/value pairs from these attributes.
                    See Also:
                    generateHTML(Map attributesMap)

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.

                    getDefaultMaxErrorCount

                    public static int getDefaultMaxErrorCount()
                    Returns the default maximum error count allowed when parsing attributes.

                    The system default value is 2.

                    When searching for start tags, the parser can find the end of the start tag only by parsing the attributes, as it is valid HTML for attribute values to contain '>' characters (see the HTML 4.01 specification section 5.3.2).

                    If the source text being parsed does not follow the syntax of an attribute list at all, the parser assumes that the text which was originally identified as the beginning of a start tag is in fact some other text, such as an invalid '<' character in the middle of some text, or part of a script element. In this case the entire start tag is rejected.

                    On the other hand, it is quite common for attributes to contain minor syntactical errors, such as an invalid character in an attribute name. For this reason the parser allows a certain number of minor errors to occur while parsing an attribute list before the entire start tag or attribute list is rejected. This property indicates the number of minor errors allowed.

                    Major syntactical errors cause the start tag or attribute list to be rejected immediately, regardless of the maximum error count setting.

                    Some errors are considered too minor to count at all (ignorable), such as missing white space between the end of a quoted attribute value and the start of the next attribute name.

                    The classification of particular syntax errors in attribute lists into major, minor, and ignorable is not part of the specification and may change in future versions.

                    Errors are logged as they occur.

                    The value of this property is set using the setDefaultMaxErrorCount(int) method.

                    Returns:
                    the default maximum error count allowed when parsing attributes.
                    See Also:
                    Source.parseAttributes(int pos, int maxEnd, int maxErrorCount)

                    setDefaultMaxErrorCount

                    public static void setDefaultMaxErrorCount(int value)
                    Sets the default maximum error count allowed when parsing attributes.

                    See the getDefaultMaxErrorCount() method for a full description of this property.

                    Parameters:
                    value - the default maximum error count allowed when parsing attributes.

                    generateHTML

                    public static java.lang.String generateHTML(java.util.Map<java.lang.String,java.lang.String> attributesMap)
                    Returns the contents of the specified attributes map as HTML attribute name/value pairs.

                    Each attribute (including the first) is preceded by a single space, and all values are encoded and enclosed in double quotes.

                    The map keys must be of type String and values must be objects that implement the CharSequence interface.

                    A null value represents an attribute with no value.

                    Parameters:
                    attributesMap - a map containing attribute name/value pairs.
                    Returns:
                    the contents of the specified attributes map as HTML attribute name/value pairs.
                    See Also:
                    StartTag.generateHTML(String tagName, Map attributesMap, boolean emptyElementTag)


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/OutputSegment.html0000644000175000017500000004460111214132422026337 0ustar twernertwerner OutputSegment (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Interface OutputSegment

                    All Superinterfaces:
                    CharStreamSource

                    public interface OutputSegment
                    extends CharStreamSource

                    Defines the interface for an output segment, which is used in an OutputDocument to replace segments of the source document with other text.

                    All text in the OutputDocument between the character positions defined by getBegin() and getEnd() is replaced by the content of this output segment. If the begin and end character positions are the same, the content is simply inserted at this position without replacing any text.

                    See Also:
                    OutputDocument.register(OutputSegment)

                    Field Summary
                    static java.util.Comparator<OutputSegment> COMPARATOR
                              The comparator used to sort output segments in the OutputDocument before output.
                     
                    Method Summary
                     void appendTo(java.lang.Appendable appendable)
                              Appends the content of this output segment to the specified Appendable object.
                     int getBegin()
                              Returns the character position in the source text of the output document where this segment begins.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     int getEnd()
                              Returns the character position in the source text of the output document where this segment ends.
                     java.lang.String toString()
                              Returns the content of this output segment as a String.
                     void writeTo(java.io.Writer writer)
                              Writes the content of this output segment to the specified Writer.
                     
                    Methods inherited from interface CharStreamSource
                    getEstimatedMaximumOutputLength
                     

                    Field Detail

                    COMPARATOR

                    static final java.util.Comparator<OutputSegment> COMPARATOR
                    The comparator used to sort output segments in the OutputDocument before output.

                    The following rules are applied in order to compare two output segments:

                    1. The output segment that begins earlier in the document comes first.
                    2. If both output segments begin at the same position, the one that has zero length comes first. If neither or both have zero length then neither is guaranteed to come before the other.

                    Note: this comparator has a natural ordering that may be inconsistent with the equals method of classes implementing this interface. This means that the comparator may treat two output segments as equal where calling the equals(Object) method with the same two output segments returns false.

                    Method Detail

                    getBegin

                    int getBegin()
                    Returns the character position in the source text of the output document where this segment begins.

                    Returns:
                    the character position in the source text of the output document where this segment begins.

                    getEnd

                    int getEnd()
                    Returns the character position in the source text of the output document where this segment ends.

                    Returns:
                    the character position in the source text of the output document where this segment ends.

                    writeTo

                    void writeTo(java.io.Writer writer)
                                 throws java.io.IOException
                    Writes the content of this output segment to the specified Writer.

                    Specified by:
                    writeTo in interface CharStreamSource
                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    appendTo

                    void appendTo(java.lang.Appendable appendable)
                                  throws java.io.IOException
                    Appends the content of this output segment to the specified Appendable object.

                    Specified by:
                    appendTo in interface CharStreamSource
                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    toString

                    java.lang.String toString()
                    Returns the content of this output segment as a String.

                    Specified by:
                    toString in interface CharStreamSource
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the content of this output segment as a String, guaranteed not null.
                    See Also:
                    writeTo(Writer)

                    getDebugInfo

                    java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/StartTag.html0000644000175000017500000017341311214132422025251 0ustar twernertwerner StartTag (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class StartTag

                    java.lang.Object
                      extended by Segment
                          extended by Tag
                              extended by StartTag
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>

                    public final class StartTag
                    extends Tag

                    Represents the start tag of an element in a specific source document.

                    A start tag always has a type that is a subclass of StartTagType, meaning that any tag that does not start with the characters '</' is categorised as a start tag.

                    This includes many tags which stand alone, without a corresponding end tag, and would not intuitively be categorised as a "start tag". For example, an HTML comment is represented as a single start tag that spans the whole comment, and does not have an end tag at all.

                    See the static fields defined in the StartTagType class for a list of the standard start tag types.

                    StartTag instances are obtained using one of the following methods:

                    The methods above which accept a name parameter are categorised as named search methods.

                    In such methods dealing with start tags, specifying an argument to the name parameter that ends in a colon (:) searches for all start tags in the specified XML namespace.

                    The constants defined in the HTMLElementName interface can be used directly as arguments to these name parameters. For example, source.getAllStartTags(HTMLElementName.A) is equivalent to source.getAllStartTags("a"), and gets all hyperlink start tags.

                    The Tag superclass defines a method called getName() to get the name of this start tag.

                    See also the XML 1.0 specification for start tags.

                    See Also:
                    Tag, Element, EndTag

                    Method Summary
                    static java.lang.String generateHTML(java.lang.String tagName, java.util.Map<java.lang.String,java.lang.String> attributesMap, boolean emptyElementTag)
                              Generates the HTML text of a normal start tag with the specified tag name and attributes map.
                     Attributes getAttributes()
                              Returns the attributes specified in this start tag.
                     java.lang.String getAttributeValue(java.lang.String attributeName)
                              Returns the decoded value of the attribute with the specified name (case insensitive).
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     Element getElement()
                              Returns the element that is started by this start tag.
                     FormControl getFormControl()
                              Returns the FormControl defined by this start tag.
                     StartTagType getStartTagType()
                              Returns the type of this start tag.
                     Segment getTagContent()
                              Returns the segment between the end of the tag's name and the start of its end delimiter.
                     TagType getTagType()
                              Returns the type of this tag.
                     boolean isEmptyElementTag()
                              Indicates whether this start tag is an empty-element tag.
                     boolean isEndTagForbidden()
                              Indicates whether a matching end tag is forbidden.
                     boolean isEndTagRequired()
                              Indicates whether a matching end tag is required.
                     boolean isSyntacticalEmptyElementTag()
                              Indicates whether this start tag is syntactically an empty-element tag.
                     boolean isUnregistered()
                              Indicates whether this tag has a syntax that does not match any of the registered tag types.
                     Attributes parseAttributes()
                              Parses the attributes specified in this start tag, regardless of the type of start tag.
                     Attributes parseAttributes(int maxErrorCount)
                              Parses the attributes specified in this start tag, regardless of the type of start tag.
                     java.lang.String tidy()
                              Returns an XML representation of this start tag.
                     java.lang.String tidy(boolean toXHTML)
                              Returns an XML or XHTML representation of this start tag.
                     
                    Methods inherited from class Tag
                    getName, getNameSegment, getNextTag, getPreviousTag, getUserData, isXMLName, isXMLNameChar, isXMLNameStartChar, setUserData
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getElement

                    public Element getElement()
                    Returns the element that is started by this start tag. Guaranteed not null.

                    Example 1: Elements for which the end tag is required
                        1. <div>
                        2.   <div>
                        3.     <div>
                        4.       <div>This is line 4</div>
                        5.     </div>
                        6.     <div>This is line 6</div>
                        7.   </div>
                    • The start tag on line 1 returns an empty element spanning only the start tag. This is because the end tag of a <div> element is required, making the sample code invalid as all the end tags are matched with other start tags.
                    • The start tag on line 2 returns an element spanning to the end of line 7.
                    • The start tag on line 3 returns an element spanning to the end of line 5.
                    • The start tag on line 4 returns an element spanning to the end of line 4.
                    • The start tag on line 6 returns an element spanning to the end of line 6.

                    Example 2: Elements for which the end tag is optional
                        1. <ul>
                        2.   <li>item 1
                        3.   <li>item 2
                        4.     <ul>
                        5.       <li>subitem 1</li>
                        6.       <li>subitem 2
                        7.     </ul>
                        8.   <li>item 3</li>
                        9. </ul>
                    • The start tag on line 1 returns an element spanning to the end of line 9.
                    • The start tag on line 2 returns an element spanning to the start of the <li> start tag on line 3.
                    • The start tag on line 3 returns an element spanning to the start of the <li> start tag on line 8.
                    • The start tag on line 4 returns an element spanning to the end of line 7.
                    • The start tag on line 5 returns an element spanning to the end of line 5.
                    • The start tag on line 6 returns an element spanning to the start of the </ul> end tag on line 7.
                    • The start tag on line 8 returns an element spanning to the end of line 8.

                    Specified by:
                    getElement in class Tag
                    Returns:
                    the element that is started by this start tag.

                    isEmptyElementTag

                    public boolean isEmptyElementTag()
                    Indicates whether this start tag is an empty-element tag.

                    This property checks that the tag is syntactically an empty-element tag, but in addition checks that the name of the tag is not one that is defined in the HTML specification to have a required or optional end tag, which the major browsers do not recognise as empty-element tags, even in an XHTML document.

                    This is equivalent to:
                    isSyntacticalEmptyElementTag() && !(HTMLElements.getEndTagOptionalElementNames().contains(getName()) || HTMLElements.getEndTagRequiredElementNames().contains(getName())).

                    Returns:
                    true if this start tag is an empty-element tag, otherwise false.

                    isSyntacticalEmptyElementTag

                    public boolean isSyntacticalEmptyElementTag()
                    Indicates whether this start tag is syntactically an empty-element tag.

                    This is signified by the characters "/>" at the end of the start tag.

                    Only a normal start tag can be syntactically an empty-element tag.

                    This property simply reports whether the syntax of the start tag is consistent with that of an empty-element tag, it does not guarantee that this start tag's element is actually empty.

                    This possible discrepancy reflects the way major browsers interpret illegal empty element tags used in HTML elements, and is explained further in the documentation of the isEmptyElementTag() property.

                    Returns:
                    true if this start tag is syntactically an empty-element tag, otherwise false.
                    See Also:
                    isEmptyElementTag()

                    getStartTagType

                    public StartTagType getStartTagType()
                    Returns the type of this start tag.

                    This is equivalent to (StartTagType)getTagType().

                    Returns:
                    the type of this start tag.

                    getTagType

                    public TagType getTagType()
                    Description copied from class: Tag
                    Returns the type of this tag.

                    Specified by:
                    getTagType in class Tag
                    Returns:
                    the type of this tag.

                    getAttributes

                    public Attributes getAttributes()
                    Returns the attributes specified in this start tag.

                    Return value is not null if and only if getStartTagType().hasAttributes()==true.

                    To force the parsing of attributes in other start tag types, use the parseAttributes() method instead.

                    Returns:
                    the attributes specified in this start tag, or null if the type of this start tag does not have attributes.
                    See Also:
                    parseAttributes(), Source.parseAttributes(int pos, int maxEnd)

                    getAttributeValue

                    public java.lang.String getAttributeValue(java.lang.String attributeName)
                    Returns the decoded value of the attribute with the specified name (case insensitive).

                    Returns null if this start tag does not have attributes, no attribute with the specified name exists or the attribute has no value.

                    This is equivalent to getAttributes().getValue(attributeName), except that it returns null if this start tag does not have attributes instead of throwing a NullPointerException.

                    Parameters:
                    attributeName - the name of the attribute to get.
                    Returns:
                    the decoded value of the attribute with the specified name, or null if the attribute does not exist or has no value.

                    parseAttributes

                    public Attributes parseAttributes()
                    Parses the attributes specified in this start tag, regardless of the type of start tag. This method is only required in the unusual situation where attributes exist in a start tag whose type doesn't have attributes.

                    This method returns the cached attributes from the getAttributes() method if its value is not null, otherwise the source is physically parsed with each call to this method.

                    This is equivalent to parseAttributes(Attributes.getDefaultMaxErrorCount()).

                    Overrides:
                    parseAttributes in class Segment
                    Returns:
                    the attributes specified in this start tag, or null if too many errors occur while parsing.
                    See Also:
                    getAttributes(), Source.parseAttributes(int pos, int maxEnd)

                    parseAttributes

                    public Attributes parseAttributes(int maxErrorCount)
                    Parses the attributes specified in this start tag, regardless of the type of start tag. This method is only required in the unusual situation where attributes exist in a start tag whose type doesn't have attributes.

                    See the documentation of the parseAttributes() method for more information.

                    Parameters:
                    maxErrorCount - the maximum number of minor errors allowed while parsing
                    Returns:
                    the attributes specified in this start tag, or null if too many errors occur while parsing.
                    See Also:
                    getAttributes()

                    getTagContent

                    public Segment getTagContent()
                    Returns the segment between the end of the tag's name and the start of its end delimiter.

                    This method is normally only of use for start tags whose content is something other than attributes.

                    A new Segment object is created with each call to this method.

                    Returns:
                    the segment between the end of the tag's name and the start of the end delimiter.

                    getFormControl

                    public FormControl getFormControl()
                    Returns the FormControl defined by this start tag.

                    This is equivalent to getElement().getFormControl().

                    Returns:
                    the FormControl defined by this start tag, or null if it is not a control.

                    isEndTagForbidden

                    public boolean isEndTagForbidden()
                    Indicates whether a matching end tag is forbidden.

                    This property returns true if one of the following conditions is met:

                    If this property returns true then this start tag's element will always be a single tag element.

                    Returns:
                    true if a matching end tag is forbidden, otherwise false.

                    isEndTagRequired

                    public boolean isEndTagRequired()
                    Indicates whether a matching end tag is required.

                    This property returns true if one of the following conditions is met:

                    Returns:
                    true if a matching end tag is required, otherwise false.

                    isUnregistered

                    public boolean isUnregistered()
                    Description copied from class: Tag
                    Indicates whether this tag has a syntax that does not match any of the registered tag types.

                    The only requirement of an unregistered tag type is that it starts with '<' and there is a closing '>' character at some position after it in the source document.

                    The absence or presence of a '/' character after the initial '<' determines whether an unregistered tag is respectively a StartTag with a type of StartTagType.UNREGISTERED or an EndTag with a type of EndTagType.UNREGISTERED.

                    There are no restrictions on the characters that might appear between these delimiters, including other '<' characters. This may result in a '>' character that is identified as the closing delimiter of two separate tags, one an unregistered tag, and the other a tag of any type that begins in the middle of the unregistered tag. As explained below, unregistered tags are usually only found when specifically looking for them, so it is up to the user to detect and deal with any such nonsensical results.

                    Unregistered tags are only returned by the Source.getTagAt(int pos) method, named search methods, where the specified name matches the first characters inside the tag, and by tag type search methods, where the specified tagType is either StartTagType.UNREGISTERED or EndTagType.UNREGISTERED.

                    Open tag searches and other searches always ignore unregistered tags, although every discovery of an unregistered tag is logged by the parser.

                    The logic behind this design is that unregistered tag types are usually the result of a '<' character in the text that was mistakenly left unencoded, or a less-than operator inside a script, or some other occurrence which is of no interest to the user. By returning unregistered tags in named and tag type search methods, the library allows the user to specifically search for tags with a certain syntax that does not match any existing TagType. This expediency feature avoids the need for the user to create a custom tag type to define the syntax before searching for these tags. By not returning unregistered tags in the less specific search methods, it is providing only the information that most users are interested in.

                    Specified by:
                    isUnregistered in class Tag
                    Returns:
                    true if this tag has a syntax that does not match any of the registered tag types, otherwise false.

                    tidy

                    public java.lang.String tidy()
                    Returns an XML representation of this start tag.

                    This is equivalent to tidy(false), thereby keeping the name of the tag in its original case.

                    See the documentation of the tidy(boolean toXHTML) method for more details.

                    Specified by:
                    tidy in class Tag
                    Returns:
                    an XML representation of this start tag, or the source text if it is of a type that does not have attributes.

                    tidy

                    public java.lang.String tidy(boolean toXHTML)
                    Returns an XML or XHTML representation of this start tag.

                    The tidying of the tag is carried out as follows:

                    • if this start tag is of a type that does not have attributes, then the original source text of the entire tag is returned.
                    • if this start tag contains any server tags outside of an attribute value, then the original source text of the entire tag is returned.
                    • name converted to lower case if the toXHTML argument is true and this is a normal start tag
                    • attributes separated by a single space
                    • attribute names in original case
                    • attribute values are enclosed in double quotes and re-encoded
                    • if this start tag forms an HTML element that has no end tag, a slash is inserted before the closing angle bracket, separated from the name or last attribute by a single space.
                    • if an attribute value contains a server tag it is inserted verbatim instead of being encoded.

                    The toXHTML parameter determines only whether the name is converted to lower case for normal tags. In all other respects the generated tag is already valid XHTML.

                    Example:

                    The following source text:

                    <INPUT name=Company value='G&uuml;nter O&#39;Reilly &amp Associés'>
                    produces the following regenerated HTML:
                    <input name="Company" value="G&uuml;nter O'Reilly &amp; Associ&eacute;s" />

                    Parameters:
                    toXHTML - specifies whether the output is XHTML.
                    Returns:
                    an XML or XHTML representation of this start tag, or the source text if it is of a type that does not have attributes.

                    generateHTML

                    public static java.lang.String generateHTML(java.lang.String tagName,
                                                                java.util.Map<java.lang.String,java.lang.String> attributesMap,
                                                                boolean emptyElementTag)
                    Generates the HTML text of a normal start tag with the specified tag name and attributes map.

                    The output of the attributes is as described in the Attributes.generateHTML(Map attributesMap) method.

                    The emptyElementTag parameter specifies whether the start tag should be an empty-element tag, in which case a slash is inserted before the closing angle bracket, separated from the name or last attribute by a single space.

                    Example:

                    The following code:

                     LinkedHashMap attributesMap=new LinkedHashMap();
                     attributesMap.put("name","Company");
                     attributesMap.put("value","G\u00fcnter O'Reilly & Associés");
                     System.out.println(StartTag.generateHTML("INPUT",attributesMap,true));
                    generates the following output:
                    <INPUT name="Company" value="G&uuml;nter O'Reilly &amp; Associ&eacute;s" />

                    Parameters:
                    tagName - the name of the start tag.
                    attributesMap - a map containing attribute name/value pairs.
                    emptyElementTag - specifies whether the start tag should be an empty-element tag.
                    Returns:
                    the HTML text of a normal start tag with the specified tag name and attributes map.
                    See Also:
                    EndTag.generateHTML(String tagName)

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Description copied from class: Segment
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/StreamedSource.html0000644000175000017500000016705511214132422026452 0ustar twernertwerner StreamedSource (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class StreamedSource

                    java.lang.Object
                      extended by StreamedSource
                    
                    All Implemented Interfaces:
                    java.io.Closeable, java.lang.Iterable<Segment>

                    public final class StreamedSource
                    extends java.lang.Object
                    implements java.lang.Iterable<Segment>, java.io.Closeable

                    Represents a streamed source HTML document.

                    This class provides a means, via the iterator() method, of sequentially parsing every tag, character reference and plain text segment contained within the source document using a minimum amount of memory.

                    In contrast, the standard Source class stores the entire source text in memory and caches every tag parsed, resulting in memory problems when attempting to parse very large files.

                    The iterator parses and returns each segment as the source text is streamed in. Previous segments are discarded for garbage collection. Source documents up to 2GB in size can be processed, a limit which is imposed by the Java language because of its use of the int data type to index string operations.

                    There is however a significant trade-off in functionality when using the StreamedSource class as opposed to the Source class. The Tag.getElement() method is not supported on tags that are returned by the iterator, nor are any methods that use the Element class in any way. The Segment.getSource() method is also not supported.

                    Most of the methods and constructors in this class mirror similarly named methods in the Source class where the same functionality is available.

                    See the description of the iterator() method for a typical usage example of this class.

                    In contrast to a Source object, the Reader or InputStream specified in the constructor or created implicitly by the constructor remains open for the life of the StreamedSource object. If the stream is created internally, it is automatically closed when the end of the stream is reached or the StreamedSource object is finalized. However a Reader or InputStream that is specified directly in a constructor is never closed automatically, as it can not be assumed that the application has no further use for it. It is the user's responsibility to ensure it is closed in this case. Explicitly calling the close() method on the StreamedSource object ensures that all resources used by it are closed, regardless of whether they were created internally or supplied externally.

                    The functionality provided by StreamedSource is similar to a StAX parser, but with some important benefits:

                    • The source document does not have to be valid XML. It can be plain HTML, can contain invalid syntax, undefined entities, incorrectly nested elements, server tags, or anything else that is commonly found in "tag soup".
                    • Every single syntactical construct in the source document's original text is included in the iterator, including the XML declaration, character references, comments, CDATA sections and server tags, each providing the segment's begin and end position in the source document. This allows an exact copy of the original document to be generated, allowing modifications to be made only where they are explicitly required. This is not possible with either SAX or StAX, which to some extent provide interpretations of the content of the XML instead of the syntactical structures used in the original source document.

                    The following table summarises the differences between the StreamedSource, StAX and SAX interfaces. Note that some of the available features are documented as optional and may not be supported by all implementations of StAX and SAX.

                    Feature | StreamedSource | StAX | SAX
                    Parse XML
                    Parse entities without DTD
                    Automatically validate XML
                    Parse HTML
                    Tolerant of syntax or nesting errors
                    Provide begin and end character positions of each event [1]
                    Provide source text of each event
                    Handle server tag events
                    Handle XML declaration event
                    Handle comment events
                    Handle CDATA section events
                    Handle document type declaration event
                    Handle character reference events
                    Allow chunking of plain text
                    Allow chunking of comment text
                    Allow chunking of CDATA section text
                    Allow specification of maximum buffer size
                    1 StAX optionally reports the "offset" of each event but this could be either byte or character position depending on the source.

                    Note that the OutputDocument class can not be used to create a modified version of a streamed source document. Instead, the output document must be constructed manually from the segments provided by the iterator.

                    StreamedSource objects are not thread safe.


                    Constructor Summary
                    StreamedSource(java.lang.CharSequence text)
                              Constructs a new StreamedSource object from the specified text.
                    StreamedSource(java.io.InputStream inputStream)
                              Constructs a new StreamedSource object by loading the content from the specified InputStream.
                    StreamedSource(java.io.Reader reader)
                              Constructs a new StreamedSource object by loading the content from the specified Reader.
                    StreamedSource(java.net.URL url)
                              Constructs a new StreamedSource object by loading the content from the specified URL.
                    StreamedSource(java.net.URLConnection urlConnection)
                              Constructs a new StreamedSource object by loading the content from the specified URLConnection.
                     
                    Method Summary
                     void close()
                              Closes the underlying Reader or InputStream and releases any system resources associated with it.
                    protected  void finalize()
                              Called by the garbage collector on an object when garbage collection determines that there are no more references to the object.
                     int getBufferSize()
                              Returns the current size of the internal character buffer.
                     Segment getCurrentSegment()
                              Returns the current Segment from the iterator().
                     java.nio.CharBuffer getCurrentSegmentCharBuffer()
                              Returns a CharBuffer containing the source text of the current segment.
                     java.lang.String getEncoding()
                              Returns the character encoding scheme of the source byte stream used to create this object.
                     java.lang.String getEncodingSpecificationInfo()
                              Returns a concise description of how the encoding of the source document was determined.
                     Logger getLogger()
                              Returns the Logger that handles log messages.
                     java.lang.String getPreliminaryEncodingInfo()
                              Returns the preliminary encoding of the source document together with a concise description of how it was determined.
                     boolean isXML()
                              Indicates whether the source document is likely to be XML.
                     java.util.Iterator<Segment> iterator()
                              Returns an iterator over every tag, character reference and plain text segment contained within the source document.
                     StreamedSource setBuffer(char[] buffer)
                              Specifies an existing character array to use for buffering the incoming character stream.
                     StreamedSource setCoalescing(boolean coalescing)
                              Specifies whether an unbroken section of plain text in the source document should always be coalesced into a single Segment by the iterator.
                     void setLogger(Logger logger)
                              Sets the Logger that handles log messages.
                     java.lang.String toString()
                              Returns a string representation of the object as generated by the default Object.toString() implementation.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    StreamedSource

                    public StreamedSource(java.io.Reader reader)
                                   throws java.io.IOException
                    Constructs a new StreamedSource object by loading the content from the specified Reader.

                    If the specified reader is an instance of InputStreamReader, the getEncoding() method of the created StreamedSource object returns the encoding from InputStreamReader.getEncoding().

                    Parameters:
                    reader - the java.io.Reader from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.

                    StreamedSource

                    public StreamedSource(java.io.InputStream inputStream)
                                   throws java.io.IOException
                    Constructs a new StreamedSource object by loading the content from the specified InputStream.

                    The algorithm for detecting the character encoding of the source document from the raw bytes of the specified input stream is the same as that for the Source(URLConnection) constructor of the Source class, except that the first step is not possible as there is no Content-Type header to check.

                    If the specified InputStream does not support the mark method, the algorithm that determines the encoding may have to wrap it in a BufferedInputStream in order to look ahead at the encoding meta data. This extra layer of buffering will then remain in place for the life of the StreamedSource, possibly impacting memory usage and/or degrading performance. It is always preferable to use the StreamedSource(Reader) constructor if the encoding is known in advance.

                    Parameters:
                    inputStream - the java.io.InputStream from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.
                    See Also:
                    getEncoding()

                    StreamedSource

                    public StreamedSource(java.net.URL url)
                                   throws java.io.IOException
                    Constructs a new StreamedSource object by loading the content from the specified URL.

                    This is equivalent to StreamedSource(url.openConnection()).

                    Parameters:
                    url - the URL from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.
                    See Also:
                    getEncoding()

                    StreamedSource

                    public StreamedSource(java.net.URLConnection urlConnection)
                                   throws java.io.IOException
                    Constructs a new StreamedSource object by loading the content from the specified URLConnection.

                    The algorithm for detecting the character encoding of the source document is identical to that described in the Source(URLConnection) constructor of the Source class.

                    The algorithm that determines the encoding may have to wrap the input stream in a BufferedInputStream in order to look ahead at the encoding meta data if the encoding is not specified in the HTTP headers. This extra layer of buffering will then remain in place for the life of the StreamedSource, possibly impacting memory usage and/or degrading performance. It is always preferable to use the StreamedSource(Reader) constructor if the encoding is known in advance.

                    Parameters:
                    urlConnection - the URL connection from which to load the source text.
                    Throws:
                    java.io.IOException - if an I/O error occurs.
                    See Also:
                    getEncoding()

                    StreamedSource

                    public StreamedSource(java.lang.CharSequence text)
                    Constructs a new StreamedSource object from the specified text.

                    Although the CharSequence argument of this constructor apparently contradicts the notion of streaming in the source text, it can still provide benefits over the equivalent use of the standard Source class.

                    Firstly, using the StreamedSource class to iterate the nodes of an in-memory CharSequence source document still requires much less memory than the equivalent operation using the standard Source class.

                    Secondly, the specified CharSequence object could possibly implement its own paging mechanism to minimise memory usage.

                    If the specified CharSequence is mutable, its state must not be modified while the StreamedSource is in use.

                    Parameters:
                    text - the source text.
                    Method Detail

                    setBuffer

                    public StreamedSource setBuffer(char[] buffer)
                    Specifies an existing character array to use for buffering the incoming character stream.

                    The specified buffer is fixed for the life of the StreamedSource object, in contrast to the default buffer which can be automatically replaced by a larger buffer as needed. This means that if a tag (including a comment or CDATA section) is encountered that is larger than the specified buffer, an unrecoverable BufferOverflowException is thrown. This exception is also thrown if coalescing has been enabled and a plain text segment is encountered that is larger than the specified buffer.

                    In general this method should only be used if there needs to be an absolute maximum memory limit imposed on the parser, where that requirement is more important than the ability to parse any source document successfully.

                    This method can only be called before the iterator() method has been called.

                    Parameters:
                    buffer - an existing character array to use for buffering the incoming character stream, must not be null.
                    Returns:
                    this StreamedSource instance, allowing multiple property setting methods to be chained in a single statement.
                    Throws:
                    java.lang.IllegalStateException - if the iterator() method has already been called.

                    setCoalescing

                    public StreamedSource setCoalescing(boolean coalescing)
                    Specifies whether an unbroken section of plain text in the source document should always be coalesced into a single Segment by the iterator.

                    If this property is set to the default value of false, and a section of plain text is encountered in the document that is larger than the current buffer size, the text is chunked into multiple consecutive plain text segments in order to minimise memory usage.

                    If this property is set to true then chunking is disabled, ensuring that consecutive plain text segments are never generated, but instead forcing the internal buffer to expand to fit the largest section of plain text.

                    Note that CharacterReference segments are always handled separately from plain text, regardless of the value of this property. For this reason, algorithms that process element content almost always have to be designed to expect the text in multiple segments in order to handle character references, so there is usually no advantage in coalescing plain text segments.

                    Parameters:
                    coalescing - the new value of the coalescing property.
                    Returns:
                    this StreamedSource instance, allowing multiple property setting methods to be chained in a single statement.
                    Throws:
                    java.lang.IllegalStateException - if the iterator() method has already been called.

                    close

                    public void close()
                               throws java.io.IOException
                    Closes the underlying Reader or InputStream and releases any system resources associated with it.

                    If the stream is already closed then invoking this method has no effect.

                    Specified by:
                    close in interface java.io.Closeable
                    Throws:
                    java.io.IOException - if an I/O error occurs.

                    getEncoding

                    public java.lang.String getEncoding()
                    Returns the character encoding scheme of the source byte stream used to create this object.

                    This method works in essentially the same way as the Source.getEncoding() method.

                    If the byte stream used to create this object does not support the mark method, the algorithm that determines the encoding may have to wrap it in a BufferedInputStream in order to look ahead at the encoding meta data. This extra layer of buffering will then remain in place for the life of the StreamedSource, possibly impacting memory usage and/or degrading performance. It is always preferable to use the StreamedSource(Reader) constructor if the encoding is known in advance.

                    The getEncodingSpecificationInfo() method returns a simple description of how the value of this method was determined.

                    Returns:
                    the character encoding scheme of the source byte stream used to create this object, or null if the encoding is not known.
                    See Also:
                    getEncodingSpecificationInfo()

                    getEncodingSpecificationInfo

                    public java.lang.String getEncodingSpecificationInfo()
                    Returns a concise description of how the encoding of the source document was determined.

                    The description is intended for informational purposes only. It is not guaranteed to have any particular format and cannot be reliably parsed.

                    Returns:
                    a concise description of how the encoding of the source document was determined.
                    See Also:
                    getEncoding()

                    getPreliminaryEncodingInfo

                    public java.lang.String getPreliminaryEncodingInfo()
                    Returns the preliminary encoding of the source document together with a concise description of how it was determined.

                    This method works in essentially the same way as the Source.getPreliminaryEncodingInfo() method.

                    The description returned by this method is intended for informational purposes only. It is not guaranteed to have any particular format and cannot be reliably parsed.

                    Returns:
                    the preliminary encoding of the source document together with a concise description of how it was determined, or null if no preliminary encoding was required.
                    See Also:
                    getEncoding()

                    iterator

                    public java.util.Iterator<Segment> iterator()
                    Returns an iterator over every tag, character reference and plain text segment contained within the source document.

                    Plain text is defined as all text that is not part of a Tag or CharacterReference.

                    This results in a sequential walk-through of the entire source document. The end position of each segment should correspond with the begin position of the subsequent segment, unless any of the tags are enclosed by other tags. This could happen if there are server tags present in the document, or in rare circumstances where the document type declaration contains markup declarations.

                    Each segment generated by the iterator is parsed as the source text is streamed in. Previous segments are discarded for garbage collection.

                    If a section of plain text is encountered in the document that is larger than the current buffer size, the text is chunked into multiple consecutive plain text segments in order to minimise memory usage. Setting the Coalescing property to true disables chunking, ensuring that consecutive plain text segments are never generated, but instead forcing the internal buffer to expand to fit the largest section of plain text. Note that CharacterReference segments are always handled separately from plain text, regardless of whether coalescing is enabled. For this reason, algorithms that process element content almost always have to be designed to expect the text in multiple segments in order to handle character references, so there is usually no advantage in coalescing plain text segments.

                    Character references that are found inside tags, such as those present inside attribute values, do not generate separate segments from the iterator.

                    This method may only be called once on any particular StreamedSource instance.

                    Example:

                    The following code demonstrates the typical (implied) usage of this method through the Iterable interface to make an exact copy of the document from reader to writer (assuming no server tags are present):

                     StreamedSource streamedSource=new StreamedSource(reader);
                     for (Segment segment : streamedSource) {
                       if (segment instanceof Tag) {
                         Tag tag=(Tag)segment;
                         // HANDLE TAG
                         // Uncomment the following line to ensure each tag is valid XML:
                         // writer.write(tag.tidy()); continue;
                       } else if (segment instanceof CharacterReference) {
                         CharacterReference characterReference=(CharacterReference)segment;
                         // HANDLE CHARACTER REFERENCE
                         // Uncomment the following line to decode all character references instead of copying them verbatim:
                         // characterReference.appendCharTo(writer); continue;
                       } else {
                         // HANDLE PLAIN TEXT
                       }
                       // unless specific handling has prevented getting to here, simply output the segment as is:
                       writer.write(segment.toString());
                     }

                    Note that the last line writer.write(segment.toString()) in the above code can be replaced with the following for improved performance:

                     CharBuffer charBuffer=streamedSource.getCurrentSegmentCharBuffer();
                     writer.write(charBuffer.array(),charBuffer.position(),charBuffer.length());

                    The following code demonstrates how to process the plain text content of a specific element, in this case to print the content of every paragraph element:

                     StreamedSource streamedSource=new StreamedSource(reader);
                     StringBuilder sb=new StringBuilder();
                     boolean insideParagraphElement=false;
                     for (Segment segment : streamedSource) {
                       if (segment instanceof Tag) {
                         Tag tag=(Tag)segment;
                         if (tag.getName().equals("p")) {
                           if (tag instanceof StartTag) {
                             insideParagraphElement=true;
                             sb.setLength(0);
                           } else { // tag instanceof EndTag
                             insideParagraphElement=false;
                             System.out.println(sb.toString());
                           }
                         }
                       } else if (insideParagraphElement) {
                         if (segment instanceof CharacterReference) {
                           ((CharacterReference)segment).appendCharTo(sb);
                         } else {
                           sb.append(segment);
                         }
                       }
                     }

                    Specified by:
                    iterator in interface java.lang.Iterable<Segment>
                    Returns:
                    an iterator over every tag, character reference and plain text segment contained within the source document.

                    getCurrentSegment

                    public Segment getCurrentSegment()
                    Returns the current Segment from the iterator().

                    This is defined as the last Segment returned from the iterator's next() method.

                    This method returns null if the iterator's next() method has never been called, or its hasNext() method has returned the value false.

                    Returns:
                    the current Segment from the iterator().

                    getCurrentSegmentCharBuffer

                    public java.nio.CharBuffer getCurrentSegmentCharBuffer()
                    Returns a CharBuffer containing the source text of the current segment.

                    The returned CharBuffer provides a window into the internal char[] buffer including the position and length that spans the current segment.

                    For example, the following code writes the source text of the current segment to writer:

                    CharBuffer charBuffer=streamedSource.getCurrentSegmentCharBuffer();
                    writer.write(charBuffer.array(),charBuffer.position(),charBuffer.length());

                    This may provide a performance benefit over the standard way of accessing the source text of the current segment, which is to use the CharSequence interface of the segment directly, or to call Segment.toString().

                    Because this CharBuffer is a direct window into the internal buffer of the StreamedSource, the contents of the CharBuffer.array() must not be modified, and the array is only guaranteed to hold the segment source text until the iterator's hasNext() or next() method is next called.

                    Returns:
                    a CharBuffer containing the source text of the current segment.

                    isXML

                    public boolean isXML()
                    Indicates whether the source document is likely to be XML.

                    The algorithm used to determine this is designed to be relatively inexpensive and to provide an accurate result in most normal situations. An exact determination of whether the source document is XML would require a much more complex analysis of the text.

                    The algorithm is as follows:

                    1. If the document begins with an XML declaration, it is an XML document.
                    2. If the document begins with a document type declaration that contains the text "xhtml", it is an XHTML document, and hence also an XML document.
                    3. If none of the above conditions are met, assume the document is normal HTML, and therefore not an XML document.

                    This method can only be called after the iterator() method has been called.

                    Returns:
                    true if the source document is likely to be XML, otherwise false.
                    Throws:
                    java.lang.IllegalStateException - if the iterator() method has not yet been called.

                    setLogger

                    public void setLogger(Logger logger)
                    Sets the Logger that handles log messages.

                    Specifying a null argument disables logging completely for operations performed on this StreamedSource object.

                    A logger instance is created automatically for each StreamedSource object in the same way as is described in the Source.setLogger(Logger) method.

                    Parameters:
                    logger - the logger that will handle log messages, or null to disable logging.
                    See Also:
                    Config.LoggerProvider

                    getLogger

                    public Logger getLogger()
                    Returns the Logger that handles log messages.

                    A logger instance is created automatically for each StreamedSource object using the LoggerProvider specified by the static Config.LoggerProvider property. This can be overridden by calling the setLogger(Logger) method. The name used for all automatically created logger instances is "net.htmlparser.jericho".

                    Returns:
                    the Logger that handles log messages, or null if logging is disabled.

                    getBufferSize

                    public int getBufferSize()
                    Returns the current size of the internal character buffer.

                    This information is generally useful only for investigating memory and performance issues.

                    Returns:
                    the current size of the internal character buffer.

                    toString

                    public java.lang.String toString()
                    Returns a string representation of the object as generated by the default Object.toString() implementation.

                    In contrast to the Source.toString() implementation, it is generally not possible for this method to return the entire source text.

                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    a string representation of the object as generated by the default Object.toString() implementation.

                    finalize

                    protected void finalize()
                    Called by the garbage collector on an object when garbage collection determines that there are no more references to the object.

                    This implementation calls the close() method if the underlying Reader or InputStream was created internally.

                    Overrides:
                    finalize in class java.lang.Object


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/OutputDocument.html0000644000175000017500000015132311214132422026513 0ustar twernertwerner OutputDocument (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class OutputDocument

                    java.lang.Object
                      extended by OutputDocument
                    
                    All Implemented Interfaces:
                    CharStreamSource

                    public final class OutputDocument
                    extends java.lang.Object
                    implements CharStreamSource

                    Represents a modified version of an original Source document or Segment.

                    An OutputDocument represents an original Source document or Segment that has been modified by substituting segments of it with other text. Each of these substitutions must be registered in the output document, which is most commonly done using the various replace, remove or insert methods in this class. These methods internally register one or more OutputSegment objects to define each substitution.

                    If a Segment is used to construct the output document, all character positions are relative to the source document of the specified segment.

                    After all of the substitutions have been registered, the modified text can be retrieved using the writeTo(Writer) or toString() methods.

                    The registered output segments may be adjacent and may also overlap. An output segment that is completely enclosed by another output segment is not included in the output.

                    If unexpected results are being generated from an OutputDocument, the getDebugInfo() method provides information on each registered output segment, which should provide enough information to determine the cause of the problem. In most cases the problem will be caused by overlapping output segments.

                    The following example converts all externally referenced style sheets to internal style sheets:

                      URL sourceUrl=new URL(sourceUrlString);
                      String htmlText=Util.getString(new InputStreamReader(sourceUrl.openStream()));
                      Source source=new Source(htmlText);
                      OutputDocument outputDocument=new OutputDocument(source);
                      StringBuilder sb=new StringBuilder();
                      List linkStartTags=source.getAllStartTags(HTMLElementName.LINK);
                      for (Iterator i=linkStartTags.iterator(); i.hasNext();) {
                        StartTag startTag=(StartTag)i.next();
                        Attributes attributes=startTag.getAttributes();
                        String rel=attributes.getValue("rel");
                        if (!"stylesheet".equalsIgnoreCase(rel)) continue;
                        String href=attributes.getValue("href");
                        if (href==null) continue;
                        String styleSheetContent;
                        try {
                          styleSheetContent=Util.getString(new InputStreamReader(new URL(sourceUrl,href).openStream()));
                        } catch (Exception ex) {
                          continue; // don't convert if URL is invalid
                        }
                        sb.setLength(0);
                        sb.append("<style");
                        Attribute typeAttribute=attributes.get("type");
                        if (typeAttribute!=null) sb.append(' ').append(typeAttribute);
                        sb.append(">\n").append(styleSheetContent).append("\n</style>");
                        outputDocument.replace(startTag,sb);
                      }
                      String convertedHtmlText=outputDocument.toString();
                     

                    See Also:
                    OutputSegment

                    Constructor Summary
                    OutputDocument(Segment segment)
                              Constructs a new output document based on the specified Segment.
                    OutputDocument(Source source)
                              Constructs a new output document based on the specified source document.
                     
                    Method Summary
                     void appendTo(java.lang.Appendable appendable)
                              Appends the final content of this output document to the specified Appendable object.
                     void appendTo(java.lang.Appendable appendable, int begin, int end)
                              Appends the specified portion of the final content of this output document to the specified Appendable object.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     long getEstimatedMaximumOutputLength()
                              Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
                     java.util.List<OutputSegment> getRegisteredOutputSegments()
                              Returns a list of all the registered OutputSegment objects in this output document.
                     java.lang.CharSequence getSourceText()
                              Returns the original source text upon which this output document is based.
                     void insert(int pos, java.lang.CharSequence text)
                              Inserts the specified text at the specified character position in this output document.
                     void register(OutputSegment outputSegment)
                              Registers the specified output segment in this output document.
                     void remove(java.util.Collection<? extends Segment> segments)
                              Removes all the segments from this output document represented by the specified source Segment objects.
                     void remove(Segment segment)
                              Removes the specified segment from this output document.
                     java.util.Map<java.lang.String,java.lang.String> replace(Attributes attributes, boolean convertNamesToLowerCase)
                              Replaces the specified Attributes segment in this output document with the name/value entries in the returned Map.
                     void replace(Attributes attributes, java.util.Map<java.lang.String,java.lang.String> map)
                              Replaces the specified Attributes segment in this output document with the name/value entries in the specified Map.
                     void replace(FormControl formControl)
                              Replaces the specified FormControl in this output document.
                     void replace(FormFields formFields)
                              Replaces all the constituent form controls from the specified FormFields in this output document.
                     void replace(int begin, int end, char ch)
                              Replaces the specified segment of this output document with the specified character.
                     void replace(int begin, int end, java.lang.CharSequence text)
                              Replaces the specified segment of this output document with the specified text.
                     void replace(Segment segment, java.lang.CharSequence text)
                              Replaces the specified segment in this output document with the specified text.
                     void replaceWithSpaces(int begin, int end)
                              Replaces the specified segment of this output document with a string of spaces of the same length.
                     java.lang.String toString()
                              Returns the final content of this output document as a String.
                     void writeTo(java.io.Writer writer)
                              Writes the final content of this output document to the specified Writer.
                     void writeTo(java.io.Writer writer, int begin, int end)
                              Writes the specified portion of the final content of this output document to the specified Writer.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    OutputDocument

                    public OutputDocument(Source source)
                    Constructs a new output document based on the specified source document.

                    Parameters:
                    source - the source document.

                    OutputDocument

                    public OutputDocument(Segment segment)
                    Constructs a new output document based on the specified Segment.

                    Parameters:
                    segment - the original Segment.
                    Method Detail

                    getSourceText

                    public java.lang.CharSequence getSourceText()
                    Returns the original source text upon which this output document is based.

                    If a Segment was used to construct the output document, this returns the text of the entire source document rather than just the segment.

                    Returns:
                    the original source text upon which this output document is based.

                    remove

                    public void remove(Segment segment)
                    Removes the specified segment from this output document.

                    This is equivalent to replace(segment,null).

                    Parameters:
                    segment - the segment to remove.

                    remove

                    public void remove(java.util.Collection<? extends Segment> segments)
                    Removes all the segments from this output document represented by the specified source Segment objects.

                    This is equivalent to the following code:

                      for (Iterator i=segments.iterator(); i.hasNext();)
                        remove((Segment)i.next());

                    Parameters:
                    segments - a collection of segments to remove, represented by source Segment objects.

                    insert

                    public void insert(int pos,
                                       java.lang.CharSequence text)
                    Inserts the specified text at the specified character position in this output document.

                    Parameters:
                    pos - the character position at which to insert the text.
                    text - the text to insert.

                    replace

                    public void replace(Segment segment,
                                        java.lang.CharSequence text)
                    Replaces the specified segment in this output document with the specified text.

                    Specifying a null argument to the text parameter is exactly equivalent to specifying an empty string, and results in the segment being completely removed from the output document.

                    Parameters:
                    segment - the segment to replace.
                    text - the replacement text, or null to remove the segment.

                    replace

                    public void replace(int begin,
                                        int end,
                                        java.lang.CharSequence text)
                    Replaces the specified segment of this output document with the specified text.

                    Specifying a null argument to the text parameter is exactly equivalent to specifying an empty string, and results in the segment being completely removed from the output document.

                    Parameters:
                    begin - the character position at which to begin the replacement.
                    end - the character position at which to end the replacement.
                    text - the replacement text, or null to remove the segment.

                    replace

                    public void replace(int begin,
                                        int end,
                                        char ch)
                    Replaces the specified segment of this output document with the specified character.

                    Parameters:
                    begin - the character position at which to begin the replacement.
                    end - the character position at which to end the replacement.
                    ch - the replacement character.

                    replace

                    public void replace(FormControl formControl)
                    Replaces the specified FormControl in this output document.

                    The effect of this method is to register zero or more output segments in the output document as required to reflect previous modifications to the control's state. The state of a control includes its submission value, output style, and whether it has been disabled.

                    The state of the form control should not be modified after this method is called, as there is no guarantee that subsequent changes either will or will not be reflected in the final output. A second call to this method with the same parameter is not allowed. It is therefore recommended to call this method as the last action before the output is generated.

                    Although the specifics of the number and nature of the output segments added in any particular circumstance are not defined in the specification, it can generally be assumed that only the minimum changes necessary are made to the original document. If the state of the control has not been modified, calling this method has no effect at all.

                    Parameters:
                    formControl - the form control to replace.
                    See Also:
                    replace(FormFields)

                    replace

                    public void replace(FormFields formFields)
                    Replaces all the constituent form controls from the specified FormFields in this output document.

                    This is equivalent to the following code:

                    for (Iterator i=formFields.getFormControls().iterator(); i.hasNext();)
                       replace((FormControl)i.next());

                    The state of any of the form controls in the specified form fields should not be modified after this method is called, as there is no guarantee that subsequent changes either will or will not be reflected in the final output. A second call to this method with the same parameter is not allowed. It is therefore recommended to call this method as the last action before the output is generated.

                    Parameters:
                    formFields - the form fields to replace.
                    See Also:
                    replace(FormControl)

                    replace

                    public java.util.Map<java.lang.String,java.lang.String> replace(Attributes attributes,
                                                                                    boolean convertNamesToLowerCase)
                    Replaces the specified Attributes segment in this output document with the name/value entries in the returned Map. The returned map initially contains entries representing the attributes from the source document, which can be modified before output.

                    The documentation of the replace(Attributes,Map) method contains more information about the requirements of the map entries.

                    Specifying a value of true as an argument to the convertNamesToLowerCase parameter causes all original attribute names to be converted to lower case in the map. This simplifies the process of finding/updating specific attributes since map keys are case sensitive.

                    Attribute values are automatically decoded before being loaded into the map.

                    This method is logically equivalent to:
                    replace(attributes, attributes.populateMap(new LinkedHashMap<String,String>(),convertNamesToLowerCase))

                    The use of LinkedHashMap to implement the map ensures (probably unnecessarily) that existing attributes are output in the same order as they appear in the source document, and new attributes are output in the same order as they are added.

                    Example:
                      Source source=new Source(htmlDocument);
                      Attributes bodyAttributes
                        =source.getNextStartTag(0,HTMLElementName.BODY).getAttributes();
                      OutputDocument outputDocument=new OutputDocument(source);
                      Map<String,String> attributesMap=outputDocument.replace(bodyAttributes,true);
                      attributesMap.put("bgcolor","green");
                      String htmlDocumentWithGreenBackground=outputDocument.toString();

                    Parameters:
                    attributes - the Attributes segment defining the span of the segment and initial name/value entries of the returned map.
                    convertNamesToLowerCase - specifies whether all attribute names are converted to lower case in the map.
                    Returns:
                    a Map containing the name/value entries to be output.
                    See Also:
                    replace(Attributes,Map)

                    replace

                    public void replace(Attributes attributes,
                                        java.util.Map<java.lang.String,java.lang.String> map)
                    Replaces the specified Attributes segment in this output document with the name/value entries in the specified Map.

                    This method might be used if the Map containing the new attribute values should not be preloaded with the same entries as the source attributes, or a map implementation other than LinkedHashMap is required. Otherwise, the replace(Attributes, boolean convertNamesToLowerCase) method is generally more useful.

                    An attribute with no value is represented by a map entry with a null value.

                    Attribute values are stored unencoded in the map, and are automatically encoded if necessary during output.

                    The use of invalid characters in attribute names results in unspecified behaviour.

                    Note that methods in the Attributes class treat attribute names as case insensitive, whereas the Map treats them as case sensitive.

                    Parameters:
                    attributes - the Attributes object defining the span of the segment to replace.
                    map - the Map containing the name/value entries.
                    See Also:
                    replace(Attributes, boolean convertNamesToLowerCase)

                    replaceWithSpaces

                    public void replaceWithSpaces(int begin,
                                                  int end)
                    Replaces the specified segment of this output document with a string of spaces of the same length.

                    This method is most commonly used to remove segments of the document without affecting the character positions of the remaining elements.

                    It is used internally to implement the functionality available through the Segment.ignoreWhenParsing() method.

                    To remove a segment from the output document completely, use the remove(Segment) method instead.

                    Parameters:
                    begin - the character position at which to begin the replacement.
                    end - the character position at which to end the replacement.

                    register

                    public void register(OutputSegment outputSegment)
                    Registers the specified output segment in this output document.

                    Use this method if you want to use a customised OutputSegment class.

                    Parameters:
                    outputSegment - the output segment to register.

                    writeTo

                    public void writeTo(java.io.Writer writer)
                                 throws java.io.IOException
                    Writes the final content of this output document to the specified Writer.

                    The writeTo(Writer, int begin, int end) method allows the output of a portion of the output document.

                    If the output is required in the form of a Reader, use CharStreamSourceUtil.getReader(this) instead.

                    Specified by:
                    writeTo in interface CharStreamSource
                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.
                    See Also:
                    toString()

                    writeTo

                    public void writeTo(java.io.Writer writer,
                                        int begin,
                                        int end)
                                 throws java.io.IOException
                    Writes the specified portion of the final content of this output document to the specified Writer.

                    Any zero-length output segments located at begin or end are included in the output.

                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    begin - the character position at which to start the output, inclusive.
                    end - the character position at which to end the output, exclusive.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.
                    See Also:
                    writeTo(Writer)

                    appendTo

                    public void appendTo(java.lang.Appendable appendable)
                                  throws java.io.IOException
                    Appends the final content of this output document to the specified Appendable object.

                    The appendTo(Appendable, int begin, int end) method allows the output of a portion of the output document.

                    Specified by:
                    appendTo in interface CharStreamSource
                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.
                    See Also:
                    toString()

                    appendTo

                    public void appendTo(java.lang.Appendable appendable,
                                         int begin,
                                         int end)
                                  throws java.io.IOException
                    Appends the specified portion of the final content of this output document to the specified Appendable object.

                    Any zero-length output segments located at begin or end are included in the output.

                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    begin - the character position at which to start the output, inclusive.
                    end - the character position at which to end the output, exclusive.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.
                    See Also:
                    appendTo(Appendable)

                    getEstimatedMaximumOutputLength

                    public long getEstimatedMaximumOutputLength()
                    Description copied from interface: CharStreamSource
                    Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    The returned value should be used as a guide for efficiency purposes only, for example to set an initial StringBuilder capacity. There is no guarantee that the length of the output is indeed less than this value, as classes implementing this method often use assumptions based on typical usage to calculate the estimate.

                    Although implementations of this method should never return a value less than -1, users of this method must not assume that this will always be the case. Standard practice is to interpret any negative value as meaning that no estimate is available.

                    Specified by:
                    getEstimatedMaximumOutputLength in interface CharStreamSource
                    Returns:
                    the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    toString

                    public java.lang.String toString()
                    Returns the final content of this output document as a String.

                    Specified by:
                    toString in interface CharStreamSource
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the final content of this output document as a String.
                    See Also:
                    writeTo(Writer)

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    The output includes details of all the registered output segments.

                    Returns:
                    a string representation of this object useful for debugging purposes.

                    getRegisteredOutputSegments

                    public java.util.List<OutputSegment> getRegisteredOutputSegments()
                    Returns a list of all the registered OutputSegment objects in this output document.

                    The output segments are sorted in order of their starting position in the document.

                    The returned list is modifiable and any changes will affect the output generated by this OutputDocument.

                    Returns:
                    a list of all the registered OutputSegment objects in this output document.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/package-frame.html0000644000175000017500000001454711214132422026205 0ustar twernertwerner net.htmlparser.jericho (Jericho HTML Parser 3.1) net.htmlparser.jericho
                    Interfaces 
                    CharStreamSource
                    HTMLElementName
                    Logger
                    LoggerProvider
                    OutputSegment
                    ParseText
                    Classes 
                    Attribute
                    Attributes
                    BasicLogFormatter
                    CharacterEntityReference
                    CharacterReference
                    CharStreamSourceUtil
                    Config
                    Config.CompatibilityMode
                    Element
                    EndTag
                    EndTagType
                    EndTagTypeGenericImplementation
                    FormControl
                    FormControlOutputStyle.ConfigDisplayValue
                    FormField
                    FormFields
                    HTMLElements
                    MasonTagTypes
                    MicrosoftTagTypes
                    NumericCharacterReference
                    OutputDocument
                    PHPTagTypes
                    Renderer
                    RowColumnVector
                    Segment
                    Source
                    SourceCompactor
                    SourceFormatter
                    StartTag
                    StartTagType
                    StartTagTypeGenericImplementation
                    StreamedSource
                    Tag
                    TagType
                    TextExtractor
                    Util
                    WriterLogger
                    Enums 
                    FormControlOutputStyle
                    FormControlType
                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/ParseText.html0000644000175000017500000007074711214132422025445 0ustar twernertwerner ParseText (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Interface ParseText

                    All Superinterfaces:
                    java.lang.CharSequence

                    public interface ParseText
                    extends java.lang.CharSequence

                    Represents the text from the source document that is to be parsed.

                    This interface is normally only of interest to users who wish to create custom tag types.

                    The parse text is defined as the entire text of the source document in lower case, with all ignored segments replaced by space characters.

                    The text is stored in lower case to make case insensitive parsing as efficient as possible.

                    This interface provides many methods which are also provided by the java.lang.String class, but adds an extra parameter called breakAtIndex to the various indexOf methods. This parameter allows a search on only a specified segment of the text, which is not possible using the normal String class.

                    ParseText instances are obtained using the Source.getParseText() method.


                    Field Summary
                    static int NO_BREAK
                              A value to use as the breakAtIndex argument in certain methods to indicate that the search should continue to the start or end of the parse text.
                     
                    Method Summary
                     char charAt(int index)
                              Returns the character at the specified index.
                     boolean containsAt(java.lang.String str, int pos)
                              Indicates whether this parse text contains the specified string at the specified position.
                     int indexOf(char searchChar, int fromIndex)
                              Returns the index within this parse text of the first occurrence of the specified character, starting the search at the position specified by fromIndex.
                     int indexOf(char searchChar, int fromIndex, int breakAtIndex)
                              Returns the index within this parse text of the first occurrence of the specified character, starting the search at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                     int indexOf(java.lang.String searchString, int fromIndex)
                              Returns the index within this parse text of the first occurrence of the specified string, starting the search at the position specified by fromIndex.
                     int indexOf(java.lang.String searchString, int fromIndex, int breakAtIndex)
                              Returns the index within this parse text of the first occurrence of the specified string, starting the search at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                     int lastIndexOf(char searchChar, int fromIndex)
                              Returns the index within this parse text of the last occurrence of the specified character, searching backwards starting at the position specified by fromIndex.
                     int lastIndexOf(char searchChar, int fromIndex, int breakAtIndex)
                              Returns the index within this parse text of the last occurrence of the specified character, searching backwards starting at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                     int lastIndexOf(java.lang.String searchString, int fromIndex)
                              Returns the index within this parse text of the last occurrence of the specified string, searching backwards starting at the position specified by fromIndex.
                     int lastIndexOf(java.lang.String searchString, int fromIndex, int breakAtIndex)
                              Returns the index within this parse text of the last occurrence of the specified string, searching backwards starting at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                     int length()
                              Returns the length of the parse text.
                     java.lang.CharSequence subSequence(int begin, int end)
                              Returns a new character sequence that is a subsequence of this sequence.
                     java.lang.String toString()
                              Returns the content of the parse text as a String.
                     

                    Field Detail

                    NO_BREAK

                    static final int NO_BREAK
                    A value to use as the breakAtIndex argument in certain methods to indicate that the search should continue to the start or end of the parse text.

                    See Also:
                    Constant Field Values
                    Method Detail

                    charAt

                    char charAt(int index)
                    Returns the character at the specified index.

                    Specified by:
                    charAt in interface java.lang.CharSequence
                    Parameters:
                    index - the index of the character.
                    Returns:
                    the character at the specified index, which is always in lower case.

                    containsAt

                    boolean containsAt(java.lang.String str,
                                       int pos)
                    Indicates whether this parse text contains the specified string at the specified position.

                    This method is analogous to the java.lang.String.startsWith(String prefix, int toffset) method.

                    Parameters:
                    str - a string.
                    pos - the position (index) in this parse text at which to check for the specified string.
                    Returns:
                    true if this parse text contains the specified string at the specified position, otherwise false.

                    indexOf

                    int indexOf(char searchChar,
                                int fromIndex)
                    Returns the index within this parse text of the first occurrence of the specified character, starting the search at the position specified by fromIndex.

                    If the specified character is not found then -1 is returned.

                    Parameters:
                    searchChar - a character.
                    fromIndex - the index to start the search from.
                    Returns:
                    the index within this parse text of the first occurrence of the specified character within the specified range, or -1 if the character is not found.

                    indexOf

                    int indexOf(char searchChar,
                                int fromIndex,
                                int breakAtIndex)
                    Returns the index within this parse text of the first occurrence of the specified character, starting the search at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.

                    The position specified by breakAtIndex is not included in the search.

                    If the search is to continue to the end of the text, the value ParseText.NO_BREAK should be specified as the breakAtIndex.

                    If the specified character is not found then -1 is returned.

                    Parameters:
                    searchChar - a character.
                    fromIndex - the index to start the search from.
                    breakAtIndex - the index at which to break off the search, or NO_BREAK if the search is to continue to the end of the text.
                    Returns:
                    the index within this parse text of the first occurrence of the specified character within the specified range, or -1 if the character is not found.

                    indexOf

                    int indexOf(java.lang.String searchString,
                                int fromIndex)
                    Returns the index within this parse text of the first occurrence of the specified string, starting the search at the position specified by fromIndex.

                    If the specified string is not found then -1 is returned.

                    Parameters:
                    searchString - a string.
                    fromIndex - the index to start the search from.
                    Returns:
                    the index within this parse text of the first occurrence of the specified string within the specified range, or -1 if the string is not found.

                    indexOf

                    int indexOf(java.lang.String searchString,
                                int fromIndex,
                                int breakAtIndex)
                    Returns the index within this parse text of the first occurrence of the specified string, starting the search at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.

                    The position specified by breakAtIndex is not included in the search.

                    If the search is to continue to the end of the text, the value ParseText.NO_BREAK should be specified as the breakAtIndex.

                    If the specified string is not found then -1 is returned.

                    Parameters:
                    searchString - a string.
                    fromIndex - the index to start the search from.
                    breakAtIndex - the index at which to break off the search, or NO_BREAK if the search is to continue to the end of the text.
                    Returns:
                    the index within this parse text of the first occurrence of the specified string within the specified range, or -1 if the string is not found.

                    lastIndexOf

                    int lastIndexOf(char searchChar,
                                    int fromIndex)
                    Returns the index within this parse text of the last occurrence of the specified character, searching backwards starting at the position specified by fromIndex.

                    If the specified character is not found then -1 is returned.

                    Parameters:
                    searchChar - a character.
                    fromIndex - the index to start the search from.
                    Returns:
                    the index within this parse text of the last occurrence of the specified character within the specified range, or -1 if the character is not found.

                    lastIndexOf

                    int lastIndexOf(char searchChar,
                                    int fromIndex,
                                    int breakAtIndex)
                    Returns the index within this parse text of the last occurrence of the specified character, searching backwards starting at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.

                    The position specified by breakAtIndex is not included in the search.

                    If the search is to continue to the start of the text, the value ParseText.NO_BREAK should be specified as the breakAtIndex.

                    If the specified character is not found then -1 is returned.

                    Parameters:
                    searchChar - a character.
                    fromIndex - the index to start the search from.
                    breakAtIndex - the index at which to break off the search, or NO_BREAK if the search is to continue to the start of the text.
                    Returns:
                    the index within this parse text of the last occurrence of the specified character within the specified range, or -1 if the character is not found.

                    lastIndexOf

                    int lastIndexOf(java.lang.String searchString,
                                    int fromIndex)
                    Returns the index within this parse text of the last occurrence of the specified string, searching backwards starting at the position specified by fromIndex.

                    If the specified string is not found then -1 is returned.

                    Parameters:
                    searchString - a string.
                    fromIndex - the index to start the search from.
                    Returns:
                    the index within this parse text of the last occurrence of the specified string within the specified range, or -1 if the string is not found.

                    lastIndexOf

                    int lastIndexOf(java.lang.String searchString,
                                    int fromIndex,
                                    int breakAtIndex)
                    Returns the index within this parse text of the last occurrence of the specified string, searching backwards starting at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.

                    The position specified by breakAtIndex is not included in the search.

                    If the search is to continue to the start of the text, the value ParseText.NO_BREAK should be specified as the breakAtIndex.

                    If the specified string is not found then -1 is returned.

                    Parameters:
                    searchString - a string.
                    fromIndex - the index to start the search from.
                    breakAtIndex - the index at which to break off the search, or NO_BREAK if the search is to continue to the start of the text.
                    Returns:
                    the index within this parse text of the last occurrence of the specified string within the specified range, or -1 if the string is not found.

                    length

                    int length()
                    Returns the length of the parse text.

                    Specified by:
                    length in interface java.lang.CharSequence
                    Returns:
                    the length of the parse text.

                    subSequence

                    java.lang.CharSequence subSequence(int begin,
                                                       int end)
                    Returns a new character sequence that is a subsequence of this sequence.

                    Specified by:
                    subSequence in interface java.lang.CharSequence
                    Parameters:
                    begin - the begin position, inclusive.
                    end - the end position, exclusive.
                    Returns:
                    a new character sequence that is a subsequence of this sequence.

                    toString

                    java.lang.String toString()
                    Returns the content of the parse text as a String.

                    Specified by:
                    toString in interface java.lang.CharSequence
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the content of the parse text as a String.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/CharStreamSourceUtil.html0000644000175000017500000003040211214132420027554 0ustar twernertwerner CharStreamSourceUtil (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class CharStreamSourceUtil

                    java.lang.Object
                      extended by CharStreamSourceUtil
                    

                    public final class CharStreamSourceUtil
                    extends java.lang.Object

                    Contains static utility methods for manipulating the way data is retrieved from a CharStreamSource object.

                    See the documentation of the CharStreamSource interface for details.


                    Method Summary
                    static java.io.Reader getReader(CharStreamSource charStreamSource)
                              Returns a Reader that reads the output of the specified CharStreamSource.
                    static java.lang.String toString(CharStreamSource charStreamSource)
                              Returns the output of the specified CharStreamSource as a string.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Method Detail

                    getReader

                    public static java.io.Reader getReader(CharStreamSource charStreamSource)
                    Returns a Reader that reads the output of the specified CharStreamSource.

                    The current implementation of this method simply returns new StringReader(toString(charStreamSource)), but a future version may implement this method in a more memory efficient manner.

                    Parameters:
                    charStreamSource - the character stream source producing the output.
                    Returns:
                    a Reader that reads the output of the specified CharStreamSource.

                    toString

                    public static java.lang.String toString(CharStreamSource charStreamSource)
                    Returns the output of the specified CharStreamSource as a string.

                    The current implementation of this method simply returns new StringReader(toString(charStreamSource)), but a future version may implement this method in a more memory efficient manner, for example by utilising a temporary file.

                    Parameters:
                    charStreamSource - the character stream source producing the output.
                    Returns:
                    the output of the specified CharStreamSource as a string.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/StartTagType.html0000644000175000017500000023165111214132422026112 0ustar twernertwerner StartTagType (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class StartTagType

                    java.lang.Object
                      extended by TagType
                          extended by StartTagType
                    
                    Direct Known Subclasses:
                    StartTagTypeGenericImplementation

                    public abstract class StartTagType
                    extends TagType

                    Defines the syntax for a start tag type.

                    A start tag type is any TagType that starts with the character '<' (as with all tag types), but whose second character is not '/'.

                    This includes types for many tags which stand alone, without a corresponding end tag, and would not intuitively be categorised as a "start tag". For example, an HTML comment in a document is represented as a single start tag that spans the whole comment, and does not have an end tag at all.

                    The singleton instances of all the standard start tag types are available in this class as static fields.

                    Because all StartTagType instances must be singletons, the '==' operator can be used to test for a particular tag type instead of the equals(Object) method.

                    See Also:
                    EndTagType

                    Field Summary
                    static StartTagType CDATA_SECTION
                              The tag type given to a CDATA section (<![CDATA[ ... ]]>).
                    static StartTagType COMMENT
                              The tag type given to an HTML comment (<!-- ... -->).
                    static StartTagType DOCTYPE_DECLARATION
                              The tag type given to a document type declaration (<!DOCTYPE ... >).
                    static StartTagType MARKUP_DECLARATION
                              The tag type given to a markup declaration (<!ELEMENT ... > | <!ATTLIST ... > | <!ENTITY ... > | <!NOTATION ... >).
                    static StartTagType NORMAL
                              The tag type given to a normal HTML or XML start tag (<name ... >).
                    static StartTagType SERVER_COMMON
                              The tag type given to a common server tag (<% ... %>).
                    static StartTagType SERVER_COMMON_ESCAPED
                              The tag type given to an escaped common server tag (<\% ... %>).
                    static StartTagType UNREGISTERED
                              The tag type given to an unregistered start tag (< ... >).
                    static StartTagType XML_DECLARATION
                              The tag type given to an XML declaration (<?xml ... ?>).
                    static StartTagType XML_PROCESSING_INSTRUCTION
                              The tag type given to an XML processing instruction (<?PITarget ... ?>).
                     
                    Constructor Summary
                    protected StartTagType(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, EndTagType correspondingEndTagType, boolean isServerTag, boolean hasAttributes, boolean isNameAfterPrefixRequired)
                              Constructs a new StartTagType object with the specified properties.
                     
                    Method Summary
                     boolean atEndOfAttributes(Source source, int pos, boolean isClosingSlashIgnored)
                              Indicates whether the specified source document position is at the end of a tag's attributes.
                    protected  StartTag constructStartTag(Source source, int begin, int end, java.lang.String name, Attributes attributes)
                              Internal method for the construction of a StartTag object of this type.
                     EndTagType getCorrespondingEndTagType()
                              Returns the type of end tag required to pair with a start tag of this type to form an element.
                     boolean hasAttributes()
                              Indicates whether a start tag of this type contains attributes.
                     boolean isNameAfterPrefixRequired()
                              Indicates whether a valid XML tag name is required directly after the prefix.
                    protected  Attributes parseAttributes(Source source, int startTagBegin, java.lang.String tagName)
                              Internal method for the parsing of Attributes.
                     
                    Methods inherited from class TagType
                    constructTagAt, deregister, getClosingDelimiter, getDescription, getNamePrefix, getRegisteredTagTypes, getStartDelimiter, getTagTypesIgnoringEnclosedMarkup, isServerTag, isValidPosition, register, setTagTypesIgnoringEnclosedMarkup, tagEncloses, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Field Detail

                    UNREGISTERED

                    public static final StartTagType UNREGISTERED
                    The tag type given to an unregistered start tag (< ... >).

                    See the documentation of the Tag.isUnregistered() method for details.

                    Properties:
                    PropertyValue
                    Descriptionunregistered
                    StartDelimiter<
                    ClosingDelimiter>
                    IsServerTagfalse
                    NamePrefix(empty string)
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <"This is not recognised as any of the predefined tag types in this library">

                    See Also:
                    EndTagType.UNREGISTERED

                    NORMAL

                    public static final StartTagType NORMAL
                    The tag type given to a normal HTML or XML start tag (<name ... >).

                    Properties:
                    PropertyValue
                    Descriptionnormal
                    StartDelimiter<
                    ClosingDelimiter>
                    IsServerTagfalse
                    NamePrefix(empty string)
                    CorrespondingEndTagTypeEndTagType.NORMAL
                    HasAttributestrue
                    IsNameAfterPrefixRequiredtrue
                    Example:
                    <div class="NormalDivTag">


                    COMMENT

                    public static final StartTagType COMMENT
                    The tag type given to an HTML comment (<!-- ... -->).

                    An HTML comment is an area of the source document enclosed by the delimiters <!-- on the left and --> on the right.

                    The HTML 4.01 specification section 3.2.4 states that the end of comment delimiter may contain white space between the "--" and ">" characters, but this library does not recognise end of comment delimiters containing white space.

                    In the default configuration, any non-server tag appearing within an HTML comment is ignored by the parser. See the documentation of the tag parsing process for more information.

                    Properties:
                    PropertyValue
                    Descriptioncomment
                    StartDelimiter<!--
                    ClosingDelimiter-->
                    IsServerTagfalse
                    NamePrefix!--
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <!-- This is a comment -->


                    XML_DECLARATION

                    public static final StartTagType XML_DECLARATION
                    The tag type given to an XML declaration (<?xml ... ?>).

                    An XML declaration is often referred to in texts as a special type of processing instruction with the reserved PITarget name of "xml". Technically it is not an XML processing instruction at all, but is still a type of SGML processing instruction.

                    According to section 2.8 of the XML 1.0 specification, a valid XML declaration can specify only "version", "encoding" and "standalone" attributes in that order. This library parses the attributes of an XML declaration in the same way as those of a normal tag, without checking that they conform to the specification.

                    Properties:
                    PropertyValue
                    DescriptionXML declaration
                    StartDelimiter<?xml
                    ClosingDelimiter?>
                    IsServerTagfalse
                    NamePrefix?xml
                    CorrespondingEndTagTypenull
                    HasAttributestrue
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <?xml version="1.0" encoding="UTF-8"?>


                    XML_PROCESSING_INSTRUCTION

                    public static final StartTagType XML_PROCESSING_INSTRUCTION
                    The tag type given to an XML processing instruction (<?PITarget ... ?>).

                    An XML processing instruction is a specific form of SGML processing instruction with the following two additional constraints:

                    This library does not include a predefined generic tag type for SGML processing instructions as the only forms in which they are found in HTML documents are the more specific XML processing instruction and the XML declaration, both of which have their own dedicated predefined tag type.

                    There is no restriction on the contents of an XML processing instruction. In particular, it can not be assumed that the processing instruction contains attributes, in contrast to the XML declaration.

                    Note that registering the PHPTagTypes.PHP_SHORT tag type overrides this tag type. This is because they both have the same start delimiter, so the one registered latest takes precedence over the other. See the documentation of the PHPTagTypes class for more information.

                    Properties:
                    PropertyValue
                    DescriptionXML processing instruction
                    StartDelimiter<?
                    ClosingDelimiter?>
                    IsServerTagfalse
                    NamePrefix?
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredtrue
                    Example:
                    <?xml-stylesheet href="standardstyle.css" type="text/css"?>


                    DOCTYPE_DECLARATION

                    public static final StartTagType DOCTYPE_DECLARATION
                    The tag type given to a document type declaration (<!DOCTYPE ... >).

                    Information about the document type declaration can be found in the HTML 4.01 specification section 7.2, and the XML 1.0 specification section 2.8.

                    The "!DOCTYPE" tag name is required to be in upper case in the source document, but all tag properties are stored in lower case because this library performs all parsing in lower case.

                    Properties:
                    PropertyValue
                    Descriptiondocument type declaration
                    StartDelimiter<!doctype
                    ClosingDelimiter>
                    IsServerTagfalse
                    NamePrefix!doctype
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">


                    MARKUP_DECLARATION

                    public static final StartTagType MARKUP_DECLARATION
                    The tag type given to a markup declaration (<!ELEMENT ... > | <!ATTLIST ... > | <!ENTITY ... > | <!NOTATION ... >).

                    The name of a markup declaration tag must be one of "!element", "!attlist", "!entity" or "!notation". These tag names are required to be in upper case in the source document, but all tag properties are stored in lower case because this library performs all parsing in lower case.

                    Markup declarations usually appear inside a document type definition (DTD), which is usually an external document to the HTML or XML document, but they can also appear directly within the document type declaration which is why they must be recognised by the parser.

                    Properties:
                    PropertyValue
                    Descriptionmarkup declaration
                    StartDelimiter<!
                    ClosingDelimiter>
                    IsServerTagfalse
                    NamePrefix!
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredtrue
                    Example:
                    <!ELEMENT BODY O O (%flow;)* +(INS|DEL) -- document body -->


                    CDATA_SECTION

                    public static final StartTagType CDATA_SECTION
                    The tag type given to a CDATA section (<![CDATA[ ... ]]>).

                    A CDATA section is a specific form of a marked section. This library does not include a predefined generic tag type for marked sections, as the only type of marked sections found in HTML documents are CDATA sections.

                    The HTML 4.01 specification section B.3.5 and the XML 1.0 specification section 2.7 contain definitions for a CDATA section.

                    There is inconsistency between the SGML and HTML/XML specifications in the definition of a marked section. SGML requires the presence of a space between the "<![" prefix and the keyword, and allows a space after the keyword. The XML specification forbids these spaces, and the examples given in the HTML specification do not include them either. This library only recognises CDATA sections that do not include the spaces.

                    The "![CDATA[" tag name is required to be in upper case in the source document according to the HTML/XML specifications, but all tag properties are stored in lower case because this makes it more efficient for the library to perform case-insensitive parsing of all tags.

                    In the default configuration, any non-server tag appearing within a CDATA section is ignored by the parser. See the documentation of the tag parsing process for more information.

                    Properties:
                    PropertyValue
                    DescriptionCDATA section
                    StartDelimiter<![cdata[
                    ClosingDelimiter]]>
                    IsServerTagfalse
                    NamePrefix![cdata[
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    This example shows the recommended practice of enclosing scripts inside a CDATA section:
                    <script type="text/javascript">
                    //<![CDATA[
                    function min(a,b) {return a<b ? a : b;}
                    //]]>
                    </script>


                    SERVER_COMMON

                    public static final StartTagType SERVER_COMMON
                    The tag type given to a common server tag (<% ... %>).

                    Common server tags include ASP, JSP, PSP, ASP-style PHP, eRuby, and Mason substitution tags.

                    This tag and the escaped common server tag are the only standard tag types that define server tags. They are included as standard tag types because of the common server tag's widespread use in many platforms, including those listed above.

                    Properties:
                    PropertyValue
                    Descriptioncommon server tag
                    StartDelimiter<%
                    ClosingDelimiter%>
                    IsServerTagtrue
                    NamePrefix%
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <%@ include file="header.html" %>


                    SERVER_COMMON_ESCAPED

                    public static final StartTagType SERVER_COMMON_ESCAPED
                    The tag type given to an escaped common server tag (<\% ... %>).

                    Some of the platforms that support the common server tag also support a mechanism to escape that tag by adding a backslash (\) before the percent (%) character. Although rarely used, this tag type allows the parser to recognise these escaped tags in addition to the common server tag itself.

                    Properties:
                    PropertyValue
                    Descriptionescaped common server tag
                    StartDelimiter<\%
                    ClosingDelimiter%>
                    IsServerTagtrue
                    NamePrefix\%
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <\%@ include file="header.html" %>

                    Constructor Detail

                    StartTagType

                    protected StartTagType(java.lang.String description,
                                           java.lang.String startDelimiter,
                                           java.lang.String closingDelimiter,
                                           EndTagType correspondingEndTagType,
                                           boolean isServerTag,
                                           boolean hasAttributes,
                                           boolean isNameAfterPrefixRequired)
                    Constructs a new StartTagType object with the specified properties.
                    (implementation assistance method)

                    As StartTagType is an abstract class, this constructor is only called from sub-class constructors.

                    Parameters:
                    description - a description of the new start tag type useful for debugging purposes.
                    startDelimiter - the start delimiter of the new start tag type.
                    closingDelimiter - the closing delimiter of the new start tag type.
                    correspondingEndTagType - the corresponding end tag type of the new start tag type.
                    isServerTag - indicates whether the new start tag type is a server tag.
                    hasAttributes - indicates whether the new start tag type has attributes.
                    isNameAfterPrefixRequired - indicates whether a name is required after the prefix.
                    Method Detail

                    getCorrespondingEndTagType

                    public final EndTagType getCorrespondingEndTagType()
                    Returns the type of end tag required to pair with a start tag of this type to form an element.
                    (property method)

                    This can be represented by the following expression that is always true given an arbitrary element that has an end tag:

                    element.getStartTag().getStartTagType().getCorrespondingEndTagType()==element.getEndTag().getEndTagType()

                    Standard Tag Type Values:
                    Start Tag TypeCorresponding End Tag Type
                    UNREGISTEREDnull
                    NORMALEndTagType.NORMAL
                    COMMENTnull
                    XML_DECLARATIONnull
                    XML_PROCESSING_INSTRUCTIONnull
                    DOCTYPE_DECLARATIONnull
                    MARKUP_DECLARATIONnull
                    CDATA_SECTIONnull
                    SERVER_COMMONnull
                    SERVER_COMMON_ESCAPEDnull
                    Extended Tag Type Values:
                    Start Tag TypeCorresponding End Tag Type
                    MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENTnull
                    PHPTagTypes.PHP_SCRIPTEndTagType.NORMAL
                    PHPTagTypes.PHP_SHORTnull
                    PHPTagTypes.PHP_STANDARDnull
                    MasonTagTypes.MASON_COMPONENT_CALLnull
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENTMasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_END
                    MasonTagTypes.MASON_NAMED_BLOCKMasonTagTypes.MASON_NAMED_BLOCK_END

                    Returns:
                    the type of end tag required to pair with a start tag of this type to form an Element.
                    See Also:
                    EndTagType.getCorrespondingStartTagType()

                    hasAttributes

                    public final boolean hasAttributes()
                    Indicates whether a start tag of this type contains attributes.
                    (property method)

                    The attributes start at the end of the name and continue until the closing delimiter is encountered. If the character sequence representing the closing delimiter occurs within a quoted attribute value it is not recognised as the end of the tag.

                    The atEndOfAttributes(Source, int pos, boolean isClosingSlashIgnored) method can be overridden to provide more control over where the attributes end.

                    Standard Tag Type Values:
                    Start Tag TypeHas Attributes
                    UNREGISTEREDfalse
                    NORMALtrue
                    COMMENTfalse
                    XML_DECLARATIONtrue
                    XML_PROCESSING_INSTRUCTIONfalse
                    DOCTYPE_DECLARATIONfalse
                    MARKUP_DECLARATIONfalse
                    CDATA_SECTIONfalse
                    SERVER_COMMONfalse
                    SERVER_COMMON_ESCAPEDfalse
                    Extended Tag Type Values:
                    Start Tag TypeHas Attributes
                    MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENTfalse
                    PHPTagTypes.PHP_SCRIPTtrue
                    PHPTagTypes.PHP_SHORTfalse
                    PHPTagTypes.PHP_STANDARDfalse
                    MasonTagTypes.MASON_COMPONENT_CALLfalse
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENTfalse
                    MasonTagTypes.MASON_NAMED_BLOCKfalse

                    Returns:
                    true if a start tag of this type contains attributes, otherwise false.

                    isNameAfterPrefixRequired

                    public final boolean isNameAfterPrefixRequired()
                    Indicates whether a valid XML tag name is required directly after the prefix.
                    (property method)

                    If this property is true, the name of the tag consists of the prefix followed by an XML tag name.

                    If this property is false, the name of the tag consists of only the prefix.

                    Standard Tag Type Values:
                    Start Tag TypeName After Prefix Required
                    UNREGISTEREDfalse
                    NORMALtrue
                    COMMENTfalse
                    XML_DECLARATIONfalse
                    XML_PROCESSING_INSTRUCTIONtrue
                    DOCTYPE_DECLARATIONfalse
                    MARKUP_DECLARATIONtrue
                    CDATA_SECTIONfalse
                    SERVER_COMMONfalse
                    SERVER_COMMON_ESCAPEDfalse
                    Extended Tag Type Values:
                    Start Tag TypeName After Prefix Required
                    MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENTtrue
                    PHPTagTypes.PHP_SCRIPTfalse
                    PHPTagTypes.PHP_SHORTfalse
                    PHPTagTypes.PHP_STANDARDfalse
                    MasonTagTypes.MASON_COMPONENT_CALLfalse
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENTfalse
                    MasonTagTypes.MASON_NAMED_BLOCKtrue

                    Returns:
                    true if a valid XML tag name is required directly after the prefix, otherwise false.

                    atEndOfAttributes

                    public boolean atEndOfAttributes(Source source,
                                                     int pos,
                                                     boolean isClosingSlashIgnored)
                    Indicates whether the specified source document position is at the end of a tag's attributes.
                    (default implementation method)

                    This method is called internally while parsing attributes to detect where they should end.

                    It can be assumed that the specified position is not inside a quoted attribute value.

                    The default implementation simply compares the parse text at the specified position with the closing delimiter, and is equivalent to:
                    source.getParseText().containsAt(getClosingDelimiter(),pos)

                    The isClosingSlashIgnored parameter is only relevant in the NORMAL start tag type, which makes use of it to cater for the '/' character that can occur before the closing delimiter in empty-element tags. Its value is always false when passed to other start tag types.

                    Parameters:
                    source - the Source document.
                    pos - the character position in the source document.
                    isClosingSlashIgnored - indicates whether the name of the start tag being tested is incompatible with an empty-element tag.
                    Returns:
                    true if the specified source document position is at the end of a tag's attributes, otherwise false.

                    constructStartTag

                    protected final StartTag constructStartTag(Source source,
                                                               int begin,
                                                               int end,
                                                               java.lang.String name,
                                                               Attributes attributes)
                    Internal method for the construction of a StartTag object of this type.
                    (implementation assistance method)

                    Intended for use from within the constructTagAt(Source, int pos) method.

                    Parameters:
                    source - the Source document.
                    begin - the character position in the source document where the tag begins.
                    end - the character position in the source document where the tag ends.
                    name - the name of the tag.
                    attributes - the attributes of the tag.
                    Returns:
                    the new StartTag object.

                    parseAttributes

                    protected final Attributes parseAttributes(Source source,
                                                               int startTagBegin,
                                                               java.lang.String tagName)
                    Internal method for the parsing of Attributes.
                    (implementation assistance method)

                    Intended for use from within the constructTagAt(Source, int pos) method.

                    The returned Attributes segment begins at startTagBegin+1+tagName.length(), and ends straight after the last attribute found before the tag's closing delimiter.

                    Only returns null if the segment contains a major syntactical error or more than the default maximum number of minor syntactical errors.

                    Parameters:
                    source - the Source document.
                    startTagBegin - the position in the source document at which the start tag is to begin.
                    tagName - the name of the start tag to be constructed.
                    Returns:
                    the Attributes of the start tag to be constructed, or null if too many errors occur while parsing.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/StartTagTypeGenericImplementation.html0000644000175000017500000007301311214132422032311 0ustar twernertwerner StartTagTypeGenericImplementation (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class StartTagTypeGenericImplementation

                    java.lang.Object
                      extended by TagType
                          extended by StartTagType
                              extended by StartTagTypeGenericImplementation
                    

                    public class StartTagTypeGenericImplementation
                    extends StartTagType

                    Provides a generic implementation of the abstract StartTagType class based on the most common start tag behaviour.

                    This class is only of interest to users who wish to create custom tag types.

                    The only external difference between this class and its abstract superclass StartTagType is that it provides a default implementation of the constructTagAt(Source, int pos) method.

                    Most of the predefined start tag types are implemented using this class or a subclass of it.

                    See Also:
                    EndTagTypeGenericImplementation

                    Field Summary
                     
                    Fields inherited from class StartTagType
                    CDATA_SECTION, COMMENT, DOCTYPE_DECLARATION, MARKUP_DECLARATION, NORMAL, SERVER_COMMON, SERVER_COMMON_ESCAPED, UNREGISTERED, XML_DECLARATION, XML_PROCESSING_INSTRUCTION
                     
                    Constructor Summary
                    protected StartTagTypeGenericImplementation(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, EndTagType correspondingEndTagType, boolean isServerTag)
                              Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                    protected StartTagTypeGenericImplementation(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, EndTagType correspondingEndTagType, boolean isServerTag, boolean hasAttributes, boolean isNameAfterPrefixRequired)
                              Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                     
                    Method Summary
                    protected  Tag constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    protected  int getEnd(Source source, int pos)
                              Returns the end of a tag of this type, starting from the specified position in the specified source document.
                     
                    Methods inherited from class StartTagType
                    atEndOfAttributes, constructStartTag, getCorrespondingEndTagType, hasAttributes, isNameAfterPrefixRequired, parseAttributes
                     
                    Methods inherited from class TagType
                    deregister, getClosingDelimiter, getDescription, getNamePrefix, getRegisteredTagTypes, getStartDelimiter, getTagTypesIgnoringEnclosedMarkup, isServerTag, isValidPosition, register, setTagTypesIgnoringEnclosedMarkup, tagEncloses, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    StartTagTypeGenericImplementation

                    protected StartTagTypeGenericImplementation(java.lang.String description,
                                                                java.lang.String startDelimiter,
                                                                java.lang.String closingDelimiter,
                                                                EndTagType correspondingEndTagType,
                                                                boolean isServerTag)
                    Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                    (implementation assistance method)

                    This is equivalent to calling
                    new StartTagTypeGenericImplementation(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false).

                    Parameters:
                    description - a description of the new start tag type useful for debugging purposes.
                    startDelimiter - the start delimiter of the new start tag type.
                    closingDelimiter - the closing delimiter of the new start tag type.
                    correspondingEndTagType - the corresponding end tag type of the new start tag type.
                    isServerTag - indicates whether the new start tag type is a server tag.

                    StartTagTypeGenericImplementation

                    protected StartTagTypeGenericImplementation(java.lang.String description,
                                                                java.lang.String startDelimiter,
                                                                java.lang.String closingDelimiter,
                                                                EndTagType correspondingEndTagType,
                                                                boolean isServerTag,
                                                                boolean hasAttributes,
                                                                boolean isNameAfterPrefixRequired)
                    Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                    (implementation assistance method)

                    Parameters:
                    description - a description of the new start tag type useful for debugging purposes.
                    startDelimiter - the start delimiter of the new start tag type.
                    closingDelimiter - the closing delimiter of the new start tag type.
                    correspondingEndTagType - the corresponding end tag type of the new start tag type.
                    isServerTag - indicates whether the new start tag type is a server tag.
                    hasAttributes - indicates whether the new start tag type has attributes.
                    isNameAfterPrefixRequired - indicates whether a name is required after the prefix.
                    Method Detail

                    constructTagAt

                    protected Tag constructTagAt(Source source,
                                                 int pos)
                    Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    (default implementation method)

                    This default implementation performs the following steps:

                    1. If a name is required after the prefix, search for a valid XML tag name directly after the name prefix using the Source.getNameEnd(int pos) method. If one is found, set the name to include it, otherwise return null.
                    2. If the last character of the name prefix is a letter (indicating that the prefix includes the full name of the tag), and the character following the prefix in the source text is also a letter or any other valid XML name character, return null.
                      Example: the source text "<?xmlt ?>" should not be recognised as an XML processing instruction, which has the prefix "<?xml".
                    3. If the tag type has attributes, call parseAttributes(source,pos,name) to parse them. Return null if too many errors occur while parsing the attributes.
                    4. Find the end of the tag using the getEnd(Source, int pos) method, where pos is either the end of the attributes segment or the end of the name depending on whether the tag type has attributes. Return null if the end of the tag cannot be found.
                    5. Construct the StartTag object using the constructStartTag(Source, int pos, int end, String name, Attributes) method with the argument values collected over the previous steps.

                    See TagType.constructTagAt(Source, int pos) for more important information about this method.

                    Specified by:
                    constructTagAt in class TagType
                    Parameters:
                    source - the Source document.
                    pos - the position in the source document.
                    Returns:
                    a tag of this type at the specified position in the specified source document if it meets all of the required features, or null if it does not meet the criteria.

                    getEnd

                    protected int getEnd(Source source,
                                         int pos)
                    Returns the end of a tag of this type, starting from the specified position in the specified source document.
                    (implementation assistance method)

                    This default implementation simply searches for the first occurrence of the closing delimiter after the specified position, and returns the position immediately after the end of it.

                    If the closing delimiter is not found, the value -1 is returned.

                    Parameters:
                    source - the Source document.
                    pos - the position in the source document.
                    Returns:
                    the end of a tag of this type, starting from the specified position in the specified source document, or -1 if the end of the tag cannot be found.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/CharStreamSource.html0000644000175000017500000003741311214132420026727 0ustar twernertwerner CharStreamSource (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Interface CharStreamSource

                    All Known Subinterfaces:
                    OutputSegment
                    All Known Implementing Classes:
                    OutputDocument, Renderer, SourceCompactor, SourceFormatter, TextExtractor

                    public interface CharStreamSource

                    Represents a character stream source. The purpose of a class that implements this interface is to output text.

                    For small amounts of data, or when memory usage isn't a prime concern (e.g. in client-side applications), the simplest way to obtain the data is by calling the toString() method. If the character stream might contain a large amount of data it is recommended to use the writeTo(Writer) method to access the data, especially if running in a multi-user server environment.

                    The advantage of providing textual data via this interface is that it gives the user the choice as to whether they would like to receive the data as a stream of characters, or all as a single string. Furthermore, it allows the "active" stream source (see below) to be easily converted into a "passive" stream source if required.

                    An active stream source is a stream source that actively outputs to a passive receiver ("sink"). The writeTo(Writer) method in this interface signifies an active source as the transmission of the entire data stream takes place when this method is executed. In this case the sink is the object that supplies the Writer object, and would typically contain a getWriter() method. The sink is passive because it just supplies a Writer object to be written to by the code in some other class.

                    A passive stream source is a stream source that is read from by an active sink. For character streams, a passive stream source simply supplies a Reader object. The active sink would typically contain a readFrom(Reader) method which actively reads the entire data stream from the Reader object.

                    The CharStreamSourceUtil.getReader(CharStreamSource) method converts a CharStreamSource into a Reader, allowing the data from the active CharStreamSource to be consumed by an active sink with a readFrom(Reader) method.

                    Every implementing class must override the toString() method to return the output as a string.

                    An easy way to implement this is by calling the CharStreamSourceUtil.toString(this) method, which buffers the output from the writeTo(Writer) method into a string.

                    See Also:
                    OutputDocument, SourceFormatter, Renderer, TextExtractor

                    Method Summary
                     void appendTo(java.lang.Appendable appendable)
                              Appends the output to the specified Appendable object.
                     long getEstimatedMaximumOutputLength()
                              Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
                     java.lang.String toString()
                              Returns the output as a string.
                     void writeTo(java.io.Writer writer)
                              Writes the output to the specified Writer.
                     

                    Method Detail

                    writeTo

                    void writeTo(java.io.Writer writer)
                                 throws java.io.IOException
                    Writes the output to the specified Writer.

                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    appendTo

                    void appendTo(java.lang.Appendable appendable)
                                  throws java.io.IOException
                    Appends the output to the specified Appendable object.

                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    getEstimatedMaximumOutputLength

                    long getEstimatedMaximumOutputLength()
                    Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    The returned value should be used as a guide for efficiency purposes only, for example to set an initial StringBuilder capacity. There is no guarantee that the length of the output is indeed less than this value, as classes implementing this method often use assumptions based on typical usage to calculate the estimate.

                    Although implementations of this method should never return a value less than -1, users of this method must not assume that this will always be the case. Standard practice is to interpret any negative value as meaning that no estimate is available.

                    Returns:
                    the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    toString

                    java.lang.String toString()
                    Returns the output as a string.

                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the output as a string.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Tag.html0000644000175000017500000015505011214132422024230 0ustar twernertwerner Tag (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Tag

                    java.lang.Object
                      extended by Segment
                          extended by Tag
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>
                    Direct Known Subclasses:
                    EndTag, StartTag

                    public abstract class Tag
                    extends Segment

                    Represents either a StartTag or EndTag in a specific source document.

                    Take the following HTML segment as an example:

                    <p>This is a sample paragraph.</p>

                    The "<p>" is represented by a StartTag object, and the "</p>" is represented by an EndTag object, both of which are subclasses of the Tag class. The whole segment, including the start tag, its corresponding end tag and all of the content in between, is represented by an Element object.

                    Tag Parsing Process

                    The following process describes how each tag is identified by the parser:
                    1. Every '<' character found in the source document is considered to be the start of a tag. The characters following it are compared with the start delimiters of all the registered tag types, and a list of matching tag types is determined.
                    2. A more detailed analysis of the source is performed according to the features of each matching tag type from the first step, in order of precedence, until a valid tag is able to be constructed.

                      The analysis performed in relation to each candidate tag type is a two-stage process:

                      1. The position of the tag is checked to determine whether it is valid. In theory, a server tag is valid in any position, but a non-server tag is not valid inside any other tag, nor inside elements with CDATA content such as SCRIPT and STYLE elements. Theory dictates therefore that comments and explicit CDATA sections inside script elements should not be recognised as tags. The behaviour of the parser, however, does not always strictly adhere to the theory, to maintain compatibility with major browsers and also for efficiency reasons.

                        The TagType.isValidPosition(Source, int pos, int[] fullSequentialParseData) method is responsible for this check and has a common default implementation for all tag types (although custom tag types can override it if necessary). Its behaviour differs depending on whether or not a full sequential parse is performed. See the documentation of the isValidPosition method for full details.

                      2. A final analysis is performed by the TagType.constructTagAt(Source, int pos) method of the candidate tag type. This method returns a valid Tag object if all conditions of the candidate tag type are met, otherwise it returns null and the process continues with the next candidate tag type.
                    3. If the source does not match the start delimiter or syntax of any registered tag type, the segment spanning it and the next '>' character is taken to be an unregistered tag. Some tag search methods ignore unregistered tags. See the isUnregistered() method for more information.

                    See the documentation of the TagType class for more details on how tags are recognised.

                    Tag Search Methods

                    Methods that get tags in a source document are collectively referred to as Tag Search Methods. They are found mostly in the Source and Segment classes, and can be generally categorised as follows:

                    Open Search:
                    These methods search for tags of any name and type.
                    Named Search:
                    These methods include a parameter called name which is used to specify the name of the tag to search for. Specifying a name that ends in a colon (:) searches for all elements or tags in the specified XML namespace.
                    Tag Type Search:
                    These methods typically include a parameter called tagType which is used to specify the type of the tag to search for. In some methods the search parameter is restricted to the StartTagType or EndTagType subclass of TagType.
                    Attribute Search:
                    These methods perform the search based on an attribute name and value.


                    Method Summary
                    abstract  Element getElement()
                              Returns the element that is started or ended by this tag.
                     java.lang.String getName()
                              Returns the name of this tag, always in lower case.
                     Segment getNameSegment()
                              Returns the segment spanning the name of this tag.
                     Tag getNextTag()
                              Returns the next tag in the source document.
                     Tag getPreviousTag()
                              Returns the previous tag in the source document.
                    abstract  TagType getTagType()
                              Returns the type of this tag.
                     java.lang.Object getUserData()
                              Returns the general purpose user data object that has previously been associated with this tag via the setUserData(Object) method.
                    abstract  boolean isUnregistered()
                              Indicates whether this tag has a syntax that does not match any of the registered tag types.
                    static boolean isXMLName(java.lang.CharSequence text)
                              Indicates whether the specified text is a valid XML Name.
                    static boolean isXMLNameChar(char ch)
                              Indicates whether the specified character is valid anywhere in an XML Name.
                    static boolean isXMLNameStartChar(char ch)
                              Indicates whether the specified character is valid at the start of an XML Name.
                     void setUserData(java.lang.Object userData)
                              Associates the specified general purpose user data object with this tag.
                    abstract  java.lang.String tidy()
                              Returns an XML representation of this tag.
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getDebugInfo, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getElement

                    public abstract Element getElement()
                    Returns the element that is started or ended by this tag.

                    StartTag.getElement() is guaranteed not to return null.

                    EndTag.getElement() can return null if the end tag is not properly matched to a start tag.

                    Returns:
                    the element that is started or ended by this tag.

                    getName

                    public final java.lang.String getName()
                    Returns the name of this tag, always in lower case.

                    The name always starts with the name prefix defined in this tag's type. For some tag types, the name consists only of this prefix, while in others it must be followed by a valid XML name (see StartTagType.isNameAfterPrefixRequired()).

                    If the name is equal to one of the constants defined in the HTMLElementName interface, this method is guaranteed to return the constant itself. This allows comparisons to be performed using the == operator instead of the less efficient String.equals(Object) method.

                    For example, the following expression can be used to test whether a StartTag is from a SELECT element:
                    startTag.getName()==HTMLElementName.SELECT

                    To get the name of this tag in its original case, use getNameSegment().toString().

                    Returns:
                    the name of this tag, always in lower case.

                    getNameSegment

                    public Segment getNameSegment()
                    Returns the segment spanning the name of this tag.

                    The code getNameSegment().toString() can be used to retrieve the name of this tag in its original case.

                    Every call to this method constructs a new Segment object.

                    Returns:
                    the segment spanning the name of this tag.
                    See Also:
                    getName()

                    getTagType

                    public abstract TagType getTagType()
                    Returns the type of this tag.

                    Returns:
                    the type of this tag.

                    getUserData

                    public java.lang.Object getUserData()
                    Returns the general purpose user data object that has previously been associated with this tag via the setUserData(Object) method.

                    If setUserData(Object) has not been called, this method returns null.

                    Returns:
                    the general purpose user data object that has previously been associated with this tag via the setUserData(Object) method, or null if setUserData(Object) has not been called.

                    setUserData

                    public void setUserData(java.lang.Object userData)
                    Associates the specified general purpose user data object with this tag.

                    This property can be useful for applications that need to associate extra information with tags. The object can be retrieved later via the getUserData() method.

                    Parameters:
                    userData - general purpose user data of any type.

                    getNextTag

                    public Tag getNextTag()
                    Returns the next tag in the source document.

                    This method also returns server tags.

                    The result of a call to this method is cached. Performing a full sequential parse prepopulates this cache.

                    If the result is not cached, a call to this method is equivalent to source.getNextTag(getBegin()+1).

                    See the Tag class documentation for more details about the behaviour of this method.

                    Returns:
                    the next tag in the source document, or null if this is the last tag.

                    getPreviousTag

                    public Tag getPreviousTag()
                    Returns the previous tag in the source document.

                    This method also returns server tags.

                    The result of a call to this method is cached. Performing a full sequential parse prepopulates this cache.

                    If the result is not cached, a call to this method is equivalent to source.getPreviousTag(getBegin()-1).

                    See the Tag class documentation for more details about the behaviour of this method.

                    Returns:
                    the previous tag in the source document, or null if this is the first tag.

                    isUnregistered

                    public abstract boolean isUnregistered()
                    Indicates whether this tag has a syntax that does not match any of the registered tag types.

                    The only requirement of an unregistered tag type is that it starts with '<' and there is a closing '>' character at some position after it in the source document.

                    The absence or presence of a '/' character after the initial '<' determines whether an unregistered tag is respectively a StartTag with a type of StartTagType.UNREGISTERED or an EndTag with a type of EndTagType.UNREGISTERED.

                    There are no restrictions on the characters that might appear between these delimiters, including other '<' characters. This may result in a '>' character that is identified as the closing delimiter of two separate tags, one an unregistered tag, and the other a tag of any type that begins in the middle of the unregistered tag. As explained below, unregistered tags are usually only found when specifically looking for them, so it is up to the user to detect and deal with any such nonsensical results.

                    Unregistered tags are only returned by the Source.getTagAt(int pos) method, named search methods, where the specified name matches the first characters inside the tag, and by tag type search methods, where the specified tagType is either StartTagType.UNREGISTERED or EndTagType.UNREGISTERED.

                    Open tag searches and other searches always ignore unregistered tags, although every discovery of an unregistered tag is logged by the parser.

                    The logic behind this design is that unregistered tag types are usually the result of a '<' character in the text that was mistakenly left unencoded, or a less-than operator inside a script, or some other occurrence which is of no interest to the user. By returning unregistered tags in named and tag type search methods, the library allows the user to specifically search for tags with a certain syntax that does not match any existing TagType. This expediency feature avoids the need for the user to create a custom tag type to define the syntax before searching for these tags. By not returning unregistered tags in the less specific search methods, it is providing only the information that most users are interested in.

                    Returns:
                    true if this tag has a syntax that does not match any of the registered tag types, otherwise false.

                    tidy

                    public abstract java.lang.String tidy()
                    Returns an XML representation of this tag.

                    This is an abstract method which is implemented in the StartTag and EndTag subclasses. See the documentation of the StartTag.tidy() and EndTag.tidy() methods for details.

                    Returns:
                    an XML representation of this tag.

                    isXMLName

                    public static final boolean isXMLName(java.lang.CharSequence text)
                    Indicates whether the specified text is a valid XML Name.

                    This implementation first checks that the first character of the specified text is a valid XML Name start character as defined by the isXMLNameStartChar(char) method, and then checks that the rest of the characters are valid XML Name characters as defined by the isXMLNameChar(char) method.

                    Note that this implementation does not exactly adhere to the formal definition of an XML Name, but the differences are unlikely to be significant in real-world XML or HTML documents.

                    Parameters:
                    text - the text to test.
                    Returns:
                    true if the specified text is a valid XML Name, otherwise false.
                    See Also:
                    Source.getNameEnd(int pos)

                    isXMLNameStartChar

                    public static final boolean isXMLNameStartChar(char ch)
                    Indicates whether the specified character is valid at the start of an XML Name.

                    The XML 1.0 specification section 2.3 defines a Name as starting with one of the characters
                    (Letter | '_' | ':').

                    This method uses the expression
                    Character.isLetter(ch) || ch=='_' || ch==':'.

                    Note that there are many differences between the Character.isLetter() definition of a Letter and the XML definition of a Letter, but these differences are unlikely to be significant in real-world XML or HTML documents.

                    Parameters:
                    ch - the character to test.
                    Returns:
                    true if the specified character is valid at the start of an XML Name, otherwise false.
                    See Also:
                    Source.getNameEnd(int pos)

                    isXMLNameChar

                    public static final boolean isXMLNameChar(char ch)
                    Indicates whether the specified character is valid anywhere in an XML Name.

                    The XML 1.0 specification section 2.3 uses the entity NameChar to represent this set of characters, which is defined as
                    (Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender).

                    This method uses the expression
                    Character.isLetterOrDigit(ch) || ch=='.' || ch=='-' || ch=='_' || ch==':'.

                    Note that there are many differences between these definitions, but these differences are unlikely to be significant in real-world XML or HTML documents.

                    Parameters:
                    ch - the character to test.
                    Returns:
                    true if the specified character is valid anywhere in an XML Name, otherwise false.
                    See Also:
                    Source.getNameEnd(int pos)


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/CharacterEntityReference.html0000644000175000017500000074712611214132420030436 0ustar twernertwerner CharacterEntityReference (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class CharacterEntityReference

                    java.lang.Object
                      extended by Segment
                          extended by CharacterReference
                              extended by CharacterEntityReference
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>

                    public class CharacterEntityReference
                    extends CharacterReference

                    Represents an HTML Character Entity Reference.

                    Click here to scroll down to the method summary.

                    The full list of HTML character entity references can be found at the following URL:
                    http://www.w3.org/TR/REC-html40/sgml/entities.html.

                    There are a total of 253 HTML character entity references, ranging from codepoints U+0022 to U+2666.

                    Static methods to encode and decode strings and single characters can be found in the CharacterReference superclass.

                    The &apos; entity reference is not defined for use in HTML. It is defined in the XHTML Special Characters Entity Set, and is the only character entity reference that is defined in XHTML but not in HTML. For this reason, the &apos; entity reference is recognised by this library in decoding functions, but in encoding functions the numeric character reference &#39; is used instead. Most modern browsers support &apos; in both XHTML and HTML, with the notable exception of Microsoft Internet Explorer 6.0, which doesn't support it in either.

                    CharacterEntityReference instances are obtained using one of the following methods:

                    See Also:
                    CharacterReference, NumericCharacterReference

                    Field Summary
                    static char _aacute
                              á &aacute; = &#225; -- latin small letter a with acute, U+00E1 ISOlat1.
                    static char _Aacute
                              Á &Aacute; = &#193; -- latin capital letter A with acute, U+00C1 ISOlat1.
                    static char _acirc
                              â &acirc; = &#226; -- latin small letter a with circumflex, U+00E2 ISOlat1.
                    static char _Acirc
                              Â &Acirc; = &#194; -- latin capital letter A with circumflex, U+00C2 ISOlat1.
                    static char _acute
                              ´ &acute; = &#180; -- acute accent = spacing acute, U+00B4 ISOdia.
                    static char _aelig
                              æ &aelig; = &#230; -- latin small letter ae = latin small ligature ae, U+00E6 ISOlat1.
                    static char _AElig
                              Æ &AElig; = &#198; -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1.
                    static char _agrave
                              à &agrave; = &#224; -- latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1.
                    static char _Agrave
                              À &Agrave; = &#192; -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1.
                    static char _alefsym
                              ℵ &alefsym; = &#8501; -- alef symbol = first transfinite cardinal, U+2135 NEW
                    (see comments).
                    static char _alpha
                              α &alpha; = &#945; -- greek small letter alpha, U+03B1 ISOgrk3.
                    static char _Alpha
                              Α &Alpha; = &#913; -- greek capital letter alpha, U+0391.
                    static char _amp
                              & &amp; = &#38; -- ampersand, U+0026 ISOnum.
                    static char _and
                              ∧ &and; = &#8743; -- logical and = wedge, U+2227 ISOtech.
                    static char _ang
                              ∠ &ang; = &#8736; -- angle, U+2220 ISOamso.
                    static char _apos
                              ' &apos; = &#39; -- apostrophe = APL quote, U+0027 ISOnum
                    (see comments).
                    static char _aring
                              å &aring; = &#229; -- latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1.
                    static char _Aring
                              Å &Aring; = &#197; -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1.
                    static char _asymp
                              ≈ &asymp; = &#8776; -- almost equal to = asymptotic to, U+2248 ISOamsr.
                    static char _atilde
                              ã &atilde; = &#227; -- latin small letter a with tilde, U+00E3 ISOlat1.
                    static char _Atilde
                              Ã &Atilde; = &#195; -- latin capital letter A with tilde, U+00C3 ISOlat1.
                    static char _auml
                              ä &auml; = &#228; -- latin small letter a with diaeresis, U+00E4 ISOlat1.
                    static char _Auml
                              Ä &Auml; = &#196; -- latin capital letter A with diaeresis, U+00C4 ISOlat1.
                    static char _bdquo
                              „ &bdquo; = &#8222; -- double low-9 quotation mark, U+201E NEW.
                    static char _beta
                              β &beta; = &#946; -- greek small letter beta, U+03B2 ISOgrk3.
                    static char _Beta
                              Β &Beta; = &#914; -- greek capital letter beta, U+0392.
                    static char _brvbar
                              ¦ &brvbar; = &#166; -- broken bar = broken vertical bar, U+00A6 ISOnum.
                    static char _bull
                              • &bull; = &#8226; -- bullet = black small circle, U+2022 ISOpub
                    (see comments).
                    static char _cap
                              ∩ &cap; = &#8745; -- intersection = cap, U+2229 ISOtech.
                    static char _ccedil
                              ç &ccedil; = &#231; -- latin small letter c with cedilla, U+00E7 ISOlat1.
                    static char _Ccedil
                              Ç &Ccedil; = &#199; -- latin capital letter C with cedilla, U+00C7 ISOlat1.
                    static char _cedil
                              ¸ &cedil; = &#184; -- cedilla = spacing cedilla, U+00B8 ISOdia.
                    static char _cent
                              ¢ &cent; = &#162; -- cent sign, U+00A2 ISOnum.
                    static char _chi
                              χ &chi; = &#967; -- greek small letter chi, U+03C7 ISOgrk3.
                    static char _Chi
                              Χ &Chi; = &#935; -- greek capital letter chi, U+03A7.
                    static char _circ
                              ˆ &circ; = &#710; -- modifier letter circumflex accent, U+02C6 ISOpub.
                    static char _clubs
                              ♣ &clubs; = &#9827; -- black club suit = shamrock, U+2663 ISOpub.
                    static char _cong
                              ≅ &cong; = &#8773; -- approximately equal to, U+2245 ISOtech.
                    static char _copy
                              © &copy; = &#169; -- copyright sign, U+00A9 ISOnum.
                    static char _crarr
                              ↵ &crarr; = &#8629; -- downwards arrow with corner leftwards = carriage return, U+21B5 NEW.
                    static char _cup
                              ∪ &cup; = &#8746; -- union = cup, U+222A ISOtech.
                    static char _curren
                              ¤ &curren; = &#164; -- currency sign, U+00A4 ISOnum.
                    static char _dagger
                              † &dagger; = &#8224; -- dagger, U+2020 ISOpub.
                    static char _Dagger
                              ‡ &Dagger; = &#8225; -- double dagger, U+2021 ISOpub.
                    static char _darr
                              ↓ &darr; = &#8595; -- downwards arrow, U+2193 ISOnum.
                    static char _dArr
                              ⇓ &dArr; = &#8659; -- downwards double arrow, U+21D3 ISOamsa.
                    static char _deg
                              ° &deg; = &#176; -- degree sign, U+00B0 ISOnum.
                    static char _delta
                              δ &delta; = &#948; -- greek small letter delta, U+03B4 ISOgrk3.
                    static char _Delta
                              Δ &Delta; = &#916; -- greek capital letter delta, U+0394 ISOgrk3.
                    static char _diams
                              ♦ &diams; = &#9830; -- black diamond suit, U+2666 ISOpub.
                    static char _divide
                              ÷ &divide; = &#247; -- division sign, U+00F7 ISOnum.
                    static char _eacute
                              é &eacute; = &#233; -- latin small letter e with acute, U+00E9 ISOlat1.
                    static char _Eacute
                              É &Eacute; = &#201; -- latin capital letter E with acute, U+00C9 ISOlat1.
                    static char _ecirc
                              ê &ecirc; = &#234; -- latin small letter e with circumflex, U+00EA ISOlat1.
                    static char _Ecirc
                              Ê &Ecirc; = &#202; -- latin capital letter E with circumflex, U+00CA ISOlat1.
                    static char _egrave
                              è &egrave; = &#232; -- latin small letter e with grave, U+00E8 ISOlat1.
                    static char _Egrave
                              È &Egrave; = &#200; -- latin capital letter E with grave, U+00C8 ISOlat1.
                    static char _empty
                              ∅ &empty; = &#8709; -- empty set = null set = diameter, U+2205 ISOamso.
                    static char _emsp
                               &emsp; = &#8195; -- em space, U+2003 ISOpub.
                    static char _ensp
                               &ensp; = &#8194; -- en space, U+2002 ISOpub.
                    static char _epsilon
                              ε &epsilon; = &#949; -- greek small letter epsilon, U+03B5 ISOgrk3.
                    static char _Epsilon
                              Ε &Epsilon; = &#917; -- greek capital letter epsilon, U+0395.
                    static char _equiv
                              ≡ &equiv; = &#8801; -- identical to, U+2261 ISOtech.
                    static char _eta
                              η &eta; = &#951; -- greek small letter eta, U+03B7 ISOgrk3.
                    static char _Eta
                              Η &Eta; = &#919; -- greek capital letter eta, U+0397.
                    static char _eth
                              ð &eth; = &#240; -- latin small letter eth, U+00F0 ISOlat1.
                    static char _ETH
                              Ð &ETH; = &#208; -- latin capital letter ETH, U+00D0 ISOlat1.
                    static char _euml
                              ë &euml; = &#235; -- latin small letter e with diaeresis, U+00EB ISOlat1.
                    static char _Euml
                              Ë &Euml; = &#203; -- latin capital letter E with diaeresis, U+00CB ISOlat1.
                    static char _euro
                              € &euro; = &#8364; -- euro sign, U+20AC NEW.
                    static char _exist
                              ∃ &exist; = &#8707; -- there exists, U+2203 ISOtech.
                    static char _fnof
                              ƒ &fnof; = &#402; -- latin small letter f with hook = function = florin, U+0192 ISOtech.
                    static char _forall
                              ∀ &forall; = &#8704; -- for all, U+2200 ISOtech.
                    static char _frac12
                              ½ &frac12; = &#189; -- vulgar fraction one half = fraction one half, U+00BD ISOnum.
                    static char _frac14
                              ¼ &frac14; = &#188; -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum.
                    static char _frac34
                              ¾ &frac34; = &#190; -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum.
                    static char _frasl
                              ⁄ &frasl; = &#8260; -- fraction slash, U+2044 NEW.
                    static char _gamma
                              γ &gamma; = &#947; -- greek small letter gamma, U+03B3 ISOgrk3.
                    static char _Gamma
                              Γ &Gamma; = &#915; -- greek capital letter gamma, U+0393 ISOgrk3.
                    static char _ge
                              ≥ &ge; = &#8805; -- greater-than or equal to, U+2265 ISOtech.
                    static char _gt
                              > &gt; = &#62; -- greater-than sign, U+003E ISOnum.
                    static char _harr
                              ↔ &harr; = &#8596; -- left right arrow, U+2194 ISOamsa.
                    static char _hArr
                              ⇔ &hArr; = &#8660; -- left right double arrow, U+21D4 ISOamsa.
                    static char _hearts
                              ♥ &hearts; = &#9829; -- black heart suit = valentine, U+2665 ISOpub.
                    static char _hellip
                              … &hellip; = &#8230; -- horizontal ellipsis = three dot leader, U+2026 ISOpub.
                    static char _iacute
                              í &iacute; = &#237; -- latin small letter i with acute, U+00ED ISOlat1.
                    static char _Iacute
                              Í &Iacute; = &#205; -- latin capital letter I with acute, U+00CD ISOlat1.
                    static char _icirc
                              î &icirc; = &#238; -- latin small letter i with circumflex, U+00EE ISOlat1.
                    static char _Icirc
                              Î &Icirc; = &#206; -- latin capital letter I with circumflex, U+00CE ISOlat1.
                    static char _iexcl
                              ¡ &iexcl; = &#161; -- inverted exclamation mark, U+00A1 ISOnum.
                    static char _igrave
                              ì &igrave; = &#236; -- latin small letter i with grave, U+00EC ISOlat1.
                    static char _Igrave
                              Ì &Igrave; = &#204; -- latin capital letter I with grave, U+00CC ISOlat1.
                    static char _image
                              ℑ &image; = &#8465; -- black-letter capital I = imaginary part, U+2111 ISOamso.
                    static char _infin
                              ∞ &infin; = &#8734; -- infinity, U+221E ISOtech.
                    static char _int
                              ∫ &int; = &#8747; -- integral, U+222B ISOtech.
                    static char _iota
                              ι &iota; = &#953; -- greek small letter iota, U+03B9 ISOgrk3.
                    static char _Iota
                              Ι &Iota; = &#921; -- greek capital letter iota, U+0399.
                    static char _iquest
                              ¿ &iquest; = &#191; -- inverted question mark = turned question mark, U+00BF ISOnum.
                    static char _isin
                              ∈ &isin; = &#8712; -- element of, U+2208 ISOtech.
                    static char _iuml
                              ï &iuml; = &#239; -- latin small letter i with diaeresis, U+00EF ISOlat1.
                    static char _Iuml
                              Ï &Iuml; = &#207; -- latin capital letter I with diaeresis, U+00CF ISOlat1.
                    static char _kappa
                              κ &kappa; = &#954; -- greek small letter kappa, U+03BA ISOgrk3.
                    static char _Kappa
                              Κ &Kappa; = &#922; -- greek capital letter kappa, U+039A.
                    static char _lambda
                              λ &lambda; = &#955; -- greek small letter lambda, U+03BB ISOgrk3.
                    static char _Lambda
                              Λ &Lambda; = &#923; -- greek capital letter lambda, U+039B ISOgrk3.
                    static char _lang
                              〈 &lang; = &#9001; -- left-pointing angle bracket = bra, U+2329 ISOtech
                    (see comments).
                    static char _laquo
                              « &laquo; = &#171; -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum.
                    static char _larr
                              ← &larr; = &#8592; -- leftwards arrow, U+2190 ISOnum.
                    static char _lArr
                              ⇐ &lArr; = &#8656; -- leftwards double arrow, U+21D0 ISOtech
                    (see comments).
                    static char _lceil
                              ⌈ &lceil; = &#8968; -- left ceiling = APL upstile, U+2308 ISOamsc.
                    static char _ldquo
                              “ &ldquo; = &#8220; -- left double quotation mark, U+201C ISOnum.
                    static char _le
                              ≤ &le; = &#8804; -- less-than or equal to, U+2264 ISOtech.
                    static char _lfloor
                              ⌊ &lfloor; = &#8970; -- left floor = APL downstile, U+230A ISOamsc.
                    static char _lowast
                              ∗ &lowast; = &#8727; -- asterisk operator, U+2217 ISOtech.
                    static char _loz
                              ◊ &loz; = &#9674; -- lozenge, U+25CA ISOpub.
                    static char _lrm
                               &lrm; = &#8206; -- left-to-right mark, U+200E NEW RFC 2070.
                    static char _lsaquo
                              ‹ &lsaquo; = &#8249; -- single left-pointing angle quotation mark, U+2039 ISO proposed
                    (see comments).
                    static char _lsquo
                              ‘ &lsquo; = &#8216; -- left single quotation mark, U+2018 ISOnum.
                    static char _lt
                              < &lt; = &#60; -- less-than sign, U+003C ISOnum.
                    static char _macr
                              ¯ &macr; = &#175; -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia.
                    static char _mdash
                              — &mdash; = &#8212; -- em dash, U+2014 ISOpub.
                    static char _micro
                              µ &micro; = &#181; -- micro sign, U+00B5 ISOnum.
                    static char _middot
                              · &middot; = &#183; -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum.
                    static char _minus
                              − &minus; = &#8722; -- minus sign, U+2212 ISOtech.
                    static char _mu
                              μ &mu; = &#956; -- greek small letter mu, U+03BC ISOgrk3.
                    static char _Mu
                              Μ &Mu; = &#924; -- greek capital letter mu, U+039C.
                    static char _nabla
                              ∇ &nabla; = &#8711; -- nabla = backward difference, U+2207 ISOtech.
                    static char _nbsp
                                &nbsp; = &#160; -- no-break space = non-breaking space, U+00A0 ISOnum.
                    static char _ndash
                              – &ndash; = &#8211; -- en dash, U+2013 ISOpub.
                    static char _ne
                              ≠ &ne; = &#8800; -- not equal to, U+2260 ISOtech.
                    static char _ni
                              ∋ &ni; = &#8715; -- contains as member, U+220B ISOtech
                    (see comments).
                    static char _not
                              ¬ &not; = &#172; -- not sign = angled dash, U+00AC ISOnum.
                    static char _notin
                              ∉ &notin; = &#8713; -- not an element of, U+2209 ISOtech.
                    static char _nsub
                              ⊄ &nsub; = &#8836; -- not a subset of, U+2284 ISOamsn.
                    static char _ntilde
                              ñ &ntilde; = &#241; -- latin small letter n with tilde, U+00F1 ISOlat1.
                    static char _Ntilde
                              Ñ &Ntilde; = &#209; -- latin capital letter N with tilde, U+00D1 ISOlat1.
                    static char _nu
                              ν &nu; = &#957; -- greek small letter nu, U+03BD ISOgrk3.
                    static char _Nu
                              Ν &Nu; = &#925; -- greek capital letter nu, U+039D.
                    static char _oacute
                              ó &oacute; = &#243; -- latin small letter o with acute, U+00F3 ISOlat1.
                    static char _Oacute
                              Ó &Oacute; = &#211; -- latin capital letter O with acute, U+00D3 ISOlat1.
                    static char _ocirc
                              ô &ocirc; = &#244; -- latin small letter o with circumflex, U+00F4 ISOlat1.
                    static char _Ocirc
                              Ô &Ocirc; = &#212; -- latin capital letter O with circumflex, U+00D4 ISOlat1.
                    static char _oelig
                              œ &oelig; = &#339; -- latin small ligature oe, U+0153 ISOlat2
                    (see comments).
                    static char _OElig
                              Œ &OElig; = &#338; -- latin capital ligature OE, U+0152 ISOlat2.
                    static char _ograve
                              ò &ograve; = &#242; -- latin small letter o with grave, U+00F2 ISOlat1.
                    static char _Ograve
                              Ò &Ograve; = &#210; -- latin capital letter O with grave, U+00D2 ISOlat1.
                    static char _oline
                              ‾ &oline; = &#8254; -- overline = spacing overscore, U+203E NEW.
                    static char _omega
                              ω &omega; = &#969; -- greek small letter omega, U+03C9 ISOgrk3.
                    static char _Omega
                              Ω &Omega; = &#937; -- greek capital letter omega, U+03A9 ISOgrk3.
                    static char _omicron
                              ο &omicron; = &#959; -- greek small letter omicron, U+03BF NEW.
                    static char _Omicron
                              Ο &Omicron; = &#927; -- greek capital letter omicron, U+039F.
                    static char _oplus
                              ⊕ &oplus; = &#8853; -- circled plus = direct sum, U+2295 ISOamsb.
                    static char _or
                              ∨ &or; = &#8744; -- logical or = vee, U+2228 ISOtech.
                    static char _ordf
                              ª &ordf; = &#170; -- feminine ordinal indicator, U+00AA ISOnum.
                    static char _ordm
                              º &ordm; = &#186; -- masculine ordinal indicator, U+00BA ISOnum.
                    static char _oslash
                              ø &oslash; = &#248; -- latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1.
                    static char _Oslash
                              Ø &Oslash; = &#216; -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1.
                    static char _otilde
                              õ &otilde; = &#245; -- latin small letter o with tilde, U+00F5 ISOlat1.
                    static char _Otilde
                              Õ &Otilde; = &#213; -- latin capital letter O with tilde, U+00D5 ISOlat1.
                    static char _otimes
                              ⊗ &otimes; = &#8855; -- circled times = vector product, U+2297 ISOamsb.
                    static char _ouml
                              ö &ouml; = &#246; -- latin small letter o with diaeresis, U+00F6 ISOlat1.
                    static char _Ouml
                              Ö &Ouml; = &#214; -- latin capital letter O with diaeresis, U+00D6 ISOlat1.
                    static char _para
                              ¶ &para; = &#182; -- pilcrow sign = paragraph sign, U+00B6 ISOnum.
                    static char _part
                              ∂ &part; = &#8706; -- partial differential, U+2202 ISOtech.
                    static char _permil
                              ‰ &permil; = &#8240; -- per mille sign, U+2030 ISOtech.
                    static char _perp
                              ⊥ &perp; = &#8869; -- up tack = orthogonal to = perpendicular, U+22A5 ISOtech.
                    static char _phi
                              φ &phi; = &#966; -- greek small letter phi, U+03C6 ISOgrk3.
                    static char _Phi
                              Φ &Phi; = &#934; -- greek capital letter phi, U+03A6 ISOgrk3.
                    static char _pi
                              π &pi; = &#960; -- greek small letter pi, U+03C0 ISOgrk3.
                    static char _Pi
                              Π &Pi; = &#928; -- greek capital letter pi, U+03A0 ISOgrk3.
                    static char _piv
                              ϖ &piv; = &#982; -- greek pi symbol, U+03D6 ISOgrk3.
                    static char _plusmn
                              ± &plusmn; = &#177; -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum.
                    static char _pound
                              £ &pound; = &#163; -- pound sign, U+00A3 ISOnum.
                    static char _prime
                              ′ &prime; = &#8242; -- prime = minutes = feet, U+2032 ISOtech.
                    static char _Prime
                              ″ &Prime; = &#8243; -- double prime = seconds = inches, U+2033 ISOtech.
                    static char _prod
                              ∏ &prod; = &#8719; -- n-ary product = product sign, U+220F ISOamsb
                    (see comments).
                    static char _prop
                              ∝ &prop; = &#8733; -- proportional to, U+221D ISOtech.
                    static char _psi
                              ψ &psi; = &#968; -- greek small letter psi, U+03C8 ISOgrk3.
                    static char _Psi
                              Ψ &Psi; = &#936; -- greek capital letter psi, U+03A8 ISOgrk3.
                    static char _quot
                              " &quot; = &#34; -- quotation mark = APL quote, U+0022 ISOnum.
                    static char _radic
                              √ &radic; = &#8730; -- square root = radical sign, U+221A ISOtech.
                    static char _rang
                              〉 &rang; = &#9002; -- right-pointing angle bracket = ket, U+232A ISOtech
                    (see comments).
                    static char _raquo
                              » &raquo; = &#187; -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum.
                    static char _rarr
                              → &rarr; = &#8594; -- rightwards arrow, U+2192 ISOnum.
                    static char _rArr
                              ⇒ &rArr; = &#8658; -- rightwards double arrow, U+21D2 ISOtech
                    (see comments).
                    static char _rceil
                              ⌉ &rceil; = &#8969; -- right ceiling, U+2309 ISOamsc.
                    static char _rdquo
                              ” &rdquo; = &#8221; -- right double quotation mark, U+201D ISOnum.
                    static char _real
                              ℜ &real; = &#8476; -- black-letter capital R = real part symbol, U+211C ISOamso.
                    static char _reg
                              ® &reg; = &#174; -- registered sign = registered trade mark sign, U+00AE ISOnum.
                    static char _rfloor
                              ⌋ &rfloor; = &#8971; -- right floor, U+230B ISOamsc.
                    static char _rho
                              ρ &rho; = &#961; -- greek small letter rho, U+03C1 ISOgrk3.
                    static char _Rho
                              Ρ &Rho; = &#929; -- greek capital letter rho, U+03A1.
                    static char _rlm
                               &rlm; = &#8207; -- right-to-left mark, U+200F NEW RFC 2070.
                    static char _rsaquo
                              › &rsaquo; = &#8250; -- single right-pointing angle quotation mark, U+203A ISO proposed
                    (see comments).
                    static char _rsquo
                              ’ &rsquo; = &#8217; -- right single quotation mark, U+2019 ISOnum.
                    static char _sbquo
                              ‚ &sbquo; = &#8218; -- single low-9 quotation mark, U+201A NEW.
                    static char _scaron
                              š &scaron; = &#353; -- latin small letter s with caron, U+0161 ISOlat2.
                    static char _Scaron
                              Š &Scaron; = &#352; -- latin capital letter S with caron, U+0160 ISOlat2.
                    static char _sdot
                              ⋅ &sdot; = &#8901; -- dot operator, U+22C5 ISOamsb
                    (see comments).
                    static char _sect
                              § &sect; = &#167; -- section sign, U+00A7 ISOnum.
                    static char _shy
                              ­ &shy; = &#173; -- soft hyphen = discretionary hyphen, U+00AD ISOnum.
                    static char _sigma
                              σ &sigma; = &#963; -- greek small letter sigma, U+03C3 ISOgrk3.
                    static char _Sigma
                              Σ &Sigma; = &#931; -- greek capital letter sigma, U+03A3 ISOgrk3.
                    static char _sigmaf
                              ς &sigmaf; = &#962; -- greek small letter final sigma, U+03C2 ISOgrk3.
                    static char _sim
                              ∼ &sim; = &#8764; -- tilde operator = varies with = similar to, U+223C ISOtech
                    (see comments).
                    static char _spades
                              ♠ &spades; = &#9824; -- black spade suit, U+2660 ISOpub
                    (see comments).
                    static char _sub
                              ⊂ &sub; = &#8834; -- subset of, U+2282 ISOtech.
                    static char _sube
                              ⊆ &sube; = &#8838; -- subset of or equal to, U+2286 ISOtech.
                    static char _sum
                              ∑ &sum; = &#8721; -- n-ary summation, U+2211 ISOamsb
                    (see comments).
                    static char _sup
                              ⊃ &sup; = &#8835; -- superset of, U+2283 ISOtech
                    (see comments).
                    static char _sup1
                              ¹ &sup1; = &#185; -- superscript one = superscript digit one, U+00B9 ISOnum.
                    static char _sup2
                              ² &sup2; = &#178; -- superscript two = superscript digit two = squared, U+00B2 ISOnum.
                    static char _sup3
                              ³ &sup3; = &#179; -- superscript three = superscript digit three = cubed, U+00B3 ISOnum.
                    static char _supe
                              ⊇ &supe; = &#8839; -- superset of or equal to, U+2287 ISOtech.
                    static char _szlig
                              ß &szlig; = &#223; -- latin small letter sharp s = ess-zed, U+00DF ISOlat1.
                    static char _tau
                              τ &tau; = &#964; -- greek small letter tau, U+03C4 ISOgrk3.
                    static char _Tau
                              Τ &Tau; = &#932; -- greek capital letter tau, U+03A4.
                    static char _there4
                              ∴ &there4; = &#8756; -- therefore, U+2234 ISOtech.
                    static char _theta
                              θ &theta; = &#952; -- greek small letter theta, U+03B8 ISOgrk3.
                    static char _Theta
                              Θ &Theta; = &#920; -- greek capital letter theta, U+0398 ISOgrk3.
                    static char _thetasym
                              ϑ &thetasym; = &#977; -- greek small letter theta symbol, U+03D1 NEW.
                    static char _thinsp
                               &thinsp; = &#8201; -- thin space, U+2009 ISOpub.
                    static char _thorn
                              þ &thorn; = &#254; -- latin small letter thorn, U+00FE ISOlat1.
                    static char _THORN
                              Þ &THORN; = &#222; -- latin capital letter THORN, U+00DE ISOlat1.
                    static char _tilde
                              ˜ &tilde; = &#732; -- small tilde, U+02DC ISOdia.
                    static char _times
                              × &times; = &#215; -- multiplication sign, U+00D7 ISOnum.
                    static char _trade
                              ™ &trade; = &#8482; -- trade mark sign, U+2122 ISOnum.
                    static char _uacute
                              ú &uacute; = &#250; -- latin small letter u with acute, U+00FA ISOlat1.
                    static char _Uacute
                              Ú &Uacute; = &#218; -- latin capital letter U with acute, U+00DA ISOlat1.
                    static char _uarr
                              ↑ &uarr; = &#8593; -- upwards arrow, U+2191 ISOnum.
                    static char _uArr
                              ⇑ &uArr; = &#8657; -- upwards double arrow, U+21D1 ISOamsa.
                    static char _ucirc
                              û &ucirc; = &#251; -- latin small letter u with circumflex, U+00FB ISOlat1.
                    static char _Ucirc
                              Û &Ucirc; = &#219; -- latin capital letter U with circumflex, U+00DB ISOlat1.
                    static char _ugrave
                              ù &ugrave; = &#249; -- latin small letter u with grave, U+00F9 ISOlat1.
                    static char _Ugrave
                              Ù &Ugrave; = &#217; -- latin capital letter U with grave, U+00D9 ISOlat1.
                    static char _uml
                              ¨ &uml; = &#168; -- diaeresis = spacing diaeresis, U+00A8 ISOdia.
                    static char _upsih
                              ϒ &upsih; = &#978; -- greek upsilon with hook symbol, U+03D2 NEW.
                    static char _upsilon
                              υ &upsilon; = &#965; -- greek small letter upsilon, U+03C5 ISOgrk3.
                    static char _Upsilon
                              Υ &Upsilon; = &#933; -- greek capital letter upsilon, U+03A5 ISOgrk3.
                    static char _uuml
                              ü &uuml; = &#252; -- latin small letter u with diaeresis, U+00FC ISOlat1.
                    static char _Uuml
                              Ü &Uuml; = &#220; -- latin capital letter U with diaeresis, U+00DC ISOlat1.
                    static char _weierp
                              ℘ &weierp; = &#8472; -- script capital P = power set = Weierstrass p, U+2118 ISOamso.
                    static char _xi
                              ξ &xi; = &#958; -- greek small letter xi, U+03BE ISOgrk3.
                    static char _Xi
                              Ξ &Xi; = &#926; -- greek capital letter xi, U+039E ISOgrk3.
                    static char _yacute
                              ý &yacute; = &#253; -- latin small letter y with acute, U+00FD ISOlat1.
                    static char _Yacute
                              Ý &Yacute; = &#221; -- latin capital letter Y with acute, U+00DD ISOlat1.
                    static char _yen
                              ¥ &yen; = &#165; -- yen sign = yuan sign, U+00A5 ISOnum.
                    static char _yuml
                              ÿ &yuml; = &#255; -- latin small letter y with diaeresis, U+00FF ISOlat1.
                    static char _Yuml
                              Ÿ &Yuml; = &#376; -- latin capital letter Y with diaeresis, U+0178 ISOlat2.
                    static char _zeta
                              ζ &zeta; = &#950; -- greek small letter zeta, U+03B6 ISOgrk3.
                    static char _Zeta
                              Ζ &Zeta; = &#918; -- greek capital letter zeta, U+0396.
                    static char _zwj
                               &zwj; = &#8205; -- zero width joiner, U+200D NEW RFC 2070.
                    static char _zwnj
                               &zwnj; = &#8204; -- zero width non-joiner, U+200C NEW RFC 2070.
                     
                    Fields inherited from class CharacterReference
                    INVALID_CODE_POINT
                     
                    Method Summary
                     java.lang.String getCharacterReferenceString()
                              Returns the correct encoded form of this character entity reference.
                    static java.lang.String getCharacterReferenceString(int codePoint)
                              Returns the character entity reference encoded form of the specified Unicode code point.
                    static int getCodePointFromName(java.lang.String name)
                              Returns the Unicode code point of the specified character entity reference name.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     java.lang.String getName()
                              Returns the name of this character entity reference.
                    static java.lang.String getName(char ch)
                              Returns the character entity reference name of the specified character.
                    static java.lang.String getName(int codePoint)
                              Returns the character entity reference name of the specified Unicode code point.
                    static java.util.Map<java.lang.String,java.lang.Integer> getNameToCodePointMap()
                              Returns a map of character entity reference names (String) to Unicode code points (Integer).
                     
                    Methods inherited from class CharacterReference
                    appendCharTo, decode, decode, decodeCollapseWhiteSpace, encode, encode, encodeWithWhiteSpaceFormatting, getChar, getCodePoint, getCodePointFromCharacterReferenceString, getDecimalCharacterReferenceString, getDecimalCharacterReferenceString, getEncodingFilterWriter, getHexadecimalCharacterReferenceString, getHexadecimalCharacterReferenceString, getUnicodeText, getUnicodeText, isTerminated, parse, reencode, requiresEncoding
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Field Detail

                    _nbsp

                    public static final char _nbsp
                      &nbsp; = &#160; -- no-break space = non-breaking space, U+00A0 ISOnum.

                    See Also:
                    Constant Field Values

                    _iexcl

                    public static final char _iexcl
                    ¡ &iexcl; = &#161; -- inverted exclamation mark, U+00A1 ISOnum.

                    See Also:
                    Constant Field Values

                    _cent

                    public static final char _cent
                    ¢ &cent; = &#162; -- cent sign, U+00A2 ISOnum.

                    See Also:
                    Constant Field Values

                    _pound

                    public static final char _pound
                    £ &pound; = &#163; -- pound sign, U+00A3 ISOnum.

                    See Also:
                    Constant Field Values

                    _curren

                    public static final char _curren
                    ¤ &curren; = &#164; -- currency sign, U+00A4 ISOnum.

                    See Also:
                    Constant Field Values

                    _yen

                    public static final char _yen
                    ¥ &yen; = &#165; -- yen sign = yuan sign, U+00A5 ISOnum.

                    See Also:
                    Constant Field Values

                    _brvbar

                    public static final char _brvbar
                    ¦ &brvbar; = &#166; -- broken bar = broken vertical bar, U+00A6 ISOnum.

                    See Also:
                    Constant Field Values

                    _sect

                    public static final char _sect
                    § &sect; = &#167; -- section sign, U+00A7 ISOnum.

                    See Also:
                    Constant Field Values

                    _uml

                    public static final char _uml
                    ¨ &uml; = &#168; -- diaeresis = spacing diaeresis, U+00A8 ISOdia.

                    See Also:
                    Constant Field Values

                    _copy

                    public static final char _copy
                    © &copy; = &#169; -- copyright sign, U+00A9 ISOnum.

                    See Also:
                    Constant Field Values

                    _ordf

                    public static final char _ordf
                    ª &ordf; = &#170; -- feminine ordinal indicator, U+00AA ISOnum.

                    See Also:
                    Constant Field Values

                    _laquo

                    public static final char _laquo
                    « &laquo; = &#171; -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum.

                    See Also:
                    Constant Field Values

                    _not

                    public static final char _not
                    ¬ &not; = &#172; -- not sign = angled dash, U+00AC ISOnum.

                    See Also:
                    Constant Field Values

                    _shy

                    public static final char _shy
                    ­ &shy; = &#173; -- soft hyphen = discretionary hyphen, U+00AD ISOnum.

                    See Also:
                    Constant Field Values

                    _reg

                    public static final char _reg
                    ® &reg; = &#174; -- registered sign = registered trade mark sign, U+00AE ISOnum.

                    See Also:
                    Constant Field Values

                    _macr

                    public static final char _macr
                    ¯ &macr; = &#175; -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia.

                    See Also:
                    Constant Field Values

                    _deg

                    public static final char _deg
                    ° &deg; = &#176; -- degree sign, U+00B0 ISOnum.

                    See Also:
                    Constant Field Values

                    _plusmn

                    public static final char _plusmn
                    ± &plusmn; = &#177; -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum.

                    See Also:
                    Constant Field Values

                    _sup2

                    public static final char _sup2
                    ² &sup2; = &#178; -- superscript two = superscript digit two = squared, U+00B2 ISOnum.

                    See Also:
                    Constant Field Values

                    _sup3

                    public static final char _sup3
                    ³ &sup3; = &#179; -- superscript three = superscript digit three = cubed, U+00B3 ISOnum.

                    See Also:
                    Constant Field Values

                    _acute

                    public static final char _acute
                    ´ &acute; = &#180; -- acute accent = spacing acute, U+00B4 ISOdia.

                    See Also:
                    Constant Field Values

                    _micro

                    public static final char _micro
                    µ &micro; = &#181; -- micro sign, U+00B5 ISOnum.

                    See Also:
                    Constant Field Values

                    _para

                    public static final char _para
                    ¶ &para; = &#182; -- pilcrow sign = paragraph sign, U+00B6 ISOnum.

                    See Also:
                    Constant Field Values

                    _middot

                    public static final char _middot
                    · &middot; = &#183; -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum.

                    See Also:
                    Constant Field Values

                    _cedil

                    public static final char _cedil
                    ¸ &cedil; = &#184; -- cedilla = spacing cedilla, U+00B8 ISOdia.

                    See Also:
                    Constant Field Values

                    _sup1

                    public static final char _sup1
                    ¹ &sup1; = &#185; -- superscript one = superscript digit one, U+00B9 ISOnum.

                    See Also:
                    Constant Field Values

                    _ordm

                    public static final char _ordm
                    º &ordm; = &#186; -- masculine ordinal indicator, U+00BA ISOnum.

                    See Also:
                    Constant Field Values

                    _raquo

                    public static final char _raquo
                    » &raquo; = &#187; -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum.

                    See Also:
                    Constant Field Values

                    _frac14

                    public static final char _frac14
                    ¼ &frac14; = &#188; -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum.

                    See Also:
                    Constant Field Values

                    _frac12

                    public static final char _frac12
                    ½ &frac12; = &#189; -- vulgar fraction one half = fraction one half, U+00BD ISOnum.

                    See Also:
                    Constant Field Values

                    _frac34

                    public static final char _frac34
                    ¾ &frac34; = &#190; -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum.

                    See Also:
                    Constant Field Values

                    _iquest

                    public static final char _iquest
                    ¿ &iquest; = &#191; -- inverted question mark = turned question mark, U+00BF ISOnum.

                    See Also:
                    Constant Field Values

                    _Agrave

                    public static final char _Agrave
                    À &Agrave; = &#192; -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Aacute

                    public static final char _Aacute
                    Á &Aacute; = &#193; -- latin capital letter A with acute, U+00C1 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Acirc

                    public static final char _Acirc
                    Â &Acirc; = &#194; -- latin capital letter A with circumflex, U+00C2 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Atilde

                    public static final char _Atilde
                    Ã &Atilde; = &#195; -- latin capital letter A with tilde, U+00C3 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Auml

                    public static final char _Auml
                    Ä &Auml; = &#196; -- latin capital letter A with diaeresis, U+00C4 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Aring

                    public static final char _Aring
                    Å &Aring; = &#197; -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1.

                    See Also:
                    Constant Field Values

                    _AElig

                    public static final char _AElig
                    Æ &AElig; = &#198; -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ccedil

                    public static final char _Ccedil
                    Ç &Ccedil; = &#199; -- latin capital letter C with cedilla, U+00C7 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Egrave

                    public static final char _Egrave
                    È &Egrave; = &#200; -- latin capital letter E with grave, U+00C8 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Eacute

                    public static final char _Eacute
                    É &Eacute; = &#201; -- latin capital letter E with acute, U+00C9 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ecirc

                    public static final char _Ecirc
                    Ê &Ecirc; = &#202; -- latin capital letter E with circumflex, U+00CA ISOlat1.

                    See Also:
                    Constant Field Values

                    _Euml

                    public static final char _Euml
                    Ë &Euml; = &#203; -- latin capital letter E with diaeresis, U+00CB ISOlat1.

                    See Also:
                    Constant Field Values

                    _Igrave

                    public static final char _Igrave
                    Ì &Igrave; = &#204; -- latin capital letter I with grave, U+00CC ISOlat1.

                    See Also:
                    Constant Field Values

                    _Iacute

                    public static final char _Iacute
                    Í &Iacute; = &#205; -- latin capital letter I with acute, U+00CD ISOlat1.

                    See Also:
                    Constant Field Values

                    _Icirc

                    public static final char _Icirc
                    Î &Icirc; = &#206; -- latin capital letter I with circumflex, U+00CE ISOlat1.

                    See Also:
                    Constant Field Values

                    _Iuml

                    public static final char _Iuml
                    Ï &Iuml; = &#207; -- latin capital letter I with diaeresis, U+00CF ISOlat1.

                    See Also:
                    Constant Field Values

                    _ETH

                    public static final char _ETH
                    Ð &ETH; = &#208; -- latin capital letter ETH, U+00D0 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ntilde

                    public static final char _Ntilde
                    Ñ &Ntilde; = &#209; -- latin capital letter N with tilde, U+00D1 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ograve

                    public static final char _Ograve
                    Ò &Ograve; = &#210; -- latin capital letter O with grave, U+00D2 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Oacute

                    public static final char _Oacute
                    Ó &Oacute; = &#211; -- latin capital letter O with acute, U+00D3 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ocirc

                    public static final char _Ocirc
                    Ô &Ocirc; = &#212; -- latin capital letter O with circumflex, U+00D4 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Otilde

                    public static final char _Otilde
                    Õ &Otilde; = &#213; -- latin capital letter O with tilde, U+00D5 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ouml

                    public static final char _Ouml
                    Ö &Ouml; = &#214; -- latin capital letter O with diaeresis, U+00D6 ISOlat1.

                    See Also:
                    Constant Field Values

                    _times

                    public static final char _times
                    × &times; = &#215; -- multiplication sign, U+00D7 ISOnum.

                    See Also:
                    Constant Field Values

                    _Oslash

                    public static final char _Oslash
                    Ø &Oslash; = &#216; -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ugrave

                    public static final char _Ugrave
                    Ù &Ugrave; = &#217; -- latin capital letter U with grave, U+00D9 ISOlat1.

                    See Also:
                    Constant Field Values

                    _Uacute

                    public static final char _Uacute
                    Ú &Uacute; = &#218; -- latin capital letter U with acute, U+00DA ISOlat1.

                    See Also:
                    Constant Field Values

                    _Ucirc

                    public static final char _Ucirc
                    Û &Ucirc; = &#219; -- latin capital letter U with circumflex, U+00DB ISOlat1.

                    See Also:
                    Constant Field Values

                    _Uuml

                    public static final char _Uuml
                    Ü &Uuml; = &#220; -- latin capital letter U with diaeresis, U+00DC ISOlat1.

                    See Also:
                    Constant Field Values

                    _Yacute

                    public static final char _Yacute
                    Ý &Yacute; = &#221; -- latin capital letter Y with acute, U+00DD ISOlat1.

                    See Also:
                    Constant Field Values

                    _THORN

                    public static final char _THORN
                    Þ &THORN; = &#222; -- latin capital letter THORN, U+00DE ISOlat1.

                    See Also:
                    Constant Field Values

                    _szlig

                    public static final char _szlig
                    ß &szlig; = &#223; -- latin small letter sharp s = ess-zed, U+00DF ISOlat1.

                    See Also:
                    Constant Field Values

                    _agrave

                    public static final char _agrave
                    à &agrave; = &#224; -- latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1.

                    See Also:
                    Constant Field Values

                    _aacute

                    public static final char _aacute
                    á &aacute; = &#225; -- latin small letter a with acute, U+00E1 ISOlat1.

                    See Also:
                    Constant Field Values

                    _acirc

                    public static final char _acirc
                    â &acirc; = &#226; -- latin small letter a with circumflex, U+00E2 ISOlat1.

                    See Also:
                    Constant Field Values

                    _atilde

                    public static final char _atilde
                    ã &atilde; = &#227; -- latin small letter a with tilde, U+00E3 ISOlat1.

                    See Also:
                    Constant Field Values

                    _auml

                    public static final char _auml
                    ä &auml; = &#228; -- latin small letter a with diaeresis, U+00E4 ISOlat1.

                    See Also:
                    Constant Field Values

                    _aring

                    public static final char _aring
                    å &aring; = &#229; -- latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1.

                    See Also:
                    Constant Field Values

                    _aelig

                    public static final char _aelig
                    æ &aelig; = &#230; -- latin small letter ae = latin small ligature ae, U+00E6 ISOlat1.

                    See Also:
                    Constant Field Values

                    _ccedil

                    public static final char _ccedil
                    ç &ccedil; = &#231; -- latin small letter c with cedilla, U+00E7 ISOlat1.

                    See Also:
                    Constant Field Values

                    _egrave

                    public static final char _egrave
                    è &egrave; = &#232; -- latin small letter e with grave, U+00E8 ISOlat1.

                    See Also:
                    Constant Field Values

                    _eacute

                    public static final char _eacute
                    é &eacute; = &#233; -- latin small letter e with acute, U+00E9 ISOlat1.

                    See Also:
                    Constant Field Values

                    _ecirc

                    public static final char _ecirc
                    ê &ecirc; = &#234; -- latin small letter e with circumflex, U+00EA ISOlat1.

                    See Also:
                    Constant Field Values

                    _euml

                    public static final char _euml
                    ë &euml; = &#235; -- latin small letter e with diaeresis, U+00EB ISOlat1.

                    See Also:
                    Constant Field Values

                    _igrave

                    public static final char _igrave
                    ì &igrave; = &#236; -- latin small letter i with grave, U+00EC ISOlat1.

                    See Also:
                    Constant Field Values

                    _iacute

                    public static final char _iacute
                    í &iacute; = &#237; -- latin small letter i with acute, U+00ED ISOlat1.

                    See Also:
                    Constant Field Values

                    _icirc

                    public static final char _icirc
                    î &icirc; = &#238; -- latin small letter i with circumflex, U+00EE ISOlat1.

                    See Also:
                    Constant Field Values

                    _iuml

                    public static final char _iuml
                    ï &iuml; = &#239; -- latin small letter i with diaeresis, U+00EF ISOlat1.

                    See Also:
                    Constant Field Values

                    _eth

                    public static final char _eth
                    ð &eth; = &#240; -- latin small letter eth, U+00F0 ISOlat1.

                    See Also:
                    Constant Field Values

                    _ntilde

                    public static final char _ntilde
                    ñ &ntilde; = &#241; -- latin small letter n with tilde, U+00F1 ISOlat1.

                    See Also:
                    Constant Field Values

                    _ograve

                    public static final char _ograve
                    ò &ograve; = &#242; -- latin small letter o with grave, U+00F2 ISOlat1.

                    See Also:
                    Constant Field Values

                    _oacute

                    public static final char _oacute
                    ó &oacute; = &#243; -- latin small letter o with acute, U+00F3 ISOlat1.

                    See Also:
                    Constant Field Values

                    _ocirc

                    public static final char _ocirc
                    ô &ocirc; = &#244; -- latin small letter o with circumflex, U+00F4 ISOlat1.

                    See Also:
                    Constant Field Values

                    _otilde

                    public static final char _otilde
                    õ &otilde; = &#245; -- latin small letter o with tilde, U+00F5 ISOlat1.

                    See Also:
                    Constant Field Values

                    _ouml

                    public static final char _ouml
                    ö &ouml; = &#246; -- latin small letter o with diaeresis, U+00F6 ISOlat1.

                    See Also:
                    Constant Field Values

                    _divide

                    public static final char _divide
                    ÷ &divide; = &#247; -- division sign, U+00F7 ISOnum.

                    See Also:
                    Constant Field Values

                    _oslash

                    public static final char _oslash
                    ø &oslash; = &#248; -- latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1.

                    See Also:
                    Constant Field Values

                    _ugrave

                    public static final char _ugrave
                    ù &ugrave; = &#249; -- latin small letter u with grave, U+00F9 ISOlat1.

                    See Also:
                    Constant Field Values

                    _uacute

                    public static final char _uacute
                    ú &uacute; = &#250; -- latin small letter u with acute, U+00FA ISOlat1.

                    See Also:
                    Constant Field Values

                    _ucirc

                    public static final char _ucirc
                    û &ucirc; = &#251; -- latin small letter u with circumflex, U+00FB ISOlat1.

                    See Also:
                    Constant Field Values

                    _uuml

                    public static final char _uuml
                    ü &uuml; = &#252; -- latin small letter u with diaeresis, U+00FC ISOlat1.

                    See Also:
                    Constant Field Values

                    _yacute

                    public static final char _yacute
                    ý &yacute; = &#253; -- latin small letter y with acute, U+00FD ISOlat1.

                    See Also:
                    Constant Field Values

                    _thorn

                    public static final char _thorn
                    þ &thorn; = &#254; -- latin small letter thorn, U+00FE ISOlat1.

                    See Also:
                    Constant Field Values

                    _yuml

                    public static final char _yuml
                    ÿ &yuml; = &#255; -- latin small letter y with diaeresis, U+00FF ISOlat1.

                    See Also:
                    Constant Field Values

                    _fnof

                    public static final char _fnof
                    ƒ &fnof; = &#402; -- latin small letter f with hook = function = florin, U+0192 ISOtech.

                    See Also:
                    Constant Field Values

                    _Alpha

                    public static final char _Alpha
                    Α &Alpha; = &#913; -- greek capital letter alpha, U+0391.

                    See Also:
                    Constant Field Values

                    _Beta

                    public static final char _Beta
                    Β &Beta; = &#914; -- greek capital letter beta, U+0392.

                    See Also:
                    Constant Field Values

                    _Gamma

                    public static final char _Gamma
                    Γ &Gamma; = &#915; -- greek capital letter gamma, U+0393 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Delta

                    public static final char _Delta
                    Δ &Delta; = &#916; -- greek capital letter delta, U+0394 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Epsilon

                    public static final char _Epsilon
                    Ε &Epsilon; = &#917; -- greek capital letter epsilon, U+0395.

                    See Also:
                    Constant Field Values

                    _Zeta

                    public static final char _Zeta
                    Ζ &Zeta; = &#918; -- greek capital letter zeta, U+0396.

                    See Also:
                    Constant Field Values

                    _Eta

                    public static final char _Eta
                    Η &Eta; = &#919; -- greek capital letter eta, U+0397.

                    See Also:
                    Constant Field Values

                    _Theta

                    public static final char _Theta
                    Θ &Theta; = &#920; -- greek capital letter theta, U+0398 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Iota

                    public static final char _Iota
                    Ι &Iota; = &#921; -- greek capital letter iota, U+0399.

                    See Also:
                    Constant Field Values

                    _Kappa

                    public static final char _Kappa
                    Κ &Kappa; = &#922; -- greek capital letter kappa, U+039A.

                    See Also:
                    Constant Field Values

                    _Lambda

                    public static final char _Lambda
                    Λ &Lambda; = &#923; -- greek capital letter lambda, U+039B ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Mu

                    public static final char _Mu
                    Μ &Mu; = &#924; -- greek capital letter mu, U+039C.

                    See Also:
                    Constant Field Values

                    _Nu

                    public static final char _Nu
                    Ν &Nu; = &#925; -- greek capital letter nu, U+039D.

                    See Also:
                    Constant Field Values

                    _Xi

                    public static final char _Xi
                    Ξ &Xi; = &#926; -- greek capital letter xi, U+039E ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Omicron

                    public static final char _Omicron
                    Ο &Omicron; = &#927; -- greek capital letter omicron, U+039F.

                    See Also:
                    Constant Field Values

                    _Pi

                    public static final char _Pi
                    Π &Pi; = &#928; -- greek capital letter pi, U+03A0 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Rho

                    public static final char _Rho
                    Ρ &Rho; = &#929; -- greek capital letter rho, U+03A1.

                    See Also:
                    Constant Field Values

                    _Sigma

                    public static final char _Sigma
                    Σ &Sigma; = &#931; -- greek capital letter sigma, U+03A3 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Tau

                    public static final char _Tau
                    Τ &Tau; = &#932; -- greek capital letter tau, U+03A4.

                    See Also:
                    Constant Field Values

                    _Upsilon

                    public static final char _Upsilon
                    Υ &Upsilon; = &#933; -- greek capital letter upsilon, U+03A5 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Phi

                    public static final char _Phi
                    Φ &Phi; = &#934; -- greek capital letter phi, U+03A6 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Chi

                    public static final char _Chi
                    Χ &Chi; = &#935; -- greek capital letter chi, U+03A7.

                    See Also:
                    Constant Field Values

                    _Psi

                    public static final char _Psi
                    Ψ &Psi; = &#936; -- greek capital letter psi, U+03A8 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _Omega

                    public static final char _Omega
                    Ω &Omega; = &#937; -- greek capital letter omega, U+03A9 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _alpha

                    public static final char _alpha
                    α &alpha; = &#945; -- greek small letter alpha, U+03B1 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _beta

                    public static final char _beta
                    β &beta; = &#946; -- greek small letter beta, U+03B2 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _gamma

                    public static final char _gamma
                    γ &gamma; = &#947; -- greek small letter gamma, U+03B3 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _delta

                    public static final char _delta
                    δ &delta; = &#948; -- greek small letter delta, U+03B4 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _epsilon

                    public static final char _epsilon
                    ε &epsilon; = &#949; -- greek small letter epsilon, U+03B5 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _zeta

                    public static final char _zeta
                    ζ &zeta; = &#950; -- greek small letter zeta, U+03B6 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _eta

                    public static final char _eta
                    η &eta; = &#951; -- greek small letter eta, U+03B7 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _theta

                    public static final char _theta
                    θ &theta; = &#952; -- greek small letter theta, U+03B8 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _iota

                    public static final char _iota
                    ι &iota; = &#953; -- greek small letter iota, U+03B9 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _kappa

                    public static final char _kappa
                    κ &kappa; = &#954; -- greek small letter kappa, U+03BA ISOgrk3.

                    See Also:
                    Constant Field Values

                    _lambda

                    public static final char _lambda
                    λ &lambda; = &#955; -- greek small letter lambda, U+03BB ISOgrk3.

                    See Also:
                    Constant Field Values

                    _mu

                    public static final char _mu
                    μ &mu; = &#956; -- greek small letter mu, U+03BC ISOgrk3.

                    See Also:
                    Constant Field Values

                    _nu

                    public static final char _nu
                    ν &nu; = &#957; -- greek small letter nu, U+03BD ISOgrk3.

                    See Also:
                    Constant Field Values

                    _xi

                    public static final char _xi
                    ξ &xi; = &#958; -- greek small letter xi, U+03BE ISOgrk3.

                    See Also:
                    Constant Field Values

                    _omicron

                    public static final char _omicron
                    ο &omicron; = &#959; -- greek small letter omicron, U+03BF NEW.

                    See Also:
                    Constant Field Values

                    _pi

                    public static final char _pi
                    π &pi; = &#960; -- greek small letter pi, U+03C0 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _rho

                    public static final char _rho
                    ρ &rho; = &#961; -- greek small letter rho, U+03C1 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _sigmaf

                    public static final char _sigmaf
                    ς &sigmaf; = &#962; -- greek small letter final sigma, U+03C2 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _sigma

                    public static final char _sigma
                    σ &sigma; = &#963; -- greek small letter sigma, U+03C3 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _tau

                    public static final char _tau
                    τ &tau; = &#964; -- greek small letter tau, U+03C4 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _upsilon

                    public static final char _upsilon
                    υ &upsilon; = &#965; -- greek small letter upsilon, U+03C5 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _phi

                    public static final char _phi
                    φ &phi; = &#966; -- greek small letter phi, U+03C6 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _chi

                    public static final char _chi
                    χ &chi; = &#967; -- greek small letter chi, U+03C7 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _psi

                    public static final char _psi
                    ψ &psi; = &#968; -- greek small letter psi, U+03C8 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _omega

                    public static final char _omega
                    ω &omega; = &#969; -- greek small letter omega, U+03C9 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _thetasym

                    public static final char _thetasym
                    ϑ &thetasym; = &#977; -- greek small letter theta symbol, U+03D1 NEW.

                    See Also:
                    Constant Field Values

                    _upsih

                    public static final char _upsih
                    ϒ &upsih; = &#978; -- greek upsilon with hook symbol, U+03D2 NEW.

                    See Also:
                    Constant Field Values

                    _piv

                    public static final char _piv
                    ϖ &piv; = &#982; -- greek pi symbol, U+03D6 ISOgrk3.

                    See Also:
                    Constant Field Values

                    _bull

                    public static final char _bull
                    &bull; = &#8226; -- bullet = black small circle, U+2022 ISOpub
                    (see comments).

                    bullet is NOT the same as bullet operator, U+2219

                    See Also:
                    Constant Field Values

                    _hellip

                    public static final char _hellip
                    &hellip; = &#8230; -- horizontal ellipsis = three dot leader, U+2026 ISOpub.

                    See Also:
                    Constant Field Values

                    _prime

                    public static final char _prime
                    &prime; = &#8242; -- prime = minutes = feet, U+2032 ISOtech.

                    See Also:
                    Constant Field Values

                    _Prime

                    public static final char _Prime
                    &Prime; = &#8243; -- double prime = seconds = inches, U+2033 ISOtech.

                    See Also:
                    Constant Field Values

                    _oline

                    public static final char _oline
                    &oline; = &#8254; -- overline = spacing overscore, U+203E NEW.

                    See Also:
                    Constant Field Values

                    _frasl

                    public static final char _frasl
                    &frasl; = &#8260; -- fraction slash, U+2044 NEW.

                    See Also:
                    Constant Field Values

                    _weierp

                    public static final char _weierp
                    &weierp; = &#8472; -- script capital P = power set = Weierstrass p, U+2118 ISOamso.

                    See Also:
                    Constant Field Values

                    _image

                    public static final char _image
                    &image; = &#8465; -- black-letter capital I = imaginary part, U+2111 ISOamso.

                    See Also:
                    Constant Field Values

                    _real

                    public static final char _real
                    &real; = &#8476; -- black-letter capital R = real part symbol, U+211C ISOamso.

                    See Also:
                    Constant Field Values

                    _trade

                    public static final char _trade
                    &trade; = &#8482; -- trade mark sign, U+2122 ISOnum.

                    See Also:
                    Constant Field Values

                    _alefsym

                    public static final char _alefsym
                    &alefsym; = &#8501; -- alef symbol = first transfinite cardinal, U+2135 NEW
                    (see comments).

                    alef symbol is NOT the same as hebrew letter alef, U+05D0 although the same glyph could be used to depict both characters

                    See Also:
                    Constant Field Values

                    _larr

                    public static final char _larr
                    &larr; = &#8592; -- leftwards arrow, U+2190 ISOnum.

                    See Also:
                    Constant Field Values

                    _uarr

                    public static final char _uarr
                    &uarr; = &#8593; -- upwards arrow, U+2191 ISOnum.

                    See Also:
                    Constant Field Values

                    _rarr

                    public static final char _rarr
                    &rarr; = &#8594; -- rightwards arrow, U+2192 ISOnum.

                    See Also:
                    Constant Field Values

                    _darr

                    public static final char _darr
                    &darr; = &#8595; -- downwards arrow, U+2193 ISOnum.

                    See Also:
                    Constant Field Values

                    _harr

                    public static final char _harr
                    &harr; = &#8596; -- left right arrow, U+2194 ISOamsa.

                    See Also:
                    Constant Field Values

                    _crarr

                    public static final char _crarr
                    &crarr; = &#8629; -- downwards arrow with corner leftwards = carriage return, U+21B5 NEW.

                    See Also:
                    Constant Field Values

                    _lArr

                    public static final char _lArr
                    &lArr; = &#8656; -- leftwards double arrow, U+21D0 ISOtech
                    (see comments).

                    ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So lArr can be used for 'is implied by' as ISOtech suggests

                    See Also:
                    Constant Field Values

                    _uArr

                    public static final char _uArr
                    &uArr; = &#8657; -- upwards double arrow, U+21D1 ISOamsa.

                    See Also:
                    Constant Field Values

                    _rArr

                    public static final char _rArr
                    &rArr; = &#8658; -- rightwards double arrow, U+21D2 ISOtech
                    (see comments).

                    ISO 10646 does not say this is the 'implies' character but does not have another character with this function so rArr can be used for 'implies' as ISOtech suggests

                    See Also:
                    Constant Field Values

                    _dArr

                    public static final char _dArr
                    &dArr; = &#8659; -- downwards double arrow, U+21D3 ISOamsa.

                    See Also:
                    Constant Field Values

                    _hArr

                    public static final char _hArr
                    &hArr; = &#8660; -- left right double arrow, U+21D4 ISOamsa.

                    See Also:
                    Constant Field Values

                    _forall

                    public static final char _forall
                    &forall; = &#8704; -- for all, U+2200 ISOtech.

                    See Also:
                    Constant Field Values

                    _part

                    public static final char _part
                    &part; = &#8706; -- partial differential, U+2202 ISOtech.

                    See Also:
                    Constant Field Values

                    _exist

                    public static final char _exist
                    &exist; = &#8707; -- there exists, U+2203 ISOtech.

                    See Also:
                    Constant Field Values

                    _empty

                    public static final char _empty
                    &empty; = &#8709; -- empty set = null set = diameter, U+2205 ISOamso.

                    See Also:
                    Constant Field Values

                    _nabla

                    public static final char _nabla
                    &nabla; = &#8711; -- nabla = backward difference, U+2207 ISOtech.

                    See Also:
                    Constant Field Values

                    _isin

                    public static final char _isin
                    &isin; = &#8712; -- element of, U+2208 ISOtech.

                    See Also:
                    Constant Field Values

                    _notin

                    public static final char _notin
                    &notin; = &#8713; -- not an element of, U+2209 ISOtech.

                    See Also:
                    Constant Field Values

                    _ni

                    public static final char _ni
                    &ni; = &#8715; -- contains as member, U+220B ISOtech
                    (see comments).

                    should there be a more memorable name than 'ni'?

                    See Also:
                    Constant Field Values

                    _prod

                    public static final char _prod
                    &prod; = &#8719; -- n-ary product = product sign, U+220F ISOamsb
                    (see comments).

                    prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both

                    See Also:
                    Constant Field Values

                    _sum

                    public static final char _sum
                    &sum; = &#8721; -- n-ary summation, U+2211 ISOamsb
                    (see comments).

                    sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both

                    See Also:
                    Constant Field Values

                    _minus

                    public static final char _minus
                    &minus; = &#8722; -- minus sign, U+2212 ISOtech.

                    See Also:
                    Constant Field Values

                    _lowast

                    public static final char _lowast
                    &lowast; = &#8727; -- asterisk operator, U+2217 ISOtech.

                    See Also:
                    Constant Field Values

                    _radic

                    public static final char _radic
                    &radic; = &#8730; -- square root = radical sign, U+221A ISOtech.

                    See Also:
                    Constant Field Values

                    _prop

                    public static final char _prop
                    &prop; = &#8733; -- proportional to, U+221D ISOtech.

                    See Also:
                    Constant Field Values

                    _infin

                    public static final char _infin
                    &infin; = &#8734; -- infinity, U+221E ISOtech.

                    See Also:
                    Constant Field Values

                    _ang

                    public static final char _ang
                    &ang; = &#8736; -- angle, U+2220 ISOamso.

                    See Also:
                    Constant Field Values

                    _and

                    public static final char _and
                    &and; = &#8743; -- logical and = wedge, U+2227 ISOtech.

                    See Also:
                    Constant Field Values

                    _or

                    public static final char _or
                    &or; = &#8744; -- logical or = vee, U+2228 ISOtech.

                    See Also:
                    Constant Field Values

                    _cap

                    public static final char _cap
                    &cap; = &#8745; -- intersection = cap, U+2229 ISOtech.

                    See Also:
                    Constant Field Values

                    _cup

                    public static final char _cup
                    &cup; = &#8746; -- union = cup, U+222A ISOtech.

                    See Also:
                    Constant Field Values

                    _int

                    public static final char _int
                    &int; = &#8747; -- integral, U+222B ISOtech.

                    See Also:
                    Constant Field Values

                    _there4

                    public static final char _there4
                    &there4; = &#8756; -- therefore, U+2234 ISOtech.

                    See Also:
                    Constant Field Values

                    _sim

                    public static final char _sim
                    &sim; = &#8764; -- tilde operator = varies with = similar to, U+223C ISOtech
                    (see comments).

                    tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both

                    See Also:
                    Constant Field Values

                    _cong

                    public static final char _cong
                    &cong; = &#8773; -- approximately equal to, U+2245 ISOtech.

                    See Also:
                    Constant Field Values

                    _asymp

                    public static final char _asymp
                    &asymp; = &#8776; -- almost equal to = asymptotic to, U+2248 ISOamsr.

                    See Also:
                    Constant Field Values

                    _ne

                    public static final char _ne
                    &ne; = &#8800; -- not equal to, U+2260 ISOtech.

                    See Also:
                    Constant Field Values

                    _equiv

                    public static final char _equiv
                    &equiv; = &#8801; -- identical to, U+2261 ISOtech.

                    See Also:
                    Constant Field Values

                    _le

                    public static final char _le
                    &le; = &#8804; -- less-than or equal to, U+2264 ISOtech.

                    See Also:
                    Constant Field Values

                    _ge

                    public static final char _ge
                    &ge; = &#8805; -- greater-than or equal to, U+2265 ISOtech.

                    See Also:
                    Constant Field Values

                    _sub

                    public static final char _sub
                    &sub; = &#8834; -- subset of, U+2282 ISOtech.

                    See Also:
                    Constant Field Values

                    _sup

                    public static final char _sup
                    &sup; = &#8835; -- superset of, U+2283 ISOtech
                    (see comments).

                    note that nsup, 'not a superset of, U+2285' is not covered by the Symbol font encoding and is not included. Should it be, for symmetry? It is in ISOamsn

                    See Also:
                    Constant Field Values

                    _nsub

                    public static final char _nsub
                    &nsub; = &#8836; -- not a subset of, U+2284 ISOamsn.

                    See Also:
                    Constant Field Values

                    _sube

                    public static final char _sube
                    &sube; = &#8838; -- subset of or equal to, U+2286 ISOtech.

                    See Also:
                    Constant Field Values

                    _supe

                    public static final char _supe
                    &supe; = &#8839; -- superset of or equal to, U+2287 ISOtech.

                    See Also:
                    Constant Field Values

                    _oplus

                    public static final char _oplus
                    &oplus; = &#8853; -- circled plus = direct sum, U+2295 ISOamsb.

                    See Also:
                    Constant Field Values

                    _otimes

                    public static final char _otimes
                    &otimes; = &#8855; -- circled times = vector product, U+2297 ISOamsb.

                    See Also:
                    Constant Field Values

                    _perp

                    public static final char _perp
                    &perp; = &#8869; -- up tack = orthogonal to = perpendicular, U+22A5 ISOtech.

                    See Also:
                    Constant Field Values

                    _sdot

                    public static final char _sdot
                    &sdot; = &#8901; -- dot operator, U+22C5 ISOamsb
                    (see comments).

                    dot operator is NOT the same character as U+00B7 middle dot

                    See Also:
                    Constant Field Values

                    _lceil

                    public static final char _lceil
                    &lceil; = &#8968; -- left ceiling = APL upstile, U+2308 ISOamsc.

                    See Also:
                    Constant Field Values

                    _rceil

                    public static final char _rceil
                    &rceil; = &#8969; -- right ceiling, U+2309 ISOamsc.

                    See Also:
                    Constant Field Values

                    _lfloor

                    public static final char _lfloor
                    &lfloor; = &#8970; -- left floor = APL downstile, U+230A ISOamsc.

                    See Also:
                    Constant Field Values

                    _rfloor

                    public static final char _rfloor
                    &rfloor; = &#8971; -- right floor, U+230B ISOamsc.

                    See Also:
                    Constant Field Values

                    _lang

                    public static final char _lang
                    &lang; = &#9001; -- left-pointing angle bracket = bra, U+2329 ISOtech
                    (see comments).

                    lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark'

                    See Also:
                    Constant Field Values

                    _rang

                    public static final char _rang
                    &rang; = &#9002; -- right-pointing angle bracket = ket, U+232A ISOtech
                    (see comments).

                    rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark'

                    See Also:
                    Constant Field Values

                    _loz

                    public static final char _loz
                    &loz; = &#9674; -- lozenge, U+25CA ISOpub.

                    See Also:
                    Constant Field Values

                    _spades

                    public static final char _spades
                    &spades; = &#9824; -- black spade suit, U+2660 ISOpub
                    (see comments).

                    black here seems to mean filled as opposed to hollow

                    See Also:
                    Constant Field Values

                    _clubs

                    public static final char _clubs
                    &clubs; = &#9827; -- black club suit = shamrock, U+2663 ISOpub.

                    See Also:
                    Constant Field Values

                    _hearts

                    public static final char _hearts
                    &hearts; = &#9829; -- black heart suit = valentine, U+2665 ISOpub.

                    See Also:
                    Constant Field Values

                    _diams

                    public static final char _diams
                    &diams; = &#9830; -- black diamond suit, U+2666 ISOpub.

                    See Also:
                    Constant Field Values

                    _quot

                    public static final char _quot
                    " &quot; = &#34; -- quotation mark = APL quote, U+0022 ISOnum.

                    See Also:
                    Constant Field Values

                    _amp

                    public static final char _amp
                    & &amp; = &#38; -- ampersand, U+0026 ISOnum.

                    See Also:
                    Constant Field Values

                    _lt

                    public static final char _lt
                    < &lt; = &#60; -- less-than sign, U+003C ISOnum.

                    See Also:
                    Constant Field Values

                    _gt

                    public static final char _gt
                    > &gt; = &#62; -- greater-than sign, U+003E ISOnum.

                    See Also:
                    Constant Field Values

                    _OElig

                    public static final char _OElig
                    Œ &OElig; = &#338; -- latin capital ligature OE, U+0152 ISOlat2.

                    See Also:
                    Constant Field Values

                    _oelig

                    public static final char _oelig
                    œ &oelig; = &#339; -- latin small ligature oe, U+0153 ISOlat2
                    (see comments).

                    ligature is a misnomer; this is a separate character in some languages

                    See Also:
                    Constant Field Values

                    _Scaron

                    public static final char _Scaron
                    Š &Scaron; = &#352; -- latin capital letter S with caron, U+0160 ISOlat2.

                    See Also:
                    Constant Field Values

                    _scaron

                    public static final char _scaron
                    š &scaron; = &#353; -- latin small letter s with caron, U+0161 ISOlat2.

                    See Also:
                    Constant Field Values

                    _Yuml

                    public static final char _Yuml
                    Ÿ &Yuml; = &#376; -- latin capital letter Y with diaeresis, U+0178 ISOlat2.

                    See Also:
                    Constant Field Values

                    _circ

                    public static final char _circ
                    ˆ &circ; = &#710; -- modifier letter circumflex accent, U+02C6 ISOpub.

                    See Also:
                    Constant Field Values

                    _tilde

                    public static final char _tilde
                    ˜ &tilde; = &#732; -- small tilde, U+02DC ISOdia.

                    See Also:
                    Constant Field Values

                    _ensp

                    public static final char _ensp
                    &ensp; = &#8194; -- en space, U+2002 ISOpub.

                    See Also:
                    Constant Field Values

                    _emsp

                    public static final char _emsp
                    &emsp; = &#8195; -- em space, U+2003 ISOpub.

                    See Also:
                    Constant Field Values

                    _thinsp

                    public static final char _thinsp
                    &thinsp; = &#8201; -- thin space, U+2009 ISOpub.

                    See Also:
                    Constant Field Values

                    _zwnj

                    public static final char _zwnj
                    &zwnj; = &#8204; -- zero width non-joiner, U+200C NEW RFC 2070.

                    See Also:
                    Constant Field Values

                    _zwj

                    public static final char _zwj
                    &zwj; = &#8205; -- zero width joiner, U+200D NEW RFC 2070.

                    See Also:
                    Constant Field Values

                    _lrm

                    public static final char _lrm
                    &lrm; = &#8206; -- left-to-right mark, U+200E NEW RFC 2070.

                    See Also:
                    Constant Field Values

                    _rlm

                    public static final char _rlm
                    &rlm; = &#8207; -- right-to-left mark, U+200F NEW RFC 2070.

                    See Also:
                    Constant Field Values

                    _ndash

                    public static final char _ndash
                    &ndash; = &#8211; -- en dash, U+2013 ISOpub.

                    See Also:
                    Constant Field Values

                    _mdash

                    public static final char _mdash
                    &mdash; = &#8212; -- em dash, U+2014 ISOpub.

                    See Also:
                    Constant Field Values

                    _lsquo

                    public static final char _lsquo
                    &lsquo; = &#8216; -- left single quotation mark, U+2018 ISOnum.

                    See Also:
                    Constant Field Values

                    _rsquo

                    public static final char _rsquo
                    &rsquo; = &#8217; -- right single quotation mark, U+2019 ISOnum.

                    See Also:
                    Constant Field Values

                    _sbquo

                    public static final char _sbquo
                    &sbquo; = &#8218; -- single low-9 quotation mark, U+201A NEW.

                    See Also:
                    Constant Field Values

                    _ldquo

                    public static final char _ldquo
                    &ldquo; = &#8220; -- left double quotation mark, U+201C ISOnum.

                    See Also:
                    Constant Field Values

                    _rdquo

                    public static final char _rdquo
                    &rdquo; = &#8221; -- right double quotation mark, U+201D ISOnum.

                    See Also:
                    Constant Field Values

                    _bdquo

                    public static final char _bdquo
                    &bdquo; = &#8222; -- double low-9 quotation mark, U+201E NEW.

                    See Also:
                    Constant Field Values

                    _dagger

                    public static final char _dagger
                    &dagger; = &#8224; -- dagger, U+2020 ISOpub.

                    See Also:
                    Constant Field Values

                    _Dagger

                    public static final char _Dagger
                    &Dagger; = &#8225; -- double dagger, U+2021 ISOpub.

                    See Also:
                    Constant Field Values

                    _permil

                    public static final char _permil
                    &permil; = &#8240; -- per mille sign, U+2030 ISOtech.

                    See Also:
                    Constant Field Values

                    _lsaquo

                    public static final char _lsaquo
                    &lsaquo; = &#8249; -- single left-pointing angle quotation mark, U+2039 ISO proposed
                    (see comments).

                    lsaquo is proposed but not yet ISO standardized

                    See Also:
                    Constant Field Values

                    _rsaquo

                    public static final char _rsaquo
                    &rsaquo; = &#8250; -- single right-pointing angle quotation mark, U+203A ISO proposed
                    (see comments).

                    rsaquo is proposed but not yet ISO standardized

                    See Also:
                    Constant Field Values

                    _euro

                    public static final char _euro
                    &euro; = &#8364; -- euro sign, U+20AC NEW.

                    See Also:
                    Constant Field Values

                    _apos

                    public static final char _apos
                    ' &apos; = &#39; -- apostrophe = APL quote, U+0027 ISOnum
                    (see comments).

                    apos is only defined for use in XHTML (see the XHTML Special Characters Entity Set), but not in HTML.

                    See Also:
                    Config.IsApostropheEncoded, Constant Field Values
                    Method Detail

                    getName

                    public java.lang.String getName()
                    Returns the name of this character entity reference.

                    Example:
                    ((CharacterEntityReference)CharacterReference.parse("&gt;")).getName() returns "gt"

                    Returns:
                    the name of this character entity reference.
                    See Also:
                    getName(int codePoint)

                    getName

                    public static java.lang.String getName(char ch)
                    Returns the character entity reference name of the specified character.

                    Since all character entity references represent unicode BMP code points, the functionality of this method is identical to that of getName(int codePoint).

                    Example:
                    CharacterEntityReference.getName('>') returns "gt"

                    Returns:
                    the character entity reference name of the specified character, or null if none exists.

                    getName

                    public static java.lang.String getName(int codePoint)
                    Returns the character entity reference name of the specified unicode code point.

                    Since all character entity references represent unicode BMP code points, the functionality of this method is identical to that of getName(char ch).

                    Example:
                    CharacterEntityReference.getName(62) returns "gt"

                    Returns:
                    the character entity reference name of the specified unicode code point, or null if none exists.

                    getCodePointFromName

                    public static int getCodePointFromName(java.lang.String name)
                    Returns the unicode code point of the specified character entity reference name.

                    If the string does not represent a valid character entity reference name, this method returns INVALID_CODE_POINT.

                    Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case, some browsers also recognise them in a case-insensitive way. For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case.

                    Example:
                    CharacterEntityReference.getCodePointFromName("gt") returns 62

                    Returns:
                    the unicode code point of the specified character entity reference name, or INVALID_CODE_POINT if the string does not represent a valid character entity reference name.

                    getCharacterReferenceString

                    public java.lang.String getCharacterReferenceString()
                    Returns the correct encoded form of this character entity reference.

                    Note that the returned string is not necessarily the same as the original source text used to create this object. This library recognises certain invalid forms of character references, as detailed in the decode(String encodedString) method.

                    To retrieve the original source text, use the toString() method instead.

                    Example:
                    CharacterReference.parse("&GT").getCharacterReferenceString() returns "&gt;"

                    Specified by:
                    getCharacterReferenceString in class CharacterReference
                    Returns:
                    the correct encoded form of this character entity reference.
                    See Also:
                    CharacterReference.getCharacterReferenceString(int codePoint)

                    getCharacterReferenceString

                    public static java.lang.String getCharacterReferenceString(int codePoint)
                    Returns the character entity reference encoded form of the specified unicode code point.

                    If the specified unicode code point does not have an equivalent character entity reference, this method returns null. To get either the entity or numeric reference encoded form, use the CharacterReference.getCharacterReferenceString(int codePoint) method instead.

                    Examples:
                    CharacterEntityReference.getCharacterReferenceString(62) returns "&gt;"
                    CharacterEntityReference.getCharacterReferenceString(9786) returns null

                    Returns:
                    the character entity reference encoded form of the specified unicode code point, or null if none exists.
                    See Also:
                    CharacterReference.getCharacterReferenceString(int codePoint)

                    getNameToCodePointMap

                    public static java.util.Map<java.lang.String,java.lang.Integer> getNameToCodePointMap()
                    Returns a map of character entity reference names (String) to unicode code points (Integer).

                    Returns:
                    a map of character entity reference names to unicode code points.

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/CharacterReference.html0000644000175000017500000017250311214132420027230 0ustar twernertwerner CharacterReference (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class CharacterReference

                    java.lang.Object
                      extended by Segment
                          extended by CharacterReference
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>
                    Direct Known Subclasses:
                    CharacterEntityReference, NumericCharacterReference

                    public abstract class CharacterReference
                    extends Segment

                    Represents an HTML Character Reference, implemented by the subclasses CharacterEntityReference and NumericCharacterReference.

                    This class, together with its subclasses, contains static methods to perform most required operations without having to instantiate an object.

                    Instances of this class are useful when the positions of character references in a source document are required, or to replace the found character references with customised text.

                    CharacterReference instances are obtained using one of the following methods:


                    Field Summary
                    static int INVALID_CODE_POINT
                              Represents an invalid unicode code point.
                     
                    Method Summary
                     void appendCharTo(java.lang.Appendable appendable)
                              Appends the character represented by this character reference to the specified appendable object.
                    static java.lang.String decode(java.lang.CharSequence encodedText)
                              Decodes the specified HTML encoded text into normal text.
                    static java.lang.String decode(java.lang.CharSequence encodedText, boolean insideAttributeValue)
                              Decodes the specified HTML encoded text into normal text.
                    static java.lang.String decodeCollapseWhiteSpace(java.lang.CharSequence text)
                              Decodes the specified text after collapsing its white space.
                    static java.lang.String encode(char ch)
                              Encodes the specified character into a character reference if required.
                    static java.lang.String encode(java.lang.CharSequence unencodedText)
                              Encodes the specified text, escaping special characters into character references.
                    static java.lang.String encodeWithWhiteSpaceFormatting(java.lang.CharSequence unencodedText)
                              Encodes the specified text, preserving line breaks, tabs and spaces for rendering by converting them to markup.
                     char getChar()
                              Returns the character represented by this character reference.
                    abstract  java.lang.String getCharacterReferenceString()
                              Returns the encoded form of this character reference.
                    static java.lang.String getCharacterReferenceString(int codePoint)
                              Returns the encoded form of the specified unicode code point.
                     int getCodePoint()
                              Returns the unicode code point represented by this character reference.
                    static int getCodePointFromCharacterReferenceString(java.lang.CharSequence characterReferenceText)
                              Parses a single encoded character reference text into a unicode code point.
                     java.lang.String getDecimalCharacterReferenceString()
                              Returns the decimal encoded form of this character reference.
                    static java.lang.String getDecimalCharacterReferenceString(int codePoint)
                              Returns the decimal encoded form of the specified unicode code point.
                    static java.io.Writer getEncodingFilterWriter(java.io.Writer writer)
                              Returns a filter Writer that encodes all text before passing it through to the specified Writer.
                     java.lang.String getHexadecimalCharacterReferenceString()
                              Returns the hexadecimal encoded form of this character reference.
                    static java.lang.String getHexadecimalCharacterReferenceString(int codePoint)
                              Returns the hexadecimal encoded form of the specified unicode code point.
                     java.lang.String getUnicodeText()
                              Returns the unicode code point of this character reference in U+ notation.
                    static java.lang.String getUnicodeText(int codePoint)
                              Returns the specified unicode code point in U+ notation.
                     boolean isTerminated()
                              Indicates whether this character reference is terminated by a semicolon (;).
                    static CharacterReference parse(java.lang.CharSequence characterReferenceText)
                              Parses a single encoded character reference text into a CharacterReference object.
                    static java.lang.String reencode(java.lang.CharSequence encodedText)
                              Re-encodes the specified text, equivalent to decoding and then encoding again.
                    static boolean requiresEncoding(char ch)
                              Indicates whether the specified character would need to be encoded in HTML text.
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getDebugInfo, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Field Detail

                    INVALID_CODE_POINT

                    public static final int INVALID_CODE_POINT
                    Represents an invalid unicode code point.

                    This can be the result of parsing a numeric character reference outside of the valid unicode range of 0x000000-0x10FFFF, or any other invalid character reference.

                    See Also:
                    Constant Field Values
                    Method Detail

                    getCodePoint

                    public int getCodePoint()
                    Returns the unicode code point represented by this character reference.

                    Returns:
                    the unicode code point represented by this character reference.
                    See Also:
                    appendCharTo(Appendable)

                    getChar

                    public char getChar()
                    Returns the character represented by this character reference.

                    If this character reference represents a unicode supplementary code point, any bits outside of the least significant 16 bits of the code point are truncated, yielding an incorrect result.

                    To ensure that the character is correctly appended to an Appendable object such as a Writer, use the code:
                    characterReference.appendCharTo(appendable)
                    instead of:
                    appendable.append(characterReference.getChar())

                    Returns:
                    the character represented by this character reference.
                    See Also:
                    appendCharTo(Appendable), getCodePoint()

                    appendCharTo

                    public final void appendCharTo(java.lang.Appendable appendable)
                                            throws java.io.IOException
                    Appends the character represented by this character reference to the specified appendable object.

                    If this character is a unicode supplementary character, then both the UTF-16 high/low surrogate char values of the character are appended, as described in the Unicode character representations section of the java.lang.Character class.

                    If the static Config.ConvertNonBreakingSpaces property is set to true (the default), then calling this method on a non-breaking space character reference (&nbsp;) results in a normal space being appended.

                    Parameters:
                    appendable - the object to append this character reference to.
                    Throws:
                    java.io.IOException

                    isTerminated

                    public boolean isTerminated()
                    Indicates whether this character reference is terminated by a semicolon (;).

                    Conversely, this library defines an unterminated character reference as one which does not end with a semicolon.

                    The SGML specification allows unterminated character references in some circumstances, and because the HTML 4.01 specification states simply that "authors may use SGML character references", it follows that they are also valid in HTML documents, although their use is strongly discouraged.

                    Unterminated character references are not allowed in XHTML documents.

                    Returns:
                    true if this character reference is terminated by a semicolon, otherwise false.
                    See Also:
                    decode(CharSequence encodedText, boolean insideAttributeValue)

                    encode

                    public static java.lang.String encode(java.lang.CharSequence unencodedText)
                    Encodes the specified text, escaping special characters into character references.

                    Each character is encoded only if the requiresEncoding(char) method would return true for that character, using its CharacterEntityReference if available, or a decimal NumericCharacterReference if its unicode code point is greater than U+007F.

                    The only exception to this is an apostrophe (U+0027), which depending on the current setting of the static Config.IsApostropheEncoded property, is either left unencoded (default setting), or encoded as the numeric character reference "&#39;".

                    This method never encodes an apostrophe into its character entity reference &apos; as this entity is not defined for use in HTML. See the comments in the CharacterEntityReference class for more information.

                    To encode text using only numeric character references, use the
                    NumericCharacterReference.encode(CharSequence) method instead.

                    Parameters:
                    unencodedText - the text to encode.
                    Returns:
                    the encoded string.
                    See Also:
                    decode(CharSequence)

                    encode

                    public static java.lang.String encode(char ch)
                    Encodes the specified character into a character reference if required.

                    The encoding of the character follows the same rules as for each character in the encode(CharSequence unencodedText) method.

                    Parameters:
                    ch - the character to encode.
                    Returns:
                    a character reference if appropriate, otherwise a string containing the original character.

                    encodeWithWhiteSpaceFormatting

                    public static java.lang.String encodeWithWhiteSpaceFormatting(java.lang.CharSequence unencodedText)
                    Encodes the specified text, preserving line breaks, tabs and spaces for rendering by converting them to markup.

                    This performs the same encoding as the encode(CharSequence) method, but also performs the following conversions:

                    • Line breaks, being Carriage Return (U+000D) or Line Feed (U+000A) characters, and Form Feed characters (U+000C) are converted to "<br />". CR/LF pairs are treated as a single line break.
                    • Multiple consecutive spaces are converted so that every second space is converted to "&nbsp;" while ensuring the last is always a normal space.
                    • Tab characters (U+0009) are converted as if they were four consecutive spaces.

                    The conversion of multiple consecutive spaces to alternating space/non-breaking-space allows the correct number of spaces to be rendered, but also allows the line to wrap in the middle of it.

                    Note that zero-width spaces (U+200B) are converted to the decimal numeric character reference "&#8203;" through the normal encoding process, but IE6 does not render them properly either encoded or unencoded.

                    There is no method provided to reverse this encoding.

                    Parameters:
                    unencodedText - the text to encode.
                    Returns:
                    the encoded string with white space formatting converted to markup.
                    See Also:
                    encode(CharSequence)

                    decode

                    public static java.lang.String decode(java.lang.CharSequence encodedText)
                    Decodes the specified HTML encoded text into normal text.

                    All character entity references and numeric character references are converted to their respective characters.

                    This is equivalent to decode(encodedText,false).

                    Unterminated character references are dealt with according to the rules for text outside of attribute values in the current compatibility mode.

                    If the static Config.ConvertNonBreakingSpaces property is set to true (the default), then all non-breaking space (&nbsp;) character entity references are converted to normal spaces.

                    Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case, some browsers also recognise them in a case-insensitive way. For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case.

                    Parameters:
                    encodedText - the text to decode.
                    Returns:
                    the decoded string.
                    See Also:
                    encode(CharSequence)

                    decode

                    public static java.lang.String decode(java.lang.CharSequence encodedText,
                                                          boolean insideAttributeValue)
                    Decodes the specified HTML encoded text into normal text.

                    All character entity references and numeric character references are converted to their respective characters.

                    Unterminated character references are dealt with according to the value of the insideAttributeValue parameter and the current compatibility mode.

                    If the static Config.ConvertNonBreakingSpaces property is set to true (the default), then all non-breaking space (&nbsp;) character entity references are converted to normal spaces.

                    Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case, some browsers also recognise them in a case-insensitive way. For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case.

                    Parameters:
                    encodedText - the text to decode.
                    insideAttributeValue - specifies whether the encoded text is inside an attribute value.
                    Returns:
                    the decoded string.
                    See Also:
                    decode(CharSequence), encode(CharSequence)

                    decodeCollapseWhiteSpace

                    public static java.lang.String decodeCollapseWhiteSpace(java.lang.CharSequence text)
                    Decodes the specified text after collapsing its white space.

                    All leading and trailing white space is omitted, and any sections of internal white space are replaced by a single space.

                    The result is how the text would normally be rendered by a user agent, assuming it does not contain any tags.

                    If the static Config.ConvertNonBreakingSpaces property is set to true (the default), then all non-breaking space (&nbsp;) character entity references are converted to normal spaces.

                    Unterminated character references are dealt with according to the rules for text outside of attribute values in the current compatibility mode. See the discussion of the insideAttributeValue parameter of the decode(CharSequence, boolean insideAttributeValue) method for a more detailed explanation of this topic.

                    Parameters:
                    text - the source text
                    Returns:
                    the decoded text with collapsed white space.
                    See Also:
                    FormControl.getPredefinedValues()

                    reencode

                    public static java.lang.String reencode(java.lang.CharSequence encodedText)
                    Re-encodes the specified text, equivalent to decoding and then encoding again.

                    This process ensures that the specified encoded text does not contain any remaining unencoded characters.

                    IMPLEMENTATION NOTE: At present this method simply calls the decode method followed by the encode method, but a more efficient implementation may be used in future.

                    Parameters:
                    encodedText - the text to re-encode.
                    Returns:
                    the re-encoded string.

                    getCharacterReferenceString

                    public abstract java.lang.String getCharacterReferenceString()
                    Returns the encoded form of this character reference.

                    The exact behaviour of this method depends on the class of this object. See the CharacterEntityReference.getCharacterReferenceString() and NumericCharacterReference.getCharacterReferenceString() methods for more details.

                    Examples:
                    CharacterReference.parse("&GT;").getCharacterReferenceString() returns "&gt;"
                    CharacterReference.parse("&#x3E;").getCharacterReferenceString() returns "&#x3e;"

                    Returns:
                    the encoded form of this character reference.
                    See Also:
                    getCharacterReferenceString(int codePoint), getDecimalCharacterReferenceString()

                    getCharacterReferenceString

                    public static java.lang.String getCharacterReferenceString(int codePoint)
                    Returns the encoded form of the specified unicode code point.

                    This method returns the character entity reference encoded form of the unicode code point if one exists, otherwise it returns the decimal character reference encoded form.

                    The only exception to this is an apostrophe (U+0027), which is encoded as the numeric character reference "&#39;" instead of its character entity reference "&apos;".

                    Examples:
                    CharacterReference.getCharacterReferenceString(62) returns "&gt;"
                    CharacterReference.getCharacterReferenceString('>') returns "&gt;"
                    CharacterReference.getCharacterReferenceString('☺') returns "&#9786;"

                    Parameters:
                    codePoint - the unicode code point to encode.
                    Returns:
                    the encoded form of the specified unicode code point.
                    See Also:
                    getHexadecimalCharacterReferenceString(int codePoint)

                    getDecimalCharacterReferenceString

                    public java.lang.String getDecimalCharacterReferenceString()
                    Returns the decimal encoded form of this character reference.

                    This is equivalent to getDecimalCharacterReferenceString(getCodePoint()).

                    Example:
                    CharacterReference.parse("&gt;").getDecimalCharacterReferenceString() returns "&#62;"

                    Returns:
                    the decimal encoded form of this character reference.
                    See Also:
                    getCharacterReferenceString(), getHexadecimalCharacterReferenceString()

                    getDecimalCharacterReferenceString

                    public static java.lang.String getDecimalCharacterReferenceString(int codePoint)
                    Returns the decimal encoded form of the specified unicode code point.

                    Example:
                    CharacterReference.getDecimalCharacterReferenceString('>') returns "&#62;"

                    Parameters:
                    codePoint - the unicode code point to encode.
                    Returns:
                    the decimal encoded form of the specified unicode code point.
                    See Also:
                    getCharacterReferenceString(int codePoint), getHexadecimalCharacterReferenceString(int codePoint)

                    getHexadecimalCharacterReferenceString

                    public java.lang.String getHexadecimalCharacterReferenceString()
                    Returns the hexadecimal encoded form of this character reference.

                    This is equivalent to getHexadecimalCharacterReferenceString(getCodePoint()).

                    Example:
                    CharacterReference.parse("&gt;").getHexadecimalCharacterReferenceString() returns "&#x3e;"

                    Returns:
                    the hexadecimal encoded form of this character reference.
                    See Also:
                    getCharacterReferenceString(), getDecimalCharacterReferenceString()

                    getHexadecimalCharacterReferenceString

                    public static java.lang.String getHexadecimalCharacterReferenceString(int codePoint)
                    Returns the hexadecimal encoded form of the specified unicode code point.

                    Example:
                    CharacterReference.getHexadecimalCharacterReferenceString('>') returns "&#x3e;"

                    Parameters:
                    codePoint - the unicode code point to encode.
                    Returns:
                    the hexadecimal encoded form of the specified unicode code point.
                    See Also:
                    getCharacterReferenceString(int codePoint), getDecimalCharacterReferenceString(int codePoint)

                    getUnicodeText

                    public java.lang.String getUnicodeText()
                    Returns the unicode code point of this character reference in U+ notation.

                    This is equivalent to getUnicodeText(getCodePoint()).

                    Example:
                    CharacterReference.parse("&gt;").getUnicodeText() returns "U+003E"

                    Returns:
                    the unicode code point of this character reference in U+ notation.
                    See Also:
                    getUnicodeText(int codePoint)

                    getUnicodeText

                    public static java.lang.String getUnicodeText(int codePoint)
                    Returns the specified unicode code point in U+ notation.

                    Example:
                    CharacterReference.getUnicodeText('>') returns "U+003E"

                    Parameters:
                    codePoint - the unicode code point.
                    Returns:
                    the specified unicode code point in U+ notation.

                    parse

                    public static CharacterReference parse(java.lang.CharSequence characterReferenceText)
                    Parses a single encoded character reference text into a CharacterReference object.

                    The character reference must be at the start of the given text, but may contain other characters at the end. The getEnd() method can be used on the resulting object to determine at which character position the character reference ended.

                    If the text does not represent a valid character reference, this method returns null.

                    Unterminated character references are always accepted, regardless of the settings in the current compatibility mode.

                    To decode all character references in a given text, use the decode(CharSequence) method instead.

                    Example:
                    CharacterReference.parse("&gt;").getChar() returns '>'

                    Parameters:
                    characterReferenceText - the text containing a single encoded character reference.
                    Returns:
                    a CharacterReference object representing the specified text, or null if the text does not represent a valid character reference.
                    See Also:
                    decode(CharSequence)

                    getCodePointFromCharacterReferenceString

                    public static int getCodePointFromCharacterReferenceString(java.lang.CharSequence characterReferenceText)
                    Parses a single encoded character reference text into a unicode code point.

                    The character reference must be at the start of the given text, but may contain other characters at the end.

                    If the text does not represent a valid character reference, this method returns INVALID_CODE_POINT.

                    This is equivalent to parse(characterReferenceText).getCodePoint(), except that it returns INVALID_CODE_POINT if an invalid character reference is specified instead of throwing a NullPointerException.

                    Example:
                    CharacterReference.getCodePointFromCharacterReferenceString("&gt;") returns 62

                    Parameters:
                    characterReferenceText - the text containing a single encoded character reference.
                    Returns:
                    the unicode code point representing the specified text, or INVALID_CODE_POINT if the text does not represent a valid character reference.

                    requiresEncoding

                    public static final boolean requiresEncoding(char ch)
                    Indicates whether the specified character would need to be encoded in HTML text.

                    This is the case if a character entity reference exists for the character, or the unicode code point is greater than U+007F.

                    The only exception to this is an apostrophe (U+0027), which only returns true if the static Config.IsApostropheEncoded property is currently set to true.

                    Parameters:
                    ch - the character to test.
                    Returns:
                    true if the specified character would need to be encoded in HTML text, otherwise false.

                    getEncodingFilterWriter

                    public static java.io.Writer getEncodingFilterWriter(java.io.Writer writer)
                    Returns a filter Writer that encodes all text before passing it through to the specified Writer.

                    Parameters:
                    writer - the destination for the encoded text
                    Returns:
                    a filter Writer that encodes all text before passing it through to the specified Writer.
                    See Also:
                    encode(CharSequence unencodedText)


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/BasicLogFormatter.html0000644000175000017500000004245211214132420027063 0ustar twernertwerner BasicLogFormatter (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class BasicLogFormatter

                    java.lang.Object
                      extended by java.util.logging.Formatter
                          extended by BasicLogFormatter
                    

                    public class BasicLogFormatter
                    extends java.util.logging.Formatter

                    Provides basic formatting for log messages.

                    This class extends the java.util.logging.Formatter class, allowing it to be specified as a formatter for the java.util.logging system.

                    The static format(String level, String message, String loggerName) method provides a means of using the same formatting outside of the java.util.logging framework. See the documentation of this method for more details.


                    Field Summary
                    static boolean OutputLevel
                              Determines whether the logging level is included in the output.
                    static boolean OutputName
                              Determines whether the logger name is included in the output.
                     
                    Constructor Summary
                    BasicLogFormatter()
                               
                     
                    Method Summary
                     java.lang.String format(java.util.logging.LogRecord logRecord)
                              Returns a formatted string representing the log entry information contained in the specified java.util.logging.LogRecord.
                    static java.lang.String format(java.lang.String level, java.lang.String message, java.lang.String loggerName)
                              Returns a formatted string representing the specified log entry information.
                     
                    Methods inherited from class java.util.logging.Formatter
                    formatMessage, getHead, getTail
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Field Detail

                    OutputLevel

                    public static boolean OutputLevel
                    Determines whether the logging level is included in the output.

                    The default value is true.

                    As this is a static property, changing the value will affect all BasicLogFormatter instances, as well as the behaviour of the static format(String level, String message, String loggerName) method.


                    OutputName

                    public static boolean OutputName
                    Determines whether the logger name is included in the output.

                    The default value is false.

                    The logger name used for all automatically created Logger instances is "net.htmlparser.jericho".

                    As this is a static property, changing the value will affect all BasicLogFormatter instances, as well as the behaviour of the static format(String level, String message, String loggerName) method.

                    Constructor Detail

                    BasicLogFormatter

                    public BasicLogFormatter()
                    Method Detail

                    format

                    public java.lang.String format(java.util.logging.LogRecord logRecord)
                    Returns a formatted string representing the log entry information contained in the specified java.util.logging.LogRecord.

                    This method is not called directly, but is used by the java.util.logging framework when this class is specified as a formatter in the logging.properties file.

                    See the documentation of the parent java.util.logging.Formatter class in the Java SDK for more details.

                    Specified by:
                    format in class java.util.logging.Formatter
                    Parameters:
                    logRecord - a java.util.logging.LogRecord object containing all of the log entry information.
                    Returns:
                    a formatted string representing the log entry information contained in the specified java.util.logging.LogRecord.

                    format

                    public static java.lang.String format(java.lang.String level,
                                                          java.lang.String message,
                                                          java.lang.String loggerName)
                    Returns a formatted string representing the specified log entry information.

                    This method is used by the default implementation of the WriterLogger.log(String level, String message) method.

                    The static properties OutputLevel and OutputName affect what information is included in the output.

                    The static Config.NewLine property determines the character sequence used for line breaks.

                    A line of output typically looks like this:

                    INFO: this is the log message
                    or if the OutputName property is set to true, the output would look similar to this:
                    INFO: [net.htmlparser.jericho] this is the log message

                    Parameters:
                    level - a string representing the logging level of the log entry.
                    message - the log message.
                    loggerName - the name of the logger.
                    Returns:
                    a formatted string representing the specified log entry information.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/FormControl.html0000644000175000017500000017777111214132420025775 0ustar twernertwerner FormControl (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class FormControl

                    java.lang.Object
                      extended by Segment
                          extended by FormControl
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>

                    public abstract class FormControl
                    extends Segment

                    Represents an HTML form control.

                    A FormControl consists of a single element that matches one of the form control types.

                    The term output element is used to describe the element that is output if this form control is replaced in an OutputDocument.

                    A predefined value control is a form control for which getFormControlType().hasPredefinedValue() returns true. It has a control type of CHECKBOX, RADIO, BUTTON, SUBMIT, IMAGE, SELECT_SINGLE or SELECT_MULTIPLE.

                    A user value control is a form control for which getFormControlType().hasPredefinedValue() returns false. It has a control type of FILE, HIDDEN, PASSWORD, TEXT or TEXTAREA.

                    The functionality of most significance to users of this class relates to the display characteristics of the output element, manipulated using the setDisabled(boolean) and setOutputStyle(FormControlOutputStyle) methods.

                    As a general rule, the operations dealing with the control's submission values are better performed on a FormFields or FormField object, which provide a more intuitive interface by grouping form controls of the same name together. The higher abstraction level of these classes means they can automatically ensure that the submission values of their constituent controls are consistent with each other, for example by ensuring that only one RADIO control with a given name is checked at a time.

                    A FormFields object can be directly constructed from a collection of FormControl objects.

                    FormControl instances are obtained using the Element.getFormControl() method or are created automatically with the creation of a FormFields object via the Segment.getFormFields() method.

                    See Also:
                    FormControlType, FormFields, FormField

                    Method Summary
                     boolean addValue(java.lang.String value)
                              Adds the specified value to this control's submission values *.
                     void clearValues()
                              Clears the control's existing submission values.
                     java.util.Map<java.lang.String,java.lang.String> getAttributesMap()
                              Returns a map of the names and values of this form control's output attributes.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     Element getElement()
                              Returns the element representing this form control in the source document.
                     FormControlType getFormControlType()
                              Returns the type of this form control.
                     java.lang.String getName()
                              Returns the name of the control.
                     java.util.Iterator<Element> getOptionElementIterator()
                              Returns an iterator over the OPTION elements contained within this control, in order of appearance.
                     FormControlOutputStyle getOutputStyle()
                              Returns the current output style of this form control.
                     java.lang.String getPredefinedValue()
                              Returns the initial value of this control if it has a predefined value.
                     java.util.Collection<java.lang.String> getPredefinedValues()
                              Returns a collection of all predefined values in this control in order of appearance.
                     java.util.List<java.lang.String> getValues()
                              Returns a list of the control's submission values in order of appearance.
                     boolean isChecked()
                              Indicates whether this form control is checked.
                     boolean isDisabled()
                              Indicates whether this form control is disabled.
                     void setDisabled(boolean disabled)
                              Sets whether this form control is disabled.
                     void setOutputStyle(FormControlOutputStyle outputStyle)
                              Sets the output style of this form control.
                    abstract  boolean setValue(java.lang.String value)
                              Sets the control's submission value *.
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getFormControlType

                    public final FormControlType getFormControlType()
                    Returns the type of this form control.

                    Returns:
                    the type of this form control.

                    getName

                    public final java.lang.String getName()
                    Returns the name of the control.

                    The name comes from the value of the name attribute of the form control's element, not the name of the element itself.

                    Since a FormField is simply a group of controls with the same name, the terms control name and field name are for the most part synonymous, with only a possible difference in case differentiating them.

                    In contrast to the FormField.getName() method, this method always returns the name using the original case from the source document, regardless of the current setting of the static Config.CurrentCompatibilityMode.FormFieldNameCaseInsensitive property.

                    Returns:
                    the name of the control.

                    getElement

                    public final Element getElement()
                    Returns the element representing this form control in the source document.

                    The attributes of this source element should correspond with the output attributes if the display characteristics or submission values have not been modified.

                    Returns:
                    the element representing this form control in the source document.

                    getOptionElementIterator

                    public java.util.Iterator<Element> getOptionElementIterator()
                    Returns an iterator over the OPTION elements contained within this control, in order of appearance.

                    This method is only relevant to form controls with a type of SELECT_SINGLE or SELECT_MULTIPLE.

                    Returns:
                    an iterator over the OPTION elements contained within this control, in order of appearance.
                    Throws:
                    java.lang.UnsupportedOperationException - if the type of this control is not SELECT_SINGLE or SELECT_MULTIPLE.

                    getOutputStyle

                    public FormControlOutputStyle getOutputStyle()
                    Returns the current output style of this form control.

                    This property affects how this form control is displayed if it has been replaced in an OutputDocument. See the documentation of the FormControlOutputStyle class for information on the available output styles.

                    The default output style for every form control is FormControlOutputStyle.NORMAL.

                    Returns:
                    the current output style of this form control.
                    See Also:
                    setOutputStyle(FormControlOutputStyle)

                    setOutputStyle

                    public void setOutputStyle(FormControlOutputStyle outputStyle)
                    Sets the output style of this form control.

                    See the getOutputStyle() method for a full description of this property.

                    Parameters:
                    outputStyle - the new output style of this form control.

                    getAttributesMap

                    public final java.util.Map<java.lang.String,java.lang.String> getAttributesMap()
                    Returns a map of the names and values of this form control's output attributes.

                    The term output attributes is used in this library to refer to the attributes of a form control's output element.

                    The map keys are the String attribute names, which should all be in lower case. The map values are the corresponding String attribute values, with a null value given to an attribute that has no value.

                    Direct manipulation of the returned map affects the attributes of this form control's output element. It is the responsibility of the user to ensure that all entries added to the map use the correct key and value types, and that all keys (attribute names) are in lower case.

                    It is recommended that the submission value modification methods are used to modify attributes that affect the submission value of the control rather than manipulating the attributes map directly.

                    An iteration over the map entries will return the attributes in the same order as they appeared in the source document, or at the end if the attribute was not present in the source document.

                    The returned attributes only correspond with those of the source element if the control's display characteristics and submission values have not been modified.

                    Returns:
                    a map of the names and values of this form control's output attributes.

                    isDisabled

                    public final boolean isDisabled()
                    Indicates whether this form control is disabled.

                    The form control is disabled if the attribute "disabled" is present in its output element.

                    The return value is logically equivalent to getAttributesMap().containsKey("disabled"), but using this property may be more efficient in some circumstances.

                    Returns:
                    true if this form control is disabled, otherwise false.

                    setDisabled

                    public final void setDisabled(boolean disabled)
                    Sets whether this form control is disabled.

                    If the argument supplied to this method is true and the disabled attribute is not already present in the output element, the full XHTML compatible attribute disabled="disabled" is added. If the attribute is already present, it is left unchanged.

                    If the argument supplied to this method is false, the attribute is removed from the output element.

                    See the isDisabled() method for more information.

                    Parameters:
                    disabled - the new value of this property.

                    isChecked

                    public boolean isChecked()
                    Indicates whether this form control is checked.

                    The term checked is used to describe a checkbox or radio button control that is selected, which is the case if the attribute "checked" is present in its output element.

                    This property is only relevant to form controls with a type of FormControlType.CHECKBOX or FormControlType.RADIO, and throws an UnsupportedOperationException for other control types.

                    Use one of the submission value modification methods to change the value of this property.

                    If this control is a checkbox, you can set it to checked by calling setValue(getName()), and set it to unchecked by calling clearValues().

                    If this control is a radio button, you should use the FormField.setValue(String) method or one of the other higher level submission value modification methods to set the control to checked, as calling the setValue(String) method on this object in the same way as for a checkbox does not automatically uncheck all other radio buttons with the same name. Even calling clearValues() on this object to ensure that this radio button is unchecked is not recommended, as it can lead to a situation where all the radio buttons with this name are unchecked. The HTML 4.01 specification of radio buttons recommends against this situation because it is not defined how user agents should handle it, and behaviour differs amongst browsers.

                    The return value is logically equivalent to getAttributesMap().containsKey("checked"), but using this property may be more efficient in some circumstances.

                    Returns:
                    true if this form control is checked, otherwise false.
                    Throws:
                    java.lang.UnsupportedOperationException - if the type of this control is not FormControlType.CHECKBOX or FormControlType.RADIO.

                    getPredefinedValue

                    public java.lang.String getPredefinedValue()
                    Returns the initial value of this control if it has a predefined value.

                    Only predefined value controls can return a non-null result. All other control types return null.

                    CHECKBOX and RADIO controls have a guaranteed predefined value determined by the value of its compulsory value attribute. If the attribute is not present in the source document, this library assigns the control a default predefined value of "on", consistent with popular browsers.

                    SUBMIT, BUTTON and IMAGE controls have an optional predefined value determined by the value of its value attribute. This value is successful only in the control used to submit the form.

                    FormControlType.SELECT_SINGLE and FormControlType.SELECT_MULTIPLE controls are special cases because they usually contain multiple OPTION elements, each with its own predefined value. In this case the getPredefinedValues() method should be used instead, which returns a collection of all the control's predefined values. Attempting to call this method on a SELECT control results in a java.lang.UnsupportedOperationException.

                    The predefined value of a control is not affected by changes to the submission value of the control.

                    Returns:
                    the initial value of this control if it has a predefined value, or null if none.

                    getPredefinedValues

                    public java.util.Collection<java.lang.String> getPredefinedValues()
                    Returns a collection of all predefined values in this control in order of appearance.

                    All objects in the returned collection are of type String, with no null entries.

                    This method is most useful for SELECT controls since they typically contain multiple predefined values. In other controls it returns a collection with zero or one item based on the output of the getPredefinedValue() method, so for efficiency it is recommended to use the getPredefinedValue() method instead.

                    The multiple predefined values of a SELECT control are defined by the OPTION elements within it. Each OPTION element has an initial value determined by the value of its value attribute, or if this attribute is not present, by its decoded content text with collapsed white space.

                    The predefined values of a control are not affected by changes to the submission values of the control.

                    Returns:
                    a collection of all predefined values in this control in order of appearance, guaranteed not null.
                    See Also:
                    FormField.getPredefinedValues()

                    getValues

                    public java.util.List<java.lang.String> getValues()
                    Returns a list of the control's submission values in order of appearance.

                    All objects in the returned list are of type String, with no null entries.

                    The term submission value is used in this library to refer to the value the control would contribute to the form data set of a submitted form, assuming no modification of the control's current value by the user agent or by end user interaction.

                    For user value controls, the submission value corresponds to the control's initial value.

                    The definition of the submission value for each predefined value control type is as follows:

                    A CHECKBOX or RADIO control has a submission value specified by its predefined value if it is checked; otherwise it has no submission value.

                    SELECT_SINGLE and SELECT_MULTIPLE controls have submission values specified by the values of the control's selected OPTION elements.

                    Only a SELECT_MULTIPLE control can have more than one submission value; all other control types return a list containing either one value or no values. A SELECT_SINGLE control only returns multiple submission values if it illegally contains multiple selected options in the source document.

                    SUBMIT, BUTTON, and IMAGE controls are only ever successful when they are activated by the user to submit the form. Because the submission value is intended to be a static representation of a control's data without interaction by the user, this library never associates submission values with submit buttons, so this method always returns an empty list for these control types.

                    The submission value(s) of a control can be modified for subsequent output in an OutputDocument using the various submission value modification methods, namely:
                    FormField.setValue(String)
                    FormField.addValue(String)
                    FormField.setValues(Collection)
                    FormField.clearValues()
                    FormFields.setValue(String fieldName, String value)
                    FormFields.addValue(String fieldName, String value)
                    FormFields.setDataSet(Map)
                    FormFields.clearValues()
                    FormControl.setValue(String)
                    FormControl.addValue(String)
                    FormControl.clearValues()

                    The values returned by this method reflect any changes made using the submission value modification methods, in contrast to methods found in the Attributes and Attribute classes, which always reflect the source document.

                    Returns:
                    a list of the control's submission values in order of appearance, guaranteed not null.
                    See Also:
                    getPredefinedValues()

                    clearValues

                    public final void clearValues()
                    Clears the control's existing submission values.

                    This is equivalent to setValue(null).

                    NOTE: The FormFields and FormField classes provide a more appropriate abstraction level for the modification of form control submission values.

                    See Also:
                    FormFields.clearValues(), FormField.clearValues()

                    setValue

                    public abstract boolean setValue(java.lang.String value)
                    Sets the control's submission value *.

                    * NOTE: The FormFields and FormField classes provide a more appropriate abstraction level for the modification of form control submission values. Consider using the FormFields.setValue(String fieldName, String value) method instead.

                    The specified value replaces any existing submission values of the control.

                    The return value indicates whether the control has "accepted" the value. For user value controls, the return value is always true.

                    For predefined value controls, calling this method does not affect the control's predefined values, but instead determines whether the control (or its options) become checked or selected as detailed below:

                    CHECKBOX and RADIO controls become checked and the method returns true if the specified value matches the control's predefined value (case sensitive), otherwise the control becomes unchecked and the method returns false. Note that any other controls with the same name are not unchecked if this control becomes checked, possibly resulting in an invalid state where multiple RADIO controls are checked at the same time. The FormField.setValue(String) method avoids such problems and its use is recommended over this method.

                    SELECT_SINGLE and SELECT_MULTIPLE controls receive the specified value by selecting the option with the matching value and deselecting all others. If none of the options match, all are deselected. The return value of this method indicates whether one of the options matched.

                    SUBMIT, BUTTON, and IMAGE controls never have a submission value, so calling this method has no effect and always returns false.

                    Parameters:
                    value - the new submission value of this control, or null to clear the control of all submission values.
                    Returns:
                    true if the control accepts the value, otherwise false.
                    See Also:
                    FormFields.setValue(String fieldName, String value)

                    addValue

                    public boolean addValue(java.lang.String value)
                    Adds the specified value to this control's submission values *.

                    * NOTE: The FormFields and FormField classes provide a more appropriate abstraction level for the modification of form control submission values. Consider using the FormFields.addValue(String fieldName, String value) method instead.

                    This is almost equivalent to setValue(String), with only the following differences:

                    CHECKBOX controls retain their existing submission value instead of becoming unchecked if the specified value does not match the control's predefined value.

                    SELECT_MULTIPLE controls retain their existing submission values, meaning that the control's OPTION elements whose predefined values do not match the specified value are not deselected. This is the only type of control that can have multiple submission values within the one control.

                    Parameters:
                    value - the value to add to this control's submission values, must not be null.
                    Returns:
                    true if the control accepts the value, otherwise false.
                    See Also:
                    FormFields.addValue(String fieldName, String value)

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Description copied from class: Segment
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Util.html0000644000175000017500000003122711214132422024431 0ustar twernertwerner Util (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Util

                    java.lang.Object
                      extended by Util
                    

                    public final class Util
                    extends java.lang.Object

                    Contains miscellaneous utility methods not directly associated with the HTML Parser library.


                    Method Summary
                    static java.lang.String getString(java.io.Reader reader)
                              Returns the text loaded from the specified Reader as a string.
                    static void outputCSVLine(java.io.Writer writer, java.lang.String[] values)
                              Outputs the specified array of strings to the specified Writer in the format of a line for a CSV file.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Method Detail

                    getString

                    public static java.lang.String getString(java.io.Reader reader)
                                                      throws java.io.IOException
                    Returns the text loaded from the specified Reader as a string.

                    If a null argument is supplied to this method, an empty string is returned.

                    To load text from an InputStream, use getString(new InputStreamReader(inputStream,encoding)).

                    Parameters:
                    reader - the java.io.Reader from which to load the text.
                    Returns:
                    the text loaded from the specified java.io.Reader as a string.
                    Throws:
                    java.io.IOException - if an I/O error occurs.

                    outputCSVLine

                    public static void outputCSVLine(java.io.Writer writer,
                                                     java.lang.String[] values)
                                              throws java.io.IOException
                    Outputs the specified array of strings to the specified Writer in the format of a line for a CSV file.

                    "CSV" stands for Comma Separated Values. There is no formal specification for a CSV file, so there is significant variation in the way different applications handle issues like the encoding of different data types and special characters.

                    Generally, a CSV file contains a list of records separated by line breaks, with each record consisting of a list of field values separated by commas. Each record in the file should contain the same number of field values, with the values at each position representing the same type of data in all the records. In this way the file can also be divided into columns, often with the first line of the file containing the column labels.

                    Columns can have different data types such as text, numeric, date / time and boolean. A text value is often delimited with single (') or double-quotes ("), especially if the value contains a comma, line feed, or other special character that is significant to the syntax. Encoding techniques for including quote characters themselves in text values vary widely. Values of other types are generally unquoted to distinguish them from text values.

                    This method produces output that is readable by MS-Excel, conforming to the following rules:

                    • All values are considered to be of type text, except for the static constants Config.ColumnValueTrue and Config.ColumnValueFalse, representing the boolean values true and false respectively.
                    • All text values are enclosed in double-quotes.
                    • Double-quote characters contained in text values are encoded using two consecutive double-quotes ("").
                    • null values are represented as empty fields.
                    • The end of each record is represented by a carriage-return / line-feed (CR/LF) pair.
                    • Line breaks inside text values are represented by a single line feed (LF) character.

                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    values - the array of strings to output as the field values of the CSV line.
                    Throws:
                    java.io.IOException - if an I/O error occurs.
                    See Also:
                    FormFields.getColumnLabels(), FormFields.getColumnValues(Map)


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/package-use.html0000644000175000017500000004653611214132424025714 0ustar twernertwerner Uses of Package net.htmlparser.jericho (Jericho HTML Parser 3.1)

                    Uses of Package
                    net.htmlparser.jericho

                    Classes in net.htmlparser.jericho used by net.htmlparser.jericho
                    Attribute
                              Represents a single attribute name/value segment within a StartTag.
                    Attributes
                              Represents the list of Attribute objects present within a particular StartTag.
                    CharacterReference
                              Represents an HTML Character Reference, implemented by the subclasses CharacterEntityReference and NumericCharacterReference.
                    CharStreamSource
                              Represents a character stream source.
                    Config.CompatibilityMode
                              Represents a set of configuration parameters that relate to user agent compatibility issues.
                    Element
                              Represents an element in a specific source document, which encompasses a start tag, an optional end tag and all content in between.
                    EndTag
                              Represents the end tag of an element in a specific source document.
                    EndTagType
                              Defines the syntax for an end tag type.
                    FormControl
                              Represents an HTML form control.
                    FormControlOutputStyle
                              An enumerated type representing the three major output styles of a form control's output element.
                    FormControlType
                              Represents the control type of a FormControl.
                    FormField
                              Represents a field in an HTML form, a field being defined as the group of all form controls having the same name.
                    FormFields
                              Represents a collection of FormField objects.
                    Logger
                              Defines the interface for handling log messages.
                    LoggerProvider
                              Defines the interface for a factory class to provide Logger instances for each Source object.
                    OutputSegment
                              Defines the interface for an output segment, which is used in an OutputDocument to replace segments of the source document with other text.
                    ParseText
                              Represents the text from the source document that is to be parsed.
                    Renderer
                              Performs a simple rendering of HTML markup into text.
                    RowColumnVector
                              Represents the row and column number of a character position in the source document.
                    Segment
                              Represents a segment of a Source document.
                    Source
                              Represents a source HTML document.
                    SourceCompactor
                              Compacts HTML source by removing all unnecessary white space.
                    SourceFormatter
                              Formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
                    StartTag
                              Represents the start tag of an element in a specific source document.
                    StartTagType
                              Defines the syntax for a start tag type.
                    StreamedSource
                              Represents a streamed source HTML document.
                    Tag
                              Represents either a StartTag or EndTag in a specific source document.
                    TagType
                              Defines the syntax for a tag type that can be recognised by the parser.
                    TextExtractor
                              Extracts the textual content from HTML markup.
                     

                    Classes in net.htmlparser.jericho used by net.htmlparser.jericho.nodoc
                    Segment
                              Represents a segment of a Source document.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/RowColumnVector.html0000644000175000017500000002665011214132422026630 0ustar twernertwerner RowColumnVector (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class RowColumnVector

                    java.lang.Object
                      extended by RowColumnVector
                    

                    public final class RowColumnVector
                    extends java.lang.Object

                    Represents the row and column number of a character position in the source document.

                    Obtained using the Source.getRowColumnVector(int pos) method.


                    Method Summary
                     int getColumn()
                              Returns the column number of this character position in the source document.
                     int getPos()
                              Returns the character position in the source document.
                     int getRow()
                              Returns the row number of this character position in the source document.
                     java.lang.String toString()
                              Returns a string representation of this character position.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getRow

                    public int getRow()
                    Returns the row number of this character position in the source document.

                    If a StreamedSource is in use, this method always returns -1.

                    Returns:
                    the row number of this character position in the source document.

                    getColumn

                    public int getColumn()
                    Returns the column number of this character position in the source document.

                    If a StreamedSource is in use, this method always returns -1.

                    Returns:
                    the column number of this character position in the source document.

                    getPos

                    public int getPos()
                    Returns the character position in the source document.

                    Returns:
                    the character position in the source document.

                    toString

                    public java.lang.String toString()
                    Returns a string representation of this character position.

                    The returned string has the format "(row,column:pos)".

                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    a string representation of this character position.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/FormControlOutputStyle.ConfigDisplayValue.html0000644000175000017500000005043311214132420033747 0ustar twernertwerner FormControlOutputStyle.ConfigDisplayValue (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class FormControlOutputStyle.ConfigDisplayValue

                    java.lang.Object
                      extended by FormControlOutputStyle.ConfigDisplayValue
                    
                    Enclosing class:
                    FormControlOutputStyle

                    public static final class FormControlOutputStyle.ConfigDisplayValue
                    extends java.lang.Object

                    Contains static properties that configure the FormControlOutputStyle.DISPLAY_VALUE form control output style.

                    None of the properties should be assigned a null value.

                    See the documentation of the FormControlOutputStyle.DISPLAY_VALUE output style for details on how these properties are used.


                    Field Summary
                    static java.util.List<java.lang.String> AttributeNames
                              Defines the names of the attributes that are copied from the normal form control output element to a display value element.
                    static java.lang.String CheckedHTML
                              Defines the HTML which replaces the normal output element of a CHECKBOX or RADIO form control if it contains a checked attribute.
                    static java.lang.String ElementName
                              Defines the name of display value elements.
                    static java.lang.String EmptyHTML
                              Defines the content of a display value element if the submission value of the control is null or an empty string.
                    static java.lang.String MultipleValueSeparator
                              Defines the text that is used to separate multiple values in a display value element.
                    static char PasswordChar
                              Defines the character used to represent the value of a PASSWORD form control in a display value element.
                    static java.lang.String UncheckedHTML
                              Defines the HTML which replaces the normal output element of a CHECKBOX or RADIO form control if it does not contain a checked attribute.
                     
                    Method Summary
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Field Detail

                    MultipleValueSeparator

                    public static volatile java.lang.String MultipleValueSeparator
                    Defines the text that is used to separate multiple values in a display value element.

                    This property is only relevant to SELECT_MULTIPLE form controls, and is only used if multiple items in the control are selected.

                    The default value is ", ".


                    ElementName

                    public static volatile java.lang.String ElementName
                    Defines the name of display value elements.

                    The default value is "div".

                    Although all form control elements are inline-level elements, the default replacement is the block-level DIV element, which allows richer stylesheet formatting than the most common alternative, the SPAN element, such as the ability to set its width and height.

                    This has the undesired effect in some cases of displaying the value on a new line, whereas the original form control was not on a new line. In practical use however, many form controls are placed inside table cells for better control over their positioning. In this case replacing the original inline form control with the block DIV element does not alter its position.


                    AttributeNames

                    public static volatile java.util.List<java.lang.String> AttributeNames
                    Defines the names of the attributes that are copied from the normal form control output element to a display value element.

                    The names included in the list by default are "id", "class" and "style".

                    These attributes are usually all that is needed to identify the elements in style sheets or specify the styles directly.

                    The default list is modifiable.


                    EmptyHTML

                    public static volatile java.lang.String EmptyHTML
                    Defines the content of a display value element if the submission value of the control is null or an empty string.

                    The content is not encoded before output.

                    The default content is "&nbsp;".


                    PasswordChar

                    public static volatile char PasswordChar
                    Defines the character used to represent the value of a PASSWORD form control in a display value element.

                    The character is repeated n times, where n is the number of characters in the control's submission value.

                    The resulting string is encoded before output.

                    The default password character is '*'.


                    CheckedHTML

                    public static volatile java.lang.String CheckedHTML
                    Defines the HTML which replaces the normal output element of a CHECKBOX or RADIO form control if it contains a checked attribute.

                    If this property is null, the output element is simply a disabled version of the form control.

                    The HTML is not encoded before output.

                    The default value is null.


                    UncheckedHTML

                    public static volatile java.lang.String UncheckedHTML
                    Defines the HTML which replaces the normal output element of a CHECKBOX or RADIO form control if it does not contain a checked attribute.

                    If this property is null, the output element is simply a disabled version of the form control.

                    The HTML is not encoded before output.

                    The default value is null.



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/HTMLElementName.html0000644000175000017500000040751611214132422026403 0ustar twernertwerner HTMLElementName (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Interface HTMLElementName


                    public interface HTMLElementName

                    Contains static fields representing the names of all elements defined in the HTML 4.01 specification.

                    All of the name strings are in lower case.

                    The HTMLElements class is closely related to this interface, containing static methods which group these names by the characteristics of their associated elements.

                    This interface does not specify any methods, but can be inherited by other classes, or statically imported (Java 5.0), to provide less verbose access to the contained element name static fields.

                    The field values in this interface can be used as name arguments in named tag searches.

                    Note that since the Tag class implements HTMLElementName, all the constants defined in this interface can be referred to via the Tag class.
                    For example, Tag.BODY is equivalent to HTMLElementName.BODY.

                    See Also:
                    HTMLElements, Element

                    Field Summary
                    static java.lang.String A
                              HTML element A - anchor.
                    static java.lang.String ABBR
                              HTML element ABBR - abbreviated form (e.g., WWW, HTTP, etc.).
                    static java.lang.String ACRONYM
                              HTML element ACRONYM - acronym.
                    static java.lang.String ADDRESS
                              HTML element ADDRESS - information on author.
                    static java.lang.String APPLET
                              HTML element APPLET - Java applet.
                    static java.lang.String AREA
                              HTML element AREA - client-side image map area.
                    static java.lang.String B
                              HTML element B - bold text style.
                    static java.lang.String BASE
                              HTML element BASE - document base URI.
                    static java.lang.String BASEFONT
                              HTML element BASEFONT - base font size.
                    static java.lang.String BDO
                              HTML element BDO - I18N BiDi over-ride.
                    static java.lang.String BIG
                              HTML element BIG - large text style.
                    static java.lang.String BLOCKQUOTE
                              HTML element BLOCKQUOTE - long quotation.
                    static java.lang.String BODY
                              HTML element BODY - document body.
                    static java.lang.String BR
                              HTML element BR - forced line break.
                    static java.lang.String BUTTON
                              HTML element BUTTON - push button.
                    static java.lang.String CAPTION
                              HTML element CAPTION - table caption.
                    static java.lang.String CENTER
                              HTML element CENTER - shorthand for DIV align=center.
                    static java.lang.String CITE
                              HTML element CITE - citation.
                    static java.lang.String CODE
                              HTML element CODE - computer code fragment.
                    static java.lang.String COL
                              HTML element COL - table column.
                    static java.lang.String COLGROUP
                              HTML element COLGROUP - table column group.
                    static java.lang.String DD
                              HTML element DD - definition description.
                    static java.lang.String DEL
                              HTML element DEL - deleted text.
                    static java.lang.String DFN
                              HTML element DFN - instance definition.
                    static java.lang.String DIR
                              HTML element DIR - directory list.
                    static java.lang.String DIV
                              HTML element DIV - generic language/style container.
                    static java.lang.String DL
                              HTML element DL - definition list.
                    static java.lang.String DT
                              HTML element DT - definition term.
                    static java.lang.String EM
                              HTML element EM - emphasis.
                    static java.lang.String FIELDSET
                              HTML element FIELDSET - form control group.
                    static java.lang.String FONT
                              HTML element FONT - local change to font.
                    static java.lang.String FORM
                              HTML element FORM - interactive form.
                    static java.lang.String FRAME
                              HTML element FRAME - subwindow.
                    static java.lang.String FRAMESET
                              HTML element FRAMESET - window subdivision.
                    static java.lang.String H1
                              HTML element H1 - heading.
                    static java.lang.String H2
                              HTML element H2 - heading.
                    static java.lang.String H3
                              HTML element H3 - heading.
                    static java.lang.String H4
                              HTML element H4 - heading.
                    static java.lang.String H5
                              HTML element H5 - heading.
                    static java.lang.String H6
                              HTML element H6 - heading.
                    static java.lang.String HEAD
                              HTML element HEAD - document head.
                    static java.lang.String HR
                              HTML element HR - horizontal rule.
                    static java.lang.String HTML
                              HTML element HTML - document root element.
                    static java.lang.String I
                              HTML element I - italic text style.
                    static java.lang.String IFRAME
                              HTML element IFRAME - inline subwindow.
                    static java.lang.String IMG
                              HTML element IMG - Embedded image.
                    static java.lang.String INPUT
                              HTML element INPUT - form control.
                    static java.lang.String INS
                              HTML element INS - inserted text.
                    static java.lang.String ISINDEX
                              HTML element ISINDEX - single line prompt.
                    static java.lang.String KBD
                              HTML element KBD - text to be entered by the user.
                    static java.lang.String LABEL
                              HTML element LABEL - form field label text.
                    static java.lang.String LEGEND
                              HTML element LEGEND - fieldset legend.
                    static java.lang.String LI
                              HTML element LI - list item.
                    static java.lang.String LINK
                              HTML element LINK - a media-independent link.
                    static java.lang.String MAP
                              HTML element MAP - client-side image map.
                    static java.lang.String MENU
                              HTML element MENU - menu list.
                    static java.lang.String META
                              HTML element META - generic metainformation.
                    static java.lang.String NOFRAMES
                              HTML element NOFRAMES - alternate content container for non frame-based rendering.
                    static java.lang.String NOSCRIPT
                              HTML element NOSCRIPT - alternate content container for non script-based rendering.
                    static java.lang.String OBJECT
                              HTML element OBJECT - generic embedded object.
                    static java.lang.String OL
                              HTML element OL - ordered list.
                    static java.lang.String OPTGROUP
                              HTML element OPTGROUP - option group.
                    static java.lang.String OPTION
                              HTML element OPTION - selectable choice.
                    static java.lang.String P
                              HTML element P - paragraph.
                    static java.lang.String PARAM
                              HTML element PARAM - named property value.
                    static java.lang.String PRE
                              HTML element PRE - preformatted text.
                    static java.lang.String Q
                              HTML element Q - short inline quotation.
                    static java.lang.String S
                              HTML element S - strike-through text style.
                    static java.lang.String SAMP
                              HTML element SAMP - sample program output, scripts, etc.
                    static java.lang.String SCRIPT
                              HTML element SCRIPT - script statements.
                    static java.lang.String SELECT
                              HTML element SELECT - option selector.
                    static java.lang.String SMALL
                              HTML element SMALL - small text style.
                    static java.lang.String SPAN
                              HTML element SPAN - generic language/style container.
                    static java.lang.String STRIKE
                              HTML element STRIKE - strike-through text.
                    static java.lang.String STRONG
                              HTML element STRONG - strong emphasis.
                    static java.lang.String STYLE
                              HTML element STYLE - style info.
                    static java.lang.String SUB
                              HTML element SUB - subscript.
                    static java.lang.String SUP
                              HTML element SUP - superscript.
                    static java.lang.String TABLE
                              HTML element TABLE - table.
                    static java.lang.String TBODY
                              HTML element TBODY - table body.
                    static java.lang.String TD
                              HTML element TD - table data cell.
                    static java.lang.String TEXTAREA
                              HTML element TEXTAREA - multi-line text field.
                    static java.lang.String TFOOT
                              HTML element TFOOT - table footer.
                    static java.lang.String TH
                              HTML element TH - table header cell.
                    static java.lang.String THEAD
                              HTML element THEAD - table header.
                    static java.lang.String TITLE
                              HTML element TITLE - document title.
                    static java.lang.String TR
                              HTML element TR - table row.
                    static java.lang.String TT
                              HTML element TT - teletype or monospaced text style.
                    static java.lang.String U
                              HTML element U - underlined text style.
                    static java.lang.String UL
                              HTML element UL - unordered list.
                    static java.lang.String VAR
                              HTML element VAR - instance of a variable or program argument.
                     

                    Field Detail

                    A

                    static final java.lang.String A
                    HTML element A - anchor.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    ABBR

                    static final java.lang.String ABBR
                    HTML element ABBR - abbreviated form (e.g., WWW, HTTP, etc.).

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    ACRONYM

                    static final java.lang.String ACRONYM
                    HTML element ACRONYM - acronym.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    ADDRESS

                    static final java.lang.String ADDRESS
                    HTML element ADDRESS - information on author.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    APPLET

                    static final java.lang.String APPLET
                    HTML element APPLET - Java applet.

                    This is an inline-level element.

                    The end tag of this element is required.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    AREA

                    static final java.lang.String AREA
                    HTML element AREA - client-side image map area.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    B

                    static final java.lang.String B
                    HTML element B - bold text style.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    BASE

                    static final java.lang.String BASE
                    HTML element BASE - document base URI.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    BASEFONT

                    static final java.lang.String BASEFONT
                    HTML element BASEFONT - base font size.

                    This is an inline-level element.

                    The end tag of this element is forbidden.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    BDO

                    static final java.lang.String BDO
                    HTML element BDO - I18N BiDi over-ride.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    BIG

                    static final java.lang.String BIG
                    HTML element BIG - large text style.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    BLOCKQUOTE

                    static final java.lang.String BLOCKQUOTE
                    HTML element BLOCKQUOTE - long quotation.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    BODY

                    static final java.lang.String BODY
                    HTML element BODY - document body.

                    The start tag of this element is optional.

                    The end tag of this element is optional:
                    Terminating start tags: (none)
                    Terminating end tags: BODY, HTML
                    Nonterminating elements: HTML

                    Note that the HTML element is included as a nonterminating element in case the source contains (illegally) nested HTML elements.

                    See Also:
                    Constant Field Values

                    BR

                    static final java.lang.String BR
                    HTML element BR - forced line break.

                    This is an inline-level element.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    BUTTON

                    static final java.lang.String BUTTON
                    HTML element BUTTON - push button.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    CAPTION

                    static final java.lang.String CAPTION
                    HTML element CAPTION - table caption.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    CENTER

                    static final java.lang.String CENTER
                    HTML element CENTER - shorthand for DIV align=center.

                    This is a block-level element.

                    The end tag of this element is required.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    CITE

                    static final java.lang.String CITE
                    HTML element CITE - citation.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    CODE

                    static final java.lang.String CODE
                    HTML element CODE - computer code fragment.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    COL

                    static final java.lang.String COL
                    HTML element COL - table column.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    COLGROUP

                    static final java.lang.String COLGROUP
                    HTML element COLGROUP - table column group.

                    The end tag of this element is optional:
                    Terminating start tags: COLGROUP, TBODY, TFOOT, THEAD, TR
                    Terminating end tags: COLGROUP, TABLE
                    Nonterminating elements: TABLE

                    See Also:
                    Constant Field Values

                    DD

                    static final java.lang.String DD
                    HTML element DD - definition description.

                    The end tag of this element is optional:
                    Terminating start tags: DD, DT
                    Terminating end tags: DD, DL
                    Nonterminating elements: DL

                    See Also:
                    Constant Field Values

                    DEL

                    static final java.lang.String DEL
                    HTML element DEL - deleted text.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    DFN

                    static final java.lang.String DFN
                    HTML element DFN - instance definition.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    DIR

                    static final java.lang.String DIR
                    HTML element DIR - directory list.

                    This is a block-level element.

                    The end tag of this element is required.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    DIV

                    static final java.lang.String DIV
                    HTML element DIV - generic language/style container.

                    The end tag of this element is required.

                    This is a block-level element.

                    See Also:
                    Constant Field Values

                    DL

                    static final java.lang.String DL
                    HTML element DL - definition list.

                    The end tag of this element is required.

                    This is a block-level element.

                    See Also:
                    Constant Field Values

                    DT

                    static final java.lang.String DT
                    HTML element DT - definition term.

                    The end tag of this element is optional:
                    Terminating start tags: DD, DT
                    Terminating end tags: DL, DT
                    Nonterminating elements: DL

                    See Also:
                    Constant Field Values

                    EM

                    static final java.lang.String EM
                    HTML element EM - emphasis.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    FIELDSET

                    static final java.lang.String FIELDSET
                    HTML element FIELDSET - form control group.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    FONT

                    static final java.lang.String FONT
                    HTML element FONT - local change to font.

                    This is an inline-level element.

                    The end tag of this element is required.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    FORM

                    static final java.lang.String FORM
                    HTML element FORM - interactive form.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    FRAME

                    static final java.lang.String FRAME
                    HTML element FRAME - subwindow.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    FRAMESET

                    static final java.lang.String FRAMESET
                    HTML element FRAMESET - window subdivision.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    H1

                    static final java.lang.String H1
                    HTML element H1 - heading.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    H2

                    static final java.lang.String H2
                    HTML element H2 - heading.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    H3

                    static final java.lang.String H3
                    HTML element H3 - heading.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    H4

                    static final java.lang.String H4
                    HTML element H4 - heading.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    H5

                    static final java.lang.String H5
                    HTML element H5 - heading.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    H6

                    static final java.lang.String H6
                    HTML element H6 - heading.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    HEAD

                    static final java.lang.String HEAD
                    HTML element HEAD - document head.

                    The start tag of this element is optional.

                    The end tag of this element is optional:
                    Terminating start tags: BODY, FRAMESET
                    Terminating end tags: HEAD, HTML
                    Nonterminating elements: (none)

                    See Also:
                    Constant Field Values

                    HR

                    static final java.lang.String HR
                    HTML element HR - horizontal rule.

                    This is a block-level element.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    HTML

                    static final java.lang.String HTML
                    HTML element HTML - document root element.

                    The start tag of this element is optional.

                    The end tag of this element is optional:
                    Terminating start tags: (none)
                    Terminating end tags: HTML
                    Nonterminating elements: HTML

                    Note that the HTML element is included as a nonterminating element in case the source contains (illegally) nested HTML elements.

                    See Also:
                    Constant Field Values

                    I

                    static final java.lang.String I
                    HTML element I - italic text style.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    IFRAME

                    static final java.lang.String IFRAME
                    HTML element IFRAME - inline subwindow.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    IMG

                    static final java.lang.String IMG
                    HTML element IMG - Embedded image.

                    This is an inline-level element.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    INPUT

                    static final java.lang.String INPUT
                    HTML element INPUT - form control.

                    This is an inline-level element.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    INS

                    static final java.lang.String INS
                    HTML element INS - inserted text.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    ISINDEX

                    static final java.lang.String ISINDEX
                    HTML element ISINDEX - single line prompt.

                    This is a block-level element.

                    The end tag of this element is forbidden.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    KBD

                    static final java.lang.String KBD
                    HTML element KBD - text to be entered by the user.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    LABEL

                    static final java.lang.String LABEL
                    HTML element LABEL - form field label text.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    LEGEND

                    static final java.lang.String LEGEND
                    HTML element LEGEND - fieldset legend.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    LI

                    static final java.lang.String LI
                    HTML element LI - list item.

                    The end tag of this element is optional:
                    Terminating start tags: LI
                    Terminating end tags: LI, OL, UL
                    Nonterminating elements: OL, UL

                    See Also:
                    Constant Field Values

                    LINK

                    static final java.lang.String LINK
                    HTML element LINK - a media-independent link.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    MAP

                    static final java.lang.String MAP
                    HTML element MAP - client-side image map.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    MENU

                    static final java.lang.String MENU
                    HTML element MENU - menu list.

                    This is a block-level element.

                    The end tag of this element is required.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    META

                    static final java.lang.String META
                    HTML element META - generic metainformation.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    NOFRAMES

                    static final java.lang.String NOFRAMES
                    HTML element NOFRAMES - alternate content container for non frame-based rendering.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    NOSCRIPT

                    static final java.lang.String NOSCRIPT
                    HTML element NOSCRIPT - alternate content container for non script-based rendering.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    OBJECT

                    static final java.lang.String OBJECT
                    HTML element OBJECT - generic embedded object.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    OL

                    static final java.lang.String OL
                    HTML element OL - ordered list.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    OPTGROUP

                    static final java.lang.String OPTGROUP
                    HTML element OPTGROUP - option group.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    OPTION

                    static final java.lang.String OPTION
                    HTML element OPTION - selectable choice.

                    The end tag of this element is optional:
                    Terminating start tags: OPTGROUP, OPTION
                    Terminating end tags: OPTION, SELECT
                    Nonterminating elements: (none)

                    See Also:
                    Constant Field Values

                    P

                    static final java.lang.String P
                    HTML element P - paragraph.

                    This is a block-level element.

                    The end tag of this element is optional:
                    Terminating start tags: All block-level element names,
                    DD, DT, LI, TD, TH
                    Terminating end tags: All block-level element names,
                    BODY, CAPTION, DD, DT, HTML, LEGEND, TD, TH, TBODY, TFOOT, THEAD, TR
                    Nonterminating elements: (none)

                    The definition of this element in the HTML 4.01 specification explicitly states that the P element cannot contain block-level elements. Despite this, all of the popular browsers (in at least some modes of operation) allow P elements to enclose TABLE elements, which are also block-level elements.

                    It is possible to make this parser compatible with this incorrect behaviour by executing the following code:

                     HTMLElements.getTerminatingStartTagNames(HTMLElementName.P).remove(HTMLElementName.TABLE);
                     HTMLElements.getNonterminatingElementNames(HTMLElementName.P).add(HTMLElementName.TABLE);

                    See Also:
                    Constant Field Values

                    PARAM

                    static final java.lang.String PARAM
                    HTML element PARAM - named property value.

                    The end tag of this element is forbidden.

                    See Also:
                    Constant Field Values

                    PRE

                    static final java.lang.String PRE
                    HTML element PRE - preformatted text.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    Q

                    static final java.lang.String Q
                    HTML element Q - short inline quotation.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    S

                    static final java.lang.String S
                    HTML element S - strike-through text style.

                    This is an inline-level element.

                    The end tag of this element is required.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    SAMP

                    static final java.lang.String SAMP
                    HTML element SAMP - sample program output, scripts, etc.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    SCRIPT

                    static final java.lang.String SCRIPT
                    HTML element SCRIPT - script statements.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    SELECT

                    static final java.lang.String SELECT
                    HTML element SELECT - option selector.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    SMALL

                    static final java.lang.String SMALL
                    HTML element SMALL - small text style.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    SPAN

                    static final java.lang.String SPAN
                    HTML element SPAN - generic language/style container.

                    This is an inline-level element.

                    See Also:
                    Constant Field Values

                    STRIKE

                    static final java.lang.String STRIKE
                    HTML element STRIKE - strike-through text.

                    This is an inline-level element.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    STRONG

                    static final java.lang.String STRONG
                    HTML element STRONG - strong emphasis.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    STYLE

                    static final java.lang.String STYLE
                    HTML element STYLE - style info.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    SUB

                    static final java.lang.String SUB
                    HTML element SUB - subscript.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    SUP

                    static final java.lang.String SUP
                    HTML element SUP - superscript.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    TABLE

                    static final java.lang.String TABLE
                    HTML element TABLE - table.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    TBODY

                    static final java.lang.String TBODY
                    HTML element TBODY - table body.

                    The start tag of this element is optional.

                    The end tag of this element is optional:
                    Terminating start tags: TBODY, TFOOT, THEAD
                    Terminating end tags: TABLE, TBODY
                    Nonterminating elements: TABLE

                    Note that the TFOOT and THEAD elements are included as terminating start tags, even though the HTML 4.01 specification section 11.2.3 states that they must precede the TBODY element inside a TABLE. Most browsers tolerate an incorrect ordering of the THEAD, TFOOT and TBODY elements, so this parser also recognises the elements in any order.

                    See Also:
                    Constant Field Values

                    TD

                    static final java.lang.String TD
                    HTML element TD - table data cell.

                    The end tag of this element is optional:
                    Terminating start tags: TBODY, TD, TFOOT, TH, THEAD, TR
                    Terminating end tags: TABLE, TBODY, TD, TFOOT, THEAD, TR
                    Nonterminating elements: TABLE

                    See Also:
                    Constant Field Values

                    TEXTAREA

                    static final java.lang.String TEXTAREA
                    HTML element TEXTAREA - multi-line text field.

                    The end tag of this element is required.

                    This is an inline-level element.

                    See Also:
                    Constant Field Values

                    TFOOT

                    static final java.lang.String TFOOT
                    HTML element TFOOT - table footer.

                    The end tag of this element is optional:
                    Terminating start tags: TBODY, TFOOT, THEAD
                    Terminating end tags: TABLE, TFOOT
                    Nonterminating elements: TABLE

                    See Also:
                    Constant Field Values

                    TH

                    static final java.lang.String TH
                    HTML element TH - table header cell.

                    The end tag of this element is optional:
                    Terminating start tags: TBODY, TD, TFOOT, TH, THEAD, TR
                    Terminating end tags: TABLE, TBODY, TFOOT, TH, THEAD, TR
                    Nonterminating elements: TABLE

                    See Also:
                    Constant Field Values

                    THEAD

                    static final java.lang.String THEAD
                    HTML element THEAD - table header.

                    The end tag of this element is optional:
                    Terminating start tags: TBODY, TFOOT, THEAD
                    Terminating end tags: TABLE, THEAD
                    Nonterminating elements: TABLE

                    See Also:
                    Constant Field Values

                    TITLE

                    static final java.lang.String TITLE
                    HTML element TITLE - document title.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    TR

                    static final java.lang.String TR
                    HTML element TR - table row.

                    The end tag of this element is optional:
                    Terminating start tags: TBODY, TFOOT, THEAD, TR
                    Terminating end tags: TABLE, TBODY, TFOOT, THEAD, TR
                    Nonterminating elements: TABLE

                    See Also:
                    Constant Field Values

                    TT

                    static final java.lang.String TT
                    HTML element TT - teletype or monospaced text style.

                    The end tag of this element is required.

                    This is an inline-level element.

                    See Also:
                    Constant Field Values

                    U

                    static final java.lang.String U
                    HTML element U - underlined text style.

                    This is an inline-level element.

                    The end tag of this element is required.

                    This element is deprecated in HTML 4.01. (see HTMLElements.getDeprecatedElementNames())

                    See Also:
                    Constant Field Values

                    UL

                    static final java.lang.String UL
                    HTML element UL - unordered list.

                    This is a block-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values

                    VAR

                    static final java.lang.String VAR
                    HTML element VAR - instance of a variable or program argument.

                    This is an inline-level element.

                    The end tag of this element is required.

                    See Also:
                    Constant Field Values


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/FormFields.html0000644000175000017500000016732111214132422025553 0ustar twernertwerner FormFields (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class FormFields

                    java.lang.Object
                      extended by java.util.AbstractCollection<FormField>
                          extended by FormFields
                    
                    All Implemented Interfaces:
                    java.lang.Iterable<FormField>, java.util.Collection<FormField>

                    public final class FormFields
                    extends java.util.AbstractCollection<FormField>

                    Represents a collection of FormField objects.

                    This class provides the main interface for the analysis and manipulation of form controls. A FormFields object is a collection of FormField objects, with each form field consisting of a group of form controls having the same name.

                    The functionality provided by this class can be used to accomplish two main tasks:

                    1. Modify the submission values of the constituent form controls for subsequent output in an OutputDocument.

                      The methods available for this purpose are:
                      List<String> getValues(String fieldName)
                      Map<String,String[]> getDataSet()
                      void clearValues()
                      void setDataSet(Map<String,String[]>)
                      boolean setValue(String fieldName, String value)
                      boolean addValue(String fieldName, String value)

                      Although the FormField and FormControl classes provide methods for directly modifying the submission values of individual form fields and controls, it is generally recommended to use the interface provided by this (the FormFields) class unless there is a specific requirement for the lower level functionality.

                      The display characteristics of individual controls, such as whether the control is disabled, replaced with a simple value, or removed altogether, can only be set on the individual FormControl objects. See below for information about retrieving a specific FormControl object from the FormFields object.

                    2. Convert data from a form data set (represented as a field data set) into a simple array format, suitable for storage in a tabular format such as a database table or .CSV file.

                      The methods available for this purpose are:
                      String[] getColumnLabels()
                      String[] getColumnValues(Map)
                      String[] getColumnValues()

                      The Util class contains a method called outputCSVLine(Writer,String[]) which writes the String[] output of these methods to the specified Writer in .CSV format.

                      The implementation of these methods makes use of certain properties in the FormField class that describe the structure of the data in each field. These properties can be utilised directly in the event that a form data set is to be converted from its normal format into some other type of data structure.

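                    To access a specific FormControl from a FormFields object, use:
                      formFields.get(fieldName).getFormControl() if the control is the only one with the specified name.
                      formFields.get(fieldName).getFormControl(predefinedValue) to retrieve the control with the specified predefined value when the field contains multiple controls.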

                    The term field data set is used in this library to refer to a data structure consisting of a set of names (in lower case), each mapped to one or more values. Generally, this is represented by a data type of java.util.Map<String,String[]>, with the keys (names) being of type String and the values represented by an array containing one or more items of type String. A field data set can be used to represent the data in an HTML form data set.

                    FormFields instances are obtained using the FormFields(Collection formControls) constructor or by calling the Segment.getFormFields() method.

                    The case sensitivity of form field names is determined by the static Config.CurrentCompatibilityMode.FormFieldNameCaseInsensitive property.

                    Examples:

                    1. Write the data received in the current ServletRequest to a .CSV file, and then display the form populated with this data:

                          Source source=new Source(htmlTextOfOriginatingForm);
                          FormFields formFields=source.getFormFields();
                      
                          File csvOutputFile=new File("FormData.csv");
                          boolean outputHeadings=!csvOutputFile.exists();
                          Writer writer=new FileWriter(csvOutputFile,true);
                          if (outputHeadings) Util.outputCSVLine(writer,formFields.getColumnLabels());
                          Util.outputCSVLine(writer,formFields.getColumnValues(servletRequest.getParameterMap()));
                          writer.close();
                      
                          formFields.setDataSet(servletRequest.getParameterMap());
                          OutputDocument outputDocument=new OutputDocument(source);
                          outputDocument.replace(formFields);
                          outputDocument.writeTo(servletResponse.getWriter());

                      See also the sample program FormFieldCSVOutput.

                    2. Replace the initial values of controls in the form named "MyForm" with new values:

                          Source source=new Source(htmlText);
                          Element myForm=null;
                          List formElements=source.getAllElements(Tag.FORM);
                          for (Iterator i=formElements.iterator(); i.hasNext();) {
                            Element formElement=(Element)i.next();
                            String formName=formElement.getAttributes().getValue("name");
                            if ("MyForm".equals(formName)) {
                              myForm=formElement;
                              break;
                            }
                          }
                          FormFields formFields=myForm.getFormFields();
                          formFields.clearValues(); // clear any values that might be set in the source document
                          formFields.addValue("Name","Humphrey Bear");
                          formFields.addValue("MailingList","A");
                          formFields.addValue("MailingList","B");
                          formFields.addValue("FavouriteFare","honey");
                          OutputDocument outputDocument=new OutputDocument(source);
                          outputDocument.replace(formFields);
                          String newHtmlText=outputDocument.toString();

                      See also the sample program FormFieldSetValues.

                    3. Change the display characteristics of individual controls:

                          Source source=new Source(htmlText);
                          FormFields formFields=source.getFormFields();
                          // disable some controls:
                          formFields.get("Password").getFormControl().setDisabled(true);
                          FormField mailingListFormField=formFields.get("MailingList");
                          mailingListFormField.setValue("C");
                          mailingListFormField.getFormControl("C").setDisabled(true);
                          mailingListFormField.getFormControl("D").setDisabled(true);
                          // remove some controls:
                          formFields.get("button1").getFormControl().setOutputStyle(FormControlOutputStyle.REMOVE);
                          FormControl rhubarbFormControl=formFields.get("FavouriteFare").getFormControl("rhubarb");
                          rhubarbFormControl.setOutputStyle(FormControlOutputStyle.REMOVE);
                          // set some controls to display value:
                          formFields.setValue("Address","The Lodge\nDeakin  ACT  2600\nAustralia");
                          formFields.get("Address").getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE);
                          FormField favouriteSportsFormField=formFields.get("FavouriteSports");
                          favouriteSportsFormField.setValue("BB");
                          favouriteSportsFormField.addValue("AFL");
                          favouriteSportsFormField.getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE);
                          OutputDocument outputDocument=new OutputDocument(source);
                          outputDocument.replace(formFields); // adds all segments necessary to effect changes
                          String newHtmlText=outputDocument.toString();

                      See also the sample program FormControlDisplayCharacteristics.

                    See Also:
                    FormField, FormControl

                    Constructor Summary
                    FormFields(java.util.Collection<FormControl> formControls)
                              Constructs a new FormFields object consisting of the specified form controls.
                     
                    Method Summary
                     boolean addValue(java.lang.String fieldName, java.lang.String value)
                              Adds the specified value to the field submission values of the constituent form field with the specified name.
                     void clearValues()
                              Clears the submission values of all the constituent form controls.
                     FormField get(java.lang.String fieldName)
                              Returns the FormField with the specified name.
                     java.lang.String[] getColumnLabels()
                              Returns a string array containing the column labels corresponding to the values from the getColumnValues(Map) method.
                     java.lang.String[] getColumnValues()
                              Converts all the form submission values of the constituent form fields into a simple string array, suitable for storage in a tabular format such as a database table or .CSV file.
                     java.lang.String[] getColumnValues(java.util.Map<java.lang.String,java.lang.String[]> dataSet)
                              Converts the data values in the specified field data set into a simple string array, suitable for storage in a tabular format such as a database table or .CSV file.
                     int getCount()
                              Returns the number of FormField objects.
                     java.util.Map<java.lang.String,java.lang.String[]> getDataSet()
                              Returns the entire field data set represented by the values of the constituent form fields.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     java.util.List getFormControls()
                              Returns a list of all the constituent form controls from all the form fields in this collection.
                     java.util.List<java.lang.String> getValues(java.lang.String fieldName)
                              Returns a list of the field submission values of all the specified constituent form fields with the specified name.
                     java.util.Iterator<FormField> iterator()
                              Returns an iterator over the FormField objects in the collection.
                     void merge(FormFields formFields)
                              Merges the specified FormFields into this FormFields collection.
                     void setDataSet(java.util.Map<java.lang.String,java.lang.String[]> dataSet)
                              Sets the submission values of all the constituent form controls to match the data in the specified field data set.
                     boolean setValue(java.lang.String fieldName, java.lang.String value)
                              Sets the field submission values of the constituent form field with the specified name to the single specified value.
                     int size()
                              Returns the number of FormField objects.
                     java.lang.String toString()
                              Returns a string representation of this object useful for debugging purposes.
                     
                    Methods inherited from class java.util.AbstractCollection
                    add, addAll, clear, contains, containsAll, isEmpty, remove, removeAll, retainAll, toArray, toArray
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     
                    Methods inherited from interface java.util.Collection
                    equals, hashCode
                     

                    Constructor Detail

                    FormFields

                    public FormFields(java.util.Collection<FormControl> formControls)
                    Constructs a new FormFields object consisting of the specified form controls.

                    Parameters:
                    formControls - a collection of FormControl objects.
                    See Also:
                    Segment.getFormFields()
                    Method Detail

                    getCount

                    public int getCount()
                    Returns the number of FormField objects.

                    Returns:
                    the number of FormField objects.

                    size

                    public int size()
                    Returns the number of FormField objects.

                    This is equivalent to getCount(), and is necessary for the implementation of the java.util.Collection interface.

                    Specified by:
                    size in interface java.util.Collection<FormField>
                    Specified by:
                    size in class java.util.AbstractCollection<FormField>
                    Returns:
                    the number of FormField objects.

                    get

                    public FormField get(java.lang.String fieldName)
                    Returns the FormField with the specified name.

                    The case sensitivity of the fieldName argument is determined by the static Config.CurrentCompatibilityMode.FormFieldNameCaseInsensitive property.

                    Parameters:
                    fieldName - the name of the FormField to get.
                    Returns:
                    the FormField with the specified name, or null if no FormField with the specified name exists.

                    iterator

                    public java.util.Iterator<FormField> iterator()
                    Returns an iterator over the FormField objects in the collection.

                    The order in which the form fields are iterated corresponds to the order of appearance of each form field's first FormControl in the source document.

                    If this FormFields object has been merged with another, the ordering is no longer defined.

                    Specified by:
                    iterator in interface java.lang.Iterable<FormField>
                    Specified by:
                    iterator in interface java.util.Collection<FormField>
                    Specified by:
                    iterator in class java.util.AbstractCollection<FormField>
                    Returns:
                    an iterator over the FormField objects in the collection.

                    getValues

                    public java.util.List<java.lang.String> getValues(java.lang.String fieldName)
                    Returns a list of the field submission values of all the specified constituent form fields with the specified name.

                    All objects in the returned list are of type String, with no null entries.

                    This is equivalent to get(fieldName).getValues(), assuming that a field with the specified name exists in this collection.

                    Parameters:
                    fieldName - the name of the form field.
                    Returns:
                    a list of the field submission values of the constituent form field with the specified name, or null if no form field with this name exists.
                    See Also:
                    FormField.getValues()

                    getDataSet

                    public java.util.Map<java.lang.String,java.lang.String[]> getDataSet()
                    Returns the entire field data set represented by the values of the constituent form fields.

                    The values in the map returned by this method are represented as a string array, giving the map a format consistent with the javax.servlet.ServletRequest.getParameterMap() method.

                    Only the names of form fields with at least one value are included in the map, meaning every String[] is guaranteed to have at least one entry.

                    Iterating over the map keys returns them in the order of appearance in the source document.

                    Returns:
                    the entire field data set represented by the values of the constituent form fields.
                    See Also:
                    setDataSet(Map)

                    clearValues

                    public void clearValues()
                    Clears the submission values of all the constituent form controls.

                    See Also:
                    FormControl.clearValues()

                    setDataSet

                    public void setDataSet(java.util.Map<java.lang.String,java.lang.String[]> dataSet)
                    Sets the submission values of all the constituent form controls to match the data in the specified field data set.

                    The map keys must be String field names, with each map value an array of String objects containing the field's new values.

                    The map returned by the javax.servlet.ServletRequest.getParameterMap() method has a suitable format for use with this method.

                    All existing values are cleared before the values from the field data set are added.

                    Any map entries with a null value are ignored.

                    Parameters:
                    dataSet - the field data set containing the new values of the constituent form fields.
                    See Also:
                    getDataSet()

                    setValue

                    public boolean setValue(java.lang.String fieldName,
                                            java.lang.String value)
                    Sets the field submission values of the constituent form field with the specified name to the single specified value.

                    This is equivalent to get(fieldName).setValue(value), assuming that a field with the specified name exists in this collection.

                    The return value indicates whether the specified form field "accepted" the value. A return value of false implies an error condition as either no field with the specified name exists, or the specified value is not compatible with the specified field.

                    Parameters:
                    fieldName - the name of the form field.
                    value - the new field submission value of the specified field, or null to clear the field of all submission values.
                    Returns:
                    true if a field of the specified name exists in this collection and it accepts the specified value, otherwise false.

                    addValue

                    public boolean addValue(java.lang.String fieldName,
                                            java.lang.String value)
                    Adds the specified value to the field submission values of the constituent form field with the specified name.

                    This is equivalent to get(fieldName).addValue(value), assuming that a field with the specified name exists in this collection.

                    The return value indicates whether the specified form field "accepted" the value. A return value of false implies an error condition as either no field with the specified name exists, or the specified value is not compatible with the specified field.

                    Parameters:
                    fieldName - the name of the form field.
                    value - the new field submission value to add to the specified field, must not be null.
                    Returns:
                    true if a field of the specified name exists in this collection and it accepts the specified value, otherwise false.

                    getColumnLabels

                    public java.lang.String[] getColumnLabels()
                    Returns a string array containing the column labels corresponding to the values from the getColumnValues(Map) method.

                    Instead of using the name of each constituent form field to construct the labels, the name of the first form control from each form field is used. This allows the labels to be constructed using the names with the original case from the source document rather than using the all lower case names of the form fields.

                    See the documentation of the getColumnValues(Map) method for more details.

                    Returns:
                    a string array containing the column labels corresponding to the values from the getColumnValues(Map) method.
                    See Also:
                    Util.outputCSVLine(Writer,String[])

                    getColumnValues

                    public java.lang.String[] getColumnValues(java.util.Map<java.lang.String,java.lang.String[]> dataSet)
                    Converts the data values in the specified field data set into a simple string array, suitable for storage in a tabular format such as a database table or .CSV file.

                    The conversion is performed in a way that allows the multiple values of certain fields to be stored in separate columns, by analysing the possible form data sets that can be generated from the constituent form controls.

                    The column labels and values are determined as follows:

                    The sample program FormFieldCSVOutput demonstrates the use of this method and its output.

                    Parameters:
                    dataSet - a field data set containing the data to convert.
                    Returns:
                    the data values in the specified field data set in the form of a simple string array.
                    See Also:
                    Util.outputCSVLine(Writer,String[]), getColumnLabels(), getColumnValues()

                    getColumnValues

                    public java.lang.String[] getColumnValues()
                    Converts all the form submission values of the constituent form fields into a simple string array, suitable for storage in a tabular format such as a database table or .CSV file.

                    This is equivalent to getColumnValues(getDataSet()).

                    Returns:
                    all the form submission values of the constituent form fields in the form of a simple string array.

                    getFormControls

                    public java.util.List getFormControls()
                    Returns a list of all the constituent form controls from all the form fields in this collection.

                    Returns:
                    a list of all the constituent form controls from all the form fields in this collection.

                    merge

                    public void merge(FormFields formFields)
                    Merges the specified FormFields into this FormFields collection. This is useful if a full collection of possible form fields is required from multiple source documents.

                    If both collections contain a FormField with the same name, the resulting FormField has the following properties:

                    NOTE: Some underlying data structures may end up being shared between the two merged FormFields collections.


                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Returns:
                    a string representation of this object useful for debugging purposes.

                    toString

                    public java.lang.String toString()
                    Returns a string representation of this object useful for debugging purposes.

                    This is equivalent to getDebugInfo().

                    Overrides:
                    toString in class java.util.AbstractCollection<FormField>
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Attribute.html0000644000175000017500000007274011214132420025462 0ustar twernertwerner Attribute (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Attribute

                    java.lang.Object
                      extended by Segment
                          extended by Attribute
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>

                    public final class Attribute
                    extends Segment

                    Represents a single attribute name/value segment within a StartTag.

                    An instance of this class is a representation of a single attribute in the source document and is not modifiable. The OutputDocument.replace(Attributes, Map) and OutputDocument.replace(Attributes, boolean convertNamesToLowerCase) methods provide the means to add, delete or modify attributes and their values in an OutputDocument.

                    Obtained using the Attributes.get(String key) method.

                    See also the XML 1.0 specification for attributes.

                    See Also:
                    Attributes

                    Method Summary
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     java.lang.String getKey()
                              Returns the name of this attribute in lower case.
                     java.lang.String getName()
                              Returns the name of this attribute in original case.
                     Segment getNameSegment()
                              Returns the segment spanning the name of this attribute.
                     char getQuoteChar()
                              Returns the character used to quote the value.
                     java.lang.String getValue()
                              Returns the decoded value of this attribute, or null if it has no value.
                     Segment getValueSegment()
                              Returns the segment spanning the value of this attribute, or null if it has no value.
                     Segment getValueSegmentIncludingQuotes()
                              Returns the segment spanning the value of this attribute, including quotation marks if any, or null if it has no value.
                     boolean hasValue()
                              Indicates whether this attribute has a value.
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getKey

                    public java.lang.String getKey()
                    Returns the name of this attribute in lower case.

                    This package treats all attribute names as case insensitive, consistent with HTML but not consistent with XHTML.

                    Returns:
                    the name of this attribute in lower case.
                    See Also:
                    getName()

                    getName

                    public java.lang.String getName()
                    Returns the name of this attribute in original case.

                    This is exactly equivalent to getNameSegment().toString().

                    Returns:
                    the name of this attribute in original case.
                    See Also:
                    getKey()

                    getNameSegment

                    public Segment getNameSegment()
                    Returns the segment spanning the name of this attribute.

                    Returns:
                    the segment spanning the name of this attribute.
                    See Also:
                    getName()

                    hasValue

                    public boolean hasValue()
                    Indicates whether this attribute has a value.

                    This method also returns true if this attribute has been assigned a zero-length value.

                    It only returns false if this attribute appears in minimized form.

                    Returns:
                    true if this attribute has a value, otherwise false.

                    getValue

                    public java.lang.String getValue()
                    Returns the decoded value of this attribute, or null if it has no value.

                    This is equivalent to CharacterReference.decode(getValueSegment(),true).

                    Note that before version 1.4.1 this method returned the raw value of the attribute as it appears in the source document, without decoding.

                    To obtain the raw value without decoding, use getValueSegment().toString().

                    Special attention should be given to attributes that contain URLs, such as the href attribute. When such an attribute contains a URL with parameters (as described in the form-urlencoded media type), the ampersand (&) characters used to separate the parameters should be encoded to prevent the parameter names from being unintentionally interpreted as character entity references. This requirement is explicitly stated in the HTML 4.01 specification section 5.3.2.

                    For example, take the following element in the source document:

                    <a href="Report.jsp?chapt=2&sect=3">next</a>
                    By default, calling getAttributes().getValue("href") on this element returns the string "Report.jsp?chapt=2§=3", since the text "&sect" is interpreted as the rarely used character entity reference &sect; (U+00A7), despite the fact that it is missing the terminating semicolon (;).

                    Most browsers recognise unterminated character entity references in attribute values representing a codepoint of U+00FF or below, but ignore those representing codepoints above this value. One relatively popular browser only recognises those representing a codepoint of U+003E or below, meaning it would have interpreted the URL in the above example differently to most other browsers. Most browsers also use different rules depending on whether the unterminated character reference is inside or outside of an attribute value, with both of these possibilities further split into different rules for character entity references, decimal character references, and hexadecimal character references.

                    The behaviour of this library is determined by the current compatibility mode setting, which is determined by the static Config.CurrentCompatibilityMode property.

                    Returns:
                    the decoded value of this attribute, or null if it has no value.

                    getValueSegment

                    public Segment getValueSegment()
                    Returns the segment spanning the value of this attribute, or null if it has no value.

                    Returns:
                    the segment spanning the value of this attribute, or null if it has no value.
                    See Also:
                    getValue()

                    getValueSegmentIncludingQuotes

                    public Segment getValueSegmentIncludingQuotes()
                    Returns the segment spanning the value of this attribute, including quotation marks if any, or null if it has no value.

                    If the value is not enclosed by quotation marks, this is the same as the value segment.

                    Returns:
                    the segment spanning the value of this attribute, including quotation marks if any, or null if it has no value.

                    getQuoteChar

                    public char getQuoteChar()
                    Returns the character used to quote the value.

                    The return value is either a double-quote ("), a single-quote ('), or a space.

                    Returns:
                    the character used to quote the value, or a space if the value is not quoted or this attribute has no value.

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/FormControlType.html0000644000175000017500000015251611214132422026627 0ustar twernertwerner FormControlType (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Enum FormControlType

                    java.lang.Object
                      extended by java.lang.Enum<FormControlType>
                          extended by FormControlType
                    
                    All Implemented Interfaces:
                    java.io.Serializable, java.lang.Comparable<FormControlType>

                    public enum FormControlType
                    extends java.lang.Enum<FormControlType>

                    Represents the control type of a FormControl.

                    Use the FormControl.getFormControlType() method to determine the type of a form control.

                    The following table shows the relationship between the HTML 4.01 specification control type descriptions, their associated Element names and attributes, and the FormControlType constants defined in this class:
                    Description Element Name Distinguishing Attribute FormControlType
                    buttons - submit button BUTTON type="submit" BUTTON
                    INPUT type="submit" SUBMIT
                    type="image" IMAGE
                    buttons - reset button BUTTON, INPUT type="reset" -
                    buttons - push button BUTTON, INPUT type="button" -
                    checkboxes INPUT type="checkbox" CHECKBOX
                    radio buttons INPUT type="radio" RADIO
                    menus SELECT multiple SELECT_MULTIPLE
                    absence of multiple SELECT_SINGLE
                    text input INPUT type="text" TEXT
                    type="password" PASSWORD
                    TEXTAREA - TEXTAREA
                    file select INPUT type="file" FILE
                    hidden controls INPUT type="hidden" HIDDEN
                    object controls OBJECT - -
                    Reset buttons and push buttons have no associated FormControlType because they do not contribute to the form data set of a submitted form, and so have no relevance to the methods provided in the FormControl and associated classes. If required they can be found and manipulated as normal elements.

                    Object controls have no associated FormControlType because any data they might contribute to the form data set is entirely dependent on the class of object, the interpretation of which is beyond the scope of this library.

                    This library does not consider the OPTION elements found within SELECT elements to be controls themselves, despite them being referred to as such in some parts of the HTML 4.01 specification. Hence the absence of an OPTION control type.

                    See Also:
                    FormControl, FormField

                    Enum Constant Summary
                    BUTTON
                              The form control type given to a submit button control implemented using a BUTTON element.
                    CHECKBOX
                              The form control type given to a checkbox control.
                    FILE
                              The form control type given to a file select control.
                    HIDDEN
                              The form control type given to a hidden control.
                    IMAGE
                              The form control type given to a submit button control implemented using an INPUT element with attribute type="image".
                    PASSWORD
                              The form control type given to a text input control implemented using an INPUT element with attribute type="password".
                    RADIO
                              The form control type given to a radio button control.
                    SELECT_MULTIPLE
                              The form control type given to a menu control implemented using a SELECT element containing the attribute "multiple".
                    SELECT_SINGLE
                              The form control type given to a menu control implemented using a SELECT element that does not contain the attribute "multiple".
                    SUBMIT
                              The form control type given to a submit button control implemented using an INPUT element with attribute type="submit".
                    TEXT
                              The form control type given to a text input control implemented using an INPUT element with attribute type="text".
                    TEXTAREA
                              The form control type given to a text input control implemented using a TEXTAREA element.
                     
                    Method Summary
                     java.lang.String getElementName()
                              Returns the name of the Element that constitutes this form control type.
                     boolean hasPredefinedValue()
                              Indicates whether any value submitted by this type of control is predefined in the HTML and typically not modified by the user or server/client scripts.
                     boolean isSubmit()
                              Indicates whether this control type causes the form to be submitted.
                    static FormControlType valueOf(java.lang.String name)
                              Returns the enum constant of this type with the specified name.
                    static FormControlType[] values()
                              Returns an array containing the constants of this enum type, in the order they're declared.
                     
                    Methods inherited from class java.lang.Enum
                    clone, compareTo, equals, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
                     
                    Methods inherited from class java.lang.Object
                    finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Enum Constant Detail

                    BUTTON

                    public static final FormControlType BUTTON
                    The form control type given to a submit button control implemented using a BUTTON element.

                    Example:
                    <button type="submit" name="FieldName" value="PredefinedValue">Send</button>
                    Properties:
                    getElementName() = HTMLElementName.BUTTON
                    hasPredefinedValue() = true
                    isSubmit() = true


                    CHECKBOX

                    public static final FormControlType CHECKBOX
                    The form control type given to a checkbox control.

                    Example:
                    <input type="checkbox" name="FieldName" value="PredefinedValue" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = true
                    isSubmit() = false


                    FILE

                    public static final FormControlType FILE
                    The form control type given to a file select control.

                    This library considers the submission value of this type of control to consist of only the selected file name, regardless of whether the file content would normally be included in the form data set.

                    To determine manually whether the file content is included in the form data set, the enctype attribute of the control's associated FORM element can be examined. Although the exact behaviour is not defined in the HTML 4.01 specification, the convention is that the content is not included unless an enctype value of "multipart/form-data" is specified.

                    For more information see the HTML 4.01 specification section 17.13.4 - Form content types.

                    Example:
                    <input type="file" name="FieldName" value="DefaultFileName" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = false
                    isSubmit() = false


                    HIDDEN

                    public static final FormControlType HIDDEN
                    The form control type given to a hidden control.

                    Example:
                    <input type="hidden" name="FieldName" value="DefaultValue" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = false
                    isSubmit() = false
                    Note that hasPredefinedValue() returns false for this control type because the value of hidden fields is usually set via server or client side scripting.


                    IMAGE

                    public static final FormControlType IMAGE
                    The form control type given to a submit button control implemented using an INPUT element with attribute type="image".

                    See the description under the heading "image" in the HTML 4.01 specification section 17.4.1 - Form control types created with INPUT.

                    When a form control of type IMAGE is present in the form used to construct a FormFields instance, three separate FormField objects are created for the one control. One has the name specified in the name attribute of the INPUT element, and the other two have this name with the suffixes ".x" and ".y" appended to them to represent the additional click coordinates submitted by this control when activated using a pointing device.

                    This type of control is also mentioned in the HTML 4.01 specification section 13.6.2 - Server-side image maps.

                    Example:
                    <input type="image" name="FieldName" src="ImageURL" value="PredefinedValue" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = true
                    isSubmit() = true


                    PASSWORD

                    public static final FormControlType PASSWORD
                    The form control type given to a text input control implemented using an INPUT element with attribute type="password".

                    Example:
                    <input type="password" name="FieldName" value="DefaultValue" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = false
                    isSubmit() = false


                    RADIO

                    public static final FormControlType RADIO
                    The form control type given to a radio button control.

                    Example:
                    <input type="radio" name="FieldName" value="PredefinedValue" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = true
                    isSubmit() = false


                    SELECT_MULTIPLE

                    public static final FormControlType SELECT_MULTIPLE
                    The form control type given to a menu control implemented using a SELECT element containing the attribute "multiple".

                    SELECT elements that do not contain the attribute "multiple" are represented by the SELECT_SINGLE form control type.

                    This is the only control type that can have multiple submission values within the one control. Contrast this with CHECKBOX controls, which require multiple separate controls with the same name in order to contribute multiple submission values.

                    The individual OPTION elements contained within a form control of this type can be obtained using the FormControl.getOptionElementIterator() method.

                    The most efficient way to test whether a form control type is either SELECT_MULTIPLE or SELECT_SINGLE is to test for getElementName()==HTMLElementName.SELECT.

                    Example:
                    <select name="FieldName" multiple>
                      <option value="PredefinedValue1" selected>Display Text1</option>
                      <option value="PredefinedValue2">Display Text2</option>
                    </select>
                    Properties:
                    getElementName() = HTMLElementName.SELECT
                    hasPredefinedValue() = true
                    isSubmit() = false


                    SELECT_SINGLE

                    public static final FormControlType SELECT_SINGLE
                    The form control type given to a menu control implemented using a SELECT element that does not contain the attribute "multiple".

                    SELECT elements that do contain the attribute "multiple" are represented by the SELECT_MULTIPLE form control type.

                    The individual OPTION elements contained within a form control of this type can be obtained using the FormControl.getOptionElementIterator() method.

                    The most efficient way to test whether a form control type is either SELECT_MULTIPLE or SELECT_SINGLE is to test for getElementName()==HTMLElementName.SELECT.

                    Example:
                    <select name="FieldName">
                      <option value="PredefinedValue1" selected>Display Text1</option>
                      <option value="PredefinedValue2">Display Text2</option>
                    </select>
                    Properties:
                    getElementName() = HTMLElementName.SELECT
                    hasPredefinedValue() = true
                    isSubmit() = false


                    SUBMIT

                    public static final FormControlType SUBMIT
                    The form control type given to a submit button control implemented using an INPUT element with attribute type="submit".

                    Example:
                    <input type="submit" name="FieldName" value="PredefinedValue" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = true
                    isSubmit() = true


                    TEXT

                    public static final FormControlType TEXT
                    The form control type given to a text input control implemented using an INPUT element with attribute type="text".

                    Example:
                    <input type="text" name="FieldName" value="DefaultValue" />
                    Properties:
                    getElementName() = HTMLElementName.INPUT
                    hasPredefinedValue() = false
                    isSubmit() = false


                    TEXTAREA

                    public static final FormControlType TEXTAREA
                    The form control type given to a text input control implemented using a TEXTAREA element.

                    Example:
                    <textarea name="FieldName">Default Value</textarea>
                    Properties:
                    getElementName() = HTMLElementName.TEXTAREA
                    hasPredefinedValue() = false
                    isSubmit() = false

                    Method Detail

                    values

                    public static final FormControlType[] values()
                    Returns an array containing the constants of this enum type, in the order they're declared. This method may be used to iterate over the constants as follows:
                    for(FormControlType c : FormControlType.values())
                            System.out.println(c);
                    

                    Returns:
                    an array containing the constants of this enum type, in the order they're declared

                    valueOf

                    public static FormControlType valueOf(java.lang.String name)
                    Returns the enum constant of this type with the specified name. The string must match exactly an identifier used to declare an enum constant in this type. (Extraneous whitespace characters are not permitted.)

                    Parameters:
                    name - the name of the enum constant to be returned.
                    Returns:
                    the enum constant with the specified name
                    Throws:
                    java.lang.IllegalArgumentException - if this enum type has no constant with the specified name

                    getElementName

                    public java.lang.String getElementName()
                    Returns the name of the Element that constitutes this form control type.

                    Returns:
                    the name of the Element that constitutes this form control type.

                    hasPredefinedValue

                    public boolean hasPredefinedValue()
                    Indicates whether any value submitted by this type of control is predefined in the HTML and typically not modified by the user or server/client scripts.

                    The word "typically" is used because the use of client side scripts can cause control types which normally have predefined values to be set by the user, a condition that is beyond the scope of this library to test for.

                    The predefined value is defined by the control's initial value.

                    A return value of true signifies that a form control of this type is a predefined value control.

                    A return value of false signifies that a form control of this type is a user value control.

                    Note that the HIDDEN type returns false for this method because the value of hidden fields is usually set via server or client side scripting.

                    Returns:
                    true if any value submitted by this type of control is predefined in the HTML and typically not modified by the user or server/client scripts, otherwise false.

                    isSubmit

                    public boolean isSubmit()
                    Indicates whether this control type causes the form to be submitted.

                    Returns true only for the SUBMIT, BUTTON, and IMAGE instances.

                    Returns:
                    true if this control type causes the form to be submitted, otherwise false.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/MasonTagTypes.html0000644000175000017500000006761111214132422026260 0ustar twernertwerner MasonTagTypes (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class MasonTagTypes

                    java.lang.Object
                      extended by MasonTagTypes
                    

                    public final class MasonTagTypes
                    extends java.lang.Object

                    Contains tag types related to the Mason server platform.

                    There is no specific tag type defined for the Mason substitution tag as it is recognised using the common server tag type.

                    The tag types defined in this class are not registered by default. The register() method is provided as a convenient way to register them all at once.


                    Field Summary
                    static StartTagType MASON_COMPONENT_CALL
                              The tag type given to a Mason component call (<& ... &>).
                    static StartTagType MASON_COMPONENT_CALLED_WITH_CONTENT
                              The tag type given to the start tag of a Mason component called with content (<&| ... &> ... </&>).
                    static EndTagType MASON_COMPONENT_CALLED_WITH_CONTENT_END
                              The tag type given to the end tag of a Mason component called with content.
                    static StartTagType MASON_NAMED_BLOCK
                              The tag type given to the start tag of a Mason named block (<%name ... > ... </%name>).
                    static EndTagType MASON_NAMED_BLOCK_END
                              The tag type given to the end tag of a Mason named block.
                     
                    Method Summary
                    static boolean defines(TagType tagType)
                              Indicates whether the specified tag type is defined in this class.
                    static boolean isParsedByMason(TagType tagType)
                              Indicates whether the specified tag type is recognised by a Mason parser.
                    static void register()
                              Registers all of the tag types defined in this class at once.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Field Detail

                    MASON_COMPONENT_CALL

                    public static final StartTagType MASON_COMPONENT_CALL
                    The tag type given to a Mason component call (<& ... &>).

                    Properties:
                    Property | Value
                    Description | mason component call
                    StartDelimiter | <&
                    ClosingDelimiter | &>
                    IsServerTag | true
                    NamePrefix | &
                    CorrespondingEndTagType | null
                    HasAttributes | false
                    IsNameAfterPrefixRequired | false
                    Example:
                    <& menu &>


                    MASON_COMPONENT_CALLED_WITH_CONTENT

                    public static final StartTagType MASON_COMPONENT_CALLED_WITH_CONTENT
                    The tag type given to the start tag of a Mason component called with content (<&| ... &> ... </&>).

                    Properties:
                    Property | Value
                    Description | mason component called with content
                    StartDelimiter | <&|
                    ClosingDelimiter | &>
                    IsServerTag | true
                    NamePrefix | &|
                    CorrespondingEndTagType | MASON_COMPONENT_CALLED_WITH_CONTENT_END
                    HasAttributes | false
                    IsNameAfterPrefixRequired | false
                    Example:
                     <&| /sql/select, query => 'SELECT name, age FROM User' &>
                       <tr><td>%name</td><td>%age</td></tr>
                     </&>


                    MASON_COMPONENT_CALLED_WITH_CONTENT_END

                    public static final EndTagType MASON_COMPONENT_CALLED_WITH_CONTENT_END
                    The tag type given to the end tag of a Mason component called with content.

                    See the corresponding start tag type MASON_COMPONENT_CALLED_WITH_CONTENT for more details.

                    Properties:
                    Property/Method | Value
                    Description | /mason component called with content
                    StartDelimiter | </&
                    ClosingDelimiter | >
                    IsServerTag | true
                    NamePrefix | /&
                    CorrespondingStartTagType | MASON_COMPONENT_CALLED_WITH_CONTENT
                    generateHTML("StartTagName") | </&>
                    Example:
                    </&>

                    See Also:
                    MASON_COMPONENT_CALLED_WITH_CONTENT

                    MASON_NAMED_BLOCK

                    public static final StartTagType MASON_NAMED_BLOCK
                    The tag type given to the start tag of a Mason named block (<%name ... > ... </%name>).

                    A tag of this type must not have a '%' character before its closing delimiter, otherwise it is most likely a common server tag.

                    For the start tag to be recognised, a corresponding end tag of the correct type must exist somewhere in the source document following the start tag.

                    Properties:
                    Property | Value
                    Description | mason named block
                    StartDelimiter | <%
                    ClosingDelimiter | >
                    IsServerTag | true
                    NamePrefix | %
                    CorrespondingEndTagType | MASON_NAMED_BLOCK_END
                    HasAttributes | false
                    IsNameAfterPrefixRequired | true
                    Example:
                    <%perl> print "hello world"; </%perl>


                    MASON_NAMED_BLOCK_END

                    public static final EndTagType MASON_NAMED_BLOCK_END
                    The tag type given to the end tag of a Mason named block.

                    See the corresponding start tag type MASON_NAMED_BLOCK for more details.

                    Properties:
                    Property/Method | Value
                    Description | /mason named block
                    StartDelimiter | </%
                    ClosingDelimiter | >
                    IsServerTag | true
                    NamePrefix | /%
                    CorrespondingStartTagType | MASON_NAMED_BLOCK
                    generateHTML("%StartTagName") | </%StartTagName>
                    Example:
                    </%perl>

                    See Also:
                    MASON_NAMED_BLOCK
                    Method Detail

                    register

                    public static void register()
                    Registers all of the tag types defined in this class at once.

                    The tag types must be registered before the parser will recognise them.


                    defines

                    public static boolean defines(TagType tagType)
                    Indicates whether the specified tag type is defined in this class.

                    Parameters:
                    tagType - the TagType to test.
                    Returns:
                    true if the specified tag type is defined in this class, otherwise false.

                    isParsedByMason

                    public static boolean isParsedByMason(TagType tagType)
                    Indicates whether the specified tag type is recognised by a Mason parser.

                    This is true if the specified tag type is defined in this class or if it is the common server tag type.

                    Parameters:
                    tagType - the TagType to test.
                    Returns:
                    true if the specified tag type is recognised by a Mason parser, otherwise false.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/EndTag.html0000644000175000017500000010353311214132420024654 0ustar twernertwerner EndTag (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class EndTag

                    java.lang.Object
                      extended by Segment
                          extended by Tag
                              extended by EndTag
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>

                    public final class EndTag
                    extends Tag

                    Represents the end tag of an element in a specific source document.

                    An end tag always has a type that is a subclass of EndTagType, meaning it always starts with the characters '</'.

                    EndTag instances are obtained using one of the following methods:

                    The Tag superclass defines the getName() method used to get the name of this end tag.

                    See also the XML 1.0 specification for end tags.

                    See Also:
                    Tag, StartTag, Element

                    Method Summary
                    static java.lang.String generateHTML(java.lang.String tagName)
                              Generates the HTML text of a normal end tag with the specified tag name.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     Element getElement()
                              Returns the element that is ended by this end tag.
                     EndTagType getEndTagType()
                              Returns the type of this end tag.
                     TagType getTagType()
                              Returns the type of this tag.
                     boolean isUnregistered()
                              Indicates whether this tag has a syntax that does not match any of the registered tag types.
                     java.lang.String tidy()
                              Returns an XML representation of this end tag.
                     
                    Methods inherited from class Tag
                    getName, getNameSegment, getNextTag, getPreviousTag, getUserData, isXMLName, isXMLNameChar, isXMLNameStartChar, setUserData
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getChildElements, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getElement

                    public Element getElement()
                    Returns the element that is ended by this end tag.

                    Returns null if this end tag is not properly matched to any start tag in the source document.

                    This method is much less efficient than the StartTag.getElement() method.

                    IMPLEMENTATION NOTE: The explanation for why this method is relatively inefficient lies in the fact that more than one start tag type can have the same corresponding end tag type, so it is not possible to know for certain which type of start tag this end tag is matched to (see EndTagType.getCorrespondingStartTagType() for more explanation). Because of this uncertainty, the implementation of this method must check every start tag preceding this end tag, calling its StartTag.getElement() method to see whether it is terminated by this end tag.

                    Specified by:
                    getElement in class Tag
                    Returns:
                    the element that is ended by this end tag.

                    getEndTagType

                    public EndTagType getEndTagType()
                    Returns the type of this end tag.

                    This is equivalent to (EndTagType)getTagType().

                    Returns:
                    the type of this end tag.

                    getTagType

                    public TagType getTagType()
                    Description copied from class: Tag
                    Returns the type of this tag.

                    Specified by:
                    getTagType in class Tag
                    Returns:
                    the type of this tag.

                    isUnregistered

                    public boolean isUnregistered()
                    Description copied from class: Tag
                    Indicates whether this tag has a syntax that does not match any of the registered tag types.

                    The only requirement of an unregistered tag type is that it starts with '<' and there is a closing '>' character at some position after it in the source document.

                    The absence or presence of a '/' character after the initial '<' determines whether an unregistered tag is respectively a StartTag with a type of StartTagType.UNREGISTERED or an EndTag with a type of EndTagType.UNREGISTERED.

                    There are no restrictions on the characters that might appear between these delimiters, including other '<' characters. This may result in a '>' character that is identified as the closing delimiter of two separate tags, one an unregistered tag, and the other a tag of any type that begins in the middle of the unregistered tag. As explained below, unregistered tags are usually only found when specifically looking for them, so it is up to the user to detect and deal with any such nonsensical results.

                    Unregistered tags are only returned by the Source.getTagAt(int pos) method, named search methods, where the specified name matches the first characters inside the tag, and by tag type search methods, where the specified tagType is either StartTagType.UNREGISTERED or EndTagType.UNREGISTERED.

                    Open tag searches and other searches always ignore unregistered tags, although every discovery of an unregistered tag is logged by the parser.

                    The logic behind this design is that unregistered tag types are usually the result of a '<' character in the text that was mistakenly left unencoded, or a less-than operator inside a script, or some other occurrence which is of no interest to the user. By returning unregistered tags in named and tag type search methods, the library allows the user to specifically search for tags with a certain syntax that does not match any existing TagType. This expediency feature avoids the need for the user to create a custom tag type to define the syntax before searching for these tags. By not returning unregistered tags in the less specific search methods, it is providing only the information that most users are interested in.

                    Specified by:
                    isUnregistered in class Tag
                    Returns:
                    true if this tag has a syntax that does not match any of the registered tag types, otherwise false.

                    tidy

                    public java.lang.String tidy()
                    Returns an XML representation of this end tag.

                    This method is included for symmetry with the StartTag.tidy() method and simply returns the source text of the tag.

                    Specified by:
                    tidy in class Tag
                    Returns:
                    an XML representation of this end tag.

                    generateHTML

                    public static java.lang.String generateHTML(java.lang.String tagName)
                    Generates the HTML text of a normal end tag with the specified tag name.

                    Example:

                    The following method call:

                    EndTag.generateHTML("INPUT")
                    returns the following output:
                    </INPUT>

                    Parameters:
                    tagName - the name of the end tag.
                    Returns:
                    the HTML text of a normal end tag with the specified tag name.
                    See Also:
                    StartTag.generateHTML(String tagName, Map attributesMap, boolean emptyElementTag)

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Description copied from class: Segment
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/FormControlOutputStyle.html0000644000175000017500000006135711214132420030227 0ustar twernertwerner FormControlOutputStyle (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Enum FormControlOutputStyle

                    java.lang.Object
                      extended by java.lang.Enum<FormControlOutputStyle>
                          extended by FormControlOutputStyle
                    
                    All Implemented Interfaces:
                    java.io.Serializable, java.lang.Comparable<FormControlOutputStyle>

                    public enum FormControlOutputStyle
                    extends java.lang.Enum<FormControlOutputStyle>

                    An enumerated type representing the three major output styles of a form control's output element.

                    A form control's output style is set using the FormControl.setOutputStyle(FormControlOutputStyle) method.


                    Nested Class Summary
                    static class FormControlOutputStyle.ConfigDisplayValue
                              Contains static properties that configure the DISPLAY_VALUE form control output style.
                     
                    Enum Constant Summary
                    DISPLAY_VALUE
                              The normal output element is replaced with a simple representation of the form control's submission value(s).
                    NORMAL
                              Normal display of the output element.
                    REMOVE
                              Remove the output element from the output document completely.
                     
                    Method Summary
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                    static FormControlOutputStyle valueOf(java.lang.String name)
                              Returns the enum constant of this type with the specified name.
                    static FormControlOutputStyle[] values()
                              Returns an array containing the constants of this enum type, in the order they're declared.
                     
                    Methods inherited from class java.lang.Enum
                    clone, compareTo, equals, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
                     
                    Methods inherited from class java.lang.Object
                    finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Enum Constant Detail

                    NORMAL

                    public static final FormControlOutputStyle NORMAL
                    Normal display of the output element.

                    This is the default display style.


                    REMOVE

                    public static final FormControlOutputStyle REMOVE
                    Remove the output element from the output document completely.


                    DISPLAY_VALUE

                    public static final FormControlOutputStyle DISPLAY_VALUE
                    The normal output element is replaced with a simple representation of the form control's submission value(s).

                    The implementation of this functionality is highly subjective, but provides a more aesthetic way of displaying a read-only version of a form without having to resort to using disabled controls.

                    The representation is dependent on the form control type, and can be configured using the static properties of the ConfigDisplayValue nested class.

                    Unless specified otherwise below, the normal output element is replaced with a display value element having the name specified in the static ConfigDisplayValue.ElementName property (div by default). The attributes specified in the static ConfigDisplayValue.AttributeNames list (id, class and style by default) are copied from the normal output element into the display value element.

                    Details of the content of the display value element or other representation of the control value are as follows:

                    TEXT, FILE
                    The content of the display value element is the re-encoded value of the normal output element's value attribute.
                    TEXTAREA
                    The content of the display value element is the content of the TEXTAREA element re-encoded with white space formatting.
                    CHECKBOX, RADIO
                    The normal output element is replaced with the un-encoded content specified in the ConfigDisplayValue.CheckedHTML or ConfigDisplayValue.UncheckedHTML static property, depending on whether the normal output element contains a checked attribute. If the relevant static property has a value of null (the default), the output element is simply a disabled version of the form control. Attempting to determine which labels might apply to which checkbox or radio button, allowing only the selected controls to be displayed, would require a very complex and inexact algorithm, so is best left to the developer to implement if required.
                    SELECT_SINGLE, SELECT_MULTIPLE
                    The content of the display value element is the re-encoded label of the currently selected option. In the case of a SELECT_MULTIPLE control, all labels of selected options are listed, separated by the text specified in the static ConfigDisplayValue.MultipleValueSeparator property (", " by default).
                    PASSWORD
                    The content of the display value element is the encoded character specified in the ConfigDisplayValue.PasswordChar static property ('*' by default), repeated n times, where n is the number of characters in the control's submission value.
                    HIDDEN
                    The output element is removed completely.
                    BUTTON, SUBMIT, IMAGE
                    The output element is a disabled version of the original form control.

                    If the submission value of the control is null or an empty string, the display value element is given the un-encoded content specified in the ConfigDisplayValue.EmptyHTML static property.

                    Method Detail

                    values

                    public static final FormControlOutputStyle[] values()
                    Returns an array containing the constants of this enum type, in the order they're declared. This method may be used to iterate over the constants as follows:
                    for(FormControlOutputStyle c : FormControlOutputStyle.values())
                            System.out.println(c);
                    

                    Returns:
                    an array containing the constants of this enum type, in the order they're declared

                    valueOf

                    public static FormControlOutputStyle valueOf(java.lang.String name)
                    Returns the enum constant of this type with the specified name. The string must match exactly an identifier used to declare an enum constant in this type. (Extraneous whitespace characters are not permitted.)

                    Parameters:
                    name - the name of the enum constant to be returned.
                    Returns:
                    the enum constant with the specified name
                    Throws:
                    java.lang.IllegalArgumentException - if this enum type has no constant with the specified name

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    This is equivalent to Enum.toString().

                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Config.html0000644000175000017500000006460111214132420024721 0ustar twernertwerner Config (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Config

                    java.lang.Object
                      extended by Config
                    

                    public final class Config
                    extends java.lang.Object

                    Encapsulates global configuration properties which determine the behaviour of various functions.

                    All of the properties in this class are static, affecting all objects and threads. Multiple concurrent configurations are not possible.

                    Properties that relate to user agent compatibility issues are stored in instances of the Config.CompatibilityMode class. This allows all of the properties in the compatibility mode to be set as a block by setting the static CurrentCompatibilityMode property to a different instance.

                    See Also:
                    Config.CompatibilityMode

                    Nested Class Summary
                    static class Config.CompatibilityMode
                              Represents a set of configuration parameters that relate to user agent compatibility issues.
                     
                    Field Summary
                    static java.lang.String ColumnMultipleValueSeparator
                              Determines the string used to separate a single column's multiple values in the output of the FormFields.getColumnValues(Map) method.
                    static java.lang.String ColumnValueFalse
                              Determines the string that represents the value false in the output of the FormFields.getColumnValues(Map) method.
                    static java.lang.String ColumnValueTrue
                              Determines the string that represents the value true in the output of the FormFields.getColumnValues(Map) method.
                    static boolean ConvertNonBreakingSpaces
                              Determines whether the CharacterReference.decode(CharSequence) and similar methods convert non-breaking space (&nbsp;) character references to normal spaces.
                    static Config.CompatibilityMode CurrentCompatibilityMode
                              Determines the currently active compatibility mode.
                    static boolean IsApostropheEncoded
                              Determines whether apostrophes are encoded when calling the CharacterReference.encode(CharSequence) method.
                    static LoggerProvider LoggerProvider
                              Determines the LoggerProvider that is used to create the default Logger object for each new Source object.
                    static java.lang.String NewLine
                              Determines the string used to represent a newline in text output throughout the library.
                     
                    Method Summary
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Field Detail

                    ColumnMultipleValueSeparator

                    public static java.lang.String ColumnMultipleValueSeparator
                    Determines the string used to separate a single column's multiple values in the output of the FormFields.getColumnValues(Map) method.

                    The situation where a single column has multiple values only arises if FormField.getUserValueCount()>1 on the relevant form field, which usually indicates a poorly designed form.

                    The default value is "," (a comma, not including the quotes).

                    Must not be null.


                    ColumnValueTrue

                    public static java.lang.String ColumnValueTrue
                    Determines the string that represents the value true in the output of the FormFields.getColumnValues(Map) method.

                    The default value is "true" (without the quotes).

                    Must not be null.


                    ColumnValueFalse

                    public static java.lang.String ColumnValueFalse
                    Determines the string that represents the value false in the output of the FormFields.getColumnValues(Map) method.

                    The default value is null, which represents no output at all.


                    ConvertNonBreakingSpaces

                    public static boolean ConvertNonBreakingSpaces
                    Determines whether the CharacterReference.decode(CharSequence) and similar methods convert non-breaking space (&nbsp;) character references to normal spaces.

                    The default value is true.

                    When this property is set to false, non-breaking space (&nbsp;) character references are decoded as non-breaking space characters (U+00A0) instead of being converted to normal spaces (U+0020).

                    The default behaviour of the library reflects the fact that non-breaking space character references are almost always used in HTML documents as a non-collapsing white space character. Converting them to the correct character code U+00A0, which is represented by a visible character in many older character sets, was confusing to most users who expected to see only normal spaces. The most common example of this is its visualisation as the character á in the MS-DOS CP437 character set.

                    The functionality of the following methods is affected:


                    CurrentCompatibilityMode

                    public static Config.CompatibilityMode CurrentCompatibilityMode
                    Determines the currently active compatibility mode.

                    The default setting is Config.CompatibilityMode.IE (MS Internet Explorer 6.0).

                    Must not be null.


                    IsApostropheEncoded

                    public static boolean IsApostropheEncoded
                    Determines whether apostrophes are encoded when calling the CharacterReference.encode(CharSequence) method.

                    A value of false means apostrophe (U+0027) characters are not encoded. The only time apostrophes need to be encoded is within an attribute value delimited by single quotes (apostrophes), so in most cases ignoring apostrophes is perfectly safe and enhances the readability of the source document.

                    Note that apostrophes are always encoded as a numeric character reference, never as the character entity reference &apos;.

                    The default value is false.


                    LoggerProvider

                    public static LoggerProvider LoggerProvider
                    Determines the LoggerProvider that is used to create the default Logger object for each new Source object.

                    The LoggerProvider interface contains several predefined LoggerProvider instances which this property can be set to, mostly representing wrappers to common logging frameworks.

                    The default value is null, which results in the auto-detection of the most appropriate logging mechanism according to the following algorithm:

                    1. If the class org.slf4j.impl.StaticLoggerBinder is detected:
                    2. If the class org.apache.commons.logging.Log is detected:
                      Create an instance of it using the commons-logging LogFactory class.
                    3. If the class org.apache.log4j.Logger is detected, use LoggerProvider.LOG4J.
                    4. Otherwise, use LoggerProvider.JAVA.

                    See Also:
                    Source.setLogger(Logger)

                    NewLine

                    public static java.lang.String NewLine
                    Determines the string used to represent a newline in text output throughout the library.

                    The default value is the standard new line character sequence of the host platform, determined by System.getProperty("line.separator").



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/FormField.html0000644000175000017500000011433111214132422025361 0ustar twernertwerner FormField (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class FormField

                    java.lang.Object
                      extended by FormField
                    

                    public final class FormField
                    extends java.lang.Object

                    Represents a field in an HTML form, a field being defined as the group of all form controls having the same name.

                    The getFormControls() method can be used to obtain the collection of this field's constituent FormControl objects.

                    The FormFields class, which represents a collection of FormField objects, provides the highest level interface for dealing with form fields and controls. For the most common tasks it can be used directly without the need to work with its constituent FormField or FormControl objects.

                    The FormField class serves two main purposes:

                    1. Provide methods for the modification and retrieval of form control submission values while ensuring that the states of all the field's constituent form controls remain consistent with each other.

                      The methods available for this purpose are:
                      List getValues()
                      void clearValues()
                      void setValues(Collection)
                      boolean setValue(String)
                      boolean addValue(String)

                      Although the FormControl class provides methods for directly modifying the submission values of individual form controls, it is generally recommended to use the interface provided by the FormFields class unless there is a specific requirement for the lower level functionality. The FormFields class contains convenience methods providing most of the functionality of the above methods, as well as some higher level functionality such as the ability to set the form submission values as a complete field data set using the FormFields.setDataSet(Map) method.

                    2. Provide a means of determining the data structure of the field, allowing a server receiving a submitted form data set to interpret and store the data in an appropriate way.

                      The properties available for this purpose are:
                      boolean allowsMultipleValues()
                      int getUserValueCount()
                      Collection getPredefinedValues()

                      The FormFields.getColumnLabels() and FormFields.getColumnValues(Map) methods utilise these properties to convert data from a form data set (represented as a field data set) into a simple array format, suitable for storage in a tabular format such as a database table or .CSV file.

                      The properties need only be utilised directly in the event that a form data set is to be converted from its normal format into some other type of data structure.

                    A form field which allows user values normally consists of a single user value control, such as a TEXT control.

                    When a form field consists of more than one control, these controls are normally all predefined value controls of the same type, such as CHECKBOX controls.

                    Form fields consisting of more than one control do not necessarily return multiple values. A form field consisting of CHECKBOX controls can return multiple values, whereas a form field consisting of RADIO controls returns at most one value.

                    The HTML author can disregard convention and mix all types of controls with the same name in the same form, or include multiple user value controls of the same name. The presence of such an unusual combination is indicated by getUserValueCount()>1.

                    FormField instances are created automatically with the creation of a FormFields collection.

                    The case sensitivity of form field names is determined by the static Config.CurrentCompatibilityMode.FormFieldNameCaseInsensitive property.

                    See Also:
                    FormFields, FormControl, FormControlType

                    Method Summary
                     boolean addValue(java.lang.String value)
                              Adds the specified value to the field submission values of this field.
                     boolean allowsMultipleValues()
                              Indicates whether the field allows multiple values.
                     void clearValues()
                              Clears the submission values of all the constituent form controls in this field.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     FormControl getFormControl()
                              Returns the first FormControl from this field.
                     FormControl getFormControl(java.lang.String predefinedValue)
                              Returns the constituent FormControl with the specified predefined value.
                     java.util.Collection<FormControl> getFormControls()
                              Returns a collection of all the constituent form controls in this field.
                     java.lang.String getName()
                              Returns the control name shared by all of this field's constituent controls.
                     java.util.Collection<java.lang.String> getPredefinedValues()
                              Returns a collection of the predefined values of all constituent controls in this field.
                     int getUserValueCount()
                              Returns the number of constituent user value controls in this field.
                     java.util.List<java.lang.String> getValues()
                              Returns a list of the field submission values in order of appearance.
                     boolean setValue(java.lang.String value)
                              Sets the field submission values of this field to the single specified value.
                     void setValues(java.util.Collection<java.lang.String> values)
                              Sets the field submission values of this field to the specified values.
                     java.lang.String toString()
                              Returns a string representation of this object useful for debugging purposes.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getName

                    public java.lang.String getName()
                    Returns the control name shared by all of this field's constituent controls.

                    If the static Config.CurrentCompatibilityMode.isFormFieldNameCaseInsensitive() property is set to true, the grouping of the controls by name is case insensitive and this method always returns the name in lower case.

                    Since a form field is simply a group of controls with the same name, the terms control name and field name are for the most part synonymous, with only a possible difference in case differentiating them.

                    Returns:
                    the control name shared by all of this field's constituent controls.
                    See Also:
                    FormControl.getName()

                    getFormControls

                    public java.util.Collection<FormControl> getFormControls()
                    Returns a collection of all the constituent form controls in this field.

                    An iterator over this collection returns the controls in the order of appearance in the source.

                    Returns:
                    a collection of all the constituent form controls in this field.
                    See Also:
                    getFormControl(), getFormControl(String predefinedValue)

                    getFormControl

                    public FormControl getFormControl(java.lang.String predefinedValue)
                    Returns the constituent FormControl with the specified predefined value.

                    Specifying a predefined value of null returns the first control without a predefined value.

                    Parameters:
                    predefinedValue - the predefined value of the control to be returned, or null to return the first control without a predefined value.
                    Returns:
                    the constituent FormControl with the specified predefined value, or null if none exists.
                    See Also:
                    getFormControl(), getFormControls()

                    getFormControl

                    public FormControl getFormControl()
                    Returns the first FormControl from this field.

                    Returns:
                    the first FormControl from this field, guaranteed not null.
                    See Also:
                    getFormControl(String predefinedValue), getFormControls()

                    allowsMultipleValues

                    public boolean allowsMultipleValues()
                    Indicates whether the field allows multiple values.

                    Returns false in any one of the following circumstances:

                    • The field consists of only one control (unless it is a multiple select with more than one option)
                    • The field consists entirely of radio buttons
                    • The field consists entirely of submit buttons
                    If none of these three conditions are met, the method returns true.

                    Returns:
                    true if the field allows multiple values, otherwise false.

                    getUserValueCount

                    public int getUserValueCount()
                    Returns the number of constituent user value controls in this field. This should in most cases be either 0 or 1.

                    A value of 0 indicates the field values consist only of predefined values, which is the case when the field consists only of predefined value controls.

                    A value of 1 indicates the field values consist of at most one value set by the user. It is still possible in this case to receive multiple values in the unlikely event that the HTML author mixed controls of different types with the same name, but any other values would consist only of predefined values.

                    A value greater than 1 indicates that the HTML author has included more than one user value control with the same name. This would nearly always indicate an unintentional error in the HTML source document, in which case your application can either log a warning that a poorly designed form has been encountered, or take special action to try to interpret the multiple user values that might be submitted.

                    Returns:
                    the number of constituent user value controls in this field.

                    getPredefinedValues

                    public java.util.Collection<java.lang.String> getPredefinedValues()
                    Returns a collection of the predefined values of all constituent controls in this field.

                    All objects in the returned collection are of type String, with no null entries.

                    An iterator over this collection returns the values in the order of appearance in the source document.

                    Returns:
                    a collection of the predefined values of all constituent controls in this field, or null if none.
                    See Also:
                    FormControl.getPredefinedValues()

                    getValues

                    public java.util.List<java.lang.String> getValues()
                    Returns a list of the field submission values in order of appearance.

                    The term field submission values is used in this library to refer to the aggregate of all the submission values of a field's constituent form controls.

                    All objects in the returned list are of type String, with no null entries.

                    The list may contain duplicates if this field has multiple controls with the same value.

                    Returns:
                    a list of the field submission values in order of appearance, guaranteed not null.

                    clearValues

                    public void clearValues()
                    Clears the submission values of all the constituent form controls in this field.

                    See Also:
                    FormControl.clearValues()

                    setValues

                    public void setValues(java.util.Collection<java.lang.String> values)
                    Sets the field submission values of this field to the specified values.

                    This is equivalent to calling clearValues() followed by addValue(value) for each value in the specified collection.

                    The specified collection must not contain any null values.

                    Parameters:
                    values - the new field submission values of this field.
                    See Also:
                    addValue(String value)

                    setValue

                    public boolean setValue(java.lang.String value)
                    Sets the field submission values of this field to the single specified value.

                    This is equivalent to calling clearValues() followed by addValue(value).

                    The return value indicates whether any of the constituent form controls "accepted" the value. A return value of false implies an error condition as the specified value is not compatible with this field.

                    Specifying a null value is equivalent to calling clearValues() alone, and always returns true.

                    See the addValue(String value) method for more information.

                    Parameters:
                    value - the new field submission value of this field, or null to clear the field of all submission values.
                    Returns:
                    true if one of the constituent form controls accepts the value, otherwise false.
                    See Also:
                    FormFields.setValue(String fieldName, String value)

                    addValue

                    public boolean addValue(java.lang.String value)
                    Adds the specified value to the field submission values of this field.

                    This is achieved internally by attempting to add the value to every constituent form control until one "accepts" it.

                    The return value indicates whether any of the constituent form controls accepted the value. A return value of false implies an error condition as the specified value is not compatible with this field.

                    In the unusual case that this field consists of multiple form controls, but not all of them are predefined value controls, priority is given to the predefined value controls before attempting to add the value to the user value controls.

                    Parameters:
                    value - the new field submission value to add to this field, must not be null.
                    Returns:
                    true if one of the constituent form controls accepts the value, otherwise false.

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Returns:
                    a string representation of this object useful for debugging purposes.

                    toString

                    public java.lang.String toString()
                    Returns a string representation of this object useful for debugging purposes.

                    This is equivalent to getDebugInfo().

                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Logger.html0000644000175000017500000003675411214132422024745 0ustar twernertwerner Logger (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Interface Logger

                    All Known Implementing Classes:
                    WriterLogger

                    public interface Logger

                    Defines the interface for handling log messages.

                    It is not usually necessary for users to create implementations of this interface, as the LoggerProvider interface contains several predefined instances which provide the most commonly required Logger implementations.

                    By default, logging is configured automatically according to the algorithm described in the static Config.LoggerProvider property.

                    An instance of a class that implements this interface is used by calling the Source.setLogger(Logger) method on the relevant Source object.

                    Four logging levels are defined in this interface. The logging level is specified only by the use of different method names; there is no class or type defining the levels. This makes the code required to wrap other logging frameworks much simpler and more efficient.

                    The four logging levels are: ERROR, WARN, INFO and DEBUG.

                    IMPLEMENTATION NOTE: Ideally the java.util.logging.Logger class could have been used as a basis for logging, even if used to define a wrapper around other logging frameworks. This would have avoided the need to define yet another logging interface, but because java.util.logging.Logger is implemented very poorly, it is quite tricky to extend it as a wrapper. Other logging wrapper frameworks such as SLF4J or Jakarta Commons Logging provide good logging interfaces, but to avoid introducing dependencies it was decided to create this new interface.

                    See Also:
                    Config.LoggerProvider

                    Method Summary
                     void debug(java.lang.String message)
                              Logs a message at the DEBUG level.
                     void error(java.lang.String message)
                              Logs a message at the ERROR level.
                     void info(java.lang.String message)
                              Logs a message at the INFO level.
                     boolean isDebugEnabled()
                              Indicates whether logging is enabled at the DEBUG level.
                     boolean isErrorEnabled()
                              Indicates whether logging is enabled at the ERROR level.
                     boolean isInfoEnabled()
                              Indicates whether logging is enabled at the INFO level.
                     boolean isWarnEnabled()
                              Indicates whether logging is enabled at the WARN level.
                     void warn(java.lang.String message)
                              Logs a message at the WARN level.
                     

                    Method Detail

                    error

                    void error(java.lang.String message)
                    Logs a message at the ERROR level.

                    Parameters:
                    message - the message to log.

                    warn

                    void warn(java.lang.String message)
                    Logs a message at the WARN level.

                    Parameters:
                    message - the message to log.

                    info

                    void info(java.lang.String message)
                    Logs a message at the INFO level.

                    Parameters:
                    message - the message to log.

                    debug

                    void debug(java.lang.String message)
                    Logs a message at the DEBUG level.

                    Parameters:
                    message - the message to log.

                    isErrorEnabled

                    boolean isErrorEnabled()
                    Indicates whether logging is enabled at the ERROR level.

                    Returns:
                    true if logging is enabled at the ERROR level, otherwise false.

                    isWarnEnabled

                    boolean isWarnEnabled()
                    Indicates whether logging is enabled at the WARN level.

                    Returns:
                    true if logging is enabled at the WARN level, otherwise false.

                    isInfoEnabled

                    boolean isInfoEnabled()
                    Indicates whether logging is enabled at the INFO level.

                    Returns:
                    true if logging is enabled at the INFO level, otherwise false.

                    isDebugEnabled

                    boolean isDebugEnabled()
                    Indicates whether logging is enabled at the DEBUG level.

                    Returns:
                    true if logging is enabled at the DEBUG level, otherwise false.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/0000755000175000017500000000000011214132424024522 5ustar twernertwernerjericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/SourceCompactor.html0000644000175000017500000001613511214132424030526 0ustar twernertwerner Uses of Class net.htmlparser.jericho.SourceCompactor (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.SourceCompactor

                    Uses of SourceCompactor in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return SourceCompactor
                     SourceCompactor SourceCompactor.setNewLine(java.lang.String newLine)
                              Sets the string to be used to represent a newline in the output.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/MicrosoftTagTypes.html0000644000175000017500000001310511214132424031036 0ustar twernertwerner Uses of Class net.htmlparser.jericho.MicrosoftTagTypes (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.MicrosoftTagTypes

                    No usage of net.htmlparser.jericho.MicrosoftTagTypes



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/TextExtractor.html0000644000175000017500000002166511214132424030242 0ustar twernertwerner Uses of Class net.htmlparser.jericho.TextExtractor (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.TextExtractor

                    Uses of TextExtractor in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return TextExtractor
                     TextExtractor Segment.getTextExtractor()
                              Extracts the textual content from the HTML markup of this segment.
                     TextExtractor TextExtractor.setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces)
                              Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                     TextExtractor TextExtractor.setExcludeNonHTMLElements(boolean excludeNonHTMLElements)
                              Sets whether the content of non-HTML elements is excluded from the output.
                     TextExtractor TextExtractor.setIncludeAttributes(boolean includeAttributes)
                              Sets whether any attribute values are included in the output.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Segment.html0000644000175000017500000007522011214132424027020 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Segment (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Segment

                    Uses of Segment in net.htmlparser.jericho
                     

                    Subclasses of Segment in net.htmlparser.jericho
                     class Attribute
                              Represents a single attribute name/value segment within a StartTag.
                     class Attributes
                              Represents the list of Attribute objects present within a particular StartTag.
                     class CharacterEntityReference
                              Represents an HTML Character Entity Reference.
                     class CharacterReference
                              Represents an HTML Character Reference, implemented by the subclasses CharacterEntityReference and NumericCharacterReference.
                     class Element
                              Represents an element in a specific source document, which encompasses a start tag, an optional end tag and all content in between.
                     class EndTag
                              Represents the end tag of an element in a specific source document.
                     class FormControl
                              Represents an HTML form control.
                     class NumericCharacterReference
                              Represents an HTML Numeric Character Reference.
                     class Source
                              Represents a source HTML document.
                     class StartTag
                              Represents the start tag of an element in a specific source document.
                     class Tag
                              Represents either a StartTag or EndTag in a specific source document.
                     

                    Methods in net.htmlparser.jericho that return Segment
                     Segment Element.getContent()
                              Returns the segment representing the content of the element.
                     Segment StreamedSource.getCurrentSegment()
                              Returns the current Segment from the StreamedSource.iterator().
                     Segment Tag.getNameSegment()
                              Returns the segment spanning the name of this tag.
                     Segment Attribute.getNameSegment()
                              Returns the segment spanning the name of this attribute.
                     Segment StartTag.getTagContent()
                              Returns the segment between the end of the tag's name and the start of its end delimiter.
                     Segment Attribute.getValueSegment()
                              Returns the segment spanning the value of this attribute, or null if it has no value.
                     Segment Attribute.getValueSegmentIncludingQuotes()
                              Returns the segment spanning the value of this attribute, including quotation marks if any, or null if it has no value.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type Segment
                     java.util.Iterator<Segment> Segment.getNodeIterator()
                              Returns an iterator over every tag, character reference and plain text segment contained within this segment.
                     java.util.Iterator<Segment> StreamedSource.iterator()
                              Returns an iterator over every tag, character reference and plain text segment contained within the source document.
                     java.util.Iterator<Segment> Source.iterator()
                              Returns an iterator over every tag, character reference and plain text segment contained within the source document.
                     

                    Methods in net.htmlparser.jericho with parameters of type Segment
                     int Segment.compareTo(Segment segment)
                              Compares this Segment object to another object.
                     boolean Segment.encloses(Segment segment)
                              Indicates whether this Segment encloses the specified Segment.
                     void OutputDocument.remove(Segment segment)
                              Removes the specified segment from this output document.
                     void OutputDocument.replace(Segment segment, java.lang.CharSequence text)
                              Replaces the specified segment in this output document with the specified text.
                     

                    Method parameters in net.htmlparser.jericho with type arguments of type Segment
                     void Source.ignoreWhenParsing(java.util.Collection<? extends Segment> segments)
                              Causes all of the segments in the specified collection to be ignored when parsing.
                     void OutputDocument.remove(java.util.Collection<? extends Segment> segments)
                              Removes all the segments from this output document represented by the specified source Segment objects.
                     

                    Constructors in net.htmlparser.jericho with parameters of type Segment
                    OutputDocument(Segment segment)
                              Constructs a new output document based on the specified Segment.
                    Renderer(Segment segment)
                              Constructs a new Renderer based on the specified Segment.
                    SourceCompactor(Segment segment)
                              Constructs a new SourceCompactor based on the specified Segment.
                    SourceFormatter(Segment segment)
                              Constructs a new SourceFormatter based on the specified Segment.
                    TextExtractor(Segment segment)
                              Constructs a new TextExtractor based on the specified Segment.
                     

                    Uses of Segment in net.htmlparser.jericho.nodoc
                     

                    Subclasses of Segment in net.htmlparser.jericho.nodoc
                     class net.htmlparser.jericho.nodoc.SequentialListSegment<E>
                               
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Source.html0000644000175000017500000004215611214132424026660 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Source (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Source

                    Uses of Source in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return Source
                     Source Segment.getSource()
                              Returns the Source document containing this segment.
                     

                    Methods in net.htmlparser.jericho with parameters of type Source
                     boolean StartTagType.atEndOfAttributes(Source source, int pos, boolean isClosingSlashIgnored)
                              Indicates whether the specified source document position is at the end of a tag's attributes.
                    protected  EndTag EndTagType.constructEndTag(Source source, int begin, int end, java.lang.String name)
                              Internal method for the construction of an EndTag object of this type.
                    protected  StartTag StartTagType.constructStartTag(Source source, int begin, int end, java.lang.String name, Attributes attributes)
                              Internal method for the construction of a StartTag object of this type.
                    protected abstract  Tag TagType.constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    protected  Tag StartTagTypeGenericImplementation.constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    protected  Tag EndTagTypeGenericImplementation.constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    protected  int StartTagTypeGenericImplementation.getEnd(Source source, int pos)
                              Returns the end of a tag of this type, starting from the specified position in the specified source document.
                    protected  boolean TagType.isValidPosition(Source source, int pos, int[] fullSequentialParseData)
                              Indicates whether a tag of this type is valid in the specified position of the specified source document.
                    protected  Attributes StartTagType.parseAttributes(Source source, int startTagBegin, java.lang.String tagName)
                              Internal method for the parsing of Attributes.
                    protected  boolean TagType.tagEncloses(Source source, int pos)
                              Indicates whether a tag of this type encloses the specified position of the specified source document.
                     

                    Constructors in net.htmlparser.jericho with parameters of type Source
                    OutputDocument(Source source)
                              Constructs a new output document based on the specified source document.
                    Segment(Source source, int begin, int end)
                              Constructs a new Segment within the specified source document with the specified begin and end character positions.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/EndTagTypeGenericImplementation.html0000644000175000017500000001332111214132424033617 0ustar twernertwerner Uses of Class net.htmlparser.jericho.EndTagTypeGenericImplementation (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.EndTagTypeGenericImplementation

                    No usage of net.htmlparser.jericho.EndTagTypeGenericImplementation



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/TagType.html0000644000175000017500000005044011214132424026770 0ustar twernertwerner Uses of Class net.htmlparser.jericho.TagType (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.TagType

                    Uses of TagType in net.htmlparser.jericho
                     

                    Subclasses of TagType in net.htmlparser.jericho
                     class EndTagType
                              Defines the syntax for an end tag type.
                     class EndTagTypeGenericImplementation
                              Provides a generic implementation of the abstract EndTagType class based on the most common end tag behaviour.
                     class StartTagType
                              Defines the syntax for a start tag type.
                     class StartTagTypeGenericImplementation
                              Provides a generic implementation of the abstract StartTagType class based on the most common start tag behaviour.
                     

                    Methods in net.htmlparser.jericho that return TagType
                    abstract  TagType Tag.getTagType()
                              Returns the type of this tag.
                     TagType StartTag.getTagType()
                               
                     TagType EndTag.getTagType()
                               
                    static TagType[] TagType.getTagTypesIgnoringEnclosedMarkup()
                              Returns an array of all the tag types inside which the parser ignores all non-server tags in parse on demand mode.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type TagType
                    static java.util.List<TagType> TagType.getRegisteredTagTypes()
                              Returns a list of all the currently registered tag types in order of lowest to highest precedence.
                     

                    Methods in net.htmlparser.jericho with parameters of type TagType
                    static boolean PHPTagTypes.defines(TagType tagType)
                              Indicates whether the specified tag type is defined in this class.
                    static boolean MicrosoftTagTypes.defines(TagType tagType)
                              Indicates whether the specified tag type is defined in this class.
                    static boolean MasonTagTypes.defines(TagType tagType)
                              Indicates whether the specified tag type is defined in this class.
                     java.util.List<Tag> Segment.getAllTags(TagType tagType)
                              Returns a list of all Tag objects of the specified type that are enclosed by this segment.
                     Tag Source.getEnclosingTag(int pos, TagType tagType)
                              Returns the Tag of the specified type that encloses the specified position in the source document.
                     Tag Source.getNextTag(int pos, TagType tagType)
                              Returns the Tag of the specified type beginning at or immediately following the specified position in the source document.
                     Tag Source.getPreviousTag(int pos, TagType tagType)
                              Returns the Tag of the specified type beginning at or immediately preceding (or enclosing) the specified position in the source document.
                    static boolean MasonTagTypes.isParsedByMason(TagType tagType)
                              Indicates whether the specified tag type is recognised by a Mason parser.
                    static boolean PHPTagTypes.isParsedByPHP(TagType tagType)
                              Indicates whether the specified tag type is recognised by a PHP parser.
                    static void TagType.setTagTypesIgnoringEnclosedMarkup(TagType[] tagTypes)
                              Sets the tag types inside which the parser ignores all non-server tags.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/NumericCharacterReference.html0000644000175000017500000001322511214132424032451 0ustar twernertwerner Uses of Class net.htmlparser.jericho.NumericCharacterReference (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.NumericCharacterReference

                    No usage of net.htmlparser.jericho.NumericCharacterReference



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Attributes.html0000644000175000017500000003507111214132424027544 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Attributes (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Attributes

                    Uses of Attributes in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return Attributes
                     Attributes StartTag.getAttributes()
                              Returns the attributes specified in this start tag.
                     Attributes Element.getAttributes()
                              Returns the attributes specified in this element's start tag.
                     Attributes StartTag.parseAttributes()
                              Parses the attributes specified in this start tag, regardless of the type of start tag.
                     Attributes Segment.parseAttributes()
                              Parses any Attributes within this segment.
                     Attributes StartTag.parseAttributes(int maxErrorCount)
                              Parses the attributes specified in this start tag, regardless of the type of start tag.
                     Attributes Source.parseAttributes(int pos, int maxEnd)
                              Parses any Attributes starting at the specified position.
                     Attributes Source.parseAttributes(int pos, int maxEnd, int maxErrorCount)
                              Parses any Attributes starting at the specified position.
                    protected  Attributes StartTagType.parseAttributes(Source source, int startTagBegin, java.lang.String tagName)
                              Internal method for the parsing of Attributes.
                     

                    Methods in net.htmlparser.jericho with parameters of type Attributes
                    protected  StartTag StartTagType.constructStartTag(Source source, int begin, int end, java.lang.String name, Attributes attributes)
                              Internal method for the construction of a StartTag object of this type.
                     java.util.Map<java.lang.String,java.lang.String> OutputDocument.replace(Attributes attributes, boolean convertNamesToLowerCase)
                              Replaces the specified Attributes segment in this output document with the name/value entries in the returned Map.
                     void OutputDocument.replace(Attributes attributes, java.util.Map<java.lang.String,java.lang.String> map)
                              Replaces the specified attributes segment in this source document with the name/value entries in the specified Map.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/OutputSegment.html0000644000175000017500000002312111214132424030232 0ustar twernertwerner Uses of Interface net.htmlparser.jericho.OutputSegment (Jericho HTML Parser 3.1)

                    Uses of Interface
                    net.htmlparser.jericho.OutputSegment

                    Uses of OutputSegment in net.htmlparser.jericho
                     

                    Fields in net.htmlparser.jericho with type parameters of type OutputSegment
                    static java.util.Comparator<OutputSegment> OutputSegment.COMPARATOR
                              The comparator used to sort output segments in the OutputDocument before output.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type OutputSegment
                     java.util.List<OutputSegment> OutputDocument.getRegisteredOutputSegments()
                              Returns a list of all the registered OutputSegment objects in this output document.
                     

                    Methods in net.htmlparser.jericho with parameters of type OutputSegment
                     void OutputDocument.register(OutputSegment outputSegment)
                              Registers the specified output segment in this output document.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/StartTag.html0000644000175000017500000010225611214132424027147 0ustar twernertwerner Uses of Class net.htmlparser.jericho.StartTag (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.StartTag

                    Uses of StartTag in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return StartTag
                    protected  StartTag StartTagType.constructStartTag(Source source, int begin, int end, java.lang.String name, Attributes attributes)
                              Internal method for the construction of a StartTag object of this type.
                     StartTag Segment.getFirstStartTag()
                              Returns the first StartTag enclosed by this segment.
                     StartTag Segment.getFirstStartTag(StartTagType startTagType)
                              Returns the first StartTag of the specified type enclosed by this segment.
                     StartTag Segment.getFirstStartTag(java.lang.String name)
                              Returns the first normal StartTag enclosed by this segment.
                     StartTag Segment.getFirstStartTag(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the first StartTag with the specified attribute name and value pattern that is enclosed by this segment.
                     StartTag Segment.getFirstStartTag(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the first StartTag with the specified attribute name/value pair enclosed by this segment.
                     StartTag Segment.getFirstStartTagByClass(java.lang.String className)
                              Returns the first StartTag with the specified class that is enclosed by this segment.
                     StartTag Source.getNextStartTag(int pos)
                              Returns the StartTag beginning at or immediately following the specified position in the source document.
                     StartTag Source.getNextStartTag(int pos, StartTagType startTagType)
                              Returns the StartTag of the specified type beginning at or immediately following the specified position in the source document.
                     StartTag Source.getNextStartTag(int pos, java.lang.String name)
                              Returns the normal StartTag with the specified name beginning at or immediately following the specified position in the source document.
                     StartTag Source.getNextStartTag(int pos, java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the StartTag with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.
                     StartTag Source.getNextStartTag(int pos, java.lang.String name, StartTagType startTagType)
                              Returns the StartTag with the specified name and type beginning at or immediately following the specified position in the source document.
                     StartTag Source.getNextStartTag(int pos, java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the StartTag with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.
                     StartTag Source.getNextStartTagByClass(int pos, java.lang.String className)
                              Returns the StartTag with the specified class beginning at or immediately following the specified position in the source document.
                     StartTag Source.getPreviousStartTag(int pos)
                              Returns the StartTag at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag Source.getPreviousStartTag(int pos, StartTagType startTagType)
                              Returns the StartTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag Source.getPreviousStartTag(int pos, java.lang.String name)
                              Returns the normal StartTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag Source.getPreviousStartTag(int pos, java.lang.String name, StartTagType startTagType)
                              Returns the StartTag with the specified name and type at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag Element.getStartTag()
                              Returns the start tag of the element.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type StartTag
                     java.util.List<StartTag> Source.getAllStartTags()
                              Returns a list of all start tags in this source document.
                     java.util.List<StartTag> Segment.getAllStartTags()
                              Returns a list of all StartTag objects that are enclosed by this segment.
                     java.util.List<StartTag> Segment.getAllStartTags(StartTagType startTagType)
                              Returns a list of all StartTag objects of the specified type that are enclosed by this segment.
                     java.util.List<StartTag> Segment.getAllStartTags(java.lang.String name)
                              Returns a list of all normal StartTag objects with the specified name that are enclosed by this segment.
                     java.util.List<StartTag> Segment.getAllStartTags(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns a list of all StartTag objects with the specified attribute name and value pattern that are enclosed by this segment.
                     java.util.List<StartTag> Segment.getAllStartTags(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns a list of all StartTag objects with the specified attribute name/value pair that are enclosed by this segment.
                     java.util.List<StartTag> Segment.getAllStartTagsByClass(java.lang.String className)
                              Returns a list of all StartTag objects with the specified class that are enclosed by this segment.
                     

                    Methods in net.htmlparser.jericho with parameters of type StartTag
                     boolean TextExtractor.excludeElement(StartTag startTag)
                              Indicates whether the text inside the Element of the specified start tag should be excluded from the output.
                     boolean TextExtractor.includeAttribute(StartTag startTag, Attribute attribute)
                              Indicates whether the value of the specified attribute in the specified start tag is included in the output.
                     java.lang.String Renderer.renderHyperlinkURL(StartTag startTag)
                              Renders the hyperlink URL from the specified StartTag.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/StreamedSource.html0000644000175000017500000001757311214132424030352 0ustar twernertwerner Uses of Class net.htmlparser.jericho.StreamedSource (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.StreamedSource

                    Uses of StreamedSource in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return StreamedSource
                     StreamedSource StreamedSource.setBuffer(char[] buffer)
                              Specifies an existing character array to use for buffering the incoming character stream.
                     StreamedSource StreamedSource.setCoalescing(boolean coalescing)
                              Specifies whether an unbroken section of plain text in the source document should always be coalesced into a single Segment by the iterator.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/OutputDocument.html0000644000175000017500000001304711214132424030414 0ustar twernertwerner Uses of Class net.htmlparser.jericho.OutputDocument (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.OutputDocument

                    No usage of net.htmlparser.jericho.OutputDocument



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/ParseText.html0000644000175000017500000001573511214132424027342 0ustar twernertwerner Uses of Interface net.htmlparser.jericho.ParseText (Jericho HTML Parser 3.1)

                    Uses of Interface
                    net.htmlparser.jericho.ParseText

                    Uses of ParseText in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return ParseText
                     ParseText Source.getParseText()
                              Returns the parse text of this source document.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/CharStreamSourceUtil.html0000644000175000017500000001314311214132424031462 0ustar twernertwerner Uses of Class net.htmlparser.jericho.CharStreamSourceUtil (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.CharStreamSourceUtil

                    No usage of net.htmlparser.jericho.CharStreamSourceUtil



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/StartTagType.html0000644000175000017500000007350311214132424030013 0ustar twernertwerner Uses of Class net.htmlparser.jericho.StartTagType (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.StartTagType

                    Uses of StartTagType in net.htmlparser.jericho
                     

                    Subclasses of StartTagType in net.htmlparser.jericho
                     class StartTagTypeGenericImplementation
                              Provides a generic implementation of the abstract StartTagType class based on the most common start tag behaviour.
                     

                    Fields in net.htmlparser.jericho declared as StartTagType
                    static StartTagType StartTagType.CDATA_SECTION
                              The tag type given to a CDATA section (<![CDATA[ ... ]]>).
                    static StartTagType StartTagType.COMMENT
                              The tag type given to an HTML comment (<!-- ... -->).
                    static StartTagType StartTagType.DOCTYPE_DECLARATION
                              The tag type given to a document type declaration (<!DOCTYPE ... >).
                    static StartTagType MicrosoftTagTypes.DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT
                              The tag type given to a Microsoft® downlevel-revealed conditional comment (<![if ... ]> | <![endif]>).
                    static StartTagType StartTagType.MARKUP_DECLARATION
                              The tag type given to a markup declaration (<!ELEMENT ... > | <!ATTLIST ... > | <!ENTITY ... > | <!NOTATION ... >).
                    static StartTagType MasonTagTypes.MASON_COMPONENT_CALL
                              The tag type given to a Mason component call (<& ... &>).
                    static StartTagType MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT
                              The tag type given to the start tag of a Mason component called with content (<&| ... &> ... </&>).
                    static StartTagType MasonTagTypes.MASON_NAMED_BLOCK
                              The tag type given to the start tag of a Mason named block (<%name ... > ... </%name>).
                    static StartTagType StartTagType.NORMAL
                              The tag type given to a normal HTML or XML start tag (<name ... >).
                    static StartTagType PHPTagTypes.PHP_SCRIPT
                              The tag type given to a script-style PHP start tag (<script language="php"> ... </script>).
                    static StartTagType PHPTagTypes.PHP_SHORT
                              The tag type given to a short-form PHP tag (<? ... ?>).
                    static StartTagType PHPTagTypes.PHP_STANDARD
                              The tag type given to a standard PHP tag (<?php ... ?>).
                    static StartTagType StartTagType.SERVER_COMMON
                              The tag type given to a common server tag (<% ... %>).
                    static StartTagType StartTagType.SERVER_COMMON_ESCAPED
                              The tag type given to an escaped common server tag (<\% ... %>).
                    static StartTagType StartTagType.UNREGISTERED
                              The tag type given to an unregistered start tag (< ... >).
                    static StartTagType StartTagType.XML_DECLARATION
                              The tag type given to an XML declaration (<?xml ... ?>).
                    static StartTagType StartTagType.XML_PROCESSING_INSTRUCTION
                              The tag type given to an XML processing instruction (<?PITarget ... ?>).
                     

                    Methods in net.htmlparser.jericho that return StartTagType
                     StartTagType EndTagType.getCorrespondingStartTagType()
                              Returns the type of start tag that is usually paired with an end tag of this type to form an Element.
                     StartTagType StartTag.getStartTagType()
                              Returns the type of this start tag.
                     

                    Methods in net.htmlparser.jericho with parameters of type StartTagType
                     java.util.List<Element> Segment.getAllElements(StartTagType startTagType)
                              Returns a list of all Element objects with start tags of the specified type that are enclosed by this segment.
                     java.util.List<StartTag> Segment.getAllStartTags(StartTagType startTagType)
                              Returns a list of all StartTag objects of the specified type that are enclosed by this segment.
                     StartTag Segment.getFirstStartTag(StartTagType startTagType)
                              Returns the first StartTag of the specified type enclosed by this segment.
                     StartTag Source.getNextStartTag(int pos, StartTagType startTagType)
                              Returns the StartTag of the specified type beginning at or immediately following the specified position in the source document.
                     StartTag Source.getNextStartTag(int pos, java.lang.String name, StartTagType startTagType)
                              Returns the StartTag with the specified name and type beginning at or immediately following the specified position in the source document.
                     StartTag Source.getPreviousStartTag(int pos, StartTagType startTagType)
                              Returns the StartTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                     StartTag Source.getPreviousStartTag(int pos, java.lang.String name, StartTagType startTagType)
                              Returns the StartTag with the specified name and type at or immediately preceding (or enclosing) the specified position in the source document.
                     



                    ././@LongLink0000000000000000000000000000014600000000000011566 Lustar rootrootjericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/StartTagTypeGenericImplementation.htmljericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/StartTagTypeGenericImplementation.htm0000644000175000017500000001334511214132424034040 0ustar twernertwerner Uses of Class net.htmlparser.jericho.StartTagTypeGenericImplementation (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.StartTagTypeGenericImplementation

                    No usage of net.htmlparser.jericho.StartTagTypeGenericImplementation



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/CharStreamSource.html0000644000175000017500000002757011214132424030635 0ustar twernertwerner Uses of Interface net.htmlparser.jericho.CharStreamSource (Jericho HTML Parser 3.1)

                    Uses of Interface
                    net.htmlparser.jericho.CharStreamSource

                    Uses of CharStreamSource in net.htmlparser.jericho
                     

                    Subinterfaces of CharStreamSource in net.htmlparser.jericho
                     interface OutputSegment
                              Defines the interface for an output segment, which is used in an OutputDocument to replace segments of the source document with other text.
                     

                    Classes in net.htmlparser.jericho that implement CharStreamSource
                     class OutputDocument
                              Represents a modified version of an original Source document or Segment.
                     class Renderer
                              Performs a simple rendering of HTML markup into text.
                     class SourceCompactor
                              Compacts HTML source by removing all unnecessary white space.
                     class SourceFormatter
                              Formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
                     class TextExtractor
                              Extracts the textual content from HTML markup.
                     

                    Methods in net.htmlparser.jericho with parameters of type CharStreamSource
                    static java.io.Reader CharStreamSourceUtil.getReader(CharStreamSource charStreamSource)
                              Returns a Reader that reads the output of the specified CharStreamSource.
                    static java.lang.String CharStreamSourceUtil.toString(CharStreamSource charStreamSource)
                              Returns the output of the specified CharStreamSource as a string.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Tag.html0000644000175000017500000005520211214132424026127 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Tag (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Tag

                    Uses of Tag in net.htmlparser.jericho
                     

                    Subclasses of Tag in net.htmlparser.jericho
                     class EndTag
                              Represents the end tag of an element in a specific source document.
                     class StartTag
                              Represents the start tag of an element in a specific source document.
                     

                    Methods in net.htmlparser.jericho that return Tag
                    protected abstract  Tag TagType.constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    protected  Tag StartTagTypeGenericImplementation.constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    protected  Tag EndTagTypeGenericImplementation.constructTagAt(Source source, int pos)
                              Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                     Tag[] Source.fullSequentialParse()
                              Parses all of the tags in this source document sequentially from beginning to end.
                     Tag Source.getEnclosingTag(int pos)
                              Returns the Tag that encloses the specified position in the source document.
                     Tag Source.getEnclosingTag(int pos, TagType tagType)
                              Returns the Tag of the specified type that encloses the specified position in the source document.
                     Tag Tag.getNextTag()
                              Returns the next tag in the source document.
                     Tag Source.getNextTag(int pos)
                              Returns the Tag beginning at or immediately following the specified position in the source document.
                     Tag Source.getNextTag(int pos, TagType tagType)
                              Returns the Tag of the specified type beginning at or immediately following the specified position in the source document.
                     Tag Tag.getPreviousTag()
                              Returns the previous tag in the source document.
                     Tag Source.getPreviousTag(int pos)
                              Returns the Tag beginning at or immediately preceding (or enclosing) the specified position in the source document.
                     Tag Source.getPreviousTag(int pos, TagType tagType)
                              Returns the Tag of the specified type beginning at or immediately preceding (or enclosing) the specified position in the source document.
                     Tag Source.getTagAt(int pos)
                              Returns the Tag at the specified position in the source document.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type Tag
                     java.util.List<Tag> Source.getAllTags()
                              Returns a list of all tags in this source document.
                     java.util.List<Tag> Segment.getAllTags()
                              Returns a list of all Tag objects that are enclosed by this segment.
                     java.util.List<Tag> Segment.getAllTags(TagType tagType)
                              Returns a list of all Tag objects of the specified type that are enclosed by this segment.
                     

                    Methods in net.htmlparser.jericho with parameters of type Tag
                    static boolean MicrosoftTagTypes.isConditionalCommentEndifTag(Tag tag)
                              Indicates whether the specified tag is a downlevel-revealed conditional comment "endif" tag (<![endif]>).
                    static boolean MicrosoftTagTypes.isConditionalCommentIfTag(Tag tag)
                              Indicates whether the specified tag is a downlevel-revealed conditional comment "if" tag (<![if ... ]>).
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/CharacterEntityReference.html0000644000175000017500000001321311214132424032320 0ustar twernertwerner Uses of Class net.htmlparser.jericho.CharacterEntityReference (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.CharacterEntityReference

                    No usage of net.htmlparser.jericho.CharacterEntityReference



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/CharacterReference.html0000644000175000017500000002676111214132424031137 0ustar twernertwerner Uses of Class net.htmlparser.jericho.CharacterReference (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.CharacterReference

                    Uses of CharacterReference in net.htmlparser.jericho
                     

                    Subclasses of CharacterReference in net.htmlparser.jericho
                     class CharacterEntityReference
                              Represents an HTML Character Entity Reference.
                     class NumericCharacterReference
                              Represents an HTML Numeric Character Reference.
                     

                    Methods in net.htmlparser.jericho that return CharacterReference
                     CharacterReference Source.getNextCharacterReference(int pos)
                              Returns the CharacterReference beginning at or immediately following the specified position in the source document.
                     CharacterReference Source.getPreviousCharacterReference(int pos)
                              Returns the CharacterReference at or immediately preceding (or enclosing) the specified position in the source document.
                    static CharacterReference CharacterReference.parse(java.lang.CharSequence characterReferenceText)
                              Parses a single encoded character reference text into a CharacterReference object.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type CharacterReference
                     java.util.List<CharacterReference> Segment.getAllCharacterReferences()
                              Returns a list of all CharacterReference objects that are enclosed by this segment.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/BasicLogFormatter.html0000644000175000017500000001310511214132424030757 0ustar twernertwerner Uses of Class net.htmlparser.jericho.BasicLogFormatter (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.BasicLogFormatter

                    No usage of net.htmlparser.jericho.BasicLogFormatter



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/FormControl.html0000644000175000017500000003216311214132424027661 0ustar twernertwerner Uses of Class net.htmlparser.jericho.FormControl (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.FormControl

                    Uses of FormControl in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return FormControl
                     FormControl StartTag.getFormControl()
                              Returns the FormControl defined by this start tag.
                     FormControl FormField.getFormControl()
                              Returns the first FormControl from this field.
                     FormControl Element.getFormControl()
                              Returns the FormControl defined by this element.
                     FormControl FormField.getFormControl(java.lang.String predefinedValue)
                              Returns the constituent FormControl with the specified predefined value.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type FormControl
                     java.util.List<FormControl> Segment.getFormControls()
                              Returns a list of the FormControl objects that are enclosed by this segment.
                     java.util.Collection<FormControl> FormField.getFormControls()
                              Returns a collection of all the constituent form controls in this field.
                     

                    Methods in net.htmlparser.jericho with parameters of type FormControl
                     void OutputDocument.replace(FormControl formControl)
                              Replaces the specified FormControl in this output document.
                     

                    Constructor parameters in net.htmlparser.jericho with type arguments of type FormControl
                    FormFields(java.util.Collection<FormControl> formControls)
                              Constructs a new FormFields object consisting of the specified form controls.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Util.html0000644000175000017500000001270311214132424026330 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Util (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Util

                    No usage of net.htmlparser.jericho.Util



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/RowColumnVector.html0000644000175000017500000001622511214132424030526 0ustar twernertwerner Uses of Class net.htmlparser.jericho.RowColumnVector (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.RowColumnVector

                    Uses of RowColumnVector in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return RowColumnVector
                     RowColumnVector Source.getRowColumnVector(int pos)
                              Returns a RowColumnVector object representing the row and column number of the specified character position in the source document.
                     



                    ././@LongLink0000000000000000000000000000015600000000000011567 Lustar rootrootjericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/FormControlOutputStyle.ConfigDisplayValue.htmljericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/FormControlOutputStyle.ConfigDisplayV0000644000175000017500000001346511214132424034044 0ustar twernertwerner Uses of Class net.htmlparser.jericho.FormControlOutputStyle.ConfigDisplayValue (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.FormControlOutputStyle.ConfigDisplayValue

                    No usage of net.htmlparser.jericho.FormControlOutputStyle.ConfigDisplayValue



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/HTMLElementName.html0000644000175000017500000001310511214132424030267 0ustar twernertwerner Uses of Interface net.htmlparser.jericho.HTMLElementName (Jericho HTML Parser 3.1)

                    Uses of Interface
                    net.htmlparser.jericho.HTMLElementName

                    No usage of net.htmlparser.jericho.HTMLElementName



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/FormFields.html0000644000175000017500000002210511214132424027442 0ustar twernertwerner Uses of Class net.htmlparser.jericho.FormFields (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.FormFields

                    Uses of FormFields in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return FormFields
                     FormFields Segment.getFormFields()
                              Returns the FormFields object representing all form fields that are enclosed by this segment.
                     

                    Methods in net.htmlparser.jericho with parameters of type FormFields
                     void FormFields.merge(FormFields formFields)
                              Merges the specified FormFields into this FormFields collection.
                     void OutputDocument.replace(FormFields formFields)
                              Replaces all the constituent form controls from the specified FormFields in this output document.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Attribute.html0000644000175000017500000002447211214132424027364 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Attribute (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Attribute

                    Uses of Attribute in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return Attribute
                     Attribute Attributes.get(java.lang.String name)
                              Returns the Attribute with the specified name (case insensitive).
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type Attribute
                     java.util.Iterator<Attribute> Attributes.iterator()
                              Returns an iterator over the Attribute objects in this list in order of appearance.
                     java.util.ListIterator<Attribute> Attributes.listIterator(int index)
                              Returns a list iterator of the Attribute objects in this list in order of appearance, starting at the specified position in the list.
                     

                    Methods in net.htmlparser.jericho with parameters of type Attribute
                     boolean TextExtractor.includeAttribute(StartTag startTag, Attribute attribute)
                              Indicates whether the value of the specified attribute in the specified start tag is included in the output.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/FormControlType.html0000644000175000017500000002026211214132424030520 0ustar twernertwerner Uses of Class net.htmlparser.jericho.FormControlType (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.FormControlType

                    Uses of FormControlType in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return FormControlType
                     FormControlType FormControl.getFormControlType()
                              Returns the type of this form control.
                    static FormControlType FormControlType.valueOf(java.lang.String name)
                              Returns the enum constant of this type with the specified name.
                    static FormControlType[] FormControlType.values()
                              Returns an array containing the constants of this enum type, in the order they're declared.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/MasonTagTypes.html0000644000175000017500000001303511214132424030150 0ustar twernertwerner Uses of Class net.htmlparser.jericho.MasonTagTypes (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.MasonTagTypes

                    No usage of net.htmlparser.jericho.MasonTagTypes



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/EndTag.html0000644000175000017500000003427011214132424026560 0ustar twernertwerner Uses of Class net.htmlparser.jericho.EndTag (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.EndTag

                    Uses of EndTag in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return EndTag
                    protected  EndTag EndTagType.constructEndTag(Source source, int begin, int end, java.lang.String name)
                              Internal method for the construction of an EndTag object of this type.
                     EndTag Element.getEndTag()
                              Returns the end tag of the element.
                     EndTag Source.getNextEndTag(int pos)
                              Returns the EndTag beginning at or immediately following the specified position in the source document.
                     EndTag Source.getNextEndTag(int pos, EndTagType endTagType)
                              Returns the EndTag of the specified type beginning at or immediately following the specified position in the source document.
                     EndTag Source.getNextEndTag(int pos, java.lang.String name)
                              Returns the normal EndTag with the specified name beginning at or immediately following the specified position in the source document.
                     EndTag Source.getNextEndTag(int pos, java.lang.String name, EndTagType endTagType)
                              Returns the EndTag with the specified name and type beginning at or immediately following the specified position in the source document.
                     EndTag Source.getPreviousEndTag(int pos)
                              Returns the EndTag at or immediately preceding (or enclosing) the specified position in the source document.
                     EndTag Source.getPreviousEndTag(int pos, EndTagType endTagType)
                              Returns the EndTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                     EndTag Source.getPreviousEndTag(int pos, java.lang.String name)
                              Returns the normal EndTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/FormControlOutputStyle.html0000644000175000017500000002301611214132424032120 0ustar twernertwerner Uses of Class net.htmlparser.jericho.FormControlOutputStyle (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.FormControlOutputStyle

                    Uses of FormControlOutputStyle in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return FormControlOutputStyle
                     FormControlOutputStyle FormControl.getOutputStyle()
                              Returns the current output style of this form control.
                    static FormControlOutputStyle FormControlOutputStyle.valueOf(java.lang.String name)
                              Returns the enum constant of this type with the specified name.
                    static FormControlOutputStyle[] FormControlOutputStyle.values()
                              Returns an array containing the constants of this enum type, in the order they're declared.
                     

                    Methods in net.htmlparser.jericho with parameters of type FormControlOutputStyle
                     void FormControl.setOutputStyle(FormControlOutputStyle outputStyle)
                              Sets the output style of this form control.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Config.html0000644000175000017500000001272711214132424026626 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Config (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Config

                    No usage of net.htmlparser.jericho.Config



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/FormField.html0000644000175000017500000002007011214132424027256 0ustar twernertwerner Uses of Class net.htmlparser.jericho.FormField (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.FormField

                    Uses of FormField in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return FormField
                     FormField FormFields.get(java.lang.String fieldName)
                              Returns the FormField with the specified name.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type FormField
                     java.util.Iterator<FormField> FormFields.iterator()
                              Returns an iterator over the FormField objects in the collection.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Logger.html0000644000175000017500000002572311214132424026640 0ustar twernertwerner Uses of Interface net.htmlparser.jericho.Logger (Jericho HTML Parser 3.1)

                    Uses of Interface
                    net.htmlparser.jericho.Logger

                    Uses of Logger in net.htmlparser.jericho
                     

                    Classes in net.htmlparser.jericho that implement Logger
                     class WriterLogger
                              Provides an implementation of the Logger interface that sends output to the specified java.io.Writer.
                     

                    Methods in net.htmlparser.jericho that return Logger
                     Logger StreamedSource.getLogger()
                              Returns the Logger that handles log messages.
                     Logger Source.getLogger()
                              Returns the Logger that handles log messages.
                     Logger LoggerProvider.getLogger(java.lang.String name)
                              Creates a new Logger instance with the specified name.
                     

                    Methods in net.htmlparser.jericho with parameters of type Logger
                     void StreamedSource.setLogger(Logger logger)
                              Sets the Logger that handles log messages.
                     void Source.setLogger(Logger logger)
                              Sets the Logger that handles log messages.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/WriterLogger.html0000644000175000017500000001302311214132424030023 0ustar twernertwerner Uses of Class net.htmlparser.jericho.WriterLogger (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.WriterLogger

                    No usage of net.htmlparser.jericho.WriterLogger



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/PHPTagTypes.html0000644000175000017500000001301111214132424027514 0ustar twernertwerner Uses of Class net.htmlparser.jericho.PHPTagTypes (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.PHPTagTypes

                    No usage of net.htmlparser.jericho.PHPTagTypes



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Element.html0000644000175000017500000007243611214132424027015 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Element (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Element

                    Uses of Element in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return Element
                    abstract  Element Tag.getElement()
                              Returns the element that is started or ended by this tag.
                     Element StartTag.getElement()
                              Returns the element that is started by this start tag.
                     Element FormControl.getElement()
                              Returns the element representing this form control in the source document.
                     Element EndTag.getElement()
                              Returns the element that is ended by this end tag.
                     Element Source.getElementById(java.lang.String id)
                              Returns the Element with the specified id attribute value.
                     Element Source.getEnclosingElement(int pos)
                              Returns the most nested normal Element that encloses the specified position in the source document.
                     Element Source.getEnclosingElement(int pos, java.lang.String name)
                              Returns the most nested normal Element with the specified name that encloses the specified position in the source document.
                     Element Segment.getFirstElement()
                              Returns the first Element enclosed by this segment.
                     Element Segment.getFirstElement(java.lang.String name)
                              Returns the first normal Element with the specified name enclosed by this segment.
                     Element Segment.getFirstElement(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the first Element with the specified attribute name and value pattern that is enclosed by this segment.
                     Element Segment.getFirstElement(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the first Element with the specified attribute name/value pair enclosed by this segment.
                     Element Segment.getFirstElementByClass(java.lang.String className)
                              Returns the first Element with the specified class that is enclosed by this segment.
                     Element Source.getNextElement(int pos)
                              Returns the Element beginning at or immediately following the specified position in the source document.
                     Element Source.getNextElement(int pos, java.lang.String name)
                              Returns the normal Element with the specified name beginning at or immediately following the specified position in the source document.
                     Element Source.getNextElement(int pos, java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns the Element with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.
                     Element Source.getNextElement(int pos, java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns the Element with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.
                     Element Source.getNextElementByClass(int pos, java.lang.String className)
                              Returns the Element with the specified class beginning at or immediately following the specified position in the source document.
                     Element Element.getParentElement()
                              Returns the parent of this element in the document element hierarchy.
                     

                    Methods in net.htmlparser.jericho that return types with arguments of type Element
                     java.util.List<Element> Source.getAllElements()
                              Returns a list of all elements in this source document.
                     java.util.List<Element> Segment.getAllElements()
                              Returns a list of all Element objects that are enclosed by this segment.
                     java.util.List<Element> Segment.getAllElements(StartTagType startTagType)
                              Returns a list of all Element objects with start tags of the specified type that are enclosed by this segment.
                     java.util.List<Element> Segment.getAllElements(java.lang.String name)
                              Returns a list of all Element objects with the specified name that are enclosed by this segment.
                     java.util.List<Element> Segment.getAllElements(java.lang.String attributeName, java.util.regex.Pattern valueRegexPattern)
                              Returns a list of all Element objects with the specified attribute name and value pattern that are enclosed by this segment.
                     java.util.List<Element> Segment.getAllElements(java.lang.String attributeName, java.lang.String value, boolean valueCaseSensitive)
                              Returns a list of all Element objects with the specified attribute name/value pair that are enclosed by this segment.
                     java.util.List<Element> Segment.getAllElementsByClass(java.lang.String className)
                              Returns a list of all Element objects with the specified class that are enclosed by this segment.
                     java.util.List<Element> Source.getChildElements()
                              Returns a list of the top-level elements in the document element hierarchy.
                     java.util.List<Element> Segment.getChildElements()
                              Returns a list of the immediate children of this segment in the document element hierarchy.
                     java.util.List<Element> Element.getChildElements()
                              Returns a list of the immediate children of this element in the document element hierarchy.
                     java.util.Iterator<Element> FormControl.getOptionElementIterator()
                              Returns an iterator over the OPTION elements contained within this control, in order of appearance.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/HTMLElements.html0000644000175000017500000001302311214132424027650 0ustar twernertwerner Uses of Class net.htmlparser.jericho.HTMLElements (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.HTMLElements

                    No usage of net.htmlparser.jericho.HTMLElements



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/EndTagType.html0000644000175000017500000004600311214132424027417 0ustar twernertwerner Uses of Class net.htmlparser.jericho.EndTagType (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.EndTagType

                    Uses of EndTagType in net.htmlparser.jericho
                     

                    Subclasses of EndTagType in net.htmlparser.jericho
                     class EndTagTypeGenericImplementation
                              Provides a generic implementation of the abstract EndTagType class based on the most common end tag behaviour.
                     

                    Fields in net.htmlparser.jericho declared as EndTagType
                    static EndTagType MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_END
                              The tag type given to the end tag of a Mason component called with content.
                    static EndTagType MasonTagTypes.MASON_NAMED_BLOCK_END
                              The tag type given to the end tag of a Mason named block.
                    static EndTagType EndTagType.NORMAL
                              The tag type given to a normal HTML or XML end tag (</name>).
                    static EndTagType EndTagType.UNREGISTERED
                              The tag type given to an unregistered end tag (</ ... >).
                     

                    Methods in net.htmlparser.jericho that return EndTagType
                     EndTagType StartTagType.getCorrespondingEndTagType()
                              Returns the type of end tag required to pair with a start tag of this type to form an element.
                     EndTagType EndTag.getEndTagType()
                              Returns the type of this end tag.
                     

                    Methods in net.htmlparser.jericho with parameters of type EndTagType
                     EndTag Source.getNextEndTag(int pos, EndTagType endTagType)
                              Returns the EndTag of the specified type beginning at or immediately following the specified position in the source document.
                     EndTag Source.getNextEndTag(int pos, java.lang.String name, EndTagType endTagType)
                              Returns the EndTag with the specified name and type beginning at or immediately following the specified position in the source document.
                     EndTag Source.getPreviousEndTag(int pos, EndTagType endTagType)
                              Returns the EndTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                     

                    Constructors in net.htmlparser.jericho with parameters of type EndTagType
                    StartTagType(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, EndTagType correspondingEndTagType, boolean isServerTag, boolean hasAttributes, boolean isNameAfterPrefixRequired)
                              Constructs a new StartTagType object with the specified properties.
                    StartTagTypeGenericImplementation(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, EndTagType correspondingEndTagType, boolean isServerTag)
                              Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                    StartTagTypeGenericImplementation(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, EndTagType correspondingEndTagType, boolean isServerTag, boolean hasAttributes, boolean isNameAfterPrefixRequired)
                              Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Renderer.html0000644000175000017500000003124511214132424027163 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Renderer (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Renderer

                    Uses of Renderer in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return Renderer
                     Renderer Segment.getRenderer()
                              Performs a simple rendering of the HTML markup in this segment into text.
                     Renderer Renderer.setBlockIndentSize(int blockIndentSize)
                              Sets the size of the indent to be used for anything other than LI elements.
                     Renderer Renderer.setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces)
                              Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                     Renderer Renderer.setDecorateFontStyles(boolean decorateFontStyles)
                              Sets whether decoration characters are to be included around the content of some font style elements and phrase elements.
                     Renderer Renderer.setIncludeHyperlinkURLs(boolean includeHyperlinkURLs)
                              Sets whether hyperlink URLs are included in the output.
                     Renderer Renderer.setListBullets(char[] listBullets)
                              Sets the bullet characters to use for list items inside UL elements.
                     Renderer Renderer.setListIndentSize(int listIndentSize)
                              Sets the size of the indent to be used for LI elements.
                     Renderer Renderer.setMaxLineLength(int maxLineLength)
                              Sets the column at which lines are to be wrapped.
                     Renderer Renderer.setNewLine(java.lang.String newLine)
                              Sets the string to be used to represent a newline in the output.
                     Renderer Renderer.setTableCellSeparator(java.lang.String tableCellSeparator)
                              Sets the string that is to separate table cells.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/SourceFormatter.html0000644000175000017500000002455511214132424030547 0ustar twernertwerner Uses of Class net.htmlparser.jericho.SourceFormatter (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.SourceFormatter

                    Uses of SourceFormatter in net.htmlparser.jericho
                     

                    Methods in net.htmlparser.jericho that return SourceFormatter
                     SourceFormatter Source.getSourceFormatter()
                              Formats the HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
                     SourceFormatter SourceFormatter.setCollapseWhiteSpace(boolean collapseWhiteSpace)
                              Sets whether white space in the text between the tags is to be collapsed.
                     SourceFormatter SourceFormatter.setIndentAllElements(boolean indentAllElements)
                              Sets whether all elements are to be indented, including inline-level elements and those with preformatted contents.
                     SourceFormatter SourceFormatter.setIndentString(java.lang.String indentString)
                              Sets the string to be used for indentation.
                     SourceFormatter SourceFormatter.setNewLine(java.lang.String newLine)
                              Sets the string to be used to represent a newline in the output.
                     SourceFormatter SourceFormatter.setTidyTags(boolean tidyTags)
                              Sets whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/Config.CompatibilityMode.html0000644000175000017500000002354711214132424032245 0ustar twernertwerner Uses of Class net.htmlparser.jericho.Config.CompatibilityMode (Jericho HTML Parser 3.1)

                    Uses of Class
                    net.htmlparser.jericho.Config.CompatibilityMode

                    Uses of Config.CompatibilityMode in net.htmlparser.jericho
                     

                    Fields in net.htmlparser.jericho declared as Config.CompatibilityMode
                    static Config.CompatibilityMode Config.CurrentCompatibilityMode
                              Determines the currently active compatibility mode.
                    static Config.CompatibilityMode Config.CompatibilityMode.IE
                              Microsoft Internet Explorer compatibility mode.
                    static Config.CompatibilityMode Config.CompatibilityMode.MOZILLA
                              Mozilla / Firefox / Netscape compatibility mode.
                    static Config.CompatibilityMode Config.CompatibilityMode.OPERA
                              Opera compatibility mode.
                    static Config.CompatibilityMode Config.CompatibilityMode.XHTML
                              XHTML compatibility mode.
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/class-use/LoggerProvider.html0000644000175000017500000002733711214132424030356 0ustar twernertwerner Uses of Interface net.htmlparser.jericho.LoggerProvider (Jericho HTML Parser 3.1)

                    Uses of Interface
                    net.htmlparser.jericho.LoggerProvider

                    Uses of LoggerProvider in net.htmlparser.jericho
                     

                    Fields in net.htmlparser.jericho declared as LoggerProvider
                    static LoggerProvider LoggerProvider.DISABLED
                              A LoggerProvider implementation that disables all log messages.
                    static LoggerProvider LoggerProvider.JAVA
                              A LoggerProvider implementation that wraps the standard java.util.logging system included in the Java SDK version 1.4 and above.
                    static LoggerProvider LoggerProvider.JCL
                              A LoggerProvider implementation that wraps the Jakarta Commons Logging (JCL) framework.
                    static LoggerProvider LoggerProvider.LOG4J
                              A LoggerProvider implementation that wraps the Apache Log4J framework.
                    static LoggerProvider Config.LoggerProvider
                              Determines the LoggerProvider that is used to create the default Logger object for each new Source object.
                    static LoggerProvider LoggerProvider.SLF4J
                              A LoggerProvider implementation that wraps the SLF4J framework.
                    static LoggerProvider LoggerProvider.STDERR
                              A LoggerProvider implementation that sends all log messages to the standard error output stream (System.err).
                     



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/WriterLogger.html0000644000175000017500000006627411214132422026142 0ustar twernertwerner WriterLogger (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class WriterLogger

                    java.lang.Object
                      extended by WriterLogger
                    
                    All Implemented Interfaces:
                    Logger

                    public class WriterLogger
                    extends java.lang.Object
                    implements Logger

                    Provides an implementation of the Logger interface that sends output to the specified java.io.Writer.

                    Each log entry is formatted using the BasicLogFormatter.format(String level, String message, String loggerName) method.

                    Note that each logging level can be enabled independently in this implementation. All levels are enabled by default.


                    Constructor Summary
                    WriterLogger(java.io.Writer writer)
                              Constructs a new WriterLogger with the specified Writer and the default name.
                    WriterLogger(java.io.Writer writer, java.lang.String name)
                              Constructs a new WriterLogger with the specified Writer and name.
                     
                    Method Summary
                     void debug(java.lang.String message)
                              Logs a message at the DEBUG level.
                     void error(java.lang.String message)
                              Logs a message at the ERROR level.
                     java.lang.String getName()
                              Returns the name of this logger.
                     java.io.Writer getWriter()
                              Returns the Writer to which all output is sent.
                     void info(java.lang.String message)
                              Logs a message at the INFO level.
                     boolean isDebugEnabled()
                              Indicates whether logging is enabled at the DEBUG level.
                     boolean isErrorEnabled()
                              Indicates whether logging is enabled at the ERROR level.
                     boolean isInfoEnabled()
                              Indicates whether logging is enabled at the INFO level.
                     boolean isWarnEnabled()
                              Indicates whether logging is enabled at the WARN level.
                    protected  void log(java.lang.String level, java.lang.String message)
                              Logs the specified message at the specified level.
                     void setDebugEnabled(boolean debugEnabled)
                              Sets whether logging is enabled at the DEBUG level.
                     void setErrorEnabled(boolean errorEnabled)
                              Sets whether logging is enabled at the ERROR level.
                     void setInfoEnabled(boolean infoEnabled)
                              Sets whether logging is enabled at the INFO level.
                     void setWarnEnabled(boolean warnEnabled)
                              Sets whether logging is enabled at the WARN level.
                     void warn(java.lang.String message)
                              Logs a message at the WARN level.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Constructor Detail

                    WriterLogger

                    public WriterLogger(java.io.Writer writer)
                    Constructs a new WriterLogger with the specified Writer and the default name.

                    The default logger name is "net.htmlparser.jericho".

                    Parameters:
                    writer - the Writer to which all output is sent.

                    WriterLogger

                    public WriterLogger(java.io.Writer writer,
                                        java.lang.String name)
                    Constructs a new WriterLogger with the specified Writer and name.

                    The value of the name argument is only relevant if the BasicLogFormatter.OutputName static property is set to true, otherwise the name is not included in the output at all.

                    Parameters:
                    writer - the Writer to which all output is sent.
                    name - the logger name, may be null.
                    Method Detail

                    getWriter

                    public java.io.Writer getWriter()
                    Returns the Writer to which all output is sent.

                    Returns:
                    the Writer to which all output is sent.

                    getName

                    public java.lang.String getName()
                    Returns the name of this logger.

                    Returns:
                    the name of this logger, may be null.

                    error

                    public void error(java.lang.String message)
                    Description copied from interface: Logger
                    Logs a message at the ERROR level.

                    Specified by:
                    error in interface Logger
                    Parameters:
                    message - the message to log.

                    warn

                    public void warn(java.lang.String message)
                    Description copied from interface: Logger
                    Logs a message at the WARN level.

                    Specified by:
                    warn in interface Logger
                    Parameters:
                    message - the message to log.

                    info

                    public void info(java.lang.String message)
                    Description copied from interface: Logger
                    Logs a message at the INFO level.

                    Specified by:
                    info in interface Logger
                    Parameters:
                    message - the message to log.

                    debug

                    public void debug(java.lang.String message)
                    Description copied from interface: Logger
                    Logs a message at the DEBUG level.

                    Specified by:
                    debug in interface Logger
                    Parameters:
                    message - the message to log.

                    isErrorEnabled

                    public boolean isErrorEnabled()
                    Description copied from interface: Logger
                    Indicates whether logging is enabled at the ERROR level.

                    Specified by:
                    isErrorEnabled in interface Logger
                    Returns:
                    true if logging is enabled at the ERROR level, otherwise false.

                    setErrorEnabled

                    public void setErrorEnabled(boolean errorEnabled)
                    Sets whether logging is enabled at the ERROR level.

                    Parameters:
                    errorEnabled - determines whether logging is enabled at the ERROR level.

                    isWarnEnabled

                    public boolean isWarnEnabled()
                    Description copied from interface: Logger
                    Indicates whether logging is enabled at the WARN level.

                    Specified by:
                    isWarnEnabled in interface Logger
                    Returns:
                    true if logging is enabled at the WARN level, otherwise false.

                    setWarnEnabled

                    public void setWarnEnabled(boolean warnEnabled)
                    Sets whether logging is enabled at the WARN level.

                    Parameters:
                    warnEnabled - determines whether logging is enabled at the WARN level.

                    isInfoEnabled

                    public boolean isInfoEnabled()
                    Description copied from interface: Logger
                    Indicates whether logging is enabled at the INFO level.

                    Specified by:
                    isInfoEnabled in interface Logger
                    Returns:
                    true if logging is enabled at the INFO level, otherwise false.

                    setInfoEnabled

                    public void setInfoEnabled(boolean infoEnabled)
                    Sets whether logging is enabled at the INFO level.

                    Parameters:
                    infoEnabled - determines whether logging is enabled at the INFO level.

                    isDebugEnabled

                    public boolean isDebugEnabled()
                    Description copied from interface: Logger
                    Indicates whether logging is enabled at the DEBUG level.

                    Specified by:
                    isDebugEnabled in interface Logger
                    Returns:
                    true if logging is enabled at the DEBUG level, otherwise false.

                    setDebugEnabled

                    public void setDebugEnabled(boolean debugEnabled)
                    Sets whether logging is enabled at the DEBUG level.

                    Parameters:
                    debugEnabled - determines whether logging is enabled at the DEBUG level.

                    log

                    protected void log(java.lang.String level,
                                       java.lang.String message)
                    Logs the specified message at the specified level.

                    This method is called internally by the error(String), warn(String), info(String) and debug(String) methods, with the level argument set to the text "ERROR", "WARN", "INFO", or "DEBUG" respectively.

                    The default implementation of this method sends the output of BasicLogFormatter.format(level,message,getName()) to the Writer specified in the class constructor, and then flushes it.

                    Overriding this method in a subclass provides a convenient means of logging to a Writer using a different format.

                    Parameters:
                    level - a string representing the level of the log message.
                    message - the message to log.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/PHPTagTypes.html0000644000175000017500000005474211214132422025633 0ustar twernertwerner PHPTagTypes (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class PHPTagTypes

                    java.lang.Object
                      extended by PHPTagTypes
                    

                    public final class PHPTagTypes
                    extends java.lang.Object

                    Contains tag types related to the PHP server platform.

                    There is no specific tag type defined for the ASP-style PHP tag as it is recognised using the common server tag type.

                    The tag types defined in this class are not registered by default. The register() method is provided as a convenient way to register them all at once.


                    Field Summary
                    static StartTagType PHP_SCRIPT
                              The tag type given to a script-style PHP start tag (<script language="php"> ... </script>).
                    static StartTagType PHP_SHORT
                              The tag type given to a short-form PHP tag (<? ... ?>).
                    static StartTagType PHP_STANDARD
                              The tag type given to a standard PHP tag (<?php ... ?>).
                     
                    Method Summary
                    static boolean defines(TagType tagType)
                              Indicates whether the specified tag type is defined in this class.
                    static boolean isParsedByPHP(TagType tagType)
                              Indicates whether the specified tag type is recognised by a PHP parser.
                    static void register()
                              Registers all of the tag types defined in this class at once.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Field Detail

                    PHP_STANDARD

                    public static final StartTagType PHP_STANDARD
                    The tag type given to a standard PHP tag (<?php ... ?>).

                    Note that the standard PHP processor includes as part of the tag any newline characters directly following the closing delimiter, but PHP tags recognised by this library do not include trailing newlines. They must be removed manually if required.

                    This library only correctly recognises standard PHP tags that comply with the XML syntax for processing instructions. Specifically, the tag is terminated by the first occurrence of the closing delimiter "?>", even if it occurs within a PHP string expression. Unfortunately there is no reliable way to determine the end of a PHP tag without the use of a PHP parser. The following code is an example of a standard PHP tag that is not recognised correctly by this parser because of the presence of the closing delimiter within a string expression:

                    <?php echo("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); ?>

                    This is recognised as the PHP tag: <?php echo("<?xml version=\"1.0\" encoding=\"UTF-8\"?>
                    followed by the plain text: \n"); ?>

                    Properties:
                    PropertyValue
                    DescriptionPHP standard tag
                    StartDelimiter<?php
                    ClosingDelimiter?>
                    IsServerTagtrue
                    NamePrefix?php
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <?php echo '<p>Hello World</p>'; ?>


                    PHP_SHORT

                    public static final StartTagType PHP_SHORT
                    The tag type given to a short-form PHP tag (<? ... ?>).

                    When this tag type is registered, all XML processing instructions are recognised as short-form PHP tags instead.

                    The comments in the documentation of the PHP_STANDARD tag type regarding the termination of PHP tags and trailing newlines are also applicable to this tag type.

                    Properties:
                    PropertyValue
                    DescriptionPHP short tag
                    StartDelimiter<?
                    ClosingDelimiter?>
                    IsServerTagtrue
                    NamePrefix?
                    CorrespondingEndTagTypenull
                    HasAttributesfalse
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <? echo '<p>Hello World</p>'; ?>


                    PHP_SCRIPT

                    public static final StartTagType PHP_SCRIPT
                    The tag type given to a script-style PHP start tag (<script language="php"> ... </script>).

                    Properties:
                    PropertyValue
                    DescriptionPHP script
                    StartDelimiter<script
                    ClosingDelimiter>
                    IsServerTagtrue
                    NamePrefixscript
                    CorrespondingEndTagTypeEndTagType.NORMAL
                    HasAttributestrue
                    IsNameAfterPrefixRequiredfalse
                    Example:
                    <script language="php"> echo '<p>Hello World</p>'; </script>

                    Method Detail

                    register

                    public static void register()
                    Registers all of the tag types defined in this class at once.

                    The tag types must be registered before the parser will recognise them.


                    defines

                    public static boolean defines(TagType tagType)
                    Indicates whether the specified tag type is defined in this class.

                    Parameters:
                    tagType - the TagType to test.
                    Returns:
                    true if the specified tag type is defined in this class, otherwise false.

                    isParsedByPHP

                    public static boolean isParsedByPHP(TagType tagType)
                    Indicates whether the specified tag type is recognised by a PHP parser.

                    This is true if the specified tag type is defined in this class or if it is the common server tag type.

                    Parameters:
                    tagType - the TagType to test.
                    Returns:
                    true if the specified tag type is recognised by a PHP parser, otherwise false.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Element.html0000644000175000017500000014704011214132420025104 0ustar twernertwerner Element (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Element

                    java.lang.Object
                      extended by Segment
                          extended by Element
                    
                    All Implemented Interfaces:
                    java.lang.CharSequence, java.lang.Comparable<Segment>

                    public final class Element
                    extends Segment

                    Represents an element in a specific source document, which encompasses a start tag, an optional end tag and all content in between.

                    Take the following HTML segment as an example:

                    <p>This is a sample paragraph.</p>

                    The whole segment is represented by an Element object. It comprises the StartTag "<p>", the EndTag "</p>", and the text in between. An element may also contain other elements between its start and end tags.

                    The term normal element refers to an element having a start tag with a type of StartTagType.NORMAL. This comprises all HTML elements and non-HTML elements.

                    Element instances are obtained using one of the following methods:

                    See also the HTMLElements class, and the XML 1.0 specification for elements.

                    Element Structure

                    The three possible structures of an element are listed below:

                    Single Tag Element:
                    Example:
                    <img src="mypicture.jpg">

                    The element consists only of a single start tag and has no element content (although the start tag itself may have tag content).
                    getEndTag()==null
                    isEmpty()==true
                    getEnd()==getStartTag().getEnd()

                    This occurs in the following situations:

                    Explicitly Terminated Element:
                    Example:
                    <p>This is a sample paragraph.</p>

                    The element consists of a start tag, content, and an end tag.
                    getEndTag()!=null.
                    isEmpty()==false (provided the end tag doesn't immediately follow the start tag)
                    getEnd()==getEndTag().getEnd().

                    This occurs in the following situations, assuming the start tag's matching end tag is present in the source document:

                    Implicitly Terminated Element:
                    Example:
                    <p>This text is included in the paragraph element even though no end tag is present.
                    <p>This is the next paragraph.

                    The element consists of a start tag and content, but no end tag.
                    getEndTag()==null.
                    isEmpty()==false
                    getEnd()!=getStartTag().getEnd().

                    This only occurs in an HTML element for which the end tag is optional.

                    The element ends at the start of a tag which implies the termination of the element, called the implicitly terminating tag. If the implicitly terminating tag is situated immediately after the element's start tag, the element is classed as a single tag element.

                    See the element parsing rules for HTML elements with optional end tags for details on which tags can implicitly terminate a given element.

                    See also the documentation of the HTMLElements.getEndTagOptionalElementNames() method.

                    Element Parsing Rules

                    The following rules describe the algorithm used in the StartTag.getElement() method to construct an element. The detection of the start tag's matching end tag or other terminating tags always takes into account the possible nesting of elements.

                    See Also:
                    HTMLElements

                    Method Summary
                     Attributes getAttributes()
                              Returns the attributes specified in this element's start tag.
                     java.lang.String getAttributeValue(java.lang.String attributeName)
                              Returns the decoded value of the attribute with the specified name (case insensitive).
                     java.util.List<Element> getChildElements()
                              Returns a list of the immediate children of this element in the document element hierarchy.
                     Segment getContent()
                              Returns the segment representing the content of the element.
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     int getDepth()
                              Returns the nesting depth of this element in the document element hierarchy.
                     EndTag getEndTag()
                              Returns the end tag of the element.
                     FormControl getFormControl()
                              Returns the FormControl defined by this element.
                     java.lang.String getName()
                              Returns the name of the start tag of this element, always in lower case.
                     Element getParentElement()
                              Returns the parent of this element in the document element hierarchy.
                     StartTag getStartTag()
                              Returns the start tag of the element.
                     boolean isEmpty()
                              Indicates whether this element has zero-length content.
                     boolean isEmptyElementTag()
                              Indicates whether this element is an empty-element tag.
                     
                    Methods inherited from class Segment
                    charAt, compareTo, encloses, encloses, equals, getAllCharacterReferences, getAllElements, getAllElements, getAllElements, getAllElements, getAllElements, getAllElementsByClass, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTags, getAllStartTagsByClass, getAllTags, getAllTags, getBegin, getEnd, getFirstElement, getFirstElement, getFirstElement, getFirstElement, getFirstElementByClass, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTag, getFirstStartTagByClass, getFormControls, getFormFields, getNodeIterator, getRenderer, getSource, getTextExtractor, hashCode, ignoreWhenParsing, isWhiteSpace, isWhiteSpace, length, parseAttributes, subSequence, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, finalize, getClass, notify, notifyAll, wait, wait, wait
                     

                    Method Detail

                    getParentElement

                    public Element getParentElement()
                    Returns the parent of this element in the document element hierarchy.

                    The Source.fullSequentialParse() method must be called (either explicitly or implicitly) immediately after construction of the Source object if this method is to be used. An IllegalStateException is thrown if a full sequential parse has not been performed or if it was performed after this element was found.

                    This method returns null for a top-level element, as well as any element formed from a server tag, regardless of whether it is nested inside a normal element.

                    See the Source.getChildElements() method for more details.

                    Returns:
                    the parent of this element in the document element hierarchy, or null if this element is a top-level element.
                    Throws:
                    java.lang.IllegalStateException - if a full sequential parse has not been performed or if it was performed after this element was found.
                    See Also:
                    getChildElements()

                    getChildElements

                    public final java.util.List<Element> getChildElements()
                    Returns a list of the immediate children of this element in the document element hierarchy.

                    The objects in the list are all of type Element.

                    See the Source.getChildElements() method for more details.

                    Overrides:
                    getChildElements in class Segment
                    Returns:
                    a list of the immediate children of this element in the document element hierarchy, guaranteed not null.
                    See Also:
                    getParentElement()

                    getDepth

                    public int getDepth()
                    Returns the nesting depth of this element in the document element hierarchy.

                    The Source.fullSequentialParse() method must be called (either explicitly or implicitly) after construction of the Source object if this method is to be used. An IllegalStateException is thrown if a full sequential parse has not been performed or if it was performed after this element was found.

                    A top-level element has a nesting depth of 0.

                    An element formed from a server tag always has a nesting depth of 0, regardless of whether it is nested inside a normal element.

                    See the Source.getChildElements() method for more details.

                    Returns:
                    the nesting depth of this element in the document element hierarchy.
                    Throws:
                    java.lang.IllegalStateException - if a full sequential parse has not been performed or if it was performed after this element was found.
                    See Also:
                    getParentElement()

                    getContent

                    public Segment getContent()
                    Returns the segment representing the content of the element.

                    This segment spans between the end of the start tag and the start of the end tag. If the end tag is not present, the content reaches to the end of the element.

                    A zero-length segment is returned if the element is empty.

                    Returns:
                    the segment representing the content of the element, guaranteed not null.

                    getStartTag

                    public StartTag getStartTag()
                    Returns the start tag of the element.

                    Returns:
                    the start tag of the element.

                    getEndTag

                    public EndTag getEndTag()
                    Returns the end tag of the element.

                    If the element has no end tag this method returns null.

                    Returns:
                    the end tag of the element, or null if the element has no end tag.

                    getName

                    public java.lang.String getName()
                    Returns the name of the start tag of this element, always in lower case.

                    This is equivalent to getStartTag().getName().

                    See the Tag.getName() method for more information.

                    Returns:
                    the name of the start tag of this element, always in lower case.

                    isEmpty

                    public boolean isEmpty()
                    Indicates whether this element has zero-length content.

                    This is equivalent to getContent().length()==0.

                    Note that this is a broader definition than both the HTML definition of an empty element, which is only those elements whose end tag is forbidden, and the XML definition of an empty element, which is "either a start-tag immediately followed by an end-tag, or an empty-element tag". The other possibility covered by this property is the case of an HTML element with an optional end tag that is immediately followed by another tag that implicitly terminates the element.

                    Returns:
                    true if this element has zero-length content, otherwise false.
                    See Also:
                    isEmptyElementTag()

                    isEmptyElementTag

                    public boolean isEmptyElementTag()
                    Indicates whether this element is an empty-element tag.

                    This is equivalent to getStartTag().isEmptyElementTag().

                    Returns:
                    true if this element is an empty-element tag, otherwise false.

                    getAttributes

                    public Attributes getAttributes()
                    Returns the attributes specified in this element's start tag.

                    This is equivalent to getStartTag().getAttributes().

                    Returns:
                    the attributes specified in this element's start tag.
                    See Also:
                    StartTag.getAttributes()

                    getAttributeValue

                    public java.lang.String getAttributeValue(java.lang.String attributeName)
                    Returns the decoded value of the attribute with the specified name (case insensitive).

                    Returns null if the start tag of this element does not have attributes, no attribute with the specified name exists or the attribute has no value.

                    This is equivalent to getStartTag().getAttributeValue(attributeName).

                    Parameters:
                    attributeName - the name of the attribute to get.
                    Returns:
                    the decoded value of the attribute with the specified name, or null if the attribute does not exist or has no value.

                    getFormControl

                    public FormControl getFormControl()
                    Returns the FormControl defined by this element.

                    Returns:
                    the FormControl defined by this element, or null if it is not a control.

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Description copied from class: Segment
                    Returns a string representation of this object useful for debugging purposes.

                    Overrides:
                    getDebugInfo in class Segment
                    Returns:
                    a string representation of this object useful for debugging purposes.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/package-summary.html0000644000175000017500000006227011214132422026604 0ustar twernertwerner net.htmlparser.jericho (Jericho HTML Parser 3.1)

                    Package net.htmlparser.jericho

                    Jericho HTML Parser 3.1

                    See:
                              Description

                    Interface Summary
                    CharStreamSource Represents a character stream source.
                    HTMLElementName Contains static fields representing the names of all elements defined in the HTML 4.01 specification.
                    Logger Defines the interface for handling log messages.
                    LoggerProvider Defines the interface for a factory class to provide Logger instances for each Source object.
                    OutputSegment Defines the interface for an output segment, which is used in an OutputDocument to replace segments of the source document with other text.
                    ParseText Represents the text from the source document that is to be parsed.
                     

                    Class Summary
                    Attribute Represents a single attribute name/value segment within a StartTag.
                    Attributes Represents the list of Attribute objects present within a particular StartTag.
                    BasicLogFormatter Provides basic formatting for log messages.
                    CharacterEntityReference Represents an HTML Character Entity Reference.
                    CharacterReference Represents an HTML Character Reference, implemented by the subclasses CharacterEntityReference and NumericCharacterReference.
                    CharStreamSourceUtil Contains static utility methods for manipulating the way data is retrieved from a CharStreamSource object.
                    Config Encapsulates global configuration properties which determine the behaviour of various functions.
                    Config.CompatibilityMode Represents a set of configuration parameters that relate to user agent compatibility issues.
                    Element Represents an element in a specific source document, which encompasses a start tag, an optional end tag and all content in between.
                    EndTag Represents the end tag of an element in a specific source document.
                    EndTagType Defines the syntax for an end tag type.
                    EndTagTypeGenericImplementation Provides a generic implementation of the abstract EndTagType class based on the most common end tag behaviour.
                    FormControl Represents an HTML form control.
                    FormControlOutputStyle.ConfigDisplayValue Contains static properties that configure the FormControlOutputStyle.DISPLAY_VALUE form control output style.
                    FormField Represents a field in an HTML form, a field being defined as the group of all form controls having the same name.
                    FormFields Represents a collection of FormField objects.
                    HTMLElements Contains static methods which group HTML element names by the characteristics of their associated elements.
                    MasonTagTypes Contains tag types related to the Mason server platform.
                    MicrosoftTagTypes Contains tag types recognised exclusively by Microsoft® Internet Explorer.
                    NumericCharacterReference Represents an HTML Numeric Character Reference.
                    OutputDocument Represents a modified version of an original Source document or Segment.
                    PHPTagTypes Contains tag types related to the PHP server platform.
                    Renderer Performs a simple rendering of HTML markup into text.
                    RowColumnVector Represents the row and column number of a character position in the source document.
                    Segment Represents a segment of a Source document.
                    Source Represents a source HTML document.
                    SourceCompactor Compacts HTML source by removing all unnecessary white space.
                    SourceFormatter Formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
                    StartTag Represents the start tag of an element in a specific source document.
                    StartTagType Defines the syntax for a start tag type.
                    StartTagTypeGenericImplementation Provides a generic implementation of the abstract StartTagType class based on the most common start tag behaviour.
                    StreamedSource Represents a streamed source HTML document.
                    Tag Represents either a StartTag or EndTag in a specific source document.
                    TagType Defines the syntax for a tag type that can be recognised by the parser.
                    TextExtractor Extracts the textual content from HTML markup.
                    Util Contains miscellaneous utility methods not directly associated with the HTML Parser library.
                    WriterLogger Provides an implementation of the Logger interface that sends output to the specified java.io.Writer.
                     

                    Enum Summary
                    FormControlOutputStyle An enumerated type representing the three major output styles of a form control's output element.
                    FormControlType Represents the control type of a FormControl.
                     

                    Package net.htmlparser.jericho Description

                    Jericho HTML Parser 3.1

                    A Java library allowing analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML. Also provides high-level HTML form manipulation functions.

                    For an introduction to the API, the documentation of the Source class is the best place to start.

                    For a summary of features and sample applications, visit the homepage at http://jerichohtml.sourceforge.net

                    For downloads, support and updates visit the SourceForge.net project page at http://sourceforge.net/projects/jerichohtml/

                    The Jericho HTML Parser is an open source library released under both the Eclipse Public License (EPL) and GNU Lesser General Public License (LGPL). You are therefore free to use it in commercial applications subject to the terms detailed in either one of these licence documents.



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/HTMLElements.html0000644000175000017500000040772011214132422025762 0ustar twernertwerner HTMLElements (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class HTMLElements

                    java.lang.Object
                      extended by HTMLElements
                    

                    public final class HTMLElements
                    extends java.lang.Object

                    Contains static methods which group HTML element names by the characteristics of their associated elements.

                    An HTML element is a normal element with a name that matches one of the HTML element names (ignoring case). This type of element spans the logical HTML element as described in the HTML 4.01 specification section 3.2.1, which may be implicitly terminated if it specifies an optional end tag.

                    The term Non-HTML element refers to a normal element with a name that does not match one of the HTML element names. This type of element must be either a single tag element or explicitly terminated.

                    All of the sets returned by the methods in this class may be modified to customise the behaviour of the parser. Care must be taken however to ensure that the sets only contain tag names in lower case.

                    Below is a table summarising the default characteristics of each HTML element. See also the index of elements in the HTML 4.01 specification for the official table containing similar information.

                    NameBox TypeStart TagEnd TagNestDepr.Description / Specification
                    AInline RequiredNF anchor
                    ABBRInline Required  abbreviated form (e.g., WWW, HTTP, etc.)
                    ACRONYMInline Required  acronym
                    ADDRESSBlock RequiredNF information on author
                    APPLETInline RequiredNFDJava applet
                    AREA  ForbiddenNF client-side image map area
                    BInline Required  bold text style
                    BASE  ForbiddenNF document base URI
                    BASEFONTInline ForbiddenNFDbase font size
                    BDOInline Required  I18N BiDi over-ride
                    BIGInline Required  large text style
                    BLOCKQUOTEBlock Required  long quotation
                    BODY OptionalOptional (details)NF document body
                    BRInline ForbiddenNF forced line break
                    BUTTONInline RequiredNF push button
                    CAPTION  RequiredNF table caption
                    CENTERBlock Required Dshorthand for DIV align=center
                    CITEInline Required  citation
                    CODEInline Required  computer code fragment
                    COL  ForbiddenNF table column
                    COLGROUP  Optional (details)NF table column group
                    DD  Optional (details)  definition description
                    DELInline Required  deleted text
                    DFNInline Required  instance definition
                    DIRBlock Required Ddirectory list
                    DIVBlock Required  generic language/style container
                    DLBlock Required  definition list
                    DT  Optional (details)  definition term
                    EMInline Required  emphasis
                    FIELDSETBlock Required  form control group
                    FONTInline Required Dlocal change to font
                    FORMBlock RequiredNF interactive form
                    FRAME  ForbiddenNF subwindow
                    FRAMESET  Required  window subdivision
                    H1Block Required  heading
                    H2Block Required  heading
                    H3Block Required  heading
                    H4Block Required  heading
                    H5Block Required  heading
                    H6Block Required  heading
                    HEAD OptionalOptional (details)NF document head
                    HRBlock ForbiddenNF horizontal rule
                    HTML OptionalOptional (details)NF document root element
                    IInline Required  italic text style
                    IFRAMEInline RequiredNF inline subwindow
                    IMGInline ForbiddenNF Embedded image
                    INPUTInline ForbiddenNF form control
                    INSInline Required  inserted text
                    ISINDEXBlock ForbiddenNFDsingle line prompt
                    KBDInline Required  text to be entered by the user
                    LABELInline RequiredNF form field label text
                    LEGEND  RequiredNF fieldset legend
                    LI  Optional (details)  list item
                    LINK  ForbiddenNF a media-independent link
                    MAPInline Required  client-side image map
                    MENUBlock Required Dmenu list
                    META  ForbiddenNF generic metainformation
                    NOFRAMESBlock Required  alternate content container for non frame-based rendering
                    NOSCRIPTBlock Required  alternate content container for non script-based rendering
                    OBJECTInline Required  generic embedded object
                    OLBlock Required  ordered list
                    OPTGROUP  RequiredNF option group
                    OPTION  Optional (details)NF selectable choice
                    PBlock Optional (details)NF paragraph
                    PARAM  ForbiddenNF named property value
                    PREBlock Required  preformatted text
                    QInline Required  short inline quotation
                    SInline Required Dstrike-through text style
                    SAMPInline Required  sample program output, scripts, etc.
                    SCRIPTInline RequiredNF script statements
                    SELECTInline RequiredNF option selector
                    SMALLInline Required  small text style
                    SPANInline Required  generic language/style container
                    STRIKEInline Required Dstrike-through text
                    STRONGInline Required  strong emphasis
                    STYLE  RequiredNF style info
                    SUBInline Required  subscript
                    SUPInline Required  superscript
                    TABLEBlock Required  table
                    TBODY OptionalOptional (details)  table body
                    TD  Optional (details)  table data cell
                    TEXTAREAInline RequiredNF multi-line text field
                    TFOOT  Optional (details)  table footer
                    TH  Optional (details)  table header cell
                    THEAD  Optional (details)  table header
                    TITLE  RequiredNF document title
                    TR  Optional (details)  table row
                    TTInline Required  teletype or monospaced text style
                    UInline Required Dunderlined text style
                    ULBlock Required  unordered list
                    VARInline Required  instance of a variable or program argument

                    See Also:
                    HTMLElementName, Element

                    Method Summary
                    static java.util.Set<java.lang.String> getBlockLevelElementNames()
                              Returns a set containing the names of all the block-level elements.
                    static java.util.Set<java.lang.String> getDeprecatedElementNames()
                              Returns a set containing the names of all deprecated elements in HTML 4.01.
                    static java.util.List<java.lang.String> getElementNames()
                              Returns a list containing all of the HTML element names.
                    static java.util.Set<java.lang.String> getEndTagForbiddenElementNames()
                              Returns a set containing the names of all of the HTML elements for which the end tag is forbidden.
                    static java.util.Set<java.lang.String> getEndTagOptionalElementNames()
                              Returns a set containing the names of all of the HTML elements for which the end tag is optional.
                    static java.util.Set<java.lang.String> getEndTagRequiredElementNames()
                              Returns a set containing the names of all of the HTML elements for which the end tag is required.
                    static java.util.Set<java.lang.String> getInlineLevelElementNames()
                              Returns a set containing the names of all the inline-level elements.
                    static java.util.Set<java.lang.String> getNestingForbiddenElementNames()
                              Returns a set containing the names of all of the HTML elements which should never contain elements of the same name, either as direct or indirect descendants.
                    static java.util.Set<java.lang.String> getNonterminatingElementNames(java.lang.String endTagOptionalElementName)
                              Returns the names of elements that do NOT implicitly terminate an HTML element with the specified name.
                    static java.util.Set<java.lang.String> getStartTagOptionalElementNames()
                              Returns a set containing the names of all of the HTML elements for which the start tag is optional.
                    static java.util.Set<java.lang.String> getTerminatingEndTagNames(java.lang.String endTagOptionalElementName)
                              Returns the names of end tags that implicitly terminate an HTML element with the specified name.
                    static java.util.Set<java.lang.String> getTerminatingStartTagNames(java.lang.String endTagOptionalElementName)
                              Returns the names of start tags that implicitly terminate an HTML element with the specified name.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
                     

                    Method Detail

                    getElementNames

                    public static final java.util.List<java.lang.String> getElementNames()
                    Returns a list containing all of the HTML element names.

                    The returned list is in alphabetical order.

                    Returns:
                    a list containing all of the HTML element names.

                    getBlockLevelElementNames

                    public static java.util.Set<java.lang.String> getBlockLevelElementNames()
                    Returns a set containing the names of all the block-level elements.

                    The element names contained in this set are:
                    ADDRESS, BLOCKQUOTE, CENTER, DIR, DIV, DL, FIELDSET, FORM, H1, H2, H3, H4, H5, H6, HR, ISINDEX, MENU, NOFRAMES, NOSCRIPT, OL, P, PRE, TABLE, UL

                    This set is defined in the HTML 4.01 Transitional DTD, but more detailed information can be found in the HTML 4.01 specification section 7.5.3 - Block-level and inline elements and the CSS2 specification section 9.2.1 - Block-level elements and block boxes.

                    The CSS2 display property can be used to override the normal box type of an element.

                    Returns:
                    a set containing the names of all the block-level elements.
                    See Also:
                    getInlineLevelElementNames()

                    getInlineLevelElementNames

                    public static java.util.Set<java.lang.String> getInlineLevelElementNames()
                    Returns a set containing the names of all the inline-level elements.

                    The element names contained in this set are:
                    A, ABBR, ACRONYM, APPLET, B, BASEFONT, BDO, BIG, BR, BUTTON, CITE, CODE, DEL, DFN, EM, FONT, I, IFRAME, IMG, INPUT, INS, KBD, LABEL, MAP, OBJECT, Q, S, SAMP, SCRIPT, SELECT, SMALL, SPAN, STRIKE, STRONG, SUB, SUP, TEXTAREA, TT, U, VAR

                    This set is defined in the HTML 4.01 Transitional DTD, but more detailed information can be found in the HTML 4.01 specification section 7.5.3 - Block-level and inline elements and the CSS2 specification section 9.2.2 - Inline-level elements and inline boxes.

                    The CSS2 display property can be used to override the normal box type of an element.

                    The HTML Document Type Definitions forbid the presence of block-level elements inside inline-level elements, but it is tolerated by all popular browsers in various situations, even in XHTML documents. The most notorious example of this is the common inclusion of block-level elements inside FONT elements.

                    Returns:
                    a set containing the names of all the inline-level elements.
                    See Also:
                    getBlockLevelElementNames()

                    getDeprecatedElementNames

                    public static java.util.Set<java.lang.String> getDeprecatedElementNames()
                    Returns a set containing the names of all deprecated elements in HTML 4.01.

                    Returns:
                    a set containing the names of all deprecated elements in HTML 4.01.

                    getEndTagForbiddenElementNames

                    public static java.util.Set<java.lang.String> getEndTagForbiddenElementNames()
                    Returns a set containing the names of all of the HTML elements for which the end tag is forbidden.

                    See the element parsing rules for HTML elements with forbidden end tags for more information.

                    The index of elements in the HTML 4.01 specification includes the letter 'F' in the "End Tag" column for elements whose end tag is forbidden.

                    Returns:
                    a set containing the names of all of the HTML elements for which the end tag is forbidden.
                    See Also:
                    getEndTagOptionalElementNames(), getEndTagRequiredElementNames()

                    getEndTagOptionalElementNames

                    public static java.util.Set<java.lang.String> getEndTagOptionalElementNames()
                    Returns a set containing the names of all of the HTML elements for which the end tag is optional.

                    Elements with these names may be implicitly terminated by a subsequent terminating start tag or terminating end tag. A list of these terminating tags, and the names of non-terminating elements that can be nested within the element, can be found in the documentation of each relevant element in the HTMLElementName class.

                    See the element parsing rules for HTML elements with optional end tags for more information.

                    The index of elements in the HTML 4.01 specification includes the letter 'O' in the "End Tag" column for elements whose end tag is optional.

                    Returns:
                    a set containing the names of all of the HTML elements for which the end tag is optional.
                    See Also:
                    getEndTagForbiddenElementNames(), getEndTagRequiredElementNames()

                    getEndTagRequiredElementNames

                    public static java.util.Set<java.lang.String> getEndTagRequiredElementNames()
                    Returns a set containing the names of all of the HTML elements for which the end tag is required.

                    See the element parsing rules for HTML elements with required end tags for more information.

                    The index of elements in the HTML 4.01 specification leaves the "End Tag" column blank for elements whose end tag is required.

                    Returns:
                    a set containing the names of all of the HTML elements for which the end tag is required.
                    See Also:
                    getEndTagForbiddenElementNames(), getEndTagOptionalElementNames()

                    getStartTagOptionalElementNames

                    public static java.util.Set<java.lang.String> getStartTagOptionalElementNames()
                    Returns a set containing the names of all of the HTML elements for which the start tag is optional.

                    Elements with optional start tags must be present in the document object model (DOM) in certain locations, either forming part of the structure of the HTML document as a whole (e.g. the HTML, HEAD, and BODY elements), or forming part of the structure of a TABLE element (e.g. the TBODY element). The location of an omitted start tag in the document's object model can be inferred from the surrounding elements.

                    This library does not use this property in any way when parsing documents, and does not construct a document object model from the source, so no implied element is created where an optional start tag is omitted.

                    When the start tag has been omitted in the document text, the corresponding end tag should also be omitted.

                    The index of elements in the HTML 4.01 specification includes the letter 'O' in the "Start Tag" column for elements whose start tag is optional.

                    Returns:
                    a set containing the names of all of the HTML elements for which the start tag is optional.

                    getTerminatingStartTagNames

                    public static java.util.Set<java.lang.String> getTerminatingStartTagNames(java.lang.String endTagOptionalElementName)
                    Returns the names of start tags that implicitly terminate an HTML element with the specified name.

                    This method is only relevant to HTML elements for which the end tag is optional. It returns null if
                    getEndTagOptionalElementNames().contains(endTagOptionalElementName.toLowerCase())==false.

                    Parameters:
                    endTagOptionalElementName - the name of an element for which the end tag is optional.
                    Returns:
                    the names of start tags that implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the end tag is optional.
                    See Also:
                    getTerminatingEndTagNames(String endTagOptionalElementName), getNonterminatingElementNames(String endTagOptionalElementName)

                    getTerminatingEndTagNames

                    public static java.util.Set<java.lang.String> getTerminatingEndTagNames(java.lang.String endTagOptionalElementName)
                    Returns the names of end tags that implicitly terminate an HTML element with the specified name.

                    This method is only relevant to HTML elements for which the end tag is optional. It returns null if
                    getEndTagOptionalElementNames().contains(endTagOptionalElementName.toLowerCase())==false.

                    Note that removing the tag name matching the specified element has no effect on the behaviour of the parser, as it is always assumed that a start tag is terminated by an end tag with a matching name.

                    Parameters:
                    endTagOptionalElementName - the name of an element for which the end tag is optional.
                    Returns:
                    the names of end tags that implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the end tag is optional.
                    See Also:
                    getTerminatingStartTagNames(String endTagOptionalElementName), getNonterminatingElementNames(String endTagOptionalElementName)

                    getNonterminatingElementNames

                    public static java.util.Set<java.lang.String> getNonterminatingElementNames(java.lang.String endTagOptionalElementName)
                    Returns the names of elements that do NOT implicitly terminate an HTML element with the specified name. Neither can any tag nested inside any of these elements implicitly terminate the specified element, even if it is listed as one of the terminating start tags or terminating end tags.

                    This method is only relevant to HTML elements for which the end tag is optional. It returns null if
                    getEndTagOptionalElementNames().contains(endTagOptionalElementName.toLowerCase())==false.

                    Parameters:
                    endTagOptionalElementName - the name of an element for which the end tag is optional.
                    Returns:
                    the names of elements that do NOT implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the end tag is optional.
                    See Also:
                    getTerminatingStartTagNames(String endTagOptionalElementName), getTerminatingEndTagNames(String endTagOptionalElementName)

                    getNestingForbiddenElementNames

                    public static java.util.Set<java.lang.String> getNestingForbiddenElementNames()
                    Returns a set containing the names of all of the HTML elements which should never contain elements of the same name, either as direct or indirect descendants.

                    Returns:
                    a set containing the names of all of the HTML elements which should never contain elements of the same name.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/EndTagType.html0000644000175000017500000010257311214132420025521 0ustar twernertwerner EndTagType (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class EndTagType

                    java.lang.Object
                      extended by TagType
                          extended by EndTagType
                    
                    Direct Known Subclasses:
                    EndTagTypeGenericImplementation

                    public abstract class EndTagType
                    extends TagType

                    Defines the syntax for an end tag type.

                    An end tag type is a TagType that starts with the characters '</'.

                    The singleton instances of all the standard end tag types are available in this class as static fields.

                    Because all EndTagType instances must be singletons, the '==' operator can be used to test for a particular tag type instead of the equals(Object) method.

                    See Also:
                    StartTagType

                    Field Summary
                    static EndTagType NORMAL
                              The tag type given to a normal HTML or XML end tag (</name>).
                    static EndTagType UNREGISTERED
                              The tag type given to an unregistered end tag (</ ... >).
                     
                    Constructor Summary
                    protected EndTagType(java.lang.String description, java.lang.String startDelimiter, java.lang.String closingDelimiter, boolean isServerTag)
                              Constructs a new EndTagType object with the specified properties.
                     
                    Method Summary
                    protected  EndTag constructEndTag(Source source, int begin, int end, java.lang.String name)
                              Internal method for the construction of an EndTag object of this type.
                     java.lang.String generateHTML(java.lang.String startTagName)
                              Generates the HTML text of an end tag of this type given the name of a corresponding start tag.
                     StartTagType getCorrespondingStartTagType()
                              Returns the type of start tag that is usually paired with an end tag of this type to form an Element.
                     java.lang.String getEndTagName(java.lang.String startTagName)
                              Returns the end tag name that is required to match a corresponding start tag with the specified name.
                     
                    Methods inherited from class TagType
                    constructTagAt, deregister, getClosingDelimiter, getDescription, getNamePrefix, getRegisteredTagTypes, getStartDelimiter, getTagTypesIgnoringEnclosedMarkup, isServerTag, isValidPosition, register, setTagTypesIgnoringEnclosedMarkup, tagEncloses, toString
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Field Detail

                    UNREGISTERED

                    public static final EndTagType UNREGISTERED
                    The tag type given to an unregistered end tag (</ ... >).

                    See the documentation of the Tag.isUnregistered() method for details.

                    Properties:
                    Property/Method                 Value
                    Description                     /unregistered
                    StartDelimiter                  </
                    ClosingDelimiter                >
                    IsServerTag                     false
                    NamePrefix                      (empty string)
                    CorrespondingStartTagType       null
                    generateHTML("StartTagName")    </StartTagName>
                    Example:
                    </ "This is not recognised as any of the predefined end tag types in this library">

                    See Also:
                    StartTagType.UNREGISTERED

                    NORMAL

                    public static final EndTagType NORMAL
                    The tag type given to a normal HTML or XML end tag (</name>).

                    Properties:
                    Property/Method                 Value
                    Description                     /normal
                    StartDelimiter                  </
                    ClosingDelimiter                >
                    IsServerTag                     false
                    NamePrefix                      (empty string)
                    CorrespondingStartTagType       StartTagType.NORMAL
                    generateHTML("StartTagName")    </StartTagName>
                    Example:
                    </div>

                    Constructor Detail

                    EndTagType

                    protected EndTagType(java.lang.String description,
                                         java.lang.String startDelimiter,
                                         java.lang.String closingDelimiter,
                                         boolean isServerTag)
                    Constructs a new EndTagType object with the specified properties.
                    (implementation assistance method)

                    As EndTagType is an abstract class, this constructor is only called from sub-class constructors.

                    Parameters:
                    description - a description of the new end tag type useful for debugging purposes.
                    startDelimiter - the start delimiter of the new end tag type.
                    closingDelimiter - the closing delimiter of the new end tag type.
                    isServerTag - indicates whether the new end tag type is a server tag.
                    Method Detail

                    getCorrespondingStartTagType

                    public StartTagType getCorrespondingStartTagType()
                    Returns the type of start tag that is usually paired with an end tag of this type to form an Element.
                    (default implementation method)

                    The default implementation returns null.

                    This property is informational only and is not used by the parser in any way.

                    The mapping of end tag type to the corresponding start tag type is in any case one-to-many, which is why the definition emphasises the word "usually". An example of this is the PHPTagTypes.PHP_SCRIPT start tag type, whose corresponding end tag type is EndTagType.NORMAL, while the converse is not true.

                    The only predefined end tag type that returns null for this property is the special UNREGISTERED end tag type.

                    Although this method is used like a property method, it is implemented as a default implementation method to avoid cyclic references between statically instantiated StartTagType and EndTagType objects.

                    Standard Tag Type Values:
                    End Tag Type    Corresponding Start Tag Type
                    UNREGISTERED    null
                    NORMAL          StartTagType.NORMAL
                    Extended Tag Type Values:
                    End Tag Type                                             Corresponding Start Tag Type
                    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT_END    MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT
                    MasonTagTypes.MASON_NAMED_BLOCK_END                      MasonTagTypes.MASON_NAMED_BLOCK

                    Returns:
                    the type of start tag that is usually paired with an end tag of this type to form an Element.
                    See Also:
                    StartTagType.getCorrespondingEndTagType()

                    getEndTagName

                    public java.lang.String getEndTagName(java.lang.String startTagName)
                    Returns the end tag name that is required to match a corresponding start tag with the specified name.
                    (property method)

                    This default implementation simply returns startTagName.

                    Note that the startTagName parameter should include the start tag's name prefix if it has one.

                    Parameters:
                    startTagName - the name of a corresponding start tag, including its name prefix.
                    Returns:
                    the end tag name that is required to match a corresponding start tag with the specified name.

                    generateHTML

                    public java.lang.String generateHTML(java.lang.String startTagName)
                    Generates the HTML text of an end tag of this type given the name of a corresponding start tag.
                    (property method)

                    This default implementation returns "</"+getEndTagName(startTagName)+TagType.getClosingDelimiter().

                    Note that the startTagName parameter should include the start tag's name prefix if it has one.

                    Parameters:
                    startTagName - the name of a corresponding start tag, including its name prefix.
                    Returns:
                    the HTML text of an end tag of this type given the name of a corresponding start tag.

                    constructEndTag

                    protected final EndTag constructEndTag(Source source,
                                                           int begin,
                                                           int end,
                                                           java.lang.String name)
                    Internal method for the construction of an EndTag object of this type.
                    (implementation assistance method)

                    Intended for use from within the constructTagAt(Source, int pos) method.

                    Parameters:
                    source - the Source document.
                    begin - the character position in the source document where this tag begins.
                    end - the character position in the source document where this tag ends.
                    name - the name of the tag.
                    Returns:
                    the new EndTag object.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Renderer.html0000644000175000017500000014504311214132422025264 0ustar twernertwerner Renderer (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Renderer

                    java.lang.Object
                      extended by Renderer
                    
                    All Implemented Interfaces:
                    CharStreamSource

                    public class Renderer
                    extends java.lang.Object
                    implements CharStreamSource

                    Performs a simple rendering of HTML markup into text.

                    This provides a human readable version of the segment content that is modelled on the way Mozilla Thunderbird and other email clients provide an automatic conversion of HTML content to text in their alternative MIME encoding of emails.

                    The output using default settings complies with the "text/plain; format=flowed" (DelSp=No) protocol described in RFC3676.

                    Many properties are available to customise the output, the most significant of which is probably MaxLineLength. See the individual property descriptions for details.

                    Use one of the following methods to obtain the output: writeTo(Writer), appendTo(Appendable) or toString().

                    The rendering of some constructs, especially tables, is very rudimentary. No attempt is made to render nested tables properly, except to ensure that all of the text content is included in the output.

                    Rendering an entire Source object performs a full sequential parse automatically.

                    Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions.

                    To extract pure text without any rendering of the markup, use the TextExtractor class instead.


                    Constructor Summary
                    Renderer(Segment segment)
                              Constructs a new Renderer based on the specified Segment.
                     
                    Method Summary
                     void appendTo(java.lang.Appendable appendable)
                              Appends the output to the specified Appendable object.
                     int getBlockIndentSize()
                              Returns the size of the indent to be used for anything other than LI elements.
                     boolean getConvertNonBreakingSpaces()
                              Indicates whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                     boolean getDecorateFontStyles()
                              Indicates whether decoration characters are to be included around the content of some font style elements and phrase elements.
                     long getEstimatedMaximumOutputLength()
                              Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
                     boolean getIncludeHyperlinkURLs()
                              Indicates whether hyperlink URL's are included in the output.
                     char[] getListBullets()
                              Returns the bullet characters to use for list items inside UL elements.
                     int getListIndentSize()
                              Returns the size of the indent to be used for LI elements.
                     int getMaxLineLength()
                              Returns the column at which lines are to be wrapped.
                     java.lang.String getNewLine()
                              Returns the string to be used to represent a newline in the output.
                     java.lang.String getTableCellSeparator()
                              Returns the string that is to separate table cells.
                     java.lang.String renderHyperlinkURL(StartTag startTag)
                              Renders the hyperlink URL from the specified StartTag.
                     Renderer setBlockIndentSize(int blockIndentSize)
                              Sets the size of the indent to be used for anything other than LI elements.
                     Renderer setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces)
                              Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                     Renderer setDecorateFontStyles(boolean decorateFontStyles)
                              Sets whether decoration characters are to be included around the content of some font style elements and phrase elements.
                     Renderer setIncludeHyperlinkURLs(boolean includeHyperlinkURLs)
                              Sets whether hyperlink URL's are included in the output.
                     Renderer setListBullets(char[] listBullets)
                              Sets the bullet characters to use for list items inside UL elements.
                     Renderer setListIndentSize(int listIndentSize)
                              Sets the size of the indent to be used for LI elements.
                     Renderer setMaxLineLength(int maxLineLength)
                              Sets the column at which lines are to be wrapped.
                     Renderer setNewLine(java.lang.String newLine)
                              Sets the string to be used to represent a newline in the output.
                     Renderer setTableCellSeparator(java.lang.String tableCellSeparator)
                              Sets the string that is to separate table cells.
                     java.lang.String toString()
                              Returns the output as a string.
                     void writeTo(java.io.Writer writer)
                              Writes the output to the specified Writer.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    Renderer

                    public Renderer(Segment segment)
                    Constructs a new Renderer based on the specified Segment.

                    Parameters:
                    segment - the segment containing the HTML to be rendered.
                    See Also:
                    Segment.getRenderer()
                    Method Detail

                    writeTo

                    public void writeTo(java.io.Writer writer)
                                 throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Writes the output to the specified Writer.

                    Specified by:
                    writeTo in interface CharStreamSource
                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    appendTo

                    public void appendTo(java.lang.Appendable appendable)
                                  throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Appends the output to the specified Appendable object.

                    Specified by:
                    appendTo in interface CharStreamSource
                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    getEstimatedMaximumOutputLength

                    public long getEstimatedMaximumOutputLength()
                    Description copied from interface: CharStreamSource
                    Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    The returned value should be used as a guide for efficiency purposes only, for example to set an initial StringBuilder capacity. There is no guarantee that the length of the output is indeed less than this value, as classes implementing this method often use assumptions based on typical usage to calculate the estimate.

                    Although implementations of this method should never return a value less than -1, users of this method must not assume that this will always be the case. Standard practice is to interpret any negative value as meaning that no estimate is available.

                    Specified by:
                    getEstimatedMaximumOutputLength in interface CharStreamSource
                    Returns:
                    the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    toString

                    public java.lang.String toString()
                    Description copied from interface: CharStreamSource
                    Returns the output as a string.

                    Specified by:
                    toString in interface CharStreamSource
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the output as a string.

                    setMaxLineLength

                    public Renderer setMaxLineLength(int maxLineLength)
                    Sets the column at which lines are to be wrapped.

                    Lines that would otherwise exceed this length are wrapped onto a new line at a word boundary.

                    A line may still exceed this length if it consists of a single word, where the length of the word plus the line indent exceeds the maximum length. In this case the line is wrapped immediately after the end of the word.

                    The default value is 76, which reflects the maximum line length for sending email data specified in RFC2049 section 3.5.

                    Parameters:
                    maxLineLength - the column at which lines are to be wrapped.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getMaxLineLength()

                    getMaxLineLength

                    public int getMaxLineLength()
                    Returns the column at which lines are to be wrapped.

                    See the setMaxLineLength(int) method for a full description of this property.

                    Returns:
                    the column at which lines are to be wrapped.

                    setNewLine

                    public Renderer setNewLine(java.lang.String newLine)
                    Sets the string to be used to represent a newline in the output.

                    The default value is "\r\n" (CR+LF) regardless of the platform on which the library is running. This is so that the default configuration produces valid MIME text/plain output, which mandates the use of CR+LF for line breaks.

                    Specifying a null argument causes the output to use the same new line string as is used in the source document, which is determined via the Source.getNewLine() method. If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document, or using the value from the static Config.NewLine property.

                    Parameters:
                    newLine - the string to be used to represent a newline in the output, may be null.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getNewLine()

                    getNewLine

                    public java.lang.String getNewLine()
                    Returns the string to be used to represent a newline in the output.

                    See the setNewLine(String) method for a full description of this property.

                    Returns:
                    the string to be used to represent a newline in the output.

                    setIncludeHyperlinkURLs

                    public Renderer setIncludeHyperlinkURLs(boolean includeHyperlinkURLs)
                    Sets whether hyperlink URL's are included in the output.

                    The default value is true.

                    When this property is true, the URL of each hyperlink is included in the output as determined by the implementation of the renderHyperlinkURL(StartTag) method.

                    Example:

                    Assuming the default implementation of renderHyperlinkURL(StartTag), when this property is true, the following HTML:

                    <a href="http://jericho.htmlparser.net/">Jericho HTML Parser</a>
                    produces the following output:
                    Jericho HTML Parser <http://jericho.htmlparser.net/>

                    Parameters:
                    includeHyperlinkURLs - specifies whether hyperlink URL's are included in the output.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getIncludeHyperlinkURLs()

                    getIncludeHyperlinkURLs

                    public boolean getIncludeHyperlinkURLs()
                    Indicates whether hyperlink URL's are included in the output.

                    See the setIncludeHyperlinkURLs(boolean) method for a full description of this property.

                    Returns:
                    true if hyperlink URL's are included in the output, otherwise false.

                    renderHyperlinkURL

                    public java.lang.String renderHyperlinkURL(StartTag startTag)
                    Renders the hyperlink URL from the specified StartTag.

                    A return value of null indicates that the hyperlink URL should not be rendered at all.

                    The default implementation of this method returns null if the href attribute of the specified start tag is '#', starts with "javascript:", or is missing. In all other cases it returns the value of the href attribute enclosed in angle brackets.

                    See the documentation of the setIncludeHyperlinkURLs(boolean) method for an example of how a hyperlink is rendered by the default implementation.

                    This method can be overridden in a subclass to customise the rendering of hyperlink URLs.

                    Rendering of hyperlink URLs can be disabled completely without overriding this method by setting the IncludeHyperlinkURLs property to false.

                    Example:
                    To render hyperlink URLs without the enclosing angle brackets:

                    Renderer renderer=new Renderer(segment) {
                        public String renderHyperlinkURL(StartTag startTag) {
                            String href=startTag.getAttributeValue("href");
                            if (href==null || href.equals("#") || href.startsWith("javascript:")) return null;
                            return href;
                        }
                    };
                    String renderedSegment=renderer.toString();

                    Parameters:
                    startTag - the start tag of the hyperlink element, must not be null.
                    Returns:
                    The rendered hyperlink URL from the specified StartTag, or null if the hyperlink URL should not be rendered.

                    setDecorateFontStyles

                    public Renderer setDecorateFontStyles(boolean decorateFontStyles)
                    Sets whether decoration characters are to be included around the content of some font style elements and phrase elements.

                    The default value is false.

                    Below is a table summarising the decorated elements.

                    Elements        Character    Example Output
                    B and STRONG    *            *bold text*
                    I and EM        /            /italic text/
                    U               _            _underlined text_
                    CODE            |            |code|

                    Parameters:
                    decorateFontStyles - specifies whether decoration characters are to be included around the content of some font style elements.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getDecorateFontStyles()

                    getDecorateFontStyles

                    public boolean getDecorateFontStyles()
                    Indicates whether decoration characters are to be included around the content of some font style elements and phrase elements.

                    See the setDecorateFontStyles(boolean) method for a full description of this property.

                    Returns:
                    true if decoration characters are to be included around the content of some font style elements, otherwise false.

                    setConvertNonBreakingSpaces

                    public Renderer setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces)
                    Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.

                    The default value is that of the static Config.ConvertNonBreakingSpaces property at the time the Renderer is instantiated.

                    Parameters:
                    convertNonBreakingSpaces - specifies whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getConvertNonBreakingSpaces()

                    getConvertNonBreakingSpaces

                    public boolean getConvertNonBreakingSpaces()
                    Indicates whether non-breaking space (&nbsp;) character entity references are converted to spaces.

                    See the setConvertNonBreakingSpaces(boolean) method for a full description of this property.

                    Returns:
                    true if non-breaking space (&nbsp;) character entity references are converted to spaces, otherwise false.

                    setBlockIndentSize

                    public Renderer setBlockIndentSize(int blockIndentSize)
                    Sets the size of the indent to be used for anything other than LI elements.

                    At present this applies to BLOCKQUOTE and DD elements.

                    The default value is 4.

                    Parameters:
                    blockIndentSize - the size of the indent.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getBlockIndentSize()

                    getBlockIndentSize

                    public int getBlockIndentSize()
                    Returns the size of the indent to be used for anything other than LI elements.

                    See the setBlockIndentSize(int) method for a full description of this property.

                    Returns:
                    the size of the indent to be used for anything other than LI elements.

                    setListIndentSize

                    public Renderer setListIndentSize(int listIndentSize)
                    Sets the size of the indent to be used for LI elements.

                    The default value is 6.

                    This applies to LI elements inside both UL and OL elements.

                    The bullet or number of the list item is included as part of the indent.

                    Parameters:
                    listIndentSize - the size of the indent.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getListIndentSize()

                    getListIndentSize

                    public int getListIndentSize()
                    Returns the size of the indent to be used for LI elements.

                    See the setListIndentSize(int) method for a full description of this property.

                    Returns:
                    the size of the indent to be used for LI elements.

                    setListBullets

                    public Renderer setListBullets(char[] listBullets)
                    Sets the bullet characters to use for list items inside UL elements.

                    The values in the default array are *, o, + and #.

                    If the nesting of rendered lists goes deeper than the length of this array, the bullet characters start repeating from the first in the array.

                    WARNING: If any of the characters in the default array are modified, this will affect all other instances of this class using the default array.

                    Parameters:
                    listBullets - an array of characters to be used as bullets, must have at least one entry.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getListBullets()

                    getListBullets

                    public char[] getListBullets()
                    Returns the bullet characters to use for list items inside UL elements.

                    See the setListBullets(char[]) method for a full description of this property.

                    Returns:
                    the bullet characters to use for list items inside UL elements.

                    setTableCellSeparator

                    public Renderer setTableCellSeparator(java.lang.String tableCellSeparator)
                    Sets the string that is to separate table cells.

                    The default value is " \t" (a space followed by a tab).

                    Parameters:
                    tableCellSeparator - the string that is to separate table cells.
                    Returns:
                    this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getTableCellSeparator()

                    getTableCellSeparator

                    public java.lang.String getTableCellSeparator()
                    Returns the string that is to separate table cells.

                    See the setTableCellSeparator(String) method for a full description of this property.

                    Returns:
                    the string that is to separate table cells.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/package-tree.html0000644000175000017500000003233711214132422026047 0ustar twernertwerner net.htmlparser.jericho Class Hierarchy (Jericho HTML Parser 3.1)

                    Hierarchy For Package net.htmlparser.jericho

                    Class Hierarchy

                    Interface Hierarchy

                    Enum Hierarchy



                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/SourceFormatter.html0000644000175000017500000011456011214132422026642 0ustar twernertwerner SourceFormatter (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class SourceFormatter

                    java.lang.Object
                      extended by SourceFormatter
                    
                    All Implemented Interfaces:
                    CharStreamSource

                    public final class SourceFormatter
                    extends java.lang.Object
                    implements CharStreamSource

                    Formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent.

                    Any indentation present in the original source text is removed.

                    Use one of the following methods to obtain the output: writeTo(Writer), appendTo(Appendable) or toString().

                    The output text is functionally equivalent to the original source and should be rendered identically unless specified below.

                    The following points describe the process in general terms. Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions.

                    • Every element that is not an inline-level element appears on a new line with an indent corresponding to its depth in the document element hierarchy.
                    • The indent is formed by writing n repetitions of the string specified in the IndentString property, where n is the depth of the indentation.
                    • The content of an indented element starts on a new line and is indented at a depth one greater than that of the element, with the end tag appearing on a new line at the same depth as the start tag. If the content contains only text and inline-level elements, it may continue on the same line as the start tag. Additionally, if the output content contains no new lines, the end tag may also continue on the same line.
                    • The content of preformatted elements such as PRE and TEXTAREA is not indented, nor is the white space modified in any way.
                    • Only normal and document type declaration elements are indented. All others are treated as inline-level elements.
                    • White space and indentation inside HTML comments, CDATA sections, or any server tag is preserved, but with the indentation of new lines starting at a depth one greater than that of the surrounding text.
                    • White space and indentation inside SCRIPT elements is preserved, but with the indentation of new lines starting at a depth one greater than that of the SCRIPT element.
                    • If the TidyTags property is set to true, every tag in the document is replaced with the output from its Tag.tidy() method. If this property is set to false, the tag from the original text is used, including all white space, but with any new lines indented at a depth one greater than that of the element.
                    • If the CollapseWhiteSpace property is set to true, every string of one or more white space characters located outside of a tag is replaced with a single space in the output. White space located adjacent to a non-inline-level element tag (except server tags) may be removed.
                    • If the IndentAllElements property is set to true, every element appears indented on a new line, including inline-level elements. This generates output that is a good representation of the actual document element hierarchy, but is very likely to introduce white space that compromises the functional equivalency of the document.
                    • The NewLine property specifies the character sequence to use for each newline in the output document.
                    • If the source document contains server tags, the functional equivalency of the output document may be compromised.

                    Formatting an entire Source object performs a full sequential parse automatically.


                    Constructor Summary
                    SourceFormatter(Segment segment)
                              Constructs a new SourceFormatter based on the specified Segment.
                     
                    Method Summary
                     void appendTo(java.lang.Appendable appendable)
                              Appends the output to the specified Appendable object.
                     boolean getCollapseWhiteSpace()
                              Indicates whether white space in the text between the tags is to be collapsed.
                     long getEstimatedMaximumOutputLength()
                              Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
                     boolean getIndentAllElements()
                              Indicates whether all elements are to be indented, including inline-level elements and those with preformatted contents.
                     java.lang.String getIndentString()
                              Returns the string to be used for indentation.
                     java.lang.String getNewLine()
                              Returns the string to be used to represent a newline in the output.
                     boolean getTidyTags()
                              Indicates whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.
                     SourceFormatter setCollapseWhiteSpace(boolean collapseWhiteSpace)
                              Sets whether white space in the text between the tags is to be collapsed.
                     SourceFormatter setIndentAllElements(boolean indentAllElements)
                              Sets whether all elements are to be indented, including inline-level elements and those with preformatted contents.
                     SourceFormatter setIndentString(java.lang.String indentString)
                              Sets the string to be used for indentation.
                     SourceFormatter setNewLine(java.lang.String newLine)
                              Sets the string to be used to represent a newline in the output.
                     SourceFormatter setTidyTags(boolean tidyTags)
                              Sets whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.
                     java.lang.String toString()
                              Returns the output as a string.
                     void writeTo(java.io.Writer writer)
                              Writes the output to the specified Writer.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Constructor Detail

                    SourceFormatter

                    public SourceFormatter(Segment segment)
                    Constructs a new SourceFormatter based on the specified Segment.

                    Parameters:
                    segment - the segment containing the HTML to be formatted.
                    See Also:
                    Source.getSourceFormatter()
                    Method Detail

                    writeTo

                    public void writeTo(java.io.Writer writer)
                                 throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Writes the output to the specified Writer.

                    Specified by:
                    writeTo in interface CharStreamSource
                    Parameters:
                    writer - the destination java.io.Writer for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    appendTo

                    public void appendTo(java.lang.Appendable appendable)
                                  throws java.io.IOException
                    Description copied from interface: CharStreamSource
                    Appends the output to the specified Appendable object.

                    Specified by:
                    appendTo in interface CharStreamSource
                    Parameters:
                    appendable - the destination java.lang.Appendable object for the output.
                    Throws:
                    java.io.IOException - if an I/O exception occurs.

                    getEstimatedMaximumOutputLength

                    public long getEstimatedMaximumOutputLength()
                    Description copied from interface: CharStreamSource
                    Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    The returned value should be used as a guide for efficiency purposes only, for example to set an initial StringBuilder capacity. There is no guarantee that the length of the output is indeed less than this value, as classes implementing this method often use assumptions based on typical usage to calculate the estimate.

                    Although implementations of this method should never return a value less than -1, users of this method must not assume that this will always be the case. Standard practice is to interpret any negative value as meaning that no estimate is available.

                    Specified by:
                    getEstimatedMaximumOutputLength in interface CharStreamSource
                    Returns:
                    the estimated maximum number of characters in the output, or -1 if no estimate is available.

                    toString

                    public java.lang.String toString()
                    Description copied from interface: CharStreamSource
                    Returns the output as a string.

                    Specified by:
                    toString in interface CharStreamSource
                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the output as a string.

                    setIndentString

                    public SourceFormatter setIndentString(java.lang.String indentString)
                    Sets the string to be used for indentation.

                    The default value is a string containing a single tab character (U+0009).

                    The most commonly used indent strings are "\t" (single tab), " " (single space), "  " (2 spaces), and "    " (4 spaces).

                    Parameters:
                    indentString - the string to be used for indentation, must not be null.
                    Returns:
                    this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getIndentString()

                    getIndentString

                    public java.lang.String getIndentString()
                    Returns the string to be used for indentation.

                    See the setIndentString(String) method for a full description of this property.

                    Returns:
                    the string to be used for indentation.

                    setTidyTags

                    public SourceFormatter setTidyTags(boolean tidyTags)
                    Sets whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.

                    The default value is false.

                    If this property is set to false, the tag from the original text is used, including all white space, but with any new lines indented at a depth one greater than that of the element.

                    Parameters:
                    tidyTags - specifies whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.
                    Returns:
                    this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getTidyTags()

                    getTidyTags

                    public boolean getTidyTags()
                    Indicates whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.

                    See the setTidyTags(boolean) method for a full description of this property.

                    Returns:
                    true if the original text of each tag is to be replaced with the output from its Tag.tidy() method, otherwise false.

                    setCollapseWhiteSpace

                    public SourceFormatter setCollapseWhiteSpace(boolean collapseWhiteSpace)
                    Sets whether white space in the text between the tags is to be collapsed.

                    The default value is false.

                    If this property is set to true, every string of one or more white space characters located outside of a tag is replaced with a single space in the output. White space located adjacent to a non-inline-level element tag (except server tags) may be removed.

                    Parameters:
                    collapseWhiteSpace - specifies whether white space in the text between the tags is to be collapsed.
                    Returns:
                    this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getCollapseWhiteSpace()

                    getCollapseWhiteSpace

                    public boolean getCollapseWhiteSpace()
                    Indicates whether white space in the text between the tags is to be collapsed.

                    See the setCollapseWhiteSpace(boolean) method for a full description of this property.

                    Returns:
                    true if white space in the text between the tags is to be collapsed, otherwise false.

                    setIndentAllElements

                    public SourceFormatter setIndentAllElements(boolean indentAllElements)
                    Sets whether all elements are to be indented, including inline-level elements and those with preformatted contents.

                    The default value is false.

                    If this property is set to true, every element appears indented on a new line, including inline-level elements.

                    This generates output that is a good representation of the actual document element hierarchy, but is very likely to introduce white space that compromises the functional equivalency of the document.

                    Parameters:
                    indentAllElements - specifies whether all elements are to be indented.
                    Returns:
                    this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getIndentAllElements()

                    getIndentAllElements

                    public boolean getIndentAllElements()
                    Indicates whether all elements are to be indented, including inline-level elements and those with preformatted contents.

                    See the setIndentAllElements(boolean) method for a full description of this property.

                    Returns:
                    true if all elements are to be indented, otherwise false.

                    setNewLine

                    public SourceFormatter setNewLine(java.lang.String newLine)
                    Sets the string to be used to represent a newline in the output.

                    The default is to use the same new line string as is used in the source document, which is determined via the Source.getNewLine() method. If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document, or using the value from the static Config.NewLine property.

                    Specifying a null argument resets the property to its default value, which is to use the same new line string as is used in the source document.

                    Parameters:
                    newLine - the string to be used to represent a newline in the output, may be null.
                    Returns:
                    this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement.
                    See Also:
                    getNewLine()

                    getNewLine

                    public java.lang.String getNewLine()
                    Returns the string to be used to represent a newline in the output.

                    See the setNewLine(String) method for a full description of this property.

                    Returns:
                    the string to be used to represent a newline in the output.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/Config.CompatibilityMode.html0000644000175000017500000014017611214132420030340 0ustar twernertwerner Config.CompatibilityMode (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Class Config.CompatibilityMode

                    java.lang.Object
                      extended by Config.CompatibilityMode
                    
                    Enclosing class:
                    Config

                    public static final class Config.CompatibilityMode
                    extends java.lang.Object

                    Represents a set of configuration parameters that relate to user agent compatibility issues.

                    The predefined compatibility modes IE, MOZILLA, OPERA and XHTML provide an easy means of ensuring the library interprets the markup in a way consistent with some of the most commonly used browsers, at least in relation to the behaviour described by the properties in this class.

                    The properties of any CompatibilityMode object can be modified individually, including those in the predefined instances as well as newly constructed instances. Take note however that modifying the properties of the predefined instances has a global effect.

                    The currently active compatibility mode is stored in the static Config.CurrentCompatibilityMode property.


                    Field Summary
                    static int CODE_POINTS_ALL
                              Indicates the recognition of all unicode code points.
                    static int CODE_POINTS_NONE
                              Indicates the recognition of no unicode code points.
                    static Config.CompatibilityMode IE
                              Microsoft Internet Explorer compatibility mode.
                    static Config.CompatibilityMode MOZILLA
                              Mozilla / Firefox / Netscape compatibility mode.
                    static Config.CompatibilityMode OPERA
                              Opera compatibility mode.
                    static Config.CompatibilityMode XHTML
                              XHTML compatibility mode.
                     
                    Constructor Summary
                    Config.CompatibilityMode(java.lang.String name)
                              Constructs a new CompatibilityMode with the given name.
                     
                    Method Summary
                     java.lang.String getDebugInfo()
                              Returns a string representation of this object useful for debugging purposes.
                     java.lang.String getName()
                              Returns the name of this compatibility mode.
                     int getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
                              Returns the maximum unicode code point of an unterminated character entity reference which is to be recognised in the specified context.
                     int getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
                              Returns the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.
                     int getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
                              Returns the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.
                     boolean isFormFieldNameCaseInsensitive()
                              Indicates whether form field names are treated as case insensitive.
                     void setFormFieldNameCaseInsensitive(boolean value)
                              Sets whether form field names are treated as case insensitive.
                     void setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
                              Sets the maximum unicode code point of an unterminated character entity reference which is to be recognised in the specified context.
                     void setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
                              Sets the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.
                     void setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
                              Sets the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.
                     java.lang.String toString()
                              Returns the name of this compatibility mode.
                     
                    Methods inherited from class java.lang.Object
                    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
                     

                    Field Detail

                    CODE_POINTS_ALL

                    public static final int CODE_POINTS_ALL
                    Indicates the recognition of all unicode code points.

                    This value is used in properties which specify a maximum unicode code point to be recognised by the parser.

                    See Also:
                    getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue), getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue), getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue), Constant Field Values

                    CODE_POINTS_NONE

                    public static final int CODE_POINTS_NONE
                    Indicates the recognition of no unicode code points.

                    This value is used in properties which specify a maximum unicode code point to be recognised by the parser.

                    See Also:
                    getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue), getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue), getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue), Constant Field Values

                    IE

                    public static final Config.CompatibilityMode IE
                    Microsoft Internet Explorer compatibility mode.

                    Name = IE
                    FormFieldNameCaseInsensitive = true
                    Recognition of unterminated character references:          (inside attribute)    (outside attribute)
                    UnterminatedCharacterEntityReferenceMaxCodePoint =         U+00FF                U+00FF
                    UnterminatedDecimalCharacterReferenceMaxCodePoint =        All                   All
                    UnterminatedHexadecimalCharacterReferenceMaxCodePoint =    All                   None


                    MOZILLA

                    public static final Config.CompatibilityMode MOZILLA
                    Mozilla / Firefox / Netscape compatibility mode.

                    Name = Mozilla
                    FormFieldNameCaseInsensitive = false
                    Recognition of unterminated character references:          (inside attribute)    (outside attribute)
                    UnterminatedCharacterEntityReferenceMaxCodePoint =         U+00FF                All
                    UnterminatedDecimalCharacterReferenceMaxCodePoint =        All                   All
                    UnterminatedHexadecimalCharacterReferenceMaxCodePoint =    All                   All


                    OPERA

                    public static final Config.CompatibilityMode OPERA
                    Opera compatibility mode.

                    Name = Opera
                    FormFieldNameCaseInsensitive = true
                    Recognition of unterminated character references:          (inside attribute)    (outside attribute)
                    UnterminatedCharacterEntityReferenceMaxCodePoint =         U+003E                All
                    UnterminatedDecimalCharacterReferenceMaxCodePoint =        All                   All
                    UnterminatedHexadecimalCharacterReferenceMaxCodePoint =    All                   All


                    XHTML

                    public static final Config.CompatibilityMode XHTML
                    XHTML compatibility mode.

                    Name = XHTML
                    FormFieldNameCaseInsensitive = false
                    Recognition of unterminated character references:          (inside attribute)    (outside attribute)
                    UnterminatedCharacterEntityReferenceMaxCodePoint =         None                  None
                    UnterminatedDecimalCharacterReferenceMaxCodePoint =        None                  None
                    UnterminatedHexadecimalCharacterReferenceMaxCodePoint =    None                  None

                    Constructor Detail

                    Config.CompatibilityMode

                    public Config.CompatibilityMode(java.lang.String name)
                    Constructs a new CompatibilityMode with the given name.

                    All properties in the new instance are initially assigned their default values, which are the same as the strict rules of the XHTML compatibility mode.

                    Parameters:
                    name - the name of the new compatibility mode
                    Method Detail

                    getName

                    public java.lang.String getName()
                    Returns the name of this compatibility mode.

                    Returns:
                    the name of this compatibility mode.

                    isFormFieldNameCaseInsensitive

                    public boolean isFormFieldNameCaseInsensitive()
                    Indicates whether form field names are treated as case insensitive.

                    Microsoft Internet Explorer treats field names as case insensitive, while Mozilla treats them as case sensitive.

                    The value of this property in the current compatibility mode affects all instances of the FormFields class. It should be set to the desired configuration before any instances of FormFields are created.

                    Returns:
                    true if form field names are treated as case insensitive, otherwise false.
                    See Also:
                    setFormFieldNameCaseInsensitive(boolean)

                    setFormFieldNameCaseInsensitive

                    public void setFormFieldNameCaseInsensitive(boolean value)
                    Sets whether form field names are treated as case insensitive.

                    See isFormFieldNameCaseInsensitive() for the documentation of this property.

                    Parameters:
                    value - the new value of the property

                    getUnterminatedCharacterEntityReferenceMaxCodePoint

                    public int getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
                    Returns the maximum unicode code point of an unterminated character entity reference which is to be recognised in the specified context.

                    For example, if getUnterminatedCharacterEntityReferenceMaxCodePoint(true) has the value 0xFF (U+00FF) in the current compatibility mode, then:

                    See the documentation of the Attribute.getValue() method for further discussion.

                    Parameters:
                    insideAttributeValue - the context within an HTML document - true if inside an attribute value or false if outside an attribute value.
                    Returns:
                    the maximum unicode code point of an unterminated character entity reference which is to be recognised in the specified context.
                    See Also:
                    setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)

                    setUnterminatedCharacterEntityReferenceMaxCodePoint

                    public void setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue,
                                                                                    int maxCodePoint)
                    Sets the maximum unicode code point of an unterminated character entity reference which is to be recognised in the specified context.

                    See getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue) for the documentation of this property.

                    Parameters:
                    insideAttributeValue - the context within an HTML document - true if inside an attribute value or false if outside an attribute value.
                    maxCodePoint - the maximum unicode code point.

                    getUnterminatedDecimalCharacterReferenceMaxCodePoint

                    public int getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
                    Returns the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.

                    For example, if getUnterminatedDecimalCharacterReferenceMaxCodePoint(true) had the hypothetical value 0xFF (U+00FF) in the current compatibility mode, then:

                    Parameters:
                    insideAttributeValue - the context within an HTML document - true if inside an attribute value or false if outside an attribute value.
                    Returns:
                    the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.
                    See Also:
                    setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)

                    setUnterminatedDecimalCharacterReferenceMaxCodePoint

                    public void setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue,
                                                                                     int maxCodePoint)
                    Sets the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.

                    See getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue) for the documentation of this property.

                    Parameters:
                    insideAttributeValue - the context within an HTML document - true if inside an attribute value or false if outside an attribute value.
                    maxCodePoint - the maximum unicode code point.

                    getUnterminatedHexadecimalCharacterReferenceMaxCodePoint

                    public int getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
                    Returns the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.

                    For example, if getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(true) had the hypothetical value 0xFF (U+00FF) in the current compatibility mode, then:

                    Parameters:
                    insideAttributeValue - the context within an HTML document - true if inside an attribute value or false if outside an attribute value.
                    Returns:
                    the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.
                    See Also:
                    setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)

                    setUnterminatedHexadecimalCharacterReferenceMaxCodePoint

                    public void setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue,
                                                                                         int maxCodePoint)
                    Sets the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.

                    See getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue) for the documentation of this property.

                    Parameters:
                    insideAttributeValue - the context within an HTML document - true if inside an attribute value or false if outside an attribute value.
                    maxCodePoint - the maximum unicode code point.

                    getDebugInfo

                    public java.lang.String getDebugInfo()
                    Returns a string representation of this object useful for debugging purposes.

                    Returns:
                    a string representation of this object useful for debugging purposes.

                    toString

                    public java.lang.String toString()
                    Returns the name of this compatibility mode.

                    Overrides:
                    toString in class java.lang.Object
                    Returns:
                    the name of this compatibility mode.


                    jericho-html-3.1/docs/javadoc/net/htmlparser/jericho/LoggerProvider.html0000644000175000017500000005500211214132422026443 0ustar twernertwerner LoggerProvider (Jericho HTML Parser 3.1)

                    net.htmlparser.jericho
                    Interface LoggerProvider


                    public interface LoggerProvider

                    Defines the interface for a factory class to provide Logger instances for each Source object.

                    It is not usually necessary for users to create implementations of this interface, as several predefined instances are defined which provide the most commonly required Logger implementations.

                    By default, a LoggerProvider is chosen automatically according to the algorithm described in the static Config.LoggerProvider property. This automatic choice can be overridden by setting the Config.LoggerProvider property manually with an instance of this interface, but this is also usually not necessary.


                    Field Summary
                    static LoggerProvider DISABLED
                              A LoggerProvider implementation that disables all log messages.
                    static LoggerProvider JAVA
                              A LoggerProvider implementation that wraps the standard java.util.logging system included in the Java SDK version 1.4 and above.
                    static LoggerProvider JCL
                              A LoggerProvider implementation that wraps the Jakarta Commons Logging (JCL) framework.
                    static LoggerProvider LOG4J
                              A LoggerProvider implementation that wraps the Apache Log4J framework.
                    static LoggerProvider SLF4J
                              A LoggerProvider implementation that wraps the SLF4J framework.
                    static LoggerProvider STDERR
                              A LoggerProvider implementation that sends all log messages to the standard error output stream (System.err).
                     
                    Method Summary
                     Logger getLogger(java.lang.String name)
                              Creates a new Logger instance with the specified name.
                     

                    Field Detail

                    DISABLED

                    static final LoggerProvider DISABLED
                    A LoggerProvider implementation that disables all log messages.


                    STDERR

                    static final LoggerProvider STDERR
                    A LoggerProvider implementation that sends all log messages to the standard error output stream (System.err).

                    The implementation uses the following code to create each logger:
                    new WriterLogger(new OutputStreamWriter(System.err),name)


                    JAVA

                    static final LoggerProvider JAVA
                    A LoggerProvider implementation that wraps the standard java.util.logging system included in the Java SDK version 1.4 and above.

                    This is the default used if no other logging framework is detected. See the description of the static Config.LoggerProvider property for more details.

                    The following mapping of logging levels is used:
                    Logger level -> java.util.logging.Level
                    ERROR -> SEVERE
                    WARN -> WARNING
                    INFO -> INFO
                    DEBUG -> FINE


                    JCL

                    static final LoggerProvider JCL
                    A LoggerProvider implementation that wraps the Jakarta Commons Logging (JCL) framework.

                    See the description of the static Config.LoggerProvider property for details on when this implementation is used as the default.

                    The following mapping of logging levels is used:
                    Logger level -> org.apache.commons.logging level
                    ERROR -> error
                    WARN -> warn
                    INFO -> info
                    DEBUG -> debug


                    LOG4J

                    static final LoggerProvider LOG4J
                    A LoggerProvider implementation that wraps the Apache Log4J framework.

                    See the description of the static Config.LoggerProvider property for details on when this implementation is used as the default.

                    The following mapping of logging levels is used:
                    Logger level -> org.apache.log4j.Level
                    ERROR -> ERROR
                    WARN -> WARN
                    INFO -> INFO
                    DEBUG -> DEBUG


                    SLF4J

                    static final LoggerProvider SLF4J
                    A LoggerProvider implementation that wraps the SLF4J framework.

                    See the description of the static Config.LoggerProvider property for details on when this implementation is used as the default.

                    The following mapping of logging levels is used:
                    Logger level -> org.slf4j.Logger level
                    ERROR -> error
                    WARN -> warn
                    INFO -> info
                    DEBUG -> debug

                    Method Detail

                    getLogger

                    Logger getLogger(java.lang.String name)
                    Creates a new Logger instance with the specified name.

                    The name argument is used by the underlying logging implementation, and is normally a dot-separated name based on the package name or class name of the subsystem.

                    The name used for all automatically created Logger instances is "net.htmlparser.jericho".

                    Parameters:
                    name - the name of the logger, the use of which is determined by the underlying logging implementation, may be null.
                    Returns:
                    a new Logger instance with the specified name.


                    jericho-html-3.1/docs/javadoc/index-all.html0000644000175000017500000140120011214132424020772 0ustar twernertwerner Index (Jericho HTML Parser 3.1)
                    A B C D E F G H I J K L M N O P Q R S T U V W X _

                    A

                    A - Static variable in interface HTMLElementName
                    HTML element A - anchor.
                    ABBR - Static variable in interface HTMLElementName
                    HTML element ABBR - abbreviated form (e.g., WWW, HTTP, etc.).
                    ACRONYM - Static variable in interface HTMLElementName
                    HTML element ACRONYM - acronym.
                    ADDRESS - Static variable in interface HTMLElementName
                    HTML element ADDRESS - information on author.
                    addValue(String) - Method in class FormControl
                    Adds the specified value to this control's submission values *.
                    addValue(String) - Method in class FormField
                    Adds the specified value to the field submission values of this field.
                    addValue(String, String) - Method in class FormFields
                    Adds the specified value to the field submission values of the constituent form field with the specified name.
                    allowsMultipleValues() - Method in class FormField
                    Indicates whether the field allows multiple values.
                    appendCharTo(Appendable) - Method in class CharacterReference
                    Appends the character represented by this character reference to the specified appendable object.
                    appendTo(Appendable) - Method in interface CharStreamSource
                    Appends the output to the specified Appendable object.
                    appendTo(Appendable) - Method in class OutputDocument
                    Appends the final content of this output document to the specified Appendable object.
                    appendTo(Appendable, int, int) - Method in class OutputDocument
                    Appends the specified portion of the final content of this output document to the specified Appendable object.
                    appendTo(Appendable) - Method in interface OutputSegment
                    Appends the content of this output segment to the specified Appendable object.
                    appendTo(Appendable) - Method in class Renderer
                     
                    appendTo(Appendable) - Method in class SourceCompactor
                     
                    appendTo(Appendable) - Method in class SourceFormatter
                     
                    appendTo(Appendable) - Method in class TextExtractor
                     
                    APPLET - Static variable in interface HTMLElementName
                    HTML element APPLET - Java applet.
                    AREA - Static variable in interface HTMLElementName
                    HTML element AREA - client-side image map area.
                    atEndOfAttributes(Source, int, boolean) - Method in class StartTagType
                    Indicates whether the specified source document position is at the end of a tag's attributes.
                    Attribute - Class in net.htmlparser.jericho
                    Represents a single attribute name/value segment within a StartTag.
                    AttributeNames - Static variable in class FormControlOutputStyle.ConfigDisplayValue
                    Defines the names of the attributes that are copied from the normal form control output element to a display value element.
                    Attributes - Class in net.htmlparser.jericho
                    Represents the list of Attribute objects present within a particular StartTag.

                    B

                    B - Static variable in interface HTMLElementName
                    HTML element B - bold text style.
                    BASE - Static variable in interface HTMLElementName
                    HTML element BASE - document base URI.
                    BASEFONT - Static variable in interface HTMLElementName
                    HTML element BASEFONT - base font size.
                    BasicLogFormatter - Class in net.htmlparser.jericho
                    Provides basic formatting for log messages.
                    BasicLogFormatter() - Constructor for class BasicLogFormatter
                     
                    BDO - Static variable in interface HTMLElementName
                    HTML element BDO - I18N BiDi over-ride.
                    BIG - Static variable in interface HTMLElementName
                    HTML element BIG - large text style.
                    BLOCKQUOTE - Static variable in interface HTMLElementName
                    HTML element BLOCKQUOTE - long quotation.
                    BODY - Static variable in interface HTMLElementName
                    HTML element BODY - document body.
                    BR - Static variable in interface HTMLElementName
                    HTML element BR - forced line break.
                    BUTTON - Static variable in interface HTMLElementName
                    HTML element BUTTON - push button.

                    C

                    CAPTION - Static variable in interface HTMLElementName
                    HTML element CAPTION - table caption.
                    CDATA_SECTION - Static variable in class StartTagType
                    The tag type given to a CDATA section (<![CDATA[ ... ]]>).
                    CENTER - Static variable in interface HTMLElementName
                    HTML element CENTER - shorthand for DIV align=center.
                    CharacterEntityReference - Class in net.htmlparser.jericho
                    Represents an HTML Character Entity Reference.
                    CharacterReference - Class in net.htmlparser.jericho
                    Represents an HTML Character Reference, implemented by the subclasses CharacterEntityReference and NumericCharacterReference.
                    charAt(int) - Method in interface ParseText
                    Returns the character at the specified index.
                    charAt(int) - Method in class Segment
                    Returns the character at the specified index.
                    charAt(int) - Method in class Source
                     
                    CharStreamSource - Interface in net.htmlparser.jericho
                    Represents a character stream source.
                    CharStreamSourceUtil - Class in net.htmlparser.jericho
                    Contains static utility methods for manipulating the way data is retrieved from a CharStreamSource object.
                    CheckedHTML - Static variable in class FormControlOutputStyle.ConfigDisplayValue
                    Defines the HTML which replaces the normal output element of a CHECKBOX or RADIO form control if it contains a checked attribute.
                    CITE - Static variable in interface HTMLElementName
                    HTML element CITE - citation.
                    clearCache() - Method in class Source
                    Clears the tag cache of all tags.
                    clearValues() - Method in class FormControl
                    Clears the control's existing submission values.
                    clearValues() - Method in class FormField
                    Clears the submission values of all the constituent form controls in this field.
                    clearValues() - Method in class FormFields
                    Clears the submission values of all the constituent form controls.
                    close() - Method in class StreamedSource
                    Closes the underlying Reader or InputStream and releases any system resources associated with it.
                    CODE - Static variable in interface HTMLElementName
                    HTML element CODE - computer code fragment.
                    CODE_POINTS_ALL - Static variable in class Config.CompatibilityMode
                    Indicates the recognition of all unicode code points.
                    CODE_POINTS_NONE - Static variable in class Config.CompatibilityMode
                    Indicates the recognition of no unicode code points.
                    COL - Static variable in interface HTMLElementName
                    HTML element COL - table column.
                    COLGROUP - Static variable in interface HTMLElementName
                    HTML element COLGROUP - table column group.
                    ColumnMultipleValueSeparator - Static variable in class Config
                    Determines the string used to separate a single column's multiple values in the output of the FormFields.getColumnValues(Map) method.
                    ColumnValueFalse - Static variable in class Config
                    Determines the string that represents the value false in the output of the FormFields.getColumnValues(Map) method.
                    ColumnValueTrue - Static variable in class Config
                    Determines the string that represents the value true in the output of the FormFields.getColumnValues(Map) method.
                    COMMENT - Static variable in class StartTagType
                    The tag type given to an HTML comment (<!-- ... -->).
                    COMPARATOR - Static variable in interface OutputSegment
                    The comparator used to sort output segments in the OutputDocument before output.
                    compareTo(Segment) - Method in class Segment
                    Compares this Segment object to another object.
                    Config - Class in net.htmlparser.jericho
                    Encapsulates global configuration properties which determine the behaviour of various functions.
                    Config.CompatibilityMode - Class in net.htmlparser.jericho
                    Represents a set of configuration parameters that relate to user agent compatibility issues.
                    Config.CompatibilityMode(String) - Constructor for class Config.CompatibilityMode
                    Constructs a new CompatibilityMode with the given name.
                    constructEndTag(Source, int, int, String) - Method in class EndTagType
                    Internal method for the construction of an EndTag object of this type.
                    constructStartTag(Source, int, int, String, Attributes) - Method in class StartTagType
                    Internal method for the construction of a StartTag object of this type.
                    constructTagAt(Source, int) - Method in class EndTagTypeGenericImplementation
                    Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    constructTagAt(Source, int) - Method in class StartTagTypeGenericImplementation
                    Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    constructTagAt(Source, int) - Method in class TagType
                    Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
                    containsAt(String, int) - Method in interface ParseText
                    Indicates whether this parse text contains the specified string at the specified position.
                    ConvertNonBreakingSpaces - Static variable in class Config
                    Determines whether the CharacterReference.decode(CharSequence) and similar methods convert non-breaking space (&nbsp;) character references to normal spaces.
                    CurrentCompatibilityMode - Static variable in class Config
                    Determines the currently active compatibility mode.

                    D

                    DD - Static variable in interface HTMLElementName
                    HTML element DD - definition description.
                    debug(String) - Method in interface Logger
                    Logs a message at the DEBUG level.
                    debug(String) - Method in class WriterLogger
                     
                    decode(CharSequence) - Static method in class CharacterReference
                    Decodes the specified HTML encoded text into normal text.
                    decode(CharSequence, boolean) - Static method in class CharacterReference
                    Decodes the specified HTML encoded text into normal text.
                    decodeCollapseWhiteSpace(CharSequence) - Static method in class CharacterReference
                    Decodes the specified text after collapsing its white space.
                    defines(TagType) - Static method in class MasonTagTypes
                    Indicates whether the specified tag type is defined in this class.
                    defines(TagType) - Static method in class MicrosoftTagTypes
                    Indicates whether the specified tag type is defined in this class.
                    defines(TagType) - Static method in class PHPTagTypes
                    Indicates whether the specified tag type is defined in this class.
                    DEL - Static variable in interface HTMLElementName
                    HTML element DEL - deleted text.
                    deregister() - Method in class TagType
                    Deregisters this tag type.
                    DFN - Static variable in interface HTMLElementName
                    HTML element DFN - instance definition.
                    DIR - Static variable in interface HTMLElementName
                    HTML element DIR - directory list.
                    DISABLED - Static variable in interface LoggerProvider
                    A LoggerProvider implementation that disables all log messages.
                    DIV - Static variable in interface HTMLElementName
                    HTML element DIV - generic language/style container.
                    DL - Static variable in interface HTMLElementName
                    HTML element DL - definition list.
                    DOCTYPE_DECLARATION - Static variable in class StartTagType
                    The tag type given to a document type declaration (<!DOCTYPE ... >).
                    DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT - Static variable in class MicrosoftTagTypes
                    The tag type given to a Microsoft® downlevel-revealed conditional comment (<![if ... ]> | <![endif]>).
                    DT - Static variable in interface HTMLElementName
                    HTML element DT - definition term.

                    E

                    Element - Class in net.htmlparser.jericho
                    Represents an element in a specific source document, which encompasses a start tag, an optional end tag and all content in between.
                    ElementName - Static variable in class FormControlOutputStyle.ConfigDisplayValue
                    Defines the name of display value elements.
                    EM - Static variable in interface HTMLElementName
                    HTML element EM - emphasis.
                    EmptyHTML - Static variable in class FormControlOutputStyle.ConfigDisplayValue
                    Defines the content of a display value element if the submission value of the control is null or an empty string.
                    encloses(Segment) - Method in class Segment
                    Indicates whether this Segment encloses the specified Segment.
                    encloses(int) - Method in class Segment
                    Indicates whether this segment encloses the specified character position in the source document.
                    encode(CharSequence) - Static method in class CharacterReference
                    Encodes the specified text, escaping special characters into character references.
                    encode(char) - Static method in class CharacterReference
                    Encodes the specified character into a character reference if required.
                    encode(CharSequence) - Static method in class NumericCharacterReference
                    Encodes the specified text, escaping special characters into numeric character references.
                    encodeDecimal(CharSequence) - Static method in class NumericCharacterReference
                    Encodes the specified text, escaping special characters into decimal character references.
                    encodeHexadecimal(CharSequence) - Static method in class NumericCharacterReference
                    Encodes the specified text, escaping special characters into hexadecimal character references.
                    encodeWithWhiteSpaceFormatting(CharSequence) - Static method in class CharacterReference
                    Encodes the specified text, preserving line breaks, tabs and spaces for rendering by converting them to markup.
                    EndTag - Class in net.htmlparser.jericho
                    Represents the end tag of an element in a specific source document.
                    EndTagType - Class in net.htmlparser.jericho
                    Defines the syntax for an end tag type.
                    EndTagType(String, String, String, boolean) - Constructor for class EndTagType
                    Constructs a new EndTagType object with the specified properties.
                    EndTagTypeGenericImplementation - Class in net.htmlparser.jericho
                    Provides a generic implementation of the abstract EndTagType class based on the most common end tag behaviour.
                    EndTagTypeGenericImplementation(String, String, String, boolean, boolean) - Constructor for class EndTagTypeGenericImplementation
                    Constructs a new EndTagTypeGenericImplementation object based on the specified properties.
                    equals(Object) - Method in class Segment
                    Compares the specified object with this Segment for equality.
                    error(String) - Method in interface Logger
                    Logs a message at the ERROR level.
                    error(String) - Method in class WriterLogger
                     
                    excludeElement(StartTag) - Method in class TextExtractor
                    Indicates whether the text inside the Element of the specified start tag should be excluded from the output.

                    F

                    FIELDSET - Static variable in interface HTMLElementName
                    HTML element FIELDSET - form control group.
                    finalize() - Method in class StreamedSource
                    Called by the garbage collector on an object when garbage collection determines that there are no more references to the object.
                    FONT - Static variable in interface HTMLElementName
                    HTML element FONT - local change to font.
                    FORM - Static variable in interface HTMLElementName
                    HTML element FORM - interactive form.
                    format(LogRecord) - Method in class BasicLogFormatter
                    Returns a formatted string representing the log entry information contained in the specified java.util.logging.LogRecord.
                    format(String, String, String) - Static method in class BasicLogFormatter
                    Returns a formatted string representing the specified log entry information.
                    FormControl - Class in net.htmlparser.jericho
                    Represents an HTML form control.
                    FormControlOutputStyle - Enum in net.htmlparser.jericho
                    An enumerated type representing the three major output styles of a form control's output element.
                    FormControlOutputStyle.ConfigDisplayValue - Class in net.htmlparser.jericho
                    Contains static properties that configure the FormControlOutputStyle.DISPLAY_VALUE form control output style.
                    FormControlType - Enum in net.htmlparser.jericho
                    Represents the control type of a FormControl.
                    FormField - Class in net.htmlparser.jericho
                    Represents a field in an HTML form, a field being defined as the group of all form controls having the same name.
                    FormFields - Class in net.htmlparser.jericho
                    Represents a collection of FormField objects.
                    FormFields(Collection<FormControl>) - Constructor for class FormFields
                    Constructs a new FormFields object consisting of the specified form controls.
                    FRAME - Static variable in interface HTMLElementName
                    HTML element FRAME - subwindow.
                    FRAMESET - Static variable in interface HTMLElementName
                    HTML element FRAMESET - window subdivision.
                    fullSequentialParse() - Method in class Source
                    Parses all of the tags in this source document sequentially from beginning to end.

                    G

                    generateHTML(Map<String, String>) - Static method in class Attributes
                    Returns the contents of the specified attributes map as HTML attribute name/value pairs.
                    generateHTML(String) - Static method in class EndTag
                    Generates the HTML text of a normal end tag with the specified tag name.
                    generateHTML(String) - Method in class EndTagType
                    Generates the HTML text of an end tag of this type given the name of a corresponding start tag.
                    generateHTML(String) - Method in class EndTagTypeGenericImplementation
                    Generates the HTML text of an end tag of this type given the name of a corresponding start tag.
                    generateHTML(String, Map<String, String>, boolean) - Static method in class StartTag
                    Generates the HTML text of a normal start tag with the specified tag name and attributes map.
                    get(String) - Method in class Attributes
                    Returns the Attribute with the specified name (case insensitive).
                    get(String) - Method in class FormFields
                    Returns the FormField with the specified name.
                    getAllCharacterReferences() - Method in class Segment
                    Returns a list of all CharacterReference objects that are enclosed by this segment.
                    getAllElements() - Method in class Segment
                    Returns a list of all Element objects that are enclosed by this segment.
                    getAllElements(String) - Method in class Segment
                    Returns a list of all Element objects with the specified name that are enclosed by this segment.
                    getAllElements(StartTagType) - Method in class Segment
                    Returns a list of all Element objects with start tags of the specified type that are enclosed by this segment.
                    getAllElements(String, String, boolean) - Method in class Segment
                    Returns a list of all Element objects with the specified attribute name/value pair that are enclosed by this segment.
                    getAllElements(String, Pattern) - Method in class Segment
                    Returns a list of all Element objects with the specified attribute name and value pattern that are enclosed by this segment.
                    getAllElements() - Method in class Source
                    Returns a list of all elements in this source document.
                    getAllElementsByClass(String) - Method in class Segment
                    Returns a list of all Element objects with the specified class that are enclosed by this segment.
                    getAllStartTags() - Method in class Segment
                    Returns a list of all StartTag objects that are enclosed by this segment.
                    getAllStartTags(StartTagType) - Method in class Segment
                    Returns a list of all StartTag objects of the specified type that are enclosed by this segment.
                    getAllStartTags(String) - Method in class Segment
                    Returns a list of all normal StartTag objects with the specified name that are enclosed by this segment.
                    getAllStartTags(String, String, boolean) - Method in class Segment
                    Returns a list of all StartTag objects with the specified attribute name/value pair that are enclosed by this segment.
                    getAllStartTags(String, Pattern) - Method in class Segment
                    Returns a list of all StartTag objects with the specified attribute name and value pattern that are enclosed by this segment.
                    getAllStartTags() - Method in class Source
                    Returns a list of all start tags in this source document.
                    getAllStartTagsByClass(String) - Method in class Segment
                    Returns a list of all StartTag objects with the specified class that are enclosed by this segment.
                    getAllTags() - Method in class Segment
                    Returns a list of all Tag objects that are enclosed by this segment.
                    getAllTags(TagType) - Method in class Segment
                    Returns a list of all Tag objects of the specified type that are enclosed by this segment.
                    getAllTags() - Method in class Source
                    Returns a list of all tags in this source document.
                    getAttributes() - Method in class Element
                    Returns the attributes specified in this element's start tag.
                    getAttributes() - Method in class StartTag
                    Returns the attributes specified in this start tag.
                    getAttributesMap() - Method in class FormControl
                    Returns a map of the names and values of this form control's output attributes.
                    getAttributeValue(String) - Method in class Element
                    Returns the decoded value of the attribute with the specified name (case insensitive).
                    getAttributeValue(String) - Method in class StartTag
                    Returns the decoded value of the attribute with the specified name (case insensitive).
                    getBegin() - Method in interface OutputSegment
                    Returns the character position in the source text of the output document where this segment begins.
                    getBegin() - Method in class Segment
                    Returns the character position in the Source document at which this segment begins, inclusive.
                    getBlockIndentSize() - Method in class Renderer
                    Returns the size of the indent to be used for anything other than LI elements.
                    getBlockLevelElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all the block-level elements.
                    getBufferSize() - Method in class StreamedSource
                    Returns the current size of the internal character buffer.
                    getCacheDebugInfo() - Method in class Source
                    Returns a string representation of the tag cache, useful for debugging purposes.
                    getChar() - Method in class CharacterReference
                    Returns the character represented by this character reference.
                    getCharacterReferenceString() - Method in class CharacterEntityReference
                    Returns the correct encoded form of this character entity reference.
                    getCharacterReferenceString(int) - Static method in class CharacterEntityReference
                    Returns the character entity reference encoded form of the specified unicode code point.
                    getCharacterReferenceString() - Method in class CharacterReference
                    Returns the encoded form of this character reference.
                    getCharacterReferenceString(int) - Static method in class CharacterReference
                    Returns the encoded form of the specified unicode code point.
                    getCharacterReferenceString() - Method in class NumericCharacterReference
                    Returns the correct encoded form of this numeric character reference.
                    getCharacterReferenceString(int) - Static method in class NumericCharacterReference
                    Returns the numeric character reference encoded form of the specified unicode code point.
                    getChildElements() - Method in class Element
                    Returns a list of the immediate children of this element in the document element hierarchy.
                    getChildElements() - Method in class Segment
                    Returns a list of the immediate children of this segment in the document element hierarchy.
                    getChildElements() - Method in class Source
                    Returns a list of the top-level elements in the document element hierarchy.
                    getClosingDelimiter() - Method in class TagType
                    Returns the character sequence that marks the end of the tag.
                    getCodePoint() - Method in class CharacterReference
                    Returns the unicode code point represented by this character reference.
                    getCodePointFromCharacterReferenceString(CharSequence) - Static method in class CharacterReference
                    Parses a single encoded character reference text into a unicode code point.
                    getCodePointFromName(String) - Static method in class CharacterEntityReference
                    Returns the unicode code point of the specified character entity reference name.
                    getCollapseWhiteSpace() - Method in class SourceFormatter
                    Indicates whether white space in the text between the tags is to be collapsed.
                    getColumn() - Method in class RowColumnVector
                    Returns the column number of this character position in the source document.
                    getColumn(int) - Method in class Source
                    Returns the column number of the specified character position in the source document.
                    getColumnLabels() - Method in class FormFields
                    Returns a string array containing the column labels corresponding to the values from the FormFields.getColumnValues(Map) method.
                    getColumnValues(Map<String, String[]>) - Method in class FormFields
                    Converts the data values in the specified field data set into a simple string array, suitable for storage in a tabular format such as a database table or .CSV file.
                    getColumnValues() - Method in class FormFields
                    Converts all the form submission values of the constituent form fields into a simple string array, suitable for storage in a tabular format such as a database table or .CSV file.
                    getContent() - Method in class Element
                    Returns the segment representing the content of the element.
                    getConvertNonBreakingSpaces() - Method in class Renderer
                    Indicates whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                    getConvertNonBreakingSpaces() - Method in class TextExtractor
                    Indicates whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                    getCorrespondingEndTagType() - Method in class StartTagType
                    Returns the type of end tag required to pair with a start tag of this type to form an element.
                    getCorrespondingStartTagType() - Method in class EndTagType
                    Returns the type of start tag that is usually paired with an end tag of this type to form an Element.
                    getCount() - Method in class Attributes
                    Returns the number of attributes.
                    getCount() - Method in class FormFields
                    Returns the number of FormField objects.
                    getCurrentSegment() - Method in class StreamedSource
                    Returns the current Segment from the StreamedSource.iterator().
                    getCurrentSegmentCharBuffer() - Method in class StreamedSource
                    Returns a CharBuffer containing the source text of the current segment.
                    getDataSet() - Method in class FormFields
                    Returns the entire field data set represented by the values of the constituent form fields.
                    getDebugInfo() - Method in class Attribute
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class Attributes
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class CharacterEntityReference
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class Config.CompatibilityMode
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class Element
                     
                    getDebugInfo() - Method in class EndTag
                     
                    getDebugInfo() - Method in class FormControl
                     
                    getDebugInfo() - Method in enum FormControlOutputStyle
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class FormField
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class FormFields
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class NumericCharacterReference
                     
                    getDebugInfo() - Method in class OutputDocument
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in interface OutputSegment
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class Segment
                    Returns a string representation of this object useful for debugging purposes.
                    getDebugInfo() - Method in class StartTag
                     
                    getDecimalCharacterReferenceString() - Method in class CharacterReference
                    Returns the decimal encoded form of this character reference.
                    getDecimalCharacterReferenceString(int) - Static method in class CharacterReference
                    Returns the decimal encoded form of the specified unicode code point.
                    getDecorateFontStyles() - Method in class Renderer
                    Indicates whether decoration characters are to be included around the content of some font style elements and phrase elements.
                    getDefaultMaxErrorCount() - Static method in class Attributes
                    Returns the default maximum error count allowed when parsing attributes.
                    getDeprecatedElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all deprecated elements in HTML 4.01.
                    getDepth() - Method in class Element
                    Returns the nesting depth of this element in the document element hierarchy.
                    getDescription() - Method in class TagType
                    Returns a description of this tag type useful for debugging purposes.
                    getDocumentSpecifiedEncoding() - Method in class Source
                    Returns the document encoding specified within the text of the document.
                    getElement() - Method in class EndTag
                    Returns the element that is ended by this end tag.
                    getElement() - Method in class FormControl
                    Returns the element representing this form control in the source document.
                    getElement() - Method in class StartTag
                    Returns the element that is started by this start tag.
                    getElement() - Method in class Tag
                    Returns the element that is started or ended by this tag.
                    getElementById(String) - Method in class Source
                    Returns the Element with the specified id attribute value.
                    getElementName() - Method in enum FormControlType
                    Returns the name of the Element that constitutes this form control type.
                    getElementNames() - Static method in class HTMLElements
                    Returns a list containing all of the HTML element names.
                    getEnclosingElement(int) - Method in class Source
                    Returns the most nested normal Element that encloses the specified position in the source document.
                    getEnclosingElement(int, String) - Method in class Source
                    Returns the most nested normal Element with the specified name that encloses the specified position in the source document.
                    getEnclosingTag(int) - Method in class Source
                    Returns the Tag that encloses the specified position in the source document.
                    getEnclosingTag(int, TagType) - Method in class Source
                    Returns the Tag of the specified type that encloses the specified position in the source document.
                    getEncoding() - Method in class Source
                    Returns the character encoding scheme of the source byte stream used to create this object.
                    getEncoding() - Method in class StreamedSource
                    Returns the character encoding scheme of the source byte stream used to create this object.
                    getEncodingFilterWriter(Writer) - Static method in class CharacterReference
                    Returns a filter Writer that encodes all text before passing it through to the specified Writer.
                    getEncodingSpecificationInfo() - Method in class Source
                    Returns a concise description of how the encoding of the source document was determined.
                    getEncodingSpecificationInfo() - Method in class StreamedSource
                    Returns a concise description of how the encoding of the source document was determined.
                    getEnd() - Method in interface OutputSegment
                    Returns the character position in the source text of the output document where this segment ends.
                    getEnd() - Method in class Segment
                    Returns the character position in the Source document immediately after the end of this segment.
                    getEnd(Source, int) - Method in class StartTagTypeGenericImplementation
                    Returns the end of a tag of this type, starting from the specified position in the specified source document.
                    getEndTag() - Method in class Element
                    Returns the end tag of the element.
                    getEndTagForbiddenElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all of the HTML elements for which the end tag is forbidden.
                    getEndTagName(String) - Method in class EndTagType
                    Returns the end tag name that is required to match a corresponding start tag with the specified name.
                    getEndTagName(String) - Method in class EndTagTypeGenericImplementation
                    Returns the end tag name that is required to match a corresponding start tag with the specified name.
                    getEndTagOptionalElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all of the HTML elements for which the end tag is optional.
                    getEndTagRequiredElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all of the HTML elements for which the end tag is required.
                    getEndTagType() - Method in class EndTag
                    Returns the type of this end tag.
                    getEstimatedMaximumOutputLength() - Method in interface CharStreamSource
                    Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
                    getEstimatedMaximumOutputLength() - Method in class OutputDocument
                     
                    getEstimatedMaximumOutputLength() - Method in class Renderer
                     
                    getEstimatedMaximumOutputLength() - Method in class SourceCompactor
                     
                    getEstimatedMaximumOutputLength() - Method in class SourceFormatter
                     
                    getEstimatedMaximumOutputLength() - Method in class TextExtractor
                     
                    getExcludeNonHTMLElements() - Method in class TextExtractor
                    Indicates whether the content of non-HTML elements is excluded from the output.
                    getFirstElement() - Method in class Segment
                    Returns the first Element enclosed by this segment.
                    getFirstElement(String) - Method in class Segment
                    Returns the first normal Element with the specified name enclosed by this segment.
                    getFirstElement(String, String, boolean) - Method in class Segment
                    Returns the first Element with the specified attribute name/value pair enclosed by this segment.
                    getFirstElement(String, Pattern) - Method in class Segment
                    Returns the first Element with the specified attribute name and value pattern that is enclosed by this segment.
                    getFirstElementByClass(String) - Method in class Segment
                    Returns the first Element with the specified class that is enclosed by this segment.
                    getFirstStartTag() - Method in class Segment
                    Returns the first StartTag enclosed by this segment.
                    getFirstStartTag(StartTagType) - Method in class Segment
                    Returns the first StartTag of the specified type enclosed by this segment.
                    getFirstStartTag(String) - Method in class Segment
                    Returns the first normal StartTag with the specified name enclosed by this segment.
                    getFirstStartTag(String, String, boolean) - Method in class Segment
                    Returns the first StartTag with the specified attribute name/value pair enclosed by this segment.
                    getFirstStartTag(String, Pattern) - Method in class Segment
                    Returns the first StartTag with the specified attribute name and value pattern that is enclosed by this segment.
                    getFirstStartTagByClass(String) - Method in class Segment
                    Returns the first StartTag with the specified class that is enclosed by this segment.
                    getFormControl() - Method in class Element
                    Returns the FormControl defined by this element.
                    getFormControl(String) - Method in class FormField
                    Returns the constituent FormControl with the specified predefined value.
                    getFormControl() - Method in class FormField
                    Returns the first FormControl from this field.
                    getFormControl() - Method in class StartTag
                    Returns the FormControl defined by this start tag.
                    getFormControls() - Method in class FormField
                    Returns a collection of all the constituent form controls in this field.
                    getFormControls() - Method in class FormFields
                    Returns a list of all the constituent form controls from all the form fields in this collection.
                    getFormControls() - Method in class Segment
                    Returns a list of the FormControl objects that are enclosed by this segment.
                    getFormControlType() - Method in class FormControl
                    Returns the type of this form control.
                    getFormFields() - Method in class Segment
                    Returns the FormFields object representing all form fields that are enclosed by this segment.
                    getHexadecimalCharacterReferenceString() - Method in class CharacterReference
                    Returns the hexadecimal encoded form of this character reference.
                    getHexadecimalCharacterReferenceString(int) - Static method in class CharacterReference
                    Returns the hexadecimal encoded form of the specified unicode code point.
                    getIncludeAttributes() - Method in class TextExtractor
                    Indicates whether any attribute values are included in the output.
                    getIncludeHyperlinkURLs() - Method in class Renderer
                    Indicates whether hyperlink URLs are included in the output.
                    getIndentAllElements() - Method in class SourceFormatter
                    Indicates whether all elements are to be indented, including inline-level elements and those with preformatted contents.
                    getIndentString() - Method in class SourceFormatter
                    Returns the string to be used for indentation.
                    getInlineLevelElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all the inline-level elements.
                    getKey() - Method in class Attribute
                    Returns the name of this attribute in lower case.
                    getListBullets() - Method in class Renderer
                    Returns the bullet characters to use for list items inside UL elements.
                    getListIndentSize() - Method in class Renderer
                    Returns the size of the indent to be used for LI elements.
                    getLogger(String) - Method in interface LoggerProvider
                    Creates a new Logger instance with the specified name.
                    getLogger() - Method in class Source
                    Returns the Logger that handles log messages.
                    getLogger() - Method in class StreamedSource
                    Returns the Logger that handles log messages.
                    getMaxLineLength() - Method in class Renderer
                    Returns the column at which lines are to be wrapped.
                    getName() - Method in class Attribute
                    Returns the name of this attribute in original case.
                    getName() - Method in class CharacterEntityReference
                    Returns the name of this character entity reference.
                    getName(char) - Static method in class CharacterEntityReference
                    Returns the character entity reference name of the specified character.
                    getName(int) - Static method in class CharacterEntityReference
                    Returns the character entity reference name of the specified unicode code point.
                    getName() - Method in class Config.CompatibilityMode
                    Returns the name of this compatibility mode.
                    getName() - Method in class Element
                    Returns the name of the start tag of this element, always in lower case.
                    getName() - Method in class FormControl
                    Returns the name of the control.
                    getName() - Method in class FormField
                    Returns the control name shared by all of this field's constituent controls.
                    getName() - Method in class Tag
                    Returns the name of this tag, always in lower case.
                    getName() - Method in class WriterLogger
                    Returns the name of this logger.
                    getNameEnd(int) - Method in class Source
                    Returns the end position of the XML Name that starts at the specified position.
                    getNamePrefix() - Method in class TagType
                    Returns the name prefix required by this tag type.
                    getNameSegment() - Method in class Attribute
                    Returns the segment spanning the name of this attribute.
                    getNameSegment() - Method in class Tag
                    Returns the segment spanning the name of this tag.
                    getNameToCodePointMap() - Static method in class CharacterEntityReference
                    Returns a map of character entity reference names (String) to unicode code points (Integer).
                    getNestingForbiddenElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all of the HTML elements which should never contain elements of the same name, either as direct or indirect descendants.
                    getNewLine() - Method in class Renderer
                    Returns the string to be used to represent a newline in the output.
                    getNewLine() - Method in class Source
                    Returns the newline character sequence used in the source document.
                    getNewLine() - Method in class SourceCompactor
                    Returns the string to be used to represent a newline in the output.
                    getNewLine() - Method in class SourceFormatter
                    Returns the string to be used to represent a newline in the output.
                    getNextCharacterReference(int) - Method in class Source
                    Returns the CharacterReference beginning at or immediately following the specified position in the source document.
                    getNextElement(int) - Method in class Source
                    Returns the Element beginning at or immediately following the specified position in the source document.
                    getNextElement(int, String) - Method in class Source
                    Returns the normal Element with the specified name beginning at or immediately following the specified position in the source document.
                    getNextElement(int, String, String, boolean) - Method in class Source
                    Returns the Element with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.
                    getNextElement(int, String, Pattern) - Method in class Source
                    Returns the Element with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.
                    getNextElementByClass(int, String) - Method in class Source
                    Returns the Element with the specified class beginning at or immediately following the specified position in the source document.
                    getNextEndTag(int) - Method in class Source
                    Returns the EndTag beginning at or immediately following the specified position in the source document.
                    getNextEndTag(int, EndTagType) - Method in class Source
                    Returns the EndTag of the specified type beginning at or immediately following the specified position in the source document.
                    getNextEndTag(int, String) - Method in class Source
                    Returns the normal EndTag with the specified name beginning at or immediately following the specified position in the source document.
                    getNextEndTag(int, String, EndTagType) - Method in class Source
                    Returns the EndTag with the specified name and type beginning at or immediately following the specified position in the source document.
                    getNextStartTag(int) - Method in class Source
                    Returns the StartTag beginning at or immediately following the specified position in the source document.
                    getNextStartTag(int, StartTagType) - Method in class Source
                    Returns the StartTag of the specified type beginning at or immediately following the specified position in the source document.
                    getNextStartTag(int, String) - Method in class Source
                    Returns the normal StartTag with the specified name beginning at or immediately following the specified position in the source document.
                    getNextStartTag(int, String, StartTagType) - Method in class Source
                    Returns the StartTag with the specified name and type beginning at or immediately following the specified position in the source document.
                    getNextStartTag(int, String, String, boolean) - Method in class Source
                    Returns the StartTag with the specified attribute name/value pair beginning at or immediately following the specified position in the source document.
                    getNextStartTag(int, String, Pattern) - Method in class Source
                    Returns the StartTag with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document.
                    getNextStartTagByClass(int, String) - Method in class Source
                    Returns the StartTag with the specified class beginning at or immediately following the specified position in the source document.
                    getNextTag(int) - Method in class Source
                    Returns the Tag beginning at or immediately following the specified position in the source document.
                    getNextTag(int, TagType) - Method in class Source
                    Returns the Tag of the specified type beginning at or immediately following the specified position in the source document.
                    getNextTag() - Method in class Tag
                    Returns the next tag in the source document.
                    getNodeIterator() - Method in class Segment
                    Returns an iterator over every tag, character reference and plain text segment contained within this segment.
                    getNonterminatingElementNames(String) - Static method in class HTMLElements
                    Returns the names of elements that do NOT implicitly terminate an HTML element with the specified name.
                    getOptionElementIterator() - Method in class FormControl
                    Returns an iterator over the OPTION elements contained within this control, in order of appearance.
                    getOutputStyle() - Method in class FormControl
                    Returns the current output style of this form control.
                    getParentElement() - Method in class Element
                    Returns the parent of this element in the document element hierarchy.
                    getParseText() - Method in class Source
                    Returns the parse text of this source document.
                    getPos() - Method in class RowColumnVector
                    Returns the character position in the source document.
                    getPredefinedValue() - Method in class FormControl
                    Returns the initial value of this control if it has a predefined value.
                    getPredefinedValues() - Method in class FormControl
                    Returns a collection of all predefined values in this control in order of appearance.
                    getPredefinedValues() - Method in class FormField
                    Returns a collection of the predefined values of all constituent controls in this field.
                    getPreliminaryEncodingInfo() - Method in class Source
                    Returns the preliminary encoding of the source document together with a concise description of how it was determined.
                    getPreliminaryEncodingInfo() - Method in class StreamedSource
                    Returns the preliminary encoding of the source document together with a concise description of how it was determined.
                    getPreviousCharacterReference(int) - Method in class Source
                    Returns the CharacterReference at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousEndTag(int) - Method in class Source
                    Returns the EndTag at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousEndTag(int, EndTagType) - Method in class Source
                    Returns the EndTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousEndTag(int, String) - Method in class Source
                    Returns the normal EndTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousStartTag(int) - Method in class Source
                    Returns the StartTag at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousStartTag(int, StartTagType) - Method in class Source
                    Returns the StartTag of the specified type at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousStartTag(int, String) - Method in class Source
                    Returns the normal StartTag with the specified name at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousStartTag(int, String, StartTagType) - Method in class Source
                    Returns the StartTag with the specified name and type at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousTag(int) - Method in class Source
                    Returns the Tag beginning at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousTag(int, TagType) - Method in class Source
                    Returns the Tag of the specified type beginning at or immediately preceding (or enclosing) the specified position in the source document.
                    getPreviousTag() - Method in class Tag
                    Returns the previous tag in the source document.
                    getQuoteChar() - Method in class Attribute
                    Returns the character used to quote the value.
                    getReader(CharStreamSource) - Static method in class CharStreamSourceUtil
                    Returns a Reader that reads the output of the specified CharStreamSource.
                    getRegisteredOutputSegments() - Method in class OutputDocument
                    Returns a list of all the registered OutputSegment objects in this output document.
                    getRegisteredTagTypes() - Static method in class TagType
                    Returns a list of all the currently registered tag types in order of lowest to highest precedence.
                    getRenderer() - Method in class Segment
                    Performs a simple rendering of the HTML markup in this segment into text.
                    getRow() - Method in class RowColumnVector
                    Returns the row number of this character position in the source document.
                    getRow(int) - Method in class Source
                    Returns the row number of the specified character position in the source document.
                    getRowColumnVector(int) - Method in class Source
                    Returns a RowColumnVector object representing the row and column number of the specified character position in the source document.
                    getSource() - Method in class Segment
                    Returns the Source document containing this segment.
                    getSourceFormatter() - Method in class Source
                    Formats the HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
                    getSourceText() - Method in class OutputDocument
                    Returns the original source text upon which this output document is based.
                    getStartDelimiter() - Method in class TagType
                    Returns the character sequence that marks the start of the tag.
                    getStartTag() - Method in class Element
                    Returns the start tag of the element.
                    getStartTagOptionalElementNames() - Static method in class HTMLElements
                    Returns a set containing the names of all of the HTML elements for which the start tag is optional.
                    getStartTagType() - Method in class StartTag
                    Returns the type of this start tag.
                    getString(Reader) - Static method in class Util
                    Returns the text loaded from the specified Reader as a string.
                    getTableCellSeparator() - Method in class Renderer
                    Returns the string that is to separate table cells.
                    getTagAt(int) - Method in class Source
                    Returns the Tag at the specified position in the source document.
                    getTagContent() - Method in class StartTag
                    Returns the segment between the end of the tag's name and the start of its end delimiter.
                    getTagType() - Method in class EndTag
                     
                    getTagType() - Method in class StartTag
                     
                    getTagType() - Method in class Tag
                    Returns the type of this tag.
                    getTagTypesIgnoringEnclosedMarkup() - Static method in class TagType
                    Returns an array of all the tag types inside which the parser ignores all non-server tags in parse on demand mode.
                    getTerminatingEndTagNames(String) - Static method in class HTMLElements
                    Returns the names of end tags that implicitly terminate an HTML element with the specified name.
                    getTerminatingStartTagNames(String) - Static method in class HTMLElements
                    Returns the names of start tags that implicitly terminate an HTML element with the specified name.
                    getTextExtractor() - Method in class Segment
                    Extracts the textual content from the HTML markup of this segment.
                    getTidyTags() - Method in class SourceFormatter
                    Indicates whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.
                    getUnicodeText() - Method in class CharacterReference
                    Returns the unicode code point of this character reference in U+ notation.
                    getUnicodeText(int) - Static method in class CharacterReference
                    Returns the specified unicode code point in U+ notation.
                    getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) - Method in class Config.CompatibilityMode
                    Returns the maximum unicode code point of an unterminated character entity reference which is to be recognised in the specified context.
                    getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) - Method in class Config.CompatibilityMode
                    Returns the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.
                    getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) - Method in class Config.CompatibilityMode
                    Returns the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.
                    getUserData() - Method in class Tag
                    Returns the general purpose user data object that has previously been associated with this tag via the Tag.setUserData(Object) method.
                    getUserValueCount() - Method in class FormField
                    Returns the number of constituent user value controls in this field.
                    getValue() - Method in class Attribute
                    Returns the decoded value of this attribute, or null if it has no value.
                    getValue(String) - Method in class Attributes
                    Returns the decoded value of the attribute with the specified name (case insensitive).
                    getValues() - Method in class FormControl
                    Returns a list of the control's submission values in order of appearance.
                    getValues() - Method in class FormField
                    Returns a list of the field submission values in order of appearance.
                    getValues(String) - Method in class FormFields
                    Returns a list of the field submission values of all the constituent form fields with the specified name.
                    getValueSegment() - Method in class Attribute
                    Returns the segment spanning the value of this attribute, or null if it has no value.
                    getValueSegmentIncludingQuotes() - Method in class Attribute
                    Returns the segment spanning the value of this attribute, including quotation marks if any, or null if it has no value.
                    getWriter() - Method in class WriterLogger
                    Returns the Writer to which all output is sent.

                    H

                    H1 - Static variable in interface HTMLElementName
                    HTML element H1 - heading.
                    H2 - Static variable in interface HTMLElementName
                    HTML element H2 - heading.
                    H3 - Static variable in interface HTMLElementName
                    HTML element H3 - heading.
                    H4 - Static variable in interface HTMLElementName
                    HTML element H4 - heading.
                    H5 - Static variable in interface HTMLElementName
                    HTML element H5 - heading.
                    H6 - Static variable in interface HTMLElementName
                    HTML element H6 - heading.
                    hasAttributes() - Method in class StartTagType
                    Indicates whether a start tag of this type contains attributes.
                    hashCode() - Method in class Segment
                    Returns a hash code value for the segment.
                    hasPredefinedValue() - Method in enum FormControlType
                    Indicates whether any value submitted by this type of control is predefined in the HTML and typically not modified by the user or server/client scripts.
                    hasValue() - Method in class Attribute
                    Indicates whether this attribute has a value.
                    HEAD - Static variable in interface HTMLElementName
                    HTML element HEAD - document head.
                    HR - Static variable in interface HTMLElementName
                    HTML element HR - horizontal rule.
                    HTML - Static variable in interface HTMLElementName
                    HTML element HTML - document root element.
                    HTMLElementName - Interface in net.htmlparser.jericho
                    Contains static fields representing the names of all elements defined in the HTML 4.01 specification.
                    HTMLElements - Class in net.htmlparser.jericho
                    Contains static methods which group HTML element names by the characteristics of their associated elements.

                    I

                    I - Static variable in interface HTMLElementName
                    HTML element I - italic text style.
                    IE - Static variable in class Config.CompatibilityMode
                    Microsoft Internet Explorer compatibility mode.
                    IFRAME - Static variable in interface HTMLElementName
                    HTML element IFRAME - inline subwindow.
                    ignoreWhenParsing() - Method in class Segment
                    Causes this segment to be ignored when parsing.
                    ignoreWhenParsing(int, int) - Method in class Source
                    Causes the specified range of the source text to be ignored when parsing.
                    ignoreWhenParsing(Collection<? extends Segment>) - Method in class Source
                    Causes all of the segments in the specified collection to be ignored when parsing.
                    IMG - Static variable in interface HTMLElementName
                    HTML element IMG - embedded image.
                    includeAttribute(StartTag, Attribute) - Method in class TextExtractor
                    Indicates whether the value of the specified attribute in the specified start tag is included in the output.
                    indexOf(char, int) - Method in interface ParseText
                    Returns the index within this parse text of the first occurrence of the specified character, starting the search at the position specified by fromIndex.
                    indexOf(char, int, int) - Method in interface ParseText
                    Returns the index within this parse text of the first occurrence of the specified character, starting the search at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                    indexOf(String, int) - Method in interface ParseText
                    Returns the index within this parse text of the first occurrence of the specified string, starting the search at the position specified by fromIndex.
                    indexOf(String, int, int) - Method in interface ParseText
                    Returns the index within this parse text of the first occurrence of the specified string, starting the search at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                    info(String) - Method in interface Logger
                    Logs a message at the INFO level.
                    info(String) - Method in class WriterLogger
                     
                    INPUT - Static variable in interface HTMLElementName
                    HTML element INPUT - form control.
                    INS - Static variable in interface HTMLElementName
                    HTML element INS - inserted text.
                    insert(int, CharSequence) - Method in class OutputDocument
                    Inserts the specified text at the specified character position in this output document.
                    INVALID_CODE_POINT - Static variable in class CharacterReference
                    Represents an invalid unicode code point.
                    IsApostropheEncoded - Static variable in class Config
                    Determines whether apostrophes are encoded when calling the CharacterReference.encode(CharSequence) method.
                    isChecked() - Method in class FormControl
                    Indicates whether this form control is checked.
                    isConditionalCommentEndifTag(Tag) - Static method in class MicrosoftTagTypes
                    Indicates whether the specified tag is a downlevel-revealed conditional comment "endif" tag (<![endif]>).
                    isConditionalCommentIfTag(Tag) - Static method in class MicrosoftTagTypes
                    Indicates whether the specified tag is a downlevel-revealed conditional comment "if" tag (<![if ... ]>).
                    isDebugEnabled() - Method in interface Logger
                    Indicates whether logging is enabled at the DEBUG level.
                    isDebugEnabled() - Method in class WriterLogger
                     
                    isDecimal() - Method in class NumericCharacterReference
                    Indicates whether this numeric character reference specifies the unicode code point in decimal format.
                    isDisabled() - Method in class FormControl
                    Indicates whether this form control is disabled.
                    isEmpty() - Method in class Element
                    Indicates whether this element has zero-length content.
                    isEmptyElementTag() - Method in class Element
                    Indicates whether this element is an empty-element tag.
                    isEmptyElementTag() - Method in class StartTag
                    Indicates whether this start tag is an empty-element tag.
                    isEndTagForbidden() - Method in class StartTag
                    Indicates whether a matching end tag is forbidden.
                    isEndTagRequired() - Method in class StartTag
                    Indicates whether a matching end tag is required.
                    isErrorEnabled() - Method in interface Logger
                    Indicates whether logging is enabled at the ERROR level.
                    isErrorEnabled() - Method in class WriterLogger
                     
                    isFormFieldNameCaseInsensitive() - Method in class Config.CompatibilityMode
                    Indicates whether form field names are treated as case insensitive.
                    isHexadecimal() - Method in class NumericCharacterReference
                    Indicates whether this numeric character reference specifies the unicode code point in hexadecimal format.
                    ISINDEX - Static variable in interface HTMLElementName
                    HTML element ISINDEX - single line prompt.
                    isInfoEnabled() - Method in interface Logger
                    Indicates whether logging is enabled at the INFO level.
                    isInfoEnabled() - Method in class WriterLogger
                     
                    isNameAfterPrefixRequired() - Method in class StartTagType
                    Indicates whether a valid XML tag name is required directly after the prefix.
                    isParsedByMason(TagType) - Static method in class MasonTagTypes
                    Indicates whether the specified tag type is recognised by a Mason parser.
                    isParsedByPHP(TagType) - Static method in class PHPTagTypes
                    Indicates whether the specified tag type is recognised by a PHP parser.
                    isServerTag() - Method in class TagType
                    Indicates whether this tag type represents a server tag.
                    isStatic() - Method in class EndTagTypeGenericImplementation
                    Indicates whether the end tag text is static.
                    isSubmit() - Method in enum FormControlType
                    Indicates whether this control type causes the form to be submitted.
                    isSyntacticalEmptyElementTag() - Method in class StartTag
                    Indicates whether this start tag is syntactically an empty-element tag.
                    isTerminated() - Method in class CharacterReference
                    Indicates whether this character reference is terminated by a semicolon (;).
                    isUnregistered() - Method in class EndTag
                     
                    isUnregistered() - Method in class StartTag
                     
                    isUnregistered() - Method in class Tag
                    Indicates whether this tag has a syntax that does not match any of the registered tag types.
                    isValidPosition(Source, int, int[]) - Method in class TagType
                    Indicates whether a tag of this type is valid in the specified position of the specified source document.
                    isWarnEnabled() - Method in interface Logger
                    Indicates whether logging is enabled at the WARN level.
                    isWarnEnabled() - Method in class WriterLogger
                     
                    isWhiteSpace() - Method in class Segment
                    Indicates whether this segment consists entirely of white space.
                    isWhiteSpace(char) - Static method in class Segment
                    Indicates whether the specified character is white space.
                    isXML() - Method in class Source
                    Indicates whether the source document is likely to be XML.
                    isXML() - Method in class StreamedSource
                    Indicates whether the source document is likely to be XML.
                    isXMLName(CharSequence) - Static method in class Tag
                    Indicates whether the specified text is a valid XML Name.
                    isXMLNameChar(char) - Static method in class Tag
                    Indicates whether the specified character is valid anywhere in an XML Name.
                    isXMLNameStartChar(char) - Static method in class Tag
                    Indicates whether the specified character is valid at the start of an XML Name.
                    iterator() - Method in class Attributes
                    Returns an iterator over the Attribute objects in this list in order of appearance.
                    iterator() - Method in class FormFields
                    Returns an iterator over the FormField objects in the collection.
                    iterator() - Method in class Source
                    Returns an iterator over every tag, character reference and plain text segment contained within the source document.
                    iterator() - Method in class StreamedSource
                    Returns an iterator over every tag, character reference and plain text segment contained within the source document.

                    J

                    JAVA - Static variable in interface LoggerProvider
                    A LoggerProvider implementation that wraps the standard java.util.logging system included in the Java SDK version 1.4 and above.
                    JCL - Static variable in interface LoggerProvider
                    A LoggerProvider implementation that wraps the Jakarta Commons Logging (JCL) framework.

                    K

                    KBD - Static variable in interface HTMLElementName
                    HTML element KBD - text to be entered by the user.

                    L

                    LABEL - Static variable in interface HTMLElementName
                    HTML element LABEL - form field label text.
                    lastIndexOf(char, int) - Method in interface ParseText
                    Returns the index within this parse text of the last occurrence of the specified character, searching backwards starting at the position specified by fromIndex.
                    lastIndexOf(char, int, int) - Method in interface ParseText
                    Returns the index within this parse text of the last occurrence of the specified character, searching backwards starting at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                    lastIndexOf(String, int) - Method in interface ParseText
                    Returns the index within this parse text of the last occurrence of the specified string, searching backwards starting at the position specified by fromIndex.
                    lastIndexOf(String, int, int) - Method in interface ParseText
                    Returns the index within this parse text of the last occurrence of the specified string, searching backwards starting at the position specified by fromIndex, and breaking the search at the index specified by breakAtIndex.
                    LegacyIteratorCompatabilityMode - Static variable in class Source
                    Deprecated. Modify existing code to explicitly handle CharacterReference segments.
                    LEGEND - Static variable in interface HTMLElementName
                    HTML element LEGEND - fieldset legend.
                    length() - Method in interface ParseText
                    Returns the length of the parse text.
                    length() - Method in class Segment
                    Returns the length of the segment.
                    length() - Method in class Source
                    Returns the length of the source document.
                    LI - Static variable in interface HTMLElementName
                    HTML element LI - list item.
                    LINK - Static variable in interface HTMLElementName
                    HTML element LINK - a media-independent link.
                    listIterator(int) - Method in class Attributes
                    Returns a list iterator of the Attribute objects in this list in order of appearance, starting at the specified position in the list.
                    log(String, String) - Method in class WriterLogger
                    Logs the specified message at the specified level.
                    LOG4J - Static variable in interface LoggerProvider
                    A LoggerProvider implementation that wraps the Apache Log4J framework.
                    Logger - Interface in net.htmlparser.jericho
                    Defines the interface for handling log messages.
                    LoggerProvider - Static variable in class Config
                    Determines the LoggerProvider that is used to create the default Logger object for each new Source object.
                    LoggerProvider - Interface in net.htmlparser.jericho
                    Defines the interface for a factory class to provide Logger instances for each Source object.

                    M

                    MAP - Static variable in interface HTMLElementName
                    HTML element MAP - client-side image map.
                    MARKUP_DECLARATION - Static variable in class StartTagType
                    The tag type given to a markup declaration (<!ELEMENT ... > | <!ATTLIST ... > | <!ENTITY ... > | <!NOTATION ... >).
                    MASON_COMPONENT_CALL - Static variable in class MasonTagTypes
                    The tag type given to a Mason component call (<& ... &>).
                    MASON_COMPONENT_CALLED_WITH_CONTENT - Static variable in class MasonTagTypes
                    The tag type given to the start tag of a Mason component called with content (<&| ... &> ... </&>).
                    MASON_COMPONENT_CALLED_WITH_CONTENT_END - Static variable in class MasonTagTypes
                    The tag type given to the end tag of a Mason component called with content.
                    MASON_NAMED_BLOCK - Static variable in class MasonTagTypes
                    The tag type given to the start tag of a Mason named block (<%name ... > ... </%name>).
                    MASON_NAMED_BLOCK_END - Static variable in class MasonTagTypes
                    The tag type given to the end tag of a Mason named block.
                    MasonTagTypes - Class in net.htmlparser.jericho
                    Contains tag types related to the Mason server platform.
                    MENU - Static variable in interface HTMLElementName
                    HTML element MENU - menu list.
                    merge(FormFields) - Method in class FormFields
                    Merges the specified FormFields into this FormFields collection.
                    META - Static variable in interface HTMLElementName
                    HTML element META - generic metainformation.
                    MicrosoftTagTypes - Class in net.htmlparser.jericho
                    Contains tag types recognised exclusively by Microsoft® Internet Explorer.
                    MOZILLA - Static variable in class Config.CompatibilityMode
                    Mozilla / Firefox / Netscape compatibility mode.
                    MultipleValueSeparator - Static variable in class FormControlOutputStyle.ConfigDisplayValue
                    Defines the text that is used to separate multiple values in a display value element.

                    N

                    net.htmlparser.jericho - package net.htmlparser.jericho
                    Jericho HTML Parser 3.1
                    NewLine - Static variable in class Config
                    Determines the string used to represent a newline in text output throughout the library.
                    NO_BREAK - Static variable in interface ParseText
                    A value to use as the breakAtIndex argument in certain methods to indicate that the search should continue to the start or end of the parse text.
                    NOFRAMES - Static variable in interface HTMLElementName
                    HTML element NOFRAMES - alternate content container for non frame-based rendering.
                    NORMAL - Static variable in class EndTagType
                    The tag type given to a normal HTML or XML end tag (</name>).
                    NORMAL - Static variable in class StartTagType
                    The tag type given to a normal HTML or XML start tag (<name ... >).
                    NOSCRIPT - Static variable in interface HTMLElementName
                    HTML element NOSCRIPT - alternate content container for non script-based rendering.
                    NumericCharacterReference - Class in net.htmlparser.jericho
                    Represents an HTML Numeric Character Reference.

                    O

                    OBJECT - Static variable in interface HTMLElementName
                    HTML element OBJECT - generic embedded object.
                    OL - Static variable in interface HTMLElementName
                    HTML element OL - ordered list.
                    OPERA - Static variable in class Config.CompatibilityMode
                    Opera compatibility mode.
                    OPTGROUP - Static variable in interface HTMLElementName
                    HTML element OPTGROUP - option group.
                    OPTION - Static variable in interface HTMLElementName
                    HTML element OPTION - selectable choice.
                    outputCSVLine(Writer, String[]) - Static method in class Util
                    Outputs the specified array of strings to the specified Writer in the format of a line for a CSV file.
                    OutputDocument - Class in net.htmlparser.jericho
                    Represents a modified version of an original Source document or Segment.
                    OutputDocument(Source) - Constructor for class OutputDocument
                    Constructs a new output document based on the specified source document.
                    OutputDocument(Segment) - Constructor for class OutputDocument
                    Constructs a new output document based on the specified Segment.
                    OutputLevel - Static variable in class BasicLogFormatter
                    Determines whether the logging level is included in the output.
                    OutputName - Static variable in class BasicLogFormatter
                    Determines whether the logger name is included in the output.
                    OutputSegment - Interface in net.htmlparser.jericho
                    Defines the interface for an output segment, which is used in an OutputDocument to replace segments of the source document with other text.

                    P

                    P - Static variable in interface HTMLElementName
                    HTML element P - paragraph.
                    PARAM - Static variable in interface HTMLElementName
                    HTML element PARAM - named property value.
                    parse(CharSequence) - Static method in class CharacterReference
                    Parses a single encoded character reference text into a CharacterReference object.
                    parseAttributes() - Method in class Segment
                    Parses any Attributes within this segment.
                    parseAttributes(int, int) - Method in class Source
                    Parses any Attributes starting at the specified position.
                    parseAttributes(int, int, int) - Method in class Source
                    Parses any Attributes starting at the specified position.
                    parseAttributes() - Method in class StartTag
                    Parses the attributes specified in this start tag, regardless of the type of start tag.
                    parseAttributes(int) - Method in class StartTag
                    Parses the attributes specified in this start tag, regardless of the type of start tag.
                    parseAttributes(Source, int, String) - Method in class StartTagType
                    Internal method for the parsing of Attributes.
                    ParseText - Interface in net.htmlparser.jericho
                    Represents the text from the source document that is to be parsed.
                    PasswordChar - Static variable in class FormControlOutputStyle.ConfigDisplayValue
                    Defines the character used to represent the value of a PASSWORD form control in a display value element.
                    PHP_SCRIPT - Static variable in class PHPTagTypes
                    The tag type given to a script-style PHP start tag (<script language="php"> ... </script>).
                    PHP_SHORT - Static variable in class PHPTagTypes
                    The tag type given to a short-form PHP tag (<? ... ?>).
                    PHP_STANDARD - Static variable in class PHPTagTypes
                    The tag type given to a standard PHP tag (<?php ... ?>).
                    PHPTagTypes - Class in net.htmlparser.jericho
                    Contains tag types related to the PHP server platform.
                    populateMap(Map<String, String>, boolean) - Method in class Attributes
                    Populates the specified Map with the name/value pairs from these attributes.
                    PRE - Static variable in interface HTMLElementName
                    HTML element PRE - preformatted text.

                    Q

                    Q - Static variable in interface HTMLElementName
                    HTML element Q - short inline quotation.

                    R

                    reencode(CharSequence) - Static method in class CharacterReference
                    Re-encodes the specified text, equivalent to decoding and then encoding again.
                    register() - Static method in class MasonTagTypes
                    Registers all of the tag types defined in this class at once.
                    register() - Static method in class MicrosoftTagTypes
                    Registers all of the tag types defined in this class at once.
                    register(OutputSegment) - Method in class OutputDocument
                    Registers the specified output segment in this output document.
                    register() - Static method in class PHPTagTypes
                    Registers all of the tag types defined in this class at once.
                    register() - Method in class TagType
                    Registers this tag type for recognition by the parser.
                    remove(Segment) - Method in class OutputDocument
                    Removes the specified segment from this output document.
                    remove(Collection<? extends Segment>) - Method in class OutputDocument
                    Removes all the segments from this output document represented by the specified source Segment objects.
                    Renderer - Class in net.htmlparser.jericho
                    Performs a simple rendering of HTML markup into text.
                    Renderer(Segment) - Constructor for class Renderer
                    Constructs a new Renderer based on the specified Segment.
                    renderHyperlinkURL(StartTag) - Method in class Renderer
                    Renders the hyperlink URL from the specified StartTag.
                    replace(Segment, CharSequence) - Method in class OutputDocument
                    Replaces the specified segment in this output document with the specified text.
                    replace(int, int, CharSequence) - Method in class OutputDocument
                    Replaces the specified segment of this output document with the specified text.
                    replace(int, int, char) - Method in class OutputDocument
                    Replaces the specified segment of this output document with the specified character.
                    replace(FormControl) - Method in class OutputDocument
                    Replaces the specified FormControl in this output document.
                    replace(FormFields) - Method in class OutputDocument
                    Replaces all the constituent form controls from the specified FormFields in this output document.
                    replace(Attributes, boolean) - Method in class OutputDocument
                    Replaces the specified Attributes segment in this output document with the name/value entries in the returned Map.
                    replace(Attributes, Map<String, String>) - Method in class OutputDocument
                    Replaces the specified attributes segment in this output document with the name/value entries in the specified Map.
                    replaceWithSpaces(int, int) - Method in class OutputDocument
                    Replaces the specified segment of this output document with a string of spaces of the same length.
                    requiresEncoding(char) - Static method in class CharacterReference
                    Indicates whether the specified character would need to be encoded in HTML text.
                    RowColumnVector - Class in net.htmlparser.jericho
                    Represents the row and column number of a character position in the source document.

                    S

                    S - Static variable in interface HTMLElementName
                    HTML element S - strike-through text style.
                    SAMP - Static variable in interface HTMLElementName
                    HTML element SAMP - sample program output, scripts, etc.
                    SCRIPT - Static variable in interface HTMLElementName
                    HTML element SCRIPT - script statements.
                    Segment - Class in net.htmlparser.jericho
                    Represents a segment of a Source document.
                    Segment(Source, int, int) - Constructor for class Segment
                    Constructs a new Segment within the specified source document with the specified begin and end character positions.
                    SELECT - Static variable in interface HTMLElementName
                    HTML element SELECT - option selector.
                    SERVER_COMMON - Static variable in class StartTagType
                    The tag type given to a common server tag (<% ... %>).
                    SERVER_COMMON_ESCAPED - Static variable in class StartTagType
                    The tag type given to an escaped common server tag (<\% ... %>).
                    setBlockIndentSize(int) - Method in class Renderer
                    Sets the size of the indent to be used for anything other than LI elements.
                    setBuffer(char[]) - Method in class StreamedSource
                    Specifies an existing character array to use for buffering the incoming character stream.
                    setCoalescing(boolean) - Method in class StreamedSource
                    Specifies whether an unbroken section of plain text in the source document should always be coalesced into a single Segment by the iterator.
                    setCollapseWhiteSpace(boolean) - Method in class SourceFormatter
                    Sets whether white space in the text between the tags is to be collapsed.
                    setConvertNonBreakingSpaces(boolean) - Method in class Renderer
                    Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                    setConvertNonBreakingSpaces(boolean) - Method in class TextExtractor
                    Sets whether non-breaking space (&nbsp;) character entity references are converted to spaces.
                    setDataSet(Map<String, String[]>) - Method in class FormFields
                    Sets the submission values of all the constituent form controls to match the data in the specified field data set.
                    setDebugEnabled(boolean) - Method in class WriterLogger
                    Sets whether logging is enabled at the DEBUG level.
                    setDecorateFontStyles(boolean) - Method in class Renderer
                    Sets whether decoration characters are to be included around the content of some font style elements and phrase elements.
                    setDefaultMaxErrorCount(int) - Static method in class Attributes
                    Sets the default maximum error count allowed when parsing attributes.
                    setDisabled(boolean) - Method in class FormControl
                    Sets whether this form control is disabled.
                    setErrorEnabled(boolean) - Method in class WriterLogger
                    Sets whether logging is enabled at the ERROR level.
                    setExcludeNonHTMLElements(boolean) - Method in class TextExtractor
                    Sets whether the content of non-HTML elements is excluded from the output.
                    setFormFieldNameCaseInsensitive(boolean) - Method in class Config.CompatibilityMode
                    Sets whether form field names are treated as case insensitive.
                    setIncludeAttributes(boolean) - Method in class TextExtractor
                    Sets whether any attribute values are included in the output.
                    setIncludeHyperlinkURLs(boolean) - Method in class Renderer
                    Sets whether hyperlink URLs are included in the output.
                    setIndentAllElements(boolean) - Method in class SourceFormatter
                    Sets whether all elements are to be indented, including inline-level elements and those with preformatted contents.
                    setIndentString(String) - Method in class SourceFormatter
                    Sets the string to be used for indentation.
                    setInfoEnabled(boolean) - Method in class WriterLogger
                    Sets whether logging is enabled at the INFO level.
                    setListBullets(char[]) - Method in class Renderer
                    Sets the bullet characters to use for list items inside UL elements.
                    setListIndentSize(int) - Method in class Renderer
                    Sets the size of the indent to be used for LI elements.
                    setLogger(Logger) - Method in class Source
                    Sets the Logger that handles log messages.
                    setLogger(Logger) - Method in class StreamedSource
                    Sets the Logger that handles log messages.
                    setMaxLineLength(int) - Method in class Renderer
                    Sets the column at which lines are to be wrapped.
                    setNewLine(String) - Method in class Renderer
                    Sets the string to be used to represent a newline in the output.
                    setNewLine(String) - Method in class SourceCompactor
                    Sets the string to be used to represent a newline in the output.
                    setNewLine(String) - Method in class SourceFormatter
                    Sets the string to be used to represent a newline in the output.
                    setOutputStyle(FormControlOutputStyle) - Method in class FormControl
                    Sets the output style of this form control.
                    setTableCellSeparator(String) - Method in class Renderer
                    Sets the string that is to separate table cells.
                    setTagTypesIgnoringEnclosedMarkup(TagType[]) - Static method in class TagType
                    Sets the tag types inside which the parser ignores all non-server tags.
                    setTidyTags(boolean) - Method in class SourceFormatter
                    Sets whether the original text of each tag is to be replaced with the output from its Tag.tidy() method.
                    setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean, int) - Method in class Config.CompatibilityMode
                    Sets the maximum unicode code point of an unterminated character entity reference which is to be recognised in the specified context.
                    setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean, int) - Method in class Config.CompatibilityMode
                    Sets the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.
                    setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean, int) - Method in class Config.CompatibilityMode
                    Sets the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.
                    setUserData(Object) - Method in class Tag
                    Associates the specified general purpose user data object with this tag.
                    setValue(String) - Method in class FormControl
                    Sets the control's submission value *.
                    setValue(String) - Method in class FormField
                    Sets the field submission values of this field to the single specified value.
                    setValue(String, String) - Method in class FormFields
                    Sets the field submission values of the constituent form field with the specified name to the single specified value.
                    setValues(Collection<String>) - Method in class FormField
                    Sets the field submission values of this field to the specified values.
                    setWarnEnabled(boolean) - Method in class WriterLogger
                    Sets whether logging is enabled at the WARN level.
                    size() - Method in class FormFields
                    Returns the number of FormField objects.
                    SLF4J - Static variable in interface LoggerProvider
                    A LoggerProvider implementation that wraps the SLF4J framework.
                    SMALL - Static variable in interface HTMLElementName
                    HTML element SMALL - small text style.
                    Source - Class in net.htmlparser.jericho
                    Represents a source HTML document.
                    Source(CharSequence) - Constructor for class Source
                    Constructs a new Source object from the specified text.
                    Source(Reader) - Constructor for class Source
                    Constructs a new Source object by loading the content from the specified Reader.
                    Source(InputStream) - Constructor for class Source
                    Constructs a new Source object by loading the content from the specified InputStream.
                    Source(URL) - Constructor for class Source
                    Constructs a new Source object by loading the content from the specified URL.
                    Source(URLConnection) - Constructor for class Source
                    Constructs a new Source object by loading the content from the specified URLConnection.
                    SourceCompactor - Class in net.htmlparser.jericho
                    Compacts HTML source by removing all unnecessary white space.
                    SourceCompactor(Segment) - Constructor for class SourceCompactor
                    Constructs a new SourceCompactor based on the specified Segment.
                    SourceFormatter - Class in net.htmlparser.jericho
                    Formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
                    SourceFormatter(Segment) - Constructor for class SourceFormatter
                    Constructs a new SourceFormatter based on the specified Segment.
                    SPAN - Static variable in interface HTMLElementName
                    HTML element SPAN - generic language/style container.
                    StartTag - Class in net.htmlparser.jericho
                    Represents the start tag of an element in a specific source document.
                    StartTagType - Class in net.htmlparser.jericho
                    Defines the syntax for a start tag type.
                    StartTagType(String, String, String, EndTagType, boolean, boolean, boolean) - Constructor for class StartTagType
                    Constructs a new StartTagType object with the specified properties.
                    StartTagTypeGenericImplementation - Class in net.htmlparser.jericho
                    Provides a generic implementation of the abstract StartTagType class based on the most common start tag behaviour.
                    StartTagTypeGenericImplementation(String, String, String, EndTagType, boolean) - Constructor for class StartTagTypeGenericImplementation
                    Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                    StartTagTypeGenericImplementation(String, String, String, EndTagType, boolean, boolean, boolean) - Constructor for class StartTagTypeGenericImplementation
                    Constructs a new StartTagTypeGenericImplementation object with the specified properties.
                    STDERR - Static variable in interface LoggerProvider
                    A LoggerProvider implementation that sends all log messages to the standard error output stream (System.err).
                    StreamedSource - Class in net.htmlparser.jericho
                    Represents a streamed source HTML document.
                    StreamedSource(Reader) - Constructor for class StreamedSource
                    Constructs a new StreamedSource object by loading the content from the specified Reader.
                    StreamedSource(InputStream) - Constructor for class StreamedSource
                    Constructs a new StreamedSource object by loading the content from the specified InputStream.
                    StreamedSource(URL) - Constructor for class StreamedSource
                    Constructs a new StreamedSource object by loading the content from the specified URL.
                    StreamedSource(URLConnection) - Constructor for class StreamedSource
                    Constructs a new StreamedSource object by loading the content from the specified URLConnection.
                    StreamedSource(CharSequence) - Constructor for class StreamedSource
                    Constructs a new StreamedSource object from the specified text.
                    STRIKE - Static variable in interface HTMLElementName
                    HTML element STRIKE - strike-through text.
                    STRONG - Static variable in interface HTMLElementName
                    HTML element STRONG - strong emphasis.
                    STYLE - Static variable in interface HTMLElementName
                    HTML element STYLE - style info.
                    SUB - Static variable in interface HTMLElementName
                    HTML element SUB - subscript.
                    subSequence(int, int) - Method in interface ParseText
                    Returns a new character sequence that is a subsequence of this sequence.
                    subSequence(int, int) - Method in class Segment
                    Returns a new character sequence that is a subsequence of this sequence.
                    subSequence(int, int) - Method in class Source
                    Returns a new character sequence that is a subsequence of this source document.
                    SUP - Static variable in interface HTMLElementName
                    HTML element SUP - superscript.

                    T

                    TABLE - Static variable in interface HTMLElementName
                    HTML element TABLE - table.
                    Tag - Class in net.htmlparser.jericho
                    Represents either a StartTag or EndTag in a specific source document.
                    tagEncloses(Source, int) - Method in class TagType
                    Indicates whether a tag of this type encloses the specified position of the specified source document.
                    TagType - Class in net.htmlparser.jericho
                    Defines the syntax for a tag type that can be recognised by the parser.
                    TBODY - Static variable in interface HTMLElementName
                    HTML element TBODY - table body.
                    TD - Static variable in interface HTMLElementName
                    HTML element TD - table data cell.
                    TEXTAREA - Static variable in interface HTMLElementName
                    HTML element TEXTAREA - multi-line text field.
                    TextExtractor - Class in net.htmlparser.jericho
                    Extracts the textual content from HTML markup.
                    TextExtractor(Segment) - Constructor for class TextExtractor
                    Constructs a new TextExtractor based on the specified Segment.
                    TFOOT - Static variable in interface HTMLElementName
                    HTML element TFOOT - table footer.
                    TH - Static variable in interface HTMLElementName
                    HTML element TH - table header cell.
                    THEAD - Static variable in interface HTMLElementName
                    HTML element THEAD - table header.
                    tidy() - Method in class EndTag
                    Returns an XML representation of this end tag.
                    tidy() - Method in class StartTag
                    Returns an XML representation of this start tag.
                    tidy(boolean) - Method in class StartTag
                    Returns an XML or XHTML representation of this start tag.
                    tidy() - Method in class Tag
                    Returns an XML representation of this tag.
                    TITLE - Static variable in interface HTMLElementName
                    HTML element TITLE - document title.
                    toString() - Method in interface CharStreamSource
                    Returns the output as a string.
                    toString(CharStreamSource) - Static method in class CharStreamSourceUtil
                    Returns the output of the specified CharStreamSource as a string.
                    toString() - Method in class Config.CompatibilityMode
                    Returns the name of this compatibility mode.
                    toString() - Method in class FormField
                    Returns a string representation of this object useful for debugging purposes.
                    toString() - Method in class FormFields
                    Returns a string representation of this object useful for debugging purposes.
                    toString() - Method in class OutputDocument
                    Returns the final content of this output document as a String.
                    toString() - Method in interface OutputSegment
                    Returns the content of this output segment as a String.
                    toString() - Method in interface ParseText
                    Returns the content of the parse text as a String.
                    toString() - Method in class Renderer
                     
                    toString() - Method in class RowColumnVector
                    Returns a string representation of this character position.
                    toString() - Method in class Segment
                    Returns the source text of this segment as a String.
                    toString() - Method in class Source
                    Returns the source text as a String.
                    toString() - Method in class SourceCompactor
                     
                    toString() - Method in class SourceFormatter
                     
                    toString() - Method in class StreamedSource
                    Returns a string representation of the object as generated by the default Object.toString() implementation.
                    toString() - Method in class TagType
                    Returns a string representation of this object useful for debugging purposes.
                    toString() - Method in class TextExtractor
                     
                    TR - Static variable in interface HTMLElementName
                    HTML element TR - table row.
                    TT - Static variable in interface HTMLElementName
                    HTML element TT - teletype or monospaced text style.

                    U

                    U - Static variable in interface HTMLElementName
                    HTML element U - underlined text style.
                    UL - Static variable in interface HTMLElementName
                    HTML element UL - unordered list.
                    UncheckedHTML - Static variable in class FormControlOutputStyle.ConfigDisplayValue
                    Defines the HTML which replaces the normal output element of a CHECKBOX or RADIO form control if it does not contain a checked attribute.
                    UNREGISTERED - Static variable in class EndTagType
                    The tag type given to an unregistered end tag (</ ... >).
                    UNREGISTERED - Static variable in class StartTagType
                    The tag type given to an unregistered start tag (< ... >).
                    Util - Class in net.htmlparser.jericho
                    Contains miscellaneous utility methods not directly associated with the HTML Parser library.

                    V

                    valueOf(String) - Static method in enum FormControlOutputStyle
                    Returns the enum constant of this type with the specified name.
                    valueOf(String) - Static method in enum FormControlType
                    Returns the enum constant of this type with the specified name.
                    values() - Static method in enum FormControlOutputStyle
                    Returns an array containing the constants of this enum type, in the order they're declared.
                    values() - Static method in enum FormControlType
                    Returns an array containing the constants of this enum type, in the order they're declared.
                    VAR - Static variable in interface HTMLElementName
                    HTML element VAR - instance of a variable or program argument.

                    W

                    warn(String) - Method in interface Logger
                    Logs a message at the WARN level.
                    warn(String) - Method in class WriterLogger
                     
                    WriterLogger - Class in net.htmlparser.jericho
                    Provides an implementation of the Logger interface that sends output to the specified java.io.Writer.
                    WriterLogger(Writer) - Constructor for class WriterLogger
                    Constructs a new WriterLogger with the specified Writer and the default name.
                    WriterLogger(Writer, String) - Constructor for class WriterLogger
                    Constructs a new WriterLogger with the specified Writer and name.
                    writeTo(Writer) - Method in interface CharStreamSource
                    Writes the output to the specified Writer.
                    writeTo(Writer) - Method in class OutputDocument
                    Writes the final content of this output document to the specified Writer.
                    writeTo(Writer, int, int) - Method in class OutputDocument
                    Writes the specified portion of the final content of this output document to the specified Writer.
                    writeTo(Writer) - Method in interface OutputSegment
                    Writes the content of this output segment to the specified Writer.
                    writeTo(Writer) - Method in class Renderer
                     
                    writeTo(Writer) - Method in class SourceCompactor
                     
                    writeTo(Writer) - Method in class SourceFormatter
                     
                    writeTo(Writer) - Method in class TextExtractor
                     

                    X

                    XHTML - Static variable in class Config.CompatibilityMode
                    XHTML compatibility mode.
                    XML_DECLARATION - Static variable in class StartTagType
                    The tag type given to an XML declaration (<?xml ... ?>).
                    XML_PROCESSING_INSTRUCTION - Static variable in class StartTagType
                    The tag type given to an XML processing instruction (<?PITarget ... ?>).

                    _

                    _Aacute - Static variable in class CharacterEntityReference
                    Á &Aacute; = &#193; -- latin capital letter A with acute, U+00C1 ISOlat1.
                    _aacute - Static variable in class CharacterEntityReference
                    á &aacute; = &#225; -- latin small letter a with acute, U+00E1 ISOlat1.
                    _Acirc - Static variable in class CharacterEntityReference
                    Â &Acirc; = &#194; -- latin capital letter A with circumflex, U+00C2 ISOlat1.
                    _acirc - Static variable in class CharacterEntityReference
                    â &acirc; = &#226; -- latin small letter a with circumflex, U+00E2 ISOlat1.
                    _acute - Static variable in class CharacterEntityReference
                    ´ &acute; = &#180; -- acute accent = spacing acute, U+00B4 ISOdia.
                    _AElig - Static variable in class CharacterEntityReference
                    Æ &AElig; = &#198; -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1.
                    _aelig - Static variable in class CharacterEntityReference
                    æ &aelig; = &#230; -- latin small letter ae = latin small ligature ae, U+00E6 ISOlat1.
                    _Agrave - Static variable in class CharacterEntityReference
                    À &Agrave; = &#192; -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1.
                    _agrave - Static variable in class CharacterEntityReference
                    à &agrave; = &#224; -- latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1.
                    _alefsym - Static variable in class CharacterEntityReference
                    ℵ &alefsym; = &#8501; -- alef symbol = first transfinite cardinal, U+2135 NEW
                    (see comments).
                    _Alpha - Static variable in class CharacterEntityReference
                    Α &Alpha; = &#913; -- greek capital letter alpha, U+0391.
                    _alpha - Static variable in class CharacterEntityReference
                    α &alpha; = &#945; -- greek small letter alpha, U+03B1 ISOgrk3.
                    _amp - Static variable in class CharacterEntityReference
                    & &amp; = &#38; -- ampersand, U+0026 ISOnum.
                    _and - Static variable in class CharacterEntityReference
                    ∧ &and; = &#8743; -- logical and = wedge, U+2227 ISOtech.
                    _ang - Static variable in class CharacterEntityReference
                    ∠ &ang; = &#8736; -- angle, U+2220 ISOamso.
                    _apos - Static variable in class CharacterEntityReference
                    ' &apos; = &#39; -- apostrophe = APL quote, U+0027 ISOnum
                    (see comments).
                    _Aring - Static variable in class CharacterEntityReference
                    Å &Aring; = &#197; -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1.
                    _aring - Static variable in class CharacterEntityReference
                    å &aring; = &#229; -- latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1.
                    _asymp - Static variable in class CharacterEntityReference
                    ≈ &asymp; = &#8776; -- almost equal to = asymptotic to, U+2248 ISOamsr.
                    _Atilde - Static variable in class CharacterEntityReference
                    Ã &Atilde; = &#195; -- latin capital letter A with tilde, U+00C3 ISOlat1.
                    _atilde - Static variable in class CharacterEntityReference
                    ã &atilde; = &#227; -- latin small letter a with tilde, U+00E3 ISOlat1.
                    _Auml - Static variable in class CharacterEntityReference
                    Ä &Auml; = &#196; -- latin capital letter A with diaeresis, U+00C4 ISOlat1.
                    _auml - Static variable in class CharacterEntityReference
                    ä &auml; = &#228; -- latin small letter a with diaeresis, U+00E4 ISOlat1.
                    _bdquo - Static variable in class CharacterEntityReference
                    „ &bdquo; = &#8222; -- double low-9 quotation mark, U+201E NEW.
                    _Beta - Static variable in class CharacterEntityReference
                    Β &Beta; = &#914; -- greek capital letter beta, U+0392.
                    _beta - Static variable in class CharacterEntityReference
                    β &beta; = &#946; -- greek small letter beta, U+03B2 ISOgrk3.
                    _brvbar - Static variable in class CharacterEntityReference
                    ¦ &brvbar; = &#166; -- broken bar = broken vertical bar, U+00A6 ISOnum.
                    _bull - Static variable in class CharacterEntityReference
                    • &bull; = &#8226; -- bullet = black small circle, U+2022 ISOpub
                    (see comments).
                    _cap - Static variable in class CharacterEntityReference
                    ∩ &cap; = &#8745; -- intersection = cap, U+2229 ISOtech.
                    _Ccedil - Static variable in class CharacterEntityReference
                    Ç &Ccedil; = &#199; -- latin capital letter C with cedilla, U+00C7 ISOlat1.
                    _ccedil - Static variable in class CharacterEntityReference
                    ç &ccedil; = &#231; -- latin small letter c with cedilla, U+00E7 ISOlat1.
                    _cedil - Static variable in class CharacterEntityReference
                    ¸ &cedil; = &#184; -- cedilla = spacing cedilla, U+00B8 ISOdia.
                    _cent - Static variable in class CharacterEntityReference
                    ¢ &cent; = &#162; -- cent sign, U+00A2 ISOnum.
                    _Chi - Static variable in class CharacterEntityReference
                    Χ &Chi; = &#935; -- greek capital letter chi, U+03A7.
                    _chi - Static variable in class CharacterEntityReference
                    χ &chi; = &#967; -- greek small letter chi, U+03C7 ISOgrk3.
                    _circ - Static variable in class CharacterEntityReference
                    ˆ &circ; = &#710; -- modifier letter circumflex accent, U+02C6 ISOpub.
                    _clubs - Static variable in class CharacterEntityReference
                    ♣ &clubs; = &#9827; -- black club suit = shamrock, U+2663 ISOpub.
                    _cong - Static variable in class CharacterEntityReference
                    ≅ &cong; = &#8773; -- approximately equal to, U+2245 ISOtech.
                    _copy - Static variable in class CharacterEntityReference
                    © &copy; = &#169; -- copyright sign, U+00A9 ISOnum.
                    _crarr - Static variable in class CharacterEntityReference
                    ↵ &crarr; = &#8629; -- downwards arrow with corner leftwards = carriage return, U+21B5 NEW.
                    _cup - Static variable in class CharacterEntityReference
                    ∪ &cup; = &#8746; -- union = cup, U+222A ISOtech.
                    _curren - Static variable in class CharacterEntityReference
                    ¤ &curren; = &#164; -- currency sign, U+00A4 ISOnum.
                    _dagger - Static variable in class CharacterEntityReference
                    † &dagger; = &#8224; -- dagger, U+2020 ISOpub.
                    _Dagger - Static variable in class CharacterEntityReference
                    ‡ &Dagger; = &#8225; -- double dagger, U+2021 ISOpub.
                    _darr - Static variable in class CharacterEntityReference
                    ↓ &darr; = &#8595; -- downwards arrow, U+2193 ISOnum.
                    _dArr - Static variable in class CharacterEntityReference
                    ⇓ &dArr; = &#8659; -- downwards double arrow, U+21D3 ISOamsa.
                    _deg - Static variable in class CharacterEntityReference
                    ° &deg; = &#176; -- degree sign, U+00B0 ISOnum.
                    _Delta - Static variable in class CharacterEntityReference
                    Δ &Delta; = &#916; -- greek capital letter delta, U+0394 ISOgrk3.
                    _delta - Static variable in class CharacterEntityReference
                    δ &delta; = &#948; -- greek small letter delta, U+03B4 ISOgrk3.
                    _diams - Static variable in class CharacterEntityReference
                    ♦ &diams; = &#9830; -- black diamond suit, U+2666 ISOpub.
                    _divide - Static variable in class CharacterEntityReference
                    ÷ &divide; = &#247; -- division sign, U+00F7 ISOnum.
                    _Eacute - Static variable in class CharacterEntityReference
                    É &Eacute; = &#201; -- latin capital letter E with acute, U+00C9 ISOlat1.
                    _eacute - Static variable in class CharacterEntityReference
                    é &eacute; = &#233; -- latin small letter e with acute, U+00E9 ISOlat1.
                    _Ecirc - Static variable in class CharacterEntityReference
                    Ê &Ecirc; = &#202; -- latin capital letter E with circumflex, U+00CA ISOlat1.
                    _ecirc - Static variable in class CharacterEntityReference
                    ê &ecirc; = &#234; -- latin small letter e with circumflex, U+00EA ISOlat1.
                    _Egrave - Static variable in class CharacterEntityReference
                    È &Egrave; = &#200; -- latin capital letter E with grave, U+00C8 ISOlat1.
                    _egrave - Static variable in class CharacterEntityReference
                    è &egrave; = &#232; -- latin small letter e with grave, U+00E8 ISOlat1.
                    _empty - Static variable in class CharacterEntityReference
                    ∅ &empty; = &#8709; -- empty set = null set = diameter, U+2205 ISOamso.
                    _emsp - Static variable in class CharacterEntityReference
                    &emsp; = &#8195; -- em space, U+2003 ISOpub.
                    _ensp - Static variable in class CharacterEntityReference
                    &ensp; = &#8194; -- en space, U+2002 ISOpub.
                    _Epsilon - Static variable in class CharacterEntityReference
                    Ε &Epsilon; = &#917; -- greek capital letter epsilon, U+0395.
                    _epsilon - Static variable in class CharacterEntityReference
                    ε &epsilon; = &#949; -- greek small letter epsilon, U+03B5 ISOgrk3.
                    _equiv - Static variable in class CharacterEntityReference
                    ≡ &equiv; = &#8801; -- identical to, U+2261 ISOtech.
                    _Eta - Static variable in class CharacterEntityReference
                    Η &Eta; = &#919; -- greek capital letter eta, U+0397.
                    _eta - Static variable in class CharacterEntityReference
                    η &eta; = &#951; -- greek small letter eta, U+03B7 ISOgrk3.
                    _ETH - Static variable in class CharacterEntityReference
                    Ð &ETH; = &#208; -- latin capital letter ETH, U+00D0 ISOlat1.
                    _eth - Static variable in class CharacterEntityReference
                    ð &eth; = &#240; -- latin small letter eth, U+00F0 ISOlat1.
                    _Euml - Static variable in class CharacterEntityReference
                    Ë &Euml; = &#203; -- latin capital letter E with diaeresis, U+00CB ISOlat1.
                    _euml - Static variable in class CharacterEntityReference
                    ë &euml; = &#235; -- latin small letter e with diaeresis, U+00EB ISOlat1.
                    _euro - Static variable in class CharacterEntityReference
                    € &euro; = &#8364; -- euro sign, U+20AC NEW.
                    _exist - Static variable in class CharacterEntityReference
                    ∃ &exist; = &#8707; -- there exists, U+2203 ISOtech.
                    _fnof - Static variable in class CharacterEntityReference
                    ƒ &fnof; = &#402; -- latin small letter f with hook = function = florin, U+0192 ISOtech.
                    _forall - Static variable in class CharacterEntityReference
                    ∀ &forall; = &#8704; -- for all, U+2200 ISOtech.
                    _frac12 - Static variable in class CharacterEntityReference
                    ½ &frac12; = &#189; -- vulgar fraction one half = fraction one half, U+00BD ISOnum.
                    _frac14 - Static variable in class CharacterEntityReference
                    ¼ &frac14; = &#188; -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum.
                    _frac34 - Static variable in class CharacterEntityReference
                    ¾ &frac34; = &#190; -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum.
                    _frasl - Static variable in class CharacterEntityReference
                    ⁄ &frasl; = &#8260; -- fraction slash, U+2044 NEW.
                    _Gamma - Static variable in class CharacterEntityReference
                    Γ &Gamma; = &#915; -- greek capital letter gamma, U+0393 ISOgrk3.
                    _gamma - Static variable in class CharacterEntityReference
                    γ &gamma; = &#947; -- greek small letter gamma, U+03B3 ISOgrk3.
                    _ge - Static variable in class CharacterEntityReference
                    ≥ &ge; = &#8805; -- greater-than or equal to, U+2265 ISOtech.
                    _gt - Static variable in class CharacterEntityReference
                    > &gt; = &#62; -- greater-than sign, U+003E ISOnum.
                    _harr - Static variable in class CharacterEntityReference
                    ↔ &harr; = &#8596; -- left right arrow, U+2194 ISOamsa.
                    _hArr - Static variable in class CharacterEntityReference
                    ⇔ &hArr; = &#8660; -- left right double arrow, U+21D4 ISOamsa.
                    _hearts - Static variable in class CharacterEntityReference
                    ♥ &hearts; = &#9829; -- black heart suit = valentine, U+2665 ISOpub.
                    _hellip - Static variable in class CharacterEntityReference
                    … &hellip; = &#8230; -- horizontal ellipsis = three dot leader, U+2026 ISOpub.
                    _Iacute - Static variable in class CharacterEntityReference
                    Í &Iacute; = &#205; -- latin capital letter I with acute, U+00CD ISOlat1.
                    _iacute - Static variable in class CharacterEntityReference
                    í &iacute; = &#237; -- latin small letter i with acute, U+00ED ISOlat1.
                    _Icirc - Static variable in class CharacterEntityReference
                    Î &Icirc; = &#206; -- latin capital letter I with circumflex, U+00CE ISOlat1.
                    _icirc - Static variable in class CharacterEntityReference
                    î &icirc; = &#238; -- latin small letter i with circumflex, U+00EE ISOlat1.
                    _iexcl - Static variable in class CharacterEntityReference
                    ¡ &iexcl; = &#161; -- inverted exclamation mark, U+00A1 ISOnum.
                    _Igrave - Static variable in class CharacterEntityReference
                    Ì &Igrave; = &#204; -- latin capital letter I with grave, U+00CC ISOlat1.
                    _igrave - Static variable in class CharacterEntityReference
                    ì &igrave; = &#236; -- latin small letter i with grave, U+00EC ISOlat1.
                    _image - Static variable in class CharacterEntityReference
                    ℑ &image; = &#8465; -- black-letter capital I = imaginary part, U+2111 ISOamso.
                    _infin - Static variable in class CharacterEntityReference
                    ∞ &infin; = &#8734; -- infinity, U+221E ISOtech.
                    _int - Static variable in class CharacterEntityReference
                    ∫ &int; = &#8747; -- integral, U+222B ISOtech.
                    _Iota - Static variable in class CharacterEntityReference
                    Ι &Iota; = &#921; -- greek capital letter iota, U+0399.
                    _iota - Static variable in class CharacterEntityReference
                    ι &iota; = &#953; -- greek small letter iota, U+03B9 ISOgrk3.
                    _iquest - Static variable in class CharacterEntityReference
                    ¿ &iquest; = &#191; -- inverted question mark = turned question mark, U+00BF ISOnum.
                    _isin - Static variable in class CharacterEntityReference
                    ∈ &isin; = &#8712; -- element of, U+2208 ISOtech.
                    _Iuml - Static variable in class CharacterEntityReference
                    Ï &Iuml; = &#207; -- latin capital letter I with diaeresis, U+00CF ISOlat1.
                    _iuml - Static variable in class CharacterEntityReference
                    ï &iuml; = &#239; -- latin small letter i with diaeresis, U+00EF ISOlat1.
                    _Kappa - Static variable in class CharacterEntityReference
                    Κ &Kappa; = &#922; -- greek capital letter kappa, U+039A.
                    _kappa - Static variable in class CharacterEntityReference
                    κ &kappa; = &#954; -- greek small letter kappa, U+03BA ISOgrk3.
                    _Lambda - Static variable in class CharacterEntityReference
                    Λ &Lambda; = &#923; -- greek capital letter lambda, U+039B ISOgrk3.
                    _lambda - Static variable in class CharacterEntityReference
                    λ &lambda; = &#955; -- greek small letter lambda, U+03BB ISOgrk3.
                    _lang - Static variable in class CharacterEntityReference
                    &lang; = &#9001; -- left-pointing angle bracket = bra, U+2329 ISOtech
                    (see comments).
                    _laquo - Static variable in class CharacterEntityReference
                    « &laquo; = &#171; -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum.
                    _larr - Static variable in class CharacterEntityReference
                    ← &larr; = &#8592; -- leftwards arrow, U+2190 ISOnum.
                    _lArr - Static variable in class CharacterEntityReference
                    ⇐ &lArr; = &#8656; -- leftwards double arrow, U+21D0 ISOtech
                    (see comments).
                    _lceil - Static variable in class CharacterEntityReference
                    ⌈ &lceil; = &#8968; -- left ceiling = APL upstile, U+2308 ISOamsc.
                    _ldquo - Static variable in class CharacterEntityReference
                    “ &ldquo; = &#8220; -- left double quotation mark, U+201C ISOnum.
                    _le - Static variable in class CharacterEntityReference
                    ≤ &le; = &#8804; -- less-than or equal to, U+2264 ISOtech.
                    _lfloor - Static variable in class CharacterEntityReference
                    ⌊ &lfloor; = &#8970; -- left floor = APL downstile, U+230A ISOamsc.
                    _lowast - Static variable in class CharacterEntityReference
                    ∗ &lowast; = &#8727; -- asterisk operator, U+2217 ISOtech.
                    _loz - Static variable in class CharacterEntityReference
                    ◊ &loz; = &#9674; -- lozenge, U+25CA ISOpub.
                    _lrm - Static variable in class CharacterEntityReference
                    &lrm; = &#8206; -- left-to-right mark, U+200E NEW RFC 2070.
                    _lsaquo - Static variable in class CharacterEntityReference
                    ‹ &lsaquo; = &#8249; -- single left-pointing angle quotation mark, U+2039 ISO proposed
                    (see comments).
                    _lsquo - Static variable in class CharacterEntityReference
                    ‘ &lsquo; = &#8216; -- left single quotation mark, U+2018 ISOnum.
                    _lt - Static variable in class CharacterEntityReference
                    < &lt; = &#60; -- less-than sign, U+003C ISOnum.
                    _macr - Static variable in class CharacterEntityReference
                    ¯ &macr; = &#175; -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia.
                    _mdash - Static variable in class CharacterEntityReference
                    — &mdash; = &#8212; -- em dash, U+2014 ISOpub.
                    _micro - Static variable in class CharacterEntityReference
                    µ &micro; = &#181; -- micro sign, U+00B5 ISOnum.
                    _middot - Static variable in class CharacterEntityReference
                    · &middot; = &#183; -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum.
                    _minus - Static variable in class CharacterEntityReference
                    − &minus; = &#8722; -- minus sign, U+2212 ISOtech.
                    _Mu - Static variable in class CharacterEntityReference
                    Μ &Mu; = &#924; -- greek capital letter mu, U+039C.
                    _mu - Static variable in class CharacterEntityReference
                    μ &mu; = &#956; -- greek small letter mu, U+03BC ISOgrk3.
                    _nabla - Static variable in class CharacterEntityReference
                    ∇ &nabla; = &#8711; -- nabla = backward difference, U+2207 ISOtech.
                    _nbsp - Static variable in class CharacterEntityReference
                      &nbsp; = &#160; -- no-break space = non-breaking space, U+00A0 ISOnum.
                    _ndash - Static variable in class CharacterEntityReference
                    – &ndash; = &#8211; -- en dash, U+2013 ISOpub.
                    _ne - Static variable in class CharacterEntityReference
                    ≠ &ne; = &#8800; -- not equal to, U+2260 ISOtech.
                    _ni - Static variable in class CharacterEntityReference
                    ∋ &ni; = &#8715; -- contains as member, U+220B ISOtech
                    (see comments).
                    _not - Static variable in class CharacterEntityReference
                    ¬ &not; = &#172; -- not sign = angled dash, U+00AC ISOnum.
                    _notin - Static variable in class CharacterEntityReference
                    ∉ &notin; = &#8713; -- not an element of, U+2209 ISOtech.
                    _nsub - Static variable in class CharacterEntityReference
                    ⊄ &nsub; = &#8836; -- not a subset of, U+2284 ISOamsn.
                    _Ntilde - Static variable in class CharacterEntityReference
                    Ñ &Ntilde; = &#209; -- latin capital letter N with tilde, U+00D1 ISOlat1.
                    _ntilde - Static variable in class CharacterEntityReference
                    ñ &ntilde; = &#241; -- latin small letter n with tilde, U+00F1 ISOlat1.
                    _Nu - Static variable in class CharacterEntityReference
                    Ν &Nu; = &#925; -- greek capital letter nu, U+039D.
                    _nu - Static variable in class CharacterEntityReference
                    ν &nu; = &#957; -- greek small letter nu, U+03BD ISOgrk3.
                    _Oacute - Static variable in class CharacterEntityReference
                    Ó &Oacute; = &#211; -- latin capital letter O with acute, U+00D3 ISOlat1.
                    _oacute - Static variable in class CharacterEntityReference
                    ó &oacute; = &#243; -- latin small letter o with acute, U+00F3 ISOlat1.
                    _Ocirc - Static variable in class CharacterEntityReference
                    Ô &Ocirc; = &#212; -- latin capital letter O with circumflex, U+00D4 ISOlat1.
                    _ocirc - Static variable in class CharacterEntityReference
                    ô &ocirc; = &#244; -- latin small letter o with circumflex, U+00F4 ISOlat1.
                    _OElig - Static variable in class CharacterEntityReference
                    Œ &OElig; = &#338; -- latin capital ligature OE, U+0152 ISOlat2.
                    _oelig - Static variable in class CharacterEntityReference
                    œ &oelig; = &#339; -- latin small ligature oe, U+0153 ISOlat2
                    (see comments).
                    _Ograve - Static variable in class CharacterEntityReference
                    Ò &Ograve; = &#210; -- latin capital letter O with grave, U+00D2 ISOlat1.
                    _ograve - Static variable in class CharacterEntityReference
                    ò &ograve; = &#242; -- latin small letter o with grave, U+00F2 ISOlat1.
                    _oline - Static variable in class CharacterEntityReference
                    ‾ &oline; = &#8254; -- overline = spacing overscore, U+203E NEW.
                    _Omega - Static variable in class CharacterEntityReference
                    Ω &Omega; = &#937; -- greek capital letter omega, U+03A9 ISOgrk3.
                    _omega - Static variable in class CharacterEntityReference
                    ω &omega; = &#969; -- greek small letter omega, U+03C9 ISOgrk3.
                    _Omicron - Static variable in class CharacterEntityReference
                    Ο &Omicron; = &#927; -- greek capital letter omicron, U+039F.
                    _omicron - Static variable in class CharacterEntityReference
                    ο &omicron; = &#959; -- greek small letter omicron, U+03BF NEW.
                    _oplus - Static variable in class CharacterEntityReference
                    ⊕ &oplus; = &#8853; -- circled plus = direct sum, U+2295 ISOamsb.
                    _or - Static variable in class CharacterEntityReference
                    ∨ &or; = &#8744; -- logical or = vee, U+2228 ISOtech.
                    _ordf - Static variable in class CharacterEntityReference
                    ª &ordf; = &#170; -- feminine ordinal indicator, U+00AA ISOnum.
                    _ordm - Static variable in class CharacterEntityReference
                    º &ordm; = &#186; -- masculine ordinal indicator, U+00BA ISOnum.
                    _Oslash - Static variable in class CharacterEntityReference
                    Ø &Oslash; = &#216; -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1.
                    _oslash - Static variable in class CharacterEntityReference
                    ø &oslash; = &#248; -- latin small letter o with stroke = latin small letter o slash, U+00F8 ISOlat1.
                    _Otilde - Static variable in class CharacterEntityReference
                    Õ &Otilde; = &#213; -- latin capital letter O with tilde, U+00D5 ISOlat1.
                    _otilde - Static variable in class CharacterEntityReference
                    õ &otilde; = &#245; -- latin small letter o with tilde, U+00F5 ISOlat1.
                    _otimes - Static variable in class CharacterEntityReference
                    ⊗ &otimes; = &#8855; -- circled times = vector product, U+2297 ISOamsb.
                    _Ouml - Static variable in class CharacterEntityReference
                    Ö &Ouml; = &#214; -- latin capital letter O with diaeresis, U+00D6 ISOlat1.
                    _ouml - Static variable in class CharacterEntityReference
                    ö &ouml; = &#246; -- latin small letter o with diaeresis, U+00F6 ISOlat1.
                    _para - Static variable in class CharacterEntityReference
                    ¶ &para; = &#182; -- pilcrow sign = paragraph sign, U+00B6 ISOnum.
                    _part - Static variable in class CharacterEntityReference
                    ∂ &part; = &#8706; -- partial differential, U+2202 ISOtech.
                    _permil - Static variable in class CharacterEntityReference
                    ‰ &permil; = &#8240; -- per mille sign, U+2030 ISOtech.
                    _perp - Static variable in class CharacterEntityReference
                    ⊥ &perp; = &#8869; -- up tack = orthogonal to = perpendicular, U+22A5 ISOtech.
                    _Phi - Static variable in class CharacterEntityReference
                    Φ &Phi; = &#934; -- greek capital letter phi, U+03A6 ISOgrk3.
                    _phi - Static variable in class CharacterEntityReference
                    φ &phi; = &#966; -- greek small letter phi, U+03C6 ISOgrk3.
                    _Pi - Static variable in class CharacterEntityReference
                    Π &Pi; = &#928; -- greek capital letter pi, U+03A0 ISOgrk3.
                    _pi - Static variable in class CharacterEntityReference
                    π &pi; = &#960; -- greek small letter pi, U+03C0 ISOgrk3.
                    _piv - Static variable in class CharacterEntityReference
                    ϖ &piv; = &#982; -- greek pi symbol, U+03D6 ISOgrk3.
                    _plusmn - Static variable in class CharacterEntityReference
                    ± &plusmn; = &#177; -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum.
                    _pound - Static variable in class CharacterEntityReference
                    £ &pound; = &#163; -- pound sign, U+00A3 ISOnum.
                    _prime - Static variable in class CharacterEntityReference
                    &prime; = &#8242; -- prime = minutes = feet, U+2032 ISOtech.
                    _Prime - Static variable in class CharacterEntityReference
                    &Prime; = &#8243; -- double prime = seconds = inches, U+2033 ISOtech.
                    _prod - Static variable in class CharacterEntityReference
                    &prod; = &#8719; -- n-ary product = product sign, U+220F ISOamsb
                    (see comments).
                    _prop - Static variable in class CharacterEntityReference
                    &prop; = &#8733; -- proportional to, U+221D ISOtech.
                    _Psi - Static variable in class CharacterEntityReference
                    Ψ &Psi; = &#936; -- greek capital letter psi, U+03A8 ISOgrk3.
                    _psi - Static variable in class CharacterEntityReference
                    ψ &psi; = &#968; -- greek small letter psi, U+03C8 ISOgrk3.
                    _quot - Static variable in class CharacterEntityReference
                    " &quot; = &#34; -- quotation mark = APL quote, U+0022 ISOnum.
                    _radic - Static variable in class CharacterEntityReference
                    &radic; = &#8730; -- square root = radical sign, U+221A ISOtech.
                    _rang - Static variable in class CharacterEntityReference
                    &rang; = &#9002; -- right-pointing angle bracket = ket, U+232A ISOtech
                    (see comments).
                    _raquo - Static variable in class CharacterEntityReference
                    » &raquo; = &#187; -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum.
                    _rarr - Static variable in class CharacterEntityReference
                    &rarr; = &#8594; -- rightwards arrow, U+2192 ISOnum.
                    _rArr - Static variable in class CharacterEntityReference
                    &rArr; = &#8658; -- rightwards double arrow, U+21D2 ISOtech
                    (see comments).
                    _rceil - Static variable in class CharacterEntityReference
                    &rceil; = &#8969; -- right ceiling, U+2309 ISOamsc.
                    _rdquo - Static variable in class CharacterEntityReference
                    &rdquo; = &#8221; -- right double quotation mark, U+201D ISOnum.
                    _real - Static variable in class CharacterEntityReference
                    &real; = &#8476; -- black-letter capital R = real part symbol, U+211C ISOamso.
                    _reg - Static variable in class CharacterEntityReference
                    ® &reg; = &#174; -- registered sign = registered trade mark sign, U+00AE ISOnum.
                    _rfloor - Static variable in class CharacterEntityReference
                    &rfloor; = &#8971; -- right floor, U+230B ISOamsc.
                    _Rho - Static variable in class CharacterEntityReference
                    Ρ &Rho; = &#929; -- greek capital letter rho, U+03A1.
                    _rho - Static variable in class CharacterEntityReference
                    ρ &rho; = &#961; -- greek small letter rho, U+03C1 ISOgrk3.
                    _rlm - Static variable in class CharacterEntityReference
                    &rlm; = &#8207; -- right-to-left mark, U+200F NEW RFC 2070.
                    _rsaquo - Static variable in class CharacterEntityReference
                    &rsaquo; = &#8250; -- single right-pointing angle quotation mark, U+203A ISO proposed
                    (see comments).
                    _rsquo - Static variable in class CharacterEntityReference
                    &rsquo; = &#8217; -- right single quotation mark, U+2019 ISOnum.
                    _sbquo - Static variable in class CharacterEntityReference
                    &sbquo; = &#8218; -- single low-9 quotation mark, U+201A NEW.
                    _Scaron - Static variable in class CharacterEntityReference
                    Š &Scaron; = &#352; -- latin capital letter S with caron, U+0160 ISOlat2.
                    _scaron - Static variable in class CharacterEntityReference
                    š &scaron; = &#353; -- latin small letter s with caron, U+0161 ISOlat2.
                    _sdot - Static variable in class CharacterEntityReference
                    &sdot; = &#8901; -- dot operator, U+22C5 ISOamsb
                    (see comments).
                    _sect - Static variable in class CharacterEntityReference
                    § &sect; = &#167; -- section sign, U+00A7 ISOnum.
                    _shy - Static variable in class CharacterEntityReference
                    ­ &shy; = &#173; -- soft hyphen = discretionary hyphen, U+00AD ISOnum.
                    _Sigma - Static variable in class CharacterEntityReference
                    Σ &Sigma; = &#931; -- greek capital letter sigma, U+03A3 ISOgrk3.
                    _sigma - Static variable in class CharacterEntityReference
                    σ &sigma; = &#963; -- greek small letter sigma, U+03C3 ISOgrk3.
                    _sigmaf - Static variable in class CharacterEntityReference
                    ς &sigmaf; = &#962; -- greek small letter final sigma, U+03C2 ISOgrk3.
                    _sim - Static variable in class CharacterEntityReference
                    &sim; = &#8764; -- tilde operator = varies with = similar to, U+223C ISOtech
                    (see comments).
                    _spades - Static variable in class CharacterEntityReference
                    &spades; = &#9824; -- black spade suit, U+2660 ISOpub
                    (see comments).
                    _sub - Static variable in class CharacterEntityReference
                    &sub; = &#8834; -- subset of, U+2282 ISOtech.
                    _sube - Static variable in class CharacterEntityReference
                    &sube; = &#8838; -- subset of or equal to, U+2286 ISOtech.
                    _sum - Static variable in class CharacterEntityReference
                    &sum; = &#8721; -- n-ary summation, U+2211 ISOamsb
                    (see comments).
                    _sup - Static variable in class CharacterEntityReference
                    &sup; = &#8835; -- superset of, U+2283 ISOtech
                    (see comments).
                    _sup1 - Static variable in class CharacterEntityReference
                    ¹ &sup1; = &#185; -- superscript one = superscript digit one, U+00B9 ISOnum.
                    _sup2 - Static variable in class CharacterEntityReference
                    ² &sup2; = &#178; -- superscript two = superscript digit two = squared, U+00B2 ISOnum.
                    _sup3 - Static variable in class CharacterEntityReference
                    ³ &sup3; = &#179; -- superscript three = superscript digit three = cubed, U+00B3 ISOnum.
                    _supe - Static variable in class CharacterEntityReference
                    &supe; = &#8839; -- superset of or equal to, U+2287 ISOtech.
                    _szlig - Static variable in class CharacterEntityReference
                    ß &szlig; = &#223; -- latin small letter sharp s = ess-zed, U+00DF ISOlat1.
                    _Tau - Static variable in class CharacterEntityReference
                    Τ &Tau; = &#932; -- greek capital letter tau, U+03A4.
                    _tau - Static variable in class CharacterEntityReference
                    τ &tau; = &#964; -- greek small letter tau, U+03C4 ISOgrk3.
                    _there4 - Static variable in class CharacterEntityReference
                    &there4; = &#8756; -- therefore, U+2234 ISOtech.
                    _Theta - Static variable in class CharacterEntityReference
                    Θ &Theta; = &#920; -- greek capital letter theta, U+0398 ISOgrk3.
                    _theta - Static variable in class CharacterEntityReference
                    θ &theta; = &#952; -- greek small letter theta, U+03B8 ISOgrk3.
                    _thetasym - Static variable in class CharacterEntityReference
                    ϑ &thetasym; = &#977; -- greek small letter theta symbol, U+03D1 NEW.
                    _thinsp - Static variable in class CharacterEntityReference
                    &thinsp; = &#8201; -- thin space, U+2009 ISOpub.
                    _THORN - Static variable in class CharacterEntityReference
                    Þ &THORN; = &#222; -- latin capital letter THORN, U+00DE ISOlat1.
                    _thorn - Static variable in class CharacterEntityReference
                    þ &thorn; = &#254; -- latin small letter thorn, U+00FE ISOlat1.
                    _tilde - Static variable in class CharacterEntityReference
                    ˜ &tilde; = &#732; -- small tilde, U+02DC ISOdia.
                    _times - Static variable in class CharacterEntityReference
                    × &times; = &#215; -- multiplication sign, U+00D7 ISOnum.
                    _trade - Static variable in class CharacterEntityReference
                    &trade; = &#8482; -- trade mark sign, U+2122 ISOnum.
                    _Uacute - Static variable in class CharacterEntityReference
                    Ú &Uacute; = &#218; -- latin capital letter U with acute, U+00DA ISOlat1.
                    _uacute - Static variable in class CharacterEntityReference
                    ú &uacute; = &#250; -- latin small letter u with acute, U+00FA ISOlat1.
                    _uarr - Static variable in class CharacterEntityReference
                    &uarr; = &#8593; -- upwards arrow, U+2191 ISOnum.
                    _uArr - Static variable in class CharacterEntityReference
                    &uArr; = &#8657; -- upwards double arrow, U+21D1 ISOamsa.
                    _Ucirc - Static variable in class CharacterEntityReference
                    Û &Ucirc; = &#219; -- latin capital letter U with circumflex, U+00DB ISOlat1.
                    _ucirc - Static variable in class CharacterEntityReference
                    û &ucirc; = &#251; -- latin small letter u with circumflex, U+00FB ISOlat1.
                    _Ugrave - Static variable in class CharacterEntityReference
                    Ù &Ugrave; = &#217; -- latin capital letter U with grave, U+00D9 ISOlat1.
                    _ugrave - Static variable in class CharacterEntityReference
                    ù &ugrave; = &#249; -- latin small letter u with grave, U+00F9 ISOlat1.
                    _uml - Static variable in class CharacterEntityReference
                    ¨ &uml; = &#168; -- diaeresis = spacing diaeresis, U+00A8 ISOdia.
                    _upsih - Static variable in class CharacterEntityReference
                    ϒ &upsih; = &#978; -- greek upsilon with hook symbol, U+03D2 NEW.
                    _Upsilon - Static variable in class CharacterEntityReference
                    Υ &Upsilon; = &#933; -- greek capital letter upsilon, U+03A5 ISOgrk3.
                    _upsilon - Static variable in class CharacterEntityReference
                    υ &upsilon; = &#965; -- greek small letter upsilon, U+03C5 ISOgrk3.
                    _Uuml - Static variable in class CharacterEntityReference
                    Ü &Uuml; = &#220; -- latin capital letter U with diaeresis, U+00DC ISOlat1.
                    _uuml - Static variable in class CharacterEntityReference
                    ü &uuml; = &#252; -- latin small letter u with diaeresis, U+00FC ISOlat1.
                    _weierp - Static variable in class CharacterEntityReference
                    &weierp; = &#8472; -- script capital P = power set = Weierstrass p, U+2118 ISOamso.
                    _Xi - Static variable in class CharacterEntityReference
                    Ξ &Xi; = &#926; -- greek capital letter xi, U+039E ISOgrk3.
                    _xi - Static variable in class CharacterEntityReference
                    ξ &xi; = &#958; -- greek small letter xi, U+03BE ISOgrk3.
                    _Yacute - Static variable in class CharacterEntityReference
                    Ý &Yacute; = &#221; -- latin capital letter Y with acute, U+00DD ISOlat1.
                    _yacute - Static variable in class CharacterEntityReference
                    ý &yacute; = &#253; -- latin small letter y with acute, U+00FD ISOlat1.
                    _yen - Static variable in class CharacterEntityReference
                    ¥ &yen; = &#165; -- yen sign = yuan sign, U+00A5 ISOnum.
                    _yuml - Static variable in class CharacterEntityReference
                    ÿ &yuml; = &#255; -- latin small letter y with diaeresis, U+00FF ISOlat1.
                    _Yuml - Static variable in class CharacterEntityReference
                    Ÿ &Yuml; = &#376; -- latin capital letter Y with diaeresis, U+0178 ISOlat2.
                    _Zeta - Static variable in class CharacterEntityReference
                    Ζ &Zeta; = &#918; -- greek capital letter zeta, U+0396.
                    _zeta - Static variable in class CharacterEntityReference
                    ζ &zeta; = &#950; -- greek small letter zeta, U+03B6 ISOgrk3.
                    _zwj - Static variable in class CharacterEntityReference
                    &zwj; = &#8205; -- zero width joiner, U+200D NEW RFC 2070.
                    _zwnj - Static variable in class CharacterEntityReference
                    &zwnj; = &#8204; -- zero width non-joiner, U+200C NEW RFC 2070.

                    A B C D E F G H I J K L M N O P Q R S T U V W X _

                    jericho-html-3.1/docs/javadoc/help-doc.html0000644000175000017500000002236411214132424020621 0ustar twernertwerner API Help (Jericho HTML Parser 3.1)

                    How This API Document Is Organized

                    This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.

                    Package

                    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

                    • Interfaces (italic)
                    • Classes
                    • Enums
                    • Exceptions
                    • Errors
                    • Annotation Types

                    Class/Interface

                    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

                    • Class inheritance diagram
                    • Direct Subclasses
                    • All Known Subinterfaces
                    • All Known Implementing Classes
                    • Class/interface declaration
                    • Class/interface description

                    • Nested Class Summary
                    • Field Summary
                    • Constructor Summary
                    • Method Summary

                    • Field Detail
                    • Constructor Detail
                    • Method Detail
                    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

                    Annotation Type

                    Each annotation type has its own separate page with the following sections:

                    • Annotation Type declaration
                    • Annotation Type description
                    • Required Element Summary
                    • Optional Element Summary
                    • Element Detail

                    Enum

                    Each enum has its own separate page with the following sections:

                    • Enum declaration
                    • Enum description
                    • Enum Constant Summary
                    • Enum Constant Detail

                    Use

                    Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.

                    Tree (Class Hierarchy)

                    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.
                    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
                    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.

                    Deprecated API

                    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

                    Index

                    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

                    Prev/Next

                    These links take you to the next or previous class, interface, package, or related page.

                    Frames/No Frames

                    These links show and hide the HTML frames. All pages are available with or without frames.

                    Serialized Form

                    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

                    Constant Field Values

                    The Constant Field Values page lists the static final fields and their values.

                    This help file applies to API documentation generated using the standard doclet.



                    jericho-html-3.1/docs/javadoc/resources/0000755000175000017500000000000011214132424020243 5ustar twernertwernerjericho-html-3.1/docs/javadoc/resources/inherit.gif0000644000175000017500000000007111214132424022372 0ustar twernertwernerGIF89a, DrjԐ;߀Q@N;jericho-html-3.1/docs/index.html0000644000175000017500000004710111214012662016623 0ustar twernertwerner Jericho HTML Parser
                    Jericho HTML Parser at SourceForge.net

                    Jericho HTML Parser

                    Jericho HTML Parser is a Java library allowing analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.

                    It is an open source library released under both the Eclipse Public License (EPL) and GNU Lesser General Public License (LGPL). You are therefore free to use it in commercial applications subject to the terms detailed in either one of these licence documents.

                    The javadocs provide comprehensive documentation of the entire API, as well as being a very useful reference on aspects of HTML and XML in general.

                    Visit the SourceForge.net project page at http://sourceforge.net/projects/jerichohtml/ for downloads and support.

                    You can also rate the project highly at http://freshmeat.net/projects/jerichohtml/

                    Release notes for each version can be found in a file called release.txt in the project root directory.

                    Features

                    The library distinguishes itself from other HTML parsers with the following major features:

                    • The presence of badly formatted HTML does not interfere with the parsing of the rest of the document, which makes the library ideal for use with "real-world" HTML that chokes other parsers.
                    • ASP, JSP, PSP, PHP and Mason server tags are explicitly recognised by the parser. This means that normal HTML is still parsed properly even if there are server tags inside them, which is common for example when dynamically setting element attributes.
                    • A new stream based parsing option using the StreamedSource class, which allows memory efficient processing of large files using an event iterator. This is essentially a StAX alternative with the ability to process HTML and non-validating XML, as well as several other features not available in other streaming parsers.
                    • In its standard form it is neither an event nor tree based parser, but rather uses a combination of simple text search, efficient tag recognition and a tag position cache. The text of the whole source document is first loaded into memory, and then only the relevant segments are searched for the relevant characters of each search operation.
                    • Compared to a tree based parser such as DOM, the memory and resource requirements can be far better if only small sections of the document need to be parsed or modified. Incorrect or badly formatted HTML can easily be ignored, unlike tree based parsers which must identify every node in the document from top to bottom.
                    • Compared to an event based parser such as SAX, the interface is on a much higher level and more intuitive, and a tree representation of the document element hierarchy is easily created if required.
                    • The begin and end positions in the source document of all parsed segments are accessible, allowing modification of only selected segments of the document without having to reconstruct the entire document from a tree.
                    • The row and column number of each position in the source document are easily accessible.
                    • Provides a simple but comprehensive interface for the analysis and manipulation of HTML form controls, including the extraction and population of initial values, and conversion to read-only or data display modes. Analysis of the form controls also allows data received from the form to be stored and presented in an appropriate manner.
                    • Custom tag types can be easily defined and registered for recognition by the parser.
                    • Built-in functionality to extract all text from HTML markup, suitable for feeding into a text search engine such as Apache Lucene.
                    • Built-in functionality to render HTML markup with simple text formatting.
                    • Built-in functionality to format HTML source code that indents elements according to their depth in the document element hierarchy. (Click here for an online demonstration)
                    • Built-in functionality to compact HTML source code by removing all unnecessary white space.

                    Sample Programs

                    The samples/console directory in the download package contains sample programs for performing common tasks and demonstrating the functionality of the library. The .bat files can be run directly on a MS-Windows operating system, or the following syntax can be used on a UNIX based operating system from the samples/console directory:

                    java -classpath classes:../dist/jericho-html-x.x.jar ProgramName

                    where x.x is the current release number and ProgramName is the name of the sample program to run.

                    The following sample programs are available:

                    DisplayAllElements.java Demonstrates the behaviour of the library when retrieving all elements from a document containing a mix of normal HTML, different types of server tags, and badly formatted HTML.
                    FindSpecificTags.java Demonstrates how to search for tags with a specified name, in a specified namespace, or special tags such as document type declarations, XML declarations, XML processing instructions, common server tags, PHP tags, Mason tags, and HTML comments.
                    ExtractText.java Demonstrates the use of the TextExtractor class that extracts all of the text from a document, as well as the title, description, keywords and links.
                    RenderToText.java Demonstrates the use of the Renderer class that performs a simple text rendering of HTML markup, similar to the way Mozilla Thunderbird and other email clients provide an automatic conversion of HTML content to text in their alternative MIME encoding of emails.
                    HTMLSanitiser.java Demonstrates how to sanitise HTML containing unwanted or invalid tags into clean HTML. The unit test class for this functionality is available here.
                    StreamedSourceCopy.java Demonstrates the use of the StreamedSource class by iterating through the parsed segments of a source document and creating an exact copy of it.
                    FormControlDisplayCharacteristics.java Demonstrates setting the display characteristics of individual form controls. This allows a control to be disabled, removed, or replaced with a plain text representation of its value (display value). The new document is written to a file called NewForm.html
                    FormFieldCSVOutput.java Demonstrates the use of the FormFields.getColumnValues(Map) method to store form data in a .CSV file, automatically creating separate columns for fields that can contain multiple values (such as checkboxes). The output is written to a file called FormData.csv
                    FormFieldList.java Demonstrates the use of the Segment.findFormFields() method to list all form fields and their associated controls in a document.
                    FormFieldSetValues.java Demonstrates setting the values of form controls, which is best done via the FormFields object. The new document is written to a file called NewForm.html
                    FormatSource.java Demonstrates the use of the SourceFormatter class that formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent. Also known as a "source beautifier". (Click here for an online demonstration)
                    CompactSource.java Demonstrates the use of the SourceCompactor class that compacts HTML source by removing all unnecessary white space.
                    Encoding.java Demonstrates the use of the EncodingDetector class and how to determine the encoding of a source document.
                    SplitLongLines.java Demonstrates how to reformat a document so that lines exceeding a certain number of characters are split into multiple lines.
                    ConvertStyleSheets.java Demonstrates how to detect all external style sheets and place them inline into the document.

                    Building

                    The build and sample files are implemented as DOS .bat files only; apologies to all Unix users for the inconvenience.

                    On the Drawing Board...

                    • Ability to generate a JDOM document, making it a JTidy alternative
                    • .NET (DotNet) version if enough interest shown (register your interest via the forums)

                    Alternative HTML Parsers

                    This package was originally written in the latter half of 2002. At that time I evaluated 6 other parsers, none of which were capable of achieving my aims. Most couldn't reproduce a typical HTML document without change, none could reproduce a source document containing badly formatted or non-HTML components without change, and none provided a means to track the positions of nodes in the source text. A list of these parsers and a brief description follows, but please note that I have not revised this analysis since before this package was written. Please let me know if there are any errors.

                    • JavaCC HTML Parser by Quiotix Corporation (http://www.quiotix.com/downloads/html-parser/)
                      GNU GPL licence, expensive licence fee to use in commercial application. Does not support document structure (parses into a flat node stream).
                    • Demonstrational HTML 3.2 parser bundled with JavaCC. Virtually useless.
                    • JTidy (http://jtidy.sourceforge.net/)
                      Supports document structure, but by its very nature it "tidies" up anything it doesn't like in the source document. On first glance it looks like the positions of nodes in the source are accessible, at least in protected start and end fields in the Node class, but these are pointers into a different buffer and are of no use.
                    • javax.swing.text.html.parser.Parser
                      Comes standard in the JDK. Supports document structure. Does not track the positions of nodes in the source text, but can be easily modified to do so (although not sure of legal implications of modifications). Requires a DTD to function, but only comes with HTML3.2 DTD which is unsuitable. Even if an HTML 4.01 DTD were found, the parser itself might need tweaking to cater for the new element types. The DTD needs to be in the format of a "bdtd" file, which is a binary format used only by Sun in this parser implementation. I have found many requests for a 4.01 bdtd file in newsgroups etc. on the web, but they all remain unanswered. Building it from scratch is not so easy.
                    • Kizna HTML Parser v1.1 (http://htmlparser.sourceforge.net/)
                      GNU LGPL licence. Version 1.1 was very simple without support for document structure. I have since revisited this project at sourceforge (early 2004), where version 1.4 is now available. There are now two separate libraries, one with and one without document structure support. It claims to now also be capable of reproducing source text verbatim.
                    • CyberNeko HTML Parser (http://www.apache.org/~andyc/neko/doc/html/index.html)
                      Apache-style licence. Supports document structure. Based on the very popular Xerces XML parser. At the time of evaluation this parser didn't regenerate the source accurately enough.
                    Sponsors:
                    Corporate Translations
                    Taking Care of Trees
                    Jericho HTML Parser at SourceForge.net
                    jericho-html-3.1/docs/images/0000755000175000017500000000000011167436714016107 5ustar twernertwernerjericho-html-3.1/docs/images/sflogolocal.png0000644000175000017500000000333311163112022021076 0ustar twernertwernerPNG  IHDRxhMsRGB pHYs  tIME5'qsmIDAThZOhy~ &AxIcAsVTBA%eh.jCH6BS)`!!vE2 3- 3N2{xͪ˲._{͟vj-[`cccΝ_~0i4Mc'4MÎFa>ћh1:6iti8bmmucѿw/Ͳ(˅BR@OO˲CCCB,, BwY,a (Sp8,*VTX!9B( I]Tp#ɥ߫(UQZ}0ŢV."Bϩ7l6{XZZ4MEI4M;vX8eY4A HvAA i(vDQD  #vG_~3iǃY<qHN ˲iKx8ӗТ%Y â( 188h~^ӧhVx\`0( D"ϯ㗦~z0Hd߾}HDGeEo  1;; tٳgVŋEs---H$200DEqzzZQZ?LܸqԩS,-$KV+KteQ1zZ议=8::*/rLLLW\q:^'F7 UUa[nEr(l6 a%$ +x b(l6h4^˗/1#p0 #Iٳg!٬.d[]]V… -Fo߾]l6eY|> SSSEjw>sL P͛Xϟ1Rd2ƒɓ'cVd4r[\\$)֕ll6K6"ծ]0|ݻP(T**<6L&򠼼vC2ϟ'()JR]nډEZ\XXzdI(ڻw/_pTuT*P؛7odY~ .%i q7??O:z'N( ;wױ0ܹsn"y[h񸪪fytt(wÇ;zll 5/?4D"122RVd333~9MR\'Or92:&'' >JMNNi>| >9oȀafikt Fˍ6~Ď&WF@WWM۶m3>|mϞ=O3IENDB`jericho-html-3.1/docs/src/0000755000175000017500000000000011167436712015427 5ustar twernertwernerjericho-html-3.1/docs/src/append/0000755000175000017500000000000011167436712016676 5ustar twernertwernerjericho-html-3.1/docs/src/append/stylesheet.css0000644000175000017500000000250210752714024021572 0ustar twernertwernerbody, code var {font-family: Arial,sans-serif; font-size: 10pt} var {font-style: italic} samp {font-family: sans-serif; font-size: 20px} code {white-space: nowrap} th {text-align: left} th,td {vertical-align: top; border-width: 1px} table {border-width: 1px; border-style: none} table.bordered {border-collapse: collapse} table.bordered td, table.bordered th {border-style: solid; border-color: black; padding: 0px 15px 0px 15px; vertical-align: middle} table.CompactDL {border-collapse: collapse; border-style: solid; margin-top: 0.5em; margin-bottom: 0.5em} table.CompactDL td {border-bottom-style: solid; padding-left: 0.5em; padding-right: 0.5em} dt {font-weight: bold} .Separated dd, .Separated li {margin-bottom: 1.6em; margin-top: 1.6em} .HalfSeparated li {margin-bottom: 0.5em; margin-top: 0.5em} .Unseparated li {margin-bottom: 
0; margin-top: 0} blockquote {margin-bottom: 0; margin-top: 0} blockquote.code {margin-bottom: 10px; margin-top: 10px} .SmallVerticalMargin {margin-bottom: 0.5em; margin-top: 0.5em} dl.compact dt {display: compact} dl.compact dd {margin-left: 4em; margin-bottom: 0; margin-top: 0} ol ol {list-style-type: lower-roman} pre {margin: 0} .AllClassesBody h4 {font-size: 9pt; font-weight: bold; margin-top: 7px; padding-top: 2px; margin-bottom: 0; border-top-style: inset; border-width: 1px} jericho-html-3.1/docs/src/replace/0000755000175000017500000000000011167436712017042 5ustar twernertwernerjericho-html-3.1/docs/src/replace/allclasses-frame-alphabetical.html0000644000175000017500000001247011172063262025551 0ustar twernertwerner All Classes (Jericho HTML Parser)
                    All Classes (alphabetical) » categorised

                    jericho-html-3.1/docs/src/replace/allclasses-frame.html0000644000175000017500000001315011172063330023132 0ustar twernertwerner All Classes (Jericho HTML Parser)
                    All Classes (categorised) » alphabetical

                    Document Segments:

                    Tag Types:

                    HTML:

                    Output:

                    Document Manipulation/Output:

                    Form Data:

                    Extended Tag Types:

                    Configuration:

                    Logging:

                    Utilities:

                    Custom Tag Type Creation:

                    jericho-html-3.1/licence-epl-1.0.html0000644000175000017500000003106010611434760017244 0ustar twernertwerner Eclipse Public License - Version 1.0

                    Eclipse Public License - v 1.0

                    THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.

                    1. DEFINITIONS

                    "Contribution" means:

                    a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and

                    b) in the case of each subsequent Contributor:

                    i) changes to the Program, and

                    ii) additions to the Program;

                    where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program.

                    "Contributor" means any person or entity that distributes the Program.

                    "Licensed Patents" mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program.

                    "Program" means the Contributions distributed in accordance with this Agreement.

                    "Recipient" means anyone who receives the Program under this Agreement, including all Contributors.

                    2. GRANT OF RIGHTS

                    a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form.

                    b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder.

                    c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program.

                    d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement.

                    3. REQUIREMENTS

                    A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that:

                    a) it complies with the terms and conditions of this Agreement; and

                    b) its license agreement:

                    i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose;

                    ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits;

                    iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and

                    iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange.

                    When the Program is made available in source code form:

                    a) it must be made available under this Agreement; and

                    b) a copy of this Agreement must be included with each copy of the Program.

                    Contributors may not remove or alter any copyright notices contained within the Program.

                    Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution.

                    4. COMMERCIAL DISTRIBUTION

                    Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense.

                    For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages.

                    5. NO WARRANTY

                    EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations.

                    6. DISCLAIMER OF LIABILITY

                    EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

                    7. GENERAL

                    If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.

                    If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.

                    All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.

                    Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.

                    This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.

                    jericho-html-3.1/src/0000755000175000017500000000000011167436712014477 5ustar twernertwernerjericho-html-3.1/src/java/0000755000175000017500000000000011167436712015420 5ustar twernertwernerjericho-html-3.1/src/java/net/0000755000175000017500000000000011167436712016206 5ustar twernertwernerjericho-html-3.1/src/java/net/htmlparser/0000755000175000017500000000000011167436712020367 5ustar twernertwernerjericho-html-3.1/src/java/net/htmlparser/jericho/0000755000175000017500000000000011173426332022004 5ustar twernertwernerjericho-html-3.1/src/java/net/htmlparser/jericho/LoggerProvider.java0000644000175000017500000001472111204550410025574 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; /** * Defines the interface for a factory class to provide {@link Logger} instances for each {@link Source} object. *

                    * It is not usually necessary for users to create implementations of this interface, as * several predefined instances are defined which provide the most commonly required {@link Logger} implementations. *

                    * By default, a LoggerProvider is chosen automatically according to the algorithm described in the static {@link Config#LoggerProvider} property. * This automatic choice can be overridden by setting the {@link Config#LoggerProvider} property manually with an instance of this interface, * but this is also usually not necessary. */ public interface LoggerProvider { /** * A {@link LoggerProvider} implementation that disables all log messages. */ public static final LoggerProvider DISABLED=LoggerProviderDisabled.INSTANCE; /** * A {@link LoggerProvider} implementation that sends all log messages to the standard error output stream (System.err). *

                    * The implementation uses the following code to create each logger:
                    * new {@link WriterLogger}(new OutputStreamWriter(System.err),name) */ public static final LoggerProvider STDERR=LoggerProviderSTDERR.INSTANCE; /** * A {@link LoggerProvider} implementation that wraps the standard java.util.logging system included in the Java SDK version 1.4 and above. *

                    * This is the default used if no other logging framework is detected. See the description of the static {@link Config#LoggerProvider} property for more details. *

                    * The following mapping of logging levels is used: * *
                    {@link Logger} leveljava.util.logging.Level *
                    {@link Logger#error(String) ERROR}SEVERE *
                    {@link Logger#warn(String) WARN}WARNING *
                    {@link Logger#info(String) INFO}INFO *
                    {@link Logger#debug(String) DEBUG}FINE *
                    */ public static final LoggerProvider JAVA=LoggerProviderJava.INSTANCE; /** * A {@link LoggerProvider} implementation that wraps the Jakarta Commons Logging (JCL) framework. *

                    * See the description of the static {@link Config#LoggerProvider} property for details on when this implementation is used as the default. *

                    * The following mapping of logging levels is used: * *
                    {@link Logger} levelorg.apache.commons.logging level *
                    {@link Logger#error(String) ERROR}error *
                    {@link Logger#warn(String) WARN}warn *
                    {@link Logger#info(String) INFO}info *
                    {@link Logger#debug(String) DEBUG}debug *
                    */ public static final LoggerProvider JCL=LoggerProviderJCL.INSTANCE; /** * A {@link LoggerProvider} implementation that wraps the Apache Log4J framework. *

                    * See the description of the static {@link Config#LoggerProvider} property for details on when this implementation is used as the default. *

                    * The following mapping of logging levels is used: * *
                    {@link Logger} levelorg.apache.log4j.Level *
                    {@link Logger#error(String) ERROR}ERROR *
                    {@link Logger#warn(String) WARN}WARN *
                    {@link Logger#info(String) INFO}INFO *
                    {@link Logger#debug(String) DEBUG}DEBUG *
                    */ public static final LoggerProvider LOG4J=LoggerProviderLog4J.INSTANCE; /** * A {@link LoggerProvider} implementation that wraps the SLF4J framework. *

                    * See the description of the static {@link Config#LoggerProvider} property for details on when this implementation is used as the default. *

                    * The following mapping of logging levels is used: * *
                    {@link Logger} levelorg.slf4j.Logger level *
                    {@link Logger#error(String) ERROR}error *
                    {@link Logger#warn(String) WARN}warn *
                    {@link Logger#info(String) INFO}info *
                    {@link Logger#debug(String) DEBUG}debug *
                    */ public static final LoggerProvider SLF4J=LoggerProviderSLF4J.INSTANCE; /** * Creates a new {@link Logger} instance with the specified name. *

                    * The name argument is used by the underlying logging implementation, and is normally a dot-separated name based on * the package name or class name of the subsystem. *

                    * The name used for all automatically created {@link Logger} instances is "net.htmlparser.jericho". * * @param name the name of the logger, the use of which is determined by the underlying logging implementation, may be null. * @return a new {@link Logger} instance with the specified name. */ public Logger getLogger(String name); } jericho-html-3.1/src/java/net/htmlparser/jericho/MicrosoftTagTypes.java0000644000175000017500000001337711204550410026276 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Contains {@linkplain TagType tag types} recognised exclusively by Microsoft® Internet Explorer. *

                    * The tag type defined in this class is not {@linkplain TagType#register() registered} by default. */ public final class MicrosoftTagTypes { /** * The tag type given to a Microsoft® downlevel-revealed conditional comment * (<![if ... ]> | <![endif]>). *

                    * The only valid {@linkplain Tag#getName() names} for tags of this type are "![if" and "![endif". *

                    * This start tag type is used to represent both the "if" and "endif" tags. * Because the "endif" tag can not be represented by an {@linkplain EndTagType end tag type} (it doesn't start with "</"), * the parser makes no attempt to match if-endif tag pairs to form {@linkplain Element elements}. *

                    * The {@link #isConditionalCommentIfTag(Tag)} and {@link #isConditionalCommentEndifTag(Tag)} methods provide an efficient means of determining whether * a given tag is of the "if" or "endif" variety. *

                    * The expression constituting the condition of an "if" tag can be extracted using the {@link StartTag#getTagContent()} method. * For example, if the variable conditionalCommentIfTag represents the tag <![if !IE]>, then the expression * conditionalCommentIfTag.getTagContent().toString().trim() yields the string "!IE". *

                    *

                    *
                    Properties:
                    *
                    * *
                    PropertyValue *
                    {@link StartTagType#getDescription() Description}Microsoft downlevel-revealed conditional comment *
                    {@link StartTagType#getStartDelimiter() StartDelimiter}<![ *
                    {@link StartTagType#getClosingDelimiter() ClosingDelimiter}]> *
                    {@link StartTagType#isServerTag() IsServerTag}false *
                    {@link StartTagType#getNamePrefix() NamePrefix}![ *
                    {@link StartTagType#getCorrespondingEndTagType() CorrespondingEndTagType}null *
                    {@link StartTagType#hasAttributes() HasAttributes}false *
                    {@link StartTagType#isNameAfterPrefixRequired() IsNameAfterPrefixRequired}true *
                    *
                    Example:
                    *
                    <![if !IE]>
                    *
                    */ public static final StartTagType DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT=StartTagTypeMicrosoftDownlevelRevealedConditionalComment.INSTANCE; private static final TagType[] TAG_TYPES={ DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT }; private MicrosoftTagTypes() {} /** * Indicates whether the specified tag is a {@linkplain #DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT downlevel-revealed conditional comment} "if" tag * (<![if ... ]>). * * @param tag the {@link Tag} to test. * @return true if the specified tag is a conditional comment "if" tag, otherwise false. */ public static boolean isConditionalCommentIfTag(final Tag tag) { return tag.getName()==StartTagTypeMicrosoftDownlevelRevealedConditionalComment.IF; } /** * Indicates whether the specified tag is a {@linkplain #DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT downlevel-revealed conditional comment} "endif" tag * (<![endif]>). * * @param tag the {@link Tag} to test. * @return true if the specified tag is a conditional comment "endif" tag, otherwise false. */ public static boolean isConditionalCommentEndifTag(final Tag tag) { return tag.getName()==StartTagTypeMicrosoftDownlevelRevealedConditionalComment.ENDIF; } /** * {@linkplain TagType#register() Registers} all of the tag types defined in this class at once. *

                    * The tag types must be registered before the parser will recognise them. */ public static void register() { for (TagType tagType : TAG_TYPES) tagType.register(); } /** * Indicates whether the specified tag type is defined in this class. * * @param tagType the {@link TagType} to test. * @return true if the specified tag type is defined in this class, otherwise false. */ public static boolean defines(final TagType tagType) { for (TagType definedTagType : TAG_TYPES) if (tagType==definedTagType) return true; return false; } } jericho-html-3.1/src/java/net/htmlparser/jericho/HTMLElements.java0000644000175000017500000031327111204550410025105 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; import static net.htmlparser.jericho.HTMLElementName.*; /** * Contains static methods which group {@linkplain HTMLElementName HTML element names} by the characteristics of their associated * elements. *

                    * An HTML element is a normal element with a * {@linkplain Element#getName() name} that matches one of the {@linkplain HTMLElementName HTML element names} (ignoring case). * This type of element spans the logical HTML element as described in the * HTML 4.01 specification section 3.2.1, * which may be implicitly terminated if it specifies an * {@linkplain #getEndTagOptionalElementNames() optional end tag}. *

                    * The term Non-HTML element refers to a normal element * with a {@linkplain Element#getName() name} that does not match one of the {@linkplain HTMLElementName HTML element names}. * This type of element must be either a single tag element or * explicitly terminated. *

                    * All of the sets returned by the methods in this class may be modified to customise the behaviour of the parser. * Care must be taken however to ensure that the sets only contain tag names in lower case. *

                    * Below is a table summarising the default characteristics of each HTML element. See also the * index of elements in the HTML 4.01 specification * for the official table containing similar information. *

                    * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
                    {@linkplain Element#getName() Name}Box Type{@linkplain #getStartTagOptionalElementNames() Start Tag}{@linkplain #getEndTagOptionalElementNames() End Tag}{@linkplain #getNestingForbiddenElementNames() Nest}{@linkplain #getDeprecatedElementNames() Depr.}Description / Specification
                    {@link HTMLElementName#A A}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} anchor
                    {@link HTMLElementName#ABBR ABBR}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  abbreviated form (e.g., WWW, HTTP, etc.)
                    {@link HTMLElementName#ACRONYM ACRONYM}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  acronym
                    {@link HTMLElementName#ADDRESS ADDRESS}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} information on author
                    {@link HTMLElementName#APPLET APPLET}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF}DJava applet
                    {@link HTMLElementName#AREA AREA}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} client-side image map area
                    {@link HTMLElementName#B B}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  bold text style
                    {@link HTMLElementName#BASE BASE}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} document base URI
                    {@link HTMLElementName#BASEFONT BASEFONT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF}Dbase font size
                    {@link HTMLElementName#BDO BDO}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  I18N BiDi over-ride
                    {@link HTMLElementName#BIG BIG}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  large text style
                    {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  long quotation
                    {@link HTMLElementName#BODY BODY} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} document body
                    {@link HTMLElementName#BR BR}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} forced line break
                    {@link HTMLElementName#BUTTON BUTTON}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} push button
                    {@link HTMLElementName#CAPTION CAPTION}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} table caption
                    {@link HTMLElementName#CENTER CENTER}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required} Dshorthand for DIV align=center
                    {@link HTMLElementName#CITE CITE}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  citation
                    {@link HTMLElementName#CODE CODE}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  computer code fragment
                    {@link HTMLElementName#COL COL}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} table column
                    {@link HTMLElementName#COLGROUP COLGROUP}  {@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} table column group
                    {@link HTMLElementName#DD DD}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  definition description
                    {@link HTMLElementName#DEL DEL}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  deleted text
                    {@link HTMLElementName#DFN DFN}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  instance definition
                    {@link HTMLElementName#DIR DIR}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required} Ddirectory list
                    {@link HTMLElementName#DIV DIV}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  generic language/style container
                    {@link HTMLElementName#DL DL}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  definition list
                    {@link HTMLElementName#DT DT}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  definition term
                    {@link HTMLElementName#EM EM}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  emphasis
                    {@link HTMLElementName#FIELDSET FIELDSET}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  form control group
                    {@link HTMLElementName#FONT FONT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dlocal change to font
                    {@link HTMLElementName#FORM FORM}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} interactive form
                    {@link HTMLElementName#FRAME FRAME}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} subwindow
                    {@link HTMLElementName#FRAMESET FRAMESET}  {@linkplain #getEndTagRequiredElementNames() Required}  window subdivision
                    {@link HTMLElementName#H1 H1}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
                    {@link HTMLElementName#H2 H2}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
                    {@link HTMLElementName#H3 H3}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
                    {@link HTMLElementName#H4 H4}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
                    {@link HTMLElementName#H5 H5}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
                    {@link HTMLElementName#H6 H6}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  heading
                    {@link HTMLElementName#HEAD HEAD} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} document head
                    {@link HTMLElementName#HR HR}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} horizontal rule
                    {@link HTMLElementName#HTML HTML} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} document root element
                    {@link HTMLElementName#I I}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  italic text style
                    {@link HTMLElementName#IFRAME IFRAME}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} inline subwindow
                    {@link HTMLElementName#IMG IMG}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} Embedded image
                    {@link HTMLElementName#INPUT INPUT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} form control
                    {@link HTMLElementName#INS INS}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  inserted text
                    {@link HTMLElementName#ISINDEX ISINDEX}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF}Dsingle line prompt
                    {@link HTMLElementName#KBD KBD}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  text to be entered by the user
                    {@link HTMLElementName#LABEL LABEL}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} form field label text
                    {@link HTMLElementName#LEGEND LEGEND}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} fieldset legend
                    {@link HTMLElementName#LI LI}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  list item
                    {@link HTMLElementName#LINK LINK}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} a media-independent link
                    {@link HTMLElementName#MAP MAP}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  client-side image map
                    {@link HTMLElementName#MENU MENU}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required} Dmenu list
                    {@link HTMLElementName#META META}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} generic metainformation
                    {@link HTMLElementName#NOFRAMES NOFRAMES}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  alternate content container for non frame-based rendering
                    {@link HTMLElementName#NOSCRIPT NOSCRIPT}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  alternate content container for non script-based rendering
                    {@link HTMLElementName#OBJECT OBJECT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  generic embedded object
                    {@link HTMLElementName#OL OL}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  ordered list
                    {@link HTMLElementName#OPTGROUP OPTGROUP}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} option group
                    {@link HTMLElementName#OPTION OPTION}  {@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} selectable choice
                    {@link HTMLElementName#P P}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagOptionalElementNames() Optional} (details){@linkplain #getNestingForbiddenElementNames() NF} paragraph
                    {@link HTMLElementName#PARAM PARAM}  {@linkplain #getEndTagForbiddenElementNames() Forbidden}{@linkplain #getNestingForbiddenElementNames() NF} named property value
                    {@link HTMLElementName#PRE PRE}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  preformatted text
                    {@link HTMLElementName#Q Q}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  short inline quotation
                    {@link HTMLElementName#S S}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dstrike-through text style
                    {@link HTMLElementName#SAMP SAMP}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  sample program output, scripts, etc.
                    {@link HTMLElementName#SCRIPT SCRIPT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} script statements
                    {@link HTMLElementName#SELECT SELECT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} option selector
                    {@link HTMLElementName#SMALL SMALL}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  small text style
                    {@link HTMLElementName#SPAN SPAN}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  generic language/style container
                    {@link HTMLElementName#STRIKE STRIKE}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dstrike-through text
                    {@link HTMLElementName#STRONG STRONG}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  strong emphasis
                    {@link HTMLElementName#STYLE STYLE}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} style info
                    {@link HTMLElementName#SUB SUB}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  subscript
                    {@link HTMLElementName#SUP SUP}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  superscript
                    {@link HTMLElementName#TABLE TABLE}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  table
                    {@link HTMLElementName#TBODY TBODY} {@linkplain #getStartTagOptionalElementNames() Optional}{@linkplain #getEndTagOptionalElementNames() Optional} (details)  table body
                    {@link HTMLElementName#TD TD}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table data cell
                    {@link HTMLElementName#TEXTAREA TEXTAREA}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} multi-line text field
                    {@link HTMLElementName#TFOOT TFOOT}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table footer
                    {@link HTMLElementName#TH TH}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table header cell
                    {@link HTMLElementName#THEAD THEAD}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table header
                    {@link HTMLElementName#TITLE TITLE}  {@linkplain #getEndTagRequiredElementNames() Required}{@linkplain #getNestingForbiddenElementNames() NF} document title
                    {@link HTMLElementName#TR TR}  {@linkplain #getEndTagOptionalElementNames() Optional} (details)  table row
                    {@link HTMLElementName#TT TT}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  teletype or monospaced text style
                    {@link HTMLElementName#U U}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required} Dunderlined text style
                    {@link HTMLElementName#UL UL}{@linkplain #getBlockLevelElementNames() Block} {@linkplain #getEndTagRequiredElementNames() Required}  unordered list
                    {@link HTMLElementName#VAR VAR}{@linkplain #getInlineLevelElementNames() Inline} {@linkplain #getEndTagRequiredElementNames() Required}  instance of a variable or program argument
                    * * @see HTMLElementName * @see Element */ public final class HTMLElements { /* Every HTML element name, listed alphabetically; getElementNames() returns this list object itself. */ private static final List ALL=new ArrayList(Arrays.asList(new String[] {A,ABBR,ACRONYM,ADDRESS,APPLET,AREA,B,BASE,BASEFONT,BDO,BIG,BLOCKQUOTE,BODY,BR,BUTTON,CAPTION,CENTER,CITE,CODE,COL,COLGROUP,DD,DEL,DFN,DIR,DIV,DL,DT,EM,FIELDSET,FONT,FORM,FRAME,FRAMESET,H1,H2,H3,H4,H5,H6,HEAD,HR,HTML,I,IFRAME,IMG,INPUT,INS,ISINDEX,KBD,LABEL,LEGEND,LI,LINK,MAP,MENU,META,NOFRAMES,NOSCRIPT,OBJECT,OL,OPTGROUP,OPTION,P,PARAM,PRE,Q,S,SAMP,SCRIPT,SELECT,SMALL,SPAN,STRIKE,STRONG,STYLE,SUB,SUP,TABLE,TBODY,TD,TEXTAREA,TFOOT,TH,THEAD,TITLE,TR,TT,U,UL,VAR})); private static final HTMLElementNameSet BLOCK=new HTMLElementNameSet(new String[] {P,H1,H2,H3,H4,H5,H6,UL,OL,DIR,MENU,PRE,DL,DIV,CENTER,NOSCRIPT,NOFRAMES,BLOCKQUOTE,FORM,ISINDEX,HR,TABLE,FIELDSET,ADDRESS}); private static final HTMLElementNameSet INLINE=new HTMLElementNameSet(new String[] {TT,I,B,U,S,STRIKE,BIG,SMALL,EM,STRONG,DFN,CODE,SAMP,KBD,VAR,CITE,ABBR,ACRONYM,A,IMG,APPLET,OBJECT,FONT,BASEFONT,BR,SCRIPT,MAP,Q,SUB,SUP,SPAN,BDO,IFRAME,INPUT,SELECT,TEXTAREA,LABEL,BUTTON,INS,DEL}); private static final HTMLElementNameSet END_TAG_FORBIDDEN_SET=new HTMLElementNameSet(new String[] {AREA,BASE,BASEFONT,BR,COL,FRAME,HR,IMG,INPUT,ISINDEX,LINK,META,PARAM}); /* Reusable name groups that buildTerminatingTagNameSetsMap() combines into the terminating-tag sets. */ private static final HTMLElementNameSet _UL_OL=new HTMLElementNameSet(UL).union(OL); private static final HTMLElementNameSet _DD_DT=new HTMLElementNameSet(DD).union(DT); private static final HTMLElementNameSet _THEAD_TBODY_TFOOT_TR=new HTMLElementNameSet(THEAD).union(TBODY).union(TFOOT).union(TR); private static final HTMLElementNameSet _THEAD_TBODY_TFOOT_TR_TD_TH=new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TD).union(TH); private static final HTMLElementNameSet DEPRECATED=new HTMLElementNameSet().union(APPLET).union(BASEFONT).union(CENTER).union(DIR).union(FONT).union(ISINDEX).union(MENU).union(S).union(STRIKE).union(U); private static final HTMLElementNameSet 
START_TAG_OPTIONAL_SET=new HTMLElementNameSet().union(BODY).union(HEAD).union(HTML).union(TBODY); private static final HashMap CONSTANT_NAME_MAP=buildTagMap(); // contains a map of tag names to the tag constants, allowing standard tags to be tested using == operator instead of equals() private static final HashMap TERMINATING_TAG_NAME_SETS_MAP=buildTerminatingTagNameSetsMap(); // contains a map of tags having optional end tags to the HTMLElementTerminatingTagNameSets that can terminate the element if the end tag is not present /* key-set view of TERMINATING_TAG_NAME_SETS_MAP, so it holds exactly the names that have an entry in that map */ private static final Set END_TAG_OPTIONAL_SET=TERMINATING_TAG_NAME_SETS_MAP.keySet(); private static final HTMLElementNameSet END_TAG_REQUIRED_SET=new HTMLElementNameSet().union(ALL).minus(END_TAG_FORBIDDEN_SET).minus(END_TAG_OPTIONAL_SET); private static final HTMLElementNameSet CLOSING_SLASH_IGNORED_SET=new HTMLElementNameSet().union(END_TAG_OPTIONAL_SET).union(END_TAG_REQUIRED_SET); static final HTMLElementNameSet END_TAG_REQUIRED_NESTING_FORBIDDEN_SET=new HTMLElementNameSet().union(A).union(ADDRESS).union(APPLET).union(BUTTON).union(CAPTION).union(FORM).union(IFRAME).union(LABEL).union(LEGEND).union(OPTGROUP).union(SCRIPT).union(SELECT).union(STYLE).union(TEXTAREA).union(TITLE); private static final HTMLElementNameSet END_TAG_OPTIONAL_NESTING_FORBIDDEN_SET=new HTMLElementNameSet().union(BODY).union(COLGROUP).union(HEAD).union(HTML).union(OPTION).union(P); private static final HTMLElementNameSet NESTING_FORBIDDEN_SET=new HTMLElementNameSet().union(END_TAG_REQUIRED_NESTING_FORBIDDEN_SET).union(END_TAG_OPTIONAL_NESTING_FORBIDDEN_SET).union(END_TAG_FORBIDDEN_SET); /* static utility holder: the private constructor keeps other classes from instantiating it */ private HTMLElements() {} /** * Returns a list containing all of the {@linkplain HTMLElementName HTML element names}. *

                    * The returned list is in alphabetical order. * * @return a list containing all of the {@linkplain HTMLElementName HTML element names}. */ public static final List getElementNames() { return ALL; /* the static backing list itself; no copy is made */ } /** * Returns a set containing the {@linkplain Element#getName() names} of all the * block-level elements. *

                    * The element names contained in this set are:
                    * {@link HTMLElementName#ADDRESS ADDRESS}, {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE}, {@link HTMLElementName#CENTER CENTER}, {@link HTMLElementName#DIR DIR}, {@link HTMLElementName#DIV DIV}, * {@link HTMLElementName#DL DL}, {@link HTMLElementName#FIELDSET FIELDSET}, {@link HTMLElementName#FORM FORM}, * {@link HTMLElementName#H1 H1}, {@link HTMLElementName#H2 H2}, {@link HTMLElementName#H3 H3}, {@link HTMLElementName#H4 H4}, {@link HTMLElementName#H5 H5}, {@link HTMLElementName#H6 H6}, * {@link HTMLElementName#HR HR}, {@link HTMLElementName#ISINDEX ISINDEX}, {@link HTMLElementName#MENU MENU}, {@link HTMLElementName#NOFRAMES NOFRAMES}, {@link HTMLElementName#NOSCRIPT NOSCRIPT}, * {@link HTMLElementName#OL OL}, {@link HTMLElementName#P P}, {@link HTMLElementName#PRE PRE}, {@link HTMLElementName#TABLE TABLE}, {@link HTMLElementName#UL UL} *

                    * This set is defined in the HTML 4.01 Transitional DTD, * but more detailed information can be found in the * HTML 4.01 specification section 7.5.3 - Block-level and inline elements * and the CSS2 specification section 9.2.1 - Block-level elements and block boxes. *

                    * The CSS2 display property can be used * to override the normal box type of an element. * * @return a set containing the {@linkplain Element#getName() names} of all the block-level elements. * @see #getInlineLevelElementNames() */ public static Set getBlockLevelElementNames() { return BLOCK; /* the static BLOCK instance itself, the same object on every call */ } /** * Returns a set containing the {@linkplain Element#getName() names} of all the * inline-level elements. *

                    * The element names contained in this set are:
                    * {@link HTMLElementName#A A}, {@link HTMLElementName#ABBR ABBR}, {@link HTMLElementName#ACRONYM ACRONYM}, {@link HTMLElementName#APPLET APPLET}, {@link HTMLElementName#B B}, {@link HTMLElementName#BASEFONT BASEFONT}, * {@link HTMLElementName#BDO BDO}, {@link HTMLElementName#BIG BIG}, {@link HTMLElementName#BR BR}, {@link HTMLElementName#BUTTON BUTTON}, {@link HTMLElementName#CITE CITE}, {@link HTMLElementName#CODE CODE}, * {@link HTMLElementName#DEL DEL}, {@link HTMLElementName#DFN DFN}, {@link HTMLElementName#EM EM}, {@link HTMLElementName#FONT FONT}, {@link HTMLElementName#I I}, {@link HTMLElementName#IFRAME IFRAME}, {@link HTMLElementName#IMG IMG}, * {@link HTMLElementName#INPUT INPUT}, {@link HTMLElementName#INS INS}, {@link HTMLElementName#KBD KBD}, {@link HTMLElementName#LABEL LABEL}, {@link HTMLElementName#MAP MAP}, {@link HTMLElementName#OBJECT OBJECT}, * {@link HTMLElementName#Q Q}, {@link HTMLElementName#S S}, {@link HTMLElementName#SAMP SAMP}, {@link HTMLElementName#SCRIPT SCRIPT}, {@link HTMLElementName#SELECT SELECT}, {@link HTMLElementName#SMALL SMALL}, * {@link HTMLElementName#SPAN SPAN}, {@link HTMLElementName#STRIKE STRIKE}, {@link HTMLElementName#STRONG STRONG}, {@link HTMLElementName#SUB SUB}, {@link HTMLElementName#SUP SUP}, {@link HTMLElementName#TEXTAREA TEXTAREA}, * {@link HTMLElementName#TT TT}, {@link HTMLElementName#U U}, {@link HTMLElementName#VAR VAR} *

                    * This set is defined in the HTML 4.01 Transitional DTD, * but more detailed information can be found in the * HTML 4.01 specification section 7.5.3 - Block-level and inline elements * and the CSS2 specification section 9.2.2 - Inline-level elements and inline boxes. *

                    * The CSS2 display property can be used * to override the normal box type of an element. *

                    * The HTML Document Type Definitions * forbid the presence of {@linkplain #getBlockLevelElementNames() block-level elements} inside inline-level elements, * but it is tolerated by all popular browsers in various situations, even in XHTML documents. * The most notorious example of this is the common inclusion of block-level elements inside {@link HTMLElementName#FONT FONT} elements. * * @return a set containing the {@linkplain Element#getName() names} of all the inline-level elements. * @see #getBlockLevelElementNames() */ public static Set getInlineLevelElementNames() { return INLINE; } /** * Returns a set containing the {@linkplain Element#getName() names} of all * deprecated elements in HTML 4.01. * <p> * The element names contained in this set are: * {@link HTMLElementName#APPLET APPLET}, {@link HTMLElementName#BASEFONT BASEFONT}, {@link HTMLElementName#CENTER CENTER}, {@link HTMLElementName#DIR DIR}, {@link HTMLElementName#FONT FONT}, * {@link HTMLElementName#ISINDEX ISINDEX}, {@link HTMLElementName#MENU MENU}, {@link HTMLElementName#S S}, {@link HTMLElementName#STRIKE STRIKE}, {@link HTMLElementName#U U} * * @return a set containing the {@linkplain Element#getName() names} of all deprecated elements in HTML 4.01. */ public static Set getDeprecatedElementNames() { return DEPRECATED; } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getEndTag() end tag} is forbidden. *

                    * See the element parsing rules for HTML elements with forbidden end tags * for more information. *

                    * The index of elements in the HTML 4.01 specification * includes the letter 'F' in the "End Tag" column for elements whose end tag is forbidden. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getEndTag() end tag} is forbidden. * @see #getEndTagOptionalElementNames() * @see #getEndTagRequiredElementNames() */ public static Set getEndTagForbiddenElementNames() { return END_TAG_FORBIDDEN_SET; /* the same set that is subtracted from ALL to form END_TAG_REQUIRED_SET and unioned into NESTING_FORBIDDEN_SET */ } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getEndTag() end tag} is optional. *

                    * Elements with these names may be implicitly terminated by a subsequent * {@linkplain #getTerminatingStartTagNames(String) terminating start tag} or * {@linkplain #getTerminatingEndTagNames(String) terminating end tag}. * A list of these terminating tags, and the names of {@linkplain #getNonterminatingElementNames(String) non-terminating elements} * that can be nested within the element, can be found in the documentation of each relevant element in the {@link HTMLElementName} class. *

                    * See the element parsing rules for HTML elements with optional end tags * for more information. *

                    * The index of elements in the HTML 4.01 specification * includes the letter 'O' in the "End Tag" column for elements whose end tag is optional. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getEndTag() end tag} is optional. * @see #getEndTagForbiddenElementNames() * @see #getEndTagRequiredElementNames() */ public static Set getEndTagOptionalElementNames() { return END_TAG_OPTIONAL_SET; /* this is TERMINATING_TAG_NAME_SETS_MAP.keySet(), i.e. a view backed by that map rather than an independent set */ } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getEndTag() end tag} is required. *

                    * See the element parsing rules for HTML elements with required end tags * for more information. *

                    * The index of elements in the HTML 4.01 specification * leaves the "End Tag" column blank for elements whose end tag is required. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getEndTag() end tag} is required. * @see #getEndTagForbiddenElementNames() * @see #getEndTagOptionalElementNames() */ public static Set getEndTagRequiredElementNames() { return END_TAG_REQUIRED_SET; /* initialised once as ALL minus END_TAG_FORBIDDEN_SET minus END_TAG_OPTIONAL_SET */ } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * for which the {@linkplain Element#getStartTag() start tag} is optional. *

                    * Elements with optional start tags must be present in the document object model (DOM) * in certain locations, either forming part of the structure of the HTML document as a whole * (e.g. the {@link HTMLElementName#HTML HTML}, {@link HTMLElementName#HEAD HEAD}, and {@link HTMLElementName#BODY BODY} elements), * or forming part of the structure of a {@link HTMLElementName#TABLE TABLE} element (e.g. the {@link HTMLElementName#TBODY TBODY} element). * The location of an omitted start tag * in the document's object model can be inferred from the surrounding elements. *

                    * This library does not use this property in any way when parsing documents, and does not construct a document object model from the * source, so no implied element is created where an optional start tag is omitted. *

                    * When the start tag has been omitted in the document text, the corresponding end tag should also be omitted. *

                    * The index of elements in the HTML 4.01 specification * includes the letter 'O' in the "Start Tag" column for elements whose start tag is optional. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements for which the {@linkplain Element#getStartTag() start tag} is optional. */ public static Set getStartTagOptionalElementNames() { return START_TAG_OPTIONAL_SET; /* initialised with BODY, HEAD, HTML and TBODY */ } /** * Returns the {@linkplain StartTag#getName() names} of start tags that implicitly terminate * an HTML element with the specified name. *

                    * This method is only relevant to HTML elements for which the * {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * It returns null if *
                    {@link #getEndTagOptionalElementNames()}.contains(endTagOptionalElementName.toLowerCase())==false. * * @param endTagOptionalElementName the {@linkplain Element#getName() name} of an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @return the {@linkplain StartTag#getName() names} of start tags that implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @see #getTerminatingEndTagNames(String endTagOptionalElementName) * @see #getNonterminatingElementNames(String endTagOptionalElementName) */ public static Set getTerminatingStartTagNames(final String endTagOptionalElementName) { /* NOTE(review): the name is passed to the lookup exactly as given (no toLowerCase() here) although the Javadoc condition lower-cases it - confirm callers pass normalised names */ final HTMLElementTerminatingTagNameSets terminatingTagNameSets=getTerminatingTagNameSets(endTagOptionalElementName); if (terminatingTagNameSets==null) return null; return terminatingTagNameSets.TerminatingStartTagNameSet; } /** * Returns the {@linkplain EndTag#getName() names} of end tags that implicitly terminate * an HTML element with the specified name. *

                    * This method is only relevant to HTML elements for which the * {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * It returns null if *
                    {@link #getEndTagOptionalElementNames()}.contains(endTagOptionalElementName.toLowerCase())==false. *

                    * Note that removing the tag name matching the specified element has no effect on the behaviour of the parser, * as it is always assumed that a start tag is terminated by an end tag with a matching name. * * @param endTagOptionalElementName the {@linkplain Element#getName() name} of an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @return the {@linkplain EndTag#getName() names} of end tags that implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @see #getTerminatingStartTagNames(String endTagOptionalElementName) * @see #getNonterminatingElementNames(String endTagOptionalElementName) */ public static Set getTerminatingEndTagNames(final String endTagOptionalElementName) { final HTMLElementTerminatingTagNameSets terminatingTagNameSets=getTerminatingTagNameSets(endTagOptionalElementName); if (terminatingTagNameSets==null) return null; /* no entry for this name in the terminating-tag map */ return terminatingTagNameSets.TerminatingEndTagNameSet; } /** * Returns the {@linkplain Element#getName() names} of elements that do NOT implicitly terminate * an HTML element with the specified name. * Neither can any tag nested inside any of these elements implicitly terminate the specified element, * even if it is listed as one of the {@linkplain #getTerminatingStartTagNames(String) terminating start tags} or * {@linkplain #getTerminatingEndTagNames(String) terminating end tags}. *

                    * This method is only relevant to HTML elements for which the * {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * It returns null if *
                    {@link #getEndTagOptionalElementNames()}.contains(endTagOptionalElementName.toLowerCase())==false. * * @param endTagOptionalElementName the {@linkplain Element#getName() name} of an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @return the {@linkplain Element#getName() names} of elements that do NOT implicitly terminate an HTML element with the specified name, or null if the name does not identify an element for which the {@linkplain #getEndTagOptionalElementNames() end tag is optional}. * @see #getTerminatingStartTagNames(String endTagOptionalElementName) * @see #getTerminatingEndTagNames(String endTagOptionalElementName) */ public static Set getNonterminatingElementNames(final String endTagOptionalElementName) { final HTMLElementTerminatingTagNameSets terminatingTagNameSets=getTerminatingTagNameSets(endTagOptionalElementName); if (terminatingTagNameSets==null) return null; return terminatingTagNameSets.NonterminatingElementNameSet; } /** * Returns a set containing the {@linkplain Element#getName() names} of all of the HTML elements * which should never contain elements of the same name, either as direct or indirect descendants. * * @return a set containing the {@linkplain Element#getName() names} of all of the HTML elements which should never contain elements of the same name. */ public static Set getNestingForbiddenElementNames() { return NESTING_FORBIDDEN_SET; /* union of END_TAG_REQUIRED_NESTING_FORBIDDEN_SET, END_TAG_OPTIONAL_NESTING_FORBIDDEN_SET and END_TAG_FORBIDDEN_SET */ } /** Returns the entry stored in CONSTANT_NAME_MAP under the given name, or the given name itself when the map has no entry for it. */ static final String getConstantElementName(final String elementName) { final String elementNameConstant=CONSTANT_NAME_MAP.get(elementName); return elementNameConstant!=null ? 
elementNameConstant : elementName; } /** Returns whether CLOSING_SLASH_IGNORED_SET (the union of the end-tag-optional and end-tag-required names) contains the given name. */ static final boolean isClosingSlashIgnored(final String elementName) { return CLOSING_SLASH_IGNORED_SET.contains(elementName); } /** Looks the name up, exactly as given, in TERMINATING_TAG_NAME_SETS_MAP; the result is null when the map has no entry for it. */ static final HTMLElementTerminatingTagNameSets getTerminatingTagNameSets(final String endTagOptionalElementName) { return TERMINATING_TAG_NAME_SETS_MAP.get(endTagOptionalElementName); } /** Builds the map from each end-tag-optional element name to its HTMLElementTerminatingTagNameSets. The three constructor arguments appear to be the terminating start tag names, the terminating end tag names and the non-terminating element names, in that order - TODO confirm against the HTMLElementTerminatingTagNameSets constructor. */ private static HashMap buildTerminatingTagNameSetsMap() { // HTML is included in the NonterminatingElementNameSet of BODY and HTML in case the source contains (illegally) nested HTML documents final HashMap map=new HashMap(20,1.0F); // 15 entries in total map.put(BODY,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(), new HTMLElementNameSet(HTML).union(BODY), new HTMLElementNameSet(HTML))); map.put(COLGROUP,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(COLGROUP), new HTMLElementNameSet(TABLE).union(COLGROUP), new HTMLElementNameSet(TABLE))); map.put(DD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_DD_DT), new HTMLElementNameSet(DL).union(DD), new HTMLElementNameSet(DL))); map.put(DT,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_DD_DT), new HTMLElementNameSet(DL).union(DT), new HTMLElementNameSet(DL))); map.put(HEAD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(BODY).union(FRAMESET), new HTMLElementNameSet(HTML).union(HEAD), new HTMLElementNameSet())); map.put(HTML,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(), new HTMLElementNameSet(HTML), new HTMLElementNameSet(HTML))); map.put(LI,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(LI), new HTMLElementNameSet(_UL_OL).union(LI), new HTMLElementNameSet(_UL_OL))); map.put(OPTION,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(OPTION).union(OPTGROUP), new HTMLElementNameSet(SELECT).union(OPTION), new HTMLElementNameSet())); map.put(P,new HTMLElementTerminatingTagNameSets(new 
HTMLElementNameSet(BLOCK).union(_DD_DT).union(TH).union(TD).union(LI), new HTMLElementNameSet(BLOCK).union(_DD_DT).union(BODY).union(HTML).union(_THEAD_TBODY_TFOOT_TR_TD_TH).union(CAPTION).union(LEGEND), new HTMLElementNameSet())); map.put(TBODY,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(TBODY).union(TFOOT).union(THEAD), new HTMLElementNameSet(TABLE).union(TBODY), new HTMLElementNameSet(TABLE))); map.put(TD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR_TD_TH), new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TABLE).union(TD), new HTMLElementNameSet(TABLE))); map.put(TFOOT,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(TBODY).union(TFOOT).union(THEAD), new HTMLElementNameSet(TABLE).union(TFOOT), new HTMLElementNameSet(TABLE))); map.put(TH,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR_TD_TH), new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TABLE).union(TH), new HTMLElementNameSet(TABLE))); map.put(THEAD,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(TBODY).union(TFOOT).union(THEAD), new HTMLElementNameSet(TABLE).union(THEAD), new HTMLElementNameSet(TABLE))); map.put(TR,new HTMLElementTerminatingTagNameSets(new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR), new HTMLElementNameSet(_THEAD_TBODY_TFOOT_TR).union(TABLE), new HTMLElementNameSet(TABLE))); return map; } private static HashMap buildTagMap() { final HashMap map=new HashMap(132,1.0F); // 99 tags total for (String tagName : ALL) map.put(tagName,tagName); map.put(StartTagTypeMarkupDeclaration.ELEMENT,StartTagTypeMarkupDeclaration.ELEMENT); map.put(StartTagTypeMarkupDeclaration.ATTLIST,StartTagTypeMarkupDeclaration.ATTLIST); map.put(StartTagTypeMarkupDeclaration.ENTITY,StartTagTypeMarkupDeclaration.ENTITY); map.put(StartTagTypeMarkupDeclaration.NOTATION,StartTagTypeMarkupDeclaration.NOTATION); 
map.put(StartTagTypeMicrosoftDownlevelRevealedConditionalComment.IF,StartTagTypeMicrosoftDownlevelRevealedConditionalComment.IF); map.put(StartTagTypeMicrosoftDownlevelRevealedConditionalComment.ENDIF,StartTagTypeMicrosoftDownlevelRevealedConditionalComment.ENDIF); return map; } } jericho-html-3.1/src/java/net/htmlparser/jericho/SourceFormatter.java0000644000175000017500000010064511204550410025767 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; import java.io.*; import java.net.*; /** * Formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent. *

                    * Any indentation present in the original source text is removed. *

                    * Use one of the following methods to obtain the output:
                    * <ul>
                    *  <li>{@link #writeTo(Writer)}
                    *  <li>{@link #appendTo(Appendable)}
                    *  <li>{@link #toString()}
                    *  <li>{@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}
                    * </ul>

                    * The output text is functionally equivalent to the original source and should be rendered identically unless specified below. *

                    * The following points describe the process in general terms. * Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions. *

                    * <ul>
                    *  <li>Every element that is not an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level element} appears on a new line
                    *   with an indent corresponding to its {@linkplain Element#getDepth() depth} in the document element hierarchy.
                    *  <li>The indent is formed by writing n repetitions of the string specified in the {@link #setIndentString(String) IndentString} property,
                    *   where n is the depth of the indentation.
                    *  <li>The {@linkplain Element#getContent() content} of an indented element starts on a new line and is indented at a depth one greater than that of the element,
                    *   with the end tag appearing on a new line at the same depth as the start tag.
                    *   If the content contains only text and {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements},
                    *   it may continue on the same line as the start tag. Additionally, if the output content contains no new lines, the end tag may also continue on the same line.
                    *  <li>The content of preformatted elements such as {@link HTMLElementName#PRE PRE} and {@link HTMLElementName#TEXTAREA TEXTAREA} is not indented,
                    *   nor is the white space modified in any way.
                    *  <li>Only {@linkplain StartTagType#NORMAL normal} and {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} elements are indented.
                    *   All others are treated as {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements}.
                    *  <li>White space and indentation inside HTML {@linkplain StartTagType#COMMENT comments}, {@linkplain StartTagType#CDATA_SECTION CDATA sections}, or any
                    *   {@linkplain TagType#isServerTag() server tag} is preserved,
                    *   but with the indentation of new lines starting at a depth one greater than that of the surrounding text.
                    *  <li>White space and indentation inside {@link HTMLElementName#SCRIPT SCRIPT} elements is preserved,
                    *   but with the indentation of new lines starting at a depth one greater than that of the SCRIPT element.
                    *  <li>If the {@link #setTidyTags(boolean) TidyTags} property is set to true,
                    *   every tag in the document is replaced with the output from its {@link Tag#tidy()} method.
                    *   If this property is set to false, the tag from the original text is used, including all white space,
                    *   but with any new lines indented at a depth one greater than that of the element.
                    *  <li>If the {@link #setCollapseWhiteSpace(boolean) CollapseWhiteSpace} property
                    *   is set to true, every string of one or more {@linkplain Segment#isWhiteSpace(char) white space} characters
                    *   located outside of a tag is replaced with a single space in the output.
                    *   White space located adjacent to a non-inline-level element tag (except {@linkplain TagType#isServerTag() server tags}) may be removed.
                    *  <li>If the {@link #setIndentAllElements(boolean) IndentAllElements} property
                    *   is set to true, every element appears indented on a new line, including {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements}.
                    *   This generates output that is a good representation of the actual document element hierarchy,
                    *   but is very likely to introduce white space that compromises the functional equivalency of the document.
                    *  <li>The {@link #setNewLine(String) NewLine} property specifies the character sequence
                    *   to use for each newline in the output document.
                    *  <li>If the source document contains {@linkplain TagType#isServerTag() server tags}, the functional equivalency of the output document may be compromised.
                    * </ul>

                    * Formatting an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically. */ public final class SourceFormatter implements CharStreamSource { private final Segment segment; private String indentString="\t"; private boolean tidyTags=false; private boolean collapseWhiteSpace=false; private boolean removeLineBreaks=false; private boolean indentAllElements=false; private String newLine=null; /** * Constructs a new SourceFormatter based on the specified {@link Segment}. * @param segment the segment containing the HTML to be formatted. * @see Source#getSourceFormatter() */ public SourceFormatter(final Segment segment) { this.segment=segment; } // Documentation inherited from CharStreamSource public void writeTo(final Writer writer) throws IOException { appendTo(writer); writer.flush(); } // Documentation inherited from CharStreamSource public void appendTo(final Appendable appendable) throws IOException { new Processor(segment,getIndentString(),getTidyTags(),getCollapseWhiteSpace(),getRemoveLineBreaks(),getIndentAllElements(),getIndentAllElements(),getNewLine()).appendTo(appendable); } // Documentation inherited from CharStreamSource public long getEstimatedMaximumOutputLength() { return segment.length()*2; } // Documentation inherited from CharStreamSource public String toString() { return CharStreamSourceUtil.toString(this); } /** * Sets the string to be used for indentation. *

                    * The default value is a string containing a single tab character (U+0009). *

                    * The most commonly used indent strings are "\t" (single tab), " " (single space), "  " (2 spaces), and "    " (4 spaces). * * @param indentString the string to be used for indentation, must not be null. * @return this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement. * @see #getIndentString() */ public SourceFormatter setIndentString(final String indentString) { if (indentString==null) throw new IllegalArgumentException("indentString property must not be null"); this.indentString=indentString; return this; } /** * Returns the string to be used for indentation. *

                    * See the {@link #setIndentString(String)} method for a full description of this property. * * @return the string to be used for indentation. */ public String getIndentString() { return indentString; } /** * Sets whether the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method. *

                    * The default value is false. *

                    * If this property is set to false, the tag from the original text is used, including all white space, * but with any new lines indented at a depth one greater than that of the element. * * @param tidyTags specifies whether the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method. * @return this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement. * @see #getTidyTags() */ public SourceFormatter setTidyTags(final boolean tidyTags) { this.tidyTags=tidyTags; return this; } /** * Indicates whether the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method. *

                    * See the {@link #setTidyTags(boolean)} method for a full description of this property. * * @return true if the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method, otherwise false. */ public boolean getTidyTags() { return tidyTags; } /** * Sets whether {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed. *

                    * The default value is false. *

                    * If this property is set to true, every string of one or more {@linkplain Segment#isWhiteSpace(char) white space} characters * located outside of a tag is replaced with a single space in the output. * White space located adjacent to a non-inline-level element tag (except {@linkplain TagType#isServerTag() server tags}) may be removed. * * @param collapseWhiteSpace specifies whether {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed. * @return this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement. * @see #getCollapseWhiteSpace() */ public SourceFormatter setCollapseWhiteSpace(final boolean collapseWhiteSpace) { this.collapseWhiteSpace=collapseWhiteSpace; return this; } /** * Indicates whether {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed. *

                    * See the {@link #setCollapseWhiteSpace(boolean collapseWhiteSpace)} method for a full description of this property. * * @return true if {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed, otherwise false. */ public boolean getCollapseWhiteSpace() { return collapseWhiteSpace; } /** * Sets whether all non-essential line breaks are removed. *

                    * The default value is false. *

                    * If this property is set to true, only essential line breaks are retained in the output. *

                    * Setting this property automatically engages the {@link #setCollapseWhiteSpace(boolean) CollapseWhiteSpace} option, regardless of its property setting. *

                    * It is recommended to set the {@link #setTidyTags(boolean) TidyTags} property when this option is used so that non-essential line breaks are also removed from tags. * * @param removeLineBreaks specifies whether all non-essential line breaks are removed. * @return this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement. * @see #getRemoveLineBreaks() */ SourceFormatter setRemoveLineBreaks(final boolean removeLineBreaks) { this.removeLineBreaks=removeLineBreaks; return this; } /** * Indicates whether all non-essential line breaks are removed. *

                    * See the {@link #setRemoveLineBreaks(boolean removeLineBreaks)} method for a full description of this property. * * @return true if all non-essential line breaks are removed, otherwise false. */ boolean getRemoveLineBreaks() { return removeLineBreaks; } /** * Sets whether all elements are to be indented, including {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements} and those with preformatted contents. *

                    * The default value is false. *

                    * If this property is set to true, every element appears indented on a new line, including * {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements}. *

                    * This generates output that is a good representation of the actual document element hierarchy, * but is very likely to introduce white space that compromises the functional equivalency of the document. * * @param indentAllElements specifies whether all elements are to be indented. * @return this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement. * @see #getIndentAllElements() */ public SourceFormatter setIndentAllElements(final boolean indentAllElements) { this.indentAllElements=indentAllElements; return this; } /** * Indicates whether all elements are to be indented, including {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements} and those with preformatted contents. *

                    * See the {@link #setIndentAllElements(boolean)} method for a full description of this property. * * @return true if all elements are to be indented, otherwise false. */ public boolean getIndentAllElements() { return indentAllElements; } /** * Sets the string to be used to represent a newline in the output. *

                    * The default is to use the same new line string as is used in the source document, which is determined via the {@link Source#getNewLine()} method. * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document, * or using the value from the static {@link Config#NewLine} property. *

                    * Specifying a null argument resets the property to its default value, which is to use the same new line string as is used in the source document. * * @param newLine the string to be used to represent a newline in the output, may be null. * @return this SourceFormatter instance, allowing multiple property setting methods to be chained in a single statement. * @see #getNewLine() */ public SourceFormatter setNewLine(final String newLine) { this.newLine=newLine; return this; } /** * Returns the string to be used to represent a newline in the output. *

                    * See the {@link #setNewLine(String)} method for a full description of this property. * * @return the string to be used to represent a newline in the output. */ public String getNewLine() { if (newLine==null) newLine=segment.source.getBestGuessNewLine(); return newLine; } /** This class does the actual work, but is first passed final copies of all the parameters for efficiency. */ private static final class Processor { private final Segment segment; private final CharSequence sourceText; private final String indentString; private final boolean tidyTags; private final boolean collapseWhiteSpace; private final boolean removeLineBreaks; // Indicates whether all non-essential line breaks are removed. Must be used with collapseWhiteSpace=true. private final boolean indentAllElements; private final boolean indentScriptElements; // at present this parameter is tied to indentAllElements. SCRIPT elements need to be inline to keep functional equivalency of output private final String newLine; private Appendable appendable; private Tag nextTag; private int index; public Processor(final Segment segment, final String indentString, final boolean tidyTags, final boolean collapseWhiteSpace, final boolean removeLineBreaks, final boolean indentAllElements, final boolean indentScriptElements, final String newLine) { this.segment=segment; sourceText=segment.source.toString(); this.indentString=indentString; this.tidyTags=tidyTags; this.collapseWhiteSpace=collapseWhiteSpace || removeLineBreaks; this.removeLineBreaks=removeLineBreaks; this.indentAllElements=indentAllElements; this.indentScriptElements=indentScriptElements; this.newLine=newLine; } public void appendTo(final Appendable appendable) throws IOException { this.appendable=appendable; if (segment instanceof Source) ((Source)segment).fullSequentialParse(); nextTag=segment.source.getNextTag(segment.begin); index=segment.begin; appendContent(segment.end,segment.getChildElements(),0); } private void 
appendContent(final int end, final List childElements, final int depth) throws IOException { assert index<=end; for (Element element : childElements) { final int elementBegin=element.begin; if (elementBegin>=end) break; if (indentAllElements) { appendText(elementBegin,depth); appendElement(element,depth,end,false,false); } else { if (inlinable(element)) continue; // skip over elements that can be inlined. appendText(elementBegin,depth); final String elementName=element.getName(); if (elementName==HTMLElementName.PRE || elementName==HTMLElementName.TEXTAREA) { appendElement(element,depth,end,true,true); } else if (elementName==HTMLElementName.SCRIPT) { appendElement(element,depth,end,true,false); } else { appendElement(element,depth,end,false,!removeLineBreaks && containsOnlyInlineLevelChildElements(element)); } } } appendText(end,depth); assert index==end; } private boolean inlinable(final Element element) { // returns true if the specified element should be inlined final StartTagType startTagType=element.getStartTag().getStartTagType(); if (startTagType==StartTagType.DOCTYPE_DECLARATION) return false; if (startTagType!=StartTagType.NORMAL) return true; // element is a normal type final String elementName=element.getName(); if (elementName==HTMLElementName.SCRIPT) return !indentScriptElements; if (removeLineBreaks && !HTMLElements.getElementNames().contains(elementName)) return true; // inline non-HTML elements if removing line breaks if (!HTMLElements.getInlineLevelElementNames().contains(elementName)) return false; // element is inline type if (removeLineBreaks) return true; return containsOnlyInlineLevelChildElements(element); // only inline if it doesn't illegally contain non-inline elements } private void appendText(final int end, int depth) throws IOException { assert index<=end; if (index==end) return; while (Segment.isWhiteSpace(sourceText.charAt(index))) if (++index==end) return; // trim whitespace. 
appendIndent(depth); if (collapseWhiteSpace) { appendTextCollapseWhiteSpace(end,depth); } else { appendTextInline(end,depth,false); } appendFormattingNewLine(); assert index==end; } private void appendElement(final Element element, final int depth, final int end, final boolean preformatted, boolean renderContentInline) throws IOException { assert index==element.begin; assert indexendTag.begin) { if (!renderContentInline) appendIndent(depth); assert index==endTag.begin; appendTag(endTag,depth,end); appendFormattingNewLine(); } else if (renderContentInline) { appendFormattingNewLine(); } assert index==Math.min(element.end,end) : index; } private void updateNextTag() { // ensures that nextTag is up to date while (nextTag!=null) { if (nextTag.begin>=index) return; nextTag=nextTag.getNextTag(); } } private void appendIndentedScriptContent(final int end, final int depth) throws IOException { assert index=textLength) return; // trim whitespace. appendEssentialNewLine(); appendIndent(subsequentLineDepth); i=appendSpecifiedLine(text,i); } while (i=textLength) return i; } } private boolean appendTextInline(final int end, int depth, final boolean increaseIndentAfterFirstLineBreak) throws IOException { // returns true if all text was on one line, otherwise false assert index=end) { assert index<=end; return; } if (!singleLineContent) { appendEssentialNewLine(); // some server or client side scripting languages might need the final new line appendIndent(depth); } assert index==endTag.begin; appendTag(endTag,depth,end); } assert index<=end; } private void appendIndent(final int depth) throws IOException { if (!removeLineBreaks) for (int x=0; x childElements=element.getChildElements(); if (childElements.isEmpty()) return true; for (Element childElement : childElements) { final String elementName=childElement.getName(); if (elementName==HTMLElementName.SCRIPT || !HTMLElements.getInlineLevelElementNames().contains(elementName)) return false; if 
(!containsOnlyInlineLevelChildElements(childElement)) return false; } return true; } } } jericho-html-3.1/src/java/net/htmlparser/jericho/IntStringHashMap.java0000644000175000017500000001067411204550410026030 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * This is an internal class used to efficiently map integers to strings, which is used in the CharacterEntityReference class. */ final class IntStringHashMap { private static final int DEFAULT_INITIAL_CAPACITY=15; private static final float DEFAULT_LOAD_FACTOR=0.75f; private transient Entry[] entries; // length must always be a power of 2. 
private transient int size; private int threshold; private float loadFactor; private int bitmask; // always entries.length-1 public IntStringHashMap(int initialCapacity, final float loadFactor) { this.loadFactor=loadFactor; int capacity=1; while (capacity=threshold) increaseCapacity(); return null; } private void increaseCapacity() { final int oldCapacity=entries.length; final Entry[] oldEntries=entries; entries=new Entry[oldCapacity<<1]; bitmask=entries.length-1; for (Entry entry : oldEntries) { while (entry!=null) { final Entry next=entry.next; final int index=getIndex(entry.key); entry.next=entries[index]; entries[index]=entry; entry=next; } } threshold=(int)(entries.length*loadFactor); } public String remove(final int key) { final int index=getIndex(key); Entry previous=null; for (Entry entry=entries[index]; entry!=null; entry=(previous=entry).next) { if (key==entry.key) { if (previous==null) entries[index]=entry.next; else previous.next=entry.next; size--; return entry.value; } } return null; } public void clear() { for (int i=bitmask; i>=0; i--) entries[i]=null; size=0; } public boolean containsValue(final String value) { if (value==null) { for (int i=bitmask; i>=0; i--) for (Entry entry=entries[i]; entry!=null; entry=entry.next) if (entry.value==null) return true; } else { for (int i=bitmask; i>=0; i--) for (Entry entry=entries[i]; entry!=null; entry=entry.next) if (value.equals(entry.value)) return true; } return false; } private static final class Entry { final int key; String value; Entry next; public Entry(final int key, final String value, final Entry next) { this.key=key; this.value=value; this.next=next; } } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeServerCommonEscaped.java0000644000175000017500000000227611204550410031064 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library 
is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; final class StartTagTypeServerCommonEscaped extends StartTagTypeGenericImplementation { static final StartTagTypeServerCommonEscaped INSTANCE=new StartTagTypeServerCommonEscaped(); private StartTagTypeServerCommonEscaped() { super("escaped common server tag","<\\%","%>",null,true); } } jericho-html-3.1/src/java/net/htmlparser/jericho/SourceCompactor.java0000644000175000017500000001041311204550410025744 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; import java.util.*; import java.io.*; import java.net.*; /** * Compacts HTML source by removing all unnecessary white space. *

                    * Use one of the following methods to obtain the output: *

                      *
                    • {@link #writeTo(Writer)}
                    • *
                    • {@link #appendTo(Appendable)}
                    • *
                    • {@link #toString()}
                    • *
                    • {@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}
                    • *
                    *

                    * The output text is functionally equivalent to the original source and should be rendered identically. *

                    * Compacting an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically.
 */
public final class SourceCompactor implements CharStreamSource {
    private final Segment segment;
    private String newLine=null;

    /**
     * Creates a compactor for the HTML contained in the given {@link Segment}.
     * @param segment  the segment containing the HTML to be compacted.
     */
    public SourceCompactor(final Segment segment) {
        this.segment=segment;
    }

    // Documentation inherited from CharStreamSource
    public void writeTo(final Writer writer) throws IOException {
        appendTo(writer);
        writer.flush();
    }

    // Documentation inherited from CharStreamSource
    public void appendTo(final Appendable appendable) throws IOException {
        // Compaction is delegated to a SourceFormatter configured to tidy tags and remove line breaks.
        // The newLine field is passed as is, so it may still be null here.
        new SourceFormatter(segment)
            .setTidyTags(true)
            .setNewLine(newLine)
            .setRemoveLineBreaks(true)
            .appendTo(appendable);
    }

    // Documentation inherited from CharStreamSource
    public long getEstimatedMaximumOutputLength() {
        return segment.length();
    }

    // Documentation inherited from CharStreamSource
    public String toString() {
        return CharStreamSourceUtil.toString(this);
    }

    /**
     * Sets the string to be used to represent a newline in the output.
     * <p>
     * The default is to use the same new line string as is used in the source document, which is determined via the {@link Source#getNewLine()} method.
     * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document,
     * or using the value from the static {@link Config#NewLine} property.
     * <p>
     * Specifying a <code>null</code> argument resets the property to its default value, which is to use the same new line string as is used in the source document.
     *
     * @param newLine  the string to be used to represent a newline in the output, may be <code>null</code>.
     * @return this <code>SourceCompactor</code> instance, allowing multiple property setting methods to be chained in a single statement.
     * @see #getNewLine()
     */
    public SourceCompactor setNewLine(final String newLine) {
        this.newLine=newLine;
        return this;
    }

    /**
     * Returns the string to be used to represent a newline in the output.
     * <p>
     * If no new line string has been set, the best guess reported by the source document is stored in this object and returned.
     * <p>
     * See the {@link #setNewLine(String)} method for a full description of this property.
     *
     * @return the string to be used to represent a newline in the output.
     */
    public String getNewLine() {
        if (newLine!=null) return newLine;
        newLine=segment.source.getBestGuessNewLine();
        return newLine;
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/Logger.java0000644000175000017500000001020011204550410024045 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; /** * Defines the interface for handling log messages. *

                    * It is not usually necessary for users to create implementations of this interface, as * the {@link LoggerProvider} interface contains several predefined instances which provide the most commonly required Logger implementations. *

                    * By default, logging is configured automatically according to the algorithm described in the static {@link Config#LoggerProvider} property. *

                    * An instance of a class that implements this interface is used by calling the {@link Source#setLogger(Logger)} method on the relevant {@link Source} object. *

                    * Four logging levels are defined in this interface. * The logging level is specified only by the use of different method names, there is no class or type defining the levels. * This makes the code required to wrap other logging frameworks much simpler and more efficient. *

                    * The four logging levels are: *

                      *
                    • {@link #error(String) ERROR} *
                    • {@link #warn(String) WARN} *
                    • {@link #info(String) INFO} *
                    • {@link #debug(String) DEBUG} *
                    *

                    * IMPLEMENTATION NOTE: Ideally the <code>java.util.logging.Logger</code> class could have been used as a basis for logging, even if used to define a wrapper
 * around other logging frameworks.
 * This would have avoided the need to define yet another logging interface, but because <code>java.util.logging.Logger</code> is implemented very poorly,
 * it is quite tricky to extend it as a wrapper.
 * Other logging wrapper frameworks such as SLF4J or
 * Jakarta Commons Logging provide good logging interfaces, but to avoid
 * introducing dependencies it was decided to create this new interface.
 *
 * @see Config#LoggerProvider
 */
public interface Logger {
    /**
     * Logs a message at the ERROR level.
     * @param message  the message to log.
     */
    void error(String message);

    /**
     * Logs a message at the WARN level.
     * @param message  the message to log.
     */
    void warn(String message);

    /**
     * Logs a message at the INFO level.
     * @param message  the message to log.
     */
    void info(String message);

    /**
     * Logs a message at the DEBUG level.
     * @param message  the message to log.
     */
    void debug(String message);

    /**
     * Indicates whether logging is enabled at the ERROR level.
     * @return <code>true</code> if logging is enabled at the ERROR level, otherwise <code>false</code>.
     */
    boolean isErrorEnabled();

    /**
     * Indicates whether logging is enabled at the WARN level.
     * @return <code>true</code> if logging is enabled at the WARN level, otherwise <code>false</code>.
     */
    boolean isWarnEnabled();

    /**
     * Indicates whether logging is enabled at the INFO level.
     * @return <code>true</code> if logging is enabled at the INFO level, otherwise <code>false</code>.
     */
    boolean isInfoEnabled();

    /**
     * Indicates whether logging is enabled at the DEBUG level.
     * @return <code>true</code> if logging is enabled at the DEBUG level, otherwise <code>false</code>.
     */
    boolean isDebugEnabled();
}jericho-html-3.1/src/java/net/htmlparser/jericho/TagTypeRegister.java0000644000175000017500000002047711204550410025731 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; import java.util.*; final class TagTypeRegister { private TagTypeRegister parent=null; private char ch=NULL_CHAR; private TagTypeRegister[] children=null; // always in alphabetical order private TagType[] tagTypes=null; // in descending order of priority private static final char NULL_CHAR='\u0000'; private static final TagType[] DEFAULT_TAG_TYPES={ StartTagType.UNREGISTERED, StartTagType.NORMAL, StartTagType.COMMENT, StartTagType.MARKUP_DECLARATION, StartTagType.DOCTYPE_DECLARATION, StartTagType.CDATA_SECTION, StartTagType.XML_PROCESSING_INSTRUCTION, StartTagType.XML_DECLARATION, StartTagType.SERVER_COMMON, StartTagType.SERVER_COMMON_ESCAPED, EndTagType.UNREGISTERED, EndTagType.NORMAL }; private static TagTypeRegister root=new TagTypeRegister(); static { add(DEFAULT_TAG_TYPES); } private TagTypeRegister() {} private static synchronized void add(final TagType[] tagTypes) { for (int i=0; i getList() { final ArrayList list=new ArrayList(); root.addTagTypesToList(list); return list; } private void addTagTypesToList(final List list) { if (tagTypes!=null) for (int i=tagTypes.length-1; i>=0; i--) list.add(tagTypes[i]); if (children!=null) for (TagTypeRegister tagTypeRegister : children) tagTypeRegister.addTagTypesToList(list); } public static final String getDebugInfo() { return root.appendDebugInfo(new StringBuilder(),0).toString(); } static final class ProspectiveTagTypeIterator implements Iterator { private TagTypeRegister cursor; private int tagTypeIndex=0; public ProspectiveTagTypeIterator(final Source source, final int pos) { // returns empty iterator if pos out of range final ParseText parseText=source.getParseText(); cursor=root; int posIndex=0; try { // find deepest node that matches the text at pos: while (true) { final TagTypeRegister child=cursor.getChild(parseText.charAt(pos+(posIndex++))); if (child==null) break; cursor=child; } } catch (IndexOutOfBoundsException ex) {} // not avoiding this exception is expensive but only 
happens in the very rare circumstance that the end of file is encountered in the middle of a potential tag. // go back up until we reach a node that contains a list of tag types: while (cursor.tagTypes==null) if ((cursor=cursor.parent)==null) break; } public boolean hasNext() { return cursor!=null; } public TagType next() { final TagType[] tagTypes=cursor.tagTypes; final TagType nextTagType=tagTypes[tagTypeIndex]; if ((++tagTypeIndex)==tagTypes.length) { tagTypeIndex=0; do {cursor=cursor.parent;} while (cursor!=null && cursor.tagTypes==null); } return nextTagType; } public void remove() { throw new UnsupportedOperationException(); } } public String toString() { return appendDebugInfo(new StringBuilder(),0).toString(); } private StringBuilder appendDebugInfo(final StringBuilder sb, final int level) { for (int i=0; i> 1; final char midChar=children[mid].ch; if (midCharch) high=mid-1; else return children[mid]; } return null; } private void addChild(final TagTypeRegister child) { // assumes the character associated with the child register does not already exist in this register's children. if (children==null) { children=new TagTypeRegister[] {child}; } else { final TagTypeRegister[] newChildren=new TagTypeRegister[children.length+1]; int i=0; while (i0; i--) tagTypes[i]=tagTypes[i-1]; tagTypes[0]=tagType; } } private void removeTagType(final TagType tagType) { final int indexOfTagType=indexOfTagType(tagType); if (indexOfTagType==-1) return; if (tagTypes.length==1) { tagTypes=null; return; } final TagType[] newTagTypes=new TagType[tagTypes.length-1]; for (int i=0; iJericho HTML Parser 3.1

  • *

    * A Java library allowing analysis and manipulation of parts of an HTML document, including server-side tags, * while reproducing verbatim any unrecognised or invalid HTML. * Also provides high-level HTML form manipulation functions. *

    * For an introduction to the API, the documentation of the {@link net.htmlparser.jericho.Source} class is the best place to start. *

    * For a summary of features and sample applications, visit the homepage at * http://jerichohtml.sourceforge.net *

    * For downloads, support and updates visit the SourceForge.net project page at * http://sourceforge.net/projects/jerichohtml/ *

    * The Jericho HTML Parser is an open source library released under both the * Eclipse Public License (EPL) and * GNU Lesser General Public License (LGPL). * You are therefore free to use it in commercial applications subject to the terms detailed in either one of these licence documents. */ package net.htmlparser.jericho;jericho-html-3.1/src/java/net/htmlparser/jericho/TextExtractor.java0000644000175000017500000004620311214016056025466 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; import java.io.*; import java.net.*; /** * Extracts the textual content from HTML markup. *

    * The output is ideal for feeding into a text search engine such as Apache Lucene, * especially when the {@link #setIncludeAttributes(boolean) IncludeAttributes} property has been set to true. *

    * Use one of the following methods to obtain the output: *

      *
    • {@link #writeTo(Writer)}
    • *
    • {@link #appendTo(Appendable)}
    • *
    • {@link #toString()}
    • *
    • {@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}
    • *
    *

    * The process removes all of the tags and * {@linkplain CharacterReference#decodeCollapseWhiteSpace(CharSequence) decodes the result, collapsing all white space}. * A space character is included in the output where a normal tag is present in the source, * unless the tag belongs to an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. * An exception to this is the {@link HTMLElementName#BR BR} element, which is also converted to a space despite being an inline-level element. *

    * Text inside {@link HTMLElementName#SCRIPT SCRIPT} and {@link HTMLElementName#STYLE STYLE} elements contained within this segment * is ignored. *

    * Setting the {@link #setExcludeNonHTMLElements(boolean) ExcludeNonHTMLElements} property results in the exclusion of any content within a * non-HTML element. *

    * See the {@link #excludeElement(StartTag)} method for details on how to implement a more complex mechanism to determine whether the * {@linkplain Element#getContent() content} of each {@link Element} is to be excluded from the output. *

    * All tags that are not normal tags, such as {@linkplain TagType#isServerTag() server tags}, * {@linkplain StartTagType#COMMENT comments} etc., are removed from the output without adding white space to the output. *

    * Note that segments on which the {@link Segment#ignoreWhenParsing()} method has been called are treated as text rather than markup, * resulting in their inclusion in the output. * To remove specific segments before extracting the text, create an {@link OutputDocument} and call its {@link OutputDocument#remove(Segment) remove(Segment)} or * {@link OutputDocument#replaceWithSpaces(int,int) replaceWithSpaces(int begin, int end)} method for each segment to be removed. * Then create a new source document using {@link Source#Source(CharSequence) new Source(outputDocument.toString())} * and perform the text extraction on this new source object. *

    * Extracting the text from an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically. *

    * To perform a simple rendering of HTML markup into text, which is more readable than the output of this class, use the {@link Renderer} class instead. *

    *
    Example:
    *
    Using the default settings, the source segment:
    * "<div><b>O</b>ne</div><div title="Two"><b>Th</b><script>//a script </script>ree</div>"
    * produces the text "One Two Three". *
    */
public class TextExtractor implements CharStreamSource {
    private final Segment segment;
    private boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces;
    private boolean includeAttributes=false;
    private boolean excludeNonHTMLElements=false;

    /** Decides whether the value of one attribute of one start tag is included in the output. */
    private interface AttributeIncludeChecker {
        boolean includeAttribute(StartTag startTag, Attribute attribute);
    }

    /** Checker that includes the attribute value unconditionally. */
    private static final AttributeIncludeChecker ALWAYS_INCLUDE=new AttributeIncludeChecker() {
        public boolean includeAttribute(final StartTag startTag, final Attribute attribute) {
            return true;
        }
    };

    /** Checker that includes the attribute value only if the start tag also has a <code>name</code> attribute. */
    private static final AttributeIncludeChecker INCLUDE_IF_NAME_ATTRIBUTE_PRESENT=new AttributeIncludeChecker() {
        public boolean includeAttribute(final StartTag startTag, final Attribute attribute) {
            return startTag.getAttributes().get("name")!=null;
        }
    };

    // Maps each possibly included attribute name to an AttributeIncludeChecker instance.
    // IMPORTANT: this field must stay textually AFTER the checker constants above. Static initialisers run in
    // declaration order, so building the map before the checkers are assigned would store null for every
    // attribute name and make includeAttribute(StartTag,Attribute) reject every attribute.
    private static final Map<String,AttributeIncludeChecker> DEFAULT_ATTRIBUTE_INCLUDE_CHECKERS=initDefaultAttributeIncludeCheckerMap();

    /**
     * Constructs a new <code>TextExtractor</code> based on the specified {@link Segment}.
     * @param segment  the segment from which the text will be extracted.
     * @see Segment#getTextExtractor()
     */
    public TextExtractor(final Segment segment) {
        this.segment=segment;
    }

    // Documentation inherited from CharStreamSource
    public void writeTo(final Writer writer) throws IOException {
        appendTo(writer);
        writer.flush();
    }

    // Documentation inherited from CharStreamSource
    public void appendTo(final Appendable appendable) throws IOException {
        appendable.append(toString());
    }

    // Documentation inherited from CharStreamSource
    public long getEstimatedMaximumOutputLength() {
        return segment.length();
    }

    // Documentation inherited from CharStreamSource
    @Override
    public String toString() {
        return new Processor(segment,getConvertNonBreakingSpaces(),getIncludeAttributes(),getExcludeNonHTMLElements()).toString();
    }

    /**
     * Sets whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
     * <p>
     * The default value is that of the static {@link Config#ConvertNonBreakingSpaces} property at the time the <code>TextExtractor</code> is instantiated.
     *
     * @param convertNonBreakingSpaces  specifies whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
     * @return this <code>TextExtractor</code> instance, allowing multiple property setting methods to be chained in a single statement.
     * @see #getConvertNonBreakingSpaces()
     */
    public TextExtractor setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces) {
        this.convertNonBreakingSpaces=convertNonBreakingSpaces;
        return this;
    }

    /**
     * Indicates whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
     * <p>
     * See the {@link #setConvertNonBreakingSpaces(boolean)} method for a full description of this property.
     *
     * @return <code>true</code> if non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces, otherwise <code>false</code>.
     */
    public boolean getConvertNonBreakingSpaces() {
        return convertNonBreakingSpaces;
    }

    /**
     * Sets whether any attribute values are included in the output.
     * <p>
     * If the value of this property is <code>true</code>, then each attribute still has to match the conditions implemented in the
     * {@link #includeAttribute(StartTag,Attribute)} method in order for its value to be included in the output.
     * <p>
     * The default value is <code>false</code>.
     *
     * @param includeAttributes  specifies whether any attribute values are included in the output.
     * @return this <code>TextExtractor</code> instance, allowing multiple property setting methods to be chained in a single statement.
     * @see #getIncludeAttributes()
     */
    public TextExtractor setIncludeAttributes(boolean includeAttributes) {
        this.includeAttributes=includeAttributes;
        return this;
    }

    /**
     * Indicates whether any attribute values are included in the output.
     * <p>
     * See the {@link #setIncludeAttributes(boolean)} method for a full description of this property.
     *
     * @return <code>true</code> if any attribute values are included in the output, otherwise <code>false</code>.
     */
    public boolean getIncludeAttributes() {
        return includeAttributes;
    }

    /**
     * Indicates whether the value of the specified {@linkplain Attribute attribute} in the specified {@linkplain StartTag start tag} is included in the output.
     * <p>
     * This method is ignored if the {@link #setIncludeAttributes(boolean) IncludeAttributes} property is set to <code>false</code>, in which case
     * no attribute values are included in the output.
     * <p>
     * If the {@link #setIncludeAttributes(boolean) IncludeAttributes} property is set to <code>true</code>, every attribute of every
     * start tag encountered in the segment is checked using this method to determine whether the value of the attribute should be included in the output.
     * <p>
     * The default implementation of this method returns <code>true</code> if the {@linkplain Attribute#getName() name} of the specified {@linkplain Attribute attribute}
     * is one of
     * <code>title</code>,
     * <code>alt</code>,
     * <code>label</code>,
     * <code>summary</code>,
     * <code>content</code>*, or
     * <code>href</code>,
     * but the method can be overridden in a subclass to perform a check of arbitrary complexity on each attribute.
     * <p>
     * * The value of a <code>content</code> attribute is only included if a
     * <code>name</code> attribute is also present in the specified start tag,
     * as the <code>content</code> attribute of a {@link HTMLElementName#META META} tag only contains human readable text if the <code>name</code> attribute is used as opposed to an
     * <code>http-equiv</code> attribute.
     * <p>
     * <dl>
     *  <dt>Example:</dt>
     *  <dd>
     *   To include only the value of <code>title</code> and
     *   <code>alt</code> attributes:
     *   <pre>
     * final Set&lt;String&gt; includeAttributeNames=new HashSet&lt;String&gt;(Arrays.asList(new String[] {"title","alt"}));
     * TextExtractor textExtractor=new TextExtractor(segment) {
     *     public boolean includeAttribute(StartTag startTag, Attribute attribute) {
     *         return includeAttributeNames.contains(attribute.getKey());
     *     }
     * };
     * textExtractor.setIncludeAttributes(true);
     * String extractedText=textExtractor.toString();
     *   </pre>
     *  </dd>
     * </dl>
     * @param startTag  the start tag containing the attribute.
     * @param attribute  the attribute to check for inclusion.
     * @return <code>true</code> if the value of the specified attribute should be included in the output, otherwise <code>false</code>.
     */
    public boolean includeAttribute(final StartTag startTag, final Attribute attribute) {
        final AttributeIncludeChecker attributeIncludeChecker=DEFAULT_ATTRIBUTE_INCLUDE_CHECKERS.get(attribute.getKey());
        if (attributeIncludeChecker==null) return false;
        return attributeIncludeChecker.includeAttribute(startTag,attribute);
    }

    /**
     * Sets whether the content of non-HTML elements is excluded from the output.
     * <p>
     * The default value is <code>false</code>, meaning that content from all elements meeting the other criteria is included.
     *
     * @param excludeNonHTMLElements  specifies whether the content of non-HTML elements is excluded from the output.
     * @return this <code>TextExtractor</code> instance, allowing multiple property setting methods to be chained in a single statement.
     * @see #getExcludeNonHTMLElements()
     */
    public TextExtractor setExcludeNonHTMLElements(boolean excludeNonHTMLElements) {
        this.excludeNonHTMLElements=excludeNonHTMLElements;
        return this;
    }

    /**
     * Indicates whether the content of non-HTML elements is excluded from the output.
     * <p>
     * See the {@link #setExcludeNonHTMLElements(boolean)} method for a full description of this property.
     *
     * @return <code>true</code> if the content of non-HTML elements is excluded from the output, otherwise <code>false</code>.
     */
    public boolean getExcludeNonHTMLElements() {
        return excludeNonHTMLElements;
    }

    /**
     * Indicates whether the text inside the {@link Element} of the specified start tag should be excluded from the output.
     * <p>
     * During the text extraction process, every start tag encountered in the segment is checked using this method to determine whether the text inside its
     * {@linkplain StartTag#getElement() associated element} should be excluded from the output.
     * <p>
     * The default implementation of this method is to always return <code>false</code>, so that every element is included,
     * but the method can be overridden in a subclass to perform a check of arbitrary complexity on each start tag.
     * <p>
     * All elements nested inside an excluded element are also implicitly excluded, as are all
     * {@link HTMLElementName#SCRIPT SCRIPT} and {@link HTMLElementName#STYLE STYLE} elements.
     * Such elements are skipped over without calling this method, so there is no way to include them by overriding the method.
     * <p>
     * <dl>
     *  <dt>Example:</dt>
     *  <dd>
     *   To extract the text from a <code>segment</code>, excluding any text inside elements with the attribute <code>class="NotIndexed"</code>:
     *   <pre>
     * TextExtractor textExtractor=new TextExtractor(segment) {
     *     public boolean excludeElement(StartTag startTag) {
     *         return "NotIndexed".equalsIgnoreCase(startTag.getAttributeValue("class"));
     *     }
     * };
     * String extractedText=textExtractor.toString();
     *   </pre>
     *  </dd>
     * </dl>
     * @param startTag  the start tag of the element to check for exclusion.
     * @return <code>true</code> if the text inside the {@link Element} of the specified start tag should be excluded from the output, otherwise <code>false</code>.
     */
    public boolean excludeElement(final StartTag startTag) {
        return false;
    }

    /**
     * Builds the table used by the default {@link #includeAttribute(StartTag,Attribute)} implementation.
     * Only called from the static initialiser of the map field, after the checker constants have been assigned.
     */
    private static Map<String,AttributeIncludeChecker> initDefaultAttributeIncludeCheckerMap() {
        final Map<String,AttributeIncludeChecker> checkers=new HashMap<String,AttributeIncludeChecker>();
        checkers.put("title",ALWAYS_INCLUDE); // add title attribute
        checkers.put("alt",ALWAYS_INCLUDE); // add alt attribute (APPLET, AREA, IMG and INPUT elements)
        checkers.put("label",ALWAYS_INCLUDE); // add label attribute (OPTION and OPTGROUP elements)
        checkers.put("summary",ALWAYS_INCLUDE); // add summary attribute (TABLE element)
        checkers.put("content",INCLUDE_IF_NAME_ATTRIBUTE_PRESENT); // add content attribute (META element)
        checkers.put("href",ALWAYS_INCLUDE); // add href attribute (A, AREA and LINK elements)
        // don't bother with the prompt attribute from the ISINDEX element as the element is deprecated and very rarely used.
        return checkers;
    }

    /**
     * This class does the actual work, but is first passed final copies of all the parameters for efficiency.
     * Note at present this is not implemented in a memory-efficient manner.
     * Once the CharacterReference.decodeCollapseWhiteSpace functionality is available as a FilterWriter (possible with java 5 support),
     * the main algorithm can be implemented in the writeTo(Writer) method to allow for more memory-efficient processing.
     * <p>
     * Deliberately an inner (non-static) class: it calls the overridable
     * {@link TextExtractor#excludeElement(StartTag)} and {@link TextExtractor#includeAttribute(StartTag,Attribute)} methods of the enclosing instance.
     */
    private final class Processor {
        private final Segment segment;
        private final Source source;
        private final boolean convertNonBreakingSpaces;
        private final boolean includeAttributes;
        private final boolean excludeNonHTMLElements;

        public Processor(final Segment segment, final boolean convertNonBreakingSpaces, final boolean includeAttributes, final boolean excludeNonHTMLElements) {
            this.segment=segment;
            source=segment.source;
            this.convertNonBreakingSpaces=convertNonBreakingSpaces;
            this.includeAttributes=includeAttributes;
            this.excludeNonHTMLElements=excludeNonHTMLElements;
        }

        /**
         * Walks every node of the segment, collects the text that is to be kept, and returns it decoded with white space collapsed.
         */
        @Override
        public String toString() {
            final StringBuilder sb=new StringBuilder(segment.length());
            for (NodeIterator nodeIterator=new NodeIterator(segment); nodeIterator.hasNext();) {
                final Segment node=nodeIterator.next();
                if (!(node instanceof Tag)) {
                    // plain text between tags is copied through unchanged
                    sb.append(node);
                    continue;
                }
                final Tag tag=(Tag)node;
                if (tag.getTagType().isServerTag()) {
                    // elementContainsMarkup should be made into a TagType property one day.
                    // for the time being assume all server element content is code, although this is not true for some Mason elements.
                    final boolean elementContainsMarkup=false;
                    if (!elementContainsMarkup) {
                        final Element element=tag.getElement();
                        if (element!=null && element.getEnd()>tag.getEnd()) nodeIterator.skipToPos(element.getEnd());
                    }
                    continue;
                }
                if (tag.getTagType()==StartTagType.NORMAL) {
                    final StartTag startTag=(StartTag)tag;
                    // NOTE(review): the == comparisons below rely on tag names being the same String instances
                    // as the HTMLElementName constants - presumably guaranteed by the parser; confirm before changing.
                    if (tag.name==HTMLElementName.SCRIPT || tag.name==HTMLElementName.STYLE || excludeElement(startTag) || (excludeNonHTMLElements && !HTMLElements.getElementNames().contains(tag.name))) {
                        nodeIterator.skipToPos(startTag.getElement().getEnd());
                        continue;
                    }
                    if (includeAttributes) {
                        for (Attribute attribute : startTag.getAttributes()) {
                            if (!includeAttribute(startTag,attribute)) continue;
                            // NOTE(review): assumes getValueSegment() returns null for an attribute without a value - confirm.
                            // Without this guard StringBuilder would append the literal text "null".
                            final Segment valueSegment=attribute.getValueSegment();
                            if (valueSegment!=null) sb.append(' ').append(valueSegment).append(' ');
                        }
                    }
                }
                // Treat both start and end tags not belonging to inline-level elements as whitespace:
                if (tag.getName()==HTMLElementName.BR || !HTMLElements.getInlineLevelElementNames().contains(tag.getName())) sb.append(' ');
            }
            final String decodedText=CharacterReference.decodeCollapseWhiteSpace(sb,convertNonBreakingSpaces);
            return decodedText;
        }
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/FormFields.java0000644000175000017500000010365711204550410024703 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents a collection of {@link FormField} objects. *

    * This class provides the main interface for the analysis and manipulation of {@linkplain FormControl form controls}. * A FormFields object is a collection of {@link FormField} objects, with each form field consisting of * a group of {@linkplain FormControl form controls} having the same {@linkplain FormControl#getName() name}. *

    * The functionality provided by this class can be used to accomplish two main tasks: *

      *
    1. * Modify the submission values of the constituent form controls * for subsequent output in an {@link OutputDocument}. *

      * The methods available for this purpose are:
      * {@link #getValues(String) List<String> getValues(String fieldName)}
      * {@link #getDataSet() Map<String,String[]> getDataSet()}
      * {@link #clearValues() void clearValues()}
      * {@link #setDataSet(Map) void setDataSet(Map<String,String[]>)}
      * {@link #setValue(String,String) boolean setValue(String fieldName, String value)}
      * {@link #addValue(String,String) boolean addValue(String fieldName, String value)}
      *

      * Although the {@link FormField} and {@link FormControl} classes provide methods for directly modifying * the submission values of individual form fields and controls, it is generally recommended to use the interface provided by this * (the FormFields) class unless there is a specific requirement for the lower level functionality. *

      * The display characteristics of individual controls, * such as whether the control is {@linkplain FormControl#setDisabled(boolean) disabled}, replaced with a simple * {@linkplain FormControlOutputStyle#DISPLAY_VALUE value}, or {@linkplain FormControlOutputStyle#REMOVE removed} altogether, * can only be set on the individual {@link FormControl} objects. * See below for information about retrieving a specific FormControl object from the FormFields object. *

    2. * Convert data from a form data set * (represented as a field data set) into a simple array format, * suitable for storage in a tabular format such as a database table or .CSV file. *

      * The methods available for this purpose are:
      * {@link #getColumnLabels() String[] getColumnLabels()}
      * {@link #getColumnValues(Map) String[] getColumnValues(Map)}
      * {@link #getColumnValues() String[] getColumnValues()}
      *

      * The {@link Util} class contains a method called {@link Util#outputCSVLine(Writer,String[]) outputCSVLine(Writer,String[])} * which writes the String[] output of these methods to the specified Writer in .CSV format. *

      * The implementation of these methods makes use of certain properties * in the {@link FormField} class that describe the structure of the data in each field. * These properties can be utilised directly in the event that a * form data set is to be converted * from its normal format into some other type of data structure. *

    *

    * To access a specific {@link FormControl} from a FormFields object, use: *

      *
    • formFields.{@link #get(String) get(fieldName)}.{@link FormField#getFormControl() getFormControl()} * if the control is the only one with the specified {@linkplain FormControl#getName() name}, or *
    • formFields.{@link #get(String) get(fieldName)}.{@link FormField#getFormControl(String) getFormControl(predefinedValue)} * to retrieve the control having the specified {@linkplain FormControl#getPredefinedValue() predefined value} * if it is part of a {@linkplain FormField field} containing multiple controls. *
    *

    * The term field data set is used in this library to refer to a data structure consisting of * a set of names (in lower case), each mapped to one or more values. * Generally, this is represented by a data type of java.util.Map<String,String[]>, * with the keys (names) being of type String and the values represented by an array containing one or more items of type String. * A field data set can be used to represent the data in an HTML * form data set. *

    * FormFields instances are obtained using the {@link #FormFields(Collection formControls)} constructor * or by calling the {@link Segment#getFormFields()} method. *

    * The case sensitivity of form field names is determined by the static * {@link Config#CurrentCompatibilityMode}.{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property. *

    * Examples: *

      *
    1. * Write the data received in the current ServletRequest to a .CSV file, * and then display the form populated with this data: *

       *    Source source=new Source(htmlTextOfOriginatingForm);
       *    FormFields formFields=source.getFormFields();
       *
       *    File csvOutputFile=new File("FormData.csv");
       *    boolean outputHeadings=!csvOutputFile.exists();
       *    Writer writer=new FileWriter(csvOutputFile,true);
       *    if (outputHeadings) Util.outputCSVLine(writer,formFields.getColumnLabels());
       *    Util.outputCSVLine(writer,formFields.getColumnValues(servletRequest.getParameterMap()));
       *    writer.close();
       *
       *    formFields.setDataSet(servletRequest.getParameterMap());
       *    OutputDocument outputDocument=new OutputDocument(source);
       *    outputDocument.replace(formFields);
       *    outputDocument.writeTo(servletResponse.getWriter());
      *

      See also the sample program FormFieldCSVOutput.

      *

    2. Replace the initial values of controls in the form named "MyForm" with new values: *

       *    Source source=new Source(htmlText);
       *    Element myForm=null;
       *    List formElements=source.getAllElements(Tag.FORM);
       *    for (Iterator i=formElements.iterator(); i.hasNext();) {
       *      Element formElement=(Element)i.next();
       *      String formName=formElement.getAttributes().getValue("name");
       *      if ("MyForm".equals(formName)) {
       *        myForm=formElement;
       *        break;
       *      }
       *    }
       *    FormFields formFields=myForm.getFormFields();
       *    formFields.clearValues(); // clear any values that might be set in the source document
       *    formFields.addValue("Name","Humphrey Bear");
       *    formFields.addValue("MailingList","A");
       *    formFields.addValue("MailingList","B");
       *    formFields.addValue("FavouriteFare","honey");
       *    OutputDocument outputDocument=new OutputDocument(source);
       *    outputDocument.replace(formFields);
       *    String newHtmlText=outputDocument.toString();
      *

      See also the sample program FormFieldSetValues.

      *

    3. Change the display characteristics of individual controls: *

       *    Source source=new Source(htmlText);
       *    FormFields formFields=source.getFormFields();
       *    // disable some controls:
       *    formFields.get("Password").getFormControl().setDisabled(true);
       *    FormField mailingListFormField=formFields.get("MailingList");
       *    mailingListFormField.setValue("C");
       *    mailingListFormField.getFormControl("C").setDisabled(true);
       *    mailingListFormField.getFormControl("D").setDisabled(true);
       *    // remove some controls:
       *    formFields.get("button1").getFormControl().setOutputStyle(FormControlOutputStyle.REMOVE);
       *    FormControl rhubarbFormControl=formFields.get("FavouriteFare").getFormControl("rhubarb");
       *    rhubarbFormControl.setOutputStyle(FormControlOutputStyle.REMOVE);
       *    // set some controls to display value:
       *    formFields.setValue("Address","The Lodge\nDeakin  ACT  2600\nAustralia");
       *    formFields.get("Address").getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE);
       *    FormField favouriteSportsFormField=formFields.get("FavouriteSports");
       *    favouriteSportsFormField.setValue("BB");
       *    favouriteSportsFormField.addValue("AFL");
       *    favouriteSportsFormField.getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE);
       *    OutputDocument outputDocument=new OutputDocument(source);
       *    outputDocument.replace(formFields); // adds all segments necessary to effect changes
       *    String newHtmlText=outputDocument.toString();
      *

      See also the sample program FormControlDisplayCharacteristics.

      *

    * @see FormField * @see FormControl */ public final class FormFields extends AbstractCollection { private final LinkedHashMap map=new LinkedHashMap(); private final ArrayList formControls=new ArrayList(); /** * Constructs a new FormFields object consisting of the specified {@linkplain FormControl form controls}. * @param formControls a collection of {@link FormControl} objects. * @see Segment#getFormFields() */ public FormFields(final Collection formControls) { // Passing "this" as a parameter inside a constructor used to cause some strange problems back in java 1.0, // but it seems to work here and there is no explicit mention in the Java language spec about any potential problems. // The alternative is an ugly static FormFields constructFrom(List formControls) method. for (FormControl formControl : formControls) { if (formControl.getName()!=null && formControl.getName().length()!=0) { formControl.addToFormFields(this); this.formControls.add(formControl); } } } /** * Returns the number of FormField objects. * @return the number of FormField objects. */ public int getCount() { return map.size(); } /** * Returns the number of FormField objects. *

    * This is equivalent to {@link #getCount()},
    * and is necessary for the implementation of the java.util.Collection interface.
    *
    * @return the number of FormField objects.
    */
    public int size() {
        // one entry per distinct field name, which is exactly what getCount() reports
        return map.size();
    }

    /**
    * Returns the FormField with the specified {@linkplain FormField#getName() name}.
    *

    * The case sensitivity of the fieldName argument is determined by the static * {@link Config#CurrentCompatibilityMode}.{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property. * * @param fieldName the name of the FormField to get. * @return the FormField with the specified {@linkplain FormField#getName() name}, or null if no FormField with the specified name exists. */ public FormField get(String fieldName) { if (Config.CurrentCompatibilityMode.isFormFieldNameCaseInsensitive()) fieldName=fieldName.toLowerCase(); return map.get(fieldName); } /** * Returns an iterator over the {@link FormField} objects in the collection. *

    * The order in which the form fields are iterated corresponds to the order of appearance * of each form field's first {@link FormControl} in the source document. *

    * If this FormFields object has been {@linkplain #merge(FormFields) merged} with another,
    * the ordering is no longer defined.
    *
    * @return an iterator over the {@link FormField} objects in the collection.
    */
    public Iterator iterator() {
        // map is a LinkedHashMap, so its values are iterated in the order in which the fields were inserted
        return map.values().iterator();
    }

    /**
    * Returns a list of the field submission values of all the specified constituent {@linkplain FormField form fields} with the specified {@linkplain FormField#getName() name}.
    *

    * All objects in the returned list are of type String, with no null entries. *

    * This is equivalent to {@link #get(String) get(fieldName)}.{@link FormField#getValues() getValues()}, * assuming that a field with the specified name exists in this collection. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @return a list of the field submission values of all the specified constituent {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name}, or null if no form field with this name exists. * @see FormField#getValues() */ public List getValues(final String fieldName) { final FormField formField=get(fieldName); return formField==null ? null : formField.getValues(); } /** * Returns the entire field data set represented by the {@linkplain FormField#getValues() values} of the constituent form fields. *

    * The values in the map returned by this method are represented as a string array, giving the map a format consistent with the * javax.servlet.ServletRequest.getParameterMap() * method. *

    * Only the {@linkplain FormField#getName() names} of form fields with at least one {@linkplain FormField#getValues() value} * are included in the map, meaning every String[] is guaranteed to have at least one entry. *

    * Iterating over the map keys returns them in the order of appearance in the source document. * * @return the entire field data set represented by the {@linkplain FormField#getValues() values} of the constituent form fields. * @see #setDataSet(Map) */ public Map getDataSet() { final LinkedHashMap map=new LinkedHashMap((int)(getCount()/0.7)); for (FormField formField : this) { final List values=formField.getValues(); if (values.isEmpty()) continue; map.put(formField.getName(),values.toArray(new String[values.size()])); } return map; } /** * Clears the submission values of all the constituent {@linkplain #getFormControls() form controls}. * @see FormControl#clearValues() */ public void clearValues() { for (FormControl formControl : formControls) formControl.clearValues(); } /** * Sets the submission values of all the constituent * {@linkplain FormControl form controls} to match the data in the specified field data set. *

    * The map keys must be String {@linkplain FormField#getName() field names}, * with each map value an array of String objects containing the field's new {@linkplain FormField#setValues(Collection) values}. *

    * The map returned by the * javax.servlet.ServletRequest.getParameterMap() * method has a suitable format for use with this method. *

    * All existing values are {@linkplain #clearValues() cleared} before the values from the field data set are added. *

    * Any map entries with a null value are ignored.
    * Entries whose key does not match the name of any form field in this collection are also ignored.
    * If the specified field data set is itself null, the existing values are cleared and nothing else is done.
    *
    * @param dataSet the field data set containing the new {@linkplain FormField#setValues(Collection) values} of the constituent form fields, may be null.
    * @see #getDataSet()
    */
    public void setDataSet(final Map<String,String[]> dataSet) {
        clearValues();
        // The guard must test the argument; the "map" field is final and never null,
        // so testing it let a null data set fall through to dataSet.entrySet() and throw.
        if (dataSet==null) return;
        for (Map.Entry<String,String[]> entry : dataSet.entrySet()) {
            final String[] values=entry.getValue();
            if (values==null) continue; // documented contract: null values are ignored
            final FormField formField=get(entry.getKey());
            if (formField!=null) formField.addValues(values);
        }
    }

    /**
    * Sets the field submission values of the constituent
    * {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name} to the single specified value.
    *

    * This is equivalent to {@link #get(String) get(fieldName)}.{@link FormField#setValue(String) setValue(value)}, * assuming that a field with the specified name exists in this collection. *

    * The return value indicates whether the specified form field "accepted" the value. * A return value of false implies an error condition as either no field with the specified name exists, or * the specified value is not compatible with the specified field. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @param value the new field submission value of the specified field, or null to {@linkplain FormField#clearValues() clear} the field of all submission values. * @return true if a field of the specified name exists in this collection and it accepts the specified value, otherwise false. */ public boolean setValue(final String fieldName, final String value) { final FormField formField=get(fieldName); return formField==null ? false : formField.setValue(value); } /** * Adds the specified value to the field submission values of the constituent * {@linkplain FormField form field} with the specified {@linkplain FormField#getName() name}. *

    * This is equivalent to {@link #get(String) get(fieldName)}.{@link FormField#addValue(String) addValue(value)}, * assuming that a field with the specified name exists in this collection. *

    * The return value indicates whether the specified form field "accepted" the value. * A return value of false implies an error condition as either no field with the specified name exists, or * the specified value is not compatible with the specified field. * * @param fieldName the {@linkplain FormField#getName() name} of the form field. * @param value the new field submission value to add to the specified field, must not be null. * @return true if a field of the specified name exists in this collection and it accepts the specified value, otherwise false. */ public boolean addValue(final String fieldName, final String value) { final FormField formField=get(fieldName); return formField==null ? false : formField.addValue(value); } /** * Returns a string array containing the column labels corresponding to the values from the {@link #getColumnValues(Map)} method. *

    * Instead of using the {@linkplain FormField#getName() name} of each constituent form field to construct the labels, * the {@linkplain FormControl#getName() name} of the first {@linkplain FormControl form control} from each form field is used. * This allows the labels to be constructed using the names with the original case from the source document rather than * using the all lower case names of the form fields. *

    * See the documentation of the {@link #getColumnValues(Map)} method for more details. * * @return a string array containing the column labels corresponding to the values from the {@link #getColumnValues(Map)} method. * @see Util#outputCSVLine(Writer,String[]) */ public String[] getColumnLabels() { initColumns(); final String[] columnLabels=new String[columns.length]; for (int i=0 ; ifield data set into a simple string array, * suitable for storage in a tabular format such as a database table or .CSV file. *

    * The conversion is performed in a way that allows the multiple values of certain fields to be stored in separate columns, * by analysing the possible form data sets * that can be generated from the constituent {@linkplain #getFormControls() form controls}. *

    * The column labels and values are determined as follows: *

    *

      *
    • * For each {@linkplain FormField form field} in this collection (taken in {@linkplain #iterator() iterator} order): *
        *
      • * If the form field has no {@linkplain FormField#getPredefinedValues() predefined values}, * such as a single {@linkplain FormControlType#TEXT text control}, then: *
          *
        • * Add a single column: * *
          {@linkplain #getColumnLabels() Label}:the {@linkplain FormField#getName() name} of the form field in original case *
          Value:the single value mapped to this field in the specified field data set. *
          * In the unlikely event that this field contains more than one value, all values are included in this one column and * separated by the text defined in the static {@link Config#ColumnMultipleValueSeparator} property. *
        *
      • * Otherwise, if the form field does have {@linkplain FormField#getPredefinedValues() predefined values}, * but does not {@linkplain FormField#allowsMultipleValues() allow multiple values}, then: *
          *
        • * If the form field has only one {@linkplain FormField#getPredefinedValues() predefined value}, * such as a single {@linkplain FormControlType#CHECKBOX checkbox}, then: *
            *
          • * Add a single boolean column: * *
            {@linkplain #getColumnLabels() Label}:the {@linkplain FormField#getName() name} of the form field in original case *
            Value:the currently configured string representation for {@linkplain Config#ColumnValueTrue true} * if a value mapped to this field in the specified field data set matches the * {@linkplain FormField#getPredefinedValues() predefined value}, otherwise {@linkplain Config#ColumnValueFalse false} *
            *
          *
        • * Otherwise, if the form field has more than one {@linkplain FormField#getPredefinedValues() predefined value}, * such as a set of {@linkplain FormControlType#RADIO radio buttons}, then: *
            *
          • * Add a single column: * *
            {@linkplain #getColumnLabels() Label}:the {@linkplain FormField#getName() name} of the form field in original case *
            Value:the single value mapped to this field in the specified field data set, * which in the case of a set of radio buttons should be the {@linkplain FormControl#getPredefinedValue() predefined value} * of the {@linkplain FormControl#isChecked() checked} radio button. *
            *
          *
        *
      • * Otherwise, if the form field has {@linkplain FormField#getPredefinedValues() predefined values} * and {@linkplain FormField#allowsMultipleValues() allows multiple values}, * such as a set of {@linkplain FormControlType#CHECKBOX checkboxes}, then: *
          *
        • * For each {@linkplain FormField#getPredefinedValues() predefined value} in the form field: *
            *
          • * Add a boolean column: * *
            {@linkplain #getColumnLabels() Label}:"FieldName.PredefinedValue", * where FieldName is the {@linkplain FormField#getName() name} of the form field in original case, * and PredefinedValue is the {@linkplain FormField#getPredefinedValues() predefined value}. *
            Value:the currently configured string representation for {@linkplain Config#ColumnValueTrue true} * if a value mapped to this field in the specified field data set matches the * {@linkplain FormField#getPredefinedValues() predefined value}, otherwise {@linkplain Config#ColumnValueFalse false} *
            *
          *
        • * In addition, if the form field can also contain user values ({@link FormField#getUserValueCount()}>0), then: *
            *
          • * Add another column: * *
            {@linkplain #getColumnLabels() Label}:the {@linkplain FormField#getName() name} of the form field in original case *
            Value:all values mapped to this field in the specified field data set * that do not match any of the {@linkplain FormField#getPredefinedValues() predefined values}, * separated by the text defined in the static {@link Config#ColumnMultipleValueSeparator} property. *
            *
          *
        *
      *
    *

    * The sample program FormFieldCSVOutput demonstrates the use of this method and its output. * * @param dataSet a field data set containing the data to convert. * @return the data values in the specified field data set in the form of a simple string array. * @see Util#outputCSVLine(Writer,String[]) * @see #getColumnLabels() * @see #getColumnValues() */ public String[] getColumnValues(final Map dataSet) { initColumns(); final String[] columnValues=new String[columns.length]; if (Config.ColumnValueFalse!=null) { // initialise all boolean columns with false string for (int i=0; i entry : dataSet.entrySet()) { final String fieldName=entry.getKey(); final FormField formField=get(fieldName); if (formField!=null) { final int columnIndex=formField.columnIndex; for (String value : entry.getValue()) { for (int ci=columnIndex; ci.CSV file. *

    * This is equivalent to {@link #getColumnValues(Map) getColumnValues}({@link #getDataSet()}). * * @return all the {@linkplain FormField#getValues() form submission values} of the constituent form fields in the form of a simple string array. */ public String[] getColumnValues() { return getColumnValues(getDataSet()); } private void initColumns() { if (columns!=null) return; final ArrayList columnList=new ArrayList(); for (FormField formField : this) { formField.columnIndex=columnList.size(); if (!formField.allowsMultipleValues() || formField.getPredefinedValues().isEmpty()) { columnList.add(new Column(formField,formField.getPredefinedValues().size()==1,null)); } else { // add a column for every predefined value for (String predefinedValue : formField.getPredefinedValues()) columnList.add(new Column(formField,true,predefinedValue)); if (formField.getUserValueCount()>0) columnList.add(new Column(formField,false,null)); // add a column for user values, must come after predefined values for algorithm in getColumnValues to work } } columns=columnList.toArray(new Column[columnList.size()]); } private Column[] columns=null; private static class Column { public FormField formField; public boolean isBoolean; public String predefinedValue; public Column(final FormField formField, final boolean isBoolean, final String predefinedValue) { this.formField=formField; this.isBoolean=isBoolean; this.predefinedValue=predefinedValue; } } /** * Returns a list of all the {@linkplain FormField#getFormControls() constituent form controls} from all the {@linkplain FormField form fields} in this collection. * @return a list of all the {@linkplain FormField#getFormControls() constituent form controls} from all the {@linkplain FormField form fields} in this collection. */ public List getFormControls() { return formControls; } /** * Merges the specified FormFields into this FormFields collection. 
* This is useful if a full collection of possible form fields is required from multiple {@linkplain Source source} documents. *

    * If both collections contain a FormField with the same {@linkplain FormField#getName() name}, * the resulting FormField has the following properties: *

      *
    • {@link FormField#getUserValueCount() getUserValueCount()} : the maximum user value count from both form fields
    • *
    • {@link FormField#allowsMultipleValues() allowsMultipleValues()} : true if either form field allows multiple values
    • *
    • {@link FormField#getPredefinedValues() getPredefinedValues()} : the union of predefined values in both form fields
    • *
    • {@link FormField#getFormControls() getFormControls()} : the union of {@linkplain FormControl form controls} from both form fields
    • *
    *

    * NOTE: Some underlying data structures may end up being shared between the two merged FormFields collections. */ public void merge(final FormFields formFields) { for (FormField formField : formFields) { final String fieldName=formField.getName(); final FormField existingFormField=get(fieldName); if (existingFormField==null) map.put(formField.getName(),formField); else existingFormField.merge(formField); } } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { final StringBuilder sb=new StringBuilder(); for (FormField formField : this) sb.append(formField); return sb.toString(); } /** * Returns a string representation of this object useful for debugging purposes. *

    * This is equivalent to {@link #getDebugInfo()}. * * @return a string representation of this object useful for debugging purposes. */ public String toString() { return getDebugInfo(); } void add(final FormControl formControl) { add(formControl,formControl.getPredefinedValue()); } void add(final FormControl formControl, final String predefinedValue) { add(formControl,predefinedValue,formControl.name); } void addName(final FormControl formControl, final String fieldName) { add(formControl,null,fieldName); } void add(final FormControl formControl, final String predefinedValue, String fieldName) { if (Config.CurrentCompatibilityMode.isFormFieldNameCaseInsensitive()) fieldName=fieldName.toLowerCase(); FormField formField=(FormField)map.get(fieldName); if (formField==null) { formField=new FormField(fieldName); map.put(formField.getName(),formField); } formField.addFormControl(formControl,predefinedValue); } void replaceInOutputDocument(final OutputDocument outputDocument) { for (FormControl formControl : formControls) outputDocument.replace(formControl); } } jericho-html-3.1/src/java/net/htmlparser/jericho/StreamedText.java0000644000175000017500000002205711204550410025254 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; import java.nio.*; /** * Implements a buffered window into a stream of characters. *

    * Unless the buffer is explicitly {@linkplain #setBuffer(char[]) set}, it expands automatically as further characters are fetched from the stream. *

    * The {@link #setMinRequiredBufferBegin(int)} method can be used to inform the StreamedText object that characters up to a specified * position are no longer required, allowing more characters to be fetched without the need to increase the buffer size. */ final class StreamedText implements CharSequence { private final Reader reader; private char[] buffer; private boolean expandableBuffer; private int bufferBegin=0; // the current position of the first byte of the buffer. all text before it has been discarded. private int readerPos=0; // the next position into which text will be loaded from the reader stream. must be >=bufferBegin and <=bufferBegin+buffer.length, except if one of the "text" argument constructors was used, in which case =Integer.MAX_VALUE. private int minRequiredBufferBegin=0; // the minimum pos that must be kept in buffer. always >=bufferBegin. private int end=Integer.MAX_VALUE; public static int INITIAL_EXPANDABLE_BUFFER_SIZE=8192; // same default as StAX public StreamedText(final Reader reader, final char[] buffer) { this.reader=reader; setBuffer(buffer); } public StreamedText(final Reader reader) { this(reader,null); } private StreamedText(final char[] text, final int length) { reader=null; buffer=text; expandableBuffer=false; end=length; readerPos=Integer.MAX_VALUE; } public StreamedText(final char[] text) { this(text,text.length); } public StreamedText(final CharBuffer text) { this(text.array(),text.length()); } public StreamedText(final CharSequence text) { this(toCharArray(text)); } public StreamedText setBuffer(char[] buffer) { if (buffer!=null) { this.buffer=buffer; expandableBuffer=false; } else { this.buffer=new char[INITIAL_EXPANDABLE_BUFFER_SIZE]; expandableBuffer=true; } return this; } public boolean hasExpandableBuffer() { return expandableBuffer; } /** * Returns the character at the specified index. * @param index the index of the character. * @return the character at the specified index. 
*/ public char charAt(final int pos) { if (pos>=readerPos) readToPosition(pos); checkPos(pos); return buffer[pos-bufferBegin]; } public void setMinRequiredBufferBegin(final int minRequiredBufferBegin) { if (minRequiredBufferBegin * This method returns Integer.MAX_VALUE until an attempt is made to access a position past the end of the stream. * * @return the length of the text stream. */ public int length() { if (end==Integer.MAX_VALUE) throw new IllegalStateException("Length of streamed text cannot be determined until end of file has been reached"); return end; } public int getEnd() { return end; } private void prepareBufferRange(final int begin, final int end) { final int lastRequiredPos=end-1; if (lastRequiredPos>readerPos) readToPosition(lastRequiredPos); checkPos(begin); if (end>this.end) throw new IndexOutOfBoundsException(); } public void writeTo(final Writer writer, final int begin, final int end) throws IOException { prepareBufferRange(begin,end); writer.write(buffer,begin-bufferBegin,end-begin); } /** * Returns a new string that is a substring of this text. *

    * The substring begins at the specified begin position and extends to the character at position end - 1.
    * Thus the length of the substring is end-begin.
    *
    * @param begin the begin position, inclusive.
    * @param end the end position, exclusive.
    * @return a new string that is a substring of this text.
    */
    public String substring(final int begin, final int end) {
        prepareBufferRange(begin,end);
        // computed only after the range has been prepared, since both values are read relative to the current buffer window
        final int offset=begin-bufferBegin;
        final int count=end-begin;
        return new String(buffer,offset,count);
    }

    /**
    * Returns a new character sequence that is a subsequence of this sequence.
    *

    * The returned CharSequence is only guaranteed to be valid as long as no futher operations are performed on this StreamedText object. * Any subsequent method call could invalidate the underlying buffer used by the CharSequence. * * @param begin the begin position, inclusive. * @param end the end position, exclusive. * @return a new character sequence that is a subsequence of this sequence. */ public CharSequence subSequence(final int begin, final int end) { // This has not been benchmarked. It is possible that returning substring(begin,end) results in faster code even though it requires more memory allocation. return getCharBuffer(begin,end); } public CharBuffer getCharBuffer(final int begin, final int end) { prepareBufferRange(begin,end); return CharBuffer.wrap(buffer,begin-bufferBegin,end-begin); } public String toString() { throw new UnsupportedOperationException("Streamed text can not be converted to a string"); } public String getDebugInfo() { return "Buffer size: \""+buffer.length+"\", bufferBegin="+bufferBegin+", minRequiredBufferBegin="+minRequiredBufferBegin+", readerPos="+readerPos; } public char[] getBuffer() { return buffer; } public int getBufferBegin() { return bufferBegin; } private void checkPos(final int pos) { // hopefully inlined by the compiler if (pos=end) throw new IndexOutOfBoundsException(); } public int getBufferOverflowPosition() { return minRequiredBufferBegin+buffer.length; } private void readToPosition(final int pos) { try { if (pos>=bufferBegin+buffer.length) { if (pos>=minRequiredBufferBegin+buffer.length) { if (!expandableBuffer) throw new BufferOverflowException(); // unfortunately BufferOverflowException doesn't accept a message argument, otherwise it would include the message "StreamedText buffer too small to keep positions "+minRequiredBufferBegin+" and "+pos+" simultaneously" expandBuffer(pos-minRequiredBufferBegin+1); } discardUsedText(); } while (readerPos<=pos) { final int 
charCount=reader.read(buffer,readerPos-bufferBegin,bufferBegin+buffer.length-readerPos); if (charCount==-1) { end=readerPos; break; } readerPos+=charCount; } } catch (IOException ex) { throw new RuntimeException(ex); } } private void expandBuffer(final int minSize) throws IOException { int newSize=buffer.length*2; if (newSize * An end tag type is a {@link TagType} that {@linkplain #getStartDelimiter() starts} with the characters '</'. *

    * The singleton instances of all the standard end tag types are available in this class as static * fields. *

    * Because all EndTagType instances must be singletons, the '==' operator can be used to test for a particular tag type * instead of the equals(Object) method. * * @see StartTagType */ public abstract class EndTagType extends TagType { static final String START_DELIMITER_PREFIX="</ ... >). *

    * See the documentation of the {@link Tag#isUnregistered()} method for details. *

    *

    *
    Properties:
    *
    * *
    Property/MethodValue *
    {@link #getDescription() Description}/unregistered *
    {@link #getStartDelimiter() StartDelimiter}</ *
    {@link #getClosingDelimiter() ClosingDelimiter}> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}(empty string) *
    {@link #getCorrespondingStartTagType() CorrespondingStartTagType}null *
    {@link #generateHTML(String) generateHTML}("StartTagName")</StartTagName> *
    *
    Example:
    *
    </ "This is not recognised as any of the predefined end tag types in this library">
    *
    * @see StartTagType#UNREGISTERED */ public static final EndTagType UNREGISTERED=EndTagTypeUnregistered.INSTANCE; /** * The tag type given to a normal HTML or XML {@linkplain EndTag end tag} (</name>). *

    *

    *
    Properties:
    *
    * *
    Property/MethodValue *
    {@link #getDescription() Description}/normal *
    {@link #getStartDelimiter() StartDelimiter}</ *
    {@link #getClosingDelimiter() ClosingDelimiter}> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}(empty string) *
    {@link #getCorrespondingStartTagType() CorrespondingStartTagType}{@link StartTagType#NORMAL} *
    {@link #generateHTML(String) generateHTML}("StartTagName")</StartTagName> *
    *
    Example:
    *
    </div>
    *
    */ public static final EndTagType NORMAL=EndTagTypeNormal.INSTANCE; /** * Constructs a new EndTagType object with the specified properties. *
    (implementation assistance method) *

    * As EndTagType is an abstract class, this constructor is only called from sub-class constructors. * * @param description a {@linkplain #getDescription() description} of the new end tag type useful for debugging purposes. * @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new end tag type. * @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new end tag type. * @param isServerTag indicates whether the new end tag type is a {@linkplain #isServerTag() server tag}. */
    protected EndTagType(final String description, final String startDelimiter, final String closingDelimiter, final boolean isServerTag) {
        super(description,startDelimiter.toLowerCase(),closingDelimiter,isServerTag,START_DELIMITER_PREFIX);
        // The check is made against getStartDelimiter() rather than the raw argument.
        final boolean hasEndTagPrefix=getStartDelimiter().startsWith(START_DELIMITER_PREFIX);
        if (hasEndTagPrefix) return;
        throw new IllegalArgumentException("startDelimiter of an end tag must start with \""+START_DELIMITER_PREFIX+'"');
    }
    /** * Returns the {@linkplain StartTagType type} of {@linkplain StartTag start tag} that is usually paired with an * {@linkplain EndTag end tag} of this type to form an {@link Element}. *
    (default implementation method) *

    * The default implementation returns null. *

    * This property is informational only and is not used by the parser in any way. *

    * The mapping of end tag type to the corresponding start tag type is in any case one-to-many, which is why the definition * emphasises the word "usually". * An example of this is the {@link PHPTagTypes#PHP_SCRIPT} start tag type, * whose {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type} is {@link #NORMAL EndTagType.NORMAL}, * while the converse is not true. *

    * The only predefined end tag type that returns null for this property is the * special {@link #UNREGISTERED} end tag type. *

    * Although this method is used like a property method, it is implemented as a * default implementation method to avoid cyclic references between statically * instantiated {@link StartTagType} and EndTagType objects. *

    *

    *
    Standard Tag Type Values:
    *
    * *
    End Tag TypeCorresponding Start Tag Type *
    {@link EndTagType#UNREGISTERED}null *
    {@link EndTagType#NORMAL}{@link StartTagType#NORMAL} *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    End Tag TypeCorresponding Start Tag Type *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT} *
    {@link MasonTagTypes#MASON_NAMED_BLOCK_END}{@link MasonTagTypes#MASON_NAMED_BLOCK} *
    *
    * * @return the {@linkplain StartTagType type} of {@linkplain StartTag start tag} that is usually paired with an {@linkplain EndTag end tag} of this type to form an {@link Element}. * @see StartTagType#getCorrespondingEndTagType() */ public StartTagType getCorrespondingStartTagType() { return null; } /** * Returns the end tag {@linkplain EndTag#getName() name} that is required to match a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag} with the specified {@linkplain StartTag#getName() name}. *
    (property method) *

    * This default implementation simply returns startTagName. *

    * Note that the startTagName parameter should include the start tag's {@linkplain TagType#getNamePrefix() name prefix} if it has one. * * @param startTagName the {@linkplain StartTag#getName() name} of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}, including its {@linkplain TagType#getNamePrefix() name prefix}. * @return the end tag {@linkplain EndTag#getName() name} that is required to match a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag} with the specified {@linkplain StartTag#getName() name}. */ public String getEndTagName(final String startTagName) { return startTagName; } /** * Generates the HTML text of an {@linkplain EndTag end tag} of this type given the {@linkplain StartTag#getName() name} of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}. *
    (property method) *

    * This default implementation returns "</"+{@link #getEndTagName(String) getEndTagName}(startTagName)+{@link #getClosingDelimiter()}. *

    * Note that the startTagName parameter should include the start tag's {@linkplain TagType#getNamePrefix() name prefix} if it has one. * * @param startTagName the {@linkplain StartTag#getName() name} of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}, including its {@linkplain TagType#getNamePrefix() name prefix}. * @return the HTML text of an {@linkplain EndTag end tag} of this type given the {@linkplain StartTag#getName() name} of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}. */ public String generateHTML(final String startTagName) { return START_DELIMITER_PREFIX+getEndTagName(startTagName)+getClosingDelimiter(); } /** * Internal method for the construction of an {@link EndTag} object of this type. *
    (implementation assistance method) *

    * Intended for use from within the {@link #constructTagAt(Source,int) constructTagAt(Source, int pos)} method. * * @param source the {@link Source} document. * @param begin the character position in the source document where this tag {@linkplain Segment#getBegin() begins}. * @param end the character position in the source document where this tag {@linkplain Segment#getEnd() ends}. * @param name the {@linkplain Tag#getName() name} of the tag. * @return the new {@link EndTag} object. */ protected final EndTag constructEndTag(final Source source, final int begin, final int end, final String name) { return new EndTag(source,begin,end,this,name); } } jericho-html-3.1/src/java/net/htmlparser/jericho/Tag.java0000644000175000017500000010346411204607234023367 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents either a {@link StartTag} or {@link EndTag} in a specific {@linkplain Source source} document. *

    * Take the following HTML segment as an example: *

    * <p>This is a sample paragraph.</p> *

    * The "<p>" is represented by a {@link StartTag} object, and the "</p>" is represented by an {@link EndTag} object, * both of which are subclasses of the Tag class. * The whole segment, including the start tag, its corresponding end tag and all of the content in between, is represented by an {@link Element} object. * *

    Tag Parsing Process

    * The following process describes how each tag is identified by the parser: *
      *
    1. * Every '<' character found in the source document is considered to be the start of a tag. * The characters following it are compared with the {@linkplain TagType#getStartDelimiter() start delimiters} * of all the {@linkplain TagType#register() registered} {@linkplain TagType tag types}, and a list of matching tag types * is determined. *
    2. * A more detailed analysis of the source is performed according to the features of each matching tag type from the first step, * in order of precedence, until a valid tag is able to be constructed. *

      * The analysis performed in relation to each candidate tag type is a two-stage process: *

        *
      1. * The position of the tag is checked to determine whether it is {@linkplain TagType#isValidPosition(Source,int,int[]) valid}. * In theory, a {@linkplain TagType#isServerTag() server tag} is valid in any position, but a non-server tag is not valid inside any other tag, * nor inside elements with CDATA content such as {@link HTMLElementName#SCRIPT SCRIPT} and {@link HTMLElementName#STYLE STYLE} elements. * Theory dictates therefore that {@linkplain StartTagType#COMMENT comments} and explicit {@linkplain StartTagType#CDATA_SECTION CDATA sections} * inside script elements should not be recognised as tags. * The behaviour of the parser however does not always strictly adhere to the theory, to maintain compatibility with major browsers * and also for efficiency reasons. *

        * The {@link TagType#isValidPosition(Source, int pos, int[] fullSequentialParseData)} method is responsible for this check * and has a common default implementation for all tag types * (although custom tag types can override it if necessary). * Its behaviour differs depending on whether or not a {@linkplain Source#fullSequentialParse() full sequential parse} is performed. * See the documentation of the {@link TagType#isValidPosition(Source,int,int[]) isValidPosition} method for full details. *

      2. * A final analysis is performed by the {@link TagType#constructTagAt(Source, int pos)} method of the candidate tag type. * This method returns a valid {@link Tag} object if all conditions of the candidate tag type are met, otherwise it returns * null and the process continues with the next candidate tag type. *
      *
    3. * If the source does not match the start delimiter or syntax of any registered tag type, the segment spanning it and the next * '>' character is taken to be an {@linkplain #isUnregistered() unregistered} tag. * Some tag search methods ignore unregistered tags. See the {@link #isUnregistered()} method for more information. *
    *

    * See the documentation of the {@link TagType} class for more details on how tags are recognised. * *

    Tag Search Methods

    *

    * Methods that get tags in a source document are collectively referred to as Tag Search Methods. * They are found mostly in the {@link Source} and {@link Segment} classes, and can be generally categorised as follows: *

    *
    Open Search: *
    These methods search for tags of any {@linkplain #getName() name} and {@linkplain #getTagType() type}. *
      *
    • {@link Tag#getNextTag()} *
    • {@link Tag#getPreviousTag()} *
    • {@link Segment#getAllElements()} *
    • {@link Segment#getFirstElement()} *
    • {@link Source#getTagAt(int pos)} *
    • {@link Source#getPreviousTag(int pos)} *
    • {@link Source#getNextTag(int pos)} *
    • {@link Source#getEnclosingTag(int pos)} *
    • {@link Segment#getAllTags()} *
    • {@link Segment#getAllStartTags()} *
    • {@link Segment#getFirstStartTag()} *
    • {@link Source#getPreviousStartTag(int pos)} *
    • {@link Source#getNextStartTag(int pos)} *
    • {@link Source#getPreviousEndTag(int pos)} *
    • {@link Source#getNextEndTag(int pos)} *
    *
    Named Search: *
    These methods include a parameter called name which is used to specify the {@linkplain #getName() name} of the tag to search for. * Specifying a name that ends in a colon (:) searches for all elements or tags in the specified XML namespace. *
      *
    • {@link Segment#getAllElements(String name)} *
    • {@link Segment#getFirstElement(String name)} *
    • {@link Segment#getAllStartTags(String name)} *
    • {@link Segment#getFirstStartTag(String name)} *
    • {@link Source#getPreviousStartTag(int pos, String name)} *
    • {@link Source#getNextStartTag(int pos, String name)} *
    • {@link Source#getPreviousEndTag(int pos, String name)} *
    • {@link Source#getNextEndTag(int pos, String name)} *
    • {@link Source#getNextEndTag(int pos, String name, EndTagType)} *
    *
    Tag Type Search: *
    These methods typically include a parameter called tagType which is used to specify the {@linkplain #getTagType() type} of the tag to search for. * In some methods the search parameter is restricted to the {@link StartTagType} or {@link EndTagType} subclass of TagType. *
      *
    • {@link Segment#getAllElements(StartTagType)} *
    • {@link Segment#getAllTags(TagType)} *
    • {@link Segment#getAllStartTags(StartTagType)} *
    • {@link Segment#getFirstStartTag(StartTagType)} *
    • {@link Source#getPreviousTag(int pos, TagType)} *
    • {@link Source#getPreviousStartTag(int pos, StartTagType)} *
    • {@link Source#getPreviousEndTag(int pos, EndTagType)} *
    • {@link Source#getNextTag(int pos, TagType)} *
    • {@link Source#getNextStartTag(int pos, StartTagType)} *
    • {@link Source#getNextEndTag(int pos, EndTagType)} *
    • {@link Source#getEnclosingTag(int pos, TagType)} *
    • {@link Source#getNextEndTag(int pos, String name, EndTagType)} *
    *
    Attribute Search: *
    These methods perform the search based on an attribute name and value. *
      *
    • {@link Segment#getAllElements(String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Segment#getFirstElement(String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Segment#getAllStartTags(String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Segment#getFirstStartTag(String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Segment#getAllElements(String attributeName, Pattern valueRegexPattern)} *
    • {@link Segment#getFirstElement(String attributeName, Pattern valueRegexPattern)} *
    • {@link Segment#getAllStartTags(String attributeName, Pattern valueRegexPattern)} *
    • {@link Segment#getFirstStartTag(String attributeName, Pattern valueRegexPattern)} *
    • {@link Segment#getAllElementsByClass(String className)} *
    • {@link Segment#getFirstElementByClass(String className)} *
    • {@link Segment#getAllStartTagsByClass(String className)} *
    • {@link Segment#getFirstStartTagByClass(String className)} *
    • {@link Source#getElementById(String id)} *
    • {@link Source#getNextElement(int pos, String attributeName, Pattern valueRegexPattern)} *
    • {@link Source#getNextElement(int pos, String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Source#getNextElementByClass(int pos, String className)} *
    • {@link Source#getNextStartTag(int pos, String attributeName, Pattern valueRegexPattern)} *
    • {@link Source#getNextStartTag(int pos, String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Source#getNextStartTagByClass(int pos, String className)} *
    *
    */ public abstract class Tag extends Segment { String name=null; // always lower case, can always use == operator to compare with constants in HTMLElementName interface private Object userData=null; // cached values: Element element=Element.NOT_CACHED; private Tag previousTag=NOT_CACHED; // does not include unregistered tags private Tag nextTag=NOT_CACHED; // does not include unregistered tags // A NOT_CACHED value in nextTag can also indicate that this tag is not in the cache. See isOrphaned() for details. static final Tag NOT_CACHED=new StartTag(); private static final boolean INCLUDE_UNREGISTERED_IN_SEARCH=false; // determines whether unregistered tags are included in searches Tag(final Source source, final int begin, final int end, final String name) { super(source,begin,end); this.name=HTMLElements.getConstantElementName(name.toLowerCase()); } // only used to create Tag.NOT_CACHED Tag() {} /** * Returns the {@linkplain Element element} that is started or ended by this tag. *

    * {@link StartTag#getElement()} is guaranteed not null. *

    * {@link EndTag#getElement()} can return null if the end tag is not properly matched to a start tag. * * @return the {@linkplain Element element} that is started or ended by this tag. */ public abstract Element getElement(); /** * Returns the name of this tag, always in lower case. *

    * The name always starts with the {@linkplain TagType#getNamePrefix() name prefix} defined in this tag's {@linkplain TagType type}. * For some tag types, the name consists only of this prefix, while in others it must be followed by a valid * XML name * (see {@link StartTagType#isNameAfterPrefixRequired()}). *

    * If the name is equal to one of the constants defined in the {@link HTMLElementName} interface, this method is guaranteed to return * the constant itself. * This allows comparisons to be performed using the == operator instead of the less efficient * String.equals(Object) method. *

    * For example, the following expression can be used to test whether a {@link StartTag} is from a * SELECT element: *
    startTag.getName()==HTMLElementName.SELECT *

    * To get the name of this tag in its original case, use {@link #getNameSegment()}.toString(). * * @return the name of this tag, always in lower case. */ public final String getName() { return name; } /** * Returns the segment spanning the {@linkplain #getName() name} of this tag. *

    * The code getNameSegment().toString() can be used to retrieve the name of this tag in its original case. *

    * Every call to this method constructs a new Segment object. * * @return the segment spanning the {@linkplain #getName() name} of this tag. * @see #getName() */
    public Segment getNameSegment() {
        // The name starts immediately after the tag type's start delimiter prefix.
        final int prefixLength=getTagType().startDelimiterPrefix.length();
        final int nameBegin=begin+prefixLength;
        final int nameEnd=nameBegin+name.length();
        return new Segment(source,nameBegin,nameEnd);
    }
    /** * Returns the {@linkplain TagType type} of this tag. * @return the {@linkplain TagType type} of this tag. */
    public abstract TagType getTagType();
    /** * Returns the general purpose user data object that has previously been associated with this tag via the {@link #setUserData(Object)} method. *

    * If {@link #setUserData(Object)} has not been called, this method returns null. * * @return the generic data object that has previously been associated with this tag via the {@link #setUserData(Object)} method. */ public Object getUserData() { return userData; } /** * Associates the specified general purpose user data object with this tag. *

    * This property can be useful for applications that need to associate extra information with tags. * The object can be retrieved later via the {@link #getUserData()} method. * * @param userData general purpose user data of any type. */ public void setUserData(final Object userData) { this.userData=userData; } /** * Returns the next tag in the source document. *

    * This method also returns {@linkplain TagType#isServerTag() server tags}. *

    * The result of a call to this method is cached. * Performing a {@linkplain Source#fullSequentialParse() full sequential parse} prepopulates this cache. *

    * If the result is not cached, a call to this method is equivalent to source.{@link Source#getNextTag(int) getNextTag}({@link #getBegin() getBegin()}+1). *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @return the next tag in the source document, or null if this is the last tag. */ public Tag getNextTag() { if (nextTag==NOT_CACHED) { final Tag localNextTag=getNextTag(source,begin+1); if (source.wasFullSequentialParseCalled()) return localNextTag; // Don't set nextTag if this is an orphaned tag. See isOrphaned() for details. nextTag=localNextTag; } return nextTag; } /** * Returns the previous tag in the source document. *

    * This method also returns {@linkplain TagType#isServerTag() server tags}. *

    * The result of a call to this method is cached. * Performing a {@linkplain Source#fullSequentialParse() full sequential parse} prepopulates this cache. *

    * If the result is not cached, a call to this method is equivalent to source.{@link Source#getPreviousTag(int) getPreviousTag}({@link #getBegin() getBegin()}-1). *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @return the previous tag in the source document, or null if this is the first tag. */
    public Tag getPreviousTag() {
        // A cached result (which may legitimately be null) is returned without searching again.
        if (previousTag!=NOT_CACHED) return previousTag;
        previousTag=getPreviousTag(source,begin-1);
        return previousTag;
    }
    /** * Indicates whether this tag has a syntax that does not match any of the {@linkplain TagType#register() registered} {@linkplain TagType tag types}. *

    * The only requirement of an unregistered tag type is that it {@linkplain TagType#getStartDelimiter() starts} with * '<' and there is a {@linkplain TagType#getClosingDelimiter() closing} '>' character * at some position after it in the source document. *

    * The absence or presence of a '/' character after the initial '<' determines whether an * unregistered tag is respectively a * {@link StartTag} with a {@linkplain #getTagType() type} of {@link StartTagType#UNREGISTERED} or an * {@link EndTag} with a {@linkplain #getTagType() type} of {@link EndTagType#UNREGISTERED}. *

    * There are no restrictions on the characters that might appear between these delimiters, including other '<' * characters. This may result in a '>' character that is identified as the closing delimiter of two * separate tags, one an unregistered tag, and the other a tag of any type that {@linkplain #getBegin() begins} in the middle * of the unregistered tag. As explained below, unregistered tags are usually only found when specifically looking for them, * so it is up to the user to detect and deal with any such nonsensical results. *

    * Unregistered tags are only returned by the {@link Source#getTagAt(int pos)} method, * named search methods, where the specified name * matches the first characters inside the tag, and by tag type search methods, where the * specified tagType is either {@link StartTagType#UNREGISTERED} or {@link EndTagType#UNREGISTERED}. *

    * Open tag searches and other searches always ignore * unregistered tags, although every discovery of an unregistered tag is {@linkplain Source#getLogger() logged} by the parser. *

    * The logic behind this design is that unregistered tag types are usually the result of a '<' character * in the text that was mistakenly left {@linkplain CharacterReference#encode(CharSequence) unencoded}, or a less-than * operator inside a script, or some other occurrence which is of no interest to the user. * By returning unregistered tags in named and tag type * search methods, the library allows the user to specifically search for tags with a certain syntax that does not match any * existing {@link TagType}. This expediency feature avoids the need for the user to create a * custom tag type to define the syntax before searching for these tags. * By not returning unregistered tags in the less specific search methods, it is providing only the information that * most users are interested in. * * @return true if this tag has a syntax that does not match any of the {@linkplain TagType#register() registered} {@linkplain TagType tag types}, otherwise false. */ public abstract boolean isUnregistered(); /** * Returns an XML representation of this tag. *

    * This is an abstract method which is implemented in the {@link StartTag} and {@link EndTag} subclasses. * See the documentation of the {@link StartTag#tidy()} and {@link EndTag#tidy()} methods for details. * * @return an XML representation of this tag. */ public abstract String tidy(); /** * Indicates whether the specified text is a valid XML Name. *

    * This implementation first checks that the first character of the specified text is a valid XML Name start character * as defined by the {@link #isXMLNameStartChar(char)} method, and then checks that the rest of the characters are valid * XML Name characters as defined by the {@link #isXMLNameChar(char)} method. *

    * Note that this implementation does not exactly adhere to the * formal definition of an XML Name, * but the differences are unlikely to be significant in real-world XML or HTML documents. * * @param text the text to test. * @return true if the specified text is a valid XML Name, otherwise false. * @see Source#getNameEnd(int pos) */ public static final boolean isXMLName(final CharSequence text) { if (text==null || text.length()==0 || !isXMLNameStartChar(text.charAt(0))) return false; for (int i=1; iXML Name. *

    * The XML 1.0 specification section 2.3 defines a * Name as starting with one of the characters *
    (Letter | '_' | ':'). *

    * This method uses the expression *
    Character.isLetter(ch) || ch=='_' || ch==':'. *

    * Note that there are many differences between the Character.isLetter() definition of a Letter and the * XML definition of a Letter, * but these differences are unlikely to be significant in real-world XML or HTML documents. * * @param ch the character to test. * @return true if the specified character is valid at the start of an XML Name, otherwise false. * @see Source#getNameEnd(int pos) */
    public static final boolean isXMLNameStartChar(final char ch) {
        // The two punctuation characters allowed at the start of a name are tested first; everything else must be a letter.
        if (ch=='_' || ch==':') return true;
        return Character.isLetter(ch);
    }
    /** * Indicates whether the specified character is valid anywhere in an * XML Name. *

    * The XML 1.0 specification section 2.3 uses the * entity NameChar to represent this set of * characters, which is defined as *
    (Letter * | Digit | '.' | '-' | '_' | ':' * | CombiningChar * | Extender). *

    * This method uses the expression *
    Character.isLetterOrDigit(ch) || ch=='.' || ch=='-' || ch=='_' || ch==':'. *

    * Note that there are many differences between these definitions, * but these differences are unlikely to be significant in real-world XML or HTML documents. * * @param ch the character to test. * @return true if the specified character is valid anywhere in an XML Name, otherwise false. * @see Source#getNameEnd(int pos) */ public static final boolean isXMLNameChar(final char ch) { return Character.isLetterOrDigit(ch) || ch=='.' || ch=='-' || ch=='_' || ch==':'; } // *** consider making public StartTag getNextStartTag() { Tag tag=this; while (true) { tag=tag.getNextTag(); if (tag==null) return null; if (tag instanceof StartTag) return (StartTag)tag; } } // *** consider making public StartTag getPreviousStartTag() { Tag tag=this; while (true) { tag=tag.getPreviousTag(); if (tag==null) return null; if (tag instanceof StartTag) return (StartTag)tag; } } // *** consider making public Tag getNextTag(final TagType tagType) { if (tagType==null) return getNextTag(); if (tagType==StartTagType.UNREGISTERED || tagType==EndTagType.UNREGISTERED) return getNextTag(source,begin+1,tagType); Tag tag=this; while (true) { if (tag.nextTag==NOT_CACHED) return getNextTag(source,tag.begin+1,tagType); tag=tag.nextTag; if (tag==null) return null; if (tag.getTagType()==tagType) return tag; } } // *** consider making public Tag getPreviousTag(final TagType tagType) { if (tagType==null) return getPreviousTag(); if (tagType==StartTagType.UNREGISTERED || tagType==EndTagType.UNREGISTERED) return getPreviousTag(source,begin-1,tagType); Tag tag=this; while (true) { if (tag.previousTag==NOT_CACHED) return getPreviousTag(source,tag.begin-1,tagType); tag=tag.previousTag; if (tag==null) return null; if (tag.getTagType()==tagType) return tag; } } final boolean includeInSearch() { return INCLUDE_UNREGISTERED_IN_SEARCH || !isUnregistered(); } static final Tag getPreviousTag(final Source source, final int pos) { // returns null if pos is out of range. return source.useAllTypesCache ? 
source.cache.getPreviousTag(pos) : getPreviousTagUncached(source,pos,ParseText.NO_BREAK); } static final Tag getNextTag(final Source source, final int pos) { // returns null if pos is out of range. return source.useAllTypesCache ? source.cache.getNextTag(pos) : getNextTagUncached(source,pos,ParseText.NO_BREAK); } static final Tag getPreviousTagUncached(final Source source, final int pos, final int breakAtPos) { // returns null if pos is out of range. try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.lastIndexOf('<',begin,breakAtPos); // this assumes that all tags start with '<' // parseText.lastIndexOf and indexOf return -1 if pos is out of range. if (begin==-1) return null; final Tag tag=getTagAt(source,begin,false); if (tag!=null && tag.includeInSearch()) return tag; } while ((begin-=1)>=0); } catch (IndexOutOfBoundsException ex) { throw new AssertionError("Unexpected internal exception"); } return null; } static final Tag getNextTagUncached(final Source source, final int pos, final int breakAtPos) { // returns null if pos is out of range. try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.indexOf('<',begin,breakAtPos); // this assumes that all tags start with '<' // parseText.lastIndexOf and indexOf return -1 if pos is out of range. if (begin==-1) return null; final Tag tag=getTagAt(source,begin,false); if (tag!=null && tag.includeInSearch()) return tag; } while ((begin+=1)=0); } catch (IndexOutOfBoundsException ex) { // this should never happen during a get previous operation so rethrow it: throw ex; } return null; } static final Tag getNextTagUncached(final Source source, final int pos, final TagType tagType, final int breakAtPos) { // returns null if pos is out of range. 
if (tagType==null) return getNextTagUncached(source,pos,breakAtPos); final String startDelimiter=tagType.getStartDelimiter(); try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.indexOf(startDelimiter,begin,breakAtPos); // parseText.lastIndexOf and indexOf return -1 if pos is out of range. if (begin==-1) return null; final Tag tag=getTagAt(source,begin,false); if (tag!=null && tag.getTagType()==tagType) return tag; } while ((begin+=1) list=new ArrayList(); source.fullSequentialParseData=new int[1]; // fullSequentialParseData is simply a holder for a single mutable integer. It holds the end position of the last normal tag (ie one that ignores enclosed markup), or MAX_VALUE if we are in a SCRIPT element. if (source.end!=0) { final ParseText parseText=source.getParseText(); Tag tag=parseAllgetNextTag(source,parseText,0,assumeNoNestedTags); while (tag!=null) { list.add(tag); if (!tag.isUnregistered()) { registeredTagCount++; if (tag instanceof StartTag) registeredStartTagCount++; } // Look for next tag after end of next tag if we're assuming tags don't appear inside other tags, as long as the last tag found was not an unregistered tag: final int pos=(assumeNoNestedTags && !tag.isUnregistered()) ? tag.end : tag.begin+1; if (pos==source.end) break; tag=parseAllgetNextTag(source,parseText,pos,assumeNoNestedTags); } } final Tag[] allRegisteredTags=new Tag[registeredTagCount]; final StartTag[] allRegisteredStartTags=new StartTag[registeredStartTagCount]; source.cache.loadAllTags(list,allRegisteredTags,allRegisteredStartTags); source.allTagsArray=allRegisteredTags; source.allTags=Arrays.asList(allRegisteredTags); source.allStartTags=Arrays.asList(allRegisteredStartTags); final int lastIndex=allRegisteredTags.length-1; for (int i=0; i0 ? 
allRegisteredTags[i-1] : null; tag.nextTag=isource.fullSequentialParseData[0] && tagType!=StartTagType.DOCTYPE_DECLARATION && tagType!=StartTagType.UNREGISTERED && tagType!=EndTagType.UNREGISTERED) { source.fullSequentialParseData[0]=(tagType==StartTagType.NORMAL && tag.name==HTMLElementName.SCRIPT) ? Integer.MAX_VALUE : tag.end; } } return tag; } } while ((begin+=1)element * in a specific {@linkplain Source source} document, which encompasses a {@linkplain #getStartTag() start tag}, * an optional {@linkplain #getEndTag() end tag} and all {@linkplain #getContent() content} in between. *

    * Take the following HTML segment as an example: *

    * <p>This is a sample paragraph.</p> *

    * The whole segment is represented by an Element object. This is comprised of the {@link StartTag} "<p>", * the {@link EndTag} "</p>", as well as the text in between. * An element may also contain other elements between its start and end tags. *

    * The term normal element refers to an element having a {@linkplain #getStartTag() start tag} * with a {@linkplain StartTag#getStartTagType() type} of {@link StartTagType#NORMAL}. * This comprises all {@linkplain HTMLElements HTML elements} and non-HTML elements. *

    * Element instances are obtained using one of the following methods: *

      *
    • {@link StartTag#getElement()} *
    • {@link EndTag#getElement()} *
    • {@link Segment#getAllElements()} *
    • {@link Segment#getAllElements(String name)} *
    • {@link Segment#getAllElements(StartTagType)} *
    * See also the {@link HTMLElements} class, and the * XML 1.0 specification for elements. *

    Element Structure

    *

    * The three possible structures of an element are listed below: *

    *
    Single Tag Element: *
    * Example:
    * <img src="mypicture.jpg"> *

    * The element consists only of a single {@linkplain #getStartTag() start tag} and has no {@linkplain #getContent() element content} * (although the start tag itself may have {@linkplain StartTag#getTagContent() tag content}). *
    {@link #getEndTag()}==null *
    {@link #isEmpty()}==true *
    {@link #getEnd() getEnd()}=={@link #getStartTag()}.{@link #getEnd() getEnd()} *

    * This occurs in the following situations: *

      *
    • An HTML element for which the {@linkplain HTMLElements#getEndTagForbiddenElementNames() end tag is forbidden}. *
    • An HTML element for which the {@linkplain HTMLElements#getEndTagRequiredElementNames() end tag is required}, * but the end tag is not present in the source document. *
    • An HTML element for which the {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional}, * where the implicitly terminating tag is situated immediately after the element's * {@linkplain #getStartTag() start tag}. *
    • An {@linkplain #isEmptyElementTag() empty element tag} *
    • A non-HTML element that is not an {@linkplain #isEmptyElementTag() empty element tag} but is missing its end tag. *
    • An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that does not define a * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}. *
    • An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that does define a * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type} but is missing its end tag. *
    *
    Explicitly Terminated Element: *
    * Example:
    * <p>This is a sample paragraph.</p> *

    * The element consists of a {@linkplain #getStartTag() start tag}, {@linkplain #getContent() content}, * and an {@linkplain #getEndTag() end tag}. *
    {@link #getEndTag()}!=null. *
    {@link #isEmpty()}==false (provided the end tag doesn't immediately follow the start tag) *
    {@link #getEnd() getEnd()}=={@link #getEndTag()}.{@link #getEnd() getEnd()}. *

    * This occurs in the following situations, assuming the start tag's matching end tag is present in the source document: *

      *
    • An HTML element for which the end tag is either * {@linkplain HTMLElements#getEndTagRequiredElementNames() required} or {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}. *
    • A non-HTML element that is not an {@linkplain #isEmptyElementTag() empty element tag}. *
    • An element with a start tag of a {@linkplain StartTag#getStartTagType() type} that defines a * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}. *
    *
    Implicitly Terminated Element: *
    * Example:
    * <p>This text is included in the paragraph element even though no end tag is present.
    * <p>This is the next paragraph. *

    * The element consists of a {@linkplain #getStartTag() start tag} and {@linkplain #getContent() content}, * but no {@linkplain #getEndTag() end tag}. *
    {@link #getEndTag()}==null. *
    {@link #isEmpty()}==false *
    {@link #getEnd() getEnd()}!={@link #getStartTag()}.{@link #getEnd() getEnd()}. *

    * This only occurs in an HTML element for which the * {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional}. *

    * The element ends at the start of a tag which implies the termination of the element, called the implicitly terminating tag. * If the implicitly terminating tag is situated immediately after the element's {@linkplain #getStartTag() start tag}, * the element is classed as a single tag element. *

    * See the element parsing rules for HTML elements with optional end tags * for details on which tags can implicitly terminate a given element. *

    * See also the documentation of the {@link HTMLElements#getEndTagOptionalElementNames()} method. *

    *

    Element Parsing Rules

    * The following rules describe the algorithm used in the {@link StartTag#getElement()} method to construct an element. * The detection of the start tag's matching end tag or other terminating tags always takes into account the possible nesting of elements. *

    *

      *
    • * If the start tag has a {@linkplain StartTag#getStartTagType() type} of {@link StartTagType#NORMAL}: *
        *
      • * If the {@linkplain StartTag#getName() name} of the start tag matches one of the * recognised {@linkplain HTMLElementName HTML element names} (indicating an HTML element): *
          *
        • * * If the end tag for an element of this {@linkplain StartTag#getName() name} is * {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}, * the parser does not conduct any search for an end tag and a single tag element is created. *
        • * * If the end tag for an element of this {@linkplain StartTag#getName() name} is * {@linkplain HTMLElements#getEndTagRequiredElementNames() required}, the parser searches for the start tag's matching end tag. *
            *
          • * If the matching end tag is found, an explicitly terminated element is created. *
          • * If no matching end tag is found, the source document is not valid HTML and the incident is * {@linkplain Source#getLogger() logged} as a missing required end tag. * In this situation a single tag element is created. *
          *
        • * * If the end tag for an element of this {@linkplain StartTag#getName() name} is * {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}, the parser searches not only for the start tag's matching end tag, * but also for any other tag that implicitly terminates the element. *
          For each tag (T2) following the start tag (ST1) of this element (E1): *
            *
          • * If T2 is a start tag: *
              *
            • * If the {@linkplain StartTag#getName() name} of T2 is in the list of * {@linkplain HTMLElements#getNonterminatingElementNames(String) non-terminating element names} for E1, * then continue evaluating tags from the {@linkplain Element#getEnd() end} of T2's corresponding * {@linkplain StartTag#getElement() element}. *
            • * If the {@linkplain StartTag#getName() name} of T2 is in the list of * {@linkplain HTMLElements#getTerminatingStartTagNames(String) terminating start tag names} for E1, * then E1 ends at the {@linkplain StartTag#getBegin() beginning} of T2. * If T2 follows immediately after ST1, a single tag element is created, * otherwise an implicitly terminated element is created. *
            *
          • * If T2 is an end tag: *
              *
            • * If the {@linkplain EndTag#getName() name} of T2 is the same as that of ST1, * an explicitly terminated element is created. *
            • * If the {@linkplain EndTag#getName() name} of T2 is in the list of * {@linkplain HTMLElements#getTerminatingEndTagNames(String) terminating end tag names} for E1, * then E1 ends at the {@linkplain EndTag#getBegin() beginning} of T2. * If T2 follows immediately after ST1, a single tag element is created, * otherwise an implicitly terminated element is created. *
            *
          • * If no more tags are present in the source document, then E1 ends at the end of the file, and an * implicitly terminated element is created. *
          *
        * Note that the syntactical indication of an {@linkplain StartTag#isSyntacticalEmptyElementTag() empty-element tag} in the start tag * is ignored when determining the end of HTML elements. * See the documentation of the {@link #isEmptyElementTag()} method for more information. *
      • * If the {@linkplain StartTag#getName() name} of the start tag does not match one of the * recognised {@linkplain HTMLElementName HTML element names} (indicating a non-HTML element): *
          *
        • * If the start tag is {@linkplain StartTag#isSyntacticalEmptyElementTag() syntactically an empty-element tag}, * the parser does not conduct any search for an end tag and a single tag element is created. *
        • * Otherwise, section 3.1 * of the XML 1.0 specification states that a matching end tag MUST be present, and * the parser searches for the start tag's matching end tag. *
            *
          • * If the matching end tag is found, an explicitly terminated element is created. *
          • * If no matching end tag is found, the source document is not valid XML and the incident is * {@linkplain Source#getLogger() logged} as a missing required end tag. * In this situation a single tag element is created. *
          *
        *
      *
    • * If the start tag has any {@linkplain StartTag#getStartTagType() type} other than {@link StartTagType#NORMAL}: *
        *
      • * If the start tag's type does not define a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}, * the parser does not conduct any search for an end tag and a single tag element is created. *
      • * If the start tag's type does define a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}, * the parser assumes that a matching end tag is required and searches for it. * *
      *
    * @see HTMLElements */ public final class Element extends Segment { private final StartTag startTag; private final EndTag endTag; private Segment content=null; Element parentElement=Element.NOT_CACHED; private int depth=-1; private List childElements=null; static final Element NOT_CACHED=new Element(); private static final boolean INCLUDE_INCORRECTLY_NESTED_CHILDREN_IN_HIERARCHY=true; Element(final Source source, final StartTag startTag, final EndTag endTag) { super(source, startTag.begin, endTag==null ? startTag.end : endTag.end); if (source.isStreamed()) throw new UnsupportedOperationException("Elements are not supported when using StreamedSource"); this.startTag=startTag; this.endTag=(endTag==null || endTag.length()==0) ? null : endTag; } // used only to construct NOT_CACHED private Element() { startTag=null; endTag=null; } /** * Returns the parent of this element in the document element hierarchy. *

    * The {@link Source#fullSequentialParse()} method must be called (either explicitly or implicitly) immediately after construction of the Source object if this method is to be used. * An IllegalStateException is thrown if a full sequential parse has not been performed or if it was performed after this element was found. *

    * This method returns null for a top-level element, * as well as any element formed from a {@linkplain TagType#isServerTag() server tag}, regardless of whether it is nested inside a normal element. *

    * See the {@link Source#getChildElements()} method for more details. * * @return the parent of this element in the document element hierarchy, or null if this element is a top-level element. * @throws IllegalStateException if a {@linkplain Source#fullSequentialParse() full sequential parse} has not been performed or if it was performed after this element was found. * @see #getChildElements() */ public Element getParentElement() { if (parentElement==Element.NOT_CACHED) { if (!source.wasFullSequentialParseCalled()) throw new IllegalStateException("This operation is only possible after a full sequential parse has been performed"); if (startTag.isOrphaned()) throw new IllegalStateException("This operation is only possible if a full sequential parse was performed immediately after construction of the Source object"); source.getChildElements(); if (parentElement==Element.NOT_CACHED) parentElement=null; } return parentElement; } /** * Returns a list of the immediate children of this element in the document element hierarchy. *

    * The objects in the list are all of type {@link Element}. *

    * See the {@link Source#getChildElements()} method for more details. * * @return a list of the immediate children of this element in the document element hierarchy, guaranteed not null. * @see #getParentElement() */ @Override public final List getChildElements() { return childElements!=null ? childElements : getChildElements(-1); } final List getChildElements(int depth) { if (depth!=-1) this.depth=depth; if (childElements==null) { if (!Config.IncludeServerTagsInElementHierarchy && end==startTag.end) { childElements=Collections.emptyList(); } else { final int childDepth=(depth==-1 ? -1 : depth+1); childElements=new ArrayList(); int pos=Config.IncludeServerTagsInElementHierarchy ? begin+1 : startTag.end; final int maxChildBegin=(Config.IncludeServerTagsInElementHierarchy || endTag==null) ? end : endTag.begin; while (true) { final StartTag childStartTag=source.getNextStartTag(pos); if (childStartTag==null || childStartTag.begin>=maxChildBegin) break; if (Config.IncludeServerTagsInElementHierarchy) { if (childStartTag.beginend) { if (source.logger.isInfoEnabled()) source.logger.info("Child "+childElement.getDebugInfo()+" extends beyond end of parent "+getDebugInfo()); if (!INCLUDE_INCORRECTLY_NESTED_CHILDREN_IN_HIERARCHY) { pos=childElement.end; continue; } } childElement.getChildElements(childDepth); if (childElement.parentElement==Element.NOT_CACHED) { // make sure element was not added as a child of a descendent element (can happen with overlapping elements) childElement.parentElement=this; childElements.add(childElement); } pos=childElement.end; } } } return childElements; } /** * Returns the nesting depth of this element in the document element hierarchy. *

    * The {@link Source#fullSequentialParse()} method must be called (either explicitly or implicitly) immediately after construction of the Source object if this method is to be used. * An IllegalStateException is thrown if a full sequential parse has not been performed or if it was performed after this element was found. *

    * A top-level element has a nesting depth of 0. *

    * An element formed from a {@linkplain TagType#isServerTag() server tag} always has a nesting depth of 0, * regardless of whether it is nested inside a normal element. *

    * See the {@link Source#getChildElements()} method for more details. * * @return the nesting depth of this element in the document element hierarchy. * @throws IllegalStateException if a {@linkplain Source#fullSequentialParse() full sequential parse} has not been performed or if it was performed after this element was found. * @see #getParentElement() */ public int getDepth() { if (depth==-1) { getParentElement(); if (depth==-1) depth=0; } return depth; } /** * Returns the segment representing the content of the element. *

    * This segment spans between the end of the start tag and the start of the end tag. * If the end tag is not present, the content reaches to the end of the element. *

    * A zero-length segment is returned if the element is {@linkplain #isEmpty() empty}, * * @return the segment representing the content of the element, guaranteed not null. */ public Segment getContent() { if (content==null) content=new Segment(source,startTag.end,getContentEnd()); return content; } /** * Returns the start tag of the element. * @return the start tag of the element. */ public StartTag getStartTag() { return startTag; } /** * Returns the end tag of the element. *

    * If the element has no end tag this method returns null. * * @return the end tag of the element, or null if the element has no end tag. */ public EndTag getEndTag() { return endTag; } /** * Returns the {@linkplain StartTag#getName() name} of the {@linkplain #getStartTag() start tag} of this element, always in lower case. *

    * This is equivalent to {@link #getStartTag()}.{@link StartTag#getName() getName()}. *

    * See the {@link Tag#getName()} method for more information. * * @return the name of the {@linkplain #getStartTag() start tag} of this element, always in lower case. */ public String getName() { return startTag.getName(); } /** * Indicates whether this element has zero-length {@linkplain #getContent() content}. *

    * This is equivalent to {@link #getContent()}.{@link Segment#length() length()}==0. *

    * Note that this is a broader definition than that of both the * HTML definition of an empty element, * which is only those elements whose end tag is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}, and the * XML definition of an empty element, * which is "either a start-tag immediately followed by an end-tag, or an {@linkplain #isEmptyElementTag() empty-element tag}". * The other possibility covered by this property is the case of an HTML element with an * {@linkplain HTMLElements#getEndTagOptionalElementNames() optional} end tag that is immediately followed by another tag that implicitly * terminates the element. * * @return true if this element has zero-length {@linkplain #getContent() content}, otherwise false. * @see #isEmptyElementTag() */ public boolean isEmpty() { return startTag.end==getContentEnd(); } /** * Indicates whether this element is an empty-element tag. *

    * This is equivalent to {@link #getStartTag()}.{@link StartTag#isEmptyElementTag() isEmptyElementTag()}. * * @return true if this element is an empty-element tag, otherwise false. */ public boolean isEmptyElementTag() { return startTag.isEmptyElementTag(); } /** * Returns the attributes specified in this element's start tag. *

    * This is equivalent to {@link #getStartTag()}.{@link StartTag#getAttributes() getAttributes()}. * * @return the attributes specified in this element's start tag. * @see StartTag#getAttributes() */ public Attributes getAttributes() { return getStartTag().getAttributes(); } /** * Returns the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name (case insensitive). *

    * Returns null if the {@linkplain #getStartTag() start tag of this element} does not * {@linkplain StartTagType#hasAttributes() have attributes}, * no attribute with the specified name exists or the attribute {@linkplain Attribute#hasValue() has no value}. *

    * This is equivalent to {@link #getStartTag()}.{@link StartTag#getAttributeValue(String) getAttributeValue(attributeName)}. * * @param attributeName the name of the attribute to get. * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name, or null if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}. */ public String getAttributeValue(final String attributeName) { return getStartTag().getAttributeValue(attributeName); } /** * Returns the {@link FormControl} defined by this element. * @return the {@link FormControl} defined by this element, or null if it is not a control. */ public FormControl getFormControl() { return FormControl.construct(this); } public String getDebugInfo() { if (this==NOT_CACHED) return "NOT_CACHED"; final StringBuilder sb=new StringBuilder(); sb.append("Element "); startTag.appendDebugTag(sb); if (!isEmpty()) sb.append('-'); if (endTag!=null) sb.append(endTag); sb.append(' '); startTag.appendDebugTagType(sb); sb.append(super.getDebugInfo()); return sb.toString(); } int getContentEnd() { return endTag!=null ? endTag.begin : end; } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeMasonNamedBlock.java0000644000175000017500000000330511204550410030147 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; final class StartTagTypeMasonNamedBlock extends StartTagTypeGenericImplementation { protected static final StartTagTypeMasonNamedBlock INSTANCE=new StartTagTypeMasonNamedBlock(); private StartTagTypeMasonNamedBlock() { super("mason named block","<%",">",EndTagTypeMasonNamedBlock.INSTANCE,true,false,true); } protected Tag constructTagAt(final Source source, final int pos) { final Tag tag=super.constructTagAt(source,pos); if (tag==null) return null; // A mason named block does not have a '%' before its closing '>' delimiter and requires a matching end tag. if (source.charAt(tag.getEnd()-2)=='%') return null; // this is a common server tag, not a named block if (source.getNextEndTag(tag.getEnd(),tag.getName(),getCorrespondingEndTagType())==null) return null; return tag; } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeMarkupDeclaration.java0000644000175000017500000000413611204550410030562 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; class StartTagTypeMarkupDeclaration extends StartTagTypeGenericImplementation { static final StartTagTypeMarkupDeclaration INSTANCE=new StartTagTypeMarkupDeclaration(); static final String ELEMENT="!element"; static final String ATTLIST="!attlist"; static final String ENTITY="!entity"; static final String NOTATION="!notation"; private StartTagTypeMarkupDeclaration() { super("markup declaration","",null,false,false,true); } protected Tag constructTagAt(final Source source, final int pos) { final Tag tag=super.constructTagAt(source,pos); if (tag==null) return null; final String name=tag.getName(); if (name!=ELEMENT && name!=ATTLIST && name!=ENTITY && name!=NOTATION) return null; // can use == instead of .equals() because the names are in HtmlElements.CONSTANT_NAME_MAP return tag; } protected int getEnd(final Source source, int pos) { final ParseText parseText=source.getParseText(); boolean insideQuotes=false; do { final char c=parseText.charAt(pos); if (c=='"') { insideQuotes=!insideQuotes; } else if (c=='>' && !insideQuotes) { return pos+1; } } while ((++pos) * This provides a human readable version of the segment content that is modelled on the way * Mozilla Thunderbird and other email clients provide an automatic conversion of * HTML content to text in their alternative MIME encoding of emails. *

    * The output using default settings complies with the "text/plain; format=flowed" (DelSp=No) protocol described in * RFC3676. *

    * Many properties are available to customise the output, the most significant of which is probably {@link #setMaxLineLength(int) MaxLineLength}. * See the individual property descriptions for details. *

    * Use one of the following methods to obtain the output: *

      *
    • {@link #writeTo(Writer)}
    • *
    • {@link #appendTo(Appendable)}
    • *
    • {@link #toString()}
    • *
    • {@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}
    • *
    *

    * The rendering of some constructs, especially tables, is very rudimentary. * No attempt is made to render nested tables properly, except to ensure that all of the text content is included in the output. *

    * Rendering an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically. *

    * Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions. *

    * To extract pure text without any rendering of the markup, use the {@link TextExtractor} class instead. */ public class Renderer implements CharStreamSource { private final Segment rootSegment; private int maxLineLength=76; private String newLine="\r\n"; private boolean includeHyperlinkURLs=true; private boolean decorateFontStyles=false; private boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces; private int blockIndentSize=4; private int listIndentSize=6; private char[] listBullets=new char[] {'*','o','+','#'}; private String tableCellSeparator=" \t"; /** * Constructs a new Renderer based on the specified {@link Segment}. * @param segment the segment containing the HTML to be rendered. * @see Segment#getRenderer() */ public Renderer(final Segment segment) { rootSegment=segment; } // Documentation inherited from CharStreamSource public void writeTo(final Writer writer) throws IOException { appendTo(writer); writer.flush(); } // Documentation inherited from CharStreamSource public void appendTo(final Appendable appendable) throws IOException { new Processor(this,rootSegment,getMaxLineLength(),getNewLine(),getIncludeHyperlinkURLs(),getDecorateFontStyles(),getConvertNonBreakingSpaces(),getBlockIndentSize(),getListIndentSize(),getListBullets(),getTableCellSeparator()).appendTo(appendable); } // Documentation inherited from CharStreamSource public long getEstimatedMaximumOutputLength() { return rootSegment.length(); } // Documentation inherited from CharStreamSource public String toString() { return CharStreamSourceUtil.toString(this); } /** * Sets the column at which lines are to be wrapped. *

    * Lines that would otherwise exceed this length are wrapped onto a new line at a word boundary. *

    * A line may still exceed this length if it consists of a single word, where the length of the word plus the line indent exceeds the maximum length. * In this case the line is wrapped immediately after the end of the word. *

    * The default value is 76, which reflects the maximum line length for sending * email data specified in RFC2049 section 3.5. * * @param maxLineLength the column at which lines are to be wrapped. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getMaxLineLength() */ public Renderer setMaxLineLength(final int maxLineLength) { this.maxLineLength=maxLineLength; return this; } /** * Returns the column at which lines are to be wrapped. *

    * See the {@link #setMaxLineLength(int)} method for a full description of this property. * * @return the column at which lines are to be wrapped. */ public int getMaxLineLength() { return maxLineLength; } /** * Sets the string to be used to represent a newline in the output. *

    * The default value is "\r\n" (CR+LF) regardless of the platform on which the library is running. * This is so that the default configuration produces valid * MIME text/plain output, which mandates the use of CR+LF for line breaks. *

    * Specifying a null argument causes the output to use same new line string as is used in the source document, which is * determined via the {@link Source#getNewLine()} method. * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document, * or using the value from the static {@link Config#NewLine} property. * * @param newLine the string to be used to represent a newline in the output, may be null. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getNewLine() */ public Renderer setNewLine(final String newLine) { this.newLine=newLine; return this; } /** * Returns the string to be used to represent a newline in the output. *

    * See the {@link #setNewLine(String)} method for a full description of this property. * * @return the string to be used to represent a newline in the output. */ public String getNewLine() { if (newLine==null) newLine=rootSegment.source.getBestGuessNewLine(); return newLine; } /** * Sets whether hyperlink URL's are included in the output. *

    * The default value is true. *

    * When this property is true, the URL of each hyperlink is included in the output as determined by the implementation of the * {@link #renderHyperlinkURL(StartTag)} method. *

    *

    *
    Example:
    *
    *

    * Assuming the default implementation of {@link #renderHyperlinkURL(StartTag)}, when this property is true, the following HTML: *

    * <a href="http://jericho.htmlparser.net/">Jericho HTML Parser</a> *
    * produces the following output: *
    * Jericho HTML Parser <http://jericho.htmlparser.net/> *
    *
    *
    * * @param includeHyperlinkURLs specifies whether hyperlink URL's are included in the output. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getIncludeHyperlinkURLs() */ public Renderer setIncludeHyperlinkURLs(final boolean includeHyperlinkURLs) { this.includeHyperlinkURLs=includeHyperlinkURLs; return this; } /** * Indicates whether hyperlink URL's are included in the output. *

    * See the {@link #setIncludeHyperlinkURLs(boolean)} method for a full description of this property. * * @return true if hyperlink URL's are included in the output, otherwise false. */ public boolean getIncludeHyperlinkURLs() { return includeHyperlinkURLs; } /** * Renders the hyperlink URL from the specified {@link StartTag}. *

    * A return value of null indicates that the hyperlink URL should not be rendered at all. *

    * The default implementation of this method returns null if the href attribute of the specified start tag * is '#', starts with "javascript:", or is missing. * In all other cases it returns the value of the href attribute enclosed in angle brackets. *

    * See the documentation of the {@link #setIncludeHyperlinkURLs(boolean)} method for an example of how a hyperlink is rendered by the default implementation. *

    * This method can be overridden in a subclass to customise the rendering of hyperlink URLs. *

    * Rendering of hyperlink URLs can be disabled completely without overriding this method by setting the * {@link #setIncludeHyperlinkURLs(boolean) IncludeHyperlinkURLs} property to false. *

    * <dl>
    *  <dt>Example:</dt>
    *  <dd>
    *   To render hyperlink URLs without the enclosing angle brackets:
    *   <pre>
    * Renderer renderer=new Renderer(segment) {
    *     public String renderHyperlinkURL(StartTag startTag) {
    *         String href=startTag.getAttributeValue("href");
    *         if (href==null || href.equals("#") || href.startsWith("javascript:")) return null;
    *         return href;
    *     }
    * };
    * String renderedSegment=renderer.toString();</pre>
    *  </dd>
    * </dl>
    * @param startTag the start tag of the hyperlink element, must not be <code>null</code>.
    * @return The rendered hyperlink URL from the specified {@link StartTag}, or <code>null</code> if the hyperlink URL should not be rendered.
    */
    public String renderHyperlinkURL(final StartTag startTag) {
        final String href=startTag.getAttributeValue("href");
        // No href attribute: nothing to render.
        if (href==null) return null;
        // A bare "#" placeholder or a javascript: pseudo-URL is not rendered.
        if ("#".equals(href)) return null;
        if (href.startsWith("javascript:")) return null;
        // Default rendering encloses the URL in angle brackets.
        return "<"+href+">";
    }

    /**
    * Sets whether decoration characters are to be included around the content of some
    * font style elements and
    * phrase elements.
    *

    * The default value is false. *

    * Below is a table summarising the decorated elements. *

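    * <table>
    *  <tr><th>Elements</th><th>Character</th><th>Example Output</th></tr>
    *  <tr><td>{@link HTMLElementName#B B} and {@link HTMLElementName#STRONG STRONG}</td><td><code>*</code></td><td><code>*bold text*</code></td></tr>
    *  <tr><td>{@link HTMLElementName#I I} and {@link HTMLElementName#EM EM}</td><td><code>/</code></td><td><code>/italic text/</code></td></tr>
    *  <tr><td>{@link HTMLElementName#U U}</td><td><code>_</code></td><td><code>_underlined text_</code></td></tr>
    *  <tr><td>{@link HTMLElementName#CODE CODE}</td><td><code>|</code></td><td><code>|code|</code></td></tr>
    * </table>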
    *
    * @param decorateFontStyles specifies whether decoration characters are to be included around the content of some font style elements.
    * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
    * @see #getDecorateFontStyles()
    */
    public Renderer setDecorateFontStyles(final boolean decorateFontStyles) {
        // The flag is only stored here; returning this instance allows setter chaining.
        this.decorateFontStyles = decorateFontStyles;
        return this;
    }

    /**
    * Indicates whether decoration characters are to be included around the content of some
    * font style elements and
    * phrase elements.
    *

    * See the {@link #setDecorateFontStyles(boolean)} method for a full description of this property.
    *
    * @return <code>true</code> if decoration characters are to be included around the content of some font style elements, otherwise <code>false</code>.
    */
    public boolean getDecorateFontStyles() {
        return this.decorateFontStyles;
    }

    /**
    * Sets whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
    *

    * The default value is that of the static {@link Config#ConvertNonBreakingSpaces} property at the time the Renderer is instantiated. * * @param convertNonBreakingSpaces specifies whether non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character entity references are converted to spaces. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getConvertNonBreakingSpaces() */ public Renderer setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces) { this.convertNonBreakingSpaces=convertNonBreakingSpaces; return this; } /** * Indicates whether non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character entity references are converted to spaces. *

    * See the {@link #setConvertNonBreakingSpaces(boolean)} method for a full description of this property.
    *
    * @return <code>true</code> if non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces, otherwise <code>false</code>.
    */
    public boolean getConvertNonBreakingSpaces() {
        return this.convertNonBreakingSpaces;
    }

    /**
    * Sets the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
    *

    * At present this applies to {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE} and {@link HTMLElementName#DD DD} elements. *

    * The default value is <code>4</code>.
    *
    * @param blockIndentSize the size of the indent.
    * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
    * @see #getBlockIndentSize()
    */
    public Renderer setBlockIndentSize(final int blockIndentSize) {
        // The value is stored as given (no range check is applied here).
        this.blockIndentSize = blockIndentSize;
        return this;
    }

    /**
    * Returns the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
    *

    * See the {@link #setBlockIndentSize(int)} method for a full description of this property.
    *
    * @return the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
    */
    public int getBlockIndentSize() {
        return this.blockIndentSize;
    }

    /**
    * Sets the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
    *

    * The default value is 6. *

    * This applies to {@link HTMLElementName#LI LI} elements inside both {@link HTMLElementName#UL UL} and {@link HTMLElementName#OL OL} elements. *

    * The bullet or number of the list item is included as part of the indent.
    *
    * @param listIndentSize the size of the indent.
    * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
    * @see #getListIndentSize()
    */
    public Renderer setListIndentSize(final int listIndentSize) {
        // The value is stored as given (no range check is applied here).
        this.listIndentSize = listIndentSize;
        return this;
    }

    /**
    * Returns the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
    *

    * See the {@link #setListIndentSize(int)} method for a full description of this property.
    *
    * @return the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
    */
    public int getListIndentSize() {
        return this.listIndentSize;
    }

    /**
    * Sets the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
    *

    * The values in the default array are *, o, + and #. *

    * If the nesting of rendered lists goes deeper than the length of this array, the bullet characters start repeating from the first in the array. *

    * WARNING: If any of the characters in the default array are modified, this will affect all other instances of this class using the default array. * * @param listBullets an array of characters to be used as bullets, must have at least one entry. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getListBullets() */ public Renderer setListBullets(final char[] listBullets) { if (listBullets==null || listBullets.length==0) throw new IllegalArgumentException("listBullets argument must be an array of at least one character"); this.listBullets=listBullets; return this; } /** * Returns the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements. *

    * See the {@link #setListBullets(char[])} method for a full description of this property.
    *
    * @return the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
    */
    public char[] getListBullets() {
        // Returns the internal array itself, not a copy.
        return this.listBullets;
    }

    /**
    * Sets the string that is to separate table cells.
    *

    * The default value is <code>" \t"</code> (a space followed by a tab).
    *
    * @param tableCellSeparator the string that is to separate table cells.
    * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
    * @see #getTableCellSeparator()
    */
    public Renderer setTableCellSeparator(final String tableCellSeparator) {
        // The value is stored as given (no null check is applied here).
        this.tableCellSeparator = tableCellSeparator;
        return this;
    }

    /**
    * Returns the string that is to separate table cells.
    *

    * See the {@link #setTableCellSeparator(String)} method for a full description of this property.
    *
    * @return the string that is to separate table cells.
    */
    public String getTableCellSeparator() {
        return this.tableCellSeparator;
    }

    /** This class does the actual work, but is first passed final copies of all the parameters for efficiency. */
    private static final class Processor {
        private final Renderer renderer;
        private final Segment rootSegment;
        private final Source source;
        private final int maxLineLength;
        private final String newLine;
        private final boolean includeHyperlinkURLs;
        private final boolean decorateFontStyles;
        private final boolean convertNonBreakingSpaces;
        private final int blockIndentSize;
        private final int listIndentSize;
        private final char[] listBullets;
        private final String tableCellSeparator;

        private Appendable appendable;
        // keeps track of where rendering is up to in case of overlapping elements
        private int renderedIndex;
        private boolean atStartOfLine;
        private int col;
        private int blockIndentLevel;
        private int listIndentLevel;
        private int blockVerticalMargin; // minimum number of blank lines to output at the current block boundary, or NO_MARGIN (-1) if we are not currently at a block boundary.
private boolean preformatted; private boolean lastCharWhiteSpace; private boolean ignoreInitialWhitespace; private boolean bullet; private int listBulletNumber; private static final int NO_MARGIN=-1; private static final int UNORDERED_LIST=-1; private static Map ELEMENT_HANDLERS=new HashMap(); static { ELEMENT_HANDLERS.put(HTMLElementName.A,A_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.ADDRESS,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.APPLET,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.B,FontStyleElementHandler.INSTANCE_B); ELEMENT_HANDLERS.put(HTMLElementName.BLOCKQUOTE,StandardBlockElementHandler.INSTANCE_1_1_INDENT); ELEMENT_HANDLERS.put(HTMLElementName.BR,BR_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.BUTTON,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.CAPTION,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.CENTER,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.CODE,FontStyleElementHandler.INSTANCE_CODE); ELEMENT_HANDLERS.put(HTMLElementName.DD,StandardBlockElementHandler.INSTANCE_0_0_INDENT); ELEMENT_HANDLERS.put(HTMLElementName.DIR,ListElementHandler.INSTANCE_UL); ELEMENT_HANDLERS.put(HTMLElementName.DIV,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.DT,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.EM,FontStyleElementHandler.INSTANCE_I); ELEMENT_HANDLERS.put(HTMLElementName.FIELDSET,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.FORM,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.H1,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H2,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H3,StandardBlockElementHandler.INSTANCE_2_1); 
ELEMENT_HANDLERS.put(HTMLElementName.H4,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H5,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H6,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.HEAD,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.HR,HR_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.I,FontStyleElementHandler.INSTANCE_I); ELEMENT_HANDLERS.put(HTMLElementName.LEGEND,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.LI,LI_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.MENU,ListElementHandler.INSTANCE_UL); ELEMENT_HANDLERS.put(HTMLElementName.MAP,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.NOFRAMES,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.NOSCRIPT,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.OL,ListElementHandler.INSTANCE_OL); ELEMENT_HANDLERS.put(HTMLElementName.P,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.PRE,PRE_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.SCRIPT,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.SELECT,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.STRONG,FontStyleElementHandler.INSTANCE_B); ELEMENT_HANDLERS.put(HTMLElementName.STYLE,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TEXTAREA,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TD,TD_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TH,TD_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TR,TR_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.U,FontStyleElementHandler.INSTANCE_U); ELEMENT_HANDLERS.put(HTMLElementName.UL,ListElementHandler.INSTANCE_UL); } public Processor(final Renderer renderer, final Segment rootSegment, final int maxLineLength, final 
String newLine, final boolean includeHyperlinkURLs, final boolean decorateFontStyles, final boolean convertNonBreakingSpaces, final int blockIndentSize, final int listIndentSize, final char[] listBullets, final String tableCellSeparator) { this.renderer=renderer; this.rootSegment=rootSegment; source=rootSegment.source; this.maxLineLength=maxLineLength; this.newLine=newLine; this.includeHyperlinkURLs=includeHyperlinkURLs; this.decorateFontStyles=decorateFontStyles; this.convertNonBreakingSpaces=convertNonBreakingSpaces; this.blockIndentSize=blockIndentSize; this.listIndentSize=listIndentSize; this.listBullets=listBullets; this.tableCellSeparator=tableCellSeparator; } public void appendTo(final Appendable appendable) throws IOException { reset(); this.appendable=appendable; appendSegmentProcessingChildElements(rootSegment.begin,rootSegment.end,rootSegment.getChildElements()); } private void reset() { renderedIndex=0; atStartOfLine=true; col=0; blockIndentLevel=0; listIndentLevel=0; blockVerticalMargin=NO_MARGIN; preformatted=false; lastCharWhiteSpace=ignoreInitialWhitespace=false; bullet=false; } private void appendElementContent(final Element element) throws IOException { final int contentEnd=element.getContentEnd(); if (element.isEmpty() || renderedIndex>=contentEnd) return; final int contentBegin=element.getStartTag().end; appendSegmentProcessingChildElements(Math.max(renderedIndex,contentBegin),contentEnd,element.getChildElements()); } private void appendSegmentProcessingChildElements(final int begin, final int end, final List childElements) throws IOException { int index=begin; for (Element childElement : childElements) { if (index>=childElement.end) continue; if (index=end) break; appendSegment(index,tag.begin); index=tag.end; } appendSegment(index,end); } private void appendSegment(int begin, final int end) throws IOException { assert begin<=end; if (begin=end) return; try { if (preformatted) appendPreformattedSegment(begin,end); else 
appendNonPreformattedSegment(begin,end); } finally { if (renderedIndex=renderedIndex; if (isStartOfBlock()) appendBlockVerticalMargin(); final String text=CharacterReference.decode(source.subSequence(begin,end),false,convertNonBreakingSpaces); for (int i=0; i=renderedIndex; final String text=CharacterReference.decodeCollapseWhiteSpace(source.subSequence(begin,end),convertNonBreakingSpaces); if (text.length()==0) { if (!ignoreInitialWhitespace) lastCharWhiteSpace=true; return; } if (isStartOfBlock()) { appendBlockVerticalMargin(); } else if (lastCharWhiteSpace || (Segment.isWhiteSpace(source.charAt(begin)) && !ignoreInitialWhitespace)) { append(' '); } int textIndex=0; int i=0; lastCharWhiteSpace=ignoreInitialWhitespace=false; while (true) { for (; i" or "From ". if (i+1') continue; if (i+6=maxLineLength) { if (lastCharWhiteSpace && (blockIndentLevel|listIndentLevel)==0) append(' '); startNewLine(0); } else if (lastCharWhiteSpace) { append(' '); } append(text,textIndex,i); if (i==text.length()) break; lastCharWhiteSpace=true; textIndex=++i; } lastCharWhiteSpace=Segment.isWhiteSpace(source.charAt(end-1)); } private boolean isStartOfBlock() { return blockVerticalMargin!=NO_MARGIN; } private void appendBlockVerticalMargin() throws IOException { assert blockVerticalMargin!=NO_MARGIN; startNewLine(blockVerticalMargin); blockVerticalMargin=NO_MARGIN; } private void blockBoundary(final int verticalMargin) throws IOException { // Set a block boundary with the given vertical margin. The vertical margin is the minimum number of blank lines to output between the blocks. // This method can be called multiple times at a block boundary, and the next textual output will output the number of blank lines determined by the // maximum vertical margin of all the method calls. 
if (blockVerticalMargin0; i--) appendable.append(' '); if (bullet) { for (int i=(listIndentLevel-1)*listIndentSize; i>0; i--) appendable.append(' '); if (listBulletNumber==UNORDERED_LIST) { for (int i=listIndentSize-2; i>0; i--) appendable.append(' '); appendable.append(listBullets[(listIndentLevel-1)%listBullets.length]).append(' '); } else { String bulletNumberString=Integer.toString(listBulletNumber); for (int i=listIndentSize-bulletNumberString.length()-2; i>0; i--) appendable.append(' '); appendable.append(bulletNumberString).append(". "); } bullet=false; } else { for (int i=listIndentLevel*listIndentSize; i>0; i--) appendable.append(' '); } col=blockIndentLevel*blockIndentSize+listIndentLevel*listIndentSize; atStartOfLine=false; } private Processor append(final char ch) throws IOException { if (atStartOfLine) appendIndent(); appendable.append(ch); col++; return this; } private Processor append(final String text) throws IOException { if (atStartOfLine) appendIndent(); appendable.append(text); col+=text.length(); return this; } private void append(final CharSequence text, final int begin, final int end) throws IOException { if (atStartOfLine) appendIndent(); for (int i=begin; i=x.maxLineLength) { x.startNewLine(0); } else { x.append(' '); } x.append(renderedHyperlinkURL); x.lastCharWhiteSpace=true; } } private static class BR_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new BR_ElementHandler(); public void process(Processor x, Element element) throws IOException { x.newLine(); x.blockBoundary(0); } } private static class HR_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new HR_ElementHandler(); public void process(Processor x, Element element) throws IOException { x.blockBoundary(0); x.appendBlockVerticalMargin(); for (int i=0; i<72; i++) x.append('-'); x.blockBoundary(0); } } private static class ListElementHandler implements ElementHandler { public static final ElementHandler 
INSTANCE_OL=new ListElementHandler(0); public static final ElementHandler INSTANCE_UL=new ListElementHandler(UNORDERED_LIST); private final int initialListBulletNumber; public ListElementHandler(int initialListBulletNumber) { this.initialListBulletNumber=initialListBulletNumber; } public void process(Processor x, Element element) throws IOException { x.blockBoundary(0); int oldListBulletNumber=x.listBulletNumber; x.listBulletNumber=initialListBulletNumber; x.listIndentLevel++; x.appendElementContent(element); x.listIndentLevel--; x.listBulletNumber=oldListBulletNumber; x.blockBoundary(0); } } private static class LI_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new LI_ElementHandler(); public void process(Processor x, Element element) throws IOException { if (x.listBulletNumber!=UNORDERED_LIST) x.listBulletNumber++; x.bullet=true; x.blockBoundary(0); x.appendBlockVerticalMargin(); x.appendIndent(); x.ignoreInitialWhitespace=true; x.appendElementContent(element); x.bullet=false; x.blockBoundary(0); } } private static class PRE_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new PRE_ElementHandler(); public void process(Processor x, Element element) throws IOException { x.blockBoundary(1); boolean oldPreformatted=x.preformatted; // should always be false x.preformatted=true; x.appendElementContent(element); x.preformatted=oldPreformatted; x.blockBoundary(1); } } private static class TD_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new TD_ElementHandler(); public void process(Processor x, Element element) throws IOException { if (!x.isStartOfBlock()) x.append(x.tableCellSeparator); x.lastCharWhiteSpace=false; x.appendElementContent(element); } } private static class TR_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new TR_ElementHandler(); public void process(Processor x, Element element) throws IOException { 
x.blockBoundary(0); x.appendElementContent(element); x.blockBoundary(0); } } } } jericho-html-3.1/src/java/net/htmlparser/jericho/LoggerFactory.java0000644000175000017500000000530411204550410025406 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; final class LoggerFactory { private static LoggerProvider defaultLoggerProvider=null; public static Logger getLogger(final String name) { return getLoggerProvider().getLogger(name); } public static Logger getLogger(final Class loggedClass) { return getLogger(loggedClass.getName()); } public static LoggerProvider getLoggerProvider() { return (Config.LoggerProvider!=null) ? 
Config.LoggerProvider : getDefaultLoggerProvider(); } private static LoggerProvider getDefaultLoggerProvider() { if (defaultLoggerProvider==null) defaultLoggerProvider=determineDefaultLoggerProvider(); return defaultLoggerProvider; } private static LoggerProvider determineDefaultLoggerProvider() { if (isClassAvailable("org.slf4j.impl.StaticLoggerBinder")) { if (isClassAvailable("org.slf4j.impl.JDK14LoggerFactory")) return LoggerProvider.JAVA; if (isClassAvailable("org.slf4j.impl.Log4jLoggerFactory")) return LoggerProvider.LOG4J; if (!isClassAvailable("org.slf4j.impl.JCLLoggerFactory")) return LoggerProvider.SLF4J; // fall through to next check if SLF4J is configured to use JCL } if (isClassAvailable("org.apache.commons.logging.Log")) { final String logClassName=org.apache.commons.logging.LogFactory.getLog("test").getClass().getName(); if (logClassName.equals("org.apache.commons.logging.impl.Jdk14Logger")) return LoggerProvider.JAVA; if (logClassName.equals("org.apache.commons.logging.impl.Log4JLogger")) return LoggerProvider.LOG4J; return LoggerProvider.JCL; } if (isClassAvailable("org.apache.log4j.Logger")) return LoggerProvider.LOG4J; return LoggerProvider.JAVA; } private static boolean isClassAvailable(final String className) { try { Class.forName(className); return true; } catch (Throwable t) { return false; } } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeXMLDeclaration.java0000644000175000017500000000225511204550410027763 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; final class StartTagTypeXMLDeclaration extends StartTagTypeGenericImplementation { static final StartTagTypeXMLDeclaration INSTANCE=new StartTagTypeXMLDeclaration(); private StartTagTypeXMLDeclaration() { super("XML declaration","",null,false,true,false); } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeComment.java0000644000175000017500000000246411204550410026561 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; // note that according to the spec, whitespace may be present between the "--" and ">" of the end delimiter, but ignoring this probably yields results consistent with most browsers final class StartTagTypeComment extends StartTagTypeGenericImplementation { static final StartTagTypeComment INSTANCE=new StartTagTypeComment(); private StartTagTypeComment() { super("comment","",null,false); } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypePHPStandard.java0000644000175000017500000000224211204550410027261 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; final class StartTagTypePHPStandard extends StartTagTypeGenericImplementation { protected static final StartTagTypePHPStandard INSTANCE=new StartTagTypePHPStandard(); private StartTagTypePHPStandard() { super("PHP standard tag","",null,true); } } jericho-html-3.1/src/java/net/htmlparser/jericho/LoggerProviderSTDERR.java0000644000175000017500000000236011204550410026514 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

import java.util.logging.*;
import java.io.*;

/**
 * {@link LoggerProvider} implementation whose loggers write to the standard error stream.
 * <p>
 * The class cannot be instantiated from outside; the single shared instance is {@link #INSTANCE}.
 */
final class LoggerProviderSTDERR implements LoggerProvider {
	/** The only instance of this provider. */
	public static final LoggerProvider INSTANCE=new LoggerProviderSTDERR();

	/** Private so that {@link #INSTANCE} is the only instance. */
	private LoggerProviderSTDERR() {}

	/**
	 * Creates a logger with the specified name that writes to <code>System.err</code>.
	 * <p>
	 * Every call builds a new <code>WriterLogger</code> around a new <code>OutputStreamWriter</code>;
	 * nothing is cached by this method. The writer is created without an explicit charset.
	 *
	 * @param name the name passed through to the created logger.
	 * @return a new logger writing to the standard error stream.
	 */
	public Logger getLogger(final String name) {
		return new WriterLogger(new OutputStreamWriter(System.err),name);
	}
}
jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeDoctypeDeclaration.java0000644000175000017500000000352611204550410030734 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho; final class StartTagTypeDoctypeDeclaration extends StartTagTypeGenericImplementation { static final StartTagTypeDoctypeDeclaration INSTANCE=new StartTagTypeDoctypeDeclaration(); private StartTagTypeDoctypeDeclaration() { super("document type declaration","",null,false,false,false); } protected int getEnd(final Source source, int pos) { final ParseText parseText=source.getParseText(); boolean insideQuotes=false; boolean insideSquareBrackets=false; do { final char c=parseText.charAt(pos); if (insideQuotes) { if (c=='"') insideQuotes=false; } else { switch (c) { case '>': if (!insideSquareBrackets) return pos+1; break; case '"': insideQuotes=true; break; case '[': insideSquareBrackets=true; break; case ']': insideSquareBrackets=false; break; } } } while ((++pos) * This class has been removed from the public API and the functionality replaced with the * {@link OutputDocument#ReplaceWithSpaces(int begin, int end)} method. */ final class BlankOutputSegment implements OutputSegment { private final int begin; private final int end; /** * Constructs a new BlankOutputSegment with the specified begin and end positions. * @param begin the position in the {@link OutputDocument} where this OutputSegment begins. * @param end the position in the {@link OutputDocument} where this OutputSegment ends. */ public BlankOutputSegment(final int begin, final int end) { this.begin=begin; this.end=end; } /** * Constructs a new BlankOutputSegment with the same span as the specified {@link Segment}. * @param segment a {@link Segment} defining the begin and end character positions of the new OutputSegment. 
*/ public BlankOutputSegment(final Segment segment) { this(segment.getBegin(),segment.getEnd()); } public int getBegin() { return begin; } public int getEnd() { return end; } public void writeTo(final Writer writer) throws IOException { appendTo(writer); } public void appendTo(final Appendable appendable) throws IOException { for (int i=begin; iPHP server platform. *

    * There is no specific tag type defined for the * ASP-style PHP tag * as it is recognised using the {@linkplain StartTagType#SERVER_COMMON common server tag type}. *

    * The tag types defined in this class are not {@linkplain TagType#register() registered} by default. * The {@link #register()} method is provided as a convenient way to register them all at once. */ public final class PHPTagTypes { /** * The tag type given to a * standard PHP tag * (<?php ... ?>). *

    * Note that the standard PHP processor includes as part of the tag any newline characters directly following the * {@linkplain TagType#getClosingDelimiter() closing delimiter}, but PHP tags recognised by this library do not include * trailing newlines. They must be removed manually if required. *

    * This library only correctly recognises standard PHP tags that comply with the XML syntax for processing instructions. * Specifically, the tag is terminated by the first occurrence of the {@linkplain TagType#getClosingDelimiter() closing delimiter} * "?>", even if it occurs within a PHP string expression. * Unfortunately there is no reliable way to determine the end of a PHP tag without the use of a PHP parser. * The following code is an example of a standard PHP tag that is not recognised correctly by this parser * because of the presence of the closing delimiter within a string expression: *

    *

    <?php echo("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); ?>
    *

    * This is recognised as the PHP tag: <?php echo("<?xml version=\"1.0\" encoding=\"UTF-8\"?>
    * followed by the plain text: \n"); ?> *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link StartTagType#getDescription() Description}PHP standard tag *
    {@link StartTagType#getStartDelimiter() StartDelimiter}<?php *
    {@link StartTagType#getClosingDelimiter() ClosingDelimiter}?> *
    {@link StartTagType#isServerTag() IsServerTag}true *
    {@link StartTagType#getNamePrefix() NamePrefix}?php *
    {@link StartTagType#getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link StartTagType#hasAttributes() HasAttributes}false *
    {@link StartTagType#isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <?php echo '<p>Hello World</p>'; ?>
    *
    */ public static final StartTagType PHP_STANDARD=StartTagTypePHPStandard.INSTANCE; /** * The tag type given to a * short-form PHP tag * (<? ... ?>). *

    * When this tag type is {@linkplain TagType#register() registered}, all * {@linkplain StartTagType#XML_PROCESSING_INSTRUCTION XML processing instructions} are recognised as short-form PHP tags instead. *

    * The comments in the documentation of the {@link #PHP_STANDARD} tag type regarding the termination of PHP tags and * trailing newlines are also applicable to this tag type. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link StartTagType#getDescription() Description}PHP short tag *
    {@link StartTagType#getStartDelimiter() StartDelimiter}<? *
    {@link StartTagType#getClosingDelimiter() ClosingDelimiter}?> *
    {@link StartTagType#isServerTag() IsServerTag}true *
    {@link StartTagType#getNamePrefix() NamePrefix}? *
    {@link StartTagType#getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link StartTagType#hasAttributes() HasAttributes}false *
    {@link StartTagType#isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <? echo '<p>Hello World</p>'; ?>
    *
    */ public static final StartTagType PHP_SHORT=StartTagTypePHPShort.INSTANCE; /** * The tag type given to a * script-style PHP start tag * (<script language="php"> ... </script>). *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link StartTagType#getDescription() Description}PHP script *
    {@link StartTagType#getStartDelimiter() StartDelimiter}<script *
    {@link StartTagType#getClosingDelimiter() ClosingDelimiter}> *
    {@link StartTagType#isServerTag() IsServerTag}true *
    {@link StartTagType#getNamePrefix() NamePrefix}script *
    {@link StartTagType#getCorrespondingEndTagType() CorrespondingEndTagType}{@link EndTagType#NORMAL} *
    {@link StartTagType#hasAttributes() HasAttributes}true *
    {@link StartTagType#isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <script language="php"> echo '<p>Hello World</p>'; </script>
    *
    */ public static final StartTagType PHP_SCRIPT=StartTagTypePHPScript.INSTANCE; private static final TagType[] TAG_TYPES={ PHP_STANDARD, PHP_SHORT, PHP_SCRIPT }; private PHPTagTypes() {} /** * {@linkplain TagType#register() Registers} all of the tag types defined in this class at once. *

    * The tag types must be registered before the parser will recognise them. */
    public static void register() {
        // Register each entry of TAG_TYPES in array order.
        for (int i=0; i<TAG_TYPES.length; i++) {
            TAG_TYPES[i].register();
        }
    }

    /**
     * Tests whether the given tag type is one of the tag types defined in this class.
     * The comparison is by object identity.
     *
     * @param tagType the {@link TagType} to look for.
     * @return <code>true</code> if <code>tagType</code> is one of the tag types defined in this class, otherwise <code>false</code>.
     */
    public static boolean defines(final TagType tagType) {
        boolean found=false;
        // Stop scanning as soon as an identical entry has been seen.
        for (int i=0; !found && i<TAG_TYPES.length; i++) {
            found=(TAG_TYPES[i]==tagType);
        }
        return found;
    }

    /**
     * Indicates whether the specified tag type is recognised by a PHP parser.
     *

    * This is true if the specified tag type is {@linkplain #defines(TagType) defined in this class} or if it is the * {@linkplain StartTagType#SERVER_COMMON common server tag type}. * * @param tagType the {@link TagType} to test. * @return true if the specified tag type is recognised by a PHP parser, otherwise false. */ public static boolean isParsedByPHP(final TagType tagType) { return tagType==StartTagType.SERVER_COMMON || defines(tagType); } } jericho-html-3.1/src/java/net/htmlparser/jericho/Source.java0000644000175000017500000030324011207510014024076 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.Iterator; import java.util.List; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.io.IOException; import java.io.Reader; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import java.util.regex.Pattern; /** * Represents a source HTML document. *

    * The first step in parsing an HTML document is always to construct a Source object from the source data, which can be a * String, Reader, InputStream, URLConnection or URL. * Each constructor uses all the evidence available to determine the original {@linkplain #getEncoding() character encoding} of the data. *

    * Once the Source object has been created, you can immediately start searching for {@linkplain Tag tags} or {@linkplain Element elements} within the document * using the tag search methods. *

    * In certain circumstances you may be able to improve performance by calling the {@link #fullSequentialParse()} method before calling any * tag search methods. See the documentation of the {@link #fullSequentialParse()} method for details. *

    * Any issues encountered while parsing are logged to a {@link Logger} object. * The {@link #setLogger(Logger)} method can be used to explicitly set a Logger implementation for a particular Source instance, * otherwise the static {@link Config#LoggerProvider} property determines how the logger is set by default for all Source instances. * See the documentation of the {@link Config#LoggerProvider} property for information about how the default logging provider is determined. *

    * Note that many of the useful functions which can be performed on the source document are * defined in its superclass, {@link Segment}. * The source object is itself a segment which spans the entire document. *

    * Most of the methods defined in this class are useful for determining the elements and tags * surrounding or neighbouring a particular character position in the document. *

    * For information on how to create a modified version of this source document, see the {@link OutputDocument} class. *

    * Source objects are not thread safe, and should therefore not be shared between multiple threads unless all access is synchronized using * some mechanism external to the library. *

    * If memory usage is a major concern, consider using the {@link StreamedSource} class instead of the Source class. * * @see Segment * @see StreamedSource */ public final class Source extends Segment implements Iterable { private final CharSequence sourceText; private String documentSpecifiedEncoding=UNINITIALISED; private String encoding=UNINITIALISED; // null value means no encoding specified. private String encodingSpecificationInfo; private String preliminaryEncodingInfo=null; private String newLine=UNINITIALISED; private ParseText parseText=null; private OutputDocument parseTextOutputDocument=null; Logger logger; // never null private RowColumnVector[] rowColumnVectorCacheArray=null; final Cache cache; boolean useAllTypesCache=true; boolean useSpecialTypesCache=true; int[] fullSequentialParseData=null; // non-null iff a fullSequentialParse is underway. In version 2.5 this was passed around as a parameter during full sequential parse, but this approach was found to be error-prone and abandoned in 2.6 // cached result lists: Tag[] allTagsArray=null; // non-null iff fullSequentialParse was called List allTags=null; // non-null iff fullSequentialParse was called List allStartTags=null; private List allElements=null; private List childElements=null; private static String lastNewLine=null; private static final String UNINITIALISED=""; private static final String CR="\r"; private static final String LF="\n"; private static final String CRLF="\r\n"; static final String PACKAGE_NAME=Source.class.getPackage().getName(); // "net.htmlparser.jericho" /** * Constructs a new Source object from the specified text. * @param text the source text. 
*/ public Source(final CharSequence text) { super(text.length()); sourceText=text.toString(); setLogger(newLogger()); cache=new Cache(this); } private Source(final EncodingDetector encodingDetector) throws IOException { this(getString(encodingDetector)); encoding=encodingDetector.getEncoding(); encodingSpecificationInfo=encodingDetector.getEncodingSpecificationInfo(); preliminaryEncodingInfo=encodingDetector.getPreliminaryEncoding()+": "+encodingDetector.getPreliminaryEncodingSpecificationInfo(); } Source(final Reader reader, final String encoding) throws IOException { this(Util.getString(reader)); if (encoding!=null) { this.encoding=encoding; encodingSpecificationInfo="InputStreamReader.getEncoding() of constructor argument"; } } // Only called from StreamedSource: Source(final CharSequence sourceText, final StreamedParseText streamedParseText, final String encoding, final String encodingSpecificationInfo, final String preliminaryEncodingInfo) { super(streamedParseText.getEnd()); // normally Integer.MAX_VALUE unless called from StreamedSource(CharSequence) cache=Cache.STREAMED_SOURCE_MARKER; useAllTypesCache=false; useSpecialTypesCache=false; fullSequentialParseData=new int[1]; if (encoding!=null) this.encoding=encoding; this.encodingSpecificationInfo=encodingSpecificationInfo; this.preliminaryEncodingInfo=preliminaryEncodingInfo; this.sourceText=sourceText; parseText=streamedParseText; setLogger(newLogger()); } // only called from CharacterReference.parse(CharSequence) Source(final CharSequence sourceText, final boolean CHARACTER_REFERENCE_PARSE_METHOD) { super(sourceText.length()); this.sourceText=sourceText; cache=null; useAllTypesCache=false; useSpecialTypesCache=false; setLogger(LoggerDisabled.INSTANCE); } /** * Constructs a new Source object by loading the content from the specified Reader. *

    * If the specified reader is an instance of InputStreamReader, the {@link #getEncoding()} method of the * created Source object returns the encoding from InputStreamReader.getEncoding(). * * @param reader the java.io.Reader from which to load the source text. * @throws java.io.IOException if an I/O error occurs. */ public Source(final Reader reader) throws IOException { this(reader,(reader instanceof InputStreamReader) ? ((InputStreamReader)reader).getEncoding() : null); } /** * Constructs a new Source object by loading the content from the specified InputStream. *

    * The algorithm for detecting the character {@linkplain #getEncoding() encoding} of the source document from the raw bytes * of the specified input stream is the same as that for the {@link #Source(URLConnection)} constructor, * except that the first step is not possible as there is no * Content-Type header to check. * * @param inputStream the java.io.InputStream from which to load the source text. * @throws java.io.IOException if an I/O error occurs. * @see #getEncoding() */ public Source(final InputStream inputStream) throws IOException { this(new EncodingDetector(inputStream)); } /** * Constructs a new Source object by loading the content from the specified URL. *

    * This is equivalent to {@link #Source(URLConnection) Source(url.openConnection())}. * * @param url the URL from which to load the source text. * @throws java.io.IOException if an I/O error occurs. * @see #getEncoding() */ public Source(final URL url) throws IOException { this(new EncodingDetector(url.openConnection())); } /** * Constructs a new Source object by loading the content from the specified URLConnection. *

    * The algorithm for detecting the character {@linkplain #getEncoding() encoding} of the source document is as follows: *
    (process termination is marked by ♦) *

      *
    1. If the HTTP headers received from the URL connection include a * Content-Type header * specifying a charset parameter, then use the encoding specified in the value of the charset parameter. ♦ *
    2. Read the first four bytes of the input stream. *
    3. If the input stream is empty, the created source document has zero length and its {@link #getEncoding()} method * returns null. ♦ *
    4. If the input stream starts with a unicode Byte Order Mark (BOM), * then use the encoding signified by the BOM. ♦ * * * * * * * * * * * *
      BOM Bytes | Encoding
      EF BB BF | UTF-8
      FF FE 00 00 | UTF-32 (little-endian)
      00 00 FE FF | UTF-32 (big-endian)
      FF FE | UTF-16 (little-endian)
      FE FF | UTF-16 (big-endian)
      0E FE FF | SCSU
      2B 2F 76 | UTF-7
      DD 73 66 73 | UTF-EBCDIC
      FB EE 28 | BOCU-1
      *
    5. If the stream contains less than four bytes, then: *
        *
      1. If the stream contains either one or three bytes, then use the encoding ISO-8859-1. ♦ *
      2. If the stream starts with a zero byte, then use the encoding UTF-16BE. ♦ *
      3. If the second byte of the stream is zero, then use the encoding UTF-16LE. ♦ *
      4. Otherwise use the encoding ISO-8859-1. ♦ *
      *
    6. Determine a {@linkplain #getPreliminaryEncodingInfo() preliminary encoding} by examining the first four bytes of the input stream. * See the {@link #getPreliminaryEncodingInfo()} method for details. *
    7. Read the first 2048 bytes of the input stream and decode it using the preliminary encoding to create a "preview segment". * If the detected preliminary encoding is not supported on this platform, create the preview segment using * ISO-8859-1 instead (this incident is logged at {@linkplain Logger#warn(String) warn} level). *
    8. Search the preview segment for an encoding specification, which should always appear at or near the top of the document. *
    9. If an encoding specification is found: *
        *
      1. If the specified encoding is supported on this platform, use it. ♦ *
      2. If the specified encoding is not supported on this platform, use the encoding that was used to create the preview segment, * which is normally the detected {@linkplain #getPreliminaryEncodingInfo() preliminary encoding}. ♦ *
      *
    10. If the document {@linkplain #isXML() looks like XML}, then use UTF-8. ♦ *
      Section 4.3.3 of the XML 1.0 specification states that * an XML file that is not encoded in UTF-8 must contain either a UTF-16 BOM * or an encoding declaration in its {@linkplain StartTagType#XML_DECLARATION XML declaration}. * Since neither of these was detected, we can assume the encoding is UTF-8. *
    11. Use the encoding that was used to create the preview segment, which is normally the detected {@linkplain #getPreliminaryEncodingInfo() preliminary encoding}. ♦ *
      This is the best guess, in the absence of any explicit information about the encoding, based on the first four bytes of the stream. * The HTTP protocol section 3.7.1 * states that an encoding of ISO-8859-1 can be assumed * if no charset parameter was included in the HTTP * Content-Type header. * This is consistent with the preliminary encoding detected in this scenario. *
    * * @param urlConnection the URL connection from which to load the source text. * @throws java.io.IOException if an I/O error occurs. * @see #getEncoding() */ public Source(final URLConnection urlConnection) throws IOException { this(new EncodingDetector(urlConnection)); } private String setEncoding(final String encoding, final String encodingSpecificationInfo) { if (this.encoding==UNINITIALISED) { this.encoding=encoding; this.encodingSpecificationInfo=encodingSpecificationInfo; } return encoding; } /** * Returns the document {@linkplain #getEncoding() encoding} specified within the text of the document. *

    * The document encoding can be specified within the document text in two ways. * They are referred to generically in this library as an encoding specification, * and are listed below in order of precedence: *

      *
    1. * An encoding declaration within the * {@linkplain StartTagType#XML_DECLARATION XML declaration} of an XML document, * which must be present if it has an encoding other than UTF-8 * or UTF-16. *
      <?xml version="1.0" encoding="ISO-8859-1" ?>
      *
    2. * A META declaration, * which is in the form of a {@link HTMLElementName#META META} tag with attribute http-equiv="Content-Type". * The encoding is specified in the charset parameter of a * Content-Type * HTTP header value, which is placed in the value of the meta tag's content attribute. * This META declaration should appear as early as possible in the {@link HTMLElementName#HEAD HEAD} element. *
      <META http-equiv=Content-Type content="text/html; charset=iso-8859-1">
      *
    *

    * Both of these tags must only use characters in the range U+0000 to U+007F, and in the case of the META declaration * must use ASCII encoding. This, along with the fact that they must occur at or near the beginning of the document, * assists in their detection and decoding without the need to know the exact encoding of the full text. * * @return the document {@linkplain #getEncoding() encoding} specified within the text of the document, or null if no encoding is specified. * @see #getEncoding() */ public String getDocumentSpecifiedEncoding() { if (documentSpecifiedEncoding!=UNINITIALISED) return documentSpecifiedEncoding; final Tag xmlDeclarationTag=getTagAt(0); if (xmlDeclarationTag!=null && xmlDeclarationTag.getTagType()==StartTagType.XML_DECLARATION) { documentSpecifiedEncoding=((StartTag)xmlDeclarationTag).getAttributeValue("encoding"); if (documentSpecifiedEncoding!=null) return setEncoding(documentSpecifiedEncoding,xmlDeclarationTag.toString()); } // Check for Content-Type http-equiv meta tag: final StartTag contentTypeMetaTag=getFirstStartTag("http-equiv","content-type",false); if (contentTypeMetaTag!=null) { final String contentValue=contentTypeMetaTag.getAttributeValue("content"); if (contentValue!=null) { documentSpecifiedEncoding=getCharsetParameterFromHttpHeaderValue(contentValue); if (documentSpecifiedEncoding!=null) return setEncoding(documentSpecifiedEncoding,contentTypeMetaTag.toString()); } } return setEncoding(null,"No encoding specified in document"); } /** * Returns the character encoding scheme of the source byte stream used to create this object. *

    * The encoding of a document defines how the original byte stream was encoded into characters. * The HTTP specification section 3.4 * uses the term "character set" to refer to the encoding, and the term "charset" is similarly used in Java * (see the class java.nio.charset.Charset). * This often causes confusion, as a modern "coded character set" such as Unicode * can have several encodings, such as UTF-8, UTF-16, and UTF-32. * See the Wikipedia character encoding article * for an explanation of the terminology. *

    * This method makes the best possible effort to return the name of the encoding used to decode the original source byte stream * into character data. This decoding takes place in the constructor when a parameter based on a byte stream such as an * InputStream or URLConnection is used to specify the source text. * The documentation of the {@link #Source(InputStream)} and {@link #Source(URLConnection)} constructors describe how the return value of this * method is determined in these cases. * It is also possible in some circumstances for the encoding to be determined in the {@link #Source(Reader)} constructor. *

    * If a constructor was used that specifies the source text directly in character form (not requiring the decoding of a byte sequence) * then the document itself is searched for an encoding specification. In this case, this * method returns the same value as the {@link #getDocumentSpecifiedEncoding()} method. *

    * The {@link #getEncodingSpecificationInfo()} method returns a simple description of how the value of this method was determined. * * @return the character encoding scheme of the source byte stream used to create this object, or null if the encoding is not known. * @see #getEncodingSpecificationInfo() */ public String getEncoding() { if (encoding==UNINITIALISED) getDocumentSpecifiedEncoding(); return encoding; } /** * Returns a concise description of how the {@linkplain #getEncoding() encoding} of the source document was determined. *

    * The description is intended for informational purposes only. * It is not guaranteed to have any particular format and can not be reliably parsed. * * @return a concise description of how the {@linkplain #getEncoding() encoding} of the source document was determined. * @see #getEncoding() */ public String getEncodingSpecificationInfo() { if (encoding==UNINITIALISED) getDocumentSpecifiedEncoding(); return encodingSpecificationInfo; } /** * Returns the preliminary encoding of the source document together with a concise description of how it was determined. *

    * It is sometimes necessary for the {@link #Source(InputStream)} and {@link #Source(URLConnection)} constructors to search the document for an * encoding specification in order to determine the exact {@linkplain #getEncoding() encoding} * of the source byte stream. *

    * In order to search for the {@linkplain #getDocumentSpecifiedEncoding() document specified encoding} before the exact encoding is known, * a preliminary encoding is determined using the first four bytes of the input stream. *

    * Because the encoding specification must only use characters in the range U+0000 to U+007F, the preliminary encoding need only have the following * basic properties determined: *

      *
    • Code unit size (8-bit, 16-bit or 32-bit) *
    • Byte order (big-endian or little-endian) if the code unit size is 16-bit or 32-bit *
    • Basic encoding of characters in the range U+0000 to U+007F (the current implementation only distinguishes between ASCII and EBCDIC) *
    *

    * The encodings used to represent the most commonly encountered combinations of these basic properties are: *

      *
    • ISO-8859-1: 8-bit ASCII-compatible encoding *
    • Cp037: 8-bit EBCDIC-compatible encoding *
    • UTF-16BE: 16-bit big-endian encoding *
    • UTF-16LE: 16-bit little-endian encoding *
    • UTF-32BE: 32-bit big-endian encoding (not supported on most java platforms) *
    • UTF-32LE: 32-bit little-endian encoding (not supported on most java platforms) *
    * Note: all encodings with a code unit size greater than 8 bits are assumed to use an * ASCII-compatible low-order byte. *

    * In some descriptions returned by this method, and the documentation below, a pattern is used to help demonstrate the contents of the first four bytes of the stream. * The patterns use the characters "00" to signify a zero byte, "XX" to signify a non-zero byte, and "??" to signify * a byte that can be either zero or non-zero. *

    * The algorithm for determining the preliminary encoding is as follows: *

      *
    1. Byte pattern "00 00..." : If the stream starts with two zero bytes, the default 32-bit big-endian encoding UTF-32BE is used. *
    2. Byte pattern "00 XX..." : If the stream starts with a single zero byte, the default 16-bit big-endian encoding UTF-16BE is used. *
    3. Byte pattern "XX ?? 00 00..." : If the third and fourth bytes of the stream are zero, the default 32-bit little-endian encoding UTF-32LE is used. *
    4. Byte pattern "XX 00..." or "XX ?? XX 00..." : If the second or fourth byte of the stream is zero, the default 16-bit little-endian encoding UTF-16LE is used. *
    5. Byte pattern "XX XX 00 XX..." : If the third byte of the stream is zero, the default 16-bit big-endian encoding UTF-16BE is used (assumes the first character is > U+00FF). *
    6. Byte pattern "4C XX XX XX..." : If the first four bytes are consistent with the EBCDIC encoding of * an {@linkplain StartTagType#XML_DECLARATION XML declaration} ("<?xm") or * a {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} ("<!DO"), * or any other string starting with the EBCDIC character '<' followed by three non-ASCII characters (8th bit set), * which is consistent with EBCDIC alphanumeric characters, * the default EBCDIC-compatible encoding * Cp037 is used. *
    7. Byte pattern "XX XX XX XX..." : Otherwise, if all of the first four bytes of the stream are non-zero, * the default 8-bit ASCII-compatible encoding * ISO-8859-1 is used. *
    *

    * If it was not necessary to search for a {@linkplain #getDocumentSpecifiedEncoding() document specified encoding} when determining the * {@linkplain #getEncoding() encoding} of this source document from a byte stream, this method returns null. *

    * See the documentation of the {@link #Source(InputStream)} and {@link #Source(URLConnection)} constructors for more detailed information about when the detection of a * preliminary encoding is required. *

    * The description returned by this method is intended for informational purposes only. * It is not guaranteed to have any particular format and can not be reliably parsed. * * @return the preliminary encoding of the source document together with a concise description of how it was determined, or null if no preliminary encoding was required. * @see #getEncoding() */ public String getPreliminaryEncodingInfo() { return preliminaryEncodingInfo; } /** * Indicates whether the source document is likely to be XML. *

    * The algorithm used to determine this is designed to be relatively inexpensive and to provide an accurate result in * most normal situations. * An exact determination of whether the source document is XML would require a much more complex analysis of the text. *

    * The algorithm is as follows: *

      *
    1. If the document begins with an {@linkplain StartTagType#XML_DECLARATION XML declaration}, it is an XML document. *
    2. If the document contains a {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} that contains the text * "xhtml", it is an XHTML document, and hence * also an XML document. *
    3. If none of the above conditions are met, assume the document is normal HTML, and therefore not an XML document. *
    * * @return true if the source document is likely to be XML, otherwise false. */ public boolean isXML() { final Tag xmlDeclarationTag=getTagAt(0); if (xmlDeclarationTag!=null && xmlDeclarationTag.getTagType()==StartTagType.XML_DECLARATION) return true; final Tag doctypeTag=getNextTag(0,StartTagType.DOCTYPE_DECLARATION); // if document has a DOCTYPE declaration and it contains the text "xhtml", it is an XML document: if (doctypeTag!=null && getParseText().indexOf("xhtml",doctypeTag.begin,doctypeTag.end)!=-1) return true; return false; } /** * Returns the newline character sequence used in the source document. *

    * If the document does not contain any newline characters, this method returns null. *

    * The three possible return values (aside from null) are "\n", "\r\n" and "\r". * * @return the newline character sequence used in the source document, or null if none is present. */ public String getNewLine() { if (newLine!=UNINITIALISED) return newLine; for (int i=0; iend) throw new IndexOutOfBoundsException(); if (rowColumnVectorCacheArray==null) rowColumnVectorCacheArray=RowColumnVector.getCacheArray(this); return RowColumnVector.get(rowColumnVectorCacheArray,pos); } /** * Returns the source text as a String. * @return the source text as a String. */ public String toString() { return sourceText.toString(); } /** * Parses all of the {@linkplain Tag tags} in this source document sequentially from beginning to end. *

    * Calling this method can greatly improve performance if most or all of the tags in the document need to be parsed. *

    * Calling the {@link #getAllTags()}, {@link #getAllStartTags()}, {@link #getAllElements()}, {@link #getChildElements()}, * {@link #iterator()} or {@link #getNodeIterator()} * method on the Source object performs a full sequential parse automatically. * There are however still circumstances where it should be called manually, such as when it is known that most or all of the tags in the document will need to be parsed, * but none of the abovementioned methods are used, or are called only after calling one or more other tag search methods. *

    * If this method is called manually, it should be called soon after the Source object is created, * before any tag search methods are called. *

    * By default, tags are parsed only as needed, which is referred to as parse on demand mode. * In this mode, every call to a tag search method that is not returning previously cached tags must perform a relatively complex check to determine whether a * potential tag is in a {@linkplain TagType#isValidPosition(Source,int,int[]) valid position}. *

    * Generally speaking, a tag is in a valid position if it does not appear inside any other tag. * {@linkplain TagType#isServerTag() Server tags} can appear anywhere in a document, including inside other tags, so this relates only to non-server tags. * Theoretically, checking whether a specified position in the document is enclosed in another tag is only possible if every preceding tag has been parsed, * otherwise it is impossible to tell whether one of the delimiters of the enclosing tag was in fact enclosed by some other tag before it, thereby invalidating it. *

    * When this method is called, each tag is parsed in sequence starting from the beginning of the document, making it easy to check whether each potential * tag is in a valid position. * In parse on demand mode a compromise technique must be used for this check, since the theoretical requirement of having parsed all preceding tags * is no longer practical. * This compromise involves only checking whether the position is enclosed by other tags with {@linkplain TagType#getTagTypesIgnoringEnclosedMarkup() certain tag types}. * The added complexity of this technique makes parsing each tag slower compared to when a full sequential parse is performed, but when only a few tags need * parsing this is an extremely beneficial trade-off. *

    * The documentation of the {@link TagType#isValidPosition(Source, int pos, int[] fullSequentialParseData)} method, * which is called internally by the parser to perform the valid position check, * includes a more detailed explanation of the differences between the two modes of operation. *

    * Calling this method a second or subsequent time has no effect. *

    * This method returns the same list of tags as the {@link Source#getAllTags() Source.getAllTags()} method, but as an array instead of a list. *

    * If this method is called after any of the tag search methods are called, * the {@linkplain #getCacheDebugInfo() cache} is cleared of any previously found tags before being restocked via the full sequential parse. * This means that if you still have references to tags or elements from before the full sequential parse, they will not be the same objects as those * that are returned by tag search methods after the full sequential parse, which can cause confusion if you are allocating * {@linkplain Tag#setUserData(Object) user data} to tags. * It is also significant if the {@link Segment#ignoreWhenParsing()} method has been called since the tags were first found, as any tags inside the * ignored segments will no longer be returned by any of the tag search methods. *

    * See also the {@link Tag} class documentation for more general details about how tags are parsed. * * @return an array of all {@linkplain Tag tags} in this source document. */ public Tag[] fullSequentialParse() { // The assumeNoNestedTags flag tells the parser not to bother checking for tags inside other tags // if the user knows that the document doesn't contain any server tags. // This results in a more efficient search, but the difference during benchmark tests was only minimal - // about 12% speed improvement in a 1MB document containing 70,000 tags, 75% of which were inside a comment tag. // With such a small improvement in a document specifically designed to show an an exaggerated improvement, // it is not worth documenting this feature. // The flag has been retained internally however as it does not have a measurable performance impact to check for it. if (allTagsArray!=null) return allTagsArray; final boolean assumeNoNestedTags=false; if (cache.getTagCount()!=0) { logger.warn("Full sequential parse clearing all tags from cache. Consider calling Source.fullSequentialParse() manually immediately after construction of Source."); cache.clear(); } final boolean useAllTypesCacheSave=useAllTypesCache; try { useAllTypesCache=false; useSpecialTypesCache=false; return Tag.parseAll(this,assumeNoNestedTags); } finally { useAllTypesCache=useAllTypesCacheSave; useSpecialTypesCache=true; } } /** * Returns an iterator over every {@linkplain Tag tag}, {@linkplain CharacterReference character reference} and plain text segment contained within the source document. *

    * Plain text is defined as all text that is not part of a {@link Tag} or {@link CharacterReference}. *

    * This results in a sequential walk-through of the entire source document. * The {@linkplain Segment#getEnd() end} position of each segment should correspond with the {@linkplain Segment#getBegin() begin} position of the subsequent segment, * unless any of the tags are enclosed by other tags. * This could happen if there are {@linkplain TagType#isServerTag() server tags} present in the document, or in rare circumstances where the * {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} contains {@linkplain StartTagType#MARKUP_DECLARATION markup declarations}. *

    * Character references that are found inside tags, such as those present inside attribute values, are not included as separate iterated segments. *

    * This method is implemented by simply calling the {@link Segment#getNodeIterator()} method of the {@link Segment} superclass. *

    * Prior to version 3.1, character references were not handled as separate segments, and were instead included unparsed in the plain text segments. * This required the use of the {@link CharacterReference#decode(CharSequence)} method to retrieve the actual text from each plain text segment. * Although it is likely that existing programs based on the previous functionality should still work without modification, the static configuration property * {@link #LegacyIteratorCompatabilityMode} is provided on a temporary basis to revert back to the behaviour of previous versions, ensuring * that existing programs function as intended without major modification. *

    *

    *
    Example:
    *
    *

    * The following code demonstrates the typical (implied) usage of this method through the Iterable interface * to make an exact copy of the document from reader to writer (assuming no server tags are present): *

    *
    	 * Source source=new Source(reader);
    	 * for (Segment segment : source) {
    	 *   if (segment instanceof Tag) {
    	 *     Tag tag=(Tag)segment;
    	 *     // HANDLE TAG
    	 *     // Uncomment the following line to ensure each tag is valid XML:
    	 *     // writer.write(tag.tidy()); continue;
    	 *   } else if (segment instanceof CharacterReference) {
    	 *     CharacterReference characterReference=(CharacterReference)segment;
    	 *     // HANDLE CHARACTER REFERENCE
    	 *     // Uncomment the following line to decode all character references instead of copying them verbatim:
    	 *     // characterReference.appendCharTo(writer); continue;
    	 *   } else {
    	 *     // HANDLE PLAIN TEXT
    	 *   }
    	 *   // unless specific handling has prevented getting to here, simply output the segment as is:
    	 *   writer.write(segment.toString());
    	 * }
    *
    *
    * @return an iterator over every {@linkplain Tag tag}, {@linkplain CharacterReference character reference} and plain text segment contained within the source document.
    */
    public Iterator iterator() {
        // Iterating over the source is exactly node iteration, so simply delegate.
        return getNodeIterator();
    }

    /**
    * Returns a list of the top-level {@linkplain Element elements} in the document element hierarchy.
    *

    * The objects in the list are all of type {@link Element}. *

    * The term top-level element refers to an element that is not nested within any other element in the document. *

    * The term document element hierarchy refers to the hierarchy of elements that make up this source document. * The source document itself is not considered to be part of the hierarchy, meaning there is typically more than one top-level element. * Even when the source represents an entire HTML document, the {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} and/or an * {@linkplain StartTagType#XML_DECLARATION XML declaration} often exist as top-level elements along with the {@link HTMLElementName#HTML HTML} element itself. *

    * The {@link Element#getChildElements()} method can be used to get the children of the top-level elements, with recursive use providing a means to * visit every element in the document hierarchy. *

    * The document element hierarchy differs from that of the Document Object Model * in that it is only a representation of the elements that are physically present in the source text. Unlike the DOM, it does not include any "implied" HTML elements * such as {@link HTMLElementName#TBODY TBODY} if they are not present in the source text. *

    * Elements formed from {@linkplain TagType#isServerTag() server tags} are not included in the hierarchy at all. *

    * Structural errors in this source document such as overlapping elements are reported in the {@linkplain #getLogger() log}. * When elements are found to overlap, the position of the start tag determines the location of the element in the hierarchy. *

    * Calling this method on the Source object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically. *

    * A visual representation of the document element hierarchy can be obtained by calling:
    * {@link #getSourceFormatter()}.{@link SourceFormatter#setIndentAllElements(boolean) setIndentAllElements(true)}.{@link SourceFormatter#setCollapseWhiteSpace(boolean) setCollapseWhiteSpace(true)}.{@link SourceFormatter#setTidyTags(boolean) setTidyTags(true)}.{@link SourceFormatter#toString() toString()} * * @return a list of the top-level {@linkplain Element elements} in the document element hierarchy, guaranteed not null. * @see Element#getParentElement() * @see Element#getChildElements() * @see Element#getDepth() */ @Override public List getChildElements() { if (childElements==null) { if (length()==0) { childElements=Collections.emptyList(); } else { if (allTags==null) fullSequentialParse(); childElements=new ArrayList(); int pos=0; while (true) { final StartTag childStartTag=source.getNextStartTag(pos); if (childStartTag==null) break; if (!Config.IncludeServerTagsInElementHierarchy && childStartTag.getTagType().isServerTag()) { pos=childStartTag.end; continue; } final Element childElement=childStartTag.getElement(); childElement.getChildElements(0); if (childElement.parentElement==Element.NOT_CACHED) { // make sure element was not added as a child of a descendent element (can happen with overlapping elements) childElement.parentElement=null; childElements.add(childElement); } pos=childElement.end; } } } return childElements; } /** * Formats the HTML source by laying out each non-inline-level element on a new line with an appropriate indent. *

    * The output format can be configured by setting any number of properties on the returned {@link SourceFormatter} instance before * {@linkplain SourceFormatter#writeTo(Writer) obtaining its output}. *

    * To create a SourceFormatter instance based on a {@link Segment} rather than an entire Source document,
    * use {@linkplain SourceFormatter#SourceFormatter(Segment) new SourceFormatter(segment)} instead.
    *
    * @return an instance of {@link SourceFormatter} based on this source document.
    */
    public SourceFormatter getSourceFormatter() {
        // A new formatter is created on every call, constructed over this source.
        final SourceFormatter formatter=new SourceFormatter(this);
        return formatter;
    }

    /**
    * Returns a list of all {@linkplain Tag tags} in this source document.
    *

    * Calling this method on the Source object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @return a list of all {@linkplain Tag tags} in this source document.
    */
    public List getAllTags() {
        // Already available: hand back the existing list.
        if (allTags!=null) return allTags;
        // Otherwise run the full sequential parse and read the allTags field again afterwards.
        fullSequentialParse();
        return allTags;
    }

    /**
    * Returns a list of all {@linkplain StartTag start tags} in this source document.
    *

    * Calling this method on the Source object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @return a list of all {@linkplain StartTag start tags} in this source document.
    */
    public List getAllStartTags() {
        // The filtered list is built on first use and kept in the allStartTags field.
        if (allStartTags==null) {
            // The local is given an element type so that the enhanced for loop below can bind Tag directly;
            // iterating a raw List only yields Object and does not compile.
            // NOTE(review): getAllTags() is declared with a raw List return type here, hence the suppression - confirm its element type is Tag.
            @SuppressWarnings("unchecked")
            final List<Tag> tags=getAllTags();
            // Presized to the total tag count, which is an upper bound on the number of start tags.
            final List<StartTag> startTags=new ArrayList<StartTag>(tags.size());
            for (Tag tag : tags) {
                if (tag instanceof StartTag) startTags.add((StartTag)tag);
            }
            allStartTags=startTags;
        }
        return allStartTags;
    }

    /**
    * Returns a list of all {@linkplain Element elements} in this source document.
    *

    * Calling this method on the Source object performs a {@linkplain #fullSequentialParse() full sequential parse} automatically. *

    * The elements returned correspond exactly with the start tags returned in the {@link #getAllStartTags()} method.
    *
    * @return a list of all {@linkplain Element elements} in this source document.
    */
    public List getAllElements() {
        // The element list is built on first use and kept in the allElements field.
        if (allElements==null) {
            // The local is given an element type so that the enhanced for loop below can bind StartTag directly;
            // iterating a raw List only yields Object and does not compile.
            // NOTE(review): getAllStartTags() is declared with a raw List return type here, hence the suppression.
            @SuppressWarnings("unchecked")
            final List<StartTag> startTags=getAllStartTags();
            // With no start tags an empty list is returned directly; the allElements field is left unset in that case.
            if (startTags.isEmpty()) return Collections.emptyList();
            final List<Element> elements=new ArrayList<Element>(startTags.size());
            // One element per start tag, in the same order.
            for (StartTag startTag : startTags) elements.add(startTag.getElement());
            allElements=elements;
        }
        return allElements;
    }

    /**
    * Returns the {@link Element} with the specified id attribute value.
    *

    * This simulates the script method * getElementById * defined in DOM HTML level 1. *

    * This is equivalent to {@link #getFirstElement(String,String,boolean) getFirstElement}("id",id,true). *

    * A well formed HTML document should have no more than one element with any given id attribute value.
    *
    * @param id the id attribute value (case sensitive) to search for, must not be null.
    * @return the {@link Element} with the specified id attribute value, or null if no such element exists.
    */
    public Element getElementById(final String id) {
        // Search on the id attribute, matching the value case sensitively.
        final Element match=getFirstElement(Attribute.ID,id,true);
        return match;
    }

    /**
    * Returns the {@link Tag} at the specified position in the source document.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * This method also returns {@linkplain Tag#isUnregistered() unregistered} tags.
    *
    * @param pos the position in the source document, may be out of bounds.
    * @return the {@link Tag} at the specified position in the source document, or null if no tag exists at the specified position or it is out of bounds.
    */
    public final Tag getTagAt(final int pos) {
        // Delegates to the static lookup in Tag, passing this source and false for its third argument.
        final Tag tagAtPos=Tag.getTagAt(this,pos,false);
        return tagAtPos;
    }

    /**
    * Returns the {@link Tag} beginning at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @param pos the position in the source document from which to start the search, may be out of bounds.
    * @return the {@link Tag} beginning at or immediately preceding the specified position in the source document, or null if none exists or the specified position is out of bounds.
    */
    public Tag getPreviousTag(final int pos) {
        // The backward search itself is implemented statically in Tag.
        final Tag previous=Tag.getPreviousTag(this,pos);
        return previous;
    }

    /**
    * Returns the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param tagType the TagType to search for. * @return the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately preceding the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public Tag getPreviousTag(final int pos, final TagType tagType) { return Tag.getPreviousTag(this,pos,tagType); } /** * Returns the {@link Tag} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Use {@link Tag#getNextTag()} to get the tag immediately following another tag. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @return the {@link Tag} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public Tag getNextTag(final int pos) { return Tag.getNextTag(this,pos); } Tag getNextNonServerTag(int pos) { while (true) { final Tag tag=getNextTag(pos); if (tag==null) return null; if (!tag.getTagType().isServerTag()) return tag; pos=tag.end; } } Tag getPreviousNonServerTag(int pos) { while (true) { final Tag tag=getPreviousTag(pos-1); if (tag==null) return null; if (!tag.getTagType().isServerTag()) return tag; pos=tag.begin-1; } } /** * Returns the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param tagType the TagType to search for. * @return the {@link Tag} of the specified {@linkplain TagType type} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public Tag getNextTag(final int pos, final TagType tagType) { return Tag.getNextTag(this,pos,tagType); } /** * Returns the {@link Tag} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @param pos the position in the source document, may be out of bounds.
    * @return the {@link Tag} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document, or null if the position is not within a tag or is out of bounds.
    */
    public Tag getEnclosingTag(final int pos) {
        // Forward to the two-argument overload with no tag type specified.
        final TagType unspecifiedTagType=null;
        return getEnclosingTag(pos,unspecifiedTagType);
    }

    /**
    * Returns the {@link Tag} of the specified {@linkplain TagType type} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document, may be out of bounds. * @param tagType the TagType to search for. * @return the {@link Tag} of the specified {@linkplain TagType type} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document, or null if the position is not within a tag of the specified type or is out of bounds. */ public Tag getEnclosingTag(final int pos, final TagType tagType) { final Tag tag=getPreviousTag(pos,tagType); if (tag==null || tag.end<=pos) return null; return tag; } /** * Returns the {@link Element} beginning at or immediately following the specified position in the source document. *

    * This is equivalent to {@link #getNextStartTag(int) getNextStartTag(pos)}.{@link StartTag#getElement() getElement()}, * assuming the result is not null. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @return the {@link Element} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public Element getNextElement(final int pos) { final StartTag startTag=getNextStartTag(pos); return startTag==null ? null : startTag.getElement(); } /** * Returns the {@linkplain StartTagType#NORMAL normal} {@link Element} with the specified {@linkplain Element#getName() name} beginning at or immediately following the specified position in the source document. *

    * This is equivalent to {@link #getNextStartTag(int,String) getNextStartTag(pos,name)}.{@link StartTag#getElement() getElement()}, * assuming the result is not null. *

    * Specifying a null argument to the name parameter is equivalent to * {@link #getNextElement(int) getNextElement(pos)}. *

    * Specifying an argument to the name parameter that ends in a colon (:) searches for all elements * in the specified XML namespace. *

    * This method also returns elements consisting of {@linkplain Tag#isUnregistered() unregistered} tags if the specified name is not a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name}. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain Element#getName() name} of the element to search for. * @return the {@linkplain StartTagType#NORMAL normal} {@link Element} with the specified {@linkplain Element#getName() name} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public Element getNextElement(final int pos, String name) { final StartTag startTag=getNextStartTag(pos,name); return startTag==null ? null : startTag.getElement(); } /** * Returns the {@link Element} with the specified attribute name/value pair beginning at or immediately following the specified position in the source document. *

    * This is equivalent to {@link #getNextStartTag(int,String,String,boolean) getNextStartTag(pos,attributeName,value,valueCaseSensitive)}.{@link StartTag#getElement() getElement()}, * assuming the result is not null. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param value the value of the specified attribute to search for, must not be null. * @param valueCaseSensitive specifies whether the attribute value matching is case sensitive. * @return the {@link Element} with the specified attribute name/value pair beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. * @see #getNextElement(int pos, String attributeName, Pattern valueRegexPattern) */ public Element getNextElement(final int pos, final String attributeName, final String value, final boolean valueCaseSensitive) { final StartTag startTag=getNextStartTag(pos,attributeName,value,valueCaseSensitive); return startTag==null ? null : startTag.getElement(); } /** * Returns the {@link Element} with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document. *

    * Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, * without regard to the attribute value. This will also match an attribute that {@linkplain Attribute#hasValue() has no value} at all. *

    * This is equivalent to {@link #getNextStartTag(int,String,Pattern) getNextStartTag(pos,attributeName,valueRegexPattern)}.{@link StartTag#getElement() getElement()}, * assuming the result is not null. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param valueRegexPattern the regular expression pattern that must match the attribute value, may be null. * @return the {@link Element} with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. * @see #getNextElement(int pos, String attributeName, String value, boolean valueCaseSensitive) */ public Element getNextElement(final int pos, final String attributeName, final Pattern valueRegexPattern) { final StartTag startTag=getNextStartTag(pos,attributeName,valueRegexPattern); return startTag==null ? null : startTag.getElement(); } /** * Returns the {@link Element} with the specified class beginning at or immediately following the specified position in the source document. *

    * This matches an element with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple * class names separated by white space in the attribute value. *

    * This is equivalent to {@link #getNextStartTagByClass(int,String) getNextStartTagByClass(pos,className)}.{@link StartTag#getElement() getElement()}, * assuming the result is not null. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param className the class name (case sensitive) to search for, must not be null. * @return the {@link Element} with the specified class beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public Element getNextElementByClass(final int pos, final String className) { final StartTag startTag=getNextStartTagByClass(pos,className); return startTag==null ? null : startTag.getElement(); } /** * Returns the {@link StartTag} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @param pos the position in the source document from which to start the search, may be out of bounds.
    * @return the {@link StartTag} at or immediately preceding the specified position in the source document, or null if none exists or the specified position is out of bounds.
    */
    public StartTag getPreviousStartTag(final int pos) {
        // The backward start tag search is implemented statically in StartTag.
        final StartTag previous=StartTag.getPrevious(this,pos);
        return previous;
    }

    /**
    * Returns the {@link StartTag} of the specified {@linkplain StartTagType type} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * This is exactly equivalent to (StartTag){@link #getPreviousTag(int,TagType) getPreviousTag}(pos,startTagType), * but can be used to avoid the explicit cast to a {@link StartTag} object. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param startTagType the StartTagType to search for. * @return the {@link StartTag} of the specified {@linkplain StartTagType type} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public StartTag getPreviousStartTag(final int pos, final StartTagType startTagType) { return (StartTag)getPreviousTag(pos,startTagType); } /** * Returns the {@linkplain StartTagType#NORMAL normal} {@link StartTag} with the specified {@linkplain StartTag#getName() name} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Specifying a null argument to the name parameter is equivalent to * {@link #getPreviousStartTag(int) getPreviousStartTag(pos)}. *

    * This method also returns {@linkplain Tag#isUnregistered() unregistered} tags if the specified name is not a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name}. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain StartTag#getName() name} of the start tag to search for. * @return the {@linkplain StartTagType#NORMAL normal} {@link StartTag} with the specified {@linkplain StartTag#getName() name} at or immediately preceding the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public StartTag getPreviousStartTag(final int pos, final String name) { return getPreviousStartTag(pos,name,StartTagType.NORMAL); } /** * Returns the {@link StartTag} with the specified {@linkplain StartTag#getName() name} and {@linkplain StartTagType type} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Specifying {@link StartTagType#NORMAL} as the argument to the startTagType parameter is equivalent to * {@link #getPreviousStartTag(int,String) getPreviousStartTag(pos,name)}. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain StartTag#getName() name} of the start tag to search for, may be null. * @param startTagType the {@linkplain StartTagType type} of the start tag to search for, must not be null. * @return the {@link StartTag} with the specified {@linkplain StartTag#getName() name} and {@linkplain StartTagType type} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public StartTag getPreviousStartTag(final int pos, String name, final StartTagType startTagType) { if (name!=null) name=name.toLowerCase(); return StartTag.getPrevious(this,pos,name,startTagType); } /** * Returns the {@link StartTag} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @param pos the position in the source document from which to start the search, may be out of bounds.
    * @return the {@link StartTag} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds.
    */
    public StartTag getNextStartTag(final int pos) {
        // The forward start tag search is implemented statically in StartTag.
        final StartTag next=StartTag.getNext(this,pos);
        return next;
    }

    /**
    * Returns the {@link StartTag} of the specified {@linkplain StartTagType type} beginning at or immediately following the specified position in the source document.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * This is exactly equivalent to (StartTag){@link #getNextTag(int,TagType) getNextTag}(pos,startTagType), * but can be used to avoid the explicit cast to a {@link StartTag} object. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param startTagType the StartTagType to search for. * @return the {@link StartTag} of the specified {@linkplain StartTagType type} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public StartTag getNextStartTag(final int pos, final StartTagType startTagType) { return (StartTag)getNextTag(pos,startTagType); } /** * Returns the {@linkplain StartTagType#NORMAL normal} {@link StartTag} with the specified {@linkplain StartTag#getName() name} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Specifying a null argument to the name parameter is equivalent to * {@link #getNextStartTag(int) getNextStartTag(pos)}. *

    * Specifying an argument to the name parameter that ends in a colon (:) searches for all start tags * in the specified XML namespace. *

    * This method also returns {@linkplain Tag#isUnregistered() unregistered} tags if the specified name is not a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name}. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain StartTag#getName() name} of the start tag to search for, may be null. * @return the {@linkplain StartTagType#NORMAL normal} {@link StartTag} with the specified {@linkplain StartTag#getName() name} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public StartTag getNextStartTag(final int pos, final String name) { return getNextStartTag(pos,name,StartTagType.NORMAL); } /** * Returns the {@link StartTag} with the specified {@linkplain StartTag#getName() name} and {@linkplain StartTagType type} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Specifying {@link StartTagType#NORMAL} as the argument to the startTagType parameter is equivalent to * {@link #getNextStartTag(int,String) getNextStartTag(pos,name)}. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain StartTag#getName() name} of the start tag to search for, may be null. * @param startTagType the {@linkplain StartTagType type} of the start tag to search for, must not be null. * @return the {@link StartTag} with the specified {@linkplain StartTag#getName() name} and {@linkplain StartTagType type} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public StartTag getNextStartTag(final int pos, String name, final StartTagType startTagType) { if (name!=null) name=name.toLowerCase(); return StartTag.getNext(this,pos,name,startTagType); } /** * Returns the {@link StartTag} with the specified attribute name/value pair beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param value the value of the specified attribute to search for, must not be null. * @param valueCaseSensitive specifies whether the attribute value matching is case sensitive. * @return the {@link StartTag} with the specified attribute name/value pair beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. * @see #getNextStartTag(int pos, String attributeName, Pattern valueRegexPattern) */ public StartTag getNextStartTag(final int pos, final String attributeName, final String value, final boolean valueCaseSensitive) { return StartTag.getNext(this,pos,attributeName,value,valueCaseSensitive); } /** * Returns the {@link StartTag} with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document. *

    * Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, * without regard to the attribute value. This will also match an attribute that {@linkplain Attribute#hasValue() has no value} at all. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param valueRegexPattern the regular expression pattern that must match the attribute value, may be null. * @return the {@link StartTag} with the specified attribute name and value pattern beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. * @see #getNextStartTag(int pos, String attributeName, String value, boolean valueCaseSensitive) */ public StartTag getNextStartTag(final int pos, final String attributeName, final Pattern valueRegexPattern) { return StartTag.getNext(this,pos,attributeName,valueRegexPattern); } /** * Returns the {@link StartTag} with the specified class beginning at or immediately following the specified position in the source document. *

    * This matches a start tag with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple * class names separated by white space in the attribute value. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param className the class name (case sensitive) to search for, must not be null. * @return the {@link StartTag} with the specified class beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public StartTag getNextStartTagByClass(final int pos, final String className) { return getNextStartTag(pos,"class",getClassPattern(className)); } /** * Returns the {@link EndTag} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @return the {@link EndTag} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public EndTag getPreviousEndTag(final int pos) { return EndTag.getPrevious(this,pos); } /** * Returns the {@link EndTag} of the specified {@linkplain EndTagType type} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * This is exactly equivalent to (EndTag){@link #getPreviousTag(int,TagType) getPreviousTag}(pos,endTagType), * but can be used to avoid the explicit cast to an {@link EndTag} object. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param endTagType the EndTagType to search for. * @return the {@link EndTag} of the specified {@linkplain EndTagType type} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public EndTag getPreviousEndTag(final int pos, final EndTagType endTagType) { return (EndTag)getPreviousTag(pos,endTagType); } /** * Returns the {@linkplain EndTagType#NORMAL normal} {@link EndTag} with the specified {@linkplain EndTag#getName() name} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain StartTag#getName() name} of the end tag to search for, must not be null. * @return the {@linkplain EndTagType#NORMAL normal} {@link EndTag} with the specified {@linkplain EndTag#getName() name} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public EndTag getPreviousEndTag(final int pos, final String name) { if (name==null) throw new IllegalArgumentException("name argument must not be null"); return EndTag.getPrevious(this,pos,name.toLowerCase(),EndTagType.NORMAL); } /** * Returns the {@link EndTag} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @param pos  the position in the source document at which the search begins, may be out of bounds.
    * @return the first {@link EndTag} beginning at or after the specified position, or {@code null} if none exists or the specified position is out of bounds.
    */
    public EndTag getNextEndTag(final int pos) {
        final EndTag nextEndTag=EndTag.getNext(this,pos);
        return nextEndTag;
    }

    /**
    * Returns the {@link EndTag} of the specified {@linkplain EndTagType type} beginning at or immediately following the specified position in the source document.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * This is exactly equivalent to (EndTag){@link #getNextTag(int,TagType) getNextTag}(pos,endTagType), * but can be used to avoid the explicit cast to an {@link EndTag} object. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param endTagType the EndTagType to search for. * @return the {@link EndTag} of the specified {@linkplain EndTagType type} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public EndTag getNextEndTag(final int pos, final EndTagType endTagType) { return (EndTag)getNextTag(pos,endTagType); } /** * Returns the {@linkplain EndTagType#NORMAL normal} {@link EndTag} with the specified {@linkplain EndTag#getName() name} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain EndTag#getName() name} of the end tag to search for, must not be null. * @return the {@linkplain EndTagType#NORMAL normal} {@link EndTag} with the specified {@linkplain EndTag#getName() name} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public EndTag getNextEndTag(final int pos, final String name) { return getNextEndTag(pos,name,EndTagType.NORMAL); } /** * Returns the {@link EndTag} with the specified {@linkplain EndTag#getName() name} and {@linkplain EndTagType type} beginning at or immediately following the specified position in the source document. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document from which to start the search, may be out of bounds. * @param name the {@linkplain StartTag#getName() name} of the end tag to search for, must not be null. * @param endTagType the {@linkplain EndTagType type} of the end tag to search for, must not be null. * @return the {@link EndTag} with the specified {@linkplain EndTag#getName() name} and {@linkplain EndTagType type} beginning at or immediately following the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public EndTag getNextEndTag(final int pos, final String name, final EndTagType endTagType) { if (name==null) throw new IllegalArgumentException("name argument must not be null"); return EndTag.getNext(this,pos,name.toLowerCase(),endTagType); } /** * Returns the most nested {@linkplain StartTagType#NORMAL normal} {@link Element} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document. *

    * The specified position can be anywhere inside the {@linkplain Element#getStartTag() start tag}, {@linkplain Element#getEndTag() end tag}, * or {@linkplain Element#getContent() content} of the element. There is no requirement that the returned element has an end tag, and it * may be a {@linkplain TagType#isServerTag() server tag} or HTML {@linkplain StartTagType#COMMENT comment}. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param pos the position in the source document, may be out of bounds. * @return the most nested {@linkplain StartTagType#NORMAL normal} {@link Element} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document, or null if the position is not within an element or is out of bounds. */ public Element getEnclosingElement(final int pos) { return getEnclosingElement(pos,null); } /** * Returns the most nested {@linkplain StartTagType#NORMAL normal} {@link Element} with the specified {@linkplain Element#getName() name} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document. *

    * The specified position can be anywhere inside the {@linkplain Element#getStartTag() start tag}, {@linkplain Element#getEndTag() end tag}, * or {@linkplain Element#getContent() content} of the element. There is no requirement that the returned element has an end tag, and it * may be a {@linkplain TagType#isServerTag() server tag} or HTML {@linkplain StartTagType#COMMENT comment}. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * This method also returns elements consisting of {@linkplain Tag#isUnregistered() unregistered} tags if the specified name is not a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name}. * * @param pos the position in the source document, may be out of bounds. * @param name the {@linkplain Element#getName() name} of the element to search for. * @return the most nested {@linkplain StartTagType#NORMAL normal} {@link Element} with the specified {@linkplain Element#getName() name} that {@linkplain Segment#encloses(int) encloses} the specified position in the source document, or null if none exists or the specified position is out of bounds. */ public Element getEnclosingElement(final int pos, String name) { int startBefore=pos; if (name!=null) name=name.toLowerCase(); final boolean isXMLTagName=Tag.isXMLName(name); while (true) { StartTag startTag=StartTag.getPrevious(this,startBefore,name,StartTagType.NORMAL,isXMLTagName); if (startTag==null) return null; Element element=startTag.getElement(); if (pos < element.end) return element; startBefore=startTag.begin-1; } } /** * Returns the {@link CharacterReference} at or immediately preceding (or {@linkplain Segment#encloses(int) enclosing}) the specified position in the source document. *

    * Character references positioned within an HTML {@linkplain StartTagType#COMMENT comment} are NOT ignored.
    *
    * @param pos  the position in the source document at which the search begins, may be out of bounds.
    * @return the {@link CharacterReference} beginning at or immediately preceding the specified position, or {@code null} if none exists or the specified position is out of bounds.
    */
    public CharacterReference getPreviousCharacterReference(final int pos) {
        final CharacterReference previousReference=CharacterReference.getPrevious(this,pos);
        return previousReference;
    }

    /**
    * Returns the {@link CharacterReference} beginning at or immediately following the specified position in the source document.
    *

    * Character references positioned within an HTML {@linkplain StartTagType#COMMENT comment} are NOT ignored.
    *
    * @param pos  the position in the source document at which the search begins, may be out of bounds.
    * @return the first {@link CharacterReference} beginning at or after the specified position, or {@code null} if none exists or the specified position is out of bounds.
    */
    public CharacterReference getNextCharacterReference(final int pos) {
        final CharacterReference nextReference=CharacterReference.getNext(this,pos);
        return nextReference;
    }

    /**
    * Returns the end position of the XML Name that starts at the
    * specified position.
    *

    * This implementation first checks that the character at the specified position is a valid XML Name start character as defined by the * {@link Tag#isXMLNameStartChar(char)} method. If this is not the case, the value -1 is returned. *

    * Once the first character has been checked, subsequent characters are checked using the {@link Tag#isXMLNameChar(char)} method until * one is found that is not a valid XML Name character or the end of the document is reached. This position is then returned. * * @param pos the position in the source document of the first character of the XML Name. * @return the end position of the XML Name that starts at the specified position. * @throws IndexOutOfBoundsException if the specified position is not within the bounds of the document. */ public int getNameEnd(int pos) { if (!Tag.isXMLNameStartChar(sourceText.charAt(pos++))) return -1; try { while (Tag.isXMLNameChar(sourceText.charAt(pos))) pos++; // cost of IndexOutOfBoundsException in rare case of name ending at end of file is less than checking for end of file manually with each iteration. } catch (IndexOutOfBoundsException ex) {} return pos; } /** * Parses any {@link Attributes} starting at the specified position. * This method is only used in the unusual situation where attributes exist outside of a start tag. * The {@link StartTag#getAttributes()} method should be used in normal situations. *

    * The returned Attributes segment always begins at pos, * and ends at the end of the last attribute before either maxEnd or * the first occurrence of "/>" or ">" outside of a quoted attribute value, whichever comes first. *

    * Only returns null if the segment contains a major syntactical error * or more than the {@linkplain Attributes#getDefaultMaxErrorCount() default maximum} number of * minor syntactical errors. *

    * This is equivalent to * {@link #parseAttributes(int,int,int) parseAttributes}(pos,maxEnd,{@link Attributes#getDefaultMaxErrorCount()})}. * * @param pos the position in the source document at the beginning of the attribute list, may be out of bounds. * @param maxEnd the maximum end position of the attribute list, or -1 if no maximum. * @return the {@link Attributes} starting at the specified position, or null if too many errors occur while parsing or the specified position is out of bounds. * @see StartTag#getAttributes() * @see Segment#parseAttributes() */ public Attributes parseAttributes(final int pos, final int maxEnd) { return parseAttributes(pos,maxEnd,Attributes.getDefaultMaxErrorCount()); } /** * Parses any {@link Attributes} starting at the specified position. * This method is only used in the unusual situation where attributes exist outside of a start tag. * The {@link StartTag#getAttributes()} method should be used in normal situations. *

    * Only returns null if the segment contains a major syntactical error * or more than the specified number of minor syntactical errors. *

    * The maxErrorCount argument overrides the {@linkplain Attributes#getDefaultMaxErrorCount() default maximum error count}. *

    * See {@link #parseAttributes(int pos, int maxEnd)} for more information. * * @param pos the position in the source document at the beginning of the attribute list, may be out of bounds. * @param maxEnd the maximum end position of the attribute list, or -1 if no maximum. * @param maxErrorCount the maximum number of minor errors allowed while parsing. * @return the {@link Attributes} starting at the specified position, or null if too many errors occur while parsing or the specified position is out of bounds. * @see StartTag#getAttributes() * @see #parseAttributes(int pos, int MaxEnd) */ public Attributes parseAttributes(final int pos, final int maxEnd, final int maxErrorCount) { return Attributes.construct(this,pos,maxEnd,maxErrorCount); } /** * Causes the specified range of the source text to be ignored when parsing. *

    * See the documentation of the {@link Segment#ignoreWhenParsing()} method for more information. * * @param begin the beginning character position in the source text. * @param end the end character position in the source text. */ public void ignoreWhenParsing(final int begin, final int end) { if (wasFullSequentialParseCalled()) throw new IllegalStateException("ignoreWhenParsing can not be used after a full sequential parse has been performed"); if (parseTextOutputDocument==null) { parseTextOutputDocument=new OutputDocument(getParseText()); parseText=null; } parseTextOutputDocument.replaceWithSpaces(begin,end); } /** * Causes all of the segments in the specified collection to be ignored when parsing. *

    * This is equivalent to calling {@link Segment#ignoreWhenParsing()} on each segment in the collection.
    *
    * @param segments  the segments to be ignored when parsing, must not be {@code null}.
    */
    public void ignoreWhenParsing(final Collection<? extends Segment> segments) {
        // the element type is declared so that iterating the collection as Segment objects is type safe
        for (final Segment segment : segments) {
            segment.ignoreWhenParsing();
        }
    }

    /**
    * Sets the {@link Logger} that handles log messages.
    *

    * Specifying a null argument disables logging completely for operations performed on this Source object. *

    * A logger instance is created automatically for each Source object using the {@link LoggerProvider} * specified by the static {@link Config#LoggerProvider} property. * The name used for all automatically created logger instances is "net.htmlparser.jericho". *

    * Use of this method with a non-null argument is therefore not usually necessary,
    * unless specifying an instance of {@link WriterLogger} or a user-defined {@link Logger} implementation.
    *
    * @param logger  the logger that will handle log messages, or {@code null} to disable logging.
    * @see Config#LoggerProvider
    */
    public void setLogger(final Logger logger) {
        if (logger==null) {
            // a null argument is stored as the disabled-logger instance rather than as null
            this.logger=LoggerDisabled.INSTANCE;
        } else {
            this.logger=logger;
        }
    }

    /**
    * Returns the {@link Logger} that handles log messages.
    *

    * A logger instance is created automatically for each Source object using the {@link LoggerProvider} * specified by the static {@link Config#LoggerProvider} property. * This can be overridden by calling the {@link #setLogger(Logger)} method. * The name used for all automatically created logger instances is "net.htmlparser.jericho". * * @return the {@link Logger} that handles log messages, or null if logging is disabled. */ public Logger getLogger() { return logger!=LoggerDisabled.INSTANCE ? logger : null; } /** * Clears the {@linkplain #getCacheDebugInfo() tag cache} of all tags. *

    * This method may be useful after calling the {@link Segment#ignoreWhenParsing()} method so that any tags previously found within the ignored segments
    * will no longer be returned by the tag search methods.
    */
    public void clearCache() {
        cache.clear();
        // also reset the fields holding previously collected tags and elements
        allElements=null;
        allStartTags=null;
        allTags=null;
        allTagsArray=null;
    }

    /**
    * Returns a string representation of the tag cache, useful for debugging purposes.
    *
    * @return a string representation of the tag cache, useful for debugging purposes.
    */
    public String getCacheDebugInfo() {
        final String cacheDescription=cache.toString();
        return cacheDescription;
    }

    /**
    * Gets a list of all the tags that have been parsed so far.
    *

    * This information may be useful for debugging purposes. * Execution of this method collects information from the internal cache and is relatively expensive. * * @return a list of all the tags that have been parsed so far. * @see #getCacheDebugInfo() */ List getParsedTags() { final ArrayList list=new ArrayList(); for (final Iterator i=cache.getTagIterator(); i.hasNext();) list.add(i.next()); return list; } /** * Returns the {@linkplain ParseText parse text} of this source document. *

    * This method is normally only of interest to users who wish to create custom tag types. *

    * The parse text is defined as the entire text of the source document in lower case, with all * {@linkplain Segment#ignoreWhenParsing() ignored} segments replaced by space characters. * * @return the {@linkplain ParseText parse text} of this source document. */ public final ParseText getParseText() { if (parseText==null) { if (parseTextOutputDocument!=null) { parseText=new CharSequenceParseText(parseTextOutputDocument.toString()); parseTextOutputDocument=null; } else { parseText=new CharSequenceParseText(sourceText); } } return parseText; } /** * Returns a new character sequence that is a subsequence of this source document. * * @param begin the begin position, inclusive. * @param end the end position, exclusive. * @return a new character sequence that is a subsequence of this source document. */ public final CharSequence subSequence(final int begin, final int end) { return sourceText.subSequence(begin,end); } final String substring(final int begin, final int end) { return subSequence(begin,end).toString(); } final String getName(final int begin, final int end) { // change this implentation if we want to provide the option for case sensitive names return substring(begin,end).toLowerCase(); } public final char charAt(final int index) { return sourceText.charAt(index); } /** * Returns the length of the source document. * @return the length of the source document. */ public final int length() { return sourceText.length(); } /** * Specifies whether to enable the legacy {@link Segment#getNodeIterator()} compatability mode. *

    * Prior to version 3.1, {@link Segment#getNodeIterator()} and {@link #iterator() Source.iterator()} did not handle {@linkplain CharacterReference character references} * as separate segments, and they were instead included unparsed in the plain text segments. * This required the use of the {@link CharacterReference#decode(CharSequence)} method to retrieve the actual text from each plain text segment. *

    * Although it is likely that existing programs based on the previous functionality should still work without modification, * this static configuration property is provided on a temporary basis to revert back to the behaviour of previous versions, ensuring * that existing programs function as intended without major modification. *

    * Setting this configuration property to true restores compatibility with previous versions. *

    * This property and compatability mode will be removed in a future release. * * @deprecated Modify existing code to explicitly handle {@linkplain CharacterReference} segments. */ @Deprecated public static boolean LegacyIteratorCompatabilityMode=false; boolean wasFullSequentialParseCalled() { return allTagsArray!=null; } static String getCharsetParameterFromHttpHeaderValue(final String httpHeaderValue) { final int charsetParameterPos=httpHeaderValue.toLowerCase().indexOf("charset="); if (charsetParameterPos==-1) return null; final int charsetBegin=charsetParameterPos+8; int charsetEnd=httpHeaderValue.indexOf(';',charsetBegin); final String charset=(charsetEnd==-1) ? httpHeaderValue.substring(charsetBegin) : httpHeaderValue.substring(charsetBegin,charsetEnd); return charset.trim(); } static Logger newLogger() { return LoggerFactory.getLogger(PACKAGE_NAME); } private static String getString(final EncodingDetector encodingDetector) throws IOException { try { return Util.getString(encodingDetector.openReader()); } catch (IOException ex) { try { Logger logger=newLogger(); if (logger.isInfoEnabled()) logger.info("IOException constructing encoded source. Encoding: "+encodingDetector.getEncoding()+" - "+encodingDetector.getEncodingSpecificationInfo()+". 
PreliminaryEncoding: "+encodingDetector.getPreliminaryEncoding()+" - "+encodingDetector.getPreliminaryEncodingSpecificationInfo()); } catch (Exception ex2) {} // make sure attempting to log does not cause a new exception throw ex; } } final boolean isStreamed() { return cache==Cache.STREAMED_SOURCE_MARKER; } } jericho-html-3.1/src/java/net/htmlparser/jericho/EndTagTypeMasonNamedBlock.java0000644000175000017500000000241611204550410027562 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

/**
 * End tag type whose corresponding start tag type is {@link MasonTagTypes#MASON_NAMED_BLOCK}.
 * <p>
 * The class cannot be instantiated from outside: its constructor is private and the only instance is {@link #INSTANCE}.
 */
final class EndTagTypeMasonNamedBlock extends EndTagTypeGenericImplementation {
	/** The single shared instance of this end tag type. */
	protected static final EndTagTypeMasonNamedBlock INSTANCE=new EndTagTypeMasonNamedBlock();

	/**
	 * Passes the fixed settings of this tag type to the superclass constructor.
	 * NOTE(review): the meaning of the individual arguments is defined by EndTagTypeGenericImplementation, which is not visible here.
	 */
	private EndTagTypeMasonNamedBlock() {
		super("/mason named block","",true,false);
	}

	/**
	 * Returns the start tag type that this end tag type corresponds to.
	 *
	 * @return {@link MasonTagTypes#MASON_NAMED_BLOCK}.
	 */
	public StartTagType getCorrespondingStartTagType() {
		return MasonTagTypes.MASON_NAMED_BLOCK;
	}
}
jericho-html-3.1/src/java/net/htmlparser/jericho/Cache.java0000644000175000017500000001236411204550410023646 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents a cached map of character positions to tags. * The allTagTypesSubCache object is used to cache all tags. * Additional subcaches are used to cache single tag types. See the TagType.getTagTypesIgnoringEnclosedMarkup() method for details. 
*/ final class Cache { public final Source source; private final SubCache allTagTypesSubCache; private final SubCache[] subCaches; // contains allTagTypesSubCache plus a SubCache object for each separately cached tag type static final Cache STREAMED_SOURCE_MARKER=new Cache(); public Cache(final Source source) { this.source=source; allTagTypesSubCache=new SubCache(this,null); TagType[] separatelyCachedTagTypes=getSeparatelyCachedTagTypes(); subCaches=new SubCache[separatelyCachedTagTypes.length+1]; subCaches[0]=allTagTypesSubCache; for (int i=0; i i=allTagTypesSubCache.getTagIterator(); i.hasNext();) i.next().orphan(); for (int i=0; i getTagIterator() { return allTagTypesSubCache.getTagIterator(); } public void loadAllTags(final List tags, final Tag[] allRegisteredTags, final StartTag[] allRegisteredStartTags) { // assumes the tags list implements RandomAccess final int tagCount=tags.size(); allTagTypesSubCache.bulkLoad_Init(tagCount); int registeredTagIndex=0; int registeredStartTagIndex=0; for (int i=0; i",null,false,false,false); } protected Tag constructTagAt(final Source source, final int pos) { final int closingDelimiterPos=source.getParseText().indexOf('>',pos+1); if (closingDelimiterPos==-1) return null; final Tag tag=constructStartTag(source,pos,closingDelimiterPos+1,"",null); if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(tag.getBegin()).appendTo(new StringBuilder(200).append("Encountered possible StartTag at ")).append(" whose content does not match a registered StartTagType").toString()); return tag; } } jericho-html-3.1/src/java/net/htmlparser/jericho/AttributesOutputSegment.java0000644000175000017500000001653711204550410027543 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either 
one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; import java.io.*; /** * Implements an {@link OutputSegment} whose content is a list of attribute name/value pairs. *

    * This output segment is designed to replace the original {@link Attributes} segment in the source, * providing a simple means of adding, modifying and removing attributes. *

    * Each instance of this class contains a java.util.Map of name/value pairs which can either be * specified directly in the constructor or initialised to the same entries as the source {@link Attributes} * specified in the constructor. * This map can be accessed via the {@link #getMap()} method, and its entries modified as required before output. *

    * Keys in the map must be String objects, and values must implement the CharSequence interface. *

    * An attribute with no value is represented by a map entry with a null value. *

    * Attribute values are stored unencoded in the map, and are automatically * {@linkplain CharacterReference#encode(CharSequence) encoded} if necessary during output. *

    * The use of invalid characters in attribute names results in unspecified behaviour. *

    * Note that methods in the Attributes class treat attribute names as case insensitive, * whereas the Map treats them as case sensitive. *

    * This class has been removed from the public API and the functionality replaced with the
    * {@link OutputDocument#replace(Attributes, Map)} and {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} methods.
    *
    * @see OutputDocument
    * @see Attributes
    */
class AttributesOutputSegment implements OutputSegment {
    private final int begin;
    private final int end;
    private final Map map;

    /**
     * Constructs a new AttributesOutputSegment with the same span and initial name/value entries as the specified source {@link Attributes}.
     * <p>
     * Specifying a value of true as an argument to the convertNamesToLowerCase parameter
     * causes all attribute names to be converted to lower case in the map.
     * This simplifies the process of finding/updating specific attributes since map keys are case sensitive.
     * <p>
     * Attribute values are automatically {@linkplain CharacterReference#decode(CharSequence) decoded} before
     * being loaded into the map.
     * <p>
     * Calling this constructor with the following code:
     * <pre>new AttributesOutputSegment(attributes, convertNamesToLowerCase)</pre>
     * is logically equivalent to calling:
     * <pre>new AttributesOutputSegment(attributes, attributes.populateMap(new LinkedHashMap(), convertNamesToLowerCase))</pre>
     * <p>
     * The use of LinkedHashMap to implement the map ensures (probably unnecessarily) that
     * existing attributes are output in the same order as they appear in the source document, and new
     * attributes are output in the same order as they are added.
     *
     * @param attributes  the Attributes defining the span and initial name/value entries of the new AttributesOutputSegment.
     * @param convertNamesToLowerCase  specifies whether all attribute names are converted to lower case in the map.
     * @see #AttributesOutputSegment(Attributes,Map)
     */
    public AttributesOutputSegment(final Attributes attributes, final boolean convertNamesToLowerCase) {
        this(attributes,attributes.getMap(convertNamesToLowerCase));
    }

    /**
     * Constructs a new AttributesOutputSegment with the same span
     * as the specified source {@link Attributes}, using the specified Map to
     * store the entries.
     * <p>
     * This constructor might be used if the Map containing the new attribute values
     * should not be preloaded with the same entries as the source attributes, or a map implementation
     * other than LinkedHashMap is required.
     *
     * @param attributes  the Attributes defining the span of the new AttributesOutputSegment.
     * @param map  the Map containing the name/value entries.
     * @throws IllegalArgumentException if either argument is null.
     * @see #AttributesOutputSegment(Attributes, boolean convertNamesToLowerCase)
     */
    public AttributesOutputSegment(final Attributes attributes, final Map map) {
        if (map==null || attributes==null) throw new IllegalArgumentException("both arguments must be non-null");
        begin=attributes.getBegin();
        end=attributes.getEnd();
        this.map=map;
    }

    /**
     * Returns the begin position copied from the source attributes at construction.
     * @return the begin position of this output segment.
     */
    public int getBegin() {
        return begin;
    }

    /**
     * Returns the end position copied from the source attributes at construction.
     * @return the end position of this output segment.
     */
    public int getEnd() {
        return end;
    }

    /**
     * Returns the Map containing the name/value entries to be output.
     * @return the Map containing the name/value entries to be output.
     */
    public Map getMap() {
        return map;
    }

    /**
     * Writes the contents of the {@linkplain #getMap() map} as HTML attribute name/value pairs to the specified Writer.
     * <p>
     * This is equivalent to {@link #appendTo(Appendable) appendTo}(writer).
     *
     * @param writer  the destination java.io.Writer for the output.
     * @throws IOException if an I/O exception occurs.
     * @see Attributes#generateHTML(Map attributesMap)
     */
    public void writeTo(final Writer writer) throws IOException {
        Attributes.appendHTML(writer,map);
    }

    /**
     * Appends the contents of the {@linkplain #getMap() map} as HTML attribute name/value pairs to the specified Appendable object.
     * <p>
     * Each attribute is preceded by a single space, and all values are
     * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes.
     *
     * @param appendable  the destination java.lang.Appendable object for the output.
     * @throws IOException if an I/O exception occurs.
     * @see Attributes#generateHTML(Map attributesMap)
     */
    public void appendTo(final Appendable appendable) throws IOException {
        Attributes.appendHTML(appendable,map);
    }

    /**
     * Returns an estimate of the output length, calculated as twice the length of the replaced span.
     * @return twice the number of characters between the begin and end positions.
     */
    public long getEstimatedMaximumOutputLength() {
        // Multiply as long so that doubling a very large span cannot overflow the int range.
        return 2L*(end-begin);
    }

    /**
     * Returns the HTML generated from the {@linkplain #getMap() map} by {@link Attributes#generateHTML(Map attributesMap)}.
     * @return the HTML generated from the map.
     */
    @Override
    public String toString() {
        return Attributes.generateHTML(map);
    }

    /**
     * Returns a string representation of this object useful for debugging purposes,
     * consisting of the span positions followed by the generated output.
     * @return a string representation of this object useful for debugging purposes.
     */
    public String getDebugInfo() {
        StringBuilder sb=new StringBuilder();
        sb.append("(p").append(begin).append("-p").append(end).append("):");
        try {
            appendTo(sb);
        } catch (IOException ex) {throw new RuntimeException(ex);} // never happens
        return sb.toString();
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/Attribute.java0000644000175000017500000003167211204550410024611 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; /** * Represents a single attribute * name/value segment within a {@link StartTag}. *

    * An instance of this class is a representation of a single attribute in the source document and is not modifiable. * The {@link OutputDocument#replace(Attributes, Map)} and {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} methods * provide the means to add, delete or modify attributes and their values in an {@link OutputDocument}. *

    * Obtained using the {@link Attributes#get(String key)} method. *

    * See also the XML 1.0 specification for attributes. * * @see Attributes */ public final class Attribute extends Segment { private final String key; private final Segment nameSegment; private final Segment valueSegment; private final Segment valueSegmentIncludingQuotes; static final String CHECKED="checked"; static final String CLASS="class"; static final String DISABLED="disabled"; static final String ID="id"; static final String MULTIPLE="multiple"; static final String NAME="name"; static final String SELECTED="selected"; static final String STYLE="style"; static final String TYPE="type"; static final String VALUE="value"; /** * Constructs a new Attribute with no value part, called from Attributes class. *

    * Note that the resulting Attribute segment has the same span as the supplied nameSegment. * * @param source the {@link Source} document. * @param key the name of this attribute in lower case. * @param nameSegment the segment representing the name. */ Attribute(final Source source, final String key, final Segment nameSegment) { this(source,key,nameSegment,null,null); } /** * Constructs a new Attribute, called from Attributes class. *

    * The resulting Attribute segment begins at the start of the nameSegment * and finishes at the end of the valueSegmentIncludingQuotes. If this attribute * has no value, it finishes at the end of the nameSegment. *

    * If this attribute has no value, the valueSegment and valueSegmentIncludingQuotes must be null. * The parameter must not be null if the valueSegment is not null, and vice versa * * @param source the {@link Source} document. * @param key the name of this attribute in lower case. * @param nameSegment the segment spanning the name. * @param valueSegment the segment spanning the value. * @param valueSegmentIncludingQuotes the segment spanning the value, including quotation marks if any. */ Attribute(final Source source, final String key, final Segment nameSegment, final Segment valueSegment, final Segment valueSegmentIncludingQuotes) { super(source,nameSegment.getBegin(),(valueSegmentIncludingQuotes==null ? nameSegment.getEnd() : valueSegmentIncludingQuotes.getEnd())); this.key=key; this.nameSegment=nameSegment; this.valueSegment=valueSegment; this.valueSegmentIncludingQuotes=valueSegmentIncludingQuotes; } /** * Returns the name of this attribute in lower case. *

    * This package treats all attribute names as case insensitive, consistent with * HTML but not consistent with * XHTML. * * @return the name of this attribute in lower case. * @see #getName() */ public String getKey() { return key; } /** * Returns the name of this attribute in original case. *

    * This is exactly equivalent to {@link #getNameSegment()}.toString(). * * @return the name of this attribute in original case. * @see #getKey() */ public String getName() { return nameSegment.toString(); } /** * Returns the segment spanning the {@linkplain #getName() name} of this attribute. * @return the segment spanning the {@linkplain #getName() name} of this attribute. * @see #getName() */ public Segment getNameSegment() { return nameSegment; } /** * Indicates whether this attribute has a value. *

    * This method also returns true if this attribute has been assigned a zero-length value. *

    * It only returns false if this attribute appears in * minimized form. * * @return true if this attribute has a value, otherwise false. */ public boolean hasValue() { return valueSegment!=null; } /** * Returns the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute, * or null if it {@linkplain #hasValue() has no value}. *

    * This is equivalent to {@link CharacterReference}.{@link CharacterReference#decode(CharSequence,boolean) decode}({@link #getValueSegment()},true). *

    * Note that before version 1.4.1 this method returned the raw value of the attribute as it appears in the source document, * without {@linkplain CharacterReference#decode(CharSequence,boolean) decoding}. *

    * To obtain the raw value without decoding, use {@link #getValueSegment()}.toString(). *

    * Special attention should be given to attributes that contain URLs, such as the * href attribute. * When such an attribute contains a URL with parameters (as described in the * form-urlencoded media type), * the ampersand (&) characters used to separate the parameters should be * {@linkplain CharacterReference#encode(CharSequence) encoded} to prevent the parameter names from being * unintentionally interpreted as {@linkplain CharacterEntityReference character entity references}. * This requirement is explicitly stated in the * HTML 4.01 specification section 5.3.2. *

    * For example, take the following element in the source document: *

    <a href="Report.jsp?chapt=2&sect=3">next</a>
    * By default, calling * {@link Element#getAttributes() getAttributes()}.{@link Attributes#getValue(String) getValue}("href") * on this element returns the string * "Report.jsp?chapt=2§=3", since the text "&sect" is interpreted as the rarely used * character entity reference {@link CharacterEntityReference#_sect &sect;} (U+00A7), despite the fact that it is * missing the {@linkplain CharacterReference#isTerminated() terminating semicolon} (;). *

    * Most browsers recognise unterminated character entity references * in attribute values representing a codepoint of U+00FF or below, but ignore those representing codepoints above this value. * One relatively popular browser only recognises those representing a codepoint of U+003E or below, meaning it would * have interpreted the URL in the above example differently to most other browsers. * Most browsers also use different rules depending on whether the unterminated character reference is inside or outside * of an attribute value, with both of these possibilities further split into different rules for * {@linkplain CharacterEntityReference character entity references}, * decimal character references, and * hexadecimal character references. *

    * The behaviour of this library is determined by the current {@linkplain Config.CompatibilityMode compatibility mode} setting, * which is determined by the static {@link Config#CurrentCompatibilityMode} property. * * @return the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute, or null if it {@linkplain #hasValue() has no value}. */ public String getValue() { return CharacterReference.decode(valueSegment,true); } /** * Returns the segment spanning the {@linkplain #getValue() value} of this attribute, or null if it {@linkplain #hasValue() has no value}. * @return the segment spanning the {@linkplain #getValue() value} of this attribute, or null if it {@linkplain #hasValue() has no value}. * @see #getValue() */ public Segment getValueSegment() { return valueSegment; } /** * Returns the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any, * or null if it {@linkplain #hasValue() has no value}. *

    * If the value is not enclosed by quotation marks, this is the same as the {@linkplain #getValueSegment() value segment} * * @return the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any, or null if it {@linkplain #hasValue() has no value}. */ public Segment getValueSegmentIncludingQuotes() { return valueSegmentIncludingQuotes; } /** * Returns the character used to quote the value. *

    * The return value is either a double-quote ("), a single-quote ('), or a space. * * @return the character used to quote the value, or a space if the value is not quoted or this attribute has no value. */ public char getQuoteChar() { if (valueSegment==valueSegmentIncludingQuotes) return ' '; // no quotes return source.charAt(valueSegmentIncludingQuotes.getBegin()); } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { final StringBuilder sb=new StringBuilder().append(key).append(super.getDebugInfo()).append(",name=").append(nameSegment.getDebugInfo()); if (hasValue()) sb.append(",value=").append(valueSegment.getDebugInfo()).append('"').append(valueSegment).append('"').append(Config.NewLine); else sb.append(",NO VALUE").append(Config.NewLine); return sb.toString(); } Tag appendTidy(final Appendable appendable, Tag nextTag) throws IOException { appendable.append(' ').append(nameSegment); if (valueSegment!=null) { appendable.append("=\""); while (nextTag!=null && nextTag.begin=valueSegment.end) { appendTidyValue(appendable,valueSegment); } else { int i=valueSegment.begin; while (nextTag!=null && nextTag.beginvalueSegment.end) { appendable.append(new Segment(source,nextTag.begin,i=valueSegment.end)); break; } appendable.append(nextTag); i=nextTag.end; nextTag=nextTag.getNextTag(); } if (istart tag of an * {@linkplain Element element} in a specific {@linkplain Source source} document. *

    * A start tag always has a {@linkplain #getTagType() type} that is a subclass of {@link StartTagType}, meaning that any tag * that does not start with the characters '</' is categorised as a start tag. *

    * This includes many tags which stand alone, without a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag}, * and would not intuitively be categorised as a "start tag". * For example, an HTML {@linkplain StartTagType#COMMENT comment} is represented as a single start tag that spans the whole comment, * and does not have an end tag at all. *

    * See the static fields defined in the {@link StartTagType} class for a list of the * standard start tag types. *

    * StartTag instances are obtained using one of the following methods: *

      *
    • {@link Element#getStartTag()} *
    • {@link Tag#getNextTag()} *
    • {@link Tag#getPreviousTag()} *
    • {@link Source#getPreviousStartTag(int pos)} *
    • {@link Source#getPreviousStartTag(int pos, String name)} *
    • {@link Source#getPreviousTag(int pos)} *
    • {@link Source#getPreviousTag(int pos, TagType)} *
    • {@link Source#getNextStartTag(int pos)} *
    • {@link Source#getNextStartTag(int pos, String name)} *
    • {@link Source#getNextStartTag(int pos, String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Source#getNextTag(int pos)} *
    • {@link Source#getNextTag(int pos, TagType)} *
    • {@link Source#getEnclosingTag(int pos)} *
    • {@link Source#getEnclosingTag(int pos, TagType)} *
    • {@link Source#getTagAt(int pos)} *
    • {@link Segment#getAllStartTags()} *
    • {@link Segment#getAllStartTags(String name)} *
    • {@link Segment#getAllStartTags(String attributeName, String value, boolean valueCaseSensitive)} *
    • {@link Segment#getAllTags()} *
    • {@link Segment#getAllTags(TagType)} *
    *

    * The methods above which accept a name parameter are categorised as named search methods. *

    * In such methods dealing with start tags, specifying an argument to the name parameter that ends in a * colon (:) searches for all start tags in the specified XML namespace. *

    * The constants defined in the {@link HTMLElementName} interface can be used directly as arguments to these name parameters. * For example, source.getAllStartTags({@link HTMLElementName#A}) is equivalent to * source.getAllStartTags("a"), and gets all hyperlink start tags. *

    * The {@link Tag} superclass defines a method called {@link Tag#getName() getName()} to get the name of this start tag. *

    * See also the XML 1.0 specification for start tags. * * @see Tag * @see Element * @see EndTag */ public final class StartTag extends Tag { private final Attributes attributes; final StartTagType startTagType; /** * Constructs a new StartTag. * * @param source the {@link Source} document. * @param begin the character position in the source document where this tag {@linkplain Segment#getBegin() begins}. * @param end the character position in the source document where this tag {@linkplain Segment#getEnd() ends}. * @param startTagType the {@linkplain #getStartTagType() type} of the start tag. * @param name the {@linkplain Tag#getName() name} of the tag. * @param attributes the {@linkplain #getAttributes() attributes} of the tag. */ StartTag(final Source source, final int begin, final int end, final StartTagType startTagType, final String name, final Attributes attributes) { super(source,begin,end,name); this.attributes=attributes; this.startTagType=startTagType; } // only used to create Tag.NOT_CACHED StartTag() { attributes=null; startTagType=null; } /** * Returns the {@linkplain Element element} that is started by this start tag. * Guaranteed not null. *

    *

    *
    Example 1: Elements for which the {@linkplain HTMLElements#getEndTagRequiredElementNames() end tag is required}
    *
    *
    	 *    1. <div>
    	 *    2.   <div>
    	 *    3.     <div>
    	 *    4.       <div>This is line 4</div>
    	 *    5.     </div>
    	 *    6.     <div>This is line 6</div>
    	 *    7.   </div>
    *
      *
    • The start tag on line 1 returns an empty element spanning only the start tag. * This is because the end tag of a <div> element is required, * making the sample code invalid as all the end tags are matched with other start tags. *
    • The start tag on line 2 returns an element spanning to the end of line 7. *
    • The start tag on line 3 returns an element spanning to the end of line 5. *
    • The start tag on line 4 returns an element spanning to the end of line 4. *
    • The start tag on line 6 returns an element spanning to the end of line 6. *
    *

    *

    *
    Example 2: Elements for which the {@linkplain HTMLElements#getEndTagOptionalElementNames() end tag is optional}
    *
    *
    	 *    1. <ul>
    	 *    2.   <li>item 1
    	 *    3.   <li>item 2
    	 *    4.     <ul>
    	 *    5.       <li>subitem 1</li>
    	 *    6.       <li>subitem 2
    	 *    7.     </ul>
    	 *    8.   <li>item 3</li>
    	 *    9. </ul>
    *
      *
    • The start tag on line 1 returns an element spanning to the end of line 9. *
    • The start tag on line 2 returns an element spanning to the start of the <li> start tag on line 3. *
    • The start tag on line 3 returns an element spanning to the start of the <li> start tag on line 8. *
    • The start tag on line 4 returns an element spanning to the end of line 7. *
    • The start tag on line 5 returns an element spanning to the end of line 5. *
    • The start tag on line 6 returns an element spanning to the start of the </ul> end tag on line 7. *
    • The start tag on line 8 returns an element spanning to the end of line 8. *
    *
    *
    * * @return the {@linkplain Element element} that is started by this start tag. */ public Element getElement() { if (element==Element.NOT_CACHED) { final EndTag endTag=getEndTagInternal(); element=new Element(source,this,endTag); if (endTag!=null) { if (endTag.element!=Element.NOT_CACHED) { // This is presumably impossible, except in certain circumstances where the cache was cleared, such as if the parser decides to do a full sequential parse after some tags have already been found. // If the existing element and the current element are not the same, log it. if (source.logger.isInfoEnabled() && !element.equals(endTag.element)) source.logger.info(source.getRowColumnVector(endTag.begin).appendTo(new StringBuilder(200).append("End tag ").append(endTag).append(" at ")).append(" terminates more than one element").toString()); } endTag.element=element; } } return element; } /** * Indicates whether this start tag is an empty-element tag. *

    * This property checks that the tag is {@linkplain #isSyntacticalEmptyElementTag() syntactically an empty-element tag}, * but in addition checks that the {@linkplain #getName() name} of the tag is not one that is defined in the HTML specification to have a * {@linkplain HTMLElements#getEndTagRequiredElementNames() required} or {@linkplain HTMLElements#getEndTagOptionalElementNames() optional} end tag, * which the major browsers do not recognise as empty-element tags, even in an XHTML document. *

    * This is equivalent to:
    * {@link #isSyntacticalEmptyElementTag()} && !({@link HTMLElements#getEndTagOptionalElementNames()}.contains({@link #getName() getName()}) || {@link HTMLElements#getEndTagRequiredElementNames()}.contains({@link #getName() getName()})). * * @return true if this start tag is an empty-element tag, otherwise false. */ public boolean isEmptyElementTag() { return isSyntacticalEmptyElementTag() && !HTMLElements.isClosingSlashIgnored(name); } /** * Indicates whether this start tag is syntactically an empty-element tag. *

    * This is signified by the characters "/>" at the end of the start tag. *

    * Only a {@linkplain StartTagType#NORMAL normal} start tag can be syntactically an empty-element tag. *

    * This property simply reports whether the syntax of the start tag is consistent with that of an empty-element tag, * it does not guarantee that this start tag's {@linkplain #getElement() element} is actually {@linkplain Element#isEmpty() empty}. *

    * This possible discrepancy reflects the way major browsers interpret illegal empty element tags used in
    * HTML elements, and is explained further in the documentation of the
    * {@link #isEmptyElementTag()} property.
    *
    * @return true if this start tag is syntactically an empty-element tag, otherwise false.
    * @see #isEmptyElementTag()
    */
    public boolean isSyntacticalEmptyElementTag() {
        // Only a normal start tag is examined; the source is not read for any other type.
        if (startTagType!=StartTagType.NORMAL) return false;
        // Given the documented "/>" ending, the slash is looked for two characters before the end position.
        final char secondLastChar=source.charAt(end-2);
        return secondLastChar=='/';
    }

    /**
    * Returns the {@linkplain StartTagType type} of this start tag.
    *

    * This is equivalent to (StartTagType){@link #getTagType()}. * * @return the {@linkplain StartTagType type} of this start tag. */ public StartTagType getStartTagType() { return startTagType; } // Documentation inherited from Tag public TagType getTagType() { return startTagType; } /** * Returns the attributes specified in this start tag. *

    * Return value is not null if and only if * {@link #getStartTagType()}.{@link StartTagType#hasAttributes() hasAttributes()}==true. *

    * To force the parsing of attributes in other start tag types, use the {@link #parseAttributes()} method instead. * * @return the attributes specified in this start tag, or null if the {@linkplain #getStartTagType() type} of this start tag does not {@linkplain StartTagType#hasAttributes() have attributes}. * @see #parseAttributes() * @see Source#parseAttributes(int pos, int maxEnd) */ public Attributes getAttributes() { return attributes; } /** * Returns the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name (case insensitive). *

    * Returns null if this start tag does not {@linkplain StartTagType#hasAttributes() have attributes}, * no attribute with the specified name exists or the attribute {@linkplain Attribute#hasValue() has no value}. *

    * This is equivalent to {@link #getAttributes()}.{@link Attributes#getValue(String) getValue(attributeName)}, * except that it returns null if this start tag does not have attributes instead of throwing a * NullPointerException. * * @param attributeName the name of the attribute to get. * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name, or null if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}. */ public String getAttributeValue(final String attributeName) { return attributes==null ? null : attributes.getValue(attributeName); } /** * Parses the attributes specified in this start tag, regardless of the type of start tag. * This method is only required in the unusual situation where attributes exist in a start tag whose * {@linkplain #getStartTagType() type} doesn't {@linkplain StartTagType#hasAttributes() have attributes}. *

    * This method returns the cached attributes from the {@link StartTag#getAttributes()} method * if its value is not null, otherwise the source is physically parsed with each call to this method. *

    * This is equivalent to {@link #parseAttributes(int) parseAttributes}({@link Attributes#getDefaultMaxErrorCount()})}. * * @return the attributes specified in this start tag, or null if too many errors occur while parsing. * @see #getAttributes() * @see Source#parseAttributes(int pos, int maxEnd) */ public Attributes parseAttributes() { return parseAttributes(Attributes.getDefaultMaxErrorCount()); } /** * Parses the attributes specified in this start tag, regardless of the type of start tag. * This method is only required in the unusual situation where attributes exist in a start tag whose * {@linkplain #getStartTagType() type} doesn't {@linkplain StartTagType#hasAttributes() have attributes}. *

    * See the documentation of the {@link #parseAttributes()} method for more information. * * @param maxErrorCount the maximum number of minor errors allowed while parsing * @return the attributes specified in this start tag, or null if too many errors occur while parsing. * @see #getAttributes() */ public Attributes parseAttributes(final int maxErrorCount) { if (attributes!=null) return attributes; final int maxEnd=end-startTagType.getClosingDelimiter().length(); int attributesBegin=begin+1+name.length(); // skip any non-name characters directly after the name (which are quite common) while (!isXMLNameStartChar(source.charAt(attributesBegin))) { attributesBegin++; if (attributesBegin==maxEnd) return null; } return Attributes.construct(source,begin,attributesBegin,maxEnd,startTagType,name,maxErrorCount); } /** * Returns the segment between the end of the tag's {@linkplain #getName() name} and the start of its end delimiter. *

    * This method is normally only of use for start tags whose content is something other than {@linkplain #getAttributes() attributes}. *

    * A new {@link Segment} object is created with each call to this method.
    *
    * @return the segment between the end of the tag's {@linkplain #getName() name} and the start of the end delimiter.
    */
    public Segment getTagContent() {
        // Content starts after the opening character and the name, and stops where the closing delimiter begins.
        final int contentBegin=begin+1+name.length();
        final int contentEnd=end-startTagType.getClosingDelimiter().length();
        return new Segment(source,contentBegin,contentEnd);
    }

    /**
    * Returns the {@link FormControl} defined by this start tag.
    *

    * This is equivalent to {@link #getElement()}.{@link Element#getFormControl() getFormControl()}.
    *
    * @return the {@link FormControl} defined by this start tag, or null if it is not a control.
    */
    public FormControl getFormControl() {
        final Element element=getElement();
        return element.getFormControl();
    }

    /**
    * Indicates whether a matching end tag is forbidden.
    *

    * This property returns true if one of the following conditions is met: *

      *
    • The {@linkplain #getStartTagType() type} of this start tag does not specify a * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}. *
    • The {@linkplain #getName() name} of this start tag indicates it is the start of an * HTML element whose {@linkplain HTMLElements#getEndTagForbiddenElementNames() end tag is forbidden}. *
    • This start tag is {@linkplain #isSyntacticalEmptyElementTag() syntactically an empty-element tag} and its * {@linkplain #getName() name} indicates it is the start of a non-HTML element. *
    *

    * If this property returns true then this start tag's {@linkplain #getElement() element} will always be a
    * single tag element.
    *
    * @return true if a matching end tag is forbidden, otherwise false.
    */
    public boolean isEndTagForbidden() {
        final StartTagType type=getStartTagType();
        // Non-normal tag types forbid an end tag exactly when they define no corresponding end tag type.
        if (type!=StartTagType.NORMAL) {
            return type.getCorrespondingEndTagType()==null;
        }
        if (HTMLElements.getEndTagForbiddenElementNames().contains(name)) {
            return true;
        }
        // Any other HTML element permits an end tag; a non-HTML element forbids one
        // only when it is written as an empty-element tag.
        return !HTMLElements.getElementNames().contains(name) && isSyntacticalEmptyElementTag();
    }

    /**
    * Indicates whether a matching end tag is required.
    *

    * This property returns true if one of the following conditions is met: *

      *
    • The {@linkplain #getStartTagType() type} of this start tag is NOT {@link StartTagType#NORMAL}, but specifies a * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}. *
    • The {@linkplain #getName() name} of this start tag indicates it is the start of an * HTML element whose {@linkplain HTMLElements#getEndTagRequiredElementNames() end tag is required}. *
    • This start tag is NOT {@linkplain #isSyntacticalEmptyElementTag() syntactically an empty-element tag} and its * {@linkplain #getName() name} indicates it is the start of a non-HTML element. *
    * * @return true if a matching end tag is required, otherwise false. */ public boolean isEndTagRequired() { if (getStartTagType()!=StartTagType.NORMAL) return getStartTagType().getCorrespondingEndTagType()!=null; if (HTMLElements.getEndTagRequiredElementNames().contains(name)) return true; if (HTMLElements.getElementNames().contains(name)) return false; return !isSyntacticalEmptyElementTag(); } // Documentation inherited from Tag public boolean isUnregistered() { return startTagType==StartTagType.UNREGISTERED; } /** * Returns an XML representation of this start tag. *

    * This is equivalent to {@link #tidy(boolean) tidy(false)}, thereby keeping the {@linkplain #getName() name} of the tag in its original case. *

    * See the documentation of the {@link #tidy(boolean toXHTML)} method for more details.
    *
    * @return an XML representation of this start tag, or the {@linkplain Segment#toString() source text} if it is of a {@linkplain #getStartTagType() type} that does not {@linkplain StartTagType#hasAttributes() have attributes}.
    */
    public String tidy() {
        // Plain XML output: the tag name keeps its original case.
        final boolean toXHTML=false;
        return tidy(toXHTML);
    }

    /**
    * Returns an XML or XHTML representation of this start tag.
    *

    * The tidying of the tag is carried out as follows: *

      *
    • if this start tag is of a {@linkplain #getStartTagType() type} that does not {@linkplain StartTagType#hasAttributes() have attributes}, * then the original {@linkplain Segment#toString() source text} of the entire tag is returned. *
    • if this start tag contains any {@linkplain TagType#isServerTag() server tags} outside of an attribute value, * then the original {@linkplain Segment#toString() source text} of the entire tag is returned. *
    • name converted to lower case if the toXHTML argument is true and this is a {@linkplain StartTagType#NORMAL normal} start tag *
    • attributes separated by a single space *
    • attribute names in original case *
    • attribute values are enclosed in double quotes and {@linkplain CharacterReference#reencode(CharSequence) re-encoded} *
    • if this start tag forms an HTML element that has no {@linkplain Element#getEndTag() end tag}, * a slash is inserted before the closing angle bracket, separated from the {@linkplain #getName() name} or last attribute by a single space. *
    • if an attribute value contains a {@linkplain TagType#isServerTag() server tag} it is inserted verbatim instead of being * {@linkplain CharacterReference#encode(CharSequence) encoded}. *
    *

    * The toXHTML parameter determines only whether the name is converted to lower case for {@linkplain StartTagType#NORMAL normal} tags. * In all other respects the generated tag is already valid XHTML. *

    *

    *
    Example:
    *
    *

    * The following source text: *

    * <INPUT name=Company value='G&uuml;nter O&#39;Reilly &amp Associés'> *
    * produces the following regenerated HTML: *
    * <input name="Company" value="G&uuml;nter O'Reilly &amp; Associ&eacute;s" /> *
    *
    *
    * * @param toXHTML specifies whether the output is XHTML. * @return an XML or XHTML representation of this start tag, or the {@linkplain Segment#toString() source text} if it is of a {@linkplain #getStartTagType() type} that does not {@linkplain StartTagType#hasAttributes() have attributes}. */ public String tidy(boolean toXHTML) { if (attributes==null || attributes.containsServerTagOutsideOfAttributeValue) return toString(); final StringBuilder sb=new StringBuilder(); sb.append('<'); if (toXHTML && startTagType==StartTagType.NORMAL) { sb.append(name); } else { int i=begin+startTagType.startDelimiterPrefix.length(); final int nameSegmentEnd=i+name.length(); while (i * The output of the attributes is as described in the {@link Attributes#generateHTML(Map attributesMap)} method. *

    * The emptyElementTag parameter specifies whether the start tag should be an * empty-element tag, * in which case a slash is inserted before the closing angle bracket, separated from the name * or last attribute by a single space. *

    *

    *
    Example:
    *
    *

    * The following code: *

    *
    	 * LinkedHashMap attributesMap=new LinkedHashMap();
    	 * attributesMap.put("name","Company");
    	 * attributesMap.put("value","G\u00fcnter O'Reilly & Associés");
    	 * System.out.println(StartTag.generateHTML("INPUT",attributesMap,true));
    *
    * generates the following output: *
    * <INPUT name="Company" value="G&uuml;nter O'Reilly &amp; Associ&eacute;s" /> *
    *
    *
    * * @param tagName the name of the start tag. * @param attributesMap a map containing attribute name/value pairs. * @param emptyElementTag specifies whether the start tag should be an empty-element tag. * @return the HTML text of a {@linkplain StartTagType#NORMAL normal} start tag with the specified tag name and {@linkplain Attributes#populateMap(Map,boolean) attributes map}. * @see EndTag#generateHTML(String tagName) */ public static String generateHTML(final String tagName, final Map attributesMap, final boolean emptyElementTag) { final StringBuilder sb=new StringBuilder(); sb.append('<').append(tagName); try { Attributes.appendHTML(sb,attributesMap); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens if (emptyElementTag) sb.append(" />"); else sb.append('>'); return sb.toString(); } public String getDebugInfo() { final StringBuilder sb=new StringBuilder(); appendDebugTag(sb); sb.append(' '); appendDebugTagType(sb); sb.append(super.getDebugInfo()); return sb.toString(); } StringBuilder appendDebugTag(final StringBuilder sb) { if (startTagType==StartTagType.NORMAL && getAttributes().isEmpty()) { sb.append(this); } else { sb.append('<').append(getNameSegment()).append(' '); if (isSyntacticalEmptyElementTag()) sb.append('/'); sb.append(startTagType.getClosingDelimiter()); } return sb; } StringBuilder appendDebugTagType(final StringBuilder sb) { if (startTagType!=StartTagType.NORMAL) sb.append('(').append(startTagType.getDescription()).append(") "); return sb; } private EndTag getEndTagInternal() { boolean checkForEmptyElementTag=true; // A missing optional end tag returns a zero length EndTag instead of null final EndTagType endTagType=startTagType.getCorrespondingEndTagType(); if (startTagType==StartTagType.NORMAL) { final HTMLElementTerminatingTagNameSets terminatingTagNameSets=HTMLElements.getTerminatingTagNameSets(name); if (terminatingTagNameSets!=null) // end tag is optional return getOptionalEndTag(terminatingTagNameSets); if 
(HTMLElements.getEndTagForbiddenElementNames().contains(name)) // end tag is forbidden return null; checkForEmptyElementTag=!HTMLElements.getEndTagRequiredElementNames().contains(name); // check for empty-element tags if tag is not an HTML element if (checkForEmptyElementTag && isSyntacticalEmptyElementTag()) // non-html empty-element tag return null; } else if (endTagType==null) { return null; } // This is either a start tag type other than NORMAL that requires an end tag, or an HTML element tag that requires an end tag, // or a non-HTML element tag that is not an empty-element tag. // In all of these cases the end tag is required. final EndTag nextEndTag=source.getNextEndTag(end,endTagType.getEndTagName(name),endTagType); if (nextEndTag!=null) { if (startTagType==StartTagType.NORMAL && HTMLElements.END_TAG_REQUIRED_NESTING_FORBIDDEN_SET.contains(name)) { final StartTag nextStartTag=source.getNextStartTag(end,name); if (nextStartTag==null || nextStartTag.begin>nextEndTag.begin) return nextEndTag; if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(begin).appendTo(new StringBuilder(200).append("StartTag at ")).append(" missing required end tag - invalid nested start tag encountered before end tag").toString()); // Terminate the element at the start of the invalidly nested start tag. // This is how IE and Mozilla treat illegally nested A elements, but other elements may vary. 
return new EndTag(source,nextStartTag.begin,nextStartTag.begin,EndTagType.NORMAL,name); } final Segment[] getResult=getEndTag(nextEndTag,checkForEmptyElementTag,Tag.isXMLName(name)); if (getResult!=null) return (EndTag)getResult[0]; } if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(begin).appendTo(new StringBuilder(200).append("StartTag at ")).append(" missing required end tag").toString()); return null; } private EndTag getOptionalEndTag(final HTMLElementTerminatingTagNameSets terminatingTagNameSets) { int pos=end; while (possearchName.length()) { // The name of the start tag is longer than the search name, and the type of tag indicates // that we are probably looking for an exact match. // (eg searchName="a", startTag.name="applet" -> reject) // We only require an exact match if the last character of the search name is part of the name, as the // search name might be just the prefix of a server tag. // (eg searchName="?", startTag.name="?abc" -> accept, but searchName="?a", startTag.name="?abc" -> reject) // The only exception to this is if the last character of the search name is a colon (which also forms part of // the name), but signifies that we want to search on the entire namespace. // (eg searchName="o:", startTag.name="o:p" -> accept) char lastSearchNameChar=searchName.charAt(searchName.length()-1); if (lastSearchNameChar!=':' && isXMLNameChar(lastSearchNameChar)) continue; } return startTag; } while ((begin-=2)>=0); } catch (IndexOutOfBoundsException ex) { // this should never happen during a get previous operation so rethrow it: throw ex; } return null; } static StartTag getNext(final Source source, final int pos, final String searchName, final StartTagType searchStartTagType) { return getNext(source,pos,searchName,searchStartTagType,searchStartTagType==StartTagType.NORMAL ? 
Tag.isXMLName(searchName) : true); } static StartTag getNext(final Source source, final int pos, final String searchName, final StartTagType searchStartTagType, final boolean isXMLTagName) { // searchName is already in lower case, but may be null // searchStartTagType must not be null // isXMLTagName is only used if searchStartTagType==StartTagType.NORMAL if (searchName==null) return (StartTag)source.getNextTag(pos,searchStartTagType); final String startDelimiter=getStartDelimiter(searchName); try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.indexOf(startDelimiter,begin); if (begin==-1) return null; final StartTag startTag=(StartTag)Tag.getTagAt(source,begin,false); if (startTag==null) continue; // keep looking if it wasn't a start tag if (searchStartTagType!=startTag.getStartTagType()) { // The start tag is of the wrong type. The only case in which we want to return it is if // we are looking for a normal start tag, the found start tag is unregistered, and the search name is NOT a valid XML name. // This allows users to search for some types of unregistered tags by name rather than having to register custom tag types. if (searchStartTagType!=StartTagType.NORMAL || isXMLTagName || !startTag.isUnregistered()) continue; } if (startTag.getStartTagType().isNameAfterPrefixRequired() && startTag.getName().length()>searchName.length()) { // The name of the start tag is longer than the search name, and the type of tag indicates // that we are probably looking for an exact match. // (eg searchName="a", startTag.name="applet" -> reject) // We only require an exact match if the last character of the search name is part of the name, as the // search name might be just the prefix of a server tag. 
// (eg searchName="?", startTag.name="?abc" -> accept, but searchName="?a", startTag.name="?abc" -> reject) // The only exception to this is if the last character of the search name is a colon (which also forms part of // the name), but signifies that we want to search on the entire namespace. // (eg searchName="o:", startTag.name="o:p" -> accept) char lastSearchNameChar=searchName.charAt(searchName.length()-1); if (lastSearchNameChar!=':' && isXMLNameChar(lastSearchNameChar)) continue; } return startTag; } while ((begin+=1)= 3 chars long. // - have to perform the text search on the name if the value is zero length. // - perform the text search on the name if the name >= 3 chars long, otherwise on the value. final String searchString=value.length()>=3 || (value.length()>0 && attributeName.length()<3) ? value : attributeName; final ParseText parseText=source.getParseText(); int searchPos=pos; while (searchPos=pos) { final StartTag startTag=(StartTag)tag; if (startTag.getAttributes()!=null) { final String attributeValue=startTag.getAttributes().getValue(attributeName); if (attributeValue!=null) { if (value.equals(attributeValue)) return startTag; if (value.equalsIgnoreCase(attributeValue)) { if (!valueCaseSensitive) return startTag; if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(searchPos).appendTo(new StringBuilder(200)).append(": StartTag with attribute ").append(attributeName).append("=\"").append(attributeValue).append("\" ignored during search because its case does not match search value \"").append(value).append('"').toString()); } } } } searchPos=tag.end; } return null; } static StartTag getNext(final Source source, final int pos, final String attributeName, final Pattern regexPattern) { if (attributeName==null || attributeName.length()==0) throw new IllegalArgumentException(); final String searchString=attributeName; final ParseText parseText=source.getParseText(); int searchPos=pos; while (searchPos=pos) { final StartTag 
startTag=(StartTag)tag; if (startTag.getAttributes()!=null) { final Attribute attribute=startTag.getAttributes().get(attributeName); if (attribute!=null) { if (regexPattern==null) return startTag; final String attributeValue=attribute.getValue(); if (attributeValue!=null && regexPattern.matcher(attributeValue).matches()) return startTag; } } } searchPos=tag.end; } return null; } private Segment[] getEndTag(final EndTag nextEndTag, final boolean checkForEmptyElementTag, final boolean isXMLTagName) { assert nextEndTag!=null; StartTag nextStartTag=getNext(source,end,name,startTagType,isXMLTagName); if (checkForEmptyElementTag) { while (nextStartTag!=null && nextStartTag.isSyntacticalEmptyElementTag()) nextStartTag=getNext(source,nextStartTag.end,name,startTagType,isXMLTagName); } return getEndTag(end,nextStartTag,nextEndTag,checkForEmptyElementTag,isXMLTagName); } private Segment[] getEndTag(final int afterPos, final StartTag nextStartTag, final EndTag nextEndTag, final boolean checkForEmptyElementTag, final boolean isXMLTagName) { // returns null if no end tag exists in the rest of the file, otherwise the following two segments: // first is the matching end tag to this start tag. Must be present if array is returned. // second is the next occurrence after the returned end tag of a start tag of the same name. (null if none exists) if (nextEndTag==null) return null; // no end tag in the rest of the file final Segment[] returnArray={nextEndTag,nextStartTag}; if (nextStartTag==null || nextStartTag.begin>nextEndTag.begin) return returnArray; // no more start tags of the same name in rest of file, or they occur after the end tag that we found. This means we have found the matching end tag. 
final Segment[] getResult=nextStartTag.getEndTag(nextEndTag,checkForEmptyElementTag,isXMLTagName); // get the matching end tag to the interloping start tag if (getResult==null) return null; // no end tag in the rest of the file final EndTag nextStartTagsEndTag=(EndTag)getResult[0]; final EndTag nextNextEndTag=EndTag.getNext(source,nextStartTagsEndTag.end,nextEndTag.getName(),nextEndTag.getEndTagType()); // get end tag after the interloping start tag's end tag return getEndTag(nextStartTagsEndTag.end,(StartTag)getResult[1],nextNextEndTag,checkForEmptyElementTag,isXMLTagName); // recurse to see if this is the matching end tag } } jericho-html-3.1/src/java/net/htmlparser/jericho/EndTagTypeGenericImplementation.java0000644000175000017500000002451711204550410031055 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Provides a generic implementation of the abstract {@link EndTagType} class based on the most common end tag behaviour. *

    * This class is only of interest to users who wish to create custom tag types. *

    * The differences between this class and its abstract superclass {@link EndTagType} are: *

      *
    • The introduction of the {@link #isStatic() IsStatic} property. *
    • The {@link #constructTagAt(Source, int pos)} method has a default implementation. *
    *

    * Most of the predefined end tag types are implemented using this class or a subclass of it.
    *
    * @see StartTagTypeGenericImplementation
    */
public class EndTagTypeGenericImplementation extends EndTagType {
    /**
     * The complete, constant text of every end tag of this type (start delimiter immediately followed by
     * closing delimiter), or <code>null</code> if the end tag text is not static.
     */
    private final String staticString;

    /**
     * Constructs a new <code>EndTagTypeGenericImplementation</code> object based on the specified properties.
     * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
     * <p>
     * The purpose of the <code>isStatic</code> parameter is explained in the {@link #isStatic() IsStatic} property description.
     *
     * @param description  a {@linkplain #getDescription() description} of the new end tag type useful for debugging purposes.
     * @param startDelimiter  the {@linkplain #getStartDelimiter() start delimiter} of the new end tag type.
     * @param closingDelimiter  the {@linkplain #getClosingDelimiter() closing delimiter} of the new end tag type.
     * @param isServerTag  indicates whether the new end tag type is a {@linkplain #isServerTag() server tag}.
     * @param isStatic  determines whether the end tag text {@linkplain #isStatic() is static}.
     */
    protected EndTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final boolean isServerTag, final boolean isStatic) {
        super(description,startDelimiter,closingDelimiter,isServerTag);
        if (isStatic) {
            staticString=startDelimiter+closingDelimiter;
        } else {
            staticString=null;
        }
    }

    /**
     * Indicates whether the {@linkplain #generateHTML(String) end tag text} is static.
     * <br />(<a href="TagType.html#Property">property</a> and <a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
     * <p>
     * The purpose of this property is to determine the behaviour of the {@link #generateHTML(String startTagName)} method.
     * <p>
     * If this property is <code>true</code>, the {@linkplain #generateHTML(String) end tag text} is constant for all tags of this type.
     * <p>
     * If this property is <code>false</code>, the {@linkplain #generateHTML(String) end tag text} includes the
     * {@linkplain StartTag#getName() name} of the {@linkplain #getCorrespondingStartTagType corresponding}
     * {@linkplain StartTag start tag}.
     * <p>
     * {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END} is the only predefined end tag
     * for which this property is <code>true</code>.
     * All tags of this type have the constant tag text "<code>&lt;/&amp;&gt;</code>".
     *
     * @return <code>true</code> if the {@linkplain #generateHTML(String) end tag text} is static, otherwise <code>false</code>.
     */
    protected final boolean isStatic() {
        // The static text is only stored when the type was constructed with isStatic==true.
        return staticString!=null;
    }

    /**
     * Returns the end tag {@linkplain EndTag#getName() name} that is required to match a
     * {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag} with the specified {@linkplain StartTag#getName() name}.
     * <br />(<a href="TagType.html#Property">property</a> method)
     * <p>
     * This implementation overrides the default implementation in {@link EndTagType#getEndTagName(String startTagName)}.
     * <p>
     * If the value of the {@link #isStatic() IsStatic} property is <code>false</code>, it simply returns <code>startTagName</code>, as in the default implementation.
     * <p>
     * If the value of the {@link #isStatic() IsStatic} property is <code>true</code>, it returns this end tag type's {@linkplain #getNamePrefix() name prefix}.
     * <p>
     * Note that the <code>startTagName</code> parameter should include the start tag's {@linkplain TagType#getNamePrefix() name prefix} if it has one.
     *
     * @param startTagName  the {@linkplain StartTag#getName() name} of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}, including its {@linkplain TagType#getNamePrefix() name prefix}.
     * @return the end tag {@linkplain EndTag#getName() name} that is required to match a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag} with the specified {@linkplain StartTag#getName() name}.
     */
    public String getEndTagName(final String startTagName) {
        if (isStatic()) {
            return getNamePrefix();
        }
        return startTagName;
    }

    /**
     * Generates the HTML text of an {@linkplain EndTag end tag} of this type given the {@linkplain StartTag#getName() name}
     * of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}.
     * <br />(<a href="TagType.html#Property">property</a> method)
     * <p>
     * This implementation overrides the default implementation in {@link EndTagType#generateHTML(String startTagName)}
     * to improve efficiency in the case of a {@linkplain #isStatic() static} end tag type, although the functionality is the same.
     *
     * @param startTagName  the {@linkplain StartTag#getName() name} of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}, including its {@linkplain TagType#getNamePrefix() name prefix}.
     * @return the HTML text of an {@linkplain EndTag end tag} of this type given the {@linkplain StartTag#getName() name} of a {@linkplain #getCorrespondingStartTagType() corresponding} {@linkplain StartTag start tag}.
     */
    public String generateHTML(final String startTagName) {
        if (isStatic()) {
            return staticString;
        }
        return super.generateHTML(startTagName);
    }

    /**
     * Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
     * <br />(<a href="TagType.html#DefaultImplementation">default implementation</a> method)
     * <p>
     * This default implementation checks the source text for a match according to the following criteria:
     * <p>
     * If the value of the {@link #isStatic() IsStatic} property is <code>false</code>, this implementation ensures that the
     * source text matches the expression:<br />
     * <code>{@link #getStartDelimiter() getStartDelimiter()}+"<i>name</i>"+<i>optionalWhiteSpace</i>+{@link #getClosingDelimiter() getClosingDelimiter()}</code><br />
     * where <i>name</i> is a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name}, and <i>optionalWhiteSpace</i> is a string of zero or more {@linkplain Segment#isWhiteSpace(char) white space} characters.
     * The {@linkplain Tag#getName() name} of the constructed end tag becomes <code>{@link #getNamePrefix() getNamePrefix()}+"<i>name</i>"</code>.
     * <p>
     * If the value of the {@link #isStatic() IsStatic} property is <code>true</code>, this implementation ensures that the
     * source text matches the static expression:<br />
     * <code>{@link #getStartDelimiter() getStartDelimiter()}+{@link #getClosingDelimiter() getClosingDelimiter()}</code><br />
     * The {@linkplain Tag#getName() name} of the constructed end tag is the value of the {@link #getNamePrefix() getNamePrefix()} method.
     * <p>
     * See {@link TagType#constructTagAt(Source, int pos)} for more important information about this method.
     *
     * @param source  the {@link Source} document.
     * @param pos  the position in the source document.
     * @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or <code>null</code> if it does not meet the criteria.
     */
    protected Tag constructTagAt(final Source source, final int pos) {
        final ParseText parseText=source.getParseText();
        final int nameBegin=pos+START_DELIMITER_PREFIX.length();
        final int startDelimiterEnd=pos+getStartDelimiter().length();

        if (isStatic()) {
            // Static end tags consist of nothing but the two delimiters; the name is just the prefix.
            final String staticName=getNamePrefix();
            if (parseText.containsAt(getClosingDelimiter(),startDelimiterEnd)) {
                final int staticEnd=startDelimiterEnd+getClosingDelimiter().length();
                return constructEndTag(source,pos,staticEnd,staticName);
            }
            if (source.logger.isInfoEnabled()) {
                source.logger.info(
                    source.getRowColumnVector(pos)
                        .appendTo(new StringBuilder(200).append("EndTag of expected format ").append(staticString).append(" at "))
                        .append(" not recognised as type '")
                        .append(getDescription())
                        .append("' because it is missing the closing delimiter")
                        .toString());
            }
            return null;
        }

        final int nameEnd=source.getNameEnd(startDelimiterEnd);
        if (nameEnd==-1) {
            return null;
        }
        final String tagName=source.getName(nameBegin,nameEnd);

        // Skip any white space between the name and the closing delimiter.
        // NOTE(review): there is no explicit bounds check here; presumably an IndexOutOfBoundsException
        // at the end of the text is handled by the caller - confirm against TagType#constructTagAt.
        int closingDelimiterPos=nameEnd;
        while (Segment.isWhiteSpace(parseText.charAt(closingDelimiterPos))) {
            closingDelimiterPos++;
        }

        if (parseText.containsAt(getClosingDelimiter(),closingDelimiterPos)) {
            final int tagEnd=closingDelimiterPos+getClosingDelimiter().length();
            return constructEndTag(source,pos,tagEnd,tagName);
        }
        if (source.logger.isInfoEnabled()) {
            source.logger.info(
                source.getRowColumnVector(pos)
                    .appendTo(new StringBuilder(200).append("EndTag ").append(tagName).append(" at "))
                    .append(" not recognised as type '")
                    .append(getDescription())
                    .append("' because its name and closing delimiter are separated by characters other than white space")
                    .toString());
        }
        return null;
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/EncodingDetector.java0000644000175000017500000002032511204550410026057 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; import java.io.*; import java.nio.charset.*; import java.net.*; final class EncodingDetector { private final InputStream inputStream; private String encoding=null; private String encodingSpecificationInfo=null; private final String preliminaryEncoding; private final String preliminaryEncodingSpecificationInfo; private final String alternativePreliminaryEncoding; private static final int PREVIEW_BYTE_COUNT=2048; private static final String UTF_8="UTF-8"; private static final String ISO_8859_1="ISO-8859-1"; public EncodingDetector(final URLConnection urlConnection) throws IOException { this(new StreamEncodingDetector(urlConnection)); } public EncodingDetector(final InputStream inputStream) throws IOException { this(new StreamEncodingDetector(inputStream)); } public EncodingDetector(final InputStream inputStream, final String preliminaryEncoding) throws IOException { this(inputStream,preliminaryEncoding,"preliminary encoding set 
explicitly",null); if (!Charset.isSupported(preliminaryEncoding)) throw new UnsupportedEncodingException(preliminaryEncoding+" specified as preliminaryEncoding constructor argument"); detectDocumentSpecifiedEncoding(); } private EncodingDetector(final StreamEncodingDetector streamEncodingDetector) throws IOException { this(streamEncodingDetector,ISO_8859_1); } private EncodingDetector(final StreamEncodingDetector streamEncodingDetector, final String alternativePreliminaryEncoding) throws IOException { this(streamEncodingDetector.getInputStream(),streamEncodingDetector.getEncoding(),streamEncodingDetector.getEncodingSpecificationInfo(),alternativePreliminaryEncoding); if (streamEncodingDetector.isDifinitive() || !streamEncodingDetector.isDocumentSpecifiedEncodingPossible()) { // don't try to detect the encoding from the document because there is no need or it is not possible setEncoding(preliminaryEncoding,preliminaryEncodingSpecificationInfo); } else { detectDocumentSpecifiedEncoding(); } } private EncodingDetector(final InputStream inputStream, final String preliminaryEncoding, final String preliminaryEncodingSpecificationInfo, final String alternativePreliminaryEncoding) throws IOException { this.inputStream=inputStream.markSupported() ? 
inputStream : new BufferedInputStream(inputStream); this.preliminaryEncoding=preliminaryEncoding; this.preliminaryEncodingSpecificationInfo=preliminaryEncodingSpecificationInfo; this.alternativePreliminaryEncoding=alternativePreliminaryEncoding; if (alternativePreliminaryEncoding!=null && !Charset.isSupported(alternativePreliminaryEncoding)) throw new UnsupportedEncodingException(alternativePreliminaryEncoding+" specified as alternativePreliminaryEncoding constructor argument"); } public InputStream getInputStream() { return inputStream; } public String getEncoding() { return encoding; } public String getEncodingSpecificationInfo() { return encodingSpecificationInfo; } public String getPreliminaryEncoding() { return preliminaryEncoding; } public String getPreliminaryEncodingSpecificationInfo() { return preliminaryEncodingSpecificationInfo; } public Reader openReader() throws UnsupportedEncodingException { if (encoding==null) return new InputStreamReader(inputStream,ISO_8859_1); // encoding==null only if input stream is empty so use an arbitrary encoding. 
if (!Charset.isSupported(encoding)) { throw new UnsupportedEncodingException(encoding+": "+encodingSpecificationInfo); } return new InputStreamReader(inputStream,encoding); } private boolean setEncoding(final String encoding, final String encodingSpecificationInfo) { this.encoding=encoding; this.encodingSpecificationInfo=encodingSpecificationInfo; return true; } private boolean detectDocumentSpecifiedEncoding() throws IOException { inputStream.mark(PREVIEW_BYTE_COUNT); String safePreliminaryEncoding; if (Charset.isSupported(preliminaryEncoding)) { safePreliminaryEncoding=preliminaryEncoding; } else { if (alternativePreliminaryEncoding==null) throw new UnsupportedEncodingException(preliminaryEncoding+": "+preliminaryEncodingSpecificationInfo); safePreliminaryEncoding=alternativePreliminaryEncoding; } final Source previewSource=getPreviewSource(safePreliminaryEncoding); // should never throw UnsupportedEncodingException inputStream.reset(); final Logger logger=previewSource.getLogger(); previewSource.setLogger(null); if (preliminaryEncoding!=safePreliminaryEncoding && logger.isWarnEnabled()) logger.warn("Alternative encoding "+safePreliminaryEncoding+" substituted for unsupported preliminary encoding "+preliminaryEncoding+": "+preliminaryEncodingSpecificationInfo); String documentSpecifiedEncodingInfoSuffix; if (previewSource.getDocumentSpecifiedEncoding()==null) { if (previewSource.isXML()) { // The source looks like an XML document. // The XML 1.0 specification section 4.3.3 states that an XML file that is not encoded in UTF-8 must contain // either a UTF-16 BOM or an encoding declaration in its XML declaration. // Since no encoding declaration was detected, and if we assume this class is only used if no BOM is present, we can then assume it is UTF-8. 
return setEncoding(UTF_8,"mandatory XML encoding when no BOM or encoding declaration is present"); } documentSpecifiedEncodingInfoSuffix="no encoding specified in document"; } else { if (Charset.isSupported(previewSource.getDocumentSpecifiedEncoding())) return setEncoding(previewSource.getDocumentSpecifiedEncoding(),previewSource.getEncodingSpecificationInfo()); // Document specified encoding is not supported. Fall back on preliminary encoding. documentSpecifiedEncodingInfoSuffix="encoding "+previewSource.getDocumentSpecifiedEncoding()+" specified in document is not supported"; if (logger.isWarnEnabled()) logger.warn("Unsupported encoding "+previewSource.getDocumentSpecifiedEncoding()+" specified in document, using preliminary encoding "+safePreliminaryEncoding+" instead"); } // Document does not look like XML, does not specify an encoding in its transport protocol, has no BOM, and does not specify an encoding in the document itself. // The HTTP protocol states that such a situation should assume ISO-8859-1 encoding. // We will just assume the preliminary encoding, which is the best guess based on the first 4 bytes of the stream. // This means ISO-8859-1 will be used for any 8-bit ASCII compatible encoding, consistent with the HTTP protocol default. if (preliminaryEncoding!=safePreliminaryEncoding) return setEncoding(safePreliminaryEncoding,"alternative encoding substituted for unsupported preliminary encoding "+preliminaryEncoding+": "+preliminaryEncodingSpecificationInfo+", "+documentSpecifiedEncodingInfoSuffix); return setEncoding(preliminaryEncoding,preliminaryEncodingSpecificationInfo+", "+documentSpecifiedEncodingInfoSuffix); } private Source getPreviewSource(final String previewEncoding) throws IOException { final byte[] bytes=new byte[PREVIEW_BYTE_COUNT]; int i; for (i=0; iCharacter Reference, * implemented by the subclasses {@link CharacterEntityReference} and {@link NumericCharacterReference}. *

    * This class, together with its subclasses, contains static methods to perform most required operations * without having to instantiate an object. *

    * Instances of this class are useful when the positions of character references in a source document are required, * or to replace the found character references with customised text. *

    * CharacterReference instances are obtained using one of the following methods: *

      *
    • {@link CharacterReference#parse(CharSequence characterReferenceText)} *
    • {@link Source#getNextCharacterReference(int pos)} *
    • {@link Source#getPreviousCharacterReference(int pos)} *
    • {@link Segment#getAllCharacterReferences()} *
    */ public abstract class CharacterReference extends Segment { int codePoint; /** * Represents an invalid unicode code point. *

    * This can be the result of parsing a numeric character reference outside of the valid unicode range of 0x000000-0x10FFFF, or any other invalid character reference. */ public static final int INVALID_CODE_POINT=-1; static int MAX_ENTITY_REFERENCE_LENGTH; // set in CharacterEntityReference static class initialisation /** The number of spaces used to simulate a tab when {@linkplain #encodeWithWhiteSpaceFormatting encoding with white space formatting}. */ private static final int TAB_LENGTH=4; CharacterReference(final Source source, final int begin, final int end, final int codePoint) { super(source,begin,end); this.codePoint=codePoint; } /** * Returns the unicode code point represented by this character reference. * @return the unicode code point represented by this character reference. * @see #appendCharTo(Appendable) */ public int getCodePoint() { return codePoint; } /** * Returns the character represented by this character reference. *

    * If this character reference represents a unicode * supplementary code point, * any bits outside of the least significant 16 bits of the code point are truncated, yielding an incorrect result. *

    * To ensure that the character is correctly appended to an Appendable object such as a Writer, use the code: *
    characterReference.{@link #appendCharTo(Appendable) appendCharTo}(appendable)
    * instead of: *
    appendable.append(characterReference.getChar())
    *
    * @return the character represented by this character reference.
    * @see #appendCharTo(Appendable)
    * @see #getCodePoint()
    */
    public char getChar() {
        // The narrowing cast keeps only the least significant 16 bits of the code point.
        final char leastSignificant16Bits=(char)codePoint;
        return leastSignificant16Bits;
    }

    /**
    * Appends the character represented by this character reference to the specified appendable object.
    *

    * If this character is a unicode supplementary character, * then both the UTF-16 high/low surrogate char values of the character are appended, as described in the * Unicode character representations section of the * java.lang.Character class. *

    * If the static {@link Config#ConvertNonBreakingSpaces} property is set to true (the default), * then calling this method on a non-breaking space character reference ({@link CharacterEntityReference#_nbsp &nbsp;}) * results in a normal space being appended. * * @param appendable the object to append this character reference to. */ public final void appendCharTo(Appendable appendable) throws IOException { appendCharTo(appendable,Config.ConvertNonBreakingSpaces); } private void appendCharTo(Appendable appendable, final boolean convertNonBreakingSpaces) throws IOException { if (Character.isSupplementaryCodePoint(codePoint)) { appendable.append(getHighSurrogate(codePoint)); appendable.append(getLowSurrogate(codePoint)); } else { final char ch=getChar(); if (ch==CharacterEntityReference._nbsp && convertNonBreakingSpaces) { appendable.append(' '); } else { appendable.append(ch); } } } /** * Indicates whether this character reference is terminated by a semicolon (;). *

    * Conversely, this library defines an unterminated character reference as one which does * not end with a semicolon. *

    * The SGML specification allows unterminated character references in some circumstances, and because the * HTML 4.01 specification states simply that * "authors may use SGML character references", * it follows that they are also valid in HTML documents, although their use is strongly discouraged. *

    * Unterminated character references are not allowed in XHTML documents.
    *
    * @return <code>true</code> if this character reference is terminated by a semicolon, otherwise <code>false</code>.
    * @see #decode(CharSequence encodedText, boolean insideAttributeValue)
    */
    public boolean isTerminated() {
        // Inspect the last character covered by this segment.
        final char lastChar=source.charAt(end-1);
        return lastChar==';';
    }

    /**
    * Encodes the specified text, escaping special characters into character references.
    *

    * Each character is encoded only if the {@link #requiresEncoding(char)} method would return true for that character, * using its {@link CharacterEntityReference} if available, or a decimal {@link NumericCharacterReference} if its unicode * code point is greater than U+007F. *

    * The only exception to this is an {@linkplain CharacterEntityReference#_apos apostrophe} (U+0027), * which depending on the current setting of the static {@link Config#IsApostropheEncoded} property, * is either left unencoded (default setting), or encoded as the numeric character reference "&#39;". *

    * This method never encodes an apostrophe into its character entity reference {@link CharacterEntityReference#_apos &apos;} * as this entity is not defined for use in HTML. See the comments in the {@link CharacterEntityReference} class for more information. *

    * To encode text using only numeric character references, use the
    * {@link NumericCharacterReference#encode(CharSequence)} method instead. * * @param unencodedText the text to encode. * @return the encoded string. * @see #decode(CharSequence) */ public static String encode(final CharSequence unencodedText) { if (unencodedText==null) return null; try { return appendEncode(new StringBuilder(unencodedText.length()*2),unencodedText,false).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens } /** * Encodes the specified character into a character reference if {@linkplain #requiresEncoding(char) required}. *

    * The encoding of the character follows the same rules as for each character in the {@link #encode(CharSequence unencodedText)} method. * * @param ch the character to encode. * @return a character reference if appropriate, otherwise a string containing the original character. */ public static String encode(final char ch) { try { return appendEncode(new StringBuilder(MAX_ENTITY_REFERENCE_LENGTH),ch).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens } /** * {@linkplain #encode(CharSequence) Encodes} the specified text, preserving line breaks, tabs and spaces for rendering by converting them to markup. *

    * This performs the same encoding as the {@link #encode(CharSequence)} method, but also performs the following conversions: *

      *
    • Line breaks, being Carriage Return (U+000D) or Line Feed (U+000A) characters, and Form Feed characters (U+000C) * are converted to "<br />". CR/LF pairs are treated as a single line break. *
    • Multiple consecutive spaces are converted so that every second space is converted to "&nbsp;" * while ensuring the last is always a normal space. *
    • Tab characters (U+0009) are converted as if they were four consecutive spaces. *
    *

    * The conversion of multiple consecutive spaces to alternating space/non-breaking-space allows the correct number of * spaces to be rendered, but also allows the line to wrap in the middle of it. *

    * Note that zero-width spaces (U+200B) are converted to the numeric character reference * "&#x200B;" through the normal encoding process, but IE6 does not render them properly * either encoded or unencoded. *

    * There is no method provided to reverse this encoding. * * @param unencodedText the text to encode. * @return the encoded string with white space formatting converted to markup. * @see #encode(CharSequence) */ public static String encodeWithWhiteSpaceFormatting(final CharSequence unencodedText) { if (unencodedText==null) return null; try { return appendEncode(new StringBuilder(unencodedText.length()*2),unencodedText,true).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens } /** * Decodes the specified HTML encoded text into normal text. *

    * All {@linkplain CharacterEntityReference character entity references} and {@linkplain NumericCharacterReference numeric character references} * are converted to their respective characters. *

    * This is equivalent to {@link #decode(CharSequence,boolean) decode(encodedText,false)}. *

    * Unterminated character references are dealt with according to the rules for * text outside of attribute values in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}. *

    * If the static {@link Config#ConvertNonBreakingSpaces} property is set to true (the default), * then all non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character entity references are converted to normal spaces. *

    * Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case,
    * some browsers also recognise them in a case-insensitive way.
    * For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case.
    *
    * @param encodedText  the text to decode.
    * @return the decoded string.
    * @see #encode(CharSequence)
    */
    public static String decode(final CharSequence encodedText) {
        // Treat the text as lying outside of an attribute value.
        final boolean insideAttributeValue=false;
        return decode(encodedText,insideAttributeValue,Config.ConvertNonBreakingSpaces);
    }

    /**
    * Decodes the specified HTML encoded text into normal text.
    *

    * All {@linkplain CharacterEntityReference character entity references} and {@linkplain NumericCharacterReference numeric character references} * are converted to their respective characters. *

    * Unterminated character references are dealt with according to the * value of the insideAttributeValue parameter and the * {@linkplain Config#CurrentCompatibilityMode current compatibility mode}. *

    * If the static {@link Config#ConvertNonBreakingSpaces} property is set to true (the default), * then all non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character entity references are converted to normal spaces. *

    * Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case, * some browsers also recognise them in a case-insensitive way. * For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case. * * @param encodedText the text to decode. * @param insideAttributeValue specifies whether the encoded text is inside an attribute value. * @return the decoded string. * @see #decode(CharSequence) * @see #encode(CharSequence) */ public static String decode(final CharSequence encodedText, final boolean insideAttributeValue) { return decode(encodedText,insideAttributeValue,Config.ConvertNonBreakingSpaces); } static String decode(final CharSequence encodedText, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) { if (encodedText==null) return null; for (int i=0; i * All leading and trailing white space is omitted, and any sections of internal white space are replaced by a single space. *

    * The result is how the text would normally be rendered by a * user agent, * assuming it does not contain any tags. *

    * If the static {@link Config#ConvertNonBreakingSpaces} property is set to true (the default), * then all non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character entity references are converted to normal spaces. *

    * Unterminated character references are dealt with according to the rules for * text outside of attribute values in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}. * See the discussion of the insideAttributeValue parameter of the {@link #decode(CharSequence, boolean insideAttributeValue)} * method for a more detailed explanation of this topic. * * @param text the source text * @return the decoded text with collapsed white space. * @see FormControl#getPredefinedValues() */ public static String decodeCollapseWhiteSpace(final CharSequence text) { return decodeCollapseWhiteSpace(text,Config.ConvertNonBreakingSpaces); } static String decodeCollapseWhiteSpace(final CharSequence text, final boolean convertNonBreakingSpaces) { return decode(appendCollapseWhiteSpace(new StringBuilder(text.length()),text),false,convertNonBreakingSpaces); } /** * Re-encodes the specified text, equivalent to {@linkplain #decode(CharSequence) decoding} and then {@linkplain #encode(CharSequence) encoding} again. *

    * This process ensures that the specified encoded text does not contain any remaining unencoded characters. *

    * IMPLEMENTATION NOTE: At present this method simply calls {@link #decode(CharSequence,boolean) decode(encodedText,true)}
    * followed by the {@link #encode(CharSequence) encode} method, but a more efficient implementation
    * may be used in future.
    *
    * @param encodedText  the text to re-encode.
    * @return the re-encoded string.
    */
    public static String reencode(final CharSequence encodedText) {
        // Decode with insideAttributeValue set to true, then encode the result again.
        final String decodedText=decode(encodedText,true);
        return encode(decodedText);
    }

    /**
    * Returns the encoded form of this character reference.
    *

    * The exact behaviour of this method depends on the class of this object. * See the {@link CharacterEntityReference#getCharacterReferenceString()} and * {@link NumericCharacterReference#getCharacterReferenceString()} methods for more details. *

    *

    *
    Examples:
    *
    CharacterReference.parse("&GT;").getCharacterReferenceString() returns "&gt;"
    *
    CharacterReference.parse("&#x3E;").getCharacterReferenceString() returns "&#x3e;"
    *
    * * @return the encoded form of this character reference. * @see #getCharacterReferenceString(int codePoint) * @see #getDecimalCharacterReferenceString() */ public abstract String getCharacterReferenceString(); /** * Returns the encoded form of the specified unicode code point. *

    * This method returns the {@linkplain CharacterEntityReference#getCharacterReferenceString(int) character entity reference} encoded form of the unicode code point * if one exists, otherwise it returns the {@linkplain #getDecimalCharacterReferenceString(int) decimal character reference} encoded form. *

    * The only exception to this is an {@linkplain CharacterEntityReference#_apos apostrophe} (U+0027), * which is encoded as the numeric character reference "&#39;" instead of its character entity reference * "&apos;". *

    *

    *
    Examples:
    *
    CharacterReference.getCharacterReferenceString(62) returns "&gt;"
    *
    CharacterReference.getCharacterReferenceString('>') returns "&gt;"
    *
    CharacterReference.getCharacterReferenceString('☺') returns "&#9786;"
    *
    *
    *
    * @param codePoint  the unicode code point to encode.
    * @return the encoded form of the specified unicode code point.
    * @see #getHexadecimalCharacterReferenceString(int codePoint)
    */
    public static String getCharacterReferenceString(final int codePoint) {
        // The apostrophe never uses its character entity reference form.
        if (codePoint!=CharacterEntityReference._apos) {
            final String entityForm=CharacterEntityReference.getCharacterReferenceString(codePoint);
            if (entityForm!=null) {
                return entityForm;
            }
        }
        // No usable character entity reference, so fall back to the numeric form.
        return NumericCharacterReference.getCharacterReferenceString(codePoint);
    }

    /**
    * Returns the decimal encoded form of this character reference.
    *

    * This is equivalent to {@link #getDecimalCharacterReferenceString(int) getDecimalCharacterReferenceString}({@link #getCodePoint()}). *

    *

    *
    Example:
    *
    CharacterReference.parse("&gt;").getDecimalCharacterReferenceString() returns "&#62;"
    *
    *
    *
    * @return the decimal encoded form of this character reference.
    * @see #getCharacterReferenceString()
    * @see #getHexadecimalCharacterReferenceString()
    */
    public String getDecimalCharacterReferenceString() {
        // Delegates to the static overload, passing this object's code point.
        return getDecimalCharacterReferenceString(codePoint);
    }

    /**
    * Returns the decimal encoded form of the specified unicode code point.
    *

    *

    *
    Example:
    *
    CharacterReference.getDecimalCharacterReferenceString('>') returns "&#62;"
    *
    * * @param codePoint the unicode code point to encode. * @return the decimal encoded form of the specified unicode code point. * @see #getCharacterReferenceString(int codePoint) * @see #getHexadecimalCharacterReferenceString(int codePoint) */ public static String getDecimalCharacterReferenceString(final int codePoint) { try { return appendDecimalCharacterReferenceString(new StringBuilder(),codePoint).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens } /** * Returns the hexadecimal encoded form of this character reference. *

    * This is equivalent to {@link #getHexadecimalCharacterReferenceString(int) getHexadecimalCharacterReferenceString}({@link #getCodePoint()}). *

    *

    *
    Example:
    *
    CharacterReference.parse("&gt;").getHexadecimalCharacterReferenceString() returns "&#x3e;"
    *
    *
    *
    * @return the hexadecimal encoded form of this character reference.
    * @see #getCharacterReferenceString()
    * @see #getDecimalCharacterReferenceString()
    */
    public String getHexadecimalCharacterReferenceString() {
        // Delegates to the static overload, passing this object's code point.
        return getHexadecimalCharacterReferenceString(codePoint);
    }

    /**
    * Returns the hexadecimal encoded form of the specified unicode code point.
    *

    *

    *
    Example:
    *
    CharacterReference.getHexadecimalCharacterReferenceString('>') returns "&#x3e;"
    *
    * * @param codePoint the unicode code point to encode. * @return the hexadecimal encoded form of the specified unicode code point. * @see #getCharacterReferenceString(int codePoint) * @see #getDecimalCharacterReferenceString(int codePoint) */ public static String getHexadecimalCharacterReferenceString(final int codePoint) { try { return appendHexadecimalCharacterReferenceString(new StringBuilder(),codePoint).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens } /** * Returns the unicode code point of this character reference in U+ notation. *

    * This is equivalent to {@link #getUnicodeText(int) getUnicodeText(getCodePoint())}. *

    *

    *
    Example:
    *
    CharacterReference.parse("&gt;").getUnicodeText() returns "U+003E"
    *
    *
    *
    * @return the unicode code point of this character reference in U+ notation.
    * @see #getUnicodeText(int codePoint)
    */
    public String getUnicodeText() {
        // Delegates to the static overload, passing this object's code point.
        return getUnicodeText(codePoint);
    }

    /**
    * Returns the specified unicode code point in U+ notation.
    *

    *

    *
    Example:
    *
    CharacterReference.getUnicodeText('>') returns "U+003E"
    *
    * * @param codePoint the unicode code point. * @return the specified unicode code point in U+ notation. */ public static String getUnicodeText(final int codePoint) { try { return appendUnicodeText(new StringBuilder(),codePoint).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens } static final Appendable appendUnicodeText(final Appendable appendable, final int codePoint) throws IOException { appendable.append("U+"); final String hex=Integer.toString(codePoint,16).toUpperCase(); for (int i=4-hex.length(); i>0; i--) appendable.append('0'); appendable.append(hex); return appendable; } /** * Parses a single encoded character reference text into a CharacterReference object. *

    * The character reference must be at the start of the given text, but may contain other characters at the end. * The {@link #getEnd() getEnd()} method can be used on the resulting object to determine at which character position the character reference ended. *

    * If the text does not represent a valid character reference, this method returns null. *

    * Unterminated character references are always accepted, regardless of the settings in the * {@linkplain Config#CurrentCompatibilityMode current compatibility mode}. *

    * To decode all character references in a given text, use the {@link #decode(CharSequence)} method instead. *

    *

    *
    Example:
    *
    CharacterReference.parse("&gt;").getChar() returns '>'
    *
    *
    *
    * @param characterReferenceText  the text containing a single encoded character reference.
    * @return a CharacterReference object representing the specified text, or null if the text does not represent a valid character reference.
    * @see #decode(CharSequence)
    */
    public static CharacterReference parse(final CharSequence characterReferenceText) {
        final Source source=new Source(characterReferenceText,true);
        // The reference must begin at position 0; unterminated references are always accepted.
        return construct(source,0,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
    }

    /**
    * Parses a single encoded character reference text into a unicode code point.
    *

    * The character reference must be at the start of the given text, but may contain other characters at the end. *

    * If the text does not represent a valid character reference, this method returns {@link #INVALID_CODE_POINT}. *

    * This is equivalent to {@link #parse(CharSequence) parse(characterReferenceText)}.{@link #getCodePoint()}, * except that it returns {@link #INVALID_CODE_POINT} if an invalid character reference is specified instead of throwing a * NullPointerException. *

    *

    *
    Example:
    *
    CharacterReference.getCodePointFromCharacterReferenceString("&gt;") returns 62
    *
    *
    *
    * @param characterReferenceText  the text containing a single encoded character reference.
    * @return the unicode code point representing the specified text, or {@link #INVALID_CODE_POINT} if the text does not represent a valid character reference.
    */
    public static int getCodePointFromCharacterReferenceString(final CharSequence characterReferenceText) {
        final CharacterReference parsed=parse(characterReferenceText);
        // parse returns null when the text is not a valid character reference.
        if (parsed==null) {
            return INVALID_CODE_POINT;
        }
        return parsed.getCodePoint();
    }

    /**
    * Indicates whether the specified character would need to be encoded in HTML text.
    *

    * This is the case if a {@linkplain CharacterEntityReference character entity reference} exists for the character, or the unicode code point is greater than U+007F. *

    * The only exception to this is an {@linkplain CharacterEntityReference#_apos apostrophe} (U+0027), * which only returns true if the static {@link Config#IsApostropheEncoded} property * is currently set to true. * * @param ch the character to test. * @return true if the specified character would need to be encoded in HTML text, otherwise false. */ public static final boolean requiresEncoding(final char ch) { return ch>127 || (CharacterEntityReference.getName(ch)!=null && (ch!='\'' || Config.IsApostropheEncoded)); } /** * Returns a filter Writer that {@linkplain #encode(CharSequence) encodes} all text before passing it through to the specified Writer. * * @param writer the destination for the encoded text * @return a filter Writer that {@linkplain #encode(CharSequence) encodes} all text before passing it through to the specified Writer. * @see #encode(CharSequence unencodedText) */ public static Writer getEncodingFilterWriter(final Writer writer) { return new EncodingFilterWriter(writer); } private static final class EncodingFilterWriter extends FilterWriter { StringBuilder sb=new StringBuilder(MAX_ENTITY_REFERENCE_LENGTH); public EncodingFilterWriter(final Writer writer) { super(writer); } public void write(final char ch) throws IOException { sb.setLength(0); appendEncode(sb,ch); if (sb.length()==1) out.write(sb.charAt(0)); else out.append(sb); } public void write(final int chInt) throws IOException { write((char)chInt); } public void write(final char[] cbuf, final int off, final int len) throws IOException { final int end=off+len; for (int i=off; i"); // add line break continue; } else { spaceCount=TAB_LENGTH; } } else { spaceCount=1; } while (nexti=2) { appendable.append("  "); // use alternating   and spaces to keep original number of spaces spaceCount-=2; } // note that the last character is never a nbsp, so that word wrapping won't result in a nbsp before the first character in a line i=nexti-1; // minus 1 because top level for loop will add it again } 
return appendable;
    }

    /**
    * Appends the encoded form of a single character to the specified appendable,
    * unless the character is white space that is subject to white space formatting.
    *
    * A character that has a character entity reference name is appended as that character entity reference,
    * except for the apostrophe, which is appended as the numeric character reference "&#39;"
    * if {@link Config#IsApostropheEncoded} is true and is otherwise appended unchanged.
    * Any other character greater than 127 is appended as a decimal character reference.
    * All remaining characters are appended unchanged, except that nothing is appended for a white space
    * character when whiteSpaceFormatting is true.
    *
    * @param appendable  the destination of the encoded text.
    * @param ch  the character to encode.
    * @param whiteSpaceFormatting  whether white space characters are left for the caller to deal with.
    * @return false if nothing was appended because the character is white space and whiteSpaceFormatting is true, otherwise true.
    * @throws IOException if the appendable throws it.
    */
    private static final boolean appendEncodeCheckForWhiteSpaceFormatting(final Appendable appendable, char ch, final boolean whiteSpaceFormatting) throws IOException {
        final String characterEntityReferenceName=CharacterEntityReference.getName(ch);
        if (characterEntityReferenceName!=null) {
            if (ch=='\'') {
                // The encoded apostrophe must be the numeric character reference, not the bare character,
                // otherwise both branches append the same text and the IsApostropheEncoded setting has no effect.
                if (Config.IsApostropheEncoded) appendable.append("&#39;"); else appendable.append(ch);
            } else {
                CharacterEntityReference.appendCharacterReferenceString(appendable,characterEntityReferenceName);
            }
        } else if (ch>127) {
            appendDecimalCharacterReferenceString(appendable,ch);
        } else if (!(whiteSpaceFormatting && isWhiteSpace(ch))) {
            appendable.append(ch);
        } else {
            // white space under white space formatting: nothing is appended here
            return false;
        }
        return true;
    }

    /** Same as the three-argument overload, accepting all unterminated character references. */
    static CharacterReference getPrevious(final Source source, final int pos) {
        return getPrevious(source,pos,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
    }

    /** Same as the three-argument overload, accepting all unterminated character references. */
    static CharacterReference getNext(final Source source, final int pos) {
        return getNext(source,pos,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
    }

    /**
    * Searches backwards from pos for an '&' character at which a character reference can be constructed.
    *
    * @return the character reference found, or null if the search is exhausted without finding one.
    */
    private static CharacterReference getPrevious(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
        final ParseText parseText=source.getParseText();
        pos=parseText.lastIndexOf('&',pos);
        while (pos!=-1) {
            final CharacterReference characterReference=construct(source,pos,unterminatedCharacterReferenceSettings);
            if (characterReference!=null) return characterReference;
            // not a valid character reference at this '&', so try the one before it
            pos=parseText.lastIndexOf('&',pos-1);
        }
        return null;
    }

    /**
    * Searches forwards from pos for an '&' character at which a character reference can be constructed.
    *
    * @return the character reference found, or null if the search is exhausted without finding one.
    */
    private static CharacterReference getNext(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
        final ParseText parseText=source.getParseText();
        pos=parseText.indexOf('&',pos);
        while (pos!=-1) {
            final CharacterReference characterReference=construct(source,pos,unterminatedCharacterReferenceSettings);
            if (characterReference!=null) return characterReference;
            // not a valid character reference at this '&', so try the one after it
            pos=parseText.indexOf('&',pos+1);
        }
return null;
    }

    /**
    * Appends "&#x", the lower case hexadecimal digits of the specified code point, and ';' to the specified appendable.
    *
    * @return the result of the final append call.
    */
    static final Appendable appendHexadecimalCharacterReferenceString(final Appendable appendable, final int codePoint) throws IOException {
        final String hexDigits=Integer.toString(codePoint,16);
        return appendable.append("&#x").append(hexDigits).append(';');
    }

    /**
    * Appends "&#", the decimal digits of the specified code point, and ';' to the specified appendable.
    *
    * @return the result of the final append call.
    */
    static final Appendable appendDecimalCharacterReferenceString(final Appendable appendable, final int codePoint) throws IOException {
        final String decimalDigits=Integer.toString(codePoint);
        return appendable.append("&#").append(decimalDigits).append(';');
    }

    /**
    * Constructs the character reference that begins at the specified position of the source.
    *
    * @param source  the source containing the character reference.
    * @param begin  the position at which the character reference must begin with an '&' character.
    * @param unterminatedCharacterReferenceSettings  passed on to the numeric or character entity reference construction.
    * @return the character reference, or null if the character at begin is not '&' or an IndexOutOfBoundsException occurs.
    */
    static CharacterReference construct(final Source source, final int begin, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
        try {
            if (source.getParseText().charAt(begin)!='&') {
                return null;
            }
            // A '#' directly after the '&' marks a numeric character reference.
            if (source.getParseText().charAt(begin+1)=='#') {
                return NumericCharacterReference.construct(source,begin,unterminatedCharacterReferenceSettings);
            }
            return CharacterEntityReference.construct(source,begin,unterminatedCharacterReferenceSettings.characterEntityReferenceMaxCodePoint);
        } catch (IndexOutOfBoundsException ex) {
            // begin or begin+1 lies outside the parse text, or the exception was raised during construction
            return null;
        }
    }

    private static Appendable appendDecode(final Appendable appendable, final CharSequence encodedText, int pos, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) throws IOException {
        final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings=Config.CurrentCompatibilityMode.getUnterminatedCharacterReferenceSettings(insideAttributeValue);
        int lastEnd=0;
        final StreamedSource streamedSource=new StreamedSource(encodedText).setHandleTags(false).setSearchBegin(pos);
        for (Segment segment : streamedSource) {
            if (segment instanceof CharacterReference) {
                ((CharacterReference)segment).appendCharTo(appendable,convertNonBreakingSpaces);
            } else {
                appendable.append(segment.toString()); // benchmark tests reveal (surprisingly) that converting to a string before appending is faster than appending the specified section of the encodedText or segment directly.
// appendable.append(encodedText,segment.begin,segment.end); // appendable.append(segment); } } return appendable; } // pinched from http://svn.apache.org/repos/asf/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java private static char getHighSurrogate(int codePoint) { return (char)((0xD800 - (0x10000 >> 10)) + (codePoint >> 10)); } private static char getLowSurrogate(int codePoint) { return (char)(0xDC00 + (codePoint & 0x3FF)); } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagType.java0000644000175000017500000011152611204550410025236 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Defines the syntax for a start tag type. *

    * A start tag type is any {@link TagType} that {@linkplain #getStartDelimiter() starts} with the character '<' * (as with all tag types), but whose second character is not '/'. *

    * This includes types for many tags which stand alone, without a {@linkplain #getCorrespondingEndTagType() corresponding end tag}, * and would not intuitively be categorised as a "start tag". For example, an HTML {@linkplain #COMMENT comment} in a document * is represented as a single start tag that spans the whole comment, and does not have an end tag at all. *

    * The singleton instances of all the standard start tag types are available in this class as static * fields. *

    * Because all StartTagType instances must be singletons, the '==' operator can be used to test for a particular tag type * instead of the equals(Object) method. * * @see EndTagType */ public abstract class StartTagType extends TagType { private final EndTagType correspondingEndTagType; private final boolean hasAttributes; private final boolean isNameAfterPrefixRequired; static final String START_DELIMITER_PREFIX="<"; /** * The tag type given to an {@linkplain Tag#isUnregistered() unregistered} {@linkplain StartTag start tag} * (< ... >). *

    * See the documentation of the {@link Tag#isUnregistered()} method for details. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}unregistered *
    {@link #getStartDelimiter() StartDelimiter}< *
    {@link #getClosingDelimiter() ClosingDelimiter}> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}(empty string) *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <"This is not recognised as any of the predefined tag types in this library">
    *
    * @see EndTagType#UNREGISTERED */ public static final StartTagType UNREGISTERED=StartTagTypeUnregistered.INSTANCE; /** * The tag type given to a normal HTML or XML {@linkplain StartTag start tag} * (<name ... >). *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}normal *
    {@link #getStartDelimiter() StartDelimiter}< *
    {@link #getClosingDelimiter() ClosingDelimiter}> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}(empty string) *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}{@link EndTagType#NORMAL} *
    {@link #hasAttributes() HasAttributes}true *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}true *
    *
    Example:
    *
    <div class="NormalDivTag">
    *
    */ public static final StartTagType NORMAL=StartTagTypeNormal.INSTANCE; /** * The tag type given to an HTML comment * (<!-- ... -->). *

    * An HTML comment is an area of the source document enclosed by the delimiters * <!-- on the left and --> on the right. *

    * The HTML 4.01 specification section 3.2.4 * states that the end of comment delimiter may contain white space between the "--" and ">" characters, * but this library does not recognise end of comment delimiters containing white space. *

    * In the default configuration, any non-{@linkplain #isServerTag() server} tag appearing within an HTML comment is ignored * by the parser. * See the documentation of the tag parsing process for more information. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}comment *
    {@link #getStartDelimiter() StartDelimiter}<!-- *
    {@link #getClosingDelimiter() ClosingDelimiter}--> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}!-- *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <!-- This is a comment -->
    *
    */ public static final StartTagType COMMENT=StartTagTypeComment.INSTANCE; /** * The tag type given to an XML declaration * (<?xml ... ?>). *

    * An XML declaration is often referred to in texts as a special type of processing instruction with the reserved * PITarget name of "xml". * Technically it is not an {@linkplain #XML_PROCESSING_INSTRUCTION XML processing instruction} at all, but is still a type of * SGML processing instruction. *

    * According to section 2.8 of the XML 1.0 specification, * a valid XML declaration can specify only "version", "encoding" and "standalone" attributes in that order. * This library parses the {@linkplain Attributes attributes} of an XML declaration in the same way as those of a * {@linkplain #NORMAL normal} tag, without checking that they conform to the specification. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}XML declaration *
    {@link #getStartDelimiter() StartDelimiter}<?xml *
    {@link #getClosingDelimiter() ClosingDelimiter}?> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}?xml *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}true *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <?xml version="1.0" encoding="UTF-8"?>
    *
    */ public static final StartTagType XML_DECLARATION=StartTagTypeXMLDeclaration.INSTANCE; /** * The tag type given to an XML processing instruction * (<?PITarget ... ?>). *

    * An XML processing instruction is a specific form of * SGML processing instruction with the following * two additional constraints: *

      *
    • it must be {@linkplain #getClosingDelimiter() closed} with '?>' instead of just a single * '>' character. *
    • it requires a PITarget * (essentially a {@linkplain Tag#getName() name} following the '<?' {@linkplain #getStartDelimiter() start delimiter}). *
    *

    * This library does not include a predefined generic tag type for SGML processing instructions * as the only forms in which they are found in HTML documents are the more specific XML processing instruction and * the {@linkplain #XML_DECLARATION XML declaration}, both of which have their own dedicated predefined tag type. *

    * There is no restriction on the contents of an XML processing instruction. In particular, it can not be assumed that the * processing instruction contains {@linkplain Attributes attributes}, in contrast to the {@linkplain #XML_DECLARATION XML declaration}. *

    * Note that {@linkplain #register() registering} the {@link PHPTagTypes#PHP_SHORT} tag type overrides this tag type. * This is because they both have the same {@linkplain #getStartDelimiter start delimiter}, * so the one registered latest takes precedence over the other. * See the documentation of the {@link PHPTagTypes} class for more information. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}XML processing instruction *
    {@link #getStartDelimiter() StartDelimiter}<? *
    {@link #getClosingDelimiter() ClosingDelimiter}?> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}? *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}true *
    *
    Example:
    *
    <?xml-stylesheet href="standardstyle.css" type="text/css"?>
    *
    */ public static final StartTagType XML_PROCESSING_INSTRUCTION=StartTagTypeXMLProcessingInstruction.INSTANCE; /** * The tag type given to a document type declaration * (<!DOCTYPE ... >). *

    * Information about the document type declaration can be found in the * HTML 4.01 specification section 7.2, and the * XML 1.0 specification section 2.8. *

    * The "!DOCTYPE" tag name is required to be in upper case in the source document, * but all tag properties are stored in lower case because this library performs all parsing in lower case. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}document type declaration *
    {@link #getStartDelimiter() StartDelimiter}<!doctype *
    {@link #getClosingDelimiter() ClosingDelimiter}> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}!doctype *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
    *
    */ public static final StartTagType DOCTYPE_DECLARATION=StartTagTypeDoctypeDeclaration.INSTANCE; /** * The tag type given to a markup declaration * (<!ELEMENT ... > | <!ATTLIST ... > | <!ENTITY ... > | <!NOTATION ... >). *

    * The {@linkplain Tag#getName() name} of a markup declaration tag must be one of * "!element", "!attlist", "!entity" or "!notation". * These tag names are required to be in upper case in the source document, * but all tag properties are stored in lower case because this library performs all parsing in lower case. *

    * Markup declarations usually appear inside a * document type definition (DTD), which is usually an external * document to the HTML or XML document, but they can also appear directly within the * {@linkplain #DOCTYPE_DECLARATION document type declaration} which is why they must be recognised by the parser. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}markup declaration *
    {@link #getStartDelimiter() StartDelimiter}<! *
    {@link #getClosingDelimiter() ClosingDelimiter}> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}! *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}true *
    *
    Example:
    *
    <!ELEMENT BODY O O (%flow;)* +(INS|DEL) -- document body -->
    *
    */ public static final StartTagType MARKUP_DECLARATION=StartTagTypeMarkupDeclaration.INSTANCE; /** * The tag type given to a CDATA section * (<![CDATA[ ... ]]>). *

    * A CDATA section is a specific form of a * marked section. * This library does not include a predefined generic tag type for marked sections, * as the only type of marked sections found in HTML documents are CDATA sections. *

    * The HTML 4.01 specification section B.3.5 * and the XML 1.0 specification section 2.7 * contain definitions for a CDATA section. *

    * There is inconsistency between the SGML and HTML/XML specifications in the definition of a marked section. * SGML requires the presence of a space between the "<![" prefix and the keyword, and allows a space after the keyword. * The XML specification forbids these spaces, and the examples given in the HTML specification do not include them either. * This library only recognises CDATA sections that do not include the spaces. *

    * The "![CDATA[" tag name is required to be in upper case in the source document according to the HTML/XML specifications, * but all tag properties are stored in lower case because this makes it more efficient for the library to perform case-insensitive * parsing of all tags. *

    * In the default configuration, any non-{@linkplain #isServerTag() server} tag appearing within a CDATA section is ignored * by the parser. * See the documentation of the tag parsing process for more information. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}CDATA section *
    {@link #getStartDelimiter() StartDelimiter}<![cdata[ *
    {@link #getClosingDelimiter() ClosingDelimiter}]]> *
    {@link #isServerTag() IsServerTag}false *
    {@link #getNamePrefix() NamePrefix}![cdata[ *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    This example shows the recommended practice of enclosing scripts inside a CDATA section: *
    *
    <script type="text/javascript">
    //<![CDATA[
    function min(a,b) {return a<b ? a : b;}
    //]]>
    </script>
    *
    *
    */ public static final StartTagType CDATA_SECTION=StartTagTypeCDATASection.INSTANCE; /** * The tag type given to a common server tag * (<% ... %>). *

    * Common server tags include * ASP, * JSP, * PSP, * ASP-style PHP, * eRuby, and * Mason substitution tags. *

    * This tag and the {@linkplain #SERVER_COMMON_ESCAPED escaped common server tag} are the only standard tag types * that define {@linkplain #isServerTag() server tags}. * They are included as standard tag types because of the common server tag's widespread use in many platforms, including those listed above. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}common server tag *
    {@link #getStartDelimiter() StartDelimiter}<% *
    {@link #getClosingDelimiter() ClosingDelimiter}%> *
    {@link #isServerTag() IsServerTag}true *
    {@link #getNamePrefix() NamePrefix}% *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <%@ include file="header.html" %>
    *
    */ public static final StartTagType SERVER_COMMON=StartTagTypeServerCommon.INSTANCE; /** * The tag type given to an escaped common server tag * (<\% ... %>). *

    * Some of the platforms that support the {@linkplain #SERVER_COMMON common server tag} also support a mechanism to escape that tag by adding a * backslash (\) before the percent (%) character. * Although rarely used, this tag type allows the parser to recognise these escaped tags in addition to the common server tag itself. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link #getDescription() Description}escaped common server tag *
    {@link #getStartDelimiter() StartDelimiter}<\% *
    {@link #getClosingDelimiter() ClosingDelimiter}%> *
    {@link #isServerTag() IsServerTag}true *
    {@link #getNamePrefix() NamePrefix}\% *
    {@link #getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link #hasAttributes() HasAttributes}false *
    {@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <\%@ include file="header.html" %>
    *
    */ public static final StartTagType SERVER_COMMON_ESCAPED=StartTagTypeServerCommonEscaped.INSTANCE; /** * Constructs a new StartTagType object with the specified properties. *
    (implementation assistance method) *

    * As StartTagType is an abstract class, this constructor is only called from sub-class constructors.
    *
    * @param description a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
    * @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
    * @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
    * @param correspondingEndTagType the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
    * @param isServerTag indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
    * @param hasAttributes indicates whether the new start tag type {@linkplain #hasAttributes() has attributes}.
    * @param isNameAfterPrefixRequired indicates whether a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}.
    * @throws IllegalArgumentException if the lower-cased start delimiter does not start with START_DELIMITER_PREFIX.
    */
    protected StartTagType(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag, final boolean hasAttributes, final boolean isNameAfterPrefixRequired) {
        // The start delimiter is stored in lower case. A fixed locale is used because
        // String.toLowerCase() with the JVM default locale is locale-sensitive (for example
        // 'I' becomes a dotless i under a Turkish locale), which would make the stored
        // delimiter depend on the machine the library happens to run on.
        super(description,startDelimiter.toLowerCase(java.util.Locale.ENGLISH),closingDelimiter,isServerTag,START_DELIMITER_PREFIX);
        // Validation has to happen after super(...) because a constructor call must come first.
        if (!getStartDelimiter().startsWith(START_DELIMITER_PREFIX)) {
            throw new IllegalArgumentException("startDelimiter of a start tag must start with \""+START_DELIMITER_PREFIX+'"');
        }
        this.correspondingEndTagType=correspondingEndTagType;
        this.hasAttributes=hasAttributes;
        this.isNameAfterPrefixRequired=isNameAfterPrefixRequired;
    }

    /**
    * Returns the {@linkplain EndTagType type} of {@linkplain EndTag end tag} required to pair with a
    * {@linkplain StartTag start tag} of this type to form an {@linkplain Element element}. *
    (property method) *

    * This can be represented by the following expression that is always true given an arbitrary {@linkplain Element element} * that has an end tag: *

    * element.{@link Element#getStartTag() getStartTag()}.{@link StartTag#getStartTagType() getStartTagType()}.{@link #getCorrespondingEndTagType()}==element.{@link Element#getEndTag() getEndTag()}.{@link EndTag#getEndTagType() getEndTagType()} *

    *

    *
    Standard Tag Type Values:
    *
    * *
    Start Tag TypeCorresponding End Tag Type *
    {@link StartTagType#UNREGISTERED}null *
    {@link StartTagType#NORMAL}{@link EndTagType#NORMAL} *
    {@link StartTagType#COMMENT}null *
    {@link StartTagType#XML_DECLARATION}null *
    {@link StartTagType#XML_PROCESSING_INSTRUCTION}null *
    {@link StartTagType#DOCTYPE_DECLARATION}null *
    {@link StartTagType#MARKUP_DECLARATION}null *
    {@link StartTagType#CDATA_SECTION}null *
    {@link StartTagType#SERVER_COMMON}null *
    {@link StartTagType#SERVER_COMMON_ESCAPED}null *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    Start Tag TypeCorresponding End Tag Type *
    {@link MicrosoftTagTypes#DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT}null *
    {@link PHPTagTypes#PHP_SCRIPT}{@link EndTagType#NORMAL} *
    {@link PHPTagTypes#PHP_SHORT}null *
    {@link PHPTagTypes#PHP_STANDARD}null *
    {@link MasonTagTypes#MASON_COMPONENT_CALL}null *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}{@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END} *
    {@link MasonTagTypes#MASON_NAMED_BLOCK}{@link MasonTagTypes#MASON_NAMED_BLOCK_END} *
    *
    * * @return the {@linkplain EndTagType type} of {@linkplain EndTag end tag} required to pair with a {@linkplain StartTag start tag} of this type to form an {@link Element}. * @see EndTagType#getCorrespondingStartTagType() */ public final EndTagType getCorrespondingEndTagType() { return correspondingEndTagType; } /** * Indicates whether a start tag of this type contains {@linkplain Attributes attributes}. *
    (property method) *

    * The attributes start at the end of the {@linkplain Tag#getName() name} and continue until the * {@linkplain #getClosingDelimiter() closing delimiter} is encountered. If the character sequence representing the * closing delimiter occurs within a quoted attribute value it is not recognised as the end of the tag. *

    * The {@link #atEndOfAttributes(Source, int pos, boolean isClosingSlashIgnored)} method can be overridden to provide more control * over where the attributes end. *

    *

    *
    Standard Tag Type Values:
    *
    * *
    Start Tag TypeHas Attributes *
    {@link StartTagType#UNREGISTERED}false *
    {@link StartTagType#NORMAL}true *
    {@link StartTagType#COMMENT}false *
    {@link StartTagType#XML_DECLARATION}true *
    {@link StartTagType#XML_PROCESSING_INSTRUCTION}false *
    {@link StartTagType#DOCTYPE_DECLARATION}false *
    {@link StartTagType#MARKUP_DECLARATION}false *
    {@link StartTagType#CDATA_SECTION}false *
    {@link StartTagType#SERVER_COMMON}false *
    {@link StartTagType#SERVER_COMMON_ESCAPED}false *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    Start Tag TypeHas Attributes *
    {@link MicrosoftTagTypes#DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT}false *
    {@link PHPTagTypes#PHP_SCRIPT}true *
    {@link PHPTagTypes#PHP_SHORT}false *
    {@link PHPTagTypes#PHP_STANDARD}false *
    {@link MasonTagTypes#MASON_COMPONENT_CALL}false *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}false *
    {@link MasonTagTypes#MASON_NAMED_BLOCK}false *
    *
    * * @return true if a start tag of this type contains {@linkplain Attributes attributes}, otherwise false. */ public final boolean hasAttributes() { return hasAttributes; } /** * Indicates whether a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name} is required directly after the {@linkplain #getNamePrefix() prefix}. *
    (property method) *

    * If this property is true, the {@linkplain Tag#getName() name} of the tag consists of the * {@linkplain #getNamePrefix() prefix} followed by an {@linkplain Tag#isXMLName(CharSequence) XML tag name}. *

    * If this property is false, the {@linkplain Tag#getName() name} of the tag consists of only the * {@linkplain #getNamePrefix() prefix}. *

    *

    *
    Standard Tag Type Values:
    *
    * *
    Start Tag TypeName After Prefix Required *
    {@link StartTagType#UNREGISTERED}false *
    {@link StartTagType#NORMAL}true *
    {@link StartTagType#COMMENT}false *
    {@link StartTagType#XML_DECLARATION}false *
    {@link StartTagType#XML_PROCESSING_INSTRUCTION}true *
    {@link StartTagType#DOCTYPE_DECLARATION}false *
    {@link StartTagType#MARKUP_DECLARATION}true *
    {@link StartTagType#CDATA_SECTION}false *
    {@link StartTagType#SERVER_COMMON}false *
    {@link StartTagType#SERVER_COMMON_ESCAPED}false *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    Start Tag TypeName After Prefix Required *
    {@link MicrosoftTagTypes#DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT}true *
    {@link PHPTagTypes#PHP_SCRIPT}false *
    {@link PHPTagTypes#PHP_SHORT}false *
    {@link PHPTagTypes#PHP_STANDARD}false *
    {@link MasonTagTypes#MASON_COMPONENT_CALL}false *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}false *
    {@link MasonTagTypes#MASON_NAMED_BLOCK}true *
    *
    * * @return true if a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name} is required directly after the {@linkplain #getNamePrefix() prefix}, otherwise false. */ public final boolean isNameAfterPrefixRequired() { return isNameAfterPrefixRequired; } /** * Indicates whether the specified source document position is at the end of a tag's {@linkplain Attributes attributes}. *
    (default implementation method) *

    * This method is called internally while parsing {@linkplain Attributes attributes} to detect where they should end. *

    * It can be assumed that the specified position is not inside a quoted attribute value. *

    * The default implementation simply compares the {@linkplain ParseText parse text} at the specified * position with the {@linkplain #getClosingDelimiter() closing delimiter}, and is equivalent to:
    * source.{@link Source#getParseText() getParseText()}.containsAt({@link #getClosingDelimiter() getClosingDelimiter()},pos) *

    * The isClosingSlashIgnored parameter is only relevant in the {@link #NORMAL} start tag type, * which makes use of it to cater for the '/' character that can occur before the * {@linkplain #getClosingDelimiter() closing delimiter} in {@linkplain StartTag#isEmptyElementTag() empty-element tags}. * Its value is always false when passed to other start tag types. * * @param source the {@link Source} document. * @param pos the character position in the source document. * @param isClosingSlashIgnored indicates whether the {@linkplain StartTag#getName() name} of the {@linkplain StartTag start tag} being tested is incompatible with an {@linkplain StartTag#isEmptyElementTag() empty-element tag}. * @return true if the specified source document position is at the end of a tag's {@linkplain Attributes attributes}, otherwise false. */ public boolean atEndOfAttributes(final Source source, final int pos, final boolean isClosingSlashIgnored) { return source.getParseText().containsAt(getClosingDelimiter(),pos); } /** * Internal method for the construction of a {@link StartTag} object of this type. *
    (implementation assistance method) *

    * Intended for use from within the {@link #constructTagAt(Source,int) constructTagAt(Source, int pos)} method. * * @param source the {@link Source} document. * @param begin the character position in the source document where the tag {@linkplain Segment#getBegin() begins}. * @param end the character position in the source document where the tag {@linkplain Segment#getEnd() ends}. * @param name the {@linkplain Tag#getName() name} of the tag. * @param attributes the {@linkplain StartTag#getAttributes() attributes} of the tag. * @return the new {@link StartTag} object. */ protected final StartTag constructStartTag(final Source source, final int begin, final int end, final String name, final Attributes attributes) { return new StartTag(source,begin,end,this,name,attributes); } /** * Internal method for the parsing of {@link Attributes}. *
    (implementation assistance method) *

    * Intended for use from within the {@link #constructTagAt(Source,int) constructTagAt(Source, int pos)} method. *

    * The returned {@link Attributes} segment begins at startTagBegin+1+tagName.length(), * and ends straight after the last attribute found before the tag's {@linkplain #getClosingDelimiter() closing delimiter}. *

    * Only returns null if the segment contains a major syntactical error * or more than the {@linkplain Attributes#getDefaultMaxErrorCount() default maximum} number of * minor syntactical errors. * * @param source the {@link Source} document. * @param startTagBegin the position in the source document at which the start tag is to begin. * @param tagName the {@linkplain StartTag#getName() name} of the start tag to be constructed. * @return the {@link Attributes} of the start tag to be constructed, or null if too many errors occur while parsing. */ protected final Attributes parseAttributes(final Source source, final int startTagBegin, final String tagName) { return Attributes.construct(source,startTagBegin,this,tagName); } } jericho-html-3.1/src/java/net/htmlparser/jericho/TagType.java0000644000175000017500000012371511204550410024223 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Defines the syntax for a tag type that can be recognised by the parser. *

    * This class is the root abstract class common to all tag types, and contains methods to {@linkplain #register() register} * and {@linkplain #deregister() deregister} tag types as well as various methods to aid in their implementation. *

    * Every tag type is represented by a singleton instance of a class that must be a subclass of either * {@link StartTagType} or {@link EndTagType}. These two abstract classes, the only direct descendants of this class, * represent the two major classifications under which every tag type exists. *

    * Because all TagType instances must be singletons, the '==' operator can be used to test for a particular tag type * instead of the equals(Object) method. *

    * The term predefined tag type refers to any of the tag types defined in this library, * including both standard and extended tag types. *

    * The term standard tag type refers to any of the tag types represented by instances * in static fields of the {@link StartTagType} and {@link EndTagType} subclasses. * Standard tag types are registered by default, and define the tags most commonly found in HTML documents. *

    * The term extended tag type refers to any predefined tag type * that is not a standard tag type. * The {@link PHPTagTypes} and {@link MasonTagTypes} classes contain extended tag types related to their respective server platforms. * The tag types defined within them must be registered by the user before they are recognised by the parser. *

    * The term custom tag type refers to any user-defined tag type, or any tag type that is * not a predefined tag type. *

    * The tag recognition process of the parser gives each tag type a precedence level, * which is primarily determined by the length of its {@linkplain #getStartDelimiter() start delimiter}. * A tag type with a more specific start delimiter is chosen in preference to one with a less specific start delimiter, * assuming they both share the same prefix. If two tag types have exactly the same start delimiter, the one which was * {@linkplain #register() registered} later has the higher precedence. *

    * The two special tag types {@link StartTagType#UNREGISTERED} and {@link EndTagType#UNREGISTERED} represent * tags that do not match the syntax of any other tag type. They have the lowest precedence * of all the tag types. The {@link Tag#isUnregistered()} method provides a detailed explanation of unregistered tags. *

    * See the documentation of the tag parsing process for more information * on how each tag is identified by the parser. *

    * Note that the standard {@linkplain HTMLElementName HTML element names} do not represent different * tag types. All standard HTML tags have a tag type of {@link StartTagType#NORMAL} or {@link EndTagType#NORMAL}, * and are also referred to as normal tags. *

    * Apart from the registration related methods, all of the methods in this class and its * subclasses relate to the implementation of custom tag types and are not relevant to the majority of users * who just use the predefined tag types. *

    * For performance reasons, this library only allows tag types that {@linkplain #getStartDelimiter() start} * with a '<' character. * The character following this defines the immediate subclass of the tag type. * An {@link EndTagType} always has a slash ('/') as the second character, while a {@link StartTagType} * has any character other than a slash as the second character. * This definition means that tag types which are not intuitively classified as either start tag types or end tag types * (such as an HTML {@linkplain StartTagType#COMMENT comment}) are mostly classified as start tag types. *

    * Every method in this and the {@link StartTagType} and {@link EndTagType} abstract classes can be categorised * as one of the following: *

    *
    Properties: *
    Simple properties (marked final) that were either specified as parameters * during construction or are derived from those parameters. *
    Abstract implementation methods: *
    Methods that must be implemented in a subclass. *
    Default implementation methods: *
    Methods (not marked final) that implement common behaviour, but may be overridden in a subclass. *
    Implementation assistance methods: *
    Protected methods that provide low-level functionality and are only of use within other implementation methods. *
    Registration related methods: *
    Utility methods (marked final) relating to the {@linkplain #register() registration} of tag type instances. *
    */ public abstract class TagType { private final String description; private final String startDelimiter; private final String closingDelimiter; private final boolean isServerTag; private final String namePrefix; final String startDelimiterPrefix; TagType(final String description, final String startDelimiter, final String closingDelimiter, final boolean isServerTag, final String startDelimiterPrefix) { // startDelimiterPrefix is either "<" or "(registration related method) *

    * The order of registration affects the precedence of the tag type when a potential tag is being parsed. * * @see #deregister() */ public final void register() { TagTypeRegister.add(this); } /** * Deregisters this tag type. *
    (registration related method) * * @see #register() */ public final void deregister() { TagTypeRegister.remove(this); } /** * Returns a list of all the currently registered tag types in order of lowest to highest precedence. *
    (registration related method)
    *
    * @return a list of all the currently registered tag types in order of lowest to highest precedence.
    */
    public static final List<TagType> getRegisteredTagTypes() {
        // Typed return so callers can iterate the registered tag types without casting from Object.
        return TagTypeRegister.getList();
    }

    /**
    * Returns a description of this tag type useful for debugging purposes. *
    (property method) * * @return a description of this tag type useful for debugging purposes. */ public final String getDescription() { return description; } /** * Returns the character sequence that marks the start of the tag. *
    (property method) *

    * The character sequence must be all in lower case. *

    * The first character in this property must be '<'. * This is a deliberate limitation of the system which is necessary to retain reasonable performance. *

    * The second character in this property must be '/' if the implementing class is an {@link EndTagType}. * It must not be '/' if the implementing class is a {@link StartTagType}. *

    *

    *
    Standard Tag Type Values:
    *
    * *
    Tag TypeStart Delimiter *
    {@link StartTagType#UNREGISTERED}< *
    {@link StartTagType#NORMAL}< *
    {@link StartTagType#COMMENT}<!-- *
    {@link StartTagType#XML_DECLARATION}<?xml *
    {@link StartTagType#XML_PROCESSING_INSTRUCTION}<? *
    {@link StartTagType#DOCTYPE_DECLARATION}<!doctype *
    {@link StartTagType#MARKUP_DECLARATION}<! *
    {@link StartTagType#CDATA_SECTION}<![cdata[ *
    {@link StartTagType#SERVER_COMMON}<% *
    {@link EndTagType#UNREGISTERED}</ *
    {@link EndTagType#NORMAL}</ *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    Tag TypeStart Delimiter *
    {@link MicrosoftTagTypes#DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT}<![ *
    {@link PHPTagTypes#PHP_SCRIPT}<script *
    {@link PHPTagTypes#PHP_SHORT}<? *
    {@link PHPTagTypes#PHP_STANDARD}<?php *
    {@link MasonTagTypes#MASON_COMPONENT_CALL}<& *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}<&| *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}</& *
    {@link MasonTagTypes#MASON_NAMED_BLOCK}<% *
    {@link MasonTagTypes#MASON_NAMED_BLOCK_END}</% *
    *
    * * @return the character sequence that marks the start of the tag. */ public final String getStartDelimiter() { return startDelimiter; } /** * Returns the character sequence that marks the end of the tag. *
    (property method) *

    * The character sequence must be all in lower case. *

    * In a {@link StartTag} of a {@linkplain StartTagType type} that {@linkplain StartTagType#hasAttributes() has attributes}, * characters appearing inside a quoted attribute value are ignored when determining the location of the closing delimiter. *

    * Note that the optional '/' character preceding the closing '>' in an * {@linkplain StartTag#isEmptyElementTag() empty-element tag} is not considered part of the end delimiter. * This property must define the closing delimiter common to all instances of the tag type. *

    *

    *
    Standard Tag Type Values:
    *
    * *
    Tag TypeClosing Delimiter *
    {@link StartTagType#UNREGISTERED}> *
    {@link StartTagType#NORMAL}> *
    {@link StartTagType#COMMENT}--> *
    {@link StartTagType#XML_DECLARATION}?> *
    {@link StartTagType#XML_PROCESSING_INSTRUCTION}?> *
    {@link StartTagType#DOCTYPE_DECLARATION}> *
    {@link StartTagType#MARKUP_DECLARATION}> *
    {@link StartTagType#CDATA_SECTION}]]> *
    {@link StartTagType#SERVER_COMMON}%> *
    {@link EndTagType#UNREGISTERED}> *
    {@link EndTagType#NORMAL}> *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    Tag TypeClosing Delimiter *
    {@link MicrosoftTagTypes#DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT}]> *
    {@link PHPTagTypes#PHP_SCRIPT}> *
    {@link PHPTagTypes#PHP_SHORT}?> *
    {@link PHPTagTypes#PHP_STANDARD}?> *
    {@link MasonTagTypes#MASON_COMPONENT_CALL}&> *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}&> *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}> *
    {@link MasonTagTypes#MASON_NAMED_BLOCK}> *
    {@link MasonTagTypes#MASON_NAMED_BLOCK_END}> *
    *
    * * @return the character sequence that marks the end of the tag. */ public final String getClosingDelimiter() { return closingDelimiter; } /** * Indicates whether this tag type represents a server tag. *
    (property method) *

    * Server tags are typically parsed by some process on the web server and substituted with other text or markup before delivery to the * user agent. * This parser therefore handles them differently to non-server tags in that they can occur at any position in the document * without regard for the HTML document structure. * As a result they can occur anywhere inside any other tag, although a non-server tag cannot theoretically occur inside a server tag. *

    * The documentation of the tag parsing process explains in detail * how the value of this property affects the recognition of server tags, * as well as how the presence of server tags affects the recognition of non-server tags in and around them. *

    * Most XML-style server tags can not be represented as a distinct tag type because they are generally indistinguishable from non-server XML tags. * See the {@link Segment#ignoreWhenParsing()} method for information about how to prevent such server tags from interfering with the proper parsing * of the rest of the document. *

    *

    *
    Standard Tag Type Values:
    *
    * *
    Tag TypeIs Server Tag *
    {@link StartTagType#UNREGISTERED}false *
    {@link StartTagType#NORMAL}false *
    {@link StartTagType#COMMENT}false *
    {@link StartTagType#XML_DECLARATION}false *
    {@link StartTagType#XML_PROCESSING_INSTRUCTION}false *
    {@link StartTagType#DOCTYPE_DECLARATION}false *
    {@link StartTagType#MARKUP_DECLARATION}false *
    {@link StartTagType#CDATA_SECTION}false *
    {@link StartTagType#SERVER_COMMON}true *
    {@link EndTagType#UNREGISTERED}false *
    {@link EndTagType#NORMAL}false *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    Tag TypeIs Server Tag *
    {@link MicrosoftTagTypes#DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT}false *
    {@link PHPTagTypes#PHP_SCRIPT}true *
    {@link PHPTagTypes#PHP_SHORT}true *
    {@link PHPTagTypes#PHP_STANDARD}true *
    {@link MasonTagTypes#MASON_COMPONENT_CALL}true *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}true *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}true *
    {@link MasonTagTypes#MASON_NAMED_BLOCK}true *
    {@link MasonTagTypes#MASON_NAMED_BLOCK_END}true *
    *
    * * @return true if this tag type represents a server tag, otherwise false. */ public final boolean isServerTag() { return isServerTag; } /** * Returns the {@linkplain Tag#getName() name} prefix required by this tag type. *
    (property method) *

    * This string is identical to the {@linkplain #getStartDelimiter() start delimiter}, except that it does not include the * initial "<" or "</" characters that always prefix the start delimiter of a * {@link StartTagType} or {@link EndTagType} respectively. *

    * The {@linkplain Tag#getName() name} of a tag of this type may or may not include extra characters after the prefix. * This is determined by properties such as {@link StartTagType#isNameAfterPrefixRequired()} * or {@link EndTagTypeGenericImplementation#isStatic()}. *

    *

    *
    Standard Tag Type Values:
    *
    * *
    Tag TypeName Prefix *
    {@link StartTagType#UNREGISTERED}(empty string) *
    {@link StartTagType#NORMAL}(empty string) *
    {@link StartTagType#COMMENT}!-- *
    {@link StartTagType#XML_DECLARATION}?xml *
    {@link StartTagType#XML_PROCESSING_INSTRUCTION}? *
    {@link StartTagType#DOCTYPE_DECLARATION}!doctype *
    {@link StartTagType#MARKUP_DECLARATION}! *
    {@link StartTagType#CDATA_SECTION}![cdata[ *
    {@link StartTagType#SERVER_COMMON}% *
    {@link EndTagType#UNREGISTERED}(empty string) *
    {@link EndTagType#NORMAL}(empty string) *
    *
    *
    *
    Extended Tag Type Values:
    *
    * *
    Tag TypeName Prefix *
    {@link MicrosoftTagTypes#DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT}![ *
    {@link PHPTagTypes#PHP_SCRIPT}script *
    {@link PHPTagTypes#PHP_SHORT}? *
    {@link PHPTagTypes#PHP_STANDARD}?php *
    {@link MasonTagTypes#MASON_COMPONENT_CALL}& *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}&| *
    {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT_END}& *
    {@link MasonTagTypes#MASON_NAMED_BLOCK}% *
    {@link MasonTagTypes#MASON_NAMED_BLOCK_END}% *
    *
    * * @return the {@linkplain Tag#getName() name} prefix required by this tag type. * @see #getStartDelimiter() */ protected final String getNamePrefix() { return namePrefix; } /** * Indicates whether a tag of this type is valid in the specified position of the specified source document. *
    (implementation assistance method) *

    * This method is called immediately before {@link #constructTagAt(Source, int pos)} * to do a preliminary check on the validity of a tag of this type in the specified position. *

    * This check is not performed as part of the {@link #constructTagAt(Source, int pos)} call because the same * validation is used for all the standard tag types, and is likely to be sufficient * for all custom tag types. * Having this check separated into a different method helps to isolate common code from the code that is unique to each tag type. *

    * In theory, a {@linkplain TagType#isServerTag() server tag} is valid in any position, but a non-server tag is not valid inside any other tag, * nor inside elements with CDATA content such as {@link HTMLElementName#SCRIPT SCRIPT} and {@link HTMLElementName#STYLE STYLE} elements. *

    * The common implementation of this method always returns true for server tags, but for non-server tags it behaves slightly differently * depending upon whether or not a {@linkplain Source#fullSequentialParse() full sequential parse} is being performed. *

    * When this method is called during a full sequential parse, the fullSequentialParseData argument contains information * allowing the exact theoretical check to be performed, rejecting a non-server tag if it is inside any other tag. * See below for further information about the fullSequentialParseData parameter. *

    * When this method is called in parse on demand mode * (not during a full sequential parse, fullSequentialParseData==null), * practical constraints prevent the exact theoretical check from being carried out, and non-server tags are only rejected * if they are found inside HTML {@linkplain StartTagType#COMMENT comments} or {@linkplain StartTagType#CDATA_SECTION CDATA sections}. *

    * This behaviour is configurable by manipulating the static {@link TagType#getTagTypesIgnoringEnclosedMarkup() TagTypesIgnoringEnclosedMarkup} array * to determine which tag types can not contain non-server tags in parse on demand mode. * The {@linkplain TagType#getTagTypesIgnoringEnclosedMarkup() documentation of this property} contains * a more detailed analysis of the subject and explains why only the {@linkplain StartTagType#COMMENT comment} and * {@linkplain StartTagType#CDATA_SECTION CDATA section} tag types are included by default. *

    * See the documentation of the tag parsing process for more information about how this method fits into the whole tag parsing process. *

    * This method can be overridden in custom tag types if the default implementation is unsuitable. *

    * The fullSequentialParseData parameter: *

    * This parameter is used to discard non-server tags that are found inside other tags or inside {@link HTMLElementName#SCRIPT SCRIPT} elements. *

    * In the current version of this library, the fullSequentialParseData argument is either null * (in parse on demand mode) or an integer array containing only a single entry * (if a {@linkplain Source#fullSequentialParse() full sequential parse} is being performed). *

    * The integer contained in the array is the maximum position in the document at which the end of a tag has been found, * indicating that no non-server tags should be recognised before that position. * If no tags have yet been encountered, the value of this integer is zero. *

    * If the last tag encountered was the {@linkplain StartTag start tag} of a {@link HTMLElementName#SCRIPT SCRIPT} element, * the value of this integer is Integer.MAX_VALUE, indicating that no other non-server elements should be recognised until the * {@linkplain EndTag end tag} of the {@link HTMLElementName#SCRIPT SCRIPT} element is found. * According to the HTML 4.01 specification section 6.2, * the first occurrence of the character sequence "</" terminates the special handling of CDATA within * {@link HTMLElementName#SCRIPT SCRIPT} and {@link HTMLElementName#STYLE STYLE} elements. * This library however only terminates the CDATA handling of {@link HTMLElementName#SCRIPT SCRIPT} element content * when the character sequence "</script" is detected, in line with the behaviour of the major browsers. *

    * Note that the implicit treatment of {@link HTMLElementName#SCRIPT SCRIPT} element content as CDATA should theoretically also prevent the recognition of * {@linkplain StartTagType#COMMENT comments} and explicit {@linkplain StartTagType#CDATA_SECTION CDATA sections} inside script elements. * While this is true for explicit {@linkplain StartTagType#CDATA_SECTION CDATA sections}, the parser does still recognise * {@linkplain StartTagType#COMMENT comments} inside {@link HTMLElementName#SCRIPT SCRIPT} elements in order to maintain compatibility with the major browsers. * This prevents the character sequence "</script" from terminating the {@link HTMLElementName#SCRIPT SCRIPT} element * if it occurs inside a {@linkplain StartTagType#COMMENT comment}. The end of the {@linkplain StartTagType#COMMENT comment} however also * ends the implicit treatment of the {@link HTMLElementName#SCRIPT SCRIPT} element content as CDATA. *

    * Although {@link HTMLElementName#STYLE STYLE} elements should theoretically be treated in the same way as {@link HTMLElementName#SCRIPT SCRIPT} elements, * the syntax of Cascading Style Sheets (CSS) does not contain any constructs that * could be misinterpreted as HTML tags, so there is virtually no need to perform any special checks in this case. *

    * IMPLEMENTATION NOTE: The rationale behind using an integer array to hold this value, rather than a scalar int value, * is to emulate passing the parameter by reference. * This value needs to be shared amongst several internal methods during the {@linkplain Source#fullSequentialParse() full sequential parse} process, * and any one of those methods needs to be able to modify the value and pass it back to the calling method. * This would normally be implemented by passing the parameter by reference, but because Java does not support this language construct, a container for a * mutable integer must be passed instead. * Because the standard Java library does not provide a class for holding a single mutable integer (the java.lang.Integer class is immutable), * the easiest container to use, without creating a class especially for this purpose, is an integer array. * The use of an array does not imply any intention to use more than a single array entry in subsequent versions. * * @param source the {@link Source} document. * @param pos the character position in the source document to check. * @param fullSequentialParseData an integer array containing data allowing this method to implement a better algorithm when a {@linkplain Source#fullSequentialParse() full sequential parse} is being performed, or null in parse on demand mode. * @return true if a tag of this type is valid in the specified position of the specified source document, otherwise false. */ protected boolean isValidPosition(final Source source, final int pos, final int[] fullSequentialParseData) { if (isServerTag()) return true; if (fullSequentialParseData!=null) { // use simplified check when doing full sequential parse. Normally we are only able to check whether a tag is inside specially cached // tag types for efficiency reasons, but during a full sequential parse we can reject a tag if it is inside any other tag. 
if (fullSequentialParseData[0]==Integer.MAX_VALUE) { // we are in a SCRIPT element if (this==EndTagType.NORMAL && source.getParseText().containsAt("=fullSequentialParseData[0]; // accept the non-server tag only if it is after the end of the last found non-server tag } // Use the normal method of checking whether the position is inside a tag of a tag type that ignores enclosed markup: final TagType[] tagTypesIgnoringEnclosedMarkup=getTagTypesIgnoringEnclosedMarkup(); for (int i=0; iparse on demand mode. *
    (implementation assistance method) *

    * The tag types returned by this property (referred to in the following paragraphs as the "listed types") default to * {@link StartTagType#COMMENT} and {@link StartTagType#CDATA_SECTION}. *

    * This property is used by the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData) isValidPosition} method * in parse on demand mode. * It is not used at all during a {@linkplain Source#fullSequentialParse() full sequential parse}. *

    * In the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData) isValidPosition} method, * in parse on demand mode, * every new non-server tag found by the parser (referred to as a "new tag") undergoes a check to see whether it is enclosed * by a tag of one of the listed types. * This includes new tags of the listed types themselves if they are non-server tags. * The recursive nature of this check means that all tags of the listed types occurring before the new tag must be found * by the parser before it can determine whether the new tag should be ignored. * To mitigate any performance issues arising from this process, the listed types are given special treatment in the tag cache. * This dramatically decreases the time taken to search on these tag types, so adding a tag type to this array that * is easily recognised and occurs infrequently only results in a small degradation in overall performance. *

    * A special exception to the algorithm described above applies to {@link StartTagType#COMMENT COMMENT} tags. * The default implementation of the {@link #isValidPosition(Source,int,int[]) isValidPosition} method * does not check whether a {@link StartTagType#COMMENT COMMENT} tag is inside another {@link StartTagType#COMMENT COMMENT} tag, * as this should never happen in a syntactically correct document (the characters '--' should not occur inside a comment). * Skipping this check also avoids the need to recursively check every {@link StartTagType#COMMENT COMMENT} tag back to the start of the document, * which has the potential to cause a stack overflow in a large document containing lots of comments. *

    * Theoretically, non-server tags appearing inside any other tag should be ignored, which is how the parser behaves during a * {@linkplain Source#fullSequentialParse() full sequential parse}. *

    * Server tags in particular very often contain other "tags" that should not be recognised as tags by the parser. * If this behaviour is required in parse on demand, the tag type of each server tag that might be found * in the source documents can be added to this property using the static {@link #setTagTypesIgnoringEnclosedMarkup(TagType[])} method. * For example, the following command would prevent non-server tags from being recognised inside {@linkplain PHPTagTypes#PHP_STANDARD standard PHP} tags, * as well as the default {@linkplain StartTagType#COMMENT comment} and {@linkplain StartTagType#CDATA_SECTION CDATA section} tags: *

    *

    TagType.setTagTypesIgnoringEnclosedMarkup(new TagType[] {PHPTagTypes.PHP_STANDARD, StartTagType.COMMENT, StartTagType.CDATA_SECTION});
    *

    * The only situation where a non-server tag can legitimately contain a sequence of characters that resembles a tag is within an attribute value. * The HTML 4.01 specification section 5.3.2 * specifically allows the presence of '<' and '>' characters within attribute values. * A common occurrence of this is in event attributes containing scripts, * such as the onclick attribute. * There is no way of preventing such "tags" from being recognised in parse on demand mode, as adding * {@link StartTagType#NORMAL} to this property as a listed type would be far too inefficient. * Performing a {@linkplain Source#fullSequentialParse() full sequential parse} of the source document prevents these attribute values from being * recognised as tags, but can be very expensive if only a few tags in the document need to be parsed. * The penalty of not parsing every tag in the document is that the exactness of this check is compromised, but in practical terms the difference is inconsequential. * The default listed types of {@linkplain StartTagType#COMMENT comments} and {@linkplain StartTagType#CDATA_SECTION CDATA sections} yields sensible results * in the vast majority of practical applications with only a minor impact on performance. *

    * In XHTML, '<' and '>' characters * must be represented in attribute values as {@linkplain CharacterReference character references} * (see the XML 1.0 specification section 3.1), * so the situation should never arise that a tag is found inside another tag unless one of them is a * {@linkplain #isServerTag() server tag}. * * @return an array of all the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags. */ public static final TagType[] getTagTypesIgnoringEnclosedMarkup() { return TagTypesIgnoringEnclosedMarkup.array; } /** * Sets the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags. *
    (implementation assistance method) *

    * See {@link #getTagTypesIgnoringEnclosedMarkup()} for the documentation of this property. * * @param tagTypes an array of tag types. */ public static final void setTagTypesIgnoringEnclosedMarkup(TagType[] tagTypes) { if (tagTypes==null) throw new IllegalArgumentException(); TagTypesIgnoringEnclosedMarkup.array=tagTypes; } /** * Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features. *
    (abstract implementation method) *

    * The implementation of this method must check that the text at the specified position meets all of * the criteria of this tag type, including such checks as the presence of the correct or well formed * {@linkplain #getClosingDelimiter() closing delimiter}, {@linkplain Tag#getName() name}, {@linkplain Attributes attributes}, * {@linkplain EndTag end tag}, or any other distinguishing features. *

    * It can be assumed that the specified position starts with the {@linkplain #getStartDelimiter() start delimiter} of this tag type, * and that all other tag types with higher precedence (if any) have already been rejected as candidates. * Tag types with lower precedence will be considered if this method returns null. *

    * This method is only called after a successful check of the tag's position, i.e. * {@link #isValidPosition(Source,int,int[]) isValidPosition(source,pos,fullSequentialParseData)}==true. *

    * The {@link StartTagTypeGenericImplementation} and {@link EndTagTypeGenericImplementation} subclasses provide default * implementations of this method that allow the use of much simpler properties and * implementation assistance methods and to carry out the required functions. * * @param source the {@link Source} document. * @param pos the position in the source document. * @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or null if it does not meet the criteria. */ protected abstract Tag constructTagAt(Source source, int pos); /** * Indicates whether a tag of this type encloses the specified position of the specified source document. *
    (implementation assistance method) *

    * This is logically equivalent to source.{@link Source#getEnclosingTag(int,TagType) getEnclosingTag(pos,this)}!=null, * but is safe to use within other implementation methods without the risk of causing an infinite recursion. *

    * This method is called from the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData)} method. * * @param source the {@link Source} document. * @param pos the character position in the source document to check. * @return true if a tag of this type encloses the specified position of the specified source document, otherwise false. */ protected final boolean tagEncloses(final Source source, final int pos) { if (pos==0) return false; final Tag enclosingTag=source.getEnclosingTag(pos-1,this); // use pos-1 otherwise a tag at pos could cause infinite recursion when this is called from constructTagAt return enclosingTag!=null && pos!=enclosingTag.getEnd(); // make sure pos!=enclosingTag.getEnd() to compensate for using pos-1 above (important if the tag in question immediately follows an end tag delimiter) } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String toString() { return getDescription(); } static final Tag getTagAt(final Source source, final int pos, final boolean serverTagOnly, final boolean assumeNoNestedTags) { final TagTypeRegister.ProspectiveTagTypeIterator prospectiveTagTypeIterator=new TagTypeRegister.ProspectiveTagTypeIterator(source,pos); // prospectiveTagTypeIterator is empty if pos is out of range. 
while (prospectiveTagTypeIterator.hasNext()) { final TagType tagType=prospectiveTagTypeIterator.next(); if (serverTagOnly && !tagType.isServerTag()) continue; if (!assumeNoNestedTags && !tagType.isValidPosition(source,pos,source.fullSequentialParseData)) continue; try { final Tag tag=tagType.constructTagAt(source,pos); if (tag!=null) return tag; } catch (IndexOutOfBoundsException ex) { if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("Tag at ")).append(" not recognised as type '").append(tagType.getDescription()).append("' because it has no end delimiter").toString()); } } return null; } private static final class TagTypesIgnoringEnclosedMarkup { // This internal class is used to contain the array because its static initialisation can occur after // the StartTagType.COMMENT and StartTagType.CDATA_SECTION members have been created. public static TagType[] array=new TagType[] { StartTagType.COMMENT, StartTagType.CDATA_SECTION }; } } jericho-html-3.1/src/java/net/htmlparser/jericho/WriterLogger.java0000644000175000017500000001376311204550410025263 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. 
// See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; /** * Provides an implementation of the {@link Logger} interface that sends output to the specified java.io.Writer. *

    * Each log entry is formatted using the {@link BasicLogFormatter#format(String level, String message, String loggerName)} method. *

    * Note that each logging level can be enabled independently in this implementation. * All levels are enabled by default. */ public class WriterLogger implements Logger { private final Writer writer; private final String name; private boolean errorEnabled=true; private boolean warnEnabled=true; private boolean infoEnabled=true; private boolean debugEnabled=true; /** * Constructs a new WriterLogger with the specified Writer and the default name. *

    * The default logger name is "net.htmlparser.jericho". * * @param writer the Writer to which all output is sent. */ public WriterLogger(final Writer writer) { this(writer,Source.PACKAGE_NAME); } /** * Constructs a new WriterLogger with the specified Writer and name. *

    * The value of the name argument is only relevant if the {@link BasicLogFormatter#OutputName} static property is set to true, * otherwise the name is not included in the output at all. * * @param writer the Writer to which all output is sent. * @param name the logger name, may be null. */ public WriterLogger(final Writer writer, final String name) { this.writer=writer; this.name=name; } /** * Returns the Writer to which all output is sent. * @return the Writer to which all output is sent. */ public Writer getWriter() { return writer; } /** * Returns the name of this logger. * @return the name of this logger, may be null. */ public String getName() { return name; } // Documentation inherited from Logger public void error(final String message) { if (isErrorEnabled()) log("ERROR",message); } // Documentation inherited from Logger public void warn(final String message) { if (isWarnEnabled()) log("WARN",message); } // Documentation inherited from Logger public void info(final String message) { if (isInfoEnabled()) log("INFO",message); } // Documentation inherited from Logger public void debug(final String message) { if (isDebugEnabled()) log("DEBUG",message); } // Documentation inherited from Logger public boolean isErrorEnabled() { return errorEnabled; } /** * Sets whether logging is enabled at the ERROR level. * @param errorEnabled determines whether logging is enabled at the ERROR level. */ public void setErrorEnabled(final boolean errorEnabled) { this.errorEnabled=errorEnabled; } // Documentation inherited from Logger public boolean isWarnEnabled() { return warnEnabled; } /** * Sets whether logging is enabled at the WARN level. * @param warnEnabled determines whether logging is enabled at the WARN level. */ public void setWarnEnabled(final boolean warnEnabled) { this.warnEnabled=warnEnabled; } // Documentation inherited from Logger public boolean isInfoEnabled() { return infoEnabled; } /** * Sets whether logging is enabled at the INFO level. 
* @param infoEnabled determines whether logging is enabled at the INFO level. */ public void setInfoEnabled(final boolean infoEnabled) { this.infoEnabled=infoEnabled; } // Documentation inherited from Logger public boolean isDebugEnabled() { return debugEnabled; } /** * Sets whether logging is enabled at the DEBUG level. * @param debugEnabled determines whether logging is enabled at the DEBUG level. */ public void setDebugEnabled(final boolean debugEnabled) { this.debugEnabled=debugEnabled; } /** * Logs the specified message at the specified level. *

    * This method is called internally by the {@link #error(String)}, {@link #warn(String)}, {@link #info(String)} and {@link #debug(String)} methods, * with the level argument set to the text "ERROR", "WARN", "INFO", or "DEBUG" respectively. *

    * The default implementation of this method sends the the output of * {@link BasicLogFormatter#format(String,String,String) BasicLogFormatter.format}(level,message,{@link #getName()}) * to the {@link #getWriter() Writer} specified in the class constructor, and then flushes it. *

    * Overriding this method in a subclass provides a convenient means of logging to a Writer using a different format. * * @param level a string representing the level of the log message. * @param message the message to log. */ protected void log(final String level, final String message) { try { writer.write(BasicLogFormatter.format(level,message,name)); writer.flush(); } catch (IOException ex) { throw new RuntimeException(ex); } } } jericho-html-3.1/src/java/net/htmlparser/jericho/CharOutputSegment.java0000644000175000017500000000547411204550410026270 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; /** * Implements an {@link OutputSegment} whose content is a single character constant. *

    * This class has been removed from the public API and the functionality replaced with the * {@link OutputDocument#Replace(int begin, int end, char ch)} method. */ final class CharOutputSegment implements OutputSegment { private final int begin; private final int end; private final char ch; /** * Constructs a new CharOutputSegment with the specified begin and end character positions and the specified content. * @param begin the position in the {@link OutputDocument} where this OutputSegment begins. * @param end the position in the {@link OutputDocument} where this OutputSegment ends. * @param ch the character output of the new OutputSegment. */ public CharOutputSegment(final int begin, final int end, final char ch) { this.begin=begin; this.end=end; this.ch=ch; } /** * Constructs a new CharOutputSegment with the same span as the specified {@link Segment}. * @param segment a {@link Segment} defining the begin and end character positions of the new OutputSegment. * @param ch the character output of the new OutputSegment. 
*/ public CharOutputSegment(final Segment segment, final char ch) { begin=segment.begin; end=segment.end; this.ch=ch; } public int getBegin() { return begin; } public int getEnd() { return end; } public void writeTo(final Writer writer) throws IOException { appendTo(writer); } public void appendTo(final Appendable appendable) throws IOException { appendable.append(ch); } public long getEstimatedMaximumOutputLength() { return 1; } public String toString() { return Character.toString(ch); } public String getDebugInfo() { return "Replace: (p"+begin+"-p"+end+") "+ch; } } jericho-html-3.1/src/java/net/htmlparser/jericho/FormControlOutputStyle.java0000644000175000017500000003137111204550410027350 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * An enumerated type representing the three major output styles of a {@linkplain FormControl form control's} * output element. *

    * A form control's output style is set using the {@link FormControl#setOutputStyle(FormControlOutputStyle)} method.
    */
public enum FormControlOutputStyle {
    /**
     * Normal display of the output element.
     * <p>
     * This is the default display style.
     */
    NORMAL,

    /**
     * Remove the output element from the {@linkplain OutputDocument output document} completely.
     */
    REMOVE,

    /**
     * The {@linkplain #NORMAL normal} output element is replaced with a simple representation
     * of the {@linkplain FormControl form control's} submission value(s).
     * <p>
     * The implementation of this functionality is highly subjective, but provides a more aesthetic way of displaying a read-only version
     * of a form without having to resort to using {@linkplain FormControl#isDisabled() disabled} controls.
     * <p>
     * The representation is dependent on the {@linkplain FormControlType form control type}, and can be configured using the
     * static properties of the {@link ConfigDisplayValue ConfigDisplayValue} nested class.
     * <p>
     * Unless specified otherwise below, the {@linkplain #NORMAL normal} output element is
     * replaced with a <i>display value element</i> having the {@linkplain Element#getName() name}
     * specified in the static {@link ConfigDisplayValue#ElementName ConfigDisplayValue.ElementName} property
     * (<code>div</code> by default).
     * The attributes specified in the static {@link ConfigDisplayValue#AttributeNames ConfigDisplayValue.AttributeNames} list
     * (<code>id</code>, <code>class</code> and <code>style</code> by default) are copied from
     * the {@linkplain #NORMAL normal} output element into the
     * display value element.
     * <p>
     * Details of the content of the display value element or other representation of the
     * control value are as follows:
     * <dl>
     *  <dt>{@link FormControlType#TEXT TEXT}, {@link FormControlType#FILE FILE}
     *   <dd>The content of the display value element is the
     *    {@linkplain CharacterReference#reencode(CharSequence) re-encoded} value of the
     *    {@linkplain #NORMAL normal} output element's <code>value</code> attribute.
     *  <dt>{@link FormControlType#TEXTAREA TEXTAREA}
     *   <dd>The content of the display value element is the content of the <code>TEXTAREA</code> element
     *    re-encoded {@linkplain CharacterReference#encodeWithWhiteSpaceFormatting(CharSequence) with white space formatting}.
     *  <dt>{@link FormControlType#CHECKBOX CHECKBOX}, {@link FormControlType#RADIO RADIO}
     *   <dd>The {@linkplain #NORMAL normal} output element is replaced with the
     *    un-encoded content specified in the {@link ConfigDisplayValue#CheckedHTML ConfigDisplayValue.CheckedHTML} or
     *    {@link ConfigDisplayValue#UncheckedHTML ConfigDisplayValue.UncheckedHTML} static property, depending on
     *    whether the {@linkplain #NORMAL normal} output element contains a
     *    <code>checked</code> attribute.
     *    If the relevant static property has a value of <code>null</code> (the default), the
     *    output element is simply a {@linkplain FormControl#setDisabled(boolean) disabled}
     *    version of the form control.
     *    Attempting to determine which labels might apply to which checkbox or radio button, allowing only the
     *    selected controls to be displayed, would require a very complex and inexact algorithm, so is best left to the developer
     *    to implement if required.
     *  <dt>{@link FormControlType#SELECT_SINGLE SELECT_SINGLE}, {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE}
     *   <dd>The content of the display value element is the
     *    {@linkplain CharacterReference#reencode(CharSequence) re-encoded} label of the currently selected option.
     *    In the case of a {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE} control, all labels of selected options
     *    are listed, separated by the text specified in the static
     *    {@link ConfigDisplayValue#MultipleValueSeparator ConfigDisplayValue.MultipleValueSeparator} property
     *    ("<code>, </code>" by default).
     *  <dt>{@link FormControlType#PASSWORD PASSWORD}
     *   <dd>The content of the display value element is the
     *    {@linkplain CharacterReference#encode(CharSequence) encoded} character specified in the
     *    {@link ConfigDisplayValue#PasswordChar ConfigDisplayValue.PasswordChar} static property ('<code>*</code>' by default),
     *    repeated <i>n</i> times, where <i>n</i> is the number of characters in the control's
     *    submission value.
     *  <dt>{@link FormControlType#HIDDEN HIDDEN}
     *   <dd>The output element is {@linkplain #REMOVE removed} completely.
     *  <dt>{@link FormControlType#BUTTON BUTTON}, {@link FormControlType#SUBMIT SUBMIT}, {@link FormControlType#IMAGE IMAGE}
     *   <dd>The output element
     *    is a {@linkplain FormControl#setDisabled(boolean) disabled} version of the original form control.
     * </dl>
     * <p>
     * If the submission value of the control is <code>null</code> or an empty string,
     * the display value element is given the un-encoded content specified in the
     * {@link ConfigDisplayValue#EmptyHTML ConfigDisplayValue.EmptyHTML} static property.
     */
    DISPLAY_VALUE;

    /**
     * Returns a string representation of this object useful for debugging purposes.
     * <p>
     * This is equivalent to {@link #toString()}.
     *
     * @return a string representation of this object useful for debugging purposes.
     */
    public String getDebugInfo() {
        return toString();
    }

    /**
     * Contains static properties that configure the {@link #DISPLAY_VALUE} form control output style.
     * <p>
     * None of the properties should be assigned a <code>null</code> value.
     * <p>
     * See the documentation of the {@link #DISPLAY_VALUE} output style for details on how these properties are used.
     */
    public static final class ConfigDisplayValue {
        /**
         * Defines the text that is used to separate multiple values in a
         * display value element.
         * <p>
         * This property is only relevant to {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE} form controls, and is only used
         * if multiple items in the control are selected.
         * <p>
         * The default value is "<code>, </code>".
         */
        public static volatile String MultipleValueSeparator=", ";

        /**
         * Defines the {@linkplain Element#getName() name} of
         * display value elements.
         * <p>
         * The default value is "<code>div</code>".
         * <p>
         * Although all form control {@linkplain FormControl#getElement() elements} are
         * {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} elements, the default replacement is the
         * {@linkplain HTMLElements#getBlockLevelElementNames() block-level} {@link HTMLElementName#DIV DIV} element, which allows
         * richer stylesheet formatting than the most common alternative, the {@link HTMLElementName#SPAN SPAN} element,
         * such as the ability to set its <code>width</code> and <code>height</code>.
         * <p>
         * This has the undesired effect in some cases of displaying the value on a new line, whereas the original form control
         * was not on a new line. In practical use however, many form controls are placed inside table cells for better control
         * over their positioning. In this case replacing the original inline form control with the block <code>DIV</code>
         * element does not alter its position.
         */
        public static volatile String ElementName=HTMLElementName.DIV;

        /**
         * Defines the names of the {@linkplain Attributes attributes} that are copied from the normal form control
         * output element to a
         * display value element.
         * <p>
         * The names included in the list by default are "<code>id</code>", "<code>class</code>" and "<code>style</code>".
         * <p>
         * These attributes are usually all that is needed to identify the elements in style sheets or specify the styles directly.
         * <p>
         * The default list is modifiable.
         */
        // Parameterised as List<String>: the raw List/ArrayList offered no compile-time check on the element type.
        public static volatile List<String> AttributeNames=new ArrayList<String>(Arrays.asList(Attribute.ID,Attribute.CLASS,Attribute.STYLE));

        /**
         * Defines the content of a display value element
         * if the submission value of the control is <code>null</code> or an empty string.
         * <p>
         * The content is not {@linkplain CharacterReference#encode(CharSequence) encoded} before output.
         * <p>
         * The default content is "<code>&amp;nbsp;</code>".
         */
        // Written as the character entity reference so that the literal matches the documented default
        // and does not depend on the encoding of this source file.
        public static volatile String EmptyHTML="&nbsp;";

        /**
         * Defines the character used to represent the value of a {@link FormControlType#PASSWORD PASSWORD} form control
         * in a display value element.
         * <p>
         * The character is repeated <i>n</i> times, where <i>n</i> is the number of characters in the control's
         * submission value.
         * <p>
         * The resulting string is {@linkplain CharacterReference#encode(CharSequence) encoded} before output.
         * <p>
         * The default password character is '<code>*</code>'.
         */
        public static volatile char PasswordChar='*';

        /**
         * Defines the HTML which replaces the {@linkplain #NORMAL normal} output element
         * of a {@link FormControlType#CHECKBOX CHECKBOX} or {@link FormControlType#RADIO RADIO} form control if it contains a
         * <code>checked</code> attribute.
         * <p>
         * If this property is <code>null</code>, the output element is simply a
         * {@linkplain FormControl#setDisabled(boolean) disabled} version of the form control.
         * <p>
         * The HTML is not {@linkplain CharacterReference#encode(CharSequence) encoded} before output.
         * <p>
         * The default value is <code>null</code>.
         */
        public static volatile String CheckedHTML=null;

        /**
         * Defines the HTML which replaces the {@linkplain #NORMAL normal} output element
         * of a {@link FormControlType#CHECKBOX CHECKBOX} or {@link FormControlType#RADIO RADIO} form control if it does not contain a
         * <code>checked</code> attribute.
         * <p>
         * If this property is <code>null</code>, the output element is simply a
         * {@linkplain FormControl#setDisabled(boolean) disabled} version of the form control.
         * <p>
         * The HTML is not {@linkplain CharacterReference#encode(CharSequence) encoded} before output.
         * <p>
         * The default value is <code>null</code>.
         */
        public static volatile String UncheckedHTML=null;

        // Holder of static configuration only; never instantiated.
        private ConfigDisplayValue() {}
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/LoggerProviderJCL.java0000644000175000017500000000402011204550410026114 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

/**
 * {@link LoggerProvider} implementation backed by Apache Commons Logging (JCL).
 * <p>
 * Every {@link Logger} returned by {@link #getLogger(String)} wraps the
 * <code>org.apache.commons.logging.Log</code> obtained from <code>LogFactory.getLog(name)</code>
 * and forwards each call to it unchanged.
 * The single instance of this class is available as {@link #INSTANCE}.
 */
final class LoggerProviderJCL implements LoggerProvider {
    /** The only instance of this provider. */
    public static final LoggerProvider INSTANCE=new LoggerProviderJCL();

    // Private so that INSTANCE is the only instance ever created.
    private LoggerProviderJCL() {}

    /**
     * Returns a logger that delegates to the commons-logging <code>Log</code> of the given name.
     *
     * @param name  the name passed to <code>LogFactory.getLog</code>.
     * @return a new wrapper around the <code>Log</code> looked up for <code>name</code>.
     */
    public Logger getLogger(final String name) {
        final org.apache.commons.logging.Log log=org.apache.commons.logging.LogFactory.getLog(name);
        return new JCLLogger(log);
    }

    /** Adapts a commons-logging <code>Log</code> to the {@link Logger} interface by plain delegation. */
    private static class JCLLogger implements Logger {
        private final org.apache.commons.logging.Log delegate;

        public JCLLogger(final org.apache.commons.logging.Log delegate) {
            this.delegate=delegate;
        }

        // ----- level checks -----

        public boolean isErrorEnabled() {
            return delegate.isErrorEnabled();
        }

        public boolean isWarnEnabled() {
            return delegate.isWarnEnabled();
        }

        public boolean isInfoEnabled() {
            return delegate.isInfoEnabled();
        }

        public boolean isDebugEnabled() {
            return delegate.isDebugEnabled();
        }

        // ----- message output -----

        public void error(final String message) {
            delegate.error(message);
        }

        public void warn(final String message) {
            delegate.warn(message);
        }

        public void info(final String message) {
            delegate.info(message);
        }

        public void debug(final String message) {
            delegate.debug(message);
        }
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/LoggerProviderLog4J.java0000644000175000017500000000424711204550410026436 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. 
// See the individual licence texts for more details. package net.htmlparser.jericho; final class LoggerProviderLog4J implements LoggerProvider { public static final LoggerProvider INSTANCE=new LoggerProviderLog4J(); private LoggerProviderLog4J() {} public Logger getLogger(final String name) { return new Log4JLogger(org.apache.log4j.Logger.getLogger(name)); } private static class Log4JLogger implements Logger { private final org.apache.log4j.Logger log4JLogger; public Log4JLogger(final org.apache.log4j.Logger log4JLogger) { this.log4JLogger=log4JLogger; } public void error(final String message) { log4JLogger.error(message); } public void warn(final String message) { log4JLogger.warn(message); } public void info(final String message) { log4JLogger.info(message); } public void debug(final String message) { log4JLogger.debug(message); } public boolean isErrorEnabled() { return log4JLogger.isEnabledFor(org.apache.log4j.Level.ERROR); } public boolean isWarnEnabled() { return log4JLogger.isEnabledFor(org.apache.log4j.Level.WARN); } public boolean isInfoEnabled() { return log4JLogger.isEnabledFor(org.apache.log4j.Level.INFO); } public boolean isDebugEnabled() { return log4JLogger.isEnabledFor(org.apache.log4j.Level.DEBUG); } } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypePHPScript.java0000644000175000017500000000302511204550410026765 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details.
package net.htmlparser.jericho;

/**
 * Start tag type for PHP code embedded in a script element,
 * i.e. <code>&lt;script language="php"&gt;</code>.
 * <p>
 * The tag is first constructed by the generic implementation and is then accepted only
 * if its <code>language</code> attribute has the value <code>php</code> (ignoring case).
 * The single instance of this class is available as {@link #INSTANCE}.
 */
final class StartTagTypePHPScript extends StartTagTypeGenericImplementation {
    /** The only instance of this tag type. */
    protected static final StartTagTypePHPScript INSTANCE=new StartTagTypePHPScript();

    private StartTagTypePHPScript() {
        // NOTE(review): the start and closing delimiter arguments ("<script" and ">") were missing
        // from this copy of the source, apparently removed by markup stripping, which left a single
        // empty string in their place. They are restored here following the delimiter-pair layout
        // used by StartTagTypeServerCommon ("<%","%>") - confirm against the constructor of
        // StartTagTypeGenericImplementation.
        super("PHP script","<script",">",EndTagType.NORMAL,true,true,false);
    }

    /**
     * Constructs the tag at the given position, rejecting script tags that are not PHP.
     *
     * @param source  the source document being parsed.
     * @param pos  the position in the source at which the tag is to be constructed.
     * @return the constructed start tag, or <code>null</code> if the generic implementation
     *         found no tag at <code>pos</code> or the tag's <code>language</code> attribute is not <code>php</code>.
     */
    protected Tag constructTagAt(final Source source, final int pos) {
        final StartTag startTag=(StartTag)super.constructTagAt(source,pos);
        if (startTag==null) return null;
        // A PHP script element requires the attribute language="php".
        // The literal is the receiver of equalsIgnoreCase so that a missing attribute (null value) simply fails the test.
        final String language=startTag.getAttributes().getValue("language");
        if (!"php".equalsIgnoreCase(language)) return null;
        return startTag;
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypePHPShort.java0000644000175000017500000000236511204550410026626 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; // note this has the same startdelimiter as processing instruction, so overrides it if registered final class StartTagTypePHPShort extends StartTagTypeGenericImplementation { protected static final StartTagTypePHPShort INSTANCE=new StartTagTypePHPShort(); private StartTagTypePHPShort() { super("PHP short tag","",null,true); } } jericho-html-3.1/src/java/net/htmlparser/jericho/HTMLElementNameSet.java0000644000175000017500000000345011204550410026172 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

import java.util.*;

/**
 * A {@link HashSet} of strings with chainable helper methods for building sets of names.
 * <p>
 * The <code>union</code> and <code>minus</code> methods modify this set in place and return
 * <code>this</code>, so that several operations can be chained in a single expression.
 */
final class HTMLElementNameSet extends HashSet<String> {
    /** Constructs an empty set with an initial capacity of 1. */
    public HTMLElementNameSet() {
        super(1);
    }

    /**
     * Constructs a set containing the items of the given array.
     *
     * @param items  the items to add; an item occurring more than once is stored once.
     */
    public HTMLElementNameSet(final String[] items) {
        // Initial capacity is twice the number of items, as in the collection constructor.
        super(items.length*2);
        for (String item : items) add(item);
    }

    /**
     * Constructs a set containing the items of the given collection.
     *
     * @param collection  the items to add.
     */
    public HTMLElementNameSet(final Collection<String> collection) {
        super(collection.size()*2);
        union(collection);
    }

    /**
     * Constructs a set containing the single given item.
     *
     * @param item  the item to add.
     */
    public HTMLElementNameSet(final String item) {
        super(2);
        add(item);
    }

    /**
     * Adds the given item to this set.
     *
     * @param item  the item to add.
     * @return this set.
     */
    HTMLElementNameSet union(final String item) {
        add(item);
        return this;
    }

    /**
     * Adds every item of the given collection to this set.
     *
     * @param collection  the items to add.
     * @return this set.
     */
    HTMLElementNameSet union(final Collection<String> collection) {
        for (String item : collection) add(item);
        return this;
    }

    /**
     * Removes the given item from this set if it is present.
     *
     * @param item  the item to remove.
     * @return this set.
     */
    HTMLElementNameSet minus(final String item) {
        remove(item);
        return this;
    }

    /**
     * Removes every item of the given collection from this set.
     *
     * @param collection  the items to remove.
     * @return this set.
     */
    HTMLElementNameSet minus(final Collection<String> collection) {
        for (String item : collection) remove(item);
        return this;
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeServerCommon.java0000644000175000017500000000223011204550410027565 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

/**
 * Start tag type for common server tags, which are delimited by
 * <code>&lt;%</code> and <code>%&gt;</code>.
 * <p>
 * All behaviour is inherited from {@link StartTagTypeGenericImplementation};
 * this class only supplies the description and delimiters.
 * The single instance of this class is available as {@link #INSTANCE}.
 */
final class StartTagTypeServerCommon extends StartTagTypeGenericImplementation {
    /** The only instance of this tag type. */
    static final StartTagTypeServerCommon INSTANCE=new StartTagTypeServerCommon();

    // Private so that INSTANCE is the only instance ever created.
    private StartTagTypeServerCommon() {
        super(
            "common server tag",
            "<%",
            "%>",
            null,
            true
        );
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/Config.java0000644000175000017500000010160011204550410024040 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Encapsulates global configuration properties which determine the behaviour of various functions. *

    * All of the properties in this class are static, affecting all objects and threads. * Multiple concurrent configurations are not possible. *

    * Properties that relate to user agent * compatibility issues are stored in instances of the {@link Config.CompatibilityMode} class. * This allows all of the properties in the compatibility mode to be set as a block by setting the static * {@link #CurrentCompatibilityMode} property to a different instance. * * @see Config.CompatibilityMode */ public final class Config { private Config() {} /** * Determines the string used to separate a single column's multiple values in the output of the {@link FormFields#getColumnValues(Map)} method. *

    * The situation where a single column has multiple values only arises if {@link FormField#getUserValueCount()}>1 * on the relevant form field, which usually indicates a poorly designed form. *

    * The default value is "," (a comma, not including the quotes). *

    * Must not be null. */ public static String ColumnMultipleValueSeparator=","; /** * Determines the string that represents the value true in the output of the {@link FormFields#getColumnValues(Map)} method. *

    * The default value is "true" (without the quotes). *

    * Must not be null. */ public static String ColumnValueTrue=Boolean.toString(true); /** * Determines the string that represents the value false in the output of the {@link FormFields#getColumnValues(Map)} method. *

    * The default value is null, which represents no output at all. */ public static String ColumnValueFalse=null; /** * Determines whether the {@link CharacterReference#decode(CharSequence)} and similar methods convert non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character references to normal spaces. *

    * The default value is true. *

    * When this property is set to false, non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) * character references are decoded as non-breaking space characters (U+00A0) instead of being converted to normal spaces (U+0020). *

    * The default behaviour of the library reflects the fact that non-breaking space character references are almost always used in HTML documents * as a non-collapsing white space character. * Converting them to the correct character code U+00A0, which is represented by a visible character in many older character sets, was confusing to most users * who expected to see only normal spaces. * The most common example of this is its visualisation as the character á in the MS-DOS CP437 character set. *

    * The functionality of the following methods is affected: *

      *
    • {@link CharacterReference#appendCharTo(Appendable)} *
    • {@link CharacterReference#decode(CharSequence)} *
    • {@link CharacterReference#decode(CharSequence, boolean insideAttributeValue)} *
    • {@link CharacterReference#decodeCollapseWhiteSpace(CharSequence)} *
    • {@link CharacterReference#reencode(CharSequence)} *
    • {@link Attribute#getValue()} *
    • {@link Attributes#getValue(String name)} *
    • {@link Attributes#populateMap(Map, boolean convertNamesToLowerCase)} *
    • {@link StartTag#getAttributeValue(String attributeName)} *
    • {@link Element#getAttributeValue(String attributeName)} *
    • {@link FormControl#getPredefinedValues()} *
    • {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} *
    • {@link Renderer#getConvertNonBreakingSpaces()} *
    • {@link TextExtractor#getConvertNonBreakingSpaces()} *
    */ public static boolean ConvertNonBreakingSpaces=true; /** * Determines the currently active {@linkplain Config.CompatibilityMode compatibility mode}. *

    * The default setting is {@link Config.CompatibilityMode#IE} (MS Internet Explorer 6.0). *

    * Must not be null. */ public static CompatibilityMode CurrentCompatibilityMode=CompatibilityMode.IE; /** * Determines whether apostrophes are encoded when calling the {@link CharacterReference#encode(CharSequence)} method. *

    * A value of false means {@linkplain CharacterEntityReference#_apos apostrophe} * (U+0027) characters are not encoded. * The only time apostrophes need to be encoded is within an attribute value delimited by * single quotes (apostrophes), so in most cases ignoring apostrophes is perfectly safe and * enhances the readability of the source document. *

    * Note that apostrophes are always encoded as a {@linkplain NumericCharacterReference numeric character reference}, never as the * character entity reference {@link CharacterEntityReference#_apos &apos;}. *

    * The default value is false. */ public static boolean IsApostropheEncoded=false; /** * Determines the {@link LoggerProvider} that is used to create the default {@link Logger} object for each new {@link Source} object. *

    * The {@link LoggerProvider} interface contains several predefined LoggerProvider instances which this property can be set to, * mostly representing wrappers to common logging frameworks. *

    * The default value is null, which results in the auto-detection of the most appropriate logging mechanism according to the following algorithm: *

    *

      *
    1. If the class org.slf4j.impl.StaticLoggerBinder is detected: *
        *
      • If the class org.slf4j.impl.JDK14LoggerFactory is detected, use {@link LoggerProvider#JAVA}. *
      • If the class org.slf4j.impl.Log4jLoggerFactory is detected, use {@link LoggerProvider#LOG4J}. *
      • If the class org.slf4j.impl.JCLLoggerFactory is NOT detected, use {@link LoggerProvider#SLF4J}. *
      *
    2. If the class org.apache.commons.logging.Log is detected: *
      * Create an instance of it using the commons-logging LogFactory class. *
        *
      • If the created Log is of type org.apache.commons.logging.impl.Jdk14Logger, use {@link LoggerProvider#JAVA}. *
      • If the created Log is of type org.apache.commons.logging.impl.Log4JLogger, use {@link LoggerProvider#LOG4J}. *
      • otherwise, use {@link LoggerProvider#JCL}. *
      *
      *
    3. If the class org.apache.log4j.Logger is detected, use {@link LoggerProvider#LOG4J}. *
    4. otherwise, use {@link LoggerProvider#JAVA}. *
    * * @see Source#setLogger(Logger) */ public static LoggerProvider LoggerProvider=null; /** * Determines the string used to represent a newline in text output throughout the library. *

    * The default value is the standard new line character sequence of the host platform, determined by System.getProperty("line.separator"). */ public static String NewLine=System.getProperty("line.separator"); /** * Used in Element.getChildElements. * Server elements containing markup should be included in the hierarchy, so consider making this option public in future. */ static final boolean IncludeServerTagsInElementHierarchy=false; /** * Represents a set of maximum unicode code points to be recognised for the three types of * unterminated character reference in a given context. *

    * The three types of character reference are: *

    *

    * The two types of contexts used in this library are: *

      *
    • Inside an attribute value *
    • Outside an attribute value *
    */ static class UnterminatedCharacterReferenceSettings { // use volatile fields to make them thread safe public volatile int characterEntityReferenceMaxCodePoint; public volatile int decimalCharacterReferenceMaxCodePoint; public volatile int hexadecimalCharacterReferenceMaxCodePoint; public static UnterminatedCharacterReferenceSettings ACCEPT_ALL=new UnterminatedCharacterReferenceSettings(CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL); public UnterminatedCharacterReferenceSettings() { this(CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE); } public UnterminatedCharacterReferenceSettings(final int characterEntityReferenceMaxCodePoint, final int decimalCharacterReferenceMaxCodePoint, final int hexadecimalCharacterReferenceMaxCodePoint) { this.characterEntityReferenceMaxCodePoint=characterEntityReferenceMaxCodePoint; this.decimalCharacterReferenceMaxCodePoint=decimalCharacterReferenceMaxCodePoint; this.hexadecimalCharacterReferenceMaxCodePoint=hexadecimalCharacterReferenceMaxCodePoint; } public String toString() { return Config.NewLine+" Character entity reference: "+getDescription(characterEntityReferenceMaxCodePoint) +Config.NewLine+" Decimal character reference: "+getDescription(decimalCharacterReferenceMaxCodePoint) +Config.NewLine+" Haxadecimal character reference: "+getDescription(hexadecimalCharacterReferenceMaxCodePoint); } private String getDescription(final int codePoint) { if (codePoint==CompatibilityMode.CODE_POINTS_NONE) return "None"; if (codePoint==CompatibilityMode.CODE_POINTS_ALL) return "All"; return "0x"+Integer.toString(codePoint,16); } } /** * Represents a set of configuration parameters that relate to * user agent compatibility issues. *

    * The predefined compatibility modes {@link #IE}, {@link #MOZILLA}, {@link #OPERA} and {@link #XHTML} provide an easy means of * ensuring the library interprets the markup in a way consistent with some of the most commonly used browsers, * at least in relation to the behaviour described by the properties in this class. *

    * The properties of any CompatibilityMode object can be modified individually, including those in * the predefined instances as well as newly constructed instances. * Take note however that modifying the properties of the predefined instances has a global effect. *

    * The currently active compatibility mode is stored in the static {@link Config#CurrentCompatibilityMode} property. *

    */ public static final class CompatibilityMode { private String name; private volatile boolean formFieldNameCaseInsensitive; volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsInsideAttributeValue; volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsOutsideAttributeValue; /** * Indicates the recognition of all unicode code points. *

    * This value is used in properties which specify a maximum unicode code point to be recognised by the parser. * * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue) * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue) * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue) */ public static final int CODE_POINTS_ALL=Character.MAX_CODE_POINT; // 0x10FFFF (decimal 1114111) /** * Indicates the recognition of no unicode code points. *

    * This value is used in properties which specify a maximum unicode code point to be recognised by the parser. * * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue) * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue) * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue) */ public static final int CODE_POINTS_NONE=CharacterReference.INVALID_CODE_POINT; /** * Microsoft Internet Explorer compatibility mode. *

    * {@link #getName() Name} = IE
    * {@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true
    * *
    Recognition of unterminated character references:  (inside attribute)    (outside attribute)   *
    {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} =U+00FFU+00FF *
    {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_ALL All}{@linkplain #CODE_POINTS_ALL All} *
    {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_ALL All}{@linkplain #CODE_POINTS_NONE None} *
    */ public static final CompatibilityMode IE=new CompatibilityMode("IE",true, new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_NONE) // outside attributes ); /** * Mozilla / * Firefox / * Netscape compatibility mode. *

    * {@link #getName() Name} = Mozilla
    * {@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false
    * *
    Recognition of unterminated character references:  (inside attribute)    (outside attribute)   *
    {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} =U+00FF{@linkplain #CODE_POINTS_ALL All} *
    {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_ALL All}{@linkplain #CODE_POINTS_ALL All} *
    {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_ALL All}{@linkplain #CODE_POINTS_ALL All} *
    */ public static final CompatibilityMode MOZILLA=new CompatibilityMode("Mozilla",false, new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes ); /** * Opera compatibility mode. *

    * {@link #getName() Name} = Opera
    * {@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true
    * *
    Recognition of unterminated character references:  (inside attribute)    (outside attribute)   *
    {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} =U+003E{@linkplain #CODE_POINTS_ALL All} *
    {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_ALL All}{@linkplain #CODE_POINTS_ALL All} *
    {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_ALL All}{@linkplain #CODE_POINTS_ALL All} *
    */ public static final CompatibilityMode OPERA=new CompatibilityMode("Opera",true, new UnterminatedCharacterReferenceSettings(0x3E, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes ); /** * XHTML compatibility mode. *

    * {@link #getName() Name} = XHTML
    * {@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false
    * *
    Recognition of unterminated character references:  (inside attribute)    (outside attribute)   *
    {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_NONE None}{@linkplain #CODE_POINTS_NONE None} *
    {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_NONE None}{@linkplain #CODE_POINTS_NONE None} *
    {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} ={@linkplain #CODE_POINTS_NONE None}{@linkplain #CODE_POINTS_NONE None} *
    */ public static final CompatibilityMode XHTML=new CompatibilityMode("XHTML"); /** * Constructs a new CompatibilityMode with the given {@linkplain #getName() name}. *

    * All properties in the new instance are initially assigned their default values, which are the same as the strict * rules of the {@link #XHTML} compatibility mode. * * @param name the {@linkplain #getName() name} of the new compatibility mode */ public CompatibilityMode(final String name) { this(name,false,new UnterminatedCharacterReferenceSettings(),new UnterminatedCharacterReferenceSettings()); } private CompatibilityMode(final String name, final boolean formFieldNameCaseInsensitive, final UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsInsideAttributeValue, final UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsOutsideAttributeValue) { this.name=name; this.formFieldNameCaseInsensitive=formFieldNameCaseInsensitive; this.unterminatedCharacterReferenceSettingsInsideAttributeValue=unterminatedCharacterReferenceSettingsInsideAttributeValue; this.unterminatedCharacterReferenceSettingsOutsideAttributeValue=unterminatedCharacterReferenceSettingsOutsideAttributeValue; } /** * Returns the name of this compatibility mode. * @return the name of this compatibility mode. */ public String getName() { return name; } /** * Indicates whether {@linkplain FormField#getName() form field names} are treated as case insensitive. *

    * Microsoft Internet Explorer treats field names as case insensitive, * while Mozilla treats them as case sensitive. *

    * The value of this property in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode} * affects all instances of the {@link FormFields} class. * It should be set to the desired configuration before any instances of FormFields are created. * * @return true if {@linkplain FormField#getName() form field names} are treated as case insensitive, otherwise false. * @see #setFormFieldNameCaseInsensitive(boolean) */ public boolean isFormFieldNameCaseInsensitive() { return formFieldNameCaseInsensitive; } /** * Sets whether {@linkplain FormField#getName() form field names} are treated as case insensitive. *

    * See {@link #isFormFieldNameCaseInsensitive()} for the documentation of this property. * * @param value the new value of the property */ public void setFormFieldNameCaseInsensitive(final boolean value) { formFieldNameCaseInsensitive=value; } /** * Returns the maximum unicode code point of an unterminated * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context. *

    * For example, if getUnterminatedCharacterEntityReferenceMaxCodePoint(true) has the value 0xFF (U+00FF) * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then: *

      *
    • {@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&gt",true)} * returns ">".
      * The string is recognised as the character entity reference {@link CharacterEntityReference#_gt &gt;} * despite the fact that it is unterminated, * because its unicode code point U+003E is below the maximum of U+00FF set by this property. *
    • {@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&euro",true)} * returns "&euro".
      * The string is not recognised as the character entity reference {@link CharacterEntityReference#_euro &euro;} * because it is unterminated * and its unicode code point U+20AC is above the maximum of U+00FF set by this property. *
    *

    * See the documentation of the {@link Attribute#getValue()} method for further discussion. * * @param insideAttributeValue the context within an HTML document - true if inside an attribute value or false if outside an attribute value. * @return the maximum unicode code point of an unterminated {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context. * @see #setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint) */ public int getUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue) { return getUnterminatedCharacterReferenceSettings(insideAttributeValue).characterEntityReferenceMaxCodePoint; } /** * Sets the maximum unicode code point of an unterminated * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context. *

    * See {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property. * * @param insideAttributeValue the context within an HTML document - true if inside an attribute value or false if outside an attribute value. * @param maxCodePoint the maximum unicode code point. */ public void setUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) { getUnterminatedCharacterReferenceSettings(insideAttributeValue).characterEntityReferenceMaxCodePoint=maxCodePoint; } /** * Returns the maximum unicode code point of an unterminated * decimal character reference which is to be recognised in the specified context. *

    * For example, if getUnterminatedDecimalCharacterReferenceMaxCodePoint(true) had the hypothetical value 0xFF (U+00FF) * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then: *

      *
    • {@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&#62",true)} * returns ">".
      * The string is recognised as the numeric character reference &#62; * despite the fact that it is unterminated, * because its unicode code point U+003E is below the maximum of U+00FF set by this property. *
    • {@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&#8364",true)} * returns "&#8364".
      * The string is not recognised as the numeric character reference &#8364; * because it is unterminated * and its unicode code point U+20AC is above the maximum of U+00FF set by this property. *
    * * @param insideAttributeValue the context within an HTML document - true if inside an attribute value or false if outside an attribute value. * @return the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context. * @see #setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint) */ public int getUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) { return getUnterminatedCharacterReferenceSettings(insideAttributeValue).decimalCharacterReferenceMaxCodePoint; } /** * Sets the maximum unicode code point of an unterminated * decimal character reference which is to be recognised in the specified context. *

    * See {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property. * * @param insideAttributeValue the context within an HTML document - true if inside an attribute value or false if outside an attribute value. * @param maxCodePoint the maximum unicode code point. */ public void setUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) { getUnterminatedCharacterReferenceSettings(insideAttributeValue).decimalCharacterReferenceMaxCodePoint=maxCodePoint; } /** * Returns the maximum unicode code point of an unterminated * hexadecimal character reference which is to be recognised in the specified context. *

    * For example, if getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(true) had the hypothetical value 0xFF (U+00FF) * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then: *

      *
    • {@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&#x3e",true)} * returns ">".
      * The string is recognised as the numeric character reference &#x3e; * despite the fact that it is unterminated, * because its unicode code point U+003E is below the maximum of U+00FF set by this property. *
    • {@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&#x20ac",true)} * returns "&#x20ac".
      * The string is not recognised as the numeric character reference &#x20ac; * because it is unterminated * and its unicode code point U+20AC is above the maximum of U+00FF set by this property. *
    * * @param insideAttributeValue the context within an HTML document - true if inside an attribute value or false if outside an attribute value. * @return the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context. * @see #setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint) */ public int getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) { return getUnterminatedCharacterReferenceSettings(insideAttributeValue).hexadecimalCharacterReferenceMaxCodePoint; } /** * Sets the maximum unicode code point of an unterminated * hexadecimal character reference which is to be recognised in the specified context. *

    * See {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property. * * @param insideAttributeValue the context within an HTML document - true if inside an attribute value or false if outside an attribute value. * @param maxCodePoint the maximum unicode code point. */ public void setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) { getUnterminatedCharacterReferenceSettings(insideAttributeValue).hexadecimalCharacterReferenceMaxCodePoint=maxCodePoint; } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { return "Form field name case insensitive: "+formFieldNameCaseInsensitive +Config.NewLine+"Maximum codepoints in unterminated character references:" +Config.NewLine+" Inside attribute values:" +unterminatedCharacterReferenceSettingsInsideAttributeValue +Config.NewLine+" Outside attribute values:" +unterminatedCharacterReferenceSettingsOutsideAttributeValue; } /** * Returns the {@linkplain #getName() name} of this compatibility mode. * @return the {@linkplain #getName() name} of this compatibility mode. */ public String toString() { return getName(); } /** * Returns the unterminated character reference settings that apply to the specified context. * @param insideAttributeValue true selects the settings used inside attribute values, false selects the settings used outside attribute values. * @return the settings object for the specified context. */ UnterminatedCharacterReferenceSettings getUnterminatedCharacterReferenceSettings(final boolean insideAttributeValue) { return insideAttributeValue ? 
unterminatedCharacterReferenceSettingsInsideAttributeValue : unterminatedCharacterReferenceSettingsOutsideAttributeValue; } } } jericho-html-3.1/src/java/net/htmlparser/jericho/LoggerDisabled.java0000644000175000017500000000266711204550410025517 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; /** * A {@link Logger} implementation that discards every message: * the error, warn, info and debug methods have empty bodies and every is...Enabled() method returns false. * The constructor is private; the only instance is the shared {@link #INSTANCE}. */ final class LoggerDisabled implements Logger { public static final LoggerDisabled INSTANCE=new LoggerDisabled(); private LoggerDisabled() {} public void error(String message) {} public void warn(String message) {} public void info(String message) {} public void debug(String message) {} public boolean isErrorEnabled() { return false; } public boolean isWarnEnabled() { return false; } public boolean isInfoEnabled() { return false; } public boolean isDebugEnabled() { return false; } } jericho-html-3.1/src/java/net/htmlparser/jericho/SubCache.java0000644000175000017500000004237211204550410024322 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents a cached map of character positions to tags for a particular tag type, * or for all tag types if the tagType field is null. 
*/ final class SubCache { private final Cache cache; public final TagType tagType; // does not support unregistered tag types at present private final CacheEntry bof; // beginning of file marker private final CacheEntry eof; // end of file marker private CacheEntry[] array=new CacheEntry[INITIAL_CAPACITY]; private static final int INITIAL_CAPACITY=64; public SubCache(final Cache cache, final TagType tagType) { this.cache=cache; this.tagType=tagType; array[0]=bof=new CacheEntry(0,-1,null,false,false); array[1]=eof=new CacheEntry(1,cache.getSourceLength(),null,false,false); } public int size() { return eof.index+1; } public void clear() { bof.nextCached=false; eof.index=1; eof.previousCached=false; array[1]=eof; } public void bulkLoad_Init(final int tagCount) { array=new CacheEntry[tagCount+2]; array[0]=bof; bof.nextCached=true; array[eof.index=tagCount+1]=eof; eof.previousCached=true; } public void bulkLoad_Set(final int tagsIndex, final Tag tag) { final int index=tagsIndex+1; array[index]=new CacheEntry(index,tag.begin,tag,true,true); } public void bulkLoad_AddToTypeSpecificCache(final Tag tag) { final int index=eof.index; if (array.length==eof.index+1) doubleCapacity(); array[index]=new CacheEntry(index,tag.begin,tag,true,true); eof.index++; } public void bulkLoad_FinaliseTypeSpecificCache() { bof.nextCached=true; eof.previousCached=true; array[eof.index]=eof; } public Tag getTagAt(final int pos, final boolean serverTagOnly) { // This must only be called on allTagTypesSubCache (ie tagType==null) if (cache.getSourceLength()==0) return null; if (pos<0 || pos>=cache.getSourceLength()) return null; final int index=getIndexOfPos(pos); final CacheEntry cacheEntry=array[index]; if (cacheEntry.pos==pos) { if (serverTagOnly && !cacheEntry.tag.getTagType().isServerTag()) return null; return cacheEntry.tag; } if (cacheEntry.previousCached) return null; return cache.addTagAt(pos,serverTagOnly); } public void addTagAt(final int pos, final Tag tag) { final int 
index=getIndexOfPos(pos); final CacheEntry nextCacheEntry=array[index]; final CacheEntry previousCacheEntry=getPrevious(nextCacheEntry); add(previousCacheEntry,new CacheEntry(index,pos,tag,pos==previousCacheEntry.pos+1,pos==nextCacheEntry.pos-1),nextCacheEntry); } /** * Returns the tag located at or before the specified position, or null if the source is empty, pos lies outside the source, or no such tag is found. * A searchable tag cached exactly at pos is returned directly; otherwise the search continues backwards through the private getPreviousTag overload and the outcome is recorded via addPreviousTag. */ public Tag getPreviousTag(final int pos) { // Note that this method never returns tags for which tag.includeInSearch() is false, so separate caching of unregistered tags won't work. if (cache.getSourceLength()==0) return null; if (pos<0 || pos>=cache.getSourceLength()) return null; int index=getIndexOfPos(pos); final CacheEntry cacheEntry=array[index]; final Tag tag; if (cacheEntry.pos==pos && cacheEntry.tag!=null && cacheEntry.tag.includeInSearch()) return cacheEntry.tag; tag=getPreviousTag(getPrevious(cacheEntry),pos,cacheEntry); addPreviousTag(pos,tag); return tag; } public Tag getNextTag(final int pos) { // Note that this method never returns tags for which tag.includeInSearch() is false, so separate caching of unregistered tags won't work. 
if (cache.getSourceLength()==0) return null; if (pos<0 || pos>=cache.getSourceLength()) return null; int index=getIndexOfPos(pos); final CacheEntry cacheEntry=array[index]; final Tag tag; if (cacheEntry.pos==pos) { if (cacheEntry.tag!=null && cacheEntry.tag.includeInSearch()) return cacheEntry.tag; tag=getNextTag(cacheEntry,pos,getNext(cacheEntry)); } else { tag=getNextTag(getPrevious(cacheEntry),pos,cacheEntry); } addNextTag(pos,tag); return tag; } public Iterator getTagIterator() { return new TagIterator(); } public String toString() { return appendTo(new StringBuilder()).toString(); } protected StringBuilder appendTo(final StringBuilder sb) { sb.append("Cache for TagType : ").append(tagType).append(Config.NewLine); for (int i=0; i<=lastIndex(); i++) sb.append(array[i]).append(Config.NewLine); return sb; } private Tag getPreviousTag(CacheEntry previousCacheEntry, int pos, CacheEntry nextCacheEntry) { // previousCacheEntry.pos < pos <= nextCacheEntry.pos while (true) { if (!nextCacheEntry.previousCached) { final Tag tag=Tag.getPreviousTagUncached(cache.source,pos,tagType,previousCacheEntry.pos); // if useAllTypesCache is true, automatically adds tag to all caches if one is found, and maybe some unregistered tags along the way. 
if (tag!=null) { if (!cache.source.useAllTypesCache) addTagAt(tag.begin,tag); // have to add tag manually if useAllTypesCache is false return tag; } } if (previousCacheEntry==bof) return null; if (previousCacheEntry.tag!=null && previousCacheEntry.tag.includeInSearch()) return previousCacheEntry.tag; pos=previousCacheEntry.pos-1; previousCacheEntry=getPrevious(nextCacheEntry=previousCacheEntry); } } private Tag getNextTag(CacheEntry previousCacheEntry, int pos, CacheEntry nextCacheEntry) { // previousCacheEntry.pos <= pos < nextCacheEntry.pos while (true) { if (!previousCacheEntry.nextCached) { final Tag tag=Tag.getNextTagUncached(cache.source,pos,tagType,nextCacheEntry.pos); // if useAllTypesCache is true, automatically adds tag to caches if one is found, and maybe some unregistered tags along the way. if (tag!=null) { if (!cache.source.useAllTypesCache) addTagAt(tag.begin,tag); // have to add tag manually if useAllTypesCache is false return tag; } } if (nextCacheEntry==eof) return null; if (nextCacheEntry.tag!=null && nextCacheEntry.tag.includeInSearch()) return nextCacheEntry.tag; pos=nextCacheEntry.pos+1; nextCacheEntry=getNext(previousCacheEntry=nextCacheEntry); } } private void addPreviousTag(final int pos, final Tag tag) { final int tagPos=(tag==null) ? bof.pos : tag.begin; if (tagPos==pos) return; // the tag was found exactly on pos, so cache has already been fully updated // tagPos < pos int index=getIndexOfPos(pos); CacheEntry stepCacheEntry=array[index]; // stepCacheEntry.pos is either == or > than tagPos. // stepCacheEntry.pos is either == or > pos. 
int compactStartIndex=Integer.MAX_VALUE; if (stepCacheEntry.pos==pos) { // a cache entry was aleady at pos (containing null or wrong tagType) stepCacheEntry.previousCached=true; if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);} } else if (!stepCacheEntry.previousCached) { // we have to add a new cacheEntry at pos: if (tagType==null) cache.addTagAt(pos,false); // this pos has never been checked before, so add it to all relevant SubCaches (a null or unregistered tag entry is always added to this SubCache) else addTagAt(pos,null); // all we know is that the pos doesn't contain a tag of this SubCache's type, so add a null entry to this SubCache only. // now we have to reload the index and stepCacheEntry as they may have changed: stepCacheEntry=array[index=getIndexOfPos(pos)]; // stepCacheEntry.pos is either == or > than tagPos. // stepCacheEntry.pos is either == or > pos. (the latter if the added entry was redundant) if (stepCacheEntry.pos==pos) { // perform same steps as in the (stepCacheEntry.pos==pos) if condition above: stepCacheEntry.previousCached=true; if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);} } } while (true) { stepCacheEntry=array[--index]; if (stepCacheEntry.pos<=tagPos) break; if (stepCacheEntry.tag!=null) { if (stepCacheEntry.tag.includeInSearch()) throw new SourceCacheEntryMissingInternalError(tagType,tag,this); stepCacheEntry.previousCached=true; stepCacheEntry.nextCached=true; } else { stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index); } } if (stepCacheEntry.pos!=tagPos) throw new FoundCacheEntryMissingInternalError(tagType,tag,this); stepCacheEntry.nextCached=true; compact(compactStartIndex); } private void addNextTag(final int pos, final Tag tag) { final int tagPos=(tag==null) ? 
eof.pos : tag.begin; if (tagPos==pos) return; // the tag was found exactly on pos, so cache has already been fully updated // tagPos > pos int index=getIndexOfPos(pos); CacheEntry stepCacheEntry=array[index]; // stepCacheEntry.pos may be <, == or > than tagPos. // stepCacheEntry.pos is either == or > pos. int compactStartIndex=Integer.MAX_VALUE; if (stepCacheEntry.pos==pos) { // a cache entry was aleady at pos (containing null or wrong tagType) stepCacheEntry.nextCached=true; if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);} } else if (!getPrevious(stepCacheEntry).nextCached) { // we have to add a new cacheEntry at pos: if (tagType==null) cache.addTagAt(pos,false); // this pos has never been checked before, so add it to all relevant SubCaches (a null or unregistered tag entry is always added to this SubCache) else addTagAt(pos,null); // all we know is that the pos doesn't contain a tag of this SubCache's type, so add a null entry to this SubCache only. // now we have to reload the index and stepCacheEntry as they may have changed: stepCacheEntry=array[index=getIndexOfPos(pos)]; // stepCacheEntry.pos may be <, == or > than tagPos. // stepCacheEntry.pos is either == or > pos. 
(the latter if the added entry was redundant) if (stepCacheEntry.pos==pos) { // perform same steps as in the (stepCacheEntry.pos==pos) if condition above: stepCacheEntry.nextCached=true; if (stepCacheEntry.isRedundant()) {stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index);} } } if (stepCacheEntry.pos=tagPos) break; if (stepCacheEntry.tag!=null) { if (stepCacheEntry.tag.includeInSearch()) throw new SourceCacheEntryMissingInternalError(tagType,tag,this); stepCacheEntry.previousCached=true; stepCacheEntry.nextCached=true; } else { stepCacheEntry.removed=true; compactStartIndex=Math.min(compactStartIndex,stepCacheEntry.index); } } if (stepCacheEntry.pos!=tagPos) throw new FoundCacheEntryMissingInternalError(tagType,tag,this); } stepCacheEntry.previousCached=true; compact(compactStartIndex); } private void compact(int i) { final int lastIndex=lastIndex(); int removedCount=1; while (i>1; while (true) { final CacheEntry cacheEntry=array[index]; if (pos>cacheEntry.pos) { final CacheEntry nextCacheEntry=getNext(cacheEntry); if (pos<=nextCacheEntry.pos) return nextCacheEntry.index; minIndex=nextCacheEntry.index; } else if (pospreviousCacheEntry.pos) return index; maxIndex=previousCacheEntry.index; } else { return index; } index=(minIndex+maxIndex)>>1; // using the following complex calculation instead of a binary search is likely to result in less iterations but is slower overall: // final int minIndexPos=array[minIndex].pos; // index=((maxIndex-minIndex-1)*(pos-minIndexPos))/(array[maxIndex].pos-minIndexPos)+minIndex+1; // approximate next guess at index, assuming even distribution of cache entries between min and max entries } } private CacheEntry getNext(final CacheEntry cacheEntry) { return array[cacheEntry.index+1]; } private CacheEntry getPrevious(final CacheEntry cacheEntry) { return array[cacheEntry.index-1]; } private int lastIndex() { return eof.index; } private void insert(final CacheEntry cacheEntry) { final int 
index=cacheEntry.index; if (array.length==size()) doubleCapacity(); for (int i=lastIndex(); i>=index; i--) { final CacheEntry movedCacheEntry=array[i]; array[movedCacheEntry.index=(i+1)]=movedCacheEntry; } array[index]=cacheEntry; } private void remove(final CacheEntry cacheEntry) { final int lastIndex=lastIndex(); for (int i=cacheEntry.index; i=0; i--) newArray[i]=array[i]; array=newArray; } private static class CacheEntryMissingInternalError extends AssertionError { public CacheEntryMissingInternalError(final TagType tagType, final Tag tag, final SubCache subCache, final String message) { super("INTERNAL ERROR: Inconsistent Cache State for TagType \""+tagType+"\" - "+message+' '+tag.getDebugInfo()+'\n'+subCache); } } private static class SourceCacheEntryMissingInternalError extends CacheEntryMissingInternalError { public SourceCacheEntryMissingInternalError(final TagType tagType, final Tag tag, final SubCache subCache) { super(tagType,tag,subCache,"cache entry no longer found in source:"); } } private static class FoundCacheEntryMissingInternalError extends CacheEntryMissingInternalError { public FoundCacheEntryMissingInternalError(final TagType tagType, final Tag tag, final SubCache subCache) { super(tagType,tag,subCache,"missing cache entry for found tag"); } } private final class TagIterator implements Iterator { private int i=0; private Tag nextTag; public TagIterator() { loadNextTag(); } public boolean hasNext() { return nextTag!=null; } public Tag next() { final Tag result=nextTag; loadNextTag(); return result; } public void remove() { throw new UnsupportedOperationException(); } private void loadNextTag() { while (++i<=lastIndex() && (nextTag=array[i].tag)==null) {} } } private static final class CacheEntry { public int index; public final int pos; public final Tag tag; public boolean previousCached; public boolean nextCached; public boolean removed=false; public CacheEntry(final int index, final int pos, final Tag tag, final boolean previousCached, final 
boolean nextCached) { this.index=index; this.pos=pos; this.tag=tag; this.previousCached=previousCached; this.nextCached=nextCached; } public boolean isRedundant() { return tag==null && previousCached && nextCached; } public String toString() { return pad(index,4)+" "+pad(pos,5)+" "+(previousCached?'|':'-')+' '+(nextCached?'|':'-')+' '+(tag==null ? "null" : tag.getDebugInfo()); } private String pad(final int n, final int places) { final String nstring=String.valueOf(n); final StringBuilder sb=new StringBuilder(places); for (int i=places-nstring.length(); i>0; i--) sb.append(' '); sb.append(nstring); return sb.toString(); } } } jericho-html-3.1/src/java/net/htmlparser/jericho/RowColumnVector.java0000644000175000017500000001013211204550410025742 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents the row and column number of a character position in the source document. *

    * Obtained using the {@link Source#getRowColumnVector(int pos)} method.
    */
public final class RowColumnVector {
    private final int row;
    private final int column;
    private final int pos;

    /** The vector of the first character in any document: row 1, column 1, position 0. */
    private static final RowColumnVector FIRST=new RowColumnVector(1,1,0);
    /** Sentinel cache array returned for streamed sources; it is recognised by identity, never by content. */
    private static final RowColumnVector[] STREAMED=new RowColumnVector[0];

    private RowColumnVector(final int row, final int column, final int pos) {
        this.row=row;
        this.column=column;
        this.pos=pos;
    }

    private RowColumnVector(final int pos) {
        // used in Streamed source where row and column aren't available.
        this(-1,-1,pos);
    }

    /**
     * Returns the row number of this character position in the source document.
     * <p>
     * If a {@link StreamedSource} is in use, this method always returns -1.
     *
     * @return the row number of this character position in the source document.
     */
    public int getRow() {
        return row;
    }

    /**
     * Returns the column number of this character position in the source document.
     * <p>
     * If a {@link StreamedSource} is in use, this method always returns -1.
     *
     * @return the column number of this character position in the source document.
     */
    public int getColumn() {
        return column;
    }

    /**
     * Returns the character position in the source document.
     * @return the character position in the source document.
     */
    public int getPos() {
        return pos;
    }

    /**
     * Returns a string representation of this character position.
     * <p>
     * The returned string has the format "(r<i>row</i>,c<i>column</i>,p<i>pos</i>)",
     * or "(p<i>pos</i>)" if the row is -1 because the row and column are not available.
     *
     * @return a string representation of this character position.
     */
    public String toString() {
        return appendTo(new StringBuilder(20)).toString();
    }

    /**
     * Appends the string representation described in {@link #toString()} to the specified builder.
     *
     * @param sb the builder to append to.
     * @return the same builder, to allow call chaining.
     */
    StringBuilder appendTo(final StringBuilder sb) {
        if (row!=-1) return sb.append("(r").append(row).append(",c").append(column).append(",p").append(pos).append(')');
        return sb.append("(p").append(pos).append(')');
    }

    /**
     * Builds the array of row start positions for the specified source.
     * <p>
     * The array holds one entry per row, each with column 1 and the position of the first character of that row,
     * in ascending order of position. A new row starts after every <code>\n</code>,
     * and after every <code>\r</code> that is not immediately followed by <code>\n</code>.
     *
     * @param source the source document to scan.
     * @return the row start array, or the streamed sentinel array if the source is streamed.
     */
    static RowColumnVector[] getCacheArray(final Source source) {
        if (source.isStreamed()) return STREAMED;
        final int lastSourcePos=source.end-1;
        // the element type must be declared so that toArray(T[]) yields a RowColumnVector[] rather than an Object[]
        final List<RowColumnVector> list=new ArrayList<RowColumnVector>();
        int pos=0;
        list.add(FIRST);
        int row=1;
        while (pos<=lastSourcePos) {
            final char ch=source.charAt(pos);
            if (ch=='\n' || (ch=='\r' && (pos==lastSourcePos || source.charAt(pos+1)!='\n'))) list.add(new RowColumnVector(++row,1,pos+1));
            pos++;
        }
        return list.toArray(new RowColumnVector[list.size()]);
    }

    /**
     * Returns the row and column of the specified position, using a binary search over the row start array.
     *
     * @param cacheArray the array obtained from {@link #getCacheArray(Source)}.
     * @param pos the character position in the source document, assumed to be non-negative.
     * @return the vector for the specified position; row and column are -1 if the array is the streamed sentinel.
     */
    static RowColumnVector get(final RowColumnVector[] cacheArray, final int pos) {
        if (cacheArray==STREAMED) return new RowColumnVector(pos);
        int low=0;
        int high=cacheArray.length-1;
        while (true) {
            final int mid=(low+high) >>> 1; // unsigned shift so the midpoint cannot go negative on overflow
            final RowColumnVector rowColumnVector=cacheArray[mid];
            if (rowColumnVector.pos<pos) {
                // This row starts before pos. It contains pos if it is the last candidate row or the next row starts after pos.
                if (mid==high || cacheArray[mid+1].pos>pos) return new RowColumnVector(rowColumnVector.row,pos-rowColumnVector.pos+1,pos);
                low=mid+1;
            } else if (rowColumnVector.pos>pos) {
                high=mid-1;
            } else {
                // pos is exactly the first character of this row
                return rowColumnVector;
            }
        }
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/Attributes.java0000644000175000017500000006605411204550410024776 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2.
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import net.htmlparser.jericho.nodoc.*; import java.util.*; import java.io.*; /** * Represents the list of {@link Attribute} objects present within a particular {@link StartTag}. *

    * This segment starts at the end of the start tag's {@linkplain StartTag#getName() name} * and ends at the end of the last attribute. *

    * The attributes in this list are a representation of those found in the source document and are not modifiable. * The {@link OutputDocument#replace(Attributes, Map)} and {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} methods * provide the means to add, delete or modify attributes and their values in an {@link OutputDocument}. *

    * Any {@linkplain TagType#isServerTag() server tags} encountered inside the attributes area of a non-server tag * do not interfere with the parsing of the attributes. *

    * If too many syntax errors are encountered while parsing a start tag's attributes, the parser rejects the entire start tag * and generates a {@linkplain Source#getLogger() log} entry. * The threshold for the number of errors allowed can be set using the {@link #setDefaultMaxErrorCount(int)} static method. *

    * Obtained using the {@link StartTag#getAttributes()} method, or explicitly using the {@link Source#parseAttributes(int pos, int maxEnd)} method. *

    * It is common for instances of this class to contain no attributes. *

    * See also the XML 1.0 specification for attributes.
    *
    * @see StartTag
    * @see Attribute
    */
public final class Attributes extends SequentialListSegment {
    private final LinkedList attributeList; // never null
    final boolean containsServerTagOutsideOfAttributeValue;

    /** The states of the attribute parser implemented in the private <code>construct</code> method. */
    private enum ParsingState {
        AFTER_TAG_NAME,
        BETWEEN_ATTRIBUTES,
        IN_NAME,
        AFTER_NAME, // this only happens if an attribute name is followed by whitespace
        START_VALUE,
        IN_VALUE,
        AFTER_VALUE_FINAL_QUOTE
    }

    private static int defaultMaxErrorCount=2; // defines maximum number of minor errors that can be encountered in attributes before entire start tag is rejected.

    /**
     * Creates the segment spanning <code>begin</code> to <code>end</code> and stores the parsed attribute list.
     * Instances are created only by the private <code>construct</code> method.
     */
    private Attributes(final Source source, final int begin, final int end, final LinkedList attributeList, final boolean containsServerTagOutsideOfAttributeValue) {
        super(source,begin,end);
        this.attributeList=attributeList;
        this.containsServerTagOutsideOfAttributeValue=containsServerTagOutsideOfAttributeValue;
    }

    /** called from StartTagType.parseAttributes(Source, int startTagBegin, String tagName) */
    static Attributes construct(final Source source, final int startTagBegin, final StartTagType startTagType, final String tagName) {
        return construct(source,"StartTag",ParsingState.AFTER_TAG_NAME,startTagBegin,-1,-1,startTagType,tagName,defaultMaxErrorCount);
    }

    /** called from StartTag.parseAttributes(int maxErrorCount) */
    static Attributes construct(final Source source, final int startTagBegin, final int attributesBegin, final int maxEnd, final StartTagType startTagType, final String tagName, final int maxErrorCount) {
        return construct(source,"Attributes for StartTag",ParsingState.BETWEEN_ATTRIBUTES,startTagBegin,attributesBegin,maxEnd,startTagType,tagName,maxErrorCount);
    }

    /** called from Source.parseAttributes(int pos, int maxEnd, int maxErrorCount) */
    static Attributes construct(final Source source, final int begin, final int maxEnd, final int maxErrorCount) {
        return construct(source,"Attributes",ParsingState.BETWEEN_ATTRIBUTES,begin,-1,maxEnd,StartTagType.NORMAL,null,maxErrorCount);
    }

    /**
     * Parses an attribute list one character at a time using the {@link ParsingState} state machine.
     * <p>
     * Parsing stops at <code>maxEnd</code> or where the start tag type reports the end of the attributes.
     * A server tag found at the current position is skipped over as a whole.
     * A '&lt;' character that does not start a server tag causes immediate rejection everywhere
     * except inside a quoted attribute value.
     * Minor errors are logged and counted, and the list is rejected once the count exceeds <code>maxErrorCount</code>.
     * <p>
     * NOTE(review): the previous version of this comment stated that any '&lt;' character found within the start tag
     * is treated as though it is part of the attribute list, consistent with the way IE treats it.
     * The code below rejects it in most states, so that statement looks stale; confirm the intended behaviour.
     *
     * @param source the source document.
     * @param logType the description of the object being parsed, used only in log messages.
     * @param parsingState the state in which to start parsing.
     * @param logBegin the position of the beginning of the object being searched (for logging)
     * @param attributesBegin the position of the beginning of the attribute list, or -1 if it should be calculated automatically from logBegin.
     * @param maxEnd the position at which the attributes must end if a terminating character is not found, or -1 if no maximum.
     * @param startTagType the type of the enclosing start tag, which decides where the attributes end.
     * @param tagName the name of the enclosing StartTag, or null if constructing attributes directly.
     * @param maxErrorCount the number of minor errors tolerated before the list is rejected.
     * @return the parsed attributes, or <code>null</code> if the text was rejected.
     */
    private static Attributes construct(final Source source, final String logType, ParsingState parsingState, final int logBegin, int attributesBegin, final int maxEnd, final StartTagType startTagType, final String tagName, final int maxErrorCount) {
        boolean isClosingSlashIgnored=false;
        if (tagName!=null) {
            // 'logBegin' parameter is the start of the associated start tag
            if (attributesBegin==-1) attributesBegin=logBegin+1+tagName.length(); // skip the '<' and the tag name
            if (startTagType==StartTagType.NORMAL && HTMLElements.isClosingSlashIgnored(tagName)) isClosingSlashIgnored=true;
        } else {
            attributesBegin=logBegin;
        }
        int attributesEnd=attributesBegin;
        final LinkedList attributeList=new LinkedList();
        boolean containsServerTagOutsideOfAttributeValue=false;
        final ParseText parseText=source.getParseText();
        int i=attributesBegin;
        char quote=' '; // the quote character of the current value, or a space if the value is unquoted
        Segment nameSegment=null;
        String key=null;
        int currentBegin=-1; // start position of the name or value currently being read
        boolean isTerminatingCharacter=false;
        int errorCount=0;
        try {
            while (!isTerminatingCharacter) {
                if (i==maxEnd || startTagType.atEndOfAttributes(source,i,isClosingSlashIgnored)) isTerminatingCharacter=true;
                final char ch=parseText.charAt(i); // running off the end of the text is handled by the catch block below
                // First check if there is a server tag in this position:
                if (ch=='<') {
                    final Tag interlopingTag=Tag.getTagAt(source,i,true); // search for server tags only
                    if (interlopingTag!=null) {
                        // There is a server tag in this position. Skip over it:
                        if (parsingState==ParsingState.START_VALUE) {
                            // the server tag is the start of an unquoted attribute value
                            currentBegin=i;
                            quote=' ';
                            parsingState=ParsingState.IN_VALUE;
                        }
                        i=attributesEnd=interlopingTag.end;
                        if (parsingState!=ParsingState.IN_VALUE) containsServerTagOutsideOfAttributeValue=true;
                        continue;
                    }
                }
                // There is no server tag in this position. Now we can parse the attributes:
                switch (parsingState) {
                    case IN_VALUE:
                        if (isTerminatingCharacter || ch==quote || (quote==' ' && isWhiteSpace(ch))) {
                            Segment valueSegment;
                            Segment valueSegmentIncludingQuotes;
                            if (quote==' ') {
                                valueSegment=valueSegmentIncludingQuotes=new Segment(source,currentBegin,i);
                            } else {
                                if (isTerminatingCharacter) {
                                    if (i==maxEnd) {
                                        if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"terminated in the middle of a quoted attribute value",i);
                                        if (reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null;
                                        valueSegment=new Segment(source,currentBegin,i);
                                        valueSegmentIncludingQuotes=new Segment(source,currentBegin-1,i); // this is missing the end quote
                                    } else {
                                        // don't want to terminate, only encountered a terminating character in the middle of a quoted value
                                        isTerminatingCharacter=false;
                                        break;
                                    }
                                } else {
                                    valueSegment=new Segment(source,currentBegin,i);
                                    valueSegmentIncludingQuotes=new Segment(source,currentBegin-1,i+1);
                                }
                            }
                            attributeList.add(new Attribute(source,key,nameSegment,valueSegment,valueSegmentIncludingQuotes));
                            attributesEnd=valueSegmentIncludingQuotes.getEnd();
                            parsingState=ParsingState.BETWEEN_ATTRIBUTES;
                        } else if (ch=='<' && quote==' ') {
                            if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character in unquoted attribute value",i);
                            return null;
                        }
                        break;
                    case IN_NAME:
                        if (isTerminatingCharacter || ch=='=' || isWhiteSpace(ch)) {
                            nameSegment=new Segment(source,currentBegin,i);
                            key=nameSegment.toString().toLowerCase();
                            if (isTerminatingCharacter) {
                                attributeList.add(new Attribute(source,key,nameSegment)); // attribute with no value
                                attributesEnd=i;
                            } else {
                                parsingState=(ch=='=' ? ParsingState.START_VALUE : ParsingState.AFTER_NAME);
                            }
                        } else if (!Tag.isXMLNameChar(ch)) {
                            // invalid character detected in attribute name.
                            if (ch=='<') {
                                if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character in attribute name",i);
                                return null;
                            }
                            if (isInvalidEmptyElementTag(startTagType,source,i,logType,tagName,logBegin)) break;
                            if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"contains attribute name with invalid character",i);
                            if (reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null;
                        }
                        break;
                    case AFTER_NAME:
                        // attribute name has been followed by whitespace, but may still be followed by an '=' character.
                        if (isTerminatingCharacter || !(ch=='=' || isWhiteSpace(ch))) {
                            attributeList.add(new Attribute(source,key,nameSegment)); // attribute with no value
                            attributesEnd=nameSegment.getEnd();
                            if (isTerminatingCharacter) break;
                            // The current character is the first character of an attribute name
                            parsingState=ParsingState.BETWEEN_ATTRIBUTES;
                            i--; // want to reparse the same character again, so decrement i. Note we could instead just fall into the next case statement without a break, but such code is always discouraged.
                        } else if (ch=='=') {
                            parsingState=ParsingState.START_VALUE;
                        } else if (ch=='<') {
                            // NOTE(review): this branch looks unreachable. It is only tested when ch is '=' or whitespace,
                            // so a '<' here is handled by the first branch and then rejected in BETWEEN_ATTRIBUTES
                            // with a different log message (unless isWhiteSpace('<') can be true; confirm).
                            if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character after attribute name",i);
                            return null;
                        }
                        break;
                    case BETWEEN_ATTRIBUTES:
                        if (!isTerminatingCharacter) {
                            // the quote variable is used here to make sure whitespace has come after the last quoted attribute value
                            if (isWhiteSpace(ch)) {
                                quote=' ';
                            } else {
                                if (quote!=' ') {
                                    if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"has missing whitespace after quoted attribute value",i);
                                    // only count this as an error if there have already been other errors, otherwise allow unlimited errors of this type.
                                    if (errorCount>0 && reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null;
                                }
                                if (!Tag.isXMLNameStartChar(ch)) {
                                    // invalid character detected as first character of attribute name.
                                    if (ch=='<') {
                                        if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character",i);
                                        return null;
                                    }
                                    if (isInvalidEmptyElementTag(startTagType,source,i,logType,tagName,logBegin)) break;
                                    // NOTE(review): the following check repeats the condition just tested inside isInvalidEmptyElementTag,
                                    // so it looks unreachable as long as atEndOfAttributes returns the same answer for the same arguments.
                                    if (startTagType==StartTagType.NORMAL && startTagType.atEndOfAttributes(source,i,false)) {
                                        // This checks whether we've found the characters "/>" but it wasn't recognised as the closing delimiter because isClosingSlashIgnored is true.
                                        if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"contains a '/' character before the closing '>', which is ignored because tags of this name cannot be empty-element tags");
                                        break;
                                    }
                                    if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"contains attribute name with invalid first character",i);
                                    if (reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null;
                                }
                                parsingState=ParsingState.IN_NAME;
                                currentBegin=i;
                            }
                        }
                        break;
                    case START_VALUE:
                        currentBegin=i;
                        if (isTerminatingCharacter) {
                            if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"has missing attribute value after '=' sign",i);
                            // only count this as an error if there have already been other errors, otherwise allow unlimited errors of this type.
                            if (errorCount>0 && reachedMaxErrorCount(++errorCount,source,logType,tagName,logBegin,maxErrorCount)) return null;
                            // record the attribute with an empty value located at the terminating position
                            final Segment valueSegment=new Segment(source,i,i);
                            attributeList.add(new Attribute(source,key,nameSegment,valueSegment,valueSegment));
                            attributesEnd=i;
                            parsingState=ParsingState.BETWEEN_ATTRIBUTES;
                            break;
                        }
                        if (ch=='\'' || ch=='"') {
                            quote=ch;
                            currentBegin++; // the value itself starts after the opening quote
                        } else if (isWhiteSpace(ch)) {
                            break; // just ignore whitespace after the '=' sign as nearly all browsers do.
                        } else if (ch=='<') {
                            if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because of '<' character at the start of an attribute value",i);
                            return null;
                        } else {
                            quote=' ';
                        }
                        parsingState=ParsingState.IN_VALUE;
                        break;
                    case AFTER_TAG_NAME:
                        if (!isTerminatingCharacter) {
                            if (!isWhiteSpace(ch)) {
                                if (isInvalidEmptyElementTag(startTagType,source,i,logType,tagName,logBegin)) break;
                                if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because the name contains an invalid character",i);
                                return null;
                            }
                            parsingState=ParsingState.BETWEEN_ATTRIBUTES;
                        }
                        break;
                }
                i++;
            }
            return new Attributes(source,attributesBegin,attributesEnd,attributeList,containsServerTagOutsideOfAttributeValue);
        } catch (IndexOutOfBoundsException ex) {
            // the end of the text was reached before a terminating character was found
            if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because it has no closing '>' character");
            return null;
        }
    }

    /**
     * Indicates whether the specified error count exceeds the maximum allowed, logging the rejection if it does.
     *
     * @return <code>true</code> if <code>errorCount</code> is greater than <code>maxErrorCount</code>, otherwise <code>false</code>.
     */
    private static boolean reachedMaxErrorCount(final int errorCount, final Source source, final String logType, final String tagName, final int logBegin, final int maxErrorCount) {
        if (errorCount<=maxErrorCount) return false;
        if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"rejected because it contains too many errors");
        return true;
    }

    /**
     * Indicates whether position <code>i</code> of a normal start tag is at the end of the attributes
     * when the closing slash is not ignored, logging the ignored slash if so.
     *
     * @return <code>true</code> if the condition holds and the character at <code>i</code> should be skipped, otherwise <code>false</code>.
     */
    private static boolean isInvalidEmptyElementTag(final StartTagType startTagType, final Source source, final int i, final String logType, final String tagName, final int logBegin) {
        // This checks whether we've found the characters "/>" but it wasn't recognised as the closing delimiter because isClosingSlashIgnored is true.
        if (startTagType!=StartTagType.NORMAL || !startTagType.atEndOfAttributes(source,i,false)) return false;
        if (source.logger.isInfoEnabled()) log(source,logType,tagName,logBegin,"contains a '/' character before the closing '>', which is ignored because tags of this name cannot be empty-element tags");
        return true;
    }

    /**
     * Returns the {@link Attribute} with the specified name (case insensitive).
     *

    * If more than one attribute exists with the specified name (which is illegal HTML), * the first is returned. * * @param name the name of the attribute to get. * @return the attribute with the specified name, or null if no attribute with the specified name exists. * @see #getValue(String name) */ public Attribute get(final String name) { if (size()==0) return null; for (int i=0; i * Returns null if no attribute with the specified name exists or * the attribute {@linkplain Attribute#hasValue() has no value}. *

    * This is equivalent to {@link #get(String) get(name)}.{@link Attribute#getValue() getValue()}, * except that it returns null if no attribute with the specified name exists instead of throwing a * NullPointerException. * * @param name the name of the attribute to get. * @return the {@linkplain CharacterReference#decode(CharSequence) decoded} value of the attribute with the specified name, or null if the attribute does not exist or {@linkplain Attribute#hasValue() has no value}. * @see Attribute#getValue() */ public String getValue(final String name) { final Attribute attribute=get(name); return attribute==null ? null : attribute.getValue(); } /** * Returns the raw (not {@linkplain CharacterReference#decode(CharSequence) decoded}) value of the attribute, or null if the attribute {@linkplain Attribute#hasValue() has no value}. *

    * This is an internal convenience method. * * @return the raw (not {@linkplain CharacterReference#decode(CharSequence) decoded}) value of the attribute, or null if the attribute {@linkplain Attribute#hasValue() has no value}. */ String getRawValue(final String name) { final Attribute attribute=get(name); return attribute==null || !attribute.hasValue() ? null : attribute.getValueSegment().toString(); } /** * Returns the number of attributes. *

    * This is equivalent to calling the size() method specified in the List interface. * * @return the number of attributes. */ public int getCount() { return attributeList.size(); } /** * Returns an iterator over the {@link Attribute} objects in this list in order of appearance. * @return an iterator over the {@link Attribute} objects in this list in order of appearance. */ public Iterator iterator() { return listIterator(); } /** * Returns a list iterator of the {@link Attribute} objects in this list in order of appearance, * starting at the specified position in the list. *

    * The specified index indicates the first item that would be returned by an initial call to the next() method. * An initial call to the previous() method would return the item with the specified index minus one. *

    * IMPLEMENTATION NOTE: For efficiency reasons this method does not return an immutable list iterator. * Calling any of the add(Object), remove() or set(Object) methods on the returned * ListIterator does not throw an exception but could result in unexpected behaviour. * * @param index the index of the first item to be returned from the list iterator (by a call to the next() method). * @return a list iterator of the items in this list (in proper sequence), starting at the specified position in the list. * @throws IndexOutOfBoundsException if the specified index is out of range (index < 0 || index > size()). */ public ListIterator listIterator(final int index) { return attributeList.listIterator(index); } /** * Populates the specified Map with the name/value pairs from these attributes. *

    * Both names and values are stored as String objects. *

    * The entries are added in order of apprearance in the source document. *

    * An attribute with {@linkplain Attribute#hasValue() no value} is represented by a map entry with a null value. *

    * Attribute values are automatically {@linkplain CharacterReference#decode(CharSequence) decoded} * before storage in the map. * * @param attributesMap the map to populate, must not be null. * @param convertNamesToLowerCase specifies whether all attribute names are converted to lower case in the map. * @return the same map specified as the argument to the attributesMap parameter, populated with the name/value pairs from these attributes. * @see #generateHTML(Map attributesMap) */ public Map populateMap(final Map attributesMap, final boolean convertNamesToLowerCase) { for (Attribute attribute : this) { attributesMap.put(convertNamesToLowerCase ? attribute.getKey() : attribute.getName(),attribute.getValue()); } return attributesMap; } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { final StringBuilder sb=new StringBuilder(); sb.append("Attributes ").append(super.getDebugInfo()).append(": "); if (isEmpty()) { sb.append("EMPTY"); } else { sb.append(Config.NewLine); for (Attribute attribute : this) { sb.append(" ").append(attribute.getDebugInfo()); } } return sb.toString(); } /** * Returns the default maximum error count allowed when parsing attributes. *

    * The system default value is 2. *

    * When searching for start tags, the parser can find the end of the start tag only by * {@linkplain StartTagType#parseAttributes(Source,int,String) parsing} * the attributes, as it is valid HTML for attribute values to contain '>' characters * (see the HTML 4.01 specification section 5.3.2). *

    * If the source text being parsed does not follow the syntax of an attribute list at all, the parser assumes * that the text which was originally identified as the beginning of a start tag is in fact some other text, * such as an invalid '<' character in the middle of some text, or part of a script element. * In this case the entire start tag is rejected. *

    * On the other hand, it is quite common for attributes to contain minor syntactical errors, * such as an invalid character in an attribute name. * For this reason the parser allows a certain number of minor errors to occur while parsing an * attribute list before the entire start tag or attribute list is rejected. * This property indicates the number of minor errors allowed. *

    * Major syntactical errors cause the start tag or attribute list to be rejected immediately, regardless * of the maximum error count setting. *

    * Some errors are considered too minor to count at all (ignorable), such as missing white space between the end * of a quoted attribute value and the start of the next attribute name. *

    * The classification of particular syntax errors in attribute lists into major, minor, and ignorable is * not part of the specification and may change in future versions. *

    * Errors are {@linkplain Source#getLogger() logged} as they occur. *

    * The value of this property is set using the {@link #setDefaultMaxErrorCount(int)} method. * * @return the default maximum error count allowed when parsing attributes. * @see Source#parseAttributes(int pos, int maxEnd, int maxErrorCount) */ public static int getDefaultMaxErrorCount() { return defaultMaxErrorCount; } /** * Sets the default maximum error count allowed when parsing attributes. *

    * See the {@link #getDefaultMaxErrorCount()} method for a full description of this property. * * @param value the default maximum error count allowed when parsing attributes. */ public static void setDefaultMaxErrorCount(final int value) { defaultMaxErrorCount=value; } /** * Returns the contents of the specified {@linkplain #populateMap(Map,boolean) attributes map} as HTML attribute name/value pairs. *

    * Each attribute (including the first) is preceded by a single space, and all values are * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes. *

    * The map keys must be of type String and values must be objects that implement the CharSequence interface. *

    * A null value represents an attribute with no value. * * @param attributesMap a map containing attribute name/value pairs. * @return the contents of the specified {@linkplain #populateMap(Map,boolean) attributes map} as HTML attribute name/value pairs. * @see StartTag#generateHTML(String tagName, Map attributesMap, boolean emptyElementTag) */ public static String generateHTML(final Map attributesMap) { final StringBuilder sb=new StringBuilder(); try {appendHTML(sb,attributesMap);} catch (IOException ex) {} // IOException never occurs in StringWriter return sb.toString(); } /** * Outputs the contents of the specified {@linkplain #populateMap(Map,boolean) attributes map} as HTML attribute name/value pairs to the specified Appendable object. *

    * Each attribute is preceded by a single space, and all values are * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes. * * @param appendable the Appendable object to which the output is to be sent. * @param attributesMap a map containing attribute name/value pairs. * @throws IOException if an I/O exception occurs. * @see #populateMap(Map attributesMap, boolean convertNamesToLowerCase) */ static void appendHTML(final Appendable appendable, final Map attributesMap) throws IOException { for (Map.Entry entry : attributesMap.entrySet()) { Attribute.appendHTML(appendable,entry.getKey(),entry.getValue()); } } /** Calls appendTidy on each attribute in iteration order, passing the Tag returned by the previous call (initially nextTag), and returns the same appendable. */ Appendable appendTidy(final Appendable appendable, Tag nextTag) throws IOException { for (Attribute attribute : this) nextTag=attribute.appendTidy(appendable,nextTag); return appendable; } /** Returns a new LinkedHashMap filled by populateMap; the map is created with an initial capacity of getCount()*2 and a load factor of 1.0. */ Map getMap(final boolean convertNamesToLowerCase) { return populateMap(new LinkedHashMap(getCount()*2,1.0F),convertNamesToLowerCase); } /** Logs at info level a message assembled from part1, a space, part2, " at ", the row/column vector for begin, a space, part3, " at position " and the row/column vector for pos. NOTE(review): assumes the appendTo calls append to and return the builder passed in - confirm. */ private static void log(final Source source, final String part1, final CharSequence part2, final int begin, final String part3, final int pos) { source.logger.info(source.getRowColumnVector(pos).appendTo(source.getRowColumnVector(begin).appendTo(new StringBuilder(200).append(part1).append(' ').append(part2).append(" at ")).append(' ').append(part3).append(" at position ")).toString()); } /** Logs at info level a message assembled from part1, a space, part2, " at ", the row/column vector for begin, a space and part3. NOTE(review): assumes appendTo appends to and returns the builder passed in - confirm. */ private static void log(final Source source, final String part1, final CharSequence part2, final int begin, final String part3) { source.logger.info(source.getRowColumnVector(begin).appendTo(new StringBuilder(200).append(part1).append(' ').append(part2).append(" at ")).append(' ').append(part3).toString()); } } jericho-html-3.1/src/java/net/htmlparser/jericho/StringOutputSegment.java0000644000175000017500000000662211204550410026655 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ 
// // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; /** * Implements an {@link OutputSegment} whose content is a CharSequence. *

    * This class has been removed from the public API and the functionality replaced with the
    * {@link OutputDocument#replace(Segment, CharSequence text)} method.
    */
final class StringOutputSegment implements OutputSegment {
    private final int begin;
    private final int end;
    private final CharSequence text;

    /**
     * Creates an output segment that covers the given span and carries the given content.
     *
     * A null text is stored as the empty string, so the covered span is replaced by nothing.
     *
     * @param begin  the position in the OutputDocument where this output segment begins.
     * @param end  the position in the OutputDocument where this output segment ends.
     * @param text  the textual content of the new output segment, or null if no content.
     */
    public StringOutputSegment(final int begin, final int end, final CharSequence text) {
        this.begin=begin;
        this.end=end;
        if (text==null) {
            this.text="";
        } else {
            this.text=text;
        }
    }

    /**
     * Creates an output segment that covers the same span as the given {@link Segment}.
     *
     * A null text is stored as the empty string, so the covered span is replaced by nothing.
     *
     * @param segment  a segment defining the beginning and ending positions of the new output segment.
     * @param text  the textual content of the new output segment, or null if no content.
     */
    public StringOutputSegment(final Segment segment, final CharSequence text) {
        this(segment.begin,segment.end,text);
    }

    /** Returns the begin position given at construction. */
    public int getBegin() {
        return begin;
    }

    /** Returns the end position given at construction. */
    public int getEnd() {
        return end;
    }

    /** Writes the content to the given writer by delegating to {@link #appendTo(Appendable)}. */
    public void writeTo(final Writer writer) throws IOException {
        appendTo(writer);
    }

    /** Appends the content to the given appendable. */
    public void appendTo(final Appendable appendable) throws IOException {
        appendable.append(text);
    }

    /** Returns the length of the content. */
    public long getEstimatedMaximumOutputLength() {
        final long contentLength=text.length();
        return contentLength;
    }

    /** Returns the content as a String. */
    public String toString() {
        return text.toString();
    }

    /** Returns "Replace: (p" + begin + "-p" + end + ") " followed by the content. */
    public String getDebugInfo() {
        final StringBuilder info=new StringBuilder("Replace: (p");
        /* append(Object) is used so that the content is rendered through toString(), as string concatenation does. */
        info.append(begin).append("-p").append(end).append(") ").append((Object)text);
        return info.toString();
    }

    /** Delegates to {@link #writeTo(Writer)}. */
    public void output(final Writer writer) throws IOException {
        writeTo(writer);
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/OutputSegment.java0000644000175000017500000001027511204550410025465 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. 
// See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; import java.util.*; /** * Defines the interface for an output segment, which is used in an {@link OutputDocument} to * replace segments of the source document with other text. *

    * All text in the OutputDocument between the character positions defined by {@link #getBegin()} and {@link #getEnd()} * is replaced by the content of this output segment. * If the begin and end character positions are the same, the content is simply * inserted at this position without replacing any text. * * @see OutputDocument#register(OutputSegment) */ public interface OutputSegment extends CharStreamSource { /** * The comparator used to sort output segments in the {@link OutputDocument} before output. *

    * The following rules are applied in order to compare two output segments: *

      *
    1. The output segment that {@linkplain #getBegin() begins} earlier in the document comes first. *
    2. If both output segments begin at the same position, the one that has zero length comes first. * If neither or both have zero length then neither is guaranteed to come before the other. *
    *

    * Note: this comparator has a natural ordering that may be inconsistent with the equals * method of classes implementing this interface. * This means that the comparator may treat two output segments as equal where calling the * equals(Object) method with the same two output segments returns false. */ public static final Comparator COMPARATOR=new OutputSegmentComparator(); /** * Returns the character position in the {@linkplain OutputDocument#getSourceText() source text of the output document} where this segment begins. * @return the character position in the {@linkplain OutputDocument#getSourceText() source text of the output document} where this segment begins. */ public int getBegin(); /** * Returns the character position in the {@linkplain OutputDocument#getSourceText() source text of the output document} where this segment ends. * @return the character position in the {@linkplain OutputDocument#getSourceText() source text of the output document} where this segment ends. */ public int getEnd(); /** * Writes the content of this output segment to the specified Writer. * @param writer the destination java.io.Writer for the output. * @throws IOException if an I/O exception occurs. */ public void writeTo(Writer writer) throws IOException; /** * Appends the content of this output segment to the specified Appendable object. * @param appendable the destination java.lang.Appendable object for the output. * @throws IOException if an I/O exception occurs. */ public void appendTo(Appendable appendable) throws IOException; /** * Returns the content of this output segment as a String. * @return the content of this output segment as a String, guaranteed not null. * @see #writeTo(Writer) */ public String toString(); /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. 
*/ public String getDebugInfo(); } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeXMLProcessingInstruction.java0000644000175000017500000000233511204550410032113 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; final class StartTagTypeXMLProcessingInstruction extends StartTagTypeGenericImplementation { static final StartTagTypeXMLProcessingInstruction INSTANCE=new StartTagTypeXMLProcessingInstruction(); private StartTagTypeXMLProcessingInstruction() { super("XML processing instruction","",null,false,false,true); } } jericho-html-3.1/src/java/net/htmlparser/jericho/MasonTagTypes.java0000644000175000017500000002627411204550410025406 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. 
The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; /** * Contains {@linkplain TagType tag types} related to the Mason server platform. *

    * There is no specific tag type defined for the * Mason substitution tag * as it is recognised using the {@linkplain StartTagType#SERVER_COMMON common server tag type}. *

    * The tag types defined in this class are not {@linkplain TagType#register() registered} by default. * The {@link #register()} method is provided as a convenient way to register them all at once. */ public final class MasonTagTypes { /** * The tag type given to a * Mason component call * (<& ... &>). *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link StartTagType#getDescription() Description}mason component call *
    {@link StartTagType#getStartDelimiter() StartDelimiter}<& *
    {@link StartTagType#getClosingDelimiter() ClosingDelimiter}&> *
    {@link StartTagType#isServerTag() IsServerTag}true *
    {@link StartTagType#getNamePrefix() NamePrefix}& *
    {@link StartTagType#getCorrespondingEndTagType() CorrespondingEndTagType}null *
    {@link StartTagType#hasAttributes() HasAttributes}false *
    {@link StartTagType#isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
    <& menu &>
    *
    */ public static final StartTagType MASON_COMPONENT_CALL=StartTagTypeMasonComponentCall.INSTANCE; /** * The tag type given to the start tag of a * Mason component called with content * (<&| ... &> ... </&>). *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link StartTagType#getDescription() Description}mason component called with content *
    {@link StartTagType#getStartDelimiter() StartDelimiter}<&| *
    {@link StartTagType#getClosingDelimiter() ClosingDelimiter}&> *
    {@link StartTagType#isServerTag() IsServerTag}true *
    {@link StartTagType#getNamePrefix() NamePrefix}&| *
    {@link StartTagType#getCorrespondingEndTagType() CorrespondingEndTagType}{@link #MASON_COMPONENT_CALLED_WITH_CONTENT_END} *
    {@link StartTagType#hasAttributes() HasAttributes}false *
    {@link StartTagType#isNameAfterPrefixRequired() IsNameAfterPrefixRequired}false *
    *
    Example:
    *
     <&| /sql/select, query => 'SELECT name, age FROM User' &>
    	 *   <tr><td>%name</td><td>%age</td></tr>
    	 * </&>
    *
    */ public static final StartTagType MASON_COMPONENT_CALLED_WITH_CONTENT=StartTagTypeMasonComponentCalledWithContent.INSTANCE; // THIS ELEMENT TYPICALLY CONTAINS MARKUP /** * The tag type given to the end tag of a * Mason component called with content. *

    * See the {@linkplain EndTagType#getCorrespondingStartTagType() corresponding start tag type} * {@link #MASON_COMPONENT_CALLED_WITH_CONTENT} for more details. *

    *

    *
    Properties:
    *
    * *
    Property/MethodValue *
    {@link EndTagType#getDescription() Description}/mason component called with content *
    {@link EndTagType#getStartDelimiter() StartDelimiter}</& *
    {@link EndTagType#getClosingDelimiter() ClosingDelimiter}> *
    {@link EndTagType#isServerTag() IsServerTag}true *
    {@link EndTagType#getNamePrefix() NamePrefix}/& *
    {@link EndTagType#getCorrespondingStartTagType() CorrespondingStartTagType}{@link #MASON_COMPONENT_CALLED_WITH_CONTENT} *
    {@link EndTagType#generateHTML(String) generateHTML}("StartTagName")</&> *
    *
    Example:
    *
    </&>
    *
    * @see #MASON_COMPONENT_CALLED_WITH_CONTENT */ public static final EndTagType MASON_COMPONENT_CALLED_WITH_CONTENT_END=EndTagTypeMasonComponentCalledWithContent.INSTANCE; /** * The tag type given to the start tag of a * Mason named block * (<%name ... > ... </%name>). *

    * A tag of this type must not have a '%' character before its * {@linkplain StartTagType#getClosingDelimiter() closing delimiter}, otherwise it is most likely a * {@linkplain StartTagType#SERVER_COMMON common server tag}. *

    * For the start tag to be recognised, a {@linkplain StartTagType#getCorrespondingEndTagType() corresponding} end tag of the * {@linkplain #MASON_NAMED_BLOCK_END correct type} must exist somewhere in the source document following the start tag. *

    *

    *
    Properties:
    *
    * *
    PropertyValue *
    {@link StartTagType#getDescription() Description}mason named block *
    {@link StartTagType#getStartDelimiter() StartDelimiter}<% *
    {@link StartTagType#getClosingDelimiter() ClosingDelimiter}> *
    {@link StartTagType#isServerTag() IsServerTag}true *
    {@link StartTagType#getNamePrefix() NamePrefix}% *
    {@link StartTagType#getCorrespondingEndTagType() CorrespondingEndTagType}{@link #MASON_NAMED_BLOCK_END} *
    {@link StartTagType#hasAttributes() HasAttributes}false *
    {@link StartTagType#isNameAfterPrefixRequired() IsNameAfterPrefixRequired}true *
    *
    Example:
    *
    <%perl> print "hello world"; </%perl>
    *
    */ public static final StartTagType MASON_NAMED_BLOCK=StartTagTypeMasonNamedBlock.INSTANCE; // THIS ELEMENT TYPICALLY CONTAINS CODE, EXCEPT FOR THE <%def> VARIETY WHICH CONTAINS SOME MARKUP AND SOME OTHER SERVER ELEMENTS /** * The tag type given to the end tag of a * Mason named block. *

    * See the {@linkplain EndTagType#getCorrespondingStartTagType() corresponding start tag type} * {@link #MASON_NAMED_BLOCK} for more details. *

    *

    *
    Properties:
    *
    * *
    Property/MethodValue *
    {@link EndTagType#getDescription() Description}/mason named block *
    {@link EndTagType#getStartDelimiter() StartDelimiter}</% *
    {@link EndTagType#getClosingDelimiter() ClosingDelimiter}> *
    {@link EndTagType#isServerTag() IsServerTag}true *
    {@link EndTagType#getNamePrefix() NamePrefix}/% *
    {@link EndTagType#getCorrespondingStartTagType() CorrespondingStartTagType}{@link #MASON_NAMED_BLOCK} *
    {@link EndTagType#generateHTML(String) generateHTML}("%StartTagName")</%StartTagName> *
    *
    Example:
    *
    </%perl>
    *
    * @see #MASON_NAMED_BLOCK */ public static final EndTagType MASON_NAMED_BLOCK_END=EndTagTypeMasonNamedBlock.INSTANCE; private static final TagType[] TAG_TYPES={ MASON_COMPONENT_CALL, MASON_COMPONENT_CALLED_WITH_CONTENT, MASON_COMPONENT_CALLED_WITH_CONTENT_END, MASON_NAMED_BLOCK, MASON_NAMED_BLOCK_END }; private MasonTagTypes() {} /** * {@linkplain TagType#register() Registers} all of the tag types defined in this class at once. *

    * The tag types must be registered before the parser will recognise them.
    */
    public static void register() {
        for (int i=0; i<TAG_TYPES.length; i++) {
            TAG_TYPES[i].register();
        }
    }

    /**
    * Reports whether the given tag type is one of the entries of TAG_TYPES.
    * Entries are compared by reference identity (==), not with equals.
    *
    * @param tagType  the {@link TagType} to test.
    * @return true if the specified tag type is defined in this class, otherwise false.
    */
    public static boolean defines(final TagType tagType) {
        boolean found=false;
        for (int i=0; i<TAG_TYPES.length && !found; i++) {
            found=(TAG_TYPES[i]==tagType);
        }
        return found;
    }

    /**
    * Indicates whether the specified tag type is recognised by a Mason parser.
    *

    * This is true if the specified tag type is {@linkplain #defines(TagType) defined in this class} or if it is the * {@linkplain StartTagType#SERVER_COMMON common server tag type}. * * @param tagType the {@link TagType} to test. * @return true if the specified tag type is recognised by a Mason parser, otherwise false. */ public static boolean isParsedByMason(final TagType tagType) { return tagType==StartTagType.SERVER_COMMON || defines(tagType); } } jericho-html-3.1/src/java/net/htmlparser/jericho/FormControlType.java0000644000175000017500000006561611204550410025761 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents the control type * of a {@link FormControl}. *

    * Use the {@link FormControl#getFormControlType()} method to determine the type of a form control. *

    * The following table shows the relationship between the HTML 4.01 specification control type descriptions, * their associated {@link Element} names and attributes, and the FormControlType constants defined in this class: * * * * * * * * * * * * * * * * * *
    Description * {@linkplain Element#getName() Element Name} * Distinguishing Attribute * FormControlType *
    buttons - submit button * BUTTON * type="submit" * {@link #BUTTON} *
    INPUT * type="submit" * {@link #SUBMIT} *
    type="image" * {@link #IMAGE} *
    buttons - reset button * BUTTON, * INPUT * type="reset" * - *
    buttons - push button * BUTTON, * INPUT * type="button" * - *
    checkboxes * INPUT * type="checkbox" * {@link #CHECKBOX} *
    radio buttons * INPUT * type="radio" * {@link #RADIO} *
    menus * SELECT * multiple * {@link #SELECT_MULTIPLE} *
    absence of multiple * {@link #SELECT_SINGLE} *
    text input * INPUT * type="text" * {@link #TEXT} *
    type="password" * {@link #PASSWORD} *
    TEXTAREA * - * {@link #TEXTAREA} *
    file select * INPUT * type="file" * {@link #FILE} *
    hidden controls * INPUT * type="hidden" * {@link #HIDDEN} *
    object controls * OBJECT * - * - *
    * Reset buttons and * push buttons * have no associated FormControlType because they do not contribute to the * form data set * of a submitted form, * and so have no relevance to the methods provided in the {@link FormControl} and associated classes. * If required they can be found and manipulated as normal {@linkplain Element elements}. *

    * Object controls * have no associated FormControlType because any data they might contribute to the * form data set * is entirely dependent on the * class of object, * the interpretation of which is beyond the scope of this library. *

    * This library does not consider the * OPTION * elements found within * SELECT * elements to be controls themselves, despite them being referred to as such in some * parts * of the HTML 4.01 specification. * Hence the absence of an OPTION control type. * * @see FormControl * @see FormField */ public enum FormControlType { /** * The form control type given to a submit button control implemented using a * BUTTON element. *

    *

    *
    Example:
    *
    <button type="submit" name="FieldName" value="PredefinedValue">Send</button> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#BUTTON}
    * {@link #hasPredefinedValue()} = true
    * {@link #isSubmit()} = true
    *
    */ BUTTON (HTMLElementName.BUTTON,true,true), /** * The form control type given to a checkbox control. *

    *

    *
    Example:
    *
    <input type="checkbox" name="FieldName" value="PredefinedValue" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = true
    * {@link #isSubmit()} = false
    *
    */ CHECKBOX (HTMLElementName.INPUT,true,false), /** * The form control type given to a file select control. *

    * This library considers the submission value of this type of control * to consist of only the selected file name, regardless of whether the file content would normally be included in the * form data set. *

    * To determine manually whether the file content is included in the form data set, the * enctype * attribute of the control's associated FORM * element can be examined. * Although the exact behaviour is not defined in the HTML 4.01 specification, the convention is that the content * is not included unless an enctype value of * "multipart/form-data" * is specified. *

    * For more information see the * HTML 4.01 specification section 17.13.4 - Form content types. *

    *

    *
    Example:
    *
    <input type="file" name="FieldName" value="DefaultFileName" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = false
    * {@link #isSubmit()} = false
    *
    */ FILE (HTMLElementName.INPUT,false,false), /** * The form control type given to a hidden control. *

    *

    *
    Example:
    *
    <input type="hidden" name="FieldName" value="DefaultValue" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = false
    * {@link #isSubmit()} = false
    *
    * Note that {@link #hasPredefinedValue()} returns false for this control type * because the value of hidden fields is usually set via server or client side scripting. */ HIDDEN (HTMLElementName.INPUT,false,false), /** * The form control type given to a submit button control implemented using an * INPUT element with attribute * type="image". *

    * See the description under the heading "image" in the * HTML 4.01 specification section 17.4.1 - Form control types created with INPUT. *

    * When a {@linkplain FormControl form control} of type IMAGE is present in the form used to * {@linkplain FormFields#FormFields(Collection) construct} a {@link FormFields} instance, three separate * {@link FormField} objects are created for the one control. * One has the {@linkplain FormField#getName() name} specified in the * name * attribute of the INPUT element, and the other two have this name with the suffixes * ".x" and ".y" appended to them to represent the additional * click coordinates * submitted by this control when activated using a pointing device. *

    * This type of control is also mentioned in the * HTML 4.01 specification section 13.6.2 - Server-side image maps. *

    *

    *
    Example:
    *
    <input type="image" name="FieldName" src="ImageURL" value="PredefinedValue" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = true
    * {@link #isSubmit()} = true
    *
    */ IMAGE (HTMLElementName.INPUT,true,true), /** * The form control type given to a text input control implemented using an * INPUT element with attribute * type="password". *

    *

    *
    Example:
    *
    <input type="password" name="FieldName" value="DefaultValue" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = false
    * {@link #isSubmit()} = false
    *
    */ PASSWORD (HTMLElementName.INPUT,false,false), /** * The form control type given to a radio button control. *

    *

    *
    Example:
    *
    <input type="radio" name="FieldName" value="PredefinedValue" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = true
    * {@link #isSubmit()} = false
    *
    */ RADIO (HTMLElementName.INPUT,true,false), /** * The form control type given to a menu control implemented using a * SELECT element containing * the attribute "multiple". *

    * SELECT elements that do not contain the attribute "multiple" are represented by the * {@link #SELECT_SINGLE} form control type. *

    * This is the only control type that can have multiple * submission values within the one control. * Contrast this with {@link #CHECKBOX} controls, which require multiple separate controls with the same * {@linkplain FormControl#getName() name} in order to contribute multiple submission values. *

    * The individual {@link HTMLElementName#OPTION OPTION} elements contained within a {@linkplain FormControl form control} of this type can be * obtained using the {@link FormControl#getOptionElementIterator()} method. *

    * The most efficient way to test whether a form control type is either SELECT_MULTIPLE or SELECT_SINGLE * is to test for {@link #getElementName()}=={@link HTMLElementName#SELECT}. *

    *

    *
    Example:
    *
    * * <select name="FieldName" multiple>
    *   <option value="PredefinedValue1" selected>Display Text1</option>
    *   <option value="PredefinedValue2">Display Text2</option>
    * </select> *
    *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#SELECT}
    * {@link #hasPredefinedValue()} = true
    * {@link #isSubmit()} = false
    *
    */ SELECT_MULTIPLE (HTMLElementName.SELECT,true,false), /** * The form control type given to a menu control implemented using a * SELECT element that does * not contain the attribute "multiple". *

    * SELECT elements that do contain the attribute "multiple" are represented by the * {@link #SELECT_MULTIPLE} form control type. *

    * The individual {@link HTMLElementName#OPTION OPTION} elements contained within a {@linkplain FormControl form control} of this type can be * obtained using the {@link FormControl#getOptionElementIterator()} method. *

    * The most efficient way to test whether a form control type is either SELECT_MULTIPLE or SELECT_SINGLE * is to test for {@link #getElementName()}=={@link HTMLElementName#SELECT}. *

    *

    *
    Example:
    *
    * * <select name="FieldName">
    *   <option value="PredefinedValue1" selected>Display Text1</option>
    *   <option value="PredefinedValue2">Display Text2</option>
    * </select> *
    *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#SELECT}
    * {@link #hasPredefinedValue()} = true
    * {@link #isSubmit()} = false
    *
    */ SELECT_SINGLE (HTMLElementName.SELECT,true,false), /** * The form control type given to a submit button control implemented using an * INPUT element with attribute * type="submit". *

    *

    *
    Example:
    *
    <input type="submit" name="FieldName" value="PredefinedValue" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = true
    * {@link #isSubmit()} = true
    *
    */ SUBMIT (HTMLElementName.INPUT,true,true), /** * The form control type given to a text input control implemented using an * INPUT element with attribute * type="text". *

    *

    *
    Example:
    *
    <input type="text" name="FieldName" value="DefaultValue" /> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#INPUT}
    * {@link #hasPredefinedValue()} = false
    * {@link #isSubmit()} = false
    *
    */ TEXT (HTMLElementName.INPUT,false,false), /** * The form control type given to a text input control implemented using a * TEXTAREA element. *

    *

    *
    Example:
    *
    <textarea name="FieldName">Default Value</textarea> *
    Properties:
    *
    * {@link #getElementName()} = {@link HTMLElementName#TEXTAREA}
    * {@link #hasPredefinedValue()} = false
    * {@link #isSubmit()} = false
    *
    */ TEXTAREA (HTMLElementName.TEXTAREA,false,false); private String elementName; private boolean hasPredefinedValue; private boolean submit; private static final HashMap INPUT_ELEMENT_TYPE_MAP=new HashMap(11,1.0F); // 8 input element types in total private static final HashSet NON_FORM_CONTROL_TYPE_ATTRIBUTE_SET=new HashSet(3,1.0F); // 2 non form control input element types in total static { // Map each INPUT element "type" attribute value to a FormControlType: INPUT_ELEMENT_TYPE_MAP.put("checkbox",CHECKBOX); INPUT_ELEMENT_TYPE_MAP.put("file",FILE); INPUT_ELEMENT_TYPE_MAP.put("hidden",HIDDEN); INPUT_ELEMENT_TYPE_MAP.put("image",IMAGE); INPUT_ELEMENT_TYPE_MAP.put("password",PASSWORD); INPUT_ELEMENT_TYPE_MAP.put("radio",RADIO); INPUT_ELEMENT_TYPE_MAP.put("submit",SUBMIT); INPUT_ELEMENT_TYPE_MAP.put("text",TEXT); // The following INPUT element "type" attributes do not produce a form control: NON_FORM_CONTROL_TYPE_ATTRIBUTE_SET.add("button"); NON_FORM_CONTROL_TYPE_ATTRIBUTE_SET.add("reset"); } private FormControlType(final String elementName, final boolean hasPredefinedValue, final boolean submit) { this.elementName=elementName; this.hasPredefinedValue=hasPredefinedValue; this.submit=submit; } /** * Returns the {@linkplain Element#getName() name} of the {@link Element} that constitues this form control type. * @return the {@linkplain Element#getName() name} of the {@link Element} that constitues this form control type. */ public String getElementName() { return elementName; } /** * Indicates whether any value * submitted by this type of control is predefined in the HTML and typically not modified by the user or server/client scripts. *

    * The word "typically" is used because the use of client side scripts can cause * control types * which normally have predefined values to be set by the user, which is a condition which is beyond * the scope of this library to test for. *

    * The predefined value is defined by the control's initial value. *

    * A return value of true signifies that a form control of this type is a * predefined value control. *

    * A return value of false signifies that a form control of this type is a * user value control. *

    * Note that the {@link #HIDDEN} type returns false for this method because the value of hidden fields is usually set via server or client side scripting. * * @return true if any value submitted by this type of control is predefined in the HTML and typically not modified by the user or server/client scripts, otherwise false. */ public boolean hasPredefinedValue() { return hasPredefinedValue; } /** * Indicates whether this control type causes the form to be submitted. *

    * Returns true only for the {@link #SUBMIT}, {@link #BUTTON}, and {@link #IMAGE} instances. * * @return true if this control type causes the form to be submitted, otherwise false. */ public boolean isSubmit() { return submit; } static FormControlType getFromInputElementType(final String typeAttributeValue) { return INPUT_ELEMENT_TYPE_MAP.get(typeAttributeValue.toLowerCase()); } static boolean isNonFormControl(final String typeAttributeValue) { return NON_FORM_CONTROL_TYPE_ATTRIBUTE_SET.contains(typeAttributeValue.toLowerCase()); } } ././@LongLink0000000000000000000000000000015700000000000011570 Lustar rootrootjericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalComment.javajericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalCo0000644000175000017500000000334611204550410034202 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; final class StartTagTypeMicrosoftDownlevelRevealedConditionalComment extends StartTagTypeGenericImplementation { static final StartTagTypeMicrosoftDownlevelRevealedConditionalComment INSTANCE=new StartTagTypeMicrosoftDownlevelRevealedConditionalComment(); static final String IF="![if"; static final String ENDIF="![endif"; private StartTagTypeMicrosoftDownlevelRevealedConditionalComment() { super("Microsoft downlevel-revealed conditional comment","",null,false,false,true); } protected Tag constructTagAt(final Source source, final int pos) { final Tag tag=super.constructTagAt(source,pos); if (tag==null) return null; final String name=tag.getName(); if (name!=IF && name!=ENDIF) return null; // can use == instead of .equals() because the names are in HtmlElements.CONSTANT_NAME_MAP return tag; } } jericho-html-3.1/src/java/net/htmlparser/jericho/Util.java0000644000175000017500000001322211204550410023552 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; import java.io.*; /** * Contains miscellaneous utility methods not directly associated with the HTML Parser library. 
*/ public final class Util { private static final int BUFFER_SIZE=2048; private static final String CSVNewLine=System.getProperty("line.separator"); private Util() {} /** * Returns the text loaded from the specified Reader as a string. *

    * If a null argument is supplied to this method, an empty string is returned. *

    * To load text from an InputStream, use getString(new InputStreamReader(inputStream,encoding)). * * @param reader the java.io.Reader from which to load the text. * @return the text loaded from the specified java.io.Reader as a string. * @throws java.io.IOException if an I/O error occurs. */ public static String getString(final Reader reader) throws IOException { if (reader==null) return ""; try { int charsRead; final char[] copyBuffer=new char[BUFFER_SIZE]; final StringBuilder sb=new StringBuilder(); while ((charsRead=reader.read(copyBuffer,0,BUFFER_SIZE))!=-1) sb.append(copyBuffer,0,charsRead); return sb.toString(); } finally { reader.close(); } } /** * Outputs the specified array of strings to the specified Writer in the format of a line for a CSV file. *

    * "CSV" stands for Comma Separated Values. * There is no formal specification for a CSV file, so there is significant variation in * the way different applications handle issues like the encoding of different data types and special characters. *

    * Generally, a CSV file contains a list of records separated by line breaks, with each record consisting of a list of * field values separated by commas. * Each record in the file should contain the same number of field values, with the values at each position representing the same * type of data in all the records. In this way the file can also be divided into columns, often with the first line of the * file containing the column labels. *

    * Columns can have different data types such as text, numeric, date / time and boolean. * A text value is often delimited with single (') or double-quotes ("), * especially if the value contains a comma, line feed, or other special character that is significant to the syntax. * Encoding techniques for including quote characters themselves in text values vary widely. * Values of other types are generally unquoted to distinguish them from text values. *

    * This method produces output that is readable by MS-Excel, conforming to the following rules: *

    *

      *
    • All values are considered to be of type text, except for the static constants {@link Config#ColumnValueTrue} * and {@link Config#ColumnValueFalse}, representing the boolean values true and false respectively. *
    • All text values are enclosed in double-quotes. *
    • Double-quote characters contained in text values are encoded using two consecutive double-quotes (""). *
    • null values are represented as empty fields. *
    • The end of each record is represented by a carriage-return / line-feed (CR/LF) pair. *
    • Line breaks inside text values are represented by a single line feed (LF) character. *
    * * @param writer the destination java.io.Writer for the output. * @throws java.io.IOException if an I/O error occurs. * @see FormFields#getColumnLabels() * @see FormFields#getColumnValues(Map) */ public static void outputCSVLine(final Writer writer, final String[] values) throws IOException { for (int i=0; i { public int compare(final OutputSegment outputSegment1, final OutputSegment outputSegment2) { if (outputSegment1.getBegin()outputSegment2.getBegin()) return 1; if (outputSegment1.getEnd()outputSegment2.getEnd()) return 1; return 0; } } jericho-html-3.1/src/java/net/htmlparser/jericho/StreamedParseText.java0000644000175000017500000000255211204550410026245 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

import java.io.*;
import java.nio.*;

/**
 * A {@link CharSequenceParseText} backed by a {@link StreamedText}.
 * <p>
 * The end position and substrings are obtained from the underlying streamed text,
 * with substrings converted to lower case.
 */
final class StreamedParseText extends CharSequenceParseText {
	private final StreamedText text;

	/**
	 * Constructs a new parse text over the specified streamed text.
	 * @param streamedText the streamed text supplying the characters.
	 */
	public StreamedParseText(final StreamedText streamedText) {
		super(streamedText);
		text=streamedText;
	}

	/**
	 * Returns the end position reported by the underlying streamed text.
	 */
	protected int getEnd() {
		return text.getEnd();
	}

	/**
	 * Returns the lower case form of the specified span of the underlying streamed text.
	 * @param begin the begin position passed to the streamed text.
	 * @param end the end position passed to the streamed text.
	 */
	protected String substring(final int begin, final int end) {
		// NOTE(review): toLowerCase() without a Locale argument uses the JVM default locale - confirm this is intended.
		return text.substring(begin,end).toLowerCase();
	}
}
jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeNormal.java0000644000175000017500000000265311204550410026407 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

/**
 * The start tag type registered under the description "normal", paired with {@link EndTagType#NORMAL}.
 */
final class StartTagTypeNormal extends StartTagTypeGenericImplementation {
	static final StartTagTypeNormal INSTANCE=new StartTagTypeNormal();

	private StartTagTypeNormal() {
		super("normal",START_DELIMITER_PREFIX,">",EndTagType.NORMAL,false,true,true);
	}

	/**
	 * Indicates whether the specified position marks the end of the attributes of a start tag of this type.
	 * <p>
	 * The attributes end at a '>' character, or at the sequence "/>" unless the closing slash is being ignored.
	 *
	 * @param source the source document.
	 * @param pos the position in the source document to test.
	 * @param isClosingSlashIgnored whether a "/>" sequence at the position should not be treated as the end of the attributes.
	 * @return <code>true</code> if the position marks the end of the attributes, otherwise <code>false</code>.
	 */
	public boolean atEndOfAttributes(final Source source, final int pos, final boolean isClosingSlashIgnored) {
		final ParseText text=source.getParseText();
		if (text.charAt(pos)=='>') return true;
		if (isClosingSlashIgnored) return false;
		return text.containsAt("/>",pos);
	}
}
jericho-html-3.1/src/java/net/htmlparser/jericho/BasicLogFormatter.java0000644000175000017500000001161711204550410026212 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.logging.*; /** * Provides basic formatting for log messages. *

    * This class extends the java.util.logging.Formatter class, allowing it to be specified as a formatter for the java.util.logging system. *

    * The static {@link #format(String level, String message, String loggerName)} method provides a means of using the same formatting * outside of the java.util.logging framework. See the documentation of this method for more details. */ public class BasicLogFormatter extends Formatter { /** * Determines whether the logging level is included in the output. *

    * The default value is true. *

    * As this is a static property, changing the value will affect all BasicLogFormatter instances, as well as the behaviour of the * static {@link #format(String level, String message, String loggerName)} method. */ public static boolean OutputLevel=true; /** * Determines whether the logger name is included in the output. *

    * The default value is false. *

    * The logger name used for all automatically created {@link Logger} instances is "net.htmlparser.jericho". *

    * As this is a static property, changing the value will affect all BasicLogFormatter instances, as well as the behaviour of the * static {@link #format(String level, String message, String loggerName)} method. */ public static boolean OutputName=false; static final Formatter INSTANCE=new BasicLogFormatter(); /** * Returns a formatted string representing the log entry information contained in the specified java.util.logging.LogRecord. *

    * This method is not called directly, but is used by the java.util.logging framework when this class is specified * as a formatter in the logging.properties file. *

    * See the documentation of the parent java.util.logging.Formatter class in the Java SDK for more details. * * @param logRecord a java.util.logging.LogRecord object containing all of the log entry information. * @return a formatted string representing the log entry information contained in the specified java.util.logging.LogRecord. */ public String format(final LogRecord logRecord) { return format(logRecord.getLevel().getName(),logRecord.getMessage(),logRecord.getLoggerName()); } /** * Returns a formatted string representing the specified log entry information. *

    * This method is used by the default implementation of the {@link WriterLogger#log(String level, String message)} method. *

    * The static properties {@link #OutputLevel} and {@link #OutputName} affect what information is included in the output. *

    * The static {@link Config#NewLine} property determines the character sequence used for line breaks. *

    * A line of output typically looks like this: *

    INFO: this is the log message
    * or if the {@link #OutputName} property is set to true, the output would look similar to this: *
    INFO: [net.htmlparser.jericho] this is the log message
    * * @param level a string representing the logging level of the log entry. * @param message the log message. * @param loggerName the name of the logger. * @return a formatted string representing the specified log entry information. */ public static String format(final String level, final String message, final String loggerName) { final StringBuilder sb=new StringBuilder(message.length()+40); if (OutputLevel) sb.append(level).append(": "); if (OutputName && loggerName!=null) sb.append('[').append(loggerName).append("] "); sb.append(message); sb.append(Config.NewLine); return sb.toString(); } } jericho-html-3.1/src/java/net/htmlparser/jericho/HTMLElementTerminatingTagNameSets.java0000644000175000017500000000357611204550410031224 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; final class HTMLElementTerminatingTagNameSets { // all fields are guaranteed not null and contain unique sets. 
public final Set TerminatingStartTagNameSet; // Set of start tags that terminate the element public final Set TerminatingEndTagNameSet; // Set of end tags that terminate the element (the end tag of this element is assumed and not included in this set) public final Set NonterminatingElementNameSet; // Set of elements that can be inside this element, which may contain tags from TerminatingStartTagNameSet and TerminatingEndTagNameSet that must be ignored public HTMLElementTerminatingTagNameSets(final Set terminatingStartTagNameSet, final Set terminatingEndTagNameSet, final Set nonterminatingElementNameSet) { this.TerminatingStartTagNameSet=terminatingStartTagNameSet; this.TerminatingEndTagNameSet=terminatingEndTagNameSet; this.NonterminatingElementNameSet=nonterminatingElementNameSet; } } jericho-html-3.1/src/java/net/htmlparser/jericho/OutputDocument.java0000644000175000017500000005554711207246334025666 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; import java.util.*; /** * Represents a modified version of an original {@link Source} document or {@link Segment}. *

    * An OutputDocument represents an original {@link Source} document or {@link Segment} that * has been modified by substituting segments of it with other text. * Each of these substitutions must be registered in the output document, * which is most commonly done using the various replace, remove or insert methods in this class. * These methods internally {@linkplain #register(OutputSegment) register} one or more {@link OutputSegment} objects to define each substitution. *

    * If a {@link Segment} is used to construct the output document, all character positions are relative to the source document of the specified segment. *

    * After all of the substitutions have been registered, the modified text can be retrieved using the * {@link #writeTo(Writer)} or {@link #toString()} methods. *

    * The registered {@linkplain OutputSegment output segments} may be adjacent and may also overlap. * An output segment that is completely enclosed by another output segment is not included in the output. *

    * If unexpected results are being generated from an OutputDocument, the {@link #getDebugInfo()} method provides information on each * {@linkplain #getRegisteredOutputSegments() registered output segment}, which should provide enough information to determine the cause of the problem. * In most cases the problem will be caused by overlapping output segments. *

    * The following example converts all externally referenced style sheets to internal style sheets: *

    *

     *  URL sourceUrl=new URL(sourceUrlString);
     *  String htmlText=Util.getString(new InputStreamReader(sourceUrl.openStream()));
     *  Source source=new Source(htmlText);
     *  OutputDocument outputDocument=new OutputDocument(source);
     *  StringBuilder sb=new StringBuilder();
     *  List linkStartTags=source.getAllStartTags(HTMLElementName.LINK);
     *  for (Iterator i=linkStartTags.iterator(); i.hasNext();) {
     *    StartTag startTag=(StartTag)i.next();
     *    Attributes attributes=startTag.getAttributes();
     *    String rel=attributes.getValue("rel");
     *    if (!"stylesheet".equalsIgnoreCase(rel)) continue;
     *    String href=attributes.getValue("href");
     *    if (href==null) continue;
     *    String styleSheetContent;
     *    try {
     *      styleSheetContent=Util.getString(new InputStreamReader(new URL(sourceUrl,href).openStream()));
     *    } catch (Exception ex) {
     *      continue; // don't convert if URL is invalid
     *    }
     *    sb.setLength(0);
     *    sb.append("<style");
     *    Attribute typeAttribute=attributes.get("type");
     *    if (typeAttribute!=null) sb.append(' ').append(typeAttribute);
     *    sb.append(">\n").append(styleSheetContent).append("\n</style>");
     *    outputDocument.replace(startTag,sb);
     *  }
     *  String convertedHtmlText=outputDocument.toString();
     * 
    * * @see OutputSegment */ public final class OutputDocument implements CharStreamSource { private CharSequence sourceText; private ArrayList outputSegments=new ArrayList(); /** * Constructs a new output document based on the specified source document. * @param source the source document. */ public OutputDocument(final Source source) { if (source==null) throw new IllegalArgumentException("source argument must not be null"); this.sourceText=source; } /** * Constructs a new output document based on the specified {@link Segment}. * @param segment the original {@link Segment}. */ public OutputDocument(final Segment segment) { if (segment==null) throw new IllegalArgumentException("segment argument must not be null"); Source source=segment.source; this.sourceText=source; if (segment.begin>0) remove(new Segment(source,0,segment.begin)); if (segment.end * If a {@link Segment} was used to construct the output document, this returns the text of the entire source document rather than just the segment. * * @return the original source text upon which this output document is based. */ public CharSequence getSourceText() { return sourceText; } /** * Removes the specified {@linkplain Segment segment} from this output document. *

    * This is equivalent to {@link #replace(Segment,CharSequence) replace}(segment,null). * * @param segment the segment to remove. */ public void remove(final Segment segment) { register(new RemoveOutputSegment(segment)); } /** * Removes all the segments from this output document represented by the specified source {@linkplain Segment} objects. *

    * This is equivalent to the following code:

    	 *  for (Iterator i=segments.iterator(); i.hasNext();)
    	 *    {@link #remove(Segment) remove}((Segment)i.next());
    * * @param segments a collection of segments to remove, represented by source {@link Segment} objects. */ public void remove(final Collection segments) { for (Segment segment : segments) remove(segment); } /** * Inserts the specified text at the specified character position in this output document. * @param pos the character position at which to insert the text. * @param text the replacement text. */ public void insert(final int pos, final CharSequence text) { register(new StringOutputSegment(pos,pos,text)); } /** * Replaces the specified {@linkplain Segment segment} in this output document with the specified text. *

    * Specifying a null argument to the text parameter is exactly equivalent to specifying an empty string, * and results in the segment being completely removed from the output document. * * @param segment the segment to replace. * @param text the replacement text, or null to remove the segment. */ public void replace(final Segment segment, final CharSequence text) { replace(segment.getBegin(),segment.getEnd(),text); } /** * Replaces the specified segment of this output document with the specified text. *

    * Specifying a null argument to the text parameter is exactly equivalent to specifying an empty string, * and results in the segment being completely removed from the output document. * * @param begin the character position at which to begin the replacement. * @param end the character position at which to end the replacement. * @param text the replacement text, or null to remove the segment. */ public void replace(final int begin, final int end, final CharSequence text) { register(new StringOutputSegment(begin,end,text)); } /** * Replaces the specified segment of this output document with the specified character. * * @param begin the character position at which to begin the replacement. * @param end the character position at which to end the replacement. * @param ch the replacement character. */ public void replace(final int begin, final int end, final char ch) { register(new CharOutputSegment(begin,end,ch)); } /** * Replaces the specified {@link FormControl} in this output document. *

    * The effect of this method is to {@linkplain #register(OutputSegment) register} zero or more * {@linkplain OutputSegment output segments} in the output document as required to reflect * previous modifications to the control's state. * The state of a control includes its submission value, * {@linkplain FormControl#setOutputStyle(FormControlOutputStyle) output style}, and whether it has been * {@linkplain FormControl#setDisabled(boolean) disabled}. *

    * The state of the form control should not be modified after this method is called, as there is no guarantee that * subsequent changes either will or will not be reflected in the final output. * A second call to this method with the same parameter is not allowed. * It is therefore recommended to call this method as the last action before the output is generated. *

    * Although the specifics of the number and nature of the output segments added in any particular circumstance * is not defined in the specification, it can generally be assumed that only the minimum changes necessary * are made to the original document. If the state of the control has not been modified, calling this method * has no effect at all. * * @param formControl the form control to replace. * @see #replace(FormFields) */ public void replace(final FormControl formControl) { formControl.replaceInOutputDocument(this); } /** * {@linkplain #replace(FormControl) Replaces} all the constituent {@linkplain FormControl form controls} * from the specified {@link FormFields} in this output document. *

    * This is equivalent to the following code: *

    for (Iterator i=formFields.{@link FormFields#getFormControls() getFormControls()}.iterator(); i.hasNext();)
    	 *   {@link #replace(FormControl) replace}((FormControl)i.next());
    *

    * The state of any of the form controls in the specified form fields should not be modified after this method is called, * as there is no guarantee that subsequent changes either will or will not be reflected in the final output. * A second call to this method with the same parameter is not allowed. * It is therefore recommended to call this method as the last action before the output is generated. * * @param formFields the form fields to replace. * @see #replace(FormControl) */ public void replace(final FormFields formFields) { formFields.replaceInOutputDocument(this); } /** * Replaces the specified {@link Attributes} segment in this output document with the name/value entries * in the returned Map. * The returned map initially contains entries representing the attributes from the source document, * which can be modified before output. *

    * The documentation of the {@link #replace(Attributes,Map)} method contains more information about the requirements * of the map entries. *

    * Specifying a value of true as an argument to the convertNamesToLowerCase parameter * causes all original attribute names to be converted to lower case in the map. * This simplifies the process of finding/updating specific attributes since map keys are case sensitive. *

    * Attribute values are automatically {@linkplain CharacterReference#decode(CharSequence) decoded} before * being loaded into the map. *

    * This method is logically equivalent to:
    * {@link #replace(Attributes,Map) replace}(attributes, attributes.{@link Attributes#populateMap(Map,boolean) populateMap(new LinkedHashMap<String,String>(),convertNamesToLowerCase)}) *

    * The use of LinkedHashMap to implement the map ensures (probably unnecessarily) that * existing attributes are output in the same order as they appear in the source document, and new * attributes are output in the same order as they are added. *

    *

    *
    Example:
    *
    	 *  Source source=new Source(htmlDocument);
    	 *  Attributes bodyAttributes
    	 *    =source.getNextStartTag(0,HTMLElementName.BODY).getAttributes();
    	 *  OutputDocument outputDocument=new OutputDocument(source);
    	 *  Map<String,String> attributesMap=outputDocument.replace(bodyAttributes,true);
    	 *  attributesMap.put("bgcolor","green");
    	 *  String htmlDocumentWithGreenBackground=outputDocument.toString();
    * * @param attributes the Attributes segment defining the span of the segment and initial name/value entries of the returned map. * @param convertNamesToLowerCase specifies whether all attribute names are converted to lower case in the map. * @return a Map containing the name/value entries to be output. * @see #replace(Attributes,Map) */ public Map replace(final Attributes attributes, boolean convertNamesToLowerCase) { AttributesOutputSegment attributesOutputSegment=new AttributesOutputSegment(attributes,convertNamesToLowerCase); register(attributesOutputSegment); return attributesOutputSegment.getMap(); } /** * Replaces the specified attributes segment in this source document with the name/value entries in the specified Map. *

    * This method might be used if the Map containing the new attribute values * should not be preloaded with the same entries as the source attributes, or a map implementation * other than LinkedHashMap is required. * Otherwise, the {@link #replace(Attributes, boolean convertNamesToLowerCase)} method is generally more useful. *

    * An attribute with no value is represented by a map entry with a null value. *

    * Attribute values are stored unencoded in the map, and are automatically * {@linkplain CharacterReference#encode(CharSequence) encoded} if necessary during output. *

    * The use of invalid characters in attribute names results in unspecified behaviour. *

    * Note that methods in the Attributes class treat attribute names as case insensitive,
    * whereas the Map treats them as case sensitive.
    *
    * @param attributes the Attributes object defining the span of the segment to replace.
    * @param map the Map containing the name/value entries. An entry with a null value represents an attribute with no value.
    * @see #replace(Attributes, boolean convertNamesToLowerCase)
    */
    public void replace(final Attributes attributes, final Map<String,String> map) {
        // The parameter is typed Map<String,String> so that it agrees with the map returned by replace(Attributes,boolean).
        register(new AttributesOutputSegment(attributes,map));
    }

    /**
    * Replaces the specified segment of this output document with a string of spaces of the same length.
    *

    * This method is most commonly used to remove segments of the document without affecting the character positions of the remaining elements. *

    * It is used internally to implement the functionality available through the {@link Segment#ignoreWhenParsing()} method. *

    * To remove a segment from the output document completely, use the {@link #remove(Segment)} method instead.
    *
    * @param begin the character position at which to begin the replacement.
    * @param end the character position at which to end the replacement.
    */
    public void replaceWithSpaces(final int begin, final int end) {
        // A BlankOutputSegment covering the span is registered like any other output segment.
        final OutputSegment blankSegment=new BlankOutputSegment(begin,end);
        register(blankSegment);
    }

    /**
    * Registers the specified {@linkplain OutputSegment output segment} in this output document.
    *

    * Use this method if you want to use a customised {@link OutputSegment} class.
    *
    * @param outputSegment the output segment to register.
    */
    public void register(final OutputSegment outputSegment) {
        // Segments are simply collected here; they are sorted later when output is generated or the list is requested.
        this.outputSegments.add(outputSegment);
    }

    /**
    * Writes the final content of this output document to the specified Writer.
    *

    * The {@link #writeTo(Writer, int begin, int end)} method allows the output of a portion of the output document. *

    * If the output is required in the form of a Reader, use {@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)} instead.
    *
    * @param writer the destination java.io.Writer for the output.
    * @throws IOException if an I/O exception occurs.
    * @see #toString()
    */
    public void writeTo(final Writer writer) throws IOException {
        try {
            this.appendTo(writer);
        } finally {
            // Flush even if appending failed part way through, so that anything already written is pushed out.
            writer.flush();
        }
    }

    /**
    * Writes the specified portion of the final content of this output document to the specified Writer.
    *

    * Any zero-length output segments located at begin or end are included in the output. * * @param writer the destination java.io.Writer for the output. * @param begin the character position at which to start the output, inclusive. * @param end the character position at which to end the output, exclusive. * @throws IOException if an I/O exception occurs. * @see #writeTo(Writer) */ public void writeTo(final Writer writer, final int begin, final int end) throws IOException { try { appendTo(writer,begin,end); } finally { writer.flush(); } } /** * Appends the final content of this output document to the specified Appendable object. *

    * The {@link #appendTo(Appendable, int begin, int end)} method allows the output of a portion of the output document.
    *
    * @param appendable the destination java.lang.Appendable object for the output.
    * @throws IOException if an I/O exception occurs.
    * @see #toString()
    */
    public void appendTo(final Appendable appendable) throws IOException {
        // The whole document is the range from position 0 to the length of the source text.
        final int sourceLength=sourceText.length();
        appendTo(appendable,0,sourceLength);
    }

    /**
    * Appends the specified portion of the final content of this output document to the specified Appendable object.
    *

    * Any zero-length output segments located at begin or end are included in the output. * * @param appendable the destination java.lang.Appendable object for the output. * @param begin the character position at which to start the output, inclusive. * @param end the character position at which to end the output, exclusive. * @throws IOException if an I/O exception occurs. * @see #appendTo(Appendable) */ public void appendTo(final Appendable appendable, final int begin, final int end) throws IOException { if (outputSegments.isEmpty()) { appendable.append(sourceText,begin,end); return; } int pos=begin; Collections.sort(outputSegments,OutputSegment.COMPARATOR); for (OutputSegment outputSegment : outputSegments) { if (outputSegment.getEnd()end) break; // stop processing output segments if they are not longer in the desired output range if (outputSegment.getBegin()==end && outputSegment.getEnd()>end) break; // stop processing output segments if they start at end unless they are zero length if (outputSegment.getBegin()>pos) { appendable.append(sourceText,pos,outputSegment.getBegin()); } if (outputSegment.getBegin()=0L ? estimatedMaximumOutputLength : -1L; } /** * Returns the final content of this output document as a String. * @return the final content of this output document as a String. * @see #writeTo(Writer) */ public String toString() { return CharStreamSourceUtil.toString(this); } /** * Returns a string representation of this object useful for debugging purposes. *

    * The output includes details of all the {@link #getRegisteredOutputSegments() registered output segments}. * * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { StringBuilder sb=new StringBuilder(); for (OutputSegment outputSegment : getRegisteredOutputSegments()) { if (outputSegment instanceof BlankOutputSegment) sb.append("Replace with Spaces: "); else if (outputSegment instanceof RemoveOutputSegment) sb.append("Remove: "); else sb.append("Replace: "); if (sourceText instanceof Source) { Source source=(Source)sourceText; sb.append('('); source.getRowColumnVector(outputSegment.getBegin()).appendTo(sb); sb.append('-'); source.getRowColumnVector(outputSegment.getEnd()).appendTo(sb); sb.append(')'); } else { sb.append("(p").append(outputSegment.getBegin()).append("-p").append(outputSegment.getEnd()).append(')'); } sb.append(' '); String outputFromSegment=outputSegment.toString(); if (outputFromSegment.length()<=20) { sb.append(outputFromSegment); } else { sb.append(outputFromSegment.substring(0,20)).append("..."); } sb.append(Config.NewLine); } return sb.toString(); } /** * Returns a list all of the {@linkplain #register(OutputSegment) registered} {@link OutputSegment} objects in this output document. *

    * The output segments are sorted in order of their {@linkplain OutputSegment#getBegin() starting position} in the document. *

    * The returned list is modifiable and any changes will affect the output generated by this OutputDocument. * * @return a list all of the {@linkplain #register(OutputSegment) registered} {@link OutputSegment} objects in this output document. */ public List getRegisteredOutputSegments() { Collections.sort(outputSegments,OutputSegment.COMPARATOR); return outputSegments; } } jericho-html-3.1/src/java/net/htmlparser/jericho/StreamEncodingDetector.java0000644000175000017500000003065211204550410027237 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

import java.util.*;
import java.io.*;
import java.nio.charset.*;
import java.net.*;

/**
 * Determines a preliminary character encoding for a byte stream, either from an HTTP
 * Content-Type header, from a Unicode Byte Order Mark, or by guessing from the pattern
 * of 00 bytes in the first four bytes of the stream.
 *
 * Based on information in:
 * http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
 * http://www.w3.org/TR/html401/charset.html#h-5.2
 */
final class StreamEncodingDetector {
    private final InputStream inputStream;
    private String encoding=null;
    private String encodingSpecificationInfo=null;
    // Remains true unless the encoding had to be guessed from byte patterns (see init()).
    private boolean definitive=true;
    // Set to false when the stream is shorter than four bytes (see init()).
    private boolean documentSpecifiedEncodingPossible=true;

    private static final String UTF_16="UTF-16";
    private static final String UTF_16BE="UTF-16BE";
    private static final String UTF_16LE="UTF-16LE";
    private static final String UTF_8="UTF-8";
    private static final String ISO_8859_1="ISO-8859-1";
    private static final String EBCDIC="Cp037"; // aka IBM037, not guaranteed, but available on most platforms

    // All of the following encodings are generally not supported in java and will usually throw an exception if decoding is attempted.
    // Specified explicitly using Byte Order Mark:
    private static final String SCSU="SCSU";
    private static final String UTF_7="UTF-7";
    private static final String UTF_EBCDIC="UTF-EBCDIC";
    private static final String BOCU_1="BOCU-1";
    private static final String UTF_32="UTF-32";
    // Guessed from presence of 00 bytes in first four bytes:
    private static final String UTF_32BE="UTF-32BE";
    private static final String UTF_32LE="UTF-32LE";

    /**
     * Detects the encoding of the content of the specified URL connection.
     * If the Content-Type header specifies a charset it is used as is and the stream is not inspected;
     * otherwise the first bytes of the stream are examined.
     *
     * @param urlConnection the connection whose input stream is to be examined.
     * @throws IOException if an I/O exception occurs.
     */
    public StreamEncodingDetector(final URLConnection urlConnection) throws IOException {
        // urlConnection.setRequestProperty("Accept-Charset","UTF-8, ISO-8859-1;q=0"); // used for debugging
        final InputStream urlInputStream=urlConnection.getInputStream();
        final String contentType=urlConnection.getContentType();
        if (contentType!=null) {
            encoding=Source.getCharsetParameterFromHttpHeaderValue(contentType);
            if (encoding!=null) {
                // No need to peek at the stream, so mark/reset support is not required here.
                inputStream=urlInputStream;
                encodingSpecificationInfo="HTTP header Content-Type: "+contentType;
                return;
            }
        }
        inputStream=urlInputStream.markSupported() ? urlInputStream : new BufferedInputStream(urlInputStream);
        init();
    }

    /**
     * Detects the encoding of the specified input stream by examining its first bytes.
     * The stream is wrapped in a BufferedInputStream if it does not support mark/reset.
     *
     * @param inputStream the stream to be examined.
     * @throws IOException if an I/O exception occurs.
     */
    public StreamEncodingDetector(final InputStream inputStream) throws IOException {
        this.inputStream=inputStream.markSupported() ? inputStream : new BufferedInputStream(inputStream);
        init();
    }

    /** Returns the stream to read from, positioned at its start (possibly a buffering wrapper around the original). */
    public InputStream getInputStream() {
        return inputStream;
    }

    /** Returns the detected or guessed encoding name, or null if the stream was empty. */
    public String getEncoding() {
        return encoding;
    }

    /** Returns a human-readable explanation of how the encoding was determined. */
    public String getEncodingSpecificationInfo() {
        return encodingSpecificationInfo;
    }

    /**
     * Indicates whether the encoding was specified explicitly (HTTP header or Byte Order Mark) rather than guessed.
     * Correctly spelled equivalent of {@link #isDifinitive()}.
     */
    public boolean isDefinitive() {
        return definitive;
    }

    /**
     * Same as {@link #isDefinitive()}.
     * The misspelled name is retained so that existing callers continue to compile.
     */
    public boolean isDifinitive() {
        return definitive;
    }

    /** Indicates whether the stream is long enough (at least four bytes) for the document itself to specify an encoding. */
    public boolean isDocumentSpecifiedEncodingPossible() {
        return documentSpecifiedEncodingPossible;
    }

    /**
     * Opens a reader over the stream using the detected encoding.
     *
     * @return a reader that decodes the stream with the detected encoding.
     * @throws UnsupportedEncodingException if the detected encoding is not supported on this platform.
     */
    public Reader openReader() throws UnsupportedEncodingException {
        if (encoding==null) return new InputStreamReader(inputStream,ISO_8859_1); // encoding==null only if input stream is empty so use an arbitrary encoding.
        if (!Charset.isSupported(encoding)) throw new UnsupportedEncodingException(encoding+" - "+encodingSpecificationInfo);
        return new InputStreamReader(inputStream,encoding);
    }

    // Records the result; always returns true so that callers can write "return setEncoding(...)".
    private boolean setEncoding(final String encoding, final String encodingSpecificationInfo) {
        this.encoding=encoding;
        this.encodingSpecificationInfo=encodingSpecificationInfo;
        return true;
    }

    // Peeks at up to four bytes of the stream (then resets it) and sets the encoding accordingly.
    private boolean init() throws IOException {
        inputStream.mark(4);
        final int b1=inputStream.read();
        if (b1==-1) return setEncoding(null,"empty input stream");
        final int b2=inputStream.read();
        final int b3=inputStream.read();
        final int b4=inputStream.read();
        inputStream.reset();
        // Check for Unicode Byte Order Mark:
        if (b1==0xEF) {
            if (b2==0xBB && b3==0xBF) return setEncoding(UTF_8,"UTF-8 Byte Order Mark (EF BB BF)");
        } else if (b1==0xFE) {
            if (b2==0xFF) return setEncoding(UTF_16,"UTF-16 big-endian Byte Order Mark (FE FF)");
        } else if (b1==0xFF) {
            if (b2==0xFE) {
                // The little-endian marks are FF FE (the messages previously said "FF EE", contradicting the test above).
                if (b3==0 && b4==0) return setEncoding(UTF_32,"UTF-32 little-endian Byte Order Mark (FF FE 00 00)");
                return setEncoding(UTF_16,"UTF-16 little-endian Byte Order Mark (FF FE)");
            }
        } else if (b1==0) {
            if (b2==0 && b3==0xFE && b4==0xFF) return setEncoding(UTF_32,"UTF-32 big-endian Byte Order Mark (00 00 FE FF)");
        } else if (b1==0x0E) {
            if (b2==0xFE && b3==0xFF) return setEncoding(SCSU,"SCSU Byte Order Mark (0E FE FF)");
        } else if (b1==0x2B) {
            if (b2==0x2F && b3==0x76) return setEncoding(UTF_7,"UTF-7 Byte Order Mark (2B 2F 76)");
        } else if (b1==0xDD) {
            if (b2==0x73 && b3==0x66 && b4==0x73) return setEncoding(UTF_EBCDIC,"UTF-EBCDIC Byte Order Mark (DD 73 66 73)");
        } else if (b1==0xFB) {
            if (b2==0xEE && b3==0x28) return setEncoding(BOCU_1,"BOCU-1 Byte Order Mark (FB EE 28)");
        }
        // No Unicode Byte Order Mark found.  Have to start guessing.
        definitive=false;
        // The best we can do is to provide an encoding that reflects the correct number and ordering of bytes for characters in the ASCII range.
        // The result will be one of ISO_8859_1, EBCDIC, UTF_16BE, UTF_16LE, UTF_32BE or UTF_32LE.
        // Assumes 00 bytes indicate multi-byte encodings rather than the presence of NUL characters or characters with a code that is a multiple of 0x100.
        if (b4==-1) {
            // The stream contains between 1 and 3 bytes.
            // This means the document can't possibly specify the encoding, so make a best guess based on the first 3 bytes.
            documentSpecifiedEncodingPossible=false;
            // It might be possible to rule out some encodings based on these bytes, but it is impossible to make a definite determination.
            // The main thing to determine is whether it is an 8-bit or 16-bit encoding.
            // In order to guess the most likely encoding, assume that the text contains only ASCII characters, and that any 00 bytes indicate a 16-bit encoding.
            // The only strictly 8-bit encoding guaranteed to be supported on all java platforms is ISO-8859-1 (UTF-8 uses a variable number of bytes per character).
            // If no 00 bytes are present it is safest to assume ISO-8859-1, as this accepts the full range of values 00-FF in every byte.
            // The stream contains exactly 1 or 3 bytes, so assume an 8-bit encoding regardless of whether any 00 bytes are present:
            if (b2==-1 || b3!=-1) return setEncoding(ISO_8859_1,"default 8-bit ASCII-compatible encoding (stream 3 bytes long)");
            // The stream contains exactly 2 bytes.
            if (b1==0) return setEncoding(UTF_16BE,"default 16-bit BE encoding (byte stream starts with 00, stream 2 bytes long)");
            if (b2==0) return setEncoding(UTF_16LE,"default 16-bit LE encoding (byte stream pattern XX 00, stream 2 bytes long)");
            // No 00 bytes present, assume 8-bit encoding:
            return setEncoding(ISO_8859_1,"default 8-bit ASCII-compatible encoding (no 00 bytes present, stream 2 bytes long)");
        }
        // Stream contains at least 4 bytes.
        // The patterns used for documentation are made up of:
        // 0 - zero byte
        // X - non-zero byte
        // ? - byte value not yet determined
        if (b1==0) {
            // pattern 0???
            if (b2==0) return setEncoding(UTF_32BE,"default 32-bit BE encoding (byte stream starts with 00 00)"); // pattern 00?? most likely indicates UTF-32BE
            // pattern 0X??
            // Regardless of the final two bytes, assume that the first two bytes indicate a 16-bit BE encoding.
            // There are many circumstances where this could be an incorrect assumption, for example:
            // - UTF-16LE encoding with first character U+0100 (or any other character whose code is a multiple of 100Hex)
            // - any encoding with first character NUL
            // - UTF-32BE encoding with first character outside of Basic Multilingual Plane (BMP)
            // Checking the final two bytes might give some clues as to whether any of these other situations are more likely,
            // but none of the clues will yield less than a 50% chance that the encoding is in fact UTF-16BE as suggested by the first two bytes.
            return setEncoding(UTF_16BE,"default 16-bit BE encoding (byte stream starts with 00)"); // >=50% chance that encoding is UTF-16BE
        }
        // pattern X???
        if (b4==0) {
            // pattern X??0
            if (b3==0) return setEncoding(UTF_32LE,"default 32-bit LE encoding (byte stream starts with pattern XX ?? 00 00)"); // pattern X?00 most likely indicates UTF-32LE
            // pattern X?X0
            // Regardless of the second byte, assume the fourth 00 byte indicates UTF-16LE.
            return setEncoding(UTF_16LE,"default 16-bit LE encoding (byte stream starts with pattern XX ?? XX 00)");
        }
        // pattern X??X
        if (b2==0) {
            // pattern X0?X
            // Assuming the second 00 byte doesn't indicate a NUL character, and that it is very unlikely that this is a 32-bit encoding
            // of a character outside of the BMP, we can assume that it indicates a 16-bit encoding.
            // If the pattern is X00X, there is a 50/50 chance that the encoding is BE or LE, with one of the characters having a code that is a multiple of 0x100.
            // This should be a very rare occurrence, and there is no more than a 50% chance that the encoding
            // will be different to that assumed (UTF-16LE) without checking for this occurrence, so don't bother checking for it.
            // If the pattern is X0XX, this is likely to indicate a 16-bit LE encoding with the second character > U+00FF.
            return setEncoding(UTF_16LE,"default 16-bit LE encoding (byte stream starts with pattern XX 00 ?? XX)");
        }
        // pattern XX?X
        if (b3==0) return setEncoding(UTF_16BE,"default 16-bit BE encoding (byte stream starts with pattern XX XX 00 XX)"); // pattern XX0X likely to indicate a 16-bit BE encoding with the first character > U+00FF.
        // pattern XXXX
        // Although it is still possible that this is a 16-bit encoding with the first two characters > U+00FF
        // Assume the more likely case of four 8-bit characters <= U+00FF.
        // Check whether it fits some common EBCDIC strings that might be found at the start of a document:
        if (b1==0x4C) {
            // first character is EBCDIC '<' (ASCII 'L'), check a couple more characters before assuming EBCDIC encoding:
            // NOTE(review): apart from the first condition, the checks in this block were partly destroyed when markup
            // was stripped from this copy of the file; they are reconstructed from the surviving fragments and the
            // Cp037 code table. Confirm against the pristine upstream source before relying on them.
            if (b2==0x6F && b3==0xA7 && b4==0x94) return setEncoding(EBCDIC,"default EBCDIC encoding (<?xml...> detected)"); // first four bytes are "<?xm" in EBCDIC
            if (b2==0x5A && b3==0xC4 && b4==0xD6) return setEncoding(EBCDIC,"default EBCDIC encoding (<!DOCTYPE...> detected)"); // first four bytes are "<!DO" in EBCDIC
            if ((b2&b3&b4&0x80)!=0) return setEncoding(EBCDIC,"default EBCDIC-compatible encoding (HTML element detected)"); // all 3 bytes after the '<' have the high-order bit set, as EBCDIC letters do
            // Although this is not an exhaustive check for EBCDIC, it is safer to assume a more common preliminary encoding if none of these conditions are met.
        }
        // Assume some other 8-bit encoding.  Most other 8-bit encodings are compatible with ASCII.
        // UTF-8 is not a good choice as it is not strictly an 8-bit encoding:
        // UTF-8 bytes with a value >= 0x80 indicate the presence of a multi-byte character, and there are many byte values that are illegal.
        // Therefore, choose the only true 8-bit encoding that accepts all byte values and is guaranteed to be available on all java implementations.
        return setEncoding(ISO_8859_1,"default 8-bit ASCII-compatible encoding (no 00 bytes present in first four bytes of stream)");
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent.java0000644000175000017500000000246111204550410033410 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

/**
 * Start tag type for a "mason component called with content" tag, which begins with "<&|" and ends with "&>".
 * A single shared {@link #INSTANCE} is used; the class cannot be instantiated from outside.
 */
final class StartTagTypeMasonComponentCalledWithContent extends StartTagTypeGenericImplementation {
    // Private so that INSTANCE is the only instance ever created.
    private StartTagTypeMasonComponentCalledWithContent() {
        super(
            "mason component called with content",
            "<&|",
            "&>",
            EndTagTypeMasonComponentCalledWithContent.INSTANCE,
            true
        );
    }

    /** The one and only instance of this tag type. */
    protected static final StartTagTypeMasonComponentCalledWithContent INSTANCE=new StartTagTypeMasonComponentCalledWithContent();
}
jericho-html-3.1/src/java/net/htmlparser/jericho/RemoveOutputSegment.java0000644000175000017500000000332011204550410026634 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; /** * Implements an {@link OutputSegment} with no content. 
*/ final class RemoveOutputSegment implements OutputSegment { private final int begin; private final int end; public RemoveOutputSegment(final int begin, final int end) { this.begin=begin; this.end=end; } public RemoveOutputSegment(final Segment segment) { this(segment.begin,segment.end); } public int getBegin() { return begin; } public int getEnd() { return end; } public void writeTo(final Writer writer) { appendTo(writer); } public void appendTo(final Appendable appendable) {} public long getEstimatedMaximumOutputLength() { return 0; } public String toString() { return ""; } public String getDebugInfo() { return "Remove: (p"+begin+"-p"+end+')'; } } jericho-html-3.1/src/java/net/htmlparser/jericho/HTMLElementName.java0000644000175000017500000014467211204550410025532 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Contains static fields representing the {@linkplain Element#getName() names} of * all elements defined in the HTML 4.01 specification. *

    * All of the name strings are in lower case. *

    * The {@link HTMLElements} class is closely related to this interface, containing static methods which group these names * by the characteristics of their associated elements. *

    * This interface does not specify any methods, but can be inherited by other classes, or statically imported (Java 5.0), * to provide less verbose access to the contained element name static fields. *

    * The field values in this interface can be used as name arguments in named tag searches. *

    * Note that since the Tag class implements HTMLElementName, all the constants defined in this interface * can be referred to via the Tag class. *
    For example, Tag.BODY is equivalent to HTMLElementName.BODY. * * @see HTMLElements * @see Element */ public interface HTMLElementName { /** * HTML element A - anchor. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String A="a"; /** * HTML element ABBR - abbreviated form (e.g., WWW, HTTP, etc.). *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String ABBR="abbr"; /** * HTML element ACRONYM - acronym. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String ACRONYM="acronym"; /** * HTML element ADDRESS - information on author. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String ADDRESS="address"; /** * HTML element APPLET - Java applet. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String APPLET="applet"; /** * HTML element AREA - client-side image map area. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String AREA="area"; /** * HTML element B - bold text style. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String B="b"; /** * HTML element BASE - document base URI. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String BASE="base"; /** * HTML element BASEFONT - base font size. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String BASEFONT="basefont"; /** * HTML element BDO - I18N BiDi over-ride. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String BDO="bdo"; /** * HTML element BIG - large text style. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String BIG="big"; /** * HTML element BLOCKQUOTE - long quotation. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String BLOCKQUOTE="blockquote"; /** * HTML element BODY - document body. *

    * The start tag of this element is {@linkplain HTMLElements#getStartTagOptionalElementNames() optional}. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * (none) *
    Terminating end tags: * {@link #BODY}, {@link #HTML} *
    Nonterminating elements: * {@link #HTML} *
    *

    * Note that the {@link #HTML} element is included as a * {@linkplain HTMLElements#getNonterminatingElementNames(String) nonterminating element} in case the source contains * (illegally) nested HTML elements. */ public static final String BODY="body"; /** * HTML element BR - forced line break. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String BR="br"; /** * HTML element BUTTON - push button. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String BUTTON="button"; /** * HTML element CAPTION - table caption. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String CAPTION="caption"; /** * HTML element CENTER - shorthand for DIV align=center. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String CENTER="center"; /** * HTML element CITE - citation. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String CITE="cite"; /** * HTML element CODE - computer code fragment. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String CODE="code"; /** * HTML element COL - table column. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String COL="col"; /** * HTML element COLGROUP - table column group. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #COLGROUP}, {@link #TBODY}, {@link #TFOOT}, {@link #THEAD}, {@link #TR} *
    Terminating end tags: * {@link #COLGROUP}, {@link #TABLE} *
    Nonterminating elements: * {@link #TABLE} *
    */ public static final String COLGROUP="colgroup"; /** * HTML element DD - definition description. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #DD}, {@link #DT} *
    Terminating end tags: * {@link #DD}, {@link #DL} *
    Nonterminating elements: * {@link #DL} *
    */ public static final String DD="dd"; /** * HTML element DEL - deleted text. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String DEL="del"; /** * HTML element DFN - instance definition. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String DFN="dfn"; /** * HTML element DIR - directory list. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String DIR="dir"; /** * HTML element DIV - generic language/style container. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. */ public static final String DIV="div"; /** * HTML element DL - definition list. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. */ public static final String DL="dl"; /** * HTML element DT - definition term. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #DD}, {@link #DT} *
    Terminating end tags: * {@link #DL}, {@link #DT} *
    Nonterminating elements: * {@link #DL} *
    */ public static final String DT="dt"; /** * HTML element EM - emphasis. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String EM="em"; /** * HTML element FIELDSET - form control group. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String FIELDSET="fieldset"; /** * HTML element FONT - local change to font. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String FONT="font"; /** * HTML element FORM - interactive form. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String FORM="form"; /** * HTML element FRAME - subwindow. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String FRAME="frame"; /** * HTML element FRAMESET - window subdivision. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String FRAMESET="frameset"; /** * HTML element H1 - heading. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String H1="h1"; /** * HTML element H2 - heading. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String H2="h2"; /** * HTML element H3 - heading. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String H3="h3"; /** * HTML element H4 - heading. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String H4="h4"; /** * HTML element H5 - heading. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String H5="h5"; /** * HTML element H6 - heading. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String H6="h6"; /** * HTML element HEAD - document head. *

    * The start tag of this element is {@linkplain HTMLElements#getStartTagOptionalElementNames() optional}. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #BODY}, {@link #FRAMESET} *
    Terminating end tags: * {@link #HEAD}, {@link #HTML} *
    Nonterminating elements: * (none) *
    */ public static final String HEAD="head"; /** * HTML element HR - horizontal rule. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String HR="hr"; /** * HTML element HTML - document root element. *

    * The start tag of this element is {@linkplain HTMLElements#getStartTagOptionalElementNames() optional}. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * (none) *
    Terminating end tags: * {@link #HTML} *
    Nonterminating elements: * {@link #HTML} *
    *

    * Note that the {@link #HTML} element is included as a * {@linkplain HTMLElements#getNonterminatingElementNames(String) nonterminating element} in case the source contains * (illegally) nested HTML elements. */ public static final String HTML="html"; /** * HTML element I - italic text style. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String I="i"; /** * HTML element IFRAME - inline subwindow. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String IFRAME="iframe"; /** * HTML element IMG - Embedded image. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String IMG="img"; /** * HTML element INPUT - form control. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String INPUT="input"; /** * HTML element INS - inserted text. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String INS="ins"; /** * HTML element ISINDEX - single line prompt. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String ISINDEX="isindex"; /** * HTML element KBD - text to be entered by the user. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String KBD="kbd"; /** * HTML element LABEL - form field label text. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String LABEL="label"; /** * HTML element LEGEND - fieldset legend. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String LEGEND="legend"; /** * HTML element LI - list item. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #LI} *
    Terminating end tags: * {@link #LI}, {@link #OL}, {@link #UL} *
    Nonterminating elements: * {@link #OL}, {@link #UL} *
    */ public static final String LI="li"; /** * HTML element LINK - a media-independent link. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String LINK="link"; /** * HTML element MAP - client-side image map. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String MAP="map"; /** * HTML element MENU - menu list. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String MENU="menu"; /** * HTML element META - generic metainformation. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String META="meta"; /** * HTML element NOFRAMES - alternate content container for non frame-based rendering. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String NOFRAMES="noframes"; /** * HTML element NOSCRIPT - alternate content container for non script-based rendering. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String NOSCRIPT="noscript"; /** * HTML element OBJECT - generic embedded object. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String OBJECT="object"; /** * HTML element OL - ordered list. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String OL="ol"; /** * HTML element OPTGROUP - option group. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String OPTGROUP="optgroup"; /** * HTML element OPTION - selectable choice. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #OPTGROUP}, {@link #OPTION} *
    Terminating end tags: * {@link #OPTION}, {@link #SELECT} *
    Nonterminating elements: * (none) *
    */ public static final String OPTION="option"; /** * HTML element P - paragraph. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@linkplain HTMLElements#getBlockLevelElementNames() All block-level element names},
    * {@link #DD}, {@link #DT}, {@link #LI}, {@link #TD}, {@link #TH} *
    Terminating end tags: * {@linkplain HTMLElements#getBlockLevelElementNames() All block-level element names},
    * {@link #BODY}, {@link #CAPTION}, {@link #DD}, {@link #DT}, {@link #HTML}, {@link #LEGEND}, {@link #TD}, {@link #TH}, * {@link #TBODY}, {@link #TFOOT}, {@link #THEAD}, {@link #TR} *
    Nonterminating elements: * (none) *
    *

    * The definition of this element in the HTML 4.01 specification * explicitly states that the P element cannot contain {@linkplain HTMLElements#getBlockLevelElementNames() block-level} * elements. * Despite this, all of the popular browsers (in at least some modes of operation) allow P elements to enclose * {@link #TABLE} elements, which are also block-level elements. *

    * It is possible to make this parser compatible with this incorrect behaviour by executing the following code: *

    	 * {@link HTMLElements#getTerminatingStartTagNames(String) HTMLElements.getTerminatingStartTagNames}(HTMLElementName.P).remove(HTMLElementName.TABLE);
    	 * {@link HTMLElements#getNonterminatingElementNames(String) HTMLElements.getNonterminatingElementNames}(HTMLElementName.P).add(HTMLElementName.TABLE);
    */ public static final String P="p"; /** * HTML element PARAM - named property value. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagForbiddenElementNames() forbidden}. */ public static final String PARAM="param"; /** * HTML element PRE - preformatted text. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String PRE="pre"; /** * HTML element Q - short inline quotation. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String Q="q"; /** * HTML element S - strike-through text style. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String S="s"; /** * HTML element SAMP - sample program output, scripts, etc. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String SAMP="samp"; /** * HTML element SCRIPT - script statements. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String SCRIPT="script"; /** * HTML element SELECT - option selector. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String SELECT="select"; /** * HTML element SMALL - small text style. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String SMALL="small"; /** * HTML element SPAN - generic language/style container. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. */ public static final String SPAN="span"; /** * HTML element STRIKE - strike-through text. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String STRIKE="strike"; /** * HTML element STRONG - strong emphasis. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String STRONG="strong"; /** * HTML element STYLE - style info. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String STYLE="style"; /** * HTML element SUB - subscript. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String SUB="sub"; /** * HTML element SUP - superscript. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String SUP="sup"; /** * HTML element TABLE - table. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String TABLE="table"; /** * HTML element TBODY - table body. *

    * The start tag of this element is {@linkplain HTMLElements#getStartTagOptionalElementNames() optional}. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #TBODY}, {@link #TFOOT}, {@link #THEAD} *
    Terminating end tags: * {@link #TABLE}, {@link #TBODY} *
    Nonterminating elements: * {@link #TABLE} *
    *

    * Note that the {@link #TFOOT} and {@link #THEAD} elements are included as * {@linkplain HTMLElements#getTerminatingStartTagNames(String) terminating start tags}, even though the * HTML 4.01 specification section 11.2.3 * states that they must precede the {@link #TBODY} element inside a {@link #TABLE}. * Most browsers tolerate an incorrect ordering of the {@link #THEAD}, {@link #TFOOT} and {@link #TBODY} elements, * so this parser also recognises the elements in any order. */ public static final String TBODY="tbody"; /** * HTML element TD - table data cell. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #TBODY}, {@link #TD}, {@link #TFOOT}, {@link #TH}, {@link #THEAD}, {@link #TR} *
    Terminating end tags: * {@link #TABLE}, {@link #TBODY}, {@link #TD}, {@link #TFOOT}, {@link #THEAD}, {@link #TR} *
    Nonterminating elements: * {@link #TABLE} *
    */ public static final String TD="td"; /** * HTML element TEXTAREA - multi-line text field. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. */ public static final String TEXTAREA="textarea"; /** * HTML element TFOOT - table footer. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #TBODY}, {@link #TFOOT}, {@link #THEAD} *
    Terminating end tags: * {@link #TABLE}, {@link #TFOOT} *
    Nonterminating elements: * {@link #TABLE} *
    */ public static final String TFOOT="tfoot"; /** * HTML element TH - table header cell. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #TBODY}, {@link #TD}, {@link #TFOOT}, {@link #TH}, {@link #THEAD}, {@link #TR} *
    Terminating end tags: * {@link #TABLE}, {@link #TBODY}, {@link #TFOOT}, {@link #TH}, {@link #THEAD}, {@link #TR} *
    Nonterminating elements: * {@link #TABLE} *
    */ public static final String TH="th"; /** * HTML element THEAD - table header. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #TBODY}, {@link #TFOOT}, {@link #THEAD} *
    Terminating end tags: * {@link #TABLE}, {@link #THEAD} *
    Nonterminating elements: * {@link #TABLE} *
    */ public static final String THEAD="thead"; /** * HTML element TITLE - document title. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String TITLE="title"; /** * HTML element TR - table row. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagOptionalElementNames() optional}: * * * * *
    Terminating start tags: * {@link #TBODY}, {@link #TFOOT}, {@link #THEAD}, {@link #TR} *
    Terminating end tags: * {@link #TABLE}, {@link #TBODY}, {@link #TFOOT}, {@link #THEAD}, {@link #TR} *
    Nonterminating elements: * {@link #TABLE} *
    */ public static final String TR="tr"; /** * HTML element TT - teletype or monospaced text style. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. */ public static final String TT="tt"; /** * HTML element U - underlined text style. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. *

    * This element is deprecated in HTML 4.01. * (see {@link HTMLElements#getDeprecatedElementNames()}) */ public static final String U="u"; /** * HTML element UL - unordered list. *

    * This is a {@linkplain HTMLElements#getBlockLevelElementNames() block-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String UL="ul"; /** * HTML element VAR - instance of a variable or program argument. *

    * This is an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level} element. *

    * The end tag of this element is {@linkplain HTMLElements#getEndTagRequiredElementNames() required}. */ public static final String VAR="var"; } jericho-html-3.1/src/java/net/htmlparser/jericho/CharStreamSourceUtil.java0000644000175000017500000000614711204550410026715 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; import java.nio.*;
/**
 * Contains static utility methods for manipulating the way data is retrieved from a {@link CharStreamSource} object.
 * <p>
 * See the documentation of the {@link CharStreamSource} class for details.
 */
public final class CharStreamSourceUtil {
	/** Initial buffer capacity used by {@link #toString(CharStreamSource)} when the source supplies no usable estimate. */
	private static final int DEFAULT_ESTIMATED_MAXIMUM_OUTPUT_LENGTH=2048;

	/** Not instantiable: this class only contains static utility methods. */
	private CharStreamSourceUtil() {}

	/**
	 * Returns a <code>Reader</code> that reads the output of the specified {@link CharStreamSource}.
	 * <p>
	 * The current implementation of this method simply returns <code>new StringReader(</code>{@link #toString(CharStreamSource) toString(charStreamSource)}<code>)</code>,
	 * but a future version may implement this method in a more memory efficient manner.
	 *
	 * @param charStreamSource  the character stream source producing the output.
	 * @return a <code>Reader</code> that reads the output of the specified {@link CharStreamSource}.
	 */
	public static Reader getReader(final CharStreamSource charStreamSource) {
		return new StringReader(toString(charStreamSource));
	}

	/**
	 * Returns the output of the specified {@link CharStreamSource} as a string.
	 * <p>
	 * The current implementation of this method appends the entire output of the source to an in-memory
	 * <code>StringBuilder</code> and returns its content,
	 * but a future version may implement this method in a more memory efficient manner, for example by utilising a temporary file.
	 * <p>
	 * The estimated maximum output length reported by the source is only used as the initial capacity of the buffer.
	 * If the estimate is negative or larger than <code>Integer.MAX_VALUE</code>, a default initial capacity is used instead.
	 *
	 * @param charStreamSource  the character stream source producing the output.
	 * @return the output of the specified {@link CharStreamSource} as a string.
	 * @throws RuntimeException  wrapping any <code>IOException</code> thrown while the source appends its output.
	 */
	public static String toString(final CharStreamSource charStreamSource) {
		final long estimatedMaximumOutputLength=charStreamSource.getEstimatedMaximumOutputLength();
		// The estimate is only a capacity hint.  A negative value is treated as "no estimate", and a value beyond
		// the int range must not be narrowed with a plain cast: it would wrap around, possibly to a negative
		// capacity, which makes the StringBuilder constructor throw NegativeArraySizeException.
		// Both cases fall back to the default capacity and let the builder grow on demand.
		final int initialCapacity;
		if (estimatedMaximumOutputLength<0L || estimatedMaximumOutputLength>Integer.MAX_VALUE) {
			initialCapacity=DEFAULT_ESTIMATED_MAXIMUM_OUTPUT_LENGTH;
		} else {
			initialCapacity=(int)estimatedMaximumOutputLength;
		}
		final StringBuilder sb=new StringBuilder(initialCapacity);
		try {
			charStreamSource.appendTo(sb);
		} catch (IOException ex) {
			// StringBuilder.append never throws IOException itself, so this is assumed to be raised
			// by the charStreamSource.appendTo implementation; rethrow unchecked with the cause preserved.
			throw new RuntimeException(ex);
		}
		return sb.toString();
	}
}
jericho-html-3.1/src/java/net/htmlparser/jericho/CharacterEntityReference.java0000644000175000017500000021072011204550410027547 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; import java.util.*; import java.io.*; /** * Represents an HTML Character Entity Reference. *

    * Click here to scroll down to the method summary. *

    * The full list of HTML character entity references can be found at the following URL:
    * http://www.w3.org/TR/REC-html40/sgml/entities.html. *

    * There are a total of 253 HTML character entity references, ranging from codepoints U+0022 to U+2666. *

    * Static methods to {@linkplain #encode(CharSequence) encode} and {@linkplain #decode(CharSequence) decode} strings * and single characters can be found in the {@link CharacterReference} superclass. *

    * The {@link #_apos &apos;} entity reference is not defined for use in HTML. * It is defined in the XHTML Special Characters Entity Set, * and is the only one that is not included in both HTML and XHTML. * For this reason, the &apos; entity reference is recognised by this library in decoding functions, but in encoding functions * the numeric character reference &#39; is used instead. * Most modern browsers support it in both XHTML and HTML, with the notable exception * of Microsoft Internet Explorer 6.0, which doesn't support it in either. *

    * CharacterEntityReference instances are obtained using one of the following methods: *

      *
    • {@link CharacterReference#parse(CharSequence characterReferenceText)} *
    • {@link Source#getNextCharacterReference(int pos)} *
    • {@link Source#getPreviousCharacterReference(int pos)} *
    • {@link Segment#getAllCharacterReferences()} *
    * * @see CharacterReference * @see NumericCharacterReference */ public class CharacterEntityReference extends CharacterReference { private String name; /**   &nbsp; = &#160; -- no-break space = non-breaking space, U+00A0 ISOnum. */ public static final char _nbsp='\u00A0'; /** ¡ &iexcl; = &#161; -- inverted exclamation mark, U+00A1 ISOnum. */ public static final char _iexcl='\u00A1'; /** ¢ &cent; = &#162; -- cent sign, U+00A2 ISOnum. */ public static final char _cent='\u00A2'; /** £ &pound; = &#163; -- pound sign, U+00A3 ISOnum. */ public static final char _pound='\u00A3'; /** ¤ &curren; = &#164; -- currency sign, U+00A4 ISOnum. */ public static final char _curren='\u00A4'; /** ¥ &yen; = &#165; -- yen sign = yuan sign, U+00A5 ISOnum. */ public static final char _yen='\u00A5'; /** ¦ &brvbar; = &#166; -- broken bar = broken vertical bar, U+00A6 ISOnum. */ public static final char _brvbar='\u00A6'; /** § &sect; = &#167; -- section sign, U+00A7 ISOnum. */ public static final char _sect='\u00A7'; /** ¨ &uml; = &#168; -- diaeresis = spacing diaeresis, U+00A8 ISOdia. */ public static final char _uml='\u00A8'; /** © &copy; = &#169; -- copyright sign, U+00A9 ISOnum. */ public static final char _copy='\u00A9'; /** ª &ordf; = &#170; -- feminine ordinal indicator, U+00AA ISOnum. */ public static final char _ordf='\u00AA'; /** « &laquo; = &#171; -- left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum. */ public static final char _laquo='\u00AB'; /** ¬ &not; = &#172; -- not sign = angled dash, U+00AC ISOnum. */ public static final char _not='\u00AC'; /** ­ &shy; = &#173; -- soft hyphen = discretionary hyphen, U+00AD ISOnum. */ public static final char _shy='\u00AD'; /** ® &reg; = &#174; -- registered sign = registered trade mark sign, U+00AE ISOnum. */ public static final char _reg='\u00AE'; /** ¯ &macr; = &#175; -- macron = spacing macron = overline = APL overbar, U+00AF ISOdia. 
*/ public static final char _macr='\u00AF'; /** ° &deg; = &#176; -- degree sign, U+00B0 ISOnum. */ public static final char _deg='\u00B0'; /** ± &plusmn; = &#177; -- plus-minus sign = plus-or-minus sign, U+00B1 ISOnum. */ public static final char _plusmn='\u00B1'; /** ² &sup2; = &#178; -- superscript two = superscript digit two = squared, U+00B2 ISOnum. */ public static final char _sup2='\u00B2'; /** ³ &sup3; = &#179; -- superscript three = superscript digit three = cubed, U+00B3 ISOnum. */ public static final char _sup3='\u00B3'; /** ´ &acute; = &#180; -- acute accent = spacing acute, U+00B4 ISOdia. */ public static final char _acute='\u00B4'; /** µ &micro; = &#181; -- micro sign, U+00B5 ISOnum. */ public static final char _micro='\u00B5'; /** &para; = &#182; -- pilcrow sign = paragraph sign, U+00B6 ISOnum. */ public static final char _para='\u00B6'; /** · &middot; = &#183; -- middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum. */ public static final char _middot='\u00B7'; /** ¸ &cedil; = &#184; -- cedilla = spacing cedilla, U+00B8 ISOdia. */ public static final char _cedil='\u00B8'; /** ¹ &sup1; = &#185; -- superscript one = superscript digit one, U+00B9 ISOnum. */ public static final char _sup1='\u00B9'; /** º &ordm; = &#186; -- masculine ordinal indicator, U+00BA ISOnum. */ public static final char _ordm='\u00BA'; /** » &raquo; = &#187; -- right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum. */ public static final char _raquo='\u00BB'; /** ¼ &frac14; = &#188; -- vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum. */ public static final char _frac14='\u00BC'; /** ½ &frac12; = &#189; -- vulgar fraction one half = fraction one half, U+00BD ISOnum. */ public static final char _frac12='\u00BD'; /** ¾ &frac34; = &#190; -- vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum. 
*/ public static final char _frac34='\u00BE'; /** ¿ &iquest; = &#191; -- inverted question mark = turned question mark, U+00BF ISOnum. */ public static final char _iquest='\u00BF'; /** À &Agrave; = &#192; -- latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1. */ public static final char _Agrave='\u00C0'; /** Á &Aacute; = &#193; -- latin capital letter A with acute, U+00C1 ISOlat1. */ public static final char _Aacute='\u00C1'; /** Â &Acirc; = &#194; -- latin capital letter A with circumflex, U+00C2 ISOlat1. */ public static final char _Acirc='\u00C2'; /** Ã &Atilde; = &#195; -- latin capital letter A with tilde, U+00C3 ISOlat1. */ public static final char _Atilde='\u00C3'; /** Ä &Auml; = &#196; -- latin capital letter A with diaeresis, U+00C4 ISOlat1. */ public static final char _Auml='\u00C4'; /** Å &Aring; = &#197; -- latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1. */ public static final char _Aring='\u00C5'; /** Æ &AElig; = &#198; -- latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1. */ public static final char _AElig='\u00C6'; /** Ç &Ccedil; = &#199; -- latin capital letter C with cedilla, U+00C7 ISOlat1. */ public static final char _Ccedil='\u00C7'; /** È &Egrave; = &#200; -- latin capital letter E with grave, U+00C8 ISOlat1. */ public static final char _Egrave='\u00C8'; /** É &Eacute; = &#201; -- latin capital letter E with acute, U+00C9 ISOlat1. */ public static final char _Eacute='\u00C9'; /** Ê &Ecirc; = &#202; -- latin capital letter E with circumflex, U+00CA ISOlat1. */ public static final char _Ecirc='\u00CA'; /** Ë &Euml; = &#203; -- latin capital letter E with diaeresis, U+00CB ISOlat1. */ public static final char _Euml='\u00CB'; /** Ì &Igrave; = &#204; -- latin capital letter I with grave, U+00CC ISOlat1. */ public static final char _Igrave='\u00CC'; /** Í &Iacute; = &#205; -- latin capital letter I with acute, U+00CD ISOlat1. 
*/ public static final char _Iacute='\u00CD'; /** Î &Icirc; = &#206; -- latin capital letter I with circumflex, U+00CE ISOlat1. */ public static final char _Icirc='\u00CE'; /** Ï &Iuml; = &#207; -- latin capital letter I with diaeresis, U+00CF ISOlat1. */ public static final char _Iuml='\u00CF'; /** Ð &ETH; = &#208; -- latin capital letter ETH, U+00D0 ISOlat1. */ public static final char _ETH='\u00D0'; /** Ñ &Ntilde; = &#209; -- latin capital letter N with tilde, U+00D1 ISOlat1. */ public static final char _Ntilde='\u00D1'; /** Ò &Ograve; = &#210; -- latin capital letter O with grave, U+00D2 ISOlat1. */ public static final char _Ograve='\u00D2'; /** Ó &Oacute; = &#211; -- latin capital letter O with acute, U+00D3 ISOlat1. */ public static final char _Oacute='\u00D3'; /** Ô &Ocirc; = &#212; -- latin capital letter O with circumflex, U+00D4 ISOlat1. */ public static final char _Ocirc='\u00D4'; /** Õ &Otilde; = &#213; -- latin capital letter O with tilde, U+00D5 ISOlat1. */ public static final char _Otilde='\u00D5'; /** Ö &Ouml; = &#214; -- latin capital letter O with diaeresis, U+00D6 ISOlat1. */ public static final char _Ouml='\u00D6'; /** × &times; = &#215; -- multiplication sign, U+00D7 ISOnum. */ public static final char _times='\u00D7'; /** Ø &Oslash; = &#216; -- latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1. */ public static final char _Oslash='\u00D8'; /** Ù &Ugrave; = &#217; -- latin capital letter U with grave, U+00D9 ISOlat1. */ public static final char _Ugrave='\u00D9'; /** Ú &Uacute; = &#218; -- latin capital letter U with acute, U+00DA ISOlat1. */ public static final char _Uacute='\u00DA'; /** Û &Ucirc; = &#219; -- latin capital letter U with circumflex, U+00DB ISOlat1. */ public static final char _Ucirc='\u00DB'; /** Ü &Uuml; = &#220; -- latin capital letter U with diaeresis, U+00DC ISOlat1. */ public static final char _Uuml='\u00DC'; /** Ý &Yacute; = &#221; -- latin capital letter Y with acute, U+00DD ISOlat1. 
*/ public static final char _Yacute='\u00DD'; /** Þ &THORN; = &#222; -- latin capital letter THORN, U+00DE ISOlat1. */ public static final char _THORN='\u00DE'; /** ß &szlig; = &#223; -- latin small letter sharp s = ess-zed, U+00DF ISOlat1. */ public static final char _szlig='\u00DF'; /** à &agrave; = &#224; -- latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1. */ public static final char _agrave='\u00E0'; /** á &aacute; = &#225; -- latin small letter a with acute, U+00E1 ISOlat1. */ public static final char _aacute='\u00E1'; /** â &acirc; = &#226; -- latin small letter a with circumflex, U+00E2 ISOlat1. */ public static final char _acirc='\u00E2'; /** ã &atilde; = &#227; -- latin small letter a with tilde, U+00E3 ISOlat1. */ public static final char _atilde='\u00E3'; /** ä &auml; = &#228; -- latin small letter a with diaeresis, U+00E4 ISOlat1. */ public static final char _auml='\u00E4'; /** å &aring; = &#229; -- latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1. */ public static final char _aring='\u00E5'; /** æ &aelig; = &#230; -- latin small letter ae = latin small ligature ae, U+00E6 ISOlat1. */ public static final char _aelig='\u00E6'; /** ç &ccedil; = &#231; -- latin small letter c with cedilla, U+00E7 ISOlat1. */ public static final char _ccedil='\u00E7'; /** è &egrave; = &#232; -- latin small letter e with grave, U+00E8 ISOlat1. */ public static final char _egrave='\u00E8'; /** é &eacute; = &#233; -- latin small letter e with acute, U+00E9 ISOlat1. */ public static final char _eacute='\u00E9'; /** ê &ecirc; = &#234; -- latin small letter e with circumflex, U+00EA ISOlat1. */ public static final char _ecirc='\u00EA'; /** ë &euml; = &#235; -- latin small letter e with diaeresis, U+00EB ISOlat1. */ public static final char _euml='\u00EB'; /** ì &igrave; = &#236; -- latin small letter i with grave, U+00EC ISOlat1. 
*/ public static final char _igrave='\u00EC'; /** í &iacute; = &#237; -- latin small letter i with acute, U+00ED ISOlat1. */ public static final char _iacute='\u00ED'; /** î &icirc; = &#238; -- latin small letter i with circumflex, U+00EE ISOlat1. */ public static final char _icirc='\u00EE'; /** ï &iuml; = &#239; -- latin small letter i with diaeresis, U+00EF ISOlat1. */ public static final char _iuml='\u00EF'; /** ð &eth; = &#240; -- latin small letter eth, U+00F0 ISOlat1. */ public static final char _eth='\u00F0'; /** ñ &ntilde; = &#241; -- latin small letter n with tilde, U+00F1 ISOlat1. */ public static final char _ntilde='\u00F1'; /** ò &ograve; = &#242; -- latin small letter o with grave, U+00F2 ISOlat1. */ public static final char _ograve='\u00F2'; /** ó &oacute; = &#243; -- latin small letter o with acute, U+00F3 ISOlat1. */ public static final char _oacute='\u00F3'; /** ô &ocirc; = &#244; -- latin small letter o with circumflex, U+00F4 ISOlat1. */ public static final char _ocirc='\u00F4'; /** õ &otilde; = &#245; -- latin small letter o with tilde, U+00F5 ISOlat1. */ public static final char _otilde='\u00F5'; /** ö &ouml; = &#246; -- latin small letter o with diaeresis, U+00F6 ISOlat1. */ public static final char _ouml='\u00F6'; /** ÷ &divide; = &#247; -- division sign, U+00F7 ISOnum. */ public static final char _divide='\u00F7'; /** ø &oslash; = &#248; -- latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1. */ public static final char _oslash='\u00F8'; /** ù &ugrave; = &#249; -- latin small letter u with grave, U+00F9 ISOlat1. */ public static final char _ugrave='\u00F9'; /** ú &uacute; = &#250; -- latin small letter u with acute, U+00FA ISOlat1. */ public static final char _uacute='\u00FA'; /** û &ucirc; = &#251; -- latin small letter u with circumflex, U+00FB ISOlat1. */ public static final char _ucirc='\u00FB'; /** ü &uuml; = &#252; -- latin small letter u with diaeresis, U+00FC ISOlat1. 
*/ public static final char _uuml='\u00FC'; /** ý &yacute; = &#253; -- latin small letter y with acute, U+00FD ISOlat1. */ public static final char _yacute='\u00FD'; /** þ &thorn; = &#254; -- latin small letter thorn, U+00FE ISOlat1. */ public static final char _thorn='\u00FE'; /** ÿ &yuml; = &#255; -- latin small letter y with diaeresis, U+00FF ISOlat1. */ public static final char _yuml='\u00FF'; /** ƒ &fnof; = &#402; -- latin small letter f with hook = function = florin, U+0192 ISOtech. */ public static final char _fnof='\u0192'; /** Α &Alpha; = &#913; -- greek capital letter alpha, U+0391. */ public static final char _Alpha='\u0391'; /** Β &Beta; = &#914; -- greek capital letter beta, U+0392. */ public static final char _Beta='\u0392'; /** Γ &Gamma; = &#915; -- greek capital letter gamma, U+0393 ISOgrk3. */ public static final char _Gamma='\u0393'; /** Δ &Delta; = &#916; -- greek capital letter delta, U+0394 ISOgrk3. */ public static final char _Delta='\u0394'; /** Ε &Epsilon; = &#917; -- greek capital letter epsilon, U+0395. */ public static final char _Epsilon='\u0395'; /** Ζ &Zeta; = &#918; -- greek capital letter zeta, U+0396. */ public static final char _Zeta='\u0396'; /** Η &Eta; = &#919; -- greek capital letter eta, U+0397. */ public static final char _Eta='\u0397'; /** Θ &Theta; = &#920; -- greek capital letter theta, U+0398 ISOgrk3. */ public static final char _Theta='\u0398'; /** Ι &Iota; = &#921; -- greek capital letter iota, U+0399. */ public static final char _Iota='\u0399'; /** Κ &Kappa; = &#922; -- greek capital letter kappa, U+039A. */ public static final char _Kappa='\u039A'; /** Λ &Lambda; = &#923; -- greek capital letter lambda, U+039B ISOgrk3. */ public static final char _Lambda='\u039B'; /** Μ &Mu; = &#924; -- greek capital letter mu, U+039C. */ public static final char _Mu='\u039C'; /** Ν &Nu; = &#925; -- greek capital letter nu, U+039D. 
*/ public static final char _Nu='\u039D'; /** Ξ &Xi; = &#926; -- greek capital letter xi, U+039E ISOgrk3. */ public static final char _Xi='\u039E'; /** Ο &Omicron; = &#927; -- greek capital letter omicron, U+039F. */ public static final char _Omicron='\u039F'; /** Π &Pi; = &#928; -- greek capital letter pi, U+03A0 ISOgrk3. */ public static final char _Pi='\u03A0'; /** Ρ &Rho; = &#929; -- greek capital letter rho, U+03A1. */ public static final char _Rho='\u03A1'; /** Σ &Sigma; = &#931; -- greek capital letter sigma, U+03A3 ISOgrk3. */ public static final char _Sigma='\u03A3'; /** Τ &Tau; = &#932; -- greek capital letter tau, U+03A4. */ public static final char _Tau='\u03A4'; /** Υ &Upsilon; = &#933; -- greek capital letter upsilon, U+03A5 ISOgrk3. */ public static final char _Upsilon='\u03A5'; /** Φ &Phi; = &#934; -- greek capital letter phi, U+03A6 ISOgrk3. */ public static final char _Phi='\u03A6'; /** Χ &Chi; = &#935; -- greek capital letter chi, U+03A7. */ public static final char _Chi='\u03A7'; /** Ψ &Psi; = &#936; -- greek capital letter psi, U+03A8 ISOgrk3. */ public static final char _Psi='\u03A8'; /** Ω &Omega; = &#937; -- greek capital letter omega, U+03A9 ISOgrk3. */ public static final char _Omega='\u03A9'; /** α &alpha; = &#945; -- greek small letter alpha, U+03B1 ISOgrk3. */ public static final char _alpha='\u03B1'; /** β &beta; = &#946; -- greek small letter beta, U+03B2 ISOgrk3. */ public static final char _beta='\u03B2'; /** γ &gamma; = &#947; -- greek small letter gamma, U+03B3 ISOgrk3. */ public static final char _gamma='\u03B3'; /** δ &delta; = &#948; -- greek small letter delta, U+03B4 ISOgrk3. */ public static final char _delta='\u03B4'; /** ε &epsilon; = &#949; -- greek small letter epsilon, U+03B5 ISOgrk3. */ public static final char _epsilon='\u03B5'; /** ζ &zeta; = &#950; -- greek small letter zeta, U+03B6 ISOgrk3. */ public static final char _zeta='\u03B6'; /** η &eta; = &#951; -- greek small letter eta, U+03B7 ISOgrk3. 
*/ public static final char _eta='\u03B7'; /** θ &theta; = &#952; -- greek small letter theta, U+03B8 ISOgrk3. */ public static final char _theta='\u03B8'; /** ι &iota; = &#953; -- greek small letter iota, U+03B9 ISOgrk3. */ public static final char _iota='\u03B9'; /** κ &kappa; = &#954; -- greek small letter kappa, U+03BA ISOgrk3. */ public static final char _kappa='\u03BA'; /** λ &lambda; = &#955; -- greek small letter lambda, U+03BB ISOgrk3. */ public static final char _lambda='\u03BB'; /** μ &mu; = &#956; -- greek small letter mu, U+03BC ISOgrk3. */ public static final char _mu='\u03BC'; /** ν &nu; = &#957; -- greek small letter nu, U+03BD ISOgrk3. */ public static final char _nu='\u03BD'; /** ξ &xi; = &#958; -- greek small letter xi, U+03BE ISOgrk3. */ public static final char _xi='\u03BE'; /** ο &omicron; = &#959; -- greek small letter omicron, U+03BF NEW. */ public static final char _omicron='\u03BF'; /** π &pi; = &#960; -- greek small letter pi, U+03C0 ISOgrk3. */ public static final char _pi='\u03C0'; /** ρ &rho; = &#961; -- greek small letter rho, U+03C1 ISOgrk3. */ public static final char _rho='\u03C1'; /** ς &sigmaf; = &#962; -- greek small letter final sigma, U+03C2 ISOgrk3. */ public static final char _sigmaf='\u03C2'; /** σ &sigma; = &#963; -- greek small letter sigma, U+03C3 ISOgrk3. */ public static final char _sigma='\u03C3'; /** τ &tau; = &#964; -- greek small letter tau, U+03C4 ISOgrk3. */ public static final char _tau='\u03C4'; /** υ &upsilon; = &#965; -- greek small letter upsilon, U+03C5 ISOgrk3. */ public static final char _upsilon='\u03C5'; /** φ &phi; = &#966; -- greek small letter phi, U+03C6 ISOgrk3. */ public static final char _phi='\u03C6'; /** χ &chi; = &#967; -- greek small letter chi, U+03C7 ISOgrk3. */ public static final char _chi='\u03C7'; /** ψ &psi; = &#968; -- greek small letter psi, U+03C8 ISOgrk3. */ public static final char _psi='\u03C8'; /** ω &omega; = &#969; -- greek small letter omega, U+03C9 ISOgrk3. 
*/ public static final char _omega='\u03C9'; /** ϑ &thetasym; = &#977; -- greek small letter theta symbol, U+03D1 NEW. */ public static final char _thetasym='\u03D1'; /** ϒ &upsih; = &#978; -- greek upsilon with hook symbol, U+03D2 NEW. */ public static final char _upsih='\u03D2'; /** ϖ &piv; = &#982; -- greek pi symbol, U+03D6 ISOgrk3. */ public static final char _piv='\u03D6'; /** &bull; = &#8226; -- bullet = black small circle, U+2022 ISOpub
    (see comments).

    bullet is NOT the same as bullet operator, U+2219

    */ public static final char _bull='\u2022'; /** &hellip; = &#8230; -- horizontal ellipsis = three dot leader, U+2026 ISOpub. */ public static final char _hellip='\u2026'; /** &prime; = &#8242; -- prime = minutes = feet, U+2032 ISOtech. */ public static final char _prime='\u2032'; /** &Prime; = &#8243; -- double prime = seconds = inches, U+2033 ISOtech. */ public static final char _Prime='\u2033'; /** &oline; = &#8254; -- overline = spacing overscore, U+203E NEW. */ public static final char _oline='\u203E'; /** &frasl; = &#8260; -- fraction slash, U+2044 NEW. */ public static final char _frasl='\u2044'; /** &weierp; = &#8472; -- script capital P = power set = Weierstrass p, U+2118 ISOamso. */ public static final char _weierp='\u2118'; /** &image; = &#8465; -- black-letter capital I = imaginary part, U+2111 ISOamso. */ public static final char _image='\u2111'; /** &real; = &#8476; -- black-letter capital R = real part symbol, U+211C ISOamso. */ public static final char _real='\u211C'; /** &trade; = &#8482; -- trade mark sign, U+2122 ISOnum. */ public static final char _trade='\u2122'; /** &alefsym; = &#8501; -- alef symbol = first transfinite cardinal, U+2135 NEW
    (see comments).

    alef symbol is NOT the same as hebrew letter alef, U+05D0 although the same glyph could be used to depict both characters

    */ public static final char _alefsym='\u2135'; /** &larr; = &#8592; -- leftwards arrow, U+2190 ISOnum. */ public static final char _larr='\u2190'; /** &uarr; = &#8593; -- upwards arrow, U+2191 ISOnum. */ public static final char _uarr='\u2191'; /** &rarr; = &#8594; -- rightwards arrow, U+2192 ISOnum. */ public static final char _rarr='\u2192'; /** &darr; = &#8595; -- downwards arrow, U+2193 ISOnum. */ public static final char _darr='\u2193'; /** &harr; = &#8596; -- left right arrow, U+2194 ISOamsa. */ public static final char _harr='\u2194'; /** &crarr; = &#8629; -- downwards arrow with corner leftwards = carriage return, U+21B5 NEW. */ public static final char _crarr='\u21B5'; /** &lArr; = &#8656; -- leftwards double arrow, U+21D0 ISOtech
    (see comments).

    ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function, so lArr can be used for 'is implied by' as ISOtech suggests

    */ public static final char _lArr='\u21D0'; /** &uArr; = &#8657; -- upwards double arrow, U+21D1 ISOamsa. */ public static final char _uArr='\u21D1'; /** &rArr; = &#8658; -- rightwards double arrow, U+21D2 ISOtech
    (see comments).

    ISO 10646 does not say this is the 'implies' character but does not have another character with this function, so rArr can be used for 'implies' as ISOtech suggests

    */ public static final char _rArr='\u21D2'; /** &dArr; = &#8659; -- downwards double arrow, U+21D3 ISOamsa. */ public static final char _dArr='\u21D3'; /** &hArr; = &#8660; -- left right double arrow, U+21D4 ISOamsa. */ public static final char _hArr='\u21D4'; /** &forall; = &#8704; -- for all, U+2200 ISOtech. */ public static final char _forall='\u2200'; /** &part; = &#8706; -- partial differential, U+2202 ISOtech. */ public static final char _part='\u2202'; /** &exist; = &#8707; -- there exists, U+2203 ISOtech. */ public static final char _exist='\u2203'; /** &empty; = &#8709; -- empty set = null set = diameter, U+2205 ISOamso. */ public static final char _empty='\u2205'; /** &nabla; = &#8711; -- nabla = backward difference, U+2207 ISOtech. */ public static final char _nabla='\u2207'; /** &isin; = &#8712; -- element of, U+2208 ISOtech. */ public static final char _isin='\u2208'; /** &notin; = &#8713; -- not an element of, U+2209 ISOtech. */ public static final char _notin='\u2209'; /** &ni; = &#8715; -- contains as member, U+220B ISOtech
    (see comments).

    should there be a more memorable name than 'ni'?

    */ public static final char _ni='\u220B'; /** &prod; = &#8719; -- n-ary product = product sign, U+220F ISOamsb
    (see comments).

    prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both

    */ public static final char _prod='\u220F'; /** &sum; = &#8721; -- n-ary summation, U+2211 ISOamsb
    (see comments).

    sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both

    */ public static final char _sum='\u2211'; /** &minus; = &#8722; -- minus sign, U+2212 ISOtech. */ public static final char _minus='\u2212'; /** &lowast; = &#8727; -- asterisk operator, U+2217 ISOtech. */ public static final char _lowast='\u2217'; /** &radic; = &#8730; -- square root = radical sign, U+221A ISOtech. */ public static final char _radic='\u221A'; /** &prop; = &#8733; -- proportional to, U+221D ISOtech. */ public static final char _prop='\u221D'; /** &infin; = &#8734; -- infinity, U+221E ISOtech. */ public static final char _infin='\u221E'; /** &ang; = &#8736; -- angle, U+2220 ISOamso. */ public static final char _ang='\u2220'; /** &and; = &#8743; -- logical and = wedge, U+2227 ISOtech. */ public static final char _and='\u2227'; /** &or; = &#8744; -- logical or = vee, U+2228 ISOtech. */ public static final char _or='\u2228'; /** &cap; = &#8745; -- intersection = cap, U+2229 ISOtech. */ public static final char _cap='\u2229'; /** &cup; = &#8746; -- union = cup, U+222A ISOtech. */ public static final char _cup='\u222A'; /** &int; = &#8747; -- integral, U+222B ISOtech. */ public static final char _int='\u222B'; /** &there4; = &#8756; -- therefore, U+2234 ISOtech. */ public static final char _there4='\u2234'; /** &sim; = &#8764; -- tilde operator = varies with = similar to, U+223C ISOtech
    (see comments).

    tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both

    */ public static final char _sim='\u223C'; /** &cong; = &#8773; -- approximately equal to, U+2245 ISOtech. */ public static final char _cong='\u2245'; /** &asymp; = &#8776; -- almost equal to = asymptotic to, U+2248 ISOamsr. */ public static final char _asymp='\u2248'; /** &ne; = &#8800; -- not equal to, U+2260 ISOtech. */ public static final char _ne='\u2260'; /** &equiv; = &#8801; -- identical to, U+2261 ISOtech. */ public static final char _equiv='\u2261'; /** &le; = &#8804; -- less-than or equal to, U+2264 ISOtech. */ public static final char _le='\u2264'; /** &ge; = &#8805; -- greater-than or equal to, U+2265 ISOtech. */ public static final char _ge='\u2265'; /** &sub; = &#8834; -- subset of, U+2282 ISOtech. */ public static final char _sub='\u2282'; /** &sup; = &#8835; -- superset of, U+2283 ISOtech
    (see comments).

    note that nsup, 'not a superset of, U+2285' is not covered by the Symbol font encoding and is not included. Should it be, for symmetry? It is in ISOamsn

    */ public static final char _sup='\u2283'; /** &nsub; = &#8836; -- not a subset of, U+2284 ISOamsn. */ public static final char _nsub='\u2284'; /** &sube; = &#8838; -- subset of or equal to, U+2286 ISOtech. */ public static final char _sube='\u2286'; /** &supe; = &#8839; -- superset of or equal to, U+2287 ISOtech. */ public static final char _supe='\u2287'; /** &oplus; = &#8853; -- circled plus = direct sum, U+2295 ISOamsb. */ public static final char _oplus='\u2295'; /** &otimes; = &#8855; -- circled times = vector product, U+2297 ISOamsb. */ public static final char _otimes='\u2297'; /** &perp; = &#8869; -- up tack = orthogonal to = perpendicular, U+22A5 ISOtech. */ public static final char _perp='\u22A5'; /** &sdot; = &#8901; -- dot operator, U+22C5 ISOamsb
    (see comments).

    dot operator is NOT the same character as U+00B7 middle dot

    */ public static final char _sdot='\u22C5'; /** &lceil; = &#8968; -- left ceiling = APL upstile, U+2308 ISOamsc. */ public static final char _lceil='\u2308'; /** &rceil; = &#8969; -- right ceiling, U+2309 ISOamsc. */ public static final char _rceil='\u2309'; /** &lfloor; = &#8970; -- left floor = APL downstile, U+230A ISOamsc. */ public static final char _lfloor='\u230A'; /** &rfloor; = &#8971; -- right floor, U+230B ISOamsc. */ public static final char _rfloor='\u230B'; /** &lang; = &#9001; -- left-pointing angle bracket = bra, U+2329 ISOtech
    (see comments).

    lang is NOT the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark'

    */ public static final char _lang='\u2329'; /** &rang; = &#9002; -- right-pointing angle bracket = ket, U+232A ISOtech
    (see comments).

    rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark'

    */ public static final char _rang='\u232A'; /** &loz; = &#9674; -- lozenge, U+25CA ISOpub. */ public static final char _loz='\u25CA'; /** &spades; = &#9824; -- black spade suit, U+2660 ISOpub
    (see comments).

    black here seems to mean filled as opposed to hollow

    */ public static final char _spades='\u2660'; /** &clubs; = &#9827; -- black club suit = shamrock, U+2663 ISOpub. */ public static final char _clubs='\u2663'; /** &hearts; = &#9829; -- black heart suit = valentine, U+2665 ISOpub. */ public static final char _hearts='\u2665'; /** &diams; = &#9830; -- black diamond suit, U+2666 ISOpub. */ public static final char _diams='\u2666'; /** " &quot; = &#34; -- quotation mark = APL quote, U+0022 ISOnum. */ public static final char _quot='\u0022'; /** & &amp; = &#38; -- ampersand, U+0026 ISOnum. */ public static final char _amp='\u0026'; /** < &lt; = &#60; -- less-than sign, U+003C ISOnum. */ public static final char _lt='\u003C'; /** > &gt; = &#62; -- greater-than sign, U+003E ISOnum. */ public static final char _gt='\u003E'; /** Œ &OElig; = &#338; -- latin capital ligature OE, U+0152 ISOlat2. */ public static final char _OElig='\u0152'; /** œ &oelig; = &#339; -- latin small ligature oe, U+0153 ISOlat2
    (see comments).

    ligature is a misnomer, this is a separate character in some languages

    */ public static final char _oelig='\u0153'; /** Š &Scaron; = &#352; -- latin capital letter S with caron, U+0160 ISOlat2. */ public static final char _Scaron='\u0160'; /** š &scaron; = &#353; -- latin small letter s with caron, U+0161 ISOlat2. */ public static final char _scaron='\u0161'; /** Ÿ &Yuml; = &#376; -- latin capital letter Y with diaeresis, U+0178 ISOlat2. */ public static final char _Yuml='\u0178'; /** ˆ &circ; = &#710; -- modifier letter circumflex accent, U+02C6 ISOpub. */ public static final char _circ='\u02C6'; /** ˜ &tilde; = &#732; -- small tilde, U+02DC ISOdia. */ public static final char _tilde='\u02DC'; /** &ensp; = &#8194; -- en space, U+2002 ISOpub. */ public static final char _ensp='\u2002'; /** &emsp; = &#8195; -- em space, U+2003 ISOpub. */ public static final char _emsp='\u2003'; /** &thinsp; = &#8201; -- thin space, U+2009 ISOpub. */ public static final char _thinsp='\u2009'; /** &zwnj; = &#8204; -- zero width non-joiner, U+200C NEW RFC 2070. */ public static final char _zwnj='\u200C'; /** &zwj; = &#8205; -- zero width joiner, U+200D NEW RFC 2070. */ public static final char _zwj='\u200D'; /** &lrm; = &#8206; -- left-to-right mark, U+200E NEW RFC 2070. */ public static final char _lrm='\u200E'; /** &rlm; = &#8207; -- right-to-left mark, U+200F NEW RFC 2070. */ public static final char _rlm='\u200F'; /** &ndash; = &#8211; -- en dash, U+2013 ISOpub. */ public static final char _ndash='\u2013'; /** &mdash; = &#8212; -- em dash, U+2014 ISOpub. */ public static final char _mdash='\u2014'; /** &lsquo; = &#8216; -- left single quotation mark, U+2018 ISOnum. */ public static final char _lsquo='\u2018'; /** &rsquo; = &#8217; -- right single quotation mark, U+2019 ISOnum. */ public static final char _rsquo='\u2019'; /** &sbquo; = &#8218; -- single low-9 quotation mark, U+201A NEW. */ public static final char _sbquo='\u201A'; /** &ldquo; = &#8220; -- left double quotation mark, U+201C ISOnum. 
*/ public static final char _ldquo='\u201C'; /** &rdquo; = &#8221; -- right double quotation mark, U+201D ISOnum. */ public static final char _rdquo='\u201D'; /** &bdquo; = &#8222; -- double low-9 quotation mark, U+201E NEW. */ public static final char _bdquo='\u201E'; /** &dagger; = &#8224; -- dagger, U+2020 ISOpub. */ public static final char _dagger='\u2020'; /** &Dagger; = &#8225; -- double dagger, U+2021 ISOpub. */ public static final char _Dagger='\u2021'; /** &permil; = &#8240; -- per mille sign, U+2030 ISOtech. */ public static final char _permil='\u2030'; /** &lsaquo; = &#8249; -- single left-pointing angle quotation mark, U+2039 ISO proposed
    (see comments).

    lsaquo is proposed but not yet ISO standardized

    */ public static final char _lsaquo='\u2039'; /** &rsaquo; = &#8250; -- single right-pointing angle quotation mark, U+203A ISO proposed
    (see comments).

    rsaquo is proposed but not yet ISO standardized

    */ public static final char _rsaquo='\u203A'; /** &euro; = &#8364; -- euro sign, U+20AC NEW. */ public static final char _euro='\u20AC'; /** * ' &apos; = &#39; -- apostrophe = APL quote, U+0027 ISOnum
    (see comments).

    * apos is only defined for use in XHTML * (see the XHTML Special Characters Entity Set), * but not in HTML. * @see Config#IsApostropheEncoded */ public static final char _apos='\''; private static Map NAME_TO_CODE_POINT_MAP=new HashMap(512,1.0F); // 253 entities in total private static IntStringHashMap CODE_POINT_TO_NAME_MAP; private static int MAX_NAME_LENGTH=0; static { NAME_TO_CODE_POINT_MAP.put("nbsp",new Integer(_nbsp)); NAME_TO_CODE_POINT_MAP.put("iexcl",new Integer(_iexcl)); NAME_TO_CODE_POINT_MAP.put("cent",new Integer(_cent)); NAME_TO_CODE_POINT_MAP.put("pound",new Integer(_pound)); NAME_TO_CODE_POINT_MAP.put("curren",new Integer(_curren)); NAME_TO_CODE_POINT_MAP.put("yen",new Integer(_yen)); NAME_TO_CODE_POINT_MAP.put("brvbar",new Integer(_brvbar)); NAME_TO_CODE_POINT_MAP.put("sect",new Integer(_sect)); NAME_TO_CODE_POINT_MAP.put("uml",new Integer(_uml)); NAME_TO_CODE_POINT_MAP.put("copy",new Integer(_copy)); NAME_TO_CODE_POINT_MAP.put("ordf",new Integer(_ordf)); NAME_TO_CODE_POINT_MAP.put("laquo",new Integer(_laquo)); NAME_TO_CODE_POINT_MAP.put("not",new Integer(_not)); NAME_TO_CODE_POINT_MAP.put("shy",new Integer(_shy)); NAME_TO_CODE_POINT_MAP.put("reg",new Integer(_reg)); NAME_TO_CODE_POINT_MAP.put("macr",new Integer(_macr)); NAME_TO_CODE_POINT_MAP.put("deg",new Integer(_deg)); NAME_TO_CODE_POINT_MAP.put("plusmn",new Integer(_plusmn)); NAME_TO_CODE_POINT_MAP.put("sup2",new Integer(_sup2)); NAME_TO_CODE_POINT_MAP.put("sup3",new Integer(_sup3)); NAME_TO_CODE_POINT_MAP.put("acute",new Integer(_acute)); NAME_TO_CODE_POINT_MAP.put("micro",new Integer(_micro)); NAME_TO_CODE_POINT_MAP.put("para",new Integer(_para)); NAME_TO_CODE_POINT_MAP.put("middot",new Integer(_middot)); NAME_TO_CODE_POINT_MAP.put("cedil",new Integer(_cedil)); NAME_TO_CODE_POINT_MAP.put("sup1",new Integer(_sup1)); NAME_TO_CODE_POINT_MAP.put("ordm",new Integer(_ordm)); NAME_TO_CODE_POINT_MAP.put("raquo",new Integer(_raquo)); NAME_TO_CODE_POINT_MAP.put("frac14",new Integer(_frac14)); 
NAME_TO_CODE_POINT_MAP.put("frac12",new Integer(_frac12)); NAME_TO_CODE_POINT_MAP.put("frac34",new Integer(_frac34)); NAME_TO_CODE_POINT_MAP.put("iquest",new Integer(_iquest)); NAME_TO_CODE_POINT_MAP.put("Agrave",new Integer(_Agrave)); NAME_TO_CODE_POINT_MAP.put("Aacute",new Integer(_Aacute)); NAME_TO_CODE_POINT_MAP.put("Acirc",new Integer(_Acirc)); NAME_TO_CODE_POINT_MAP.put("Atilde",new Integer(_Atilde)); NAME_TO_CODE_POINT_MAP.put("Auml",new Integer(_Auml)); NAME_TO_CODE_POINT_MAP.put("Aring",new Integer(_Aring)); NAME_TO_CODE_POINT_MAP.put("AElig",new Integer(_AElig)); NAME_TO_CODE_POINT_MAP.put("Ccedil",new Integer(_Ccedil)); NAME_TO_CODE_POINT_MAP.put("Egrave",new Integer(_Egrave)); NAME_TO_CODE_POINT_MAP.put("Eacute",new Integer(_Eacute)); NAME_TO_CODE_POINT_MAP.put("Ecirc",new Integer(_Ecirc)); NAME_TO_CODE_POINT_MAP.put("Euml",new Integer(_Euml)); NAME_TO_CODE_POINT_MAP.put("Igrave",new Integer(_Igrave)); NAME_TO_CODE_POINT_MAP.put("Iacute",new Integer(_Iacute)); NAME_TO_CODE_POINT_MAP.put("Icirc",new Integer(_Icirc)); NAME_TO_CODE_POINT_MAP.put("Iuml",new Integer(_Iuml)); NAME_TO_CODE_POINT_MAP.put("ETH",new Integer(_ETH)); NAME_TO_CODE_POINT_MAP.put("Ntilde",new Integer(_Ntilde)); NAME_TO_CODE_POINT_MAP.put("Ograve",new Integer(_Ograve)); NAME_TO_CODE_POINT_MAP.put("Oacute",new Integer(_Oacute)); NAME_TO_CODE_POINT_MAP.put("Ocirc",new Integer(_Ocirc)); NAME_TO_CODE_POINT_MAP.put("Otilde",new Integer(_Otilde)); NAME_TO_CODE_POINT_MAP.put("Ouml",new Integer(_Ouml)); NAME_TO_CODE_POINT_MAP.put("times",new Integer(_times)); NAME_TO_CODE_POINT_MAP.put("Oslash",new Integer(_Oslash)); NAME_TO_CODE_POINT_MAP.put("Ugrave",new Integer(_Ugrave)); NAME_TO_CODE_POINT_MAP.put("Uacute",new Integer(_Uacute)); NAME_TO_CODE_POINT_MAP.put("Ucirc",new Integer(_Ucirc)); NAME_TO_CODE_POINT_MAP.put("Uuml",new Integer(_Uuml)); NAME_TO_CODE_POINT_MAP.put("Yacute",new Integer(_Yacute)); NAME_TO_CODE_POINT_MAP.put("THORN",new Integer(_THORN)); 
NAME_TO_CODE_POINT_MAP.put("szlig",new Integer(_szlig)); NAME_TO_CODE_POINT_MAP.put("agrave",new Integer(_agrave)); NAME_TO_CODE_POINT_MAP.put("aacute",new Integer(_aacute)); NAME_TO_CODE_POINT_MAP.put("acirc",new Integer(_acirc)); NAME_TO_CODE_POINT_MAP.put("atilde",new Integer(_atilde)); NAME_TO_CODE_POINT_MAP.put("auml",new Integer(_auml)); NAME_TO_CODE_POINT_MAP.put("aring",new Integer(_aring)); NAME_TO_CODE_POINT_MAP.put("aelig",new Integer(_aelig)); NAME_TO_CODE_POINT_MAP.put("ccedil",new Integer(_ccedil)); NAME_TO_CODE_POINT_MAP.put("egrave",new Integer(_egrave)); NAME_TO_CODE_POINT_MAP.put("eacute",new Integer(_eacute)); NAME_TO_CODE_POINT_MAP.put("ecirc",new Integer(_ecirc)); NAME_TO_CODE_POINT_MAP.put("euml",new Integer(_euml)); NAME_TO_CODE_POINT_MAP.put("igrave",new Integer(_igrave)); NAME_TO_CODE_POINT_MAP.put("iacute",new Integer(_iacute)); NAME_TO_CODE_POINT_MAP.put("icirc",new Integer(_icirc)); NAME_TO_CODE_POINT_MAP.put("iuml",new Integer(_iuml)); NAME_TO_CODE_POINT_MAP.put("eth",new Integer(_eth)); NAME_TO_CODE_POINT_MAP.put("ntilde",new Integer(_ntilde)); NAME_TO_CODE_POINT_MAP.put("ograve",new Integer(_ograve)); NAME_TO_CODE_POINT_MAP.put("oacute",new Integer(_oacute)); NAME_TO_CODE_POINT_MAP.put("ocirc",new Integer(_ocirc)); NAME_TO_CODE_POINT_MAP.put("otilde",new Integer(_otilde)); NAME_TO_CODE_POINT_MAP.put("ouml",new Integer(_ouml)); NAME_TO_CODE_POINT_MAP.put("divide",new Integer(_divide)); NAME_TO_CODE_POINT_MAP.put("oslash",new Integer(_oslash)); NAME_TO_CODE_POINT_MAP.put("ugrave",new Integer(_ugrave)); NAME_TO_CODE_POINT_MAP.put("uacute",new Integer(_uacute)); NAME_TO_CODE_POINT_MAP.put("ucirc",new Integer(_ucirc)); NAME_TO_CODE_POINT_MAP.put("uuml",new Integer(_uuml)); NAME_TO_CODE_POINT_MAP.put("yacute",new Integer(_yacute)); NAME_TO_CODE_POINT_MAP.put("thorn",new Integer(_thorn)); NAME_TO_CODE_POINT_MAP.put("yuml",new Integer(_yuml)); NAME_TO_CODE_POINT_MAP.put("fnof",new Integer(_fnof)); NAME_TO_CODE_POINT_MAP.put("Alpha",new 
Integer(_Alpha)); NAME_TO_CODE_POINT_MAP.put("Beta",new Integer(_Beta)); NAME_TO_CODE_POINT_MAP.put("Gamma",new Integer(_Gamma)); NAME_TO_CODE_POINT_MAP.put("Delta",new Integer(_Delta)); NAME_TO_CODE_POINT_MAP.put("Epsilon",new Integer(_Epsilon)); NAME_TO_CODE_POINT_MAP.put("Zeta",new Integer(_Zeta)); NAME_TO_CODE_POINT_MAP.put("Eta",new Integer(_Eta)); NAME_TO_CODE_POINT_MAP.put("Theta",new Integer(_Theta)); NAME_TO_CODE_POINT_MAP.put("Iota",new Integer(_Iota)); NAME_TO_CODE_POINT_MAP.put("Kappa",new Integer(_Kappa)); NAME_TO_CODE_POINT_MAP.put("Lambda",new Integer(_Lambda)); NAME_TO_CODE_POINT_MAP.put("Mu",new Integer(_Mu)); NAME_TO_CODE_POINT_MAP.put("Nu",new Integer(_Nu)); NAME_TO_CODE_POINT_MAP.put("Xi",new Integer(_Xi)); NAME_TO_CODE_POINT_MAP.put("Omicron",new Integer(_Omicron)); NAME_TO_CODE_POINT_MAP.put("Pi",new Integer(_Pi)); NAME_TO_CODE_POINT_MAP.put("Rho",new Integer(_Rho)); NAME_TO_CODE_POINT_MAP.put("Sigma",new Integer(_Sigma)); NAME_TO_CODE_POINT_MAP.put("Tau",new Integer(_Tau)); NAME_TO_CODE_POINT_MAP.put("Upsilon",new Integer(_Upsilon)); NAME_TO_CODE_POINT_MAP.put("Phi",new Integer(_Phi)); NAME_TO_CODE_POINT_MAP.put("Chi",new Integer(_Chi)); NAME_TO_CODE_POINT_MAP.put("Psi",new Integer(_Psi)); NAME_TO_CODE_POINT_MAP.put("Omega",new Integer(_Omega)); NAME_TO_CODE_POINT_MAP.put("alpha",new Integer(_alpha)); NAME_TO_CODE_POINT_MAP.put("beta",new Integer(_beta)); NAME_TO_CODE_POINT_MAP.put("gamma",new Integer(_gamma)); NAME_TO_CODE_POINT_MAP.put("delta",new Integer(_delta)); NAME_TO_CODE_POINT_MAP.put("epsilon",new Integer(_epsilon)); NAME_TO_CODE_POINT_MAP.put("zeta",new Integer(_zeta)); NAME_TO_CODE_POINT_MAP.put("eta",new Integer(_eta)); NAME_TO_CODE_POINT_MAP.put("theta",new Integer(_theta)); NAME_TO_CODE_POINT_MAP.put("iota",new Integer(_iota)); NAME_TO_CODE_POINT_MAP.put("kappa",new Integer(_kappa)); NAME_TO_CODE_POINT_MAP.put("lambda",new Integer(_lambda)); NAME_TO_CODE_POINT_MAP.put("mu",new Integer(_mu)); NAME_TO_CODE_POINT_MAP.put("nu",new 
Integer(_nu)); NAME_TO_CODE_POINT_MAP.put("xi",new Integer(_xi)); NAME_TO_CODE_POINT_MAP.put("omicron",new Integer(_omicron)); NAME_TO_CODE_POINT_MAP.put("pi",new Integer(_pi)); NAME_TO_CODE_POINT_MAP.put("rho",new Integer(_rho)); NAME_TO_CODE_POINT_MAP.put("sigmaf",new Integer(_sigmaf)); NAME_TO_CODE_POINT_MAP.put("sigma",new Integer(_sigma)); NAME_TO_CODE_POINT_MAP.put("tau",new Integer(_tau)); NAME_TO_CODE_POINT_MAP.put("upsilon",new Integer(_upsilon)); NAME_TO_CODE_POINT_MAP.put("phi",new Integer(_phi)); NAME_TO_CODE_POINT_MAP.put("chi",new Integer(_chi)); NAME_TO_CODE_POINT_MAP.put("psi",new Integer(_psi)); NAME_TO_CODE_POINT_MAP.put("omega",new Integer(_omega)); NAME_TO_CODE_POINT_MAP.put("thetasym",new Integer(_thetasym)); NAME_TO_CODE_POINT_MAP.put("upsih",new Integer(_upsih)); NAME_TO_CODE_POINT_MAP.put("piv",new Integer(_piv)); NAME_TO_CODE_POINT_MAP.put("bull",new Integer(_bull)); NAME_TO_CODE_POINT_MAP.put("hellip",new Integer(_hellip)); NAME_TO_CODE_POINT_MAP.put("prime",new Integer(_prime)); NAME_TO_CODE_POINT_MAP.put("Prime",new Integer(_Prime)); NAME_TO_CODE_POINT_MAP.put("oline",new Integer(_oline)); NAME_TO_CODE_POINT_MAP.put("frasl",new Integer(_frasl)); NAME_TO_CODE_POINT_MAP.put("weierp",new Integer(_weierp)); NAME_TO_CODE_POINT_MAP.put("image",new Integer(_image)); NAME_TO_CODE_POINT_MAP.put("real",new Integer(_real)); NAME_TO_CODE_POINT_MAP.put("trade",new Integer(_trade)); NAME_TO_CODE_POINT_MAP.put("alefsym",new Integer(_alefsym)); NAME_TO_CODE_POINT_MAP.put("larr",new Integer(_larr)); NAME_TO_CODE_POINT_MAP.put("uarr",new Integer(_uarr)); NAME_TO_CODE_POINT_MAP.put("rarr",new Integer(_rarr)); NAME_TO_CODE_POINT_MAP.put("darr",new Integer(_darr)); NAME_TO_CODE_POINT_MAP.put("harr",new Integer(_harr)); NAME_TO_CODE_POINT_MAP.put("crarr",new Integer(_crarr)); NAME_TO_CODE_POINT_MAP.put("lArr",new Integer(_lArr)); NAME_TO_CODE_POINT_MAP.put("uArr",new Integer(_uArr)); NAME_TO_CODE_POINT_MAP.put("rArr",new Integer(_rArr)); 
NAME_TO_CODE_POINT_MAP.put("dArr",new Integer(_dArr)); NAME_TO_CODE_POINT_MAP.put("hArr",new Integer(_hArr)); NAME_TO_CODE_POINT_MAP.put("forall",new Integer(_forall)); NAME_TO_CODE_POINT_MAP.put("part",new Integer(_part)); NAME_TO_CODE_POINT_MAP.put("exist",new Integer(_exist)); NAME_TO_CODE_POINT_MAP.put("empty",new Integer(_empty)); NAME_TO_CODE_POINT_MAP.put("nabla",new Integer(_nabla)); NAME_TO_CODE_POINT_MAP.put("isin",new Integer(_isin)); NAME_TO_CODE_POINT_MAP.put("notin",new Integer(_notin)); NAME_TO_CODE_POINT_MAP.put("ni",new Integer(_ni)); NAME_TO_CODE_POINT_MAP.put("prod",new Integer(_prod)); NAME_TO_CODE_POINT_MAP.put("sum",new Integer(_sum)); NAME_TO_CODE_POINT_MAP.put("minus",new Integer(_minus)); NAME_TO_CODE_POINT_MAP.put("lowast",new Integer(_lowast)); NAME_TO_CODE_POINT_MAP.put("radic",new Integer(_radic)); NAME_TO_CODE_POINT_MAP.put("prop",new Integer(_prop)); NAME_TO_CODE_POINT_MAP.put("infin",new Integer(_infin)); NAME_TO_CODE_POINT_MAP.put("ang",new Integer(_ang)); NAME_TO_CODE_POINT_MAP.put("and",new Integer(_and)); NAME_TO_CODE_POINT_MAP.put("or",new Integer(_or)); NAME_TO_CODE_POINT_MAP.put("cap",new Integer(_cap)); NAME_TO_CODE_POINT_MAP.put("cup",new Integer(_cup)); NAME_TO_CODE_POINT_MAP.put("int",new Integer(_int)); NAME_TO_CODE_POINT_MAP.put("there4",new Integer(_there4)); NAME_TO_CODE_POINT_MAP.put("sim",new Integer(_sim)); NAME_TO_CODE_POINT_MAP.put("cong",new Integer(_cong)); NAME_TO_CODE_POINT_MAP.put("asymp",new Integer(_asymp)); NAME_TO_CODE_POINT_MAP.put("ne",new Integer(_ne)); NAME_TO_CODE_POINT_MAP.put("equiv",new Integer(_equiv)); NAME_TO_CODE_POINT_MAP.put("le",new Integer(_le)); NAME_TO_CODE_POINT_MAP.put("ge",new Integer(_ge)); NAME_TO_CODE_POINT_MAP.put("sub",new Integer(_sub)); NAME_TO_CODE_POINT_MAP.put("sup",new Integer(_sup)); NAME_TO_CODE_POINT_MAP.put("nsub",new Integer(_nsub)); NAME_TO_CODE_POINT_MAP.put("sube",new Integer(_sube)); NAME_TO_CODE_POINT_MAP.put("supe",new Integer(_supe)); 
NAME_TO_CODE_POINT_MAP.put("oplus",new Integer(_oplus)); NAME_TO_CODE_POINT_MAP.put("otimes",new Integer(_otimes)); NAME_TO_CODE_POINT_MAP.put("perp",new Integer(_perp)); NAME_TO_CODE_POINT_MAP.put("sdot",new Integer(_sdot)); NAME_TO_CODE_POINT_MAP.put("lceil",new Integer(_lceil)); NAME_TO_CODE_POINT_MAP.put("rceil",new Integer(_rceil)); NAME_TO_CODE_POINT_MAP.put("lfloor",new Integer(_lfloor)); NAME_TO_CODE_POINT_MAP.put("rfloor",new Integer(_rfloor)); NAME_TO_CODE_POINT_MAP.put("lang",new Integer(_lang)); NAME_TO_CODE_POINT_MAP.put("rang",new Integer(_rang)); NAME_TO_CODE_POINT_MAP.put("loz",new Integer(_loz)); NAME_TO_CODE_POINT_MAP.put("spades",new Integer(_spades)); NAME_TO_CODE_POINT_MAP.put("clubs",new Integer(_clubs)); NAME_TO_CODE_POINT_MAP.put("hearts",new Integer(_hearts)); NAME_TO_CODE_POINT_MAP.put("diams",new Integer(_diams)); NAME_TO_CODE_POINT_MAP.put("quot",new Integer(_quot)); NAME_TO_CODE_POINT_MAP.put("amp",new Integer(_amp)); NAME_TO_CODE_POINT_MAP.put("lt",new Integer(_lt)); NAME_TO_CODE_POINT_MAP.put("gt",new Integer(_gt)); NAME_TO_CODE_POINT_MAP.put("OElig",new Integer(_OElig)); NAME_TO_CODE_POINT_MAP.put("oelig",new Integer(_oelig)); NAME_TO_CODE_POINT_MAP.put("Scaron",new Integer(_Scaron)); NAME_TO_CODE_POINT_MAP.put("scaron",new Integer(_scaron)); NAME_TO_CODE_POINT_MAP.put("Yuml",new Integer(_Yuml)); NAME_TO_CODE_POINT_MAP.put("circ",new Integer(_circ)); NAME_TO_CODE_POINT_MAP.put("tilde",new Integer(_tilde)); NAME_TO_CODE_POINT_MAP.put("ensp",new Integer(_ensp)); NAME_TO_CODE_POINT_MAP.put("emsp",new Integer(_emsp)); NAME_TO_CODE_POINT_MAP.put("thinsp",new Integer(_thinsp)); NAME_TO_CODE_POINT_MAP.put("zwnj",new Integer(_zwnj)); NAME_TO_CODE_POINT_MAP.put("zwj",new Integer(_zwj)); NAME_TO_CODE_POINT_MAP.put("lrm",new Integer(_lrm)); NAME_TO_CODE_POINT_MAP.put("rlm",new Integer(_rlm)); NAME_TO_CODE_POINT_MAP.put("ndash",new Integer(_ndash)); NAME_TO_CODE_POINT_MAP.put("mdash",new Integer(_mdash)); NAME_TO_CODE_POINT_MAP.put("lsquo",new 
Integer(_lsquo)); NAME_TO_CODE_POINT_MAP.put("rsquo",new Integer(_rsquo)); NAME_TO_CODE_POINT_MAP.put("sbquo",new Integer(_sbquo)); NAME_TO_CODE_POINT_MAP.put("ldquo",new Integer(_ldquo)); NAME_TO_CODE_POINT_MAP.put("rdquo",new Integer(_rdquo)); NAME_TO_CODE_POINT_MAP.put("bdquo",new Integer(_bdquo)); NAME_TO_CODE_POINT_MAP.put("dagger",new Integer(_dagger)); NAME_TO_CODE_POINT_MAP.put("Dagger",new Integer(_Dagger)); NAME_TO_CODE_POINT_MAP.put("permil",new Integer(_permil)); NAME_TO_CODE_POINT_MAP.put("lsaquo",new Integer(_lsaquo)); NAME_TO_CODE_POINT_MAP.put("rsaquo",new Integer(_rsaquo)); NAME_TO_CODE_POINT_MAP.put("euro",new Integer(_euro)); NAME_TO_CODE_POINT_MAP.put("apos",new Integer(_apos)); CODE_POINT_TO_NAME_MAP=new IntStringHashMap((int)(NAME_TO_CODE_POINT_MAP.size()/0.75F),1.0F); for (Map.Entry entry : NAME_TO_CODE_POINT_MAP.entrySet()) { String name=entry.getKey(); if (MAX_NAME_LENGTH *

    *
    Example:
    *
    ((CharacterEntityReference)CharacterReference.parse("&gt;")).getName() returns "gt"
    *
    * @return the name of this character entity reference, i.e. the text that appears between the '&amp;' and the ';' in its encoded form.
    * @see #getName(int codePoint)
    */
    public String getName() { return name; }

    /**
    * Returns the character entity reference name of the specified character.
    *

    * Since all character entity references represent unicode BMP code points, * the functionality of this method is identical to that of {@link #getName(int codePoint)}. *

    *

    *
    Example:
    *
    CharacterEntityReference.getName('>') returns "gt"
    *
    * @param ch  the character to look up; it is widened to an int and passed to {@link #getName(int codePoint)}.
    * @return the character entity reference name of the specified character, or null if none exists.
    */
    public static String getName(final char ch) { return getName((int)ch); }

    /**
    * Returns the character entity reference name of the specified unicode code point.
    *

    * Since all character entity references represent unicode BMP code points, * the functionality of this method is identical to that of {@link #getName(char ch)}. *

    *

    *
    Example:
    *
    CharacterEntityReference.getName(62) returns "gt"
    *
    * @param codePoint  the unicode code point to look up in the code point to name table.
    * @return the character entity reference name of the specified unicode code point, or null if none exists.
    */
    public static String getName(final int codePoint) { return CODE_POINT_TO_NAME_MAP.get(codePoint); }

    /**
    * Returns the unicode code point of the specified character entity reference name.
    *

    * If the string does not represent a valid character entity reference name, this method returns {@link #INVALID_CODE_POINT INVALID_CODE_POINT}. *

    * Although character entity reference names are case sensitive, and in some cases differ from other entity references only by their case, * some browsers also recognise them in a case-insensitive way. * For this reason, all decoding methods in this library recognise character entity reference names even if they are in the wrong case. *

    *

    *
    Example:
    *
    CharacterEntityReference.getCodePointFromName("gt") returns 62
    *
    * @return the unicode code point of the specified character entity reference name, or {@link #INVALID_CODE_POINT INVALID_CODE_POINT} if the string does not represent a valid character entity reference name. */ public static int getCodePointFromName(final String name) { Integer codePoint=NAME_TO_CODE_POINT_MAP.get(name); if (codePoint==null) { // Most browsers recognise character entity references even if they have the wrong case, so check for this as well: final String lowerCaseName=name.toLowerCase(); if (lowerCaseName!=name) codePoint=NAME_TO_CODE_POINT_MAP.get(lowerCaseName); } return (codePoint!=null) ? codePoint.intValue() : INVALID_CODE_POINT; } /** * Returns the correct encoded form of this character entity reference. *

    * Note that the returned string is not necessarily the same as the original source text used to create this object. * This library recognises certain invalid forms of character references, as detailed in the {@link #decode(CharSequence) decode(CharSequence)} method. *

    * To retrieve the original source text, use the {@link #toString() toString()} method instead. *

    *

    *
    Example:
    *
    CharacterReference.parse("&GT").getCharacterReferenceString() returns "&gt;"
    *
    *
    * @return the correct encoded form of this character entity reference, built from its name.
    * @see CharacterReference#getCharacterReferenceString(int codePoint)
    */
    public String getCharacterReferenceString() { return getCharacterReferenceString(name); }

    /**
    * Returns the character entity reference encoded form of the specified unicode code point.
    *

    * If the specified unicode code point does not have an equivalent character entity reference, this method returns null. * To get either the entity or numeric reference encoded form, use the {@link CharacterReference#getCharacterReferenceString(int codePoint)} method instead. *

    *

    *
    Examples:
    *
    CharacterEntityReference.getCharacterReferenceString(62) returns "&gt;"
    *
    CharacterEntityReference.getCharacterReferenceString(9786) returns null
    *
    *
    * @param codePoint  the unicode code point to encode.
    * @return the character entity reference encoded form of the specified unicode code point, or null if none exists.
    * @see CharacterReference#getCharacterReferenceString(int codePoint)
    */
    public static String getCharacterReferenceString(final int codePoint) {
        // Anything above Character.MAX_VALUE is outside the BMP and is rejected without a table lookup.
        if (codePoint>Character.MAX_VALUE) return null;
        final String name=getName(codePoint);
        return name!=null ? getCharacterReferenceString(name) : null;
    }

    /**
    * Returns a map of character entity reference names (String) to unicode code points (Integer).
    * <p>
    * NOTE(review): the object returned is the internal lookup table itself, not a copy, so callers should treat it as read-only.
    *
    * @return a map of character entity reference names to unicode code points.
    */
    public static Map getNameToCodePointMap() {
        return NAME_TO_CODE_POINT_MAP;
    }

    /**
    * Returns a string representation of this object useful for debugging purposes.
    * <p>
    * The result consists of the quoted encoded form of the reference, the unicode text of its code point,
    * and the debug information supplied by the superclass, separated by spaces.
    *
    * @return a string representation of this object useful for debugging purposes.
    */
    public String getDebugInfo() {
        final StringBuilder sb=new StringBuilder();
        sb.append('"');
        try {
            appendCharacterReferenceString(sb,name);
            sb.append("\" ");
            appendUnicodeText(sb,codePoint);
        } catch (IOException ex) {throw new RuntimeException(ex);} // never happens
        sb.append(' ').append(super.getDebugInfo());
        return sb.toString();
    }

    /**
    * Returns the encoded form ('&amp;' + name + ';') of the character entity reference with the specified name.
    *
    * @param name  the character entity reference name.
    * @return the encoded form of the character entity reference.
    */
    private static String getCharacterReferenceString(final String name) {
        try {
            return appendCharacterReferenceString(new StringBuilder(),name).toString();
        } catch (IOException ex) {throw new RuntimeException(ex);} // never happens
    }

    /**
    * Appends the encoded form ('&amp;' + name + ';') of the named character entity reference to the specified Appendable.
    *
    * @param appendable  the destination for the encoded form.
    * @param name  the character entity reference name.
    * @return the Appendable returned by the final append call.
    * @throws IOException if the Appendable throws it.
    */
    static final Appendable appendCharacterReferenceString(final Appendable appendable, final String name) throws IOException {
        return appendable.append('&').append(name).append(';');
    }

    /**
    * Attempts to construct the character entity reference that begins at the specified position in the source.
    * <p>
    * The name is first scanned allowing both letters and digits, so that names such as
    * "frac12", "sup2" and "there4" are recognised in full.
    * If that does not yield a valid reference, the name is scanned again allowing letters only,
    * which keeps the handling of unterminated references directly followed by a digit (e.g. "&amp;amp1") unchanged.
    *
    * @param source  the source document.
    * @param begin  the position of the '&amp;' character that starts the reference.
    * @param unterminatedMaxCodePoint  the highest code point accepted for a reference that is not terminated by a semicolon,
    *        or {@link #INVALID_CODE_POINT} if unterminated references are to be rejected altogether.
    * @return the character entity reference at the specified position, or null if there is no valid reference there.
    */
    static CharacterReference construct(final Source source, final int begin, final int unterminatedMaxCodePoint) {
        // only called from CharacterReference.construct(), so we can assume that first character is '&'
        final CharacterReference characterReference=scanAndConstruct(source,begin,unterminatedMaxCodePoint,true);
        if (characterReference!=null) return characterReference;
        return scanAndConstruct(source,begin,unterminatedMaxCodePoint,false);
    }

    /**
    * Scans the name following the '&amp;' at the specified position and constructs the matching character entity reference.
    *
    * @param source  the source document.
    * @param begin  the position of the '&amp;' character that starts the reference.
    * @param unterminatedMaxCodePoint  see {@link #construct(Source,int,int)}.
    * @param allowDigits  whether the ASCII digits '0' to '9' are treated as part of the name.
    * @return the character entity reference, or null if the scanned name is not a valid or acceptable reference.
    */
    private static CharacterReference scanAndConstruct(final Source source, final int begin, final int unterminatedMaxCodePoint, final boolean allowDigits) {
        String name;
        final int nameBegin=begin+1;
        final int maxNameEnd=nameBegin+MAX_NAME_LENGTH;
        final int maxSourcePos=source.end-1;
        int end;
        int x=nameBegin;
        boolean unterminated=false;
        while (true) {
            final char ch=source.charAt(x);
            if (ch==';') {
                end=x+1;
                name=source.subSequence(nameBegin,x).toString();
                break;
            }
            final boolean nameChar=isValidReferenceNameChar(ch) || (allowDigits && ch>='0' && ch<='9');
            if (!nameChar) {
                // At this point, ch is determined to be an invalid character, meaning the character reference is unterminated.
                unterminated=true;
            } else if (x==maxSourcePos) {
                // At this point, we have a valid name character but are at the last position in the source text without the terminating semicolon.
                unterminated=true;
                x++; // include this character in the name
            }
            if (unterminated) {
                // Different browsers react differently to unterminated character entity references.
                // The behaviour of this method is determined by the unterminatedMaxCodePoint parameter.
                if (unterminatedMaxCodePoint==INVALID_CODE_POINT) {
                    // reject:
                    return null;
                } else {
                    // accept:
                    end=x;
                    name=source.subSequence(nameBegin,x).toString();
                    break;
                }
            }
            if (++x>maxNameEnd) return null;
        }
        final int codePoint=getCodePointFromName(name);
        if (codePoint==INVALID_CODE_POINT || (unterminated && codePoint>unterminatedMaxCodePoint)) return null;
        return new CharacterEntityReference(source,begin,end,codePoint);
    }

    /**
    * Indicates whether the specified character is an ASCII letter ('A'-'Z' or 'a'-'z').
    *
    * @param ch  the character to test.
    * @return true if the character is an ASCII letter, otherwise false.
    */
    private static final boolean isValidReferenceNameChar(final char ch) {
        return ch>='A' && ch<='z' && (ch<='Z' || ch>='a');
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/CharStreamSource.java0000644000175000017500000001207511204550410026054 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2.
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.io.*; /** * Represents a character stream source. The purpose of a class that implements this interface is to output text. *

    * For small amounts of data, or when memory usage isn't a prime concern (e.g. in client-side applications), the simplest way to obtain the data is by calling * the {@link #toString()} method. * If the character stream might contain a large amount of data it is recommended to use the {@link #writeTo(Writer)} method to access the data, * especially if running in a multi-user server environment. *

    * The advantage of providing textual data via this interface is that it gives the user the choice as to whether they would like to receive the data as a stream * of characters, or all as a single string. Furthermore, it allows the "active" stream source (see below) to be easily converted into a "passive" stream source * if required. *

    * An active stream source is a stream source that actively outputs to a passive receiver ("sink"). * The {@link #writeTo(Writer)} method in this interface signifies an active source as the transmission of the entire data stream takes place when this method is executed. * In this case the sink is the object that supplies the Writer object, and would typically contain a getWriter() method. * The sink is passive because it just supplies a Writer object to be written to by the code in some other class. *

    * A passive stream source is a stream source that is read from by an active sink. * For character streams, a passive stream source simply supplies a Reader object. * The active sink would typically contain a readFrom(Reader) method which actively reads the entire data stream from the Reader object. *

    * The {@link CharStreamSourceUtil#getReader(CharStreamSource)} method converts a CharStreamSource into a Reader, * allowing the data from the active CharStreamSource to be consumed by an active sink with a readFrom(Reader) method. *

    * Every implementing class must override the {@link #toString()} method to return the output as a string. *

    * An easy way to implement this is by calling the {@link CharStreamSourceUtil#toString(CharStreamSource) CharStreamSourceUtil.toString(this)} method,
    * which buffers the output from the {@link #writeTo(Writer)} method into a string.
    *
    * @see OutputDocument
    * @see SourceFormatter
    * @see Renderer
    * @see TextExtractor
    */
public interface CharStreamSource {
    /**
    * Writes the output to the specified Writer.
    *
    * @param writer  the destination java.io.Writer for the output.
    * @throws IOException if an I/O exception occurs.
    */
    void writeTo(Writer writer) throws IOException;

    /**
    * Appends the output to the specified Appendable object.
    *
    * @param appendable  the destination java.lang.Appendable object for the output.
    * @throws IOException if an I/O exception occurs.
    */
    void appendTo(Appendable appendable) throws IOException;

    /**
    * Returns the estimated maximum number of characters in the output, or -1 if no estimate is available.
    * <p>
    * The returned value should be used as a guide for efficiency purposes only, for example to set an initial StringBuilder capacity.
    * There is no guarantee that the length of the output is indeed less than this value,
    * as classes implementing this method often use assumptions based on typical usage to calculate the estimate.
    * <p>
    * Although implementations of this method should never return a value less than -1, users of this method must not assume that this will always be the case.
    * Standard practice is to interpret any negative value as meaning that no estimate is available.
    *
    * @return the estimated maximum number of characters in the output, or -1 if no estimate is available.
    */
    long getEstimatedMaximumOutputLength();

    /**
    * Returns the output as a string.
    * <p>
    * This is declared here, although every object already has a toString() method, because implementing classes are required to override it.
    *
    * @return the output as a string.
    */
    String toString();
}
jericho-html-3.1/src/java/net/htmlparser/jericho/ParseText.java0000644000175000017500000002413311204550410024557 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; /** * Represents the text from the {@linkplain Source source} document that is to be parsed. *

    * This interface is normally only of interest to users who wish to create custom tag types. *

    * The parse text is defined as the entire text of the source document in lower case, with all * {@linkplain Segment#ignoreWhenParsing() ignored} segments replaced by space characters. *

    * The text is stored in lower case to make case insensitive parsing as efficient as possible. *

    * This interface provides many methods which are also provided by the java.lang.String class, * but adds an extra parameter called breakAtIndex to the various indexOf methods. * This parameter allows a search on only a specified segment of the text, which is not possible using the normal String class. *

    * ParseText instances are obtained using the {@link Source#getParseText()} method. */ public interface ParseText extends CharSequence { /** A value to use as the breakAtIndex argument in certain methods to indicate that the search should continue to the start or end of the parse text. */ public static final int NO_BREAK=-1; /** * Returns the character at the specified index. * @param index the index of the character. * @return the character at the specified index, which is always in lower case. */ public char charAt(int index); /** * Indicates whether this parse text contains the specified string at the specified position. *

    * This method is analogous to the java.lang.String.startsWith(String prefix, int toffset) method. * * @param str a string. * @param pos the position (index) in this parse text at which to check for the specified string. * @return true if this parse text contains the specified string at the specified position, otherwise false. */ public boolean containsAt(String str, int pos); /** * Returns the index within this parse text of the first occurrence of the specified character, * starting the search at the position specified by fromIndex. *

    * If the specified character is not found then -1 is returned. * * @param searchChar a character. * @param fromIndex the index to start the search from. * @return the index within this parse text of the first occurrence of the specified character within the specified range, or -1 if the character is not found. */ public int indexOf(char searchChar, int fromIndex); /** * Returns the index within this parse text of the first occurrence of the specified character, * starting the search at the position specified by fromIndex, * and breaking the search at the index specified by breakAtIndex. *

    * The position specified by breakAtIndex is not included in the search. *

    * If the search is to continue to the end of the text, * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the breakAtIndex. *

    * If the specified character is not found then -1 is returned. * * @param searchChar a character. * @param fromIndex the index to start the search from. * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} if the search is to continue to the end of the text. * @return the index within this parse text of the first occurrence of the specified character within the specified range, or -1 if the character is not found. */ public int indexOf(char searchChar, int fromIndex, int breakAtIndex); /** * Returns the index within this parse text of the first occurrence of the specified string, * starting the search at the position specified by fromIndex. *

    * If the specified string is not found then -1 is returned. * * @param searchString a string. * @param fromIndex the index to start the search from. * @return the index within this parse text of the first occurrence of the specified string within the specified range, or -1 if the string is not found. */ public int indexOf(String searchString, int fromIndex); /** * Returns the index within this parse text of the first occurrence of the specified string, * starting the search at the position specified by fromIndex, * and breaking the search at the index specified by breakAtIndex. *

    * The position specified by breakAtIndex is not included in the search. *

    * If the search is to continue to the end of the text, * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the breakAtIndex. *

    * If the specified string is not found then -1 is returned. * * @param searchString a string. * @param fromIndex the index to start the search from. * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} if the search is to continue to the end of the text. * @return the index within this parse text of the first occurrence of the specified string within the specified range, or -1 if the string is not found. */ public int indexOf(String searchString, int fromIndex, int breakAtIndex); /** * Returns the index within this parse text of the last occurrence of the specified character, * searching backwards starting at the position specified by fromIndex. *

    * If the specified character is not found then -1 is returned. * * @param searchChar a character. * @param fromIndex the index to start the search from. * @return the index within this parse text of the last occurrence of the specified character within the specified range, or -1 if the character is not found. */ public int lastIndexOf(char searchChar, int fromIndex); /** * Returns the index within this parse text of the last occurrence of the specified character, * searching backwards starting at the position specified by fromIndex, * and breaking the search at the index specified by breakAtIndex. *

    * The position specified by breakAtIndex is not included in the search. *

    * If the search is to continue to the start of the text, * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the breakAtIndex. *

    * If the specified character is not found then -1 is returned. * * @param searchChar a character. * @param fromIndex the index to start the search from. * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} if the search is to continue to the start of the text. * @return the index within this parse text of the last occurrence of the specified character within the specified range, or -1 if the character is not found. */ public int lastIndexOf(char searchChar, int fromIndex, int breakAtIndex); /** * Returns the index within this parse text of the last occurrence of the specified string, * searching backwards starting at the position specified by fromIndex. *

    * If the specified string is not found then -1 is returned. * * @param searchString a string. * @param fromIndex the index to start the search from. * @return the index within this parse text of the last occurrence of the specified string within the specified range, or -1 if the string is not found. */ public int lastIndexOf(String searchString, int fromIndex); /** * Returns the index within this parse text of the last occurrence of the specified string, * searching backwards starting at the position specified by fromIndex, * and breaking the search at the index specified by breakAtIndex. *

    * The position specified by breakAtIndex is not included in the search. *

    * If the search is to continue to the start of the text, * the value {@link #NO_BREAK ParseText.NO_BREAK} should be specified as the breakAtIndex. *

    * If the specified string is not found then -1 is returned. * * @param searchString a string. * @param fromIndex the index to start the search from. * @param breakAtIndex the index at which to break off the search, or {@link #NO_BREAK} if the search is to continue to the start of the text. * @return the index within this parse text of the last occurrence of the specified string within the specified range, or -1 if the string is not found. */ public int lastIndexOf(String searchString, int fromIndex, int breakAtIndex); /** * Returns the length of the parse text. * @return the length of the parse text. */ public int length(); /** * Returns a new character sequence that is a subsequence of this sequence. * * @param begin the begin position, inclusive. * @param end the end position, exclusive. * @return a new character sequence that is a subsequence of this sequence. */ public CharSequence subSequence(int begin, int end); /** * Returns the content of the parse text as a String. * @return the content of the parse text as a String. */ public String toString(); } jericho-html-3.1/src/java/net/htmlparser/jericho/LoggerProviderJava.java0000644000175000017500000000422211204550410026371 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details.
package net.htmlparser.jericho;

/**
 * A {@link LoggerProvider} whose loggers delegate to the standard <code>java.util.logging</code> API.
 * <p>
 * The class has no state and a private constructor; the only instance is {@link #INSTANCE}.
 */
final class LoggerProviderJava implements LoggerProvider {
    public static final LoggerProvider INSTANCE=new LoggerProviderJava();

    private LoggerProviderJava() {}

    /**
     * Returns a {@link Logger} that wraps the <code>java.util.logging.Logger</code> with the specified name.
     *
     * @param name the name passed to <code>java.util.logging.Logger.getLogger(String)</code>.
     * @return a new adapter around the named <code>java.util.logging.Logger</code>.
     */
    public Logger getLogger(final String name) {
        return new JavaLogger(java.util.logging.Logger.getLogger(name));
    }

    /**
     * Adapter mapping the {@link Logger} levels onto <code>java.util.logging</code> levels:
     * error to SEVERE, warn to WARNING, info to INFO and debug to FINE.
     * <p>
     * Declared <code>static</code> because it never touches the enclosing provider,
     * so it should not carry a hidden reference to it.
     */
    private static final class JavaLogger implements Logger {
        private final java.util.logging.Logger javaLogger;

        public JavaLogger(final java.util.logging.Logger javaLogger) {
            this.javaLogger=javaLogger;
        }

        public void error(final String message) {
            javaLogger.severe(message);
        }

        public void warn(final String message) {
            javaLogger.warning(message);
        }

        public void info(final String message) {
            javaLogger.info(message);
        }

        public void debug(final String message) {
            javaLogger.fine(message);
        }

        public boolean isErrorEnabled() {
            return javaLogger.isLoggable(java.util.logging.Level.SEVERE);
        }

        public boolean isWarnEnabled() {
            return javaLogger.isLoggable(java.util.logging.Level.WARNING);
        }

        public boolean isInfoEnabled() {
            return javaLogger.isLoggable(java.util.logging.Level.INFO);
        }

        public boolean isDebugEnabled() {
            return javaLogger.isLoggable(java.util.logging.Level.FINE);
        }
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/nodoc/0000755000175000017500000000000011167436712023114 5ustar twernertwernerjericho-html-3.1/src/java/net/htmlparser/jericho/nodoc/SequentialListSegment.java0000644000175000017500000003375211204550410030242 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is 
free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details.
package net.htmlparser.jericho.nodoc;

import net.htmlparser.jericho.*;
import java.util.*;

/**
 * A base class used internally to simulate multiple inheritance of {@link Segment} and <code>java.util.AbstractSequentialList</code>.
 * <p>
 * It allows a {@link Segment} based class to implement <code>java.util.List</code> without having to implement
 * all of the <code>List</code> methods explicitly, which would clutter the API documentation with mostly irrelevant methods.
 * By extending this class, most of the list implementation methods are simply listed in the inherited methods list.
 * <p>
 * The list is assumed to be immutable.
 *
 * @param <E> the type of the items in the list.
 * @see Attributes
 */
public abstract class SequentialListSegment<E> extends Segment implements List<E> {
    public SequentialListSegment(final Source source, final int begin, final int end) {
        super(source,begin,end);
    }

    /**
     * Returns the number of items in the list.
     * @return the number of items in the list.
     */
    public abstract int getCount();

    /**
     * Returns a list iterator of the items in this list (in proper sequence), starting at the specified position in the list.
     * <p>
     * The specified index indicates the first item that would be returned by an initial call to the <code>next()</code> method.
     * An initial call to the <code>previous()</code> method would return the item with the specified index minus one.
     *
     * @param index index of the first item to be returned from the list iterator (by a call to the <code>next()</code> method).
     * @return a list iterator of the items in this list (in proper sequence), starting at the specified position in the list.
     * @throws IndexOutOfBoundsException if the specified index is out of range (<code>index &lt; 0 || index &gt; size()</code>).
     */
    public abstract ListIterator<E> listIterator(int index);

    /**
     * Returns the item at the specified position in this list.
     * <p>
     * This implementation first gets a list iterator pointing to the indexed item (with <code>listIterator(index)</code>).
     * Then, it gets the element using <code>ListIterator.next</code> and returns it.
     *
     * @param index the index of the item to return.
     * @return the item at the specified position in this list.
     * @throws IndexOutOfBoundsException if the specified index is out of range (<code>index &lt; 0 || index &gt;= size()</code>).
     */
    public E get(final int index) {
        final ListIterator<E> li=listIterator(index);
        try {
            return li.next();
        } catch (NoSuchElementException ex) {
            // index==size() yields a valid iterator with nothing left to return; report it as a bad index.
            throw new IndexOutOfBoundsException("index="+index);
        }
    }

    /**
     * Returns the number of items in the list.
     * <p>
     * This is equivalent to {@link #getCount()},
     * and is necessary for the implementation of the <code>java.util.Collection</code> interface.
     *
     * @return the number of items in the list.
     */
    public int size() {
        return getCount();
    }

    /**
     * Indicates whether this list is empty.
     * @return <code>true</code> if there are no items in the list, otherwise <code>false</code>.
     */
    public boolean isEmpty() {
        return getCount()==0;
    }

    /**
     * Indicates whether this list contains the specified object.
     *
     * @param o object to be checked for containment in this list.
     * @return <code>true</code> if this list contains the specified object, otherwise <code>false</code>.
     */
    public boolean contains(final Object o) {
        return indexOf(o)>=0;
    }

    /**
     * Returns an array containing all of the items in this list.
     * @return an array containing all of the items in this list.
     */
    public Object[] toArray() {
        final Object[] array=new Object[getCount()];
        int x=0;
        for (E e : this) array[x++]=e;
        return array;
    }

    /**
     * Returns an array containing all of the items in this list in the correct order;
     * the runtime type of the returned array is that of the specified array.
     * If the list fits in the specified array, it is returned therein.
     * Otherwise, a new array is allocated with the runtime type of the specified array and the size of this list.
     * <p>
     * If the list fits in the specified array with room to spare (i.e., the array has more elements than the list),
     * the item in the array immediately following the end of the collection is set to <code>null</code>.
     * This is useful in determining the length of the list <i>only</i>
     * if the caller knows that the list does not contain any <code>null</code> items.
     *
     * @param a the array into which the items of the list are to be stored, if it is big enough; otherwise, a new array of the same runtime type is allocated for this purpose.
     * @return an array containing the items of the list.
     * @throws NullPointerException if the specified array is <code>null</code>.
     * @throws ArrayStoreException if the runtime type of the specified array is not a supertype of the runtime type of every item in this list.
     */
    @SuppressWarnings("unchecked") // There is no way to implement this without using unchecked casts
    public <T> T[] toArray(T[] a) {
        final int count=getCount();
        // Too small: allocate a new array of the same component type, sized to the list.
        if (a.length<count) a=(T[])java.lang.reflect.Array.newInstance(a.getClass().getComponentType(),count);
        int x=0;
        for (E e : this) a[x++]=(T)e;
        // Room to spare: mark the end of the items with a null.
        if (a.length>count) a[count]=null;
        return a;
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public boolean remove(Object o) {
        throw new UnsupportedOperationException();
    }

    /**
     * Indicates whether this list contains all of the items in the specified collection.
     * @param collection the collection to be checked for containment in this list.
     * @return <code>true</code> if this list contains all of the items in the specified collection, otherwise <code>false</code>.
     * @throws NullPointerException if the specified collection is null.
     * @see #contains(Object)
     */
    public boolean containsAll(final Collection<?> collection) {
        for (Object o : collection) if (!contains(o)) return false;
        return true;
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public boolean addAll(Collection<? extends E> collection) {
        throw new UnsupportedOperationException();
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public boolean removeAll(Collection<?> collection) {
        throw new UnsupportedOperationException();
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public boolean retainAll(Collection<?> collection) {
        throw new UnsupportedOperationException();
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public boolean add(E e) {
        throw new UnsupportedOperationException();
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public E set(int index, E element) {
        throw new UnsupportedOperationException();
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public void add(int index, E element) {
        throw new UnsupportedOperationException();
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public E remove(int index) {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns the index in this list of the first occurrence of the specified object, or -1 if the list does not contain this object.
     *
     * @param o object to search for.
     * @return the index in this list of the first occurrence of the specified object, or -1 if the list does not contain this object.
     */
    public int indexOf(final Object o) {
        final ListIterator<E> li=listIterator(0);
        // After next(), previousIndex() is the index of the item just returned.
        if (o==null) {
            while (li.hasNext()) if (li.next()==null) return li.previousIndex();
        } else {
            while (li.hasNext()) if (o.equals(li.next())) return li.previousIndex();
        }
        return -1;
    }

    /**
     * Returns the index in this list of the last occurrence of the specified object, or -1 if the list does not contain this object.
     *
     * @param o object to search for.
     * @return the index in this list of the last occurrence of the specified object, or -1 if the list does not contain this object.
     */
    public int lastIndexOf(final Object o) {
        final ListIterator<E> li=listIterator(getCount());
        // After previous(), nextIndex() is the index of the item just returned.
        if (o==null) {
            while (li.hasPrevious()) if (li.previous()==null) return li.nextIndex();
        } else {
            while (li.hasPrevious()) if (o.equals(li.previous())) return li.nextIndex();
        }
        return -1;
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public void clear() {
        throw new UnsupportedOperationException();
    }

    /**
     * This list is unmodifiable, so this method always throws an <code>UnsupportedOperationException</code>.
     * @throws UnsupportedOperationException
     */
    public boolean addAll(int index, Collection<? extends E> collection) {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns an iterator over the items in the list in proper sequence.
     * @return an iterator over the items in the list in proper sequence.
     */
    public Iterator<E> iterator() {
        return listIterator();
    }

    /**
     * Returns a list iterator of the items in this list (in proper sequence), starting with the first item in the list.
     * @return a list iterator of the items in this list (in proper sequence), starting with the first item in the list.
     * @see #listIterator(int)
     */
    public ListIterator<E> listIterator() {
        return listIterator(0);
    }

    /**
     * Returns a view of the portion of this list between <code>fromIndex</code>, inclusive, and <code>toIndex</code>, exclusive.
     * (If <code>fromIndex</code> and <code>toIndex</code> are equal, the returned list is empty.)
     * The returned list is backed by this list, so changes in the returned list are reflected in this list, and vice-versa.
     * The returned list supports all of the optional list operations supported by this list.
     *
     * @param fromIndex low endpoint (inclusive) of the subList.
     * @param toIndex high endpoint (exclusive) of the subList.
     * @return a view of the specified range within this list.
     * @throws IndexOutOfBoundsException endpoint index value out of range (<code>fromIndex &lt; 0 || toIndex &gt; size</code>)
     * @throws IllegalArgumentException endpoint indices out of order (<code>fromIndex &gt; toIndex</code>)
     * @see java.util.List#subList(int fromIndex, int toIndex)
     */
    public List<E> subList(final int fromIndex, final int toIndex) {
        return new SubList<E>(this,fromIndex,toIndex);
    }

    /**
     * Read-only view of a contiguous range of a backing list.
     * Indices passed to this view are shifted by <code>offset</code> before being applied to the backing list.
     */
    private static class SubList<E> extends AbstractList<E> {
        private final List<E> list;
        private final int offset;
        private final int size;

        SubList(final List<E> list, final int fromIndex, final int toIndex) {
            if (fromIndex<0) throw new IndexOutOfBoundsException("fromIndex="+fromIndex);
            if (toIndex>list.size()) throw new IndexOutOfBoundsException("toIndex="+toIndex);
            if (fromIndex>toIndex) throw new IllegalArgumentException("fromIndex("+fromIndex+") > toIndex("+toIndex+")");
            this.list=list;
            offset=fromIndex;
            size=toIndex-fromIndex;
        }

        public E get(final int index) {
            return list.get(getSuperListIndex(index));
        }

        public int size() {
            return size;
        }

        public Iterator<E> iterator() {
            return listIterator();
        }

        public ListIterator<E> listIterator(final int index) {
            // An iterator may be positioned at the end of the view (index==size); this is also what
            // iterator() needs on an empty view. The element check in getSuperListIndex would reject it.
            if (index<0 || index>size) throw new IndexOutOfBoundsException("index="+index+", size="+size);
            return new ListIterator<E>() {
                private final ListIterator<E> i=list.listIterator(index+offset);

                public boolean hasNext() {
                    return nextIndex()<size;
                }

                public E next() {
                    if (!hasNext()) throw new NoSuchElementException();
                    return i.next();
                }

                public boolean hasPrevious() {
                    return previousIndex()>=0;
                }

                public E previous() {
                    if (!hasPrevious()) throw new NoSuchElementException();
                    return i.previous();
                }

                public int nextIndex() {
                    return i.nextIndex()-offset;
                }

                public int previousIndex() {
                    return i.previousIndex()-offset;
                }

                public void remove() {
                    throw new UnsupportedOperationException();
                }

                public void set(E o) {
                    throw new UnsupportedOperationException();
                }

                public void add(E o) {
                    throw new UnsupportedOperationException();
                }
            };
        }

        public List<E> subList(final int fromIndex, final int toIndex) {
            return new SubList<E>(this,fromIndex,toIndex);
        }

        /** Maps an element index of this view to the backing list, rejecting indices outside <code>[0,size)</code>. */
        private int getSuperListIndex(final int index) {
            if (index<0 || index>=size) throw new IndexOutOfBoundsException("index="+index+", size="+size);
            return index+offset;
        }
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/EndTagTypeNormal.java0000644000175000017500000000233311204550410026013 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho;

/**
 * The end tag type registered under the description <code>"/normal"</code>.
 * <p>
 * It is a singleton ({@link #INSTANCE}) and reports {@link StartTagType#NORMAL} as its corresponding start tag type.
 */
final class EndTagTypeNormal extends EndTagTypeGenericImplementation {
    static final EndTagTypeNormal INSTANCE=new EndTagTypeNormal();

    private EndTagTypeNormal() {
        // NOTE(review): the meaning of the two trailing boolean flags is defined by the
        // EndTagTypeGenericImplementation constructor, which is not visible here.
        super(
            "/normal",
            START_DELIMITER_PREFIX,
            ">",
            false,
            false
        );
    }

    /**
     * Returns the start tag type that pairs with this end tag type.
     * @return {@link StartTagType#NORMAL}.
     */
    public StartTagType getCorrespondingStartTagType() {
        return StartTagType.NORMAL;
    }
}
jericho-html-3.1/src/java/net/htmlparser/jericho/EndTagTypeUnregistered.java0000644000175000017500000000352011204550410027222 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; final class EndTagTypeUnregistered extends EndTagType { static final EndTagTypeUnregistered INSTANCE=new EndTagTypeUnregistered(); private EndTagTypeUnregistered() { super("/unregistered",START_DELIMITER_PREFIX,">",false); } protected Tag constructTagAt(final Source source, final int pos) { final ParseText parseText=source.getParseText(); final int nameBegin=pos+getStartDelimiter().length(); final int nameEnd=parseText.indexOf(getClosingDelimiter(),nameBegin); final String name=source.getName(nameBegin,nameEnd); // throws IndexOutOfBoundsException if nameEnd==-1 final EndTag endTag=constructEndTag(source,pos,nameEnd+getClosingDelimiter().length(),name); if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("Encountered possible EndTag at ")).append(" whose content does not match a registered EndTagType").toString()); return endTag; } } jericho-html-3.1/src/java/net/htmlparser/jericho/FormControl.java0000644000175000017500000013720211204550410025106 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. 
package net.htmlparser.jericho; import java.util.*; import java.io.*; /** * Represents an HTML form control. *

    * A FormControl consists of a single {@linkplain #getElement() element} * that matches one of the {@linkplain FormControlType form control types}. *

    * The term output element is used to describe the element that is * {@linkplain OutputSegment#writeTo(Writer) output} if this form control is {@linkplain OutputDocument#replace(FormControl) replaced} * in an {@link OutputDocument}. *

    * A predefined value control is a form control for which * {@link #getFormControlType()}.{@link FormControlType#hasPredefinedValue() hasPredefinedValue()} * returns true. It has a {@linkplain #getFormControlType() control type} of * {@link FormControlType#CHECKBOX CHECKBOX}, {@link FormControlType#RADIO RADIO}, {@link FormControlType#BUTTON BUTTON}, * {@link FormControlType#SUBMIT SUBMIT}, {@link FormControlType#IMAGE IMAGE}, {@link FormControlType#SELECT_SINGLE SELECT_SINGLE} * or {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE}. *

    * A user value control is a form control for which * {@link #getFormControlType()}.{@link FormControlType#hasPredefinedValue() hasPredefinedValue()} * returns false. It has a {@linkplain #getFormControlType() control type} of * {@link FormControlType#FILE FILE}, {@link FormControlType#HIDDEN HIDDEN}, {@link FormControlType#PASSWORD PASSWORD}, * {@link FormControlType#TEXT TEXT} or {@link FormControlType#TEXTAREA TEXTAREA}. *

    * The functionality of most significance to users of this class relates to the * display characteristics of the output element, * manipulated using the {@link #setDisabled(boolean)} and {@link #setOutputStyle(FormControlOutputStyle)} methods. *

    * As a general rule, the operations dealing with the control's submission values * are better performed on a {@link FormFields} or {@link FormField} object, which provide a more * intuitive interface by grouping form controls of the same {@linkplain #getName() name} together. * The higher abstraction level of these classes means they can automatically ensure that the * submission values of their constituent controls are consistent with each other, * for example by ensuring that only one {@link FormControlType#RADIO RADIO} control with a given name is * {@link #isChecked() checked} at a time. *

    * A {@link FormFields} object can be directly {@linkplain FormFields#FormFields(Collection) constructed} from * a collection of FormControl objects. *

    * FormControl instances are obtained using the {@link Element#getFormControl()} method or are created automatically * with the creation of a {@link FormFields} object via the {@link Segment#getFormFields()} method. * * @see FormControlType * @see FormFields * @see FormField */ public abstract class FormControl extends Segment { FormControlType formControlType; String name; ElementContainer elementContainer; FormControlOutputStyle outputStyle=FormControlOutputStyle.NORMAL; private static final String CHECKBOX_NULL_DEFAULT_VALUE="on"; private static Comparator COMPARATOR=new PositionComparator(); static FormControl construct(final Element element) { final String tagName=element.getStartTag().getName(); if (tagName==HTMLElementName.INPUT) { final String typeAttributeValue=element.getAttributes().getRawValue(Attribute.TYPE); if (typeAttributeValue==null) return new InputFormControl(element,FormControlType.TEXT); FormControlType formControlType=FormControlType.getFromInputElementType(typeAttributeValue); if (formControlType==null) { if (formControlType.isNonFormControl(typeAttributeValue)) return null; if (element.source.logger.isInfoEnabled()) element.source.logger.info(element.source.getRowColumnVector(element.begin).appendTo(new StringBuilder(200)).append(": INPUT control with unrecognised type \"").append(typeAttributeValue).append("\" assumed to be type \"text\"").toString()); formControlType=FormControlType.TEXT; } switch (formControlType) { case TEXT: return new InputFormControl(element,formControlType); case CHECKBOX: case RADIO: return new RadioCheckboxFormControl(element,formControlType); case SUBMIT: return new SubmitFormControl(element,formControlType); case IMAGE: return new ImageSubmitFormControl(element); case HIDDEN: case PASSWORD: case FILE: return new InputFormControl(element,formControlType); default: throw new AssertionError(formControlType); } } else if (tagName==HTMLElementName.SELECT) { return new SelectFormControl(element); } else if 
(tagName==HTMLElementName.TEXTAREA) { return new TextAreaFormControl(element); } else if (tagName==HTMLElementName.BUTTON) { return "submit".equalsIgnoreCase(element.getAttributes().getRawValue(Attribute.TYPE)) ? new SubmitFormControl(element,FormControlType.BUTTON) : null; } else { return null; } } private FormControl(final Element element, final FormControlType formControlType, final boolean loadPredefinedValue) { super(element.source,element.begin,element.end); elementContainer=new ElementContainer(element,loadPredefinedValue); this.formControlType=formControlType; name=element.getAttributes().getValue(Attribute.NAME); verifyName(); } /** * Returns the {@linkplain FormControlType type} of this form control. * @return the {@linkplain FormControlType type} of this form control. */ public final FormControlType getFormControlType() { return formControlType; } /** * Returns the name of the control. *

    * The name comes from the value of the name {@linkplain Attribute attribute} of the * {@linkplain #getElement() form control's element}, not the {@linkplain Element#getName() name of the element} itself. *

    * Since a {@link FormField} is simply a group of controls with the same name, the terms control name and * field name are for the most part synonymous, with only a possible difference in case differentiating them. *

    * In contrast to the {@link FormField#getName()} method, this method always returns the name using the original case
    * from the source document, regardless of the current setting of the static
    * {@link Config#CurrentCompatibilityMode}.{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property.
    *
    * @return the name of the control.
    */
    public final String getName() {
        return this.name;
    }

    /**
    * Returns the {@linkplain Element element} representing this form control in the source document.
    *

    * The {@linkplain Element#getAttributes() attributes} of this source element should correspond with the
    * output attributes if the
    * display characteristics or submission values
    * have not been modified.
    *
    * @return the {@linkplain Element element} representing this form control in the source document.
    */
    public final Element getElement() {
        final Element sourceElement=elementContainer.element;
        return sourceElement;
    }

    /**
    * Returns an iterator over the {@link HTMLElementName#OPTION OPTION} {@linkplain Element elements} contained within this control, in order of appearance.
    *

    * This method is only relevant to form controls with a {@linkplain #getFormControlType() type} of * {@link FormControlType#SELECT_SINGLE SELECT_SINGLE} or {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE}. * * @return an iterator over the {@link HTMLElementName#OPTION OPTION} {@linkplain Element elements} contained within this control, in order of appearance. * @throws UnsupportedOperationException if the {@linkplain #getFormControlType() type} of this control is not {@link FormControlType#SELECT_SINGLE SELECT_SINGLE} or {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE}. */ public Iterator getOptionElementIterator() { // overridden in SelectFormControl throw new UnsupportedOperationException("Only SELECT controls contain OPTION elements"); } /** * Returns the current {@linkplain FormControlOutputStyle output style} of this form control. *

    * This property affects how this form control is displayed if it has been {@linkplain OutputDocument#replace(FormControl) replaced} * in an {@link OutputDocument}. * See the documentation of the {@link FormControlOutputStyle} class for information on the available output styles. *

    * The default output style for every form control is {@link FormControlOutputStyle#NORMAL}.
    *
    * @return the current {@linkplain FormControlOutputStyle output style} of this form control.
    * @see #setOutputStyle(FormControlOutputStyle)
    */
    public FormControlOutputStyle getOutputStyle() {
        return this.outputStyle;
    }

    /**
    * Sets the {@linkplain FormControlOutputStyle output style} of this form control.
    *

    * See the {@link #getOutputStyle()} method for a full description of this property.
    *
    * @param outputStyle the new {@linkplain FormControlOutputStyle output style} of this form control.
    */
    public void setOutputStyle(final FormControlOutputStyle outputStyle) {
        // The argument is stored as given; no validation is performed here.
        this.outputStyle=outputStyle;
    }

    /**
    * Returns a map of the names and values of this form control's output attributes.
    *

    * The term output attributes is used in this library to refer to the * attributes of a form control's * output element. *

    * The map keys are the String attribute names, which should all be in lower case. * The map values are the corresponding String attribute values, with a null value given * to an attribute that {@linkplain Attribute#hasValue() has no value}. *

    * Direct manipulation of the returned map affects the attributes of this form control's output element. * It is the responsibility of the user to ensure that all entries added to the map use the correct key and value types, * and that all keys (attribute names) are in lower case. *

    * It is recommended that the submission value modification methods * are used to modify attributes that affect the submission value of the control * rather than manipulating the attributes map directly. *

    * An iteration over the map entries will return the attributes in the same order as they appeared in the source document, or * at the end if the attribute was not present in the source document. *

    * The returned attributes only correspond with those of the {@linkplain #getElement() source element} if the control's
    * display characteristics and submission values
    * have not been modified.
    *
    * @return a map of the names and values of this form control's output attributes.
    */
    public final Map getAttributesMap() {
        // Delegates to the element container, which owns the output attribute state.
        return this.elementContainer.getAttributesMap();
    }

    /**
    * Indicates whether this form control is disabled.
    *

    * The form control is disabled if the attribute * "disabled" * is present in its output element. *

    * The return value is logically equivalent to {@link #getAttributesMap()}.containsKey("disabled"),
    * but using this property may be more efficient in some circumstances.
    *
    * @return true if this form control is disabled, otherwise false.
    */
    public final boolean isDisabled() {
        final boolean disabled=elementContainer.getBooleanAttribute(Attribute.DISABLED);
        return disabled;
    }

    /**
    * Sets whether this form control is disabled.
    *

    * If the argument supplied to this method is true and the disabled attribute is not already present * in the output element, the full * XHTML compatible attribute disabled="disabled" is added. * If the attribute is already present, it is left unchanged. *

    * If the argument supplied to this method is false, the attribute is removed from the output element. *

    * See the {@link #isDisabled()} method for more information.
    *
    * @param disabled the new value of this property.
    */
    public final void setDisabled(final boolean disabled) {
        // Delegates to the element container, which adds or removes the "disabled" output attribute.
        this.elementContainer.setBooleanAttribute(Attribute.DISABLED,disabled);
    }

    /**
    * Indicates whether this form control is checked.
    *

    * The term checked is used to describe a checkbox or radio button control that is selected, which is the case if the attribute * "checked" * is present in its output element. *

    * This property is only relevant to form controls with a {@linkplain #getFormControlType() type} of * {@link FormControlType#CHECKBOX} or {@link FormControlType#RADIO}, and throws an UnsupportedOperationException * for other control types. *

    * Use one of the submission value modification methods to change the value * of this property. *

    * If this control is a checkbox, you can set it to checked by calling * {@link #setValue(String) setValue}({@link #getName()}), and set it to unchecked by calling * {@link #clearValues()}. *

    * If this control is a radio button, you should use the {@link FormField#setValue(String)} method or one of the other * higher level submission value modification methods * to set the control to checked, as calling {@link #setValue(String)} method on this object * in the same way as for a checkbox does not automatically uncheck all other radio buttons with the same name. * Even calling {@link #clearValues()} on this object to ensure that this radio button is unchecked is not recommended, as * it can lead to a situation where all the radio buttons with this name are unchecked. * The HTML 4.01 specification of radio buttons * recommends against this situation because it is not defined how user agents should handle it, and behaviour differs amongst browsers. *

    * The return value is logically equivalent to {@link #getAttributesMap()}.containsKey("checked"), * but using this property may be more efficient in some circumstances. * * @return true if this form control is checked, otherwise false. * @throws UnsupportedOperationException if the {@linkplain #getFormControlType() type} of this control is not {@link FormControlType#CHECKBOX} or {@link FormControlType#RADIO}. */ public boolean isChecked() { throw new UnsupportedOperationException("This property is only relevant for CHECKBOX and RADIO controls"); } /** * Returns the initial value of this control if it has a {@linkplain FormControlType#hasPredefinedValue() predefined value}. *

    * Only predefined value controls can return a non-null result. * All other control types return null. *

    * {@link FormControlType#CHECKBOX CHECKBOX} and {@link FormControlType#RADIO RADIO} controls have a guaranteed * predefined value determined by the value of its compulsory * value * attribute. If the attribute is not present in the source document, this library assigns the control a default * predefined value of "on", consistent with popular browsers. *

    * {@link FormControlType#SUBMIT SUBMIT}, {@link FormControlType#BUTTON BUTTON} and {@link FormControlType#IMAGE IMAGE} * controls have an optional predefined value determined by the value of its * value * attribute. This value is * successful * only in the control used to submit the form. *

    * {@link FormControlType#SELECT_SINGLE} and {@link FormControlType#SELECT_MULTIPLE} controls are special cases * because they usually contain multiple * OPTION * elements, each with its own predefined value. * In this case the {@link #getPredefinedValues()} method should be used instead, which returns a collection of all the * control's predefined values. Attempting to call this method on a SELECT control results in * a java.lang.UnsupportedOperationException. *

    * The predefined value of a control is not affected by changes to the
    * submission value of the control.
    *
    * @return the initial value of this control if it has a {@linkplain FormControlType#hasPredefinedValue() predefined value}, or null if none.
    */
    public String getPredefinedValue() {
        final String predefinedValue=elementContainer.predefinedValue;
        return predefinedValue;
    }

    /**
    * Returns a collection of all {@linkplain #getPredefinedValue() predefined values} in this control in order of appearance.
    *

    * All objects in the returned collection are of type String, with no null entries. *

    * This method is most useful for * SELECT * controls since they typically contain multiple predefined values. * In other controls it returns a collection with zero or one item based on the output of the * {@link #getPredefinedValue()} method, so for efficiency it is recommended to use the * {@link #getPredefinedValue()} method instead. *

    * The multiple predefined values of a * SELECT * control are defined by the * OPTION * elements within it. * Each OPTION element has an * initial value * determined by the value of its * value * attribute, or if this attribute is not present, by its * {@linkplain CharacterReference#decode(CharSequence) decoded} {@linkplain Element#getContent() content} * text with {@linkplain CharacterReference#decodeCollapseWhiteSpace(CharSequence) collapsed white space}. *

    * The predefined values of a control are not affected by changes to the
    * submission values of the control.
    *
    * @return a collection of all {@linkplain #getPredefinedValue() predefined values} in this control in order of appearance, guaranteed not null.
    * @see FormField#getPredefinedValues()
    */
    public Collection getPredefinedValues() {
        final String predefinedValue=getPredefinedValue();
        // A control without a predefined value contributes an empty collection.
        // The empty set must actually be returned, otherwise the result would be a singleton holding null,
        // violating the documented guarantee that the collection has no null entries.
        if (predefinedValue==null) return Collections.emptySet();
        return Collections.singleton(predefinedValue);
    }

    /**
    * Returns a list of the control's submission values in order of appearance.
    *

    * All objects in the returned list are of type String, with no null entries. *

    * The term submission value is used in this library to refer to the value the control * would contribute to the * form data set * of a submitted * form, assuming no modification of the control's * current value by the * user agent or by end user interaction. *

    * For user value controls, the submission value corresponds to the * control's initial value. *

    * The definition of the submission value for each predefined value control type is as follows: *

    * {@link FormControlType#CHECKBOX CHECKBOX} and {@link FormControlType#RADIO RADIO} controls * have a submission value specified by its {@linkplain #getPredefinedValue() predefined value} * if it is {@link #isChecked() checked}, otherwise it has no submission value. *

    * {@link FormControlType#SELECT_SINGLE SELECT_SINGLE} and {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE} controls * have submission values specified by the * values of the control's * selected * OPTION elements. *

    * Only a {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE} control can have more than one submission value, * all other {@linkplain FormControlType control types} return a list containing either one value or no values. * A {@link FormControlType#SELECT_SINGLE SELECT_SINGLE} control only returns multiple submission values * if it illegally contains multiple selected options in the source document. *

    * {@link FormControlType#SUBMIT SUBMIT}, {@link FormControlType#BUTTON BUTTON}, and {@link FormControlType#IMAGE IMAGE} * controls are only ever * successful * when they are activated by the user to * submit the form. * Because the submission value is intended to be a static representation of a control's data without * interaction by the user, this library never associates submission values with * {@linkplain FormControlType#isSubmit() submit} buttons, so this method always returns an empty list for these * control types. *

    * The submission value(s) of a control can be modified for subsequent output in * an {@link OutputDocument} using the various * submission value modification methods, namely:
    * {@link FormField#setValue(String)}
    * {@link FormField#addValue(String)}
    * {@link FormField#setValues(Collection)}
    * {@link FormField#clearValues()}
    * {@link FormFields#setValue(String fieldName, String value)}
    * {@link FormFields#addValue(String fieldName, String value)}
    * {@link FormFields#setDataSet(Map)}
    * {@link FormFields#clearValues()}
    * {@link #setValue(String) FormControl.setValue(String)}
    * {@link #addValue(String) FormControl.addValue(String)}
    * {@link #clearValues() FormControl.clearValues()}
    *

    * The values returned by this method reflect any changes made using the submission value modification methods,
    * in contrast to methods found in the {@link Attributes} and {@link Attribute} classes, which always reflect the source document.
    *
    * @return a list of the control's submission values in order of appearance, guaranteed not null.
    * @see #getPredefinedValues()
    */
    public List getValues() {
        // Each concrete control type contributes its own values through addValuesTo.
        final List submissionValues=new ArrayList();
        addValuesTo(submissionValues);
        return submissionValues;
    }

    /**
    * Clears the control's existing submission values.
    *

    * This is equivalent to {@link #setValue(String) setValue(null)}. *

    * NOTE: The {@link FormFields} and {@link FormField} classes provide a more appropriate abstraction level for the modification of form control submission values.
    *
    * @see FormFields#clearValues()
    * @see FormField#clearValues()
    */
    public final void clearValues() {
        // A null value instructs setValue to clear the control of all submission values.
        this.setValue(null);
    }

    /**
    * Sets the control's submission value *.
    *

    * * NOTE: The {@link FormFields} and {@link FormField} classes provide a more appropriate abstraction level for the modification of form control submission values. * Consider using the {@link FormFields#setValue(String fieldName, String value)} method instead. *

    * The specified value replaces any existing submission values of the control. *

    * The return value indicates whether the control has "accepted" the value. * For user value controls, the return value is always true. *

    * For predefined value controls, * calling this method does not affect the control's * {@linkplain #getPredefinedValues() predefined values}, but instead determines whether the control (or its options) become * checked or * selected * as detailed below: *

    * {@link FormControlType#CHECKBOX CHECKBOX} and {@link FormControlType#RADIO RADIO} controls become {@link #isChecked() checked} * and the method returns true if the specified value matches the control's predefined value (case sensitive), * otherwise the control becomes unchecked and the method returns false. * Note that any other controls with the same {@linkplain #getName() name} are not unchecked if this control becomes checked, * possibly resulting in an invalid state where multiple RADIO controls are checked at the same time. * The {@link FormField#setValue(String)} method avoids such problems and its use is recommended over this method. *

    * {@link FormControlType#SELECT_SINGLE SELECT_SINGLE} and {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE} * controls receive the specified value by selecting the option with the matching value and deselecting all others. * If none of the options match, all are deselected. * The return value of this method indicates whether one of the options matched. *

    * {@link FormControlType#SUBMIT SUBMIT}, {@link FormControlType#BUTTON BUTTON}, and {@link FormControlType#IMAGE IMAGE} * controls never have a submission value, so calling this method has no effect and * always returns false. * * @param value the new submission value of this control, or null to clear the control of all submission values. * @return true if the control accepts the value, otherwise false. * @see FormFields#setValue(String fieldName, String value) */ public abstract boolean setValue(String value); /** * Adds the specified value to this control's submission values *. *

    * * NOTE: The {@link FormFields} and {@link FormField} classes provide a more appropriate abstraction level for the modification of form control submission values. * Consider using the {@link FormFields#addValue(String fieldName, String value)} method instead. *

    * This is almost equivalent to {@link #setValue(String)}, with only the following differences: *

    * {@link FormControlType#CHECKBOX CHECKBOX} controls retain their existing submission value * instead of becoming unchecked if the specified value does not match the control's {@linkplain #getPredefinedValue() predefined value}. *

    * {@link FormControlType#SELECT_MULTIPLE SELECT_MULTIPLE} controls retain their existing * submission values, meaning that the control's * OPTION * elements whose {@linkplain #getPredefinedValues() predefined values} do not match the specified value are not deselected. * This is the only type of control that can have multiple submission values within the one control. * * @param value the value to add to this control's submission values, must not be null. * @return true if the control accepts the value, otherwise false. * @see FormFields#addValue(String fieldName, String value) */ public boolean addValue(final String value) { return setValue(value); } abstract void addValuesTo(Collection collection); // should not add null values abstract void addToFormFields(FormFields formFields); abstract void replaceInOutputDocument(OutputDocument outputDocument); public String getDebugInfo() { final StringBuilder sb=new StringBuilder(); sb.append(formControlType).append(" name=\"").append(name).append('"'); if (elementContainer.predefinedValue!=null) sb.append(" PredefinedValue=\"").append(elementContainer.predefinedValue).append('"'); sb.append(" - ").append(getElement().getDebugInfo()); return sb.toString(); } static final class InputFormControl extends FormControl { // TEXT, HIDDEN, PASSORD or FILE public InputFormControl(final Element element, final FormControlType formControlType) { super(element,formControlType,false); } public boolean setValue(final String value) { elementContainer.setAttributeValue(Attribute.VALUE,value); return true; } void addValuesTo(final Collection collection) { addValueTo(collection,elementContainer.getAttributeValue(Attribute.VALUE)); } void addToFormFields(final FormFields formFields) { formFields.add(this); } void replaceInOutputDocument(final OutputDocument outputDocument) { if (outputStyle==FormControlOutputStyle.REMOVE) { outputDocument.remove(getElement()); } else if (outputStyle==FormControlOutputStyle.DISPLAY_VALUE) { String 
output=null; if (formControlType!=FormControlType.HIDDEN) { String value=elementContainer.getAttributeValue(Attribute.VALUE); if (formControlType==FormControlType.PASSWORD && value!=null) value=getString(FormControlOutputStyle.ConfigDisplayValue.PasswordChar,value.length()); output=getDisplayValueHTML(value,false); } outputDocument.replace(getElement(),output); } else { replaceAttributesInOutputDocumentIfModified(outputDocument); } } } static final class TextAreaFormControl extends FormControl { // TEXTAREA public String value=UNCHANGED; private static final String UNCHANGED=new String(); public TextAreaFormControl(final Element element) { super(element,FormControlType.TEXTAREA,false); } public boolean setValue(final String value) { this.value=value; return true; } void addValuesTo(final Collection collection) { addValueTo(collection,getValue()); } void addToFormFields(final FormFields formFields) { formFields.add(this); } void replaceInOutputDocument(final OutputDocument outputDocument) { if (outputStyle==FormControlOutputStyle.REMOVE) { outputDocument.remove(getElement()); } else if (outputStyle==FormControlOutputStyle.DISPLAY_VALUE) { outputDocument.replace(getElement(),getDisplayValueHTML(getValue(),true)); } else { replaceAttributesInOutputDocumentIfModified(outputDocument); if (value!=UNCHANGED) outputDocument.replace(getElement().getContent(),CharacterReference.encode(value)); } } private String getValue() { return (value==UNCHANGED) ? 
CharacterReference.decode(getElement().getContent()) : value; } } static final class RadioCheckboxFormControl extends FormControl { // RADIO or CHECKBOX public RadioCheckboxFormControl(final Element element, final FormControlType formControlType) { super(element,formControlType,true); if (elementContainer.predefinedValue==null) { elementContainer.predefinedValue=CHECKBOX_NULL_DEFAULT_VALUE; if (element.source.logger.isInfoEnabled()) element.source.logger.info(element.source.getRowColumnVector(element.begin).appendTo(new StringBuilder(200)).append(": compulsory \"value\" attribute of ").append(formControlType).append(" control \"").append(name).append("\" is missing, assuming the value \"").append(CHECKBOX_NULL_DEFAULT_VALUE).append('"').toString()); } } public boolean setValue(final String value) { return elementContainer.setSelected(value,Attribute.CHECKED,false); } public boolean addValue(final String value) { return elementContainer.setSelected(value,Attribute.CHECKED,formControlType==FormControlType.CHECKBOX); } void addValuesTo(final Collection collection) { if (isChecked()) addValueTo(collection,getPredefinedValue()); } public boolean isChecked() { return elementContainer.getBooleanAttribute(Attribute.CHECKED); } void addToFormFields(final FormFields formFields) { formFields.add(this); } void replaceInOutputDocument(final OutputDocument outputDocument) { if (outputStyle==FormControlOutputStyle.REMOVE) { outputDocument.remove(getElement()); } else { if (outputStyle==FormControlOutputStyle.DISPLAY_VALUE) { final String html=isChecked() ? 
FormControlOutputStyle.ConfigDisplayValue.CheckedHTML : FormControlOutputStyle.ConfigDisplayValue.UncheckedHTML; if (html!=null) { outputDocument.replace(getElement(),html); return; } setDisabled(true); } replaceAttributesInOutputDocumentIfModified(outputDocument); } } } static class SubmitFormControl extends FormControl { // BUTTON, SUBMIT or (in subclass) IMAGE public SubmitFormControl(final Element element, final FormControlType formControlType) { super(element,formControlType,true); } public boolean setValue(final String value) { return false; } void addValuesTo(final Collection collection) {} void addToFormFields(final FormFields formFields) { if (getPredefinedValue()!=null) formFields.add(this); } void replaceInOutputDocument(final OutputDocument outputDocument) { if (outputStyle==FormControlOutputStyle.REMOVE) { outputDocument.remove(getElement()); } else { if (outputStyle==FormControlOutputStyle.DISPLAY_VALUE) setDisabled(true); replaceAttributesInOutputDocumentIfModified(outputDocument); } } } static final class ImageSubmitFormControl extends SubmitFormControl { // IMAGE public ImageSubmitFormControl(final Element element) { super(element,FormControlType.IMAGE); } void addToFormFields(final FormFields formFields) { super.addToFormFields(formFields); formFields.addName(this,name+".x"); formFields.addName(this,name+".y"); } } static final class SelectFormControl extends FormControl { // SELECT_MULTIPLE or SELECT_SINGLE public ElementContainer[] optionElementContainers; public SelectFormControl(final Element element) { super(element,element.getAttributes().get(Attribute.MULTIPLE)!=null ? 
FormControlType.SELECT_MULTIPLE : FormControlType.SELECT_SINGLE,false); final List optionElements=element.getAllElements(HTMLElementName.OPTION); optionElementContainers=new ElementContainer[optionElements.size()]; int x=0; for (Element optionElement : optionElements) { final ElementContainer optionElementContainer=new ElementContainer(optionElement,true); if (optionElementContainer.predefinedValue==null) // use the content of the element if it has no value attribute optionElementContainer.predefinedValue=CharacterReference.decodeCollapseWhiteSpace(optionElementContainer.element.getContent()); optionElementContainers[x++]=optionElementContainer; } } public String getPredefinedValue() { throw new UnsupportedOperationException("Use getPredefinedValues() method instead on SELECT controls"); } public Collection getPredefinedValues() { final LinkedHashSet linkedHashSet=new LinkedHashSet(optionElementContainers.length*2,1.0F); for (int i=0; i getOptionElementIterator() { return new OptionElementIterator(); } public boolean setValue(final String value) { return addValue(value,false); } public boolean addValue(final String value) { return addValue(value,formControlType==FormControlType.SELECT_MULTIPLE); } private boolean addValue(final String value, final boolean allowMultipleValues) { boolean valueFound=false; for (int i=0; i collection) { for (int i=0; i0) sb.setLength(sb.length()-FormControlOutputStyle.ConfigDisplayValue.MultipleValueSeparator.length()); // remove last separator outputDocument.replace(getElement(),getDisplayValueHTML(sb,false)); } else { replaceAttributesInOutputDocumentIfModified(outputDocument); for (int i=0; i { private int i=0; public boolean hasNext() { return i'); if (text==null || text.length()==0) sb.append(FormControlOutputStyle.ConfigDisplayValue.EmptyHTML); else CharacterReference.appendEncode(sb,text,whiteSpaceFormatting); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens 
sb.append(EndTagType.START_DELIMITER_PREFIX).append(FormControlOutputStyle.ConfigDisplayValue.ElementName).append('>'); return sb.toString(); } final void replaceAttributesInOutputDocumentIfModified(final OutputDocument outputDocument) { elementContainer.replaceAttributesInOutputDocumentIfModified(outputDocument); } static List getAll(final Segment segment) { final ArrayList list=new ArrayList(); getAll(segment,list,HTMLElementName.INPUT); getAll(segment,list,HTMLElementName.TEXTAREA); getAll(segment,list,HTMLElementName.SELECT); getAll(segment,list,HTMLElementName.BUTTON); Collections.sort(list,COMPARATOR); return list; } private static void getAll(final Segment segment, final ArrayList list, final String tagName) { for (Element element : segment.getAllElements(tagName)) { final FormControl formControl=element.getFormControl(); if (formControl!=null) list.add(formControl); } } private static String getString(final char ch, final int length) { if (length==0) return ""; final StringBuilder sb=new StringBuilder(length); for (int i=0; i collection, final String value) { collection.add(value!=null ? 
value : ""); } private static final class PositionComparator implements Comparator { public int compare(final FormControl formControl1, final FormControl formControl2) { final int formControl1Begin=formControl1.getElement().getBegin(); final int formControl2Begin=formControl2.getElement().getBegin(); if (formControl1BeginformControl2Begin) return 1; return 0; } } ////////////////////////////////////////////////////////////////////////////////////// static final class ElementContainer { // Contains the information common to both a FormControl and to each OPTION element // within a SELECT FormControl public final Element element; public Map attributesMap=null; public String predefinedValue; // never null for option, checkbox or radio elements public ElementContainer(final Element element, final boolean loadPredefinedValue) { this.element=element; predefinedValue=loadPredefinedValue ? element.getAttributes().getValue(Attribute.VALUE) : null; } public Map getAttributesMap() { if (attributesMap==null) attributesMap=element.getAttributes().getMap(true); return attributesMap; } public boolean setSelected(final String value, final String selectedOrChecked, final boolean allowMultipleValues) { if (value!=null && predefinedValue.equals(value.toString())) { setBooleanAttribute(selectedOrChecked,true); return true; } if (!allowMultipleValues) setBooleanAttribute(selectedOrChecked,false); return false; } public String getAttributeValue(final String attributeName) { if (attributesMap!=null) return attributesMap.get(attributeName); else return element.getAttributes().getValue(attributeName); } public void setAttributeValue(final String attributeName, final String value) { // null value indicates attribute should be removed. 
if (value==null) { setBooleanAttribute(attributeName,false); return; } if (attributesMap!=null) { attributesMap.put(attributeName,value); return; } final String existingValue=getAttributeValue(attributeName); if (existingValue!=null && existingValue.equals(value)) return; getAttributesMap().put(attributeName,value); } public boolean getBooleanAttribute(final String attributeName) { if (attributesMap!=null) return attributesMap.containsKey(attributeName); else return element.getAttributes().get(attributeName)!=null; } public void setBooleanAttribute(final String attributeName, final boolean value) { final boolean oldValue=getBooleanAttribute(attributeName); if (value==oldValue) return; if (value) getAttributesMap().put(attributeName,attributeName); // xhtml compatible attribute else getAttributesMap().remove(attributeName); } public void replaceAttributesInOutputDocumentIfModified(final OutputDocument outputDocument) { if (attributesMap!=null) outputDocument.replace(element.getAttributes(),attributesMap); } } } jericho-html-3.1/src/java/net/htmlparser/jericho/Segment.java0000644000175000017500000015122311204610550024245 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. 
// See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.Iterator; import java.util.List; import java.util.Collections; import java.util.ArrayList; import java.util.regex.Pattern; /** * Represents a segment of a {@link Source} document. *

    * Many of the tag search methods are defined in this class. *

    * The span of a segment is defined by the combination of its begin and end character positions. */ public class Segment implements Comparable, CharSequence { final int begin; final int end; final Source source; private static final char[] WHITESPACE={' ','\n','\r','\t','\f','\u200B'}; // see comments in isWhiteSpace(char) method /** * Constructs a new Segment within the specified {@linkplain Source source} document with the specified begin and end character positions. * @param source the {@link Source} document, must not be null. * @param begin the character position in the source where this segment {@linkplain #getBegin() begins}, inclusive. * @param end the character position in the source where this segment {@linkplain #getEnd() ends}, exclusive. */ public Segment(final Source source, final int begin, final int end) { if (begin==-1 || end==-1 || begin>end) throw new IllegalArgumentException(); this.begin=begin; this.end=end; if (source==null) throw new IllegalArgumentException("source argument must not be null"); this.source=source; } // Only called from Source constructor Segment(final int length) { begin=0; this.end=length; source=(Source)this; } // Only used for creating dummy flag instances of this type (see Tag.NOT_CACHED and Element.NOT_CACHED) Segment() { this(0,0); } // Only used for creating dummy flag instances of this type (see Segment() constructor and StreamedSource.START_SEGMENT) Segment(final int begin, final int end) { this.begin=begin; this.end=end; source=null; } /** * Returns the {@link Source} document containing this segment. *

    * If a {@link StreamedSource} is in use, this method throws an UnsupportedOperationException.
    *
    * @return the {@link Source} document containing this segment.
    * @throws UnsupportedOperationException if the source reports itself as streamed.
    */
    public final Source getSource() {
        // The Source object is only handed out when the source is not streamed.
        if (!source.isStreamed()) return source;
        throw new UnsupportedOperationException("Source object is not available when using StreamedSource");
    }

    /**
    * Returns the position of the first character of this segment within the {@link Source} document.
    * @return the character position in the {@link Source} document at which this segment begins, inclusive.
    */
    public final int getBegin() {
        return begin;
    }

    /**
    * Returns the character position in the {@link Source} document immediately after the end of this segment.
    *

    * The character at the position specified by this property is not included in the segment. * * @return the character position in the {@link Source} document immediately after the end of this segment. */ public final int getEnd() { return end; } /** * Compares the specified object with this Segment for equality. *

    * Returns true if and only if the specified object is also a Segment,
    * and both segments have the same {@link Source}, and the same begin and end positions.
    * @param object  the object to be compared for equality with this Segment.
    * @return true if the specified object is equal to this Segment, otherwise false.
    */
    public final boolean equals(final Object object) {
        if (object==this) return true;
        // instanceof evaluates to false for null, so it also covers the null case
        if (!(object instanceof Segment)) return false;
        final Segment other=(Segment)object;
        // the source is compared by identity, the positions by value
        if (other.begin!=begin || other.end!=end) return false;
        return other.source==source;
    }

    /**
    * Returns a hash code value for the segment.
    *

    * The current implementation returns the sum of the begin and end positions, although this is not * guaranteed in future versions. * * @return a hash code value for the segment. */ public int hashCode() { return begin+end; } /** * Returns the length of the segment. * This is defined as the number of characters between the begin and end positions. * @return the length of the segment. */ public int length() { return end-begin; } /** * Indicates whether this Segment encloses the specified Segment. *

    * This is the case if {@link #getBegin()}<=segment.{@link #getBegin()} && {@link #getEnd()}>=segment.{@link #getEnd()}. *

    * Note that a segment encloses itself. * * @param segment the segment to be tested for being enclosed by this segment. * @return true if this Segment encloses the specified Segment, otherwise false. */ public final boolean encloses(final Segment segment) { return begin<=segment.begin && end>=segment.end; } /** * Indicates whether this segment encloses the specified character position in the source document. *

    * This is the case if {@link #getBegin()} <= pos < {@link #getEnd()}. * * @param pos the position in the {@link Source} document. * @return true if this segment encloses the specified character position in the source document, otherwise false. */ public final boolean encloses(final int pos) { return begin<=pos && posString. *

    * The returned String is newly created with every call to this method, unless this * segment is itself an instance of {@link Source}. * * @return the source text of this segment as a String. */ public String toString() { return source.subSequence(begin,end).toString(); } /** * Performs a simple rendering of the HTML markup in this segment into text. *

    * The output can be configured by setting any number of properties on the returned {@link Renderer} instance before * {@linkplain Renderer#writeTo(Writer) obtaining its output}. * * @return an instance of {@link Renderer} based on this segment. * @see #getTextExtractor() */ public Renderer getRenderer() { return new Renderer(this); } /** * Extracts the textual content from the HTML markup of this segment. *

    * The output can be configured by setting properties on the returned {@link TextExtractor} instance before * {@linkplain TextExtractor#writeTo(Writer) obtaining its output}. *

    * @return an instance of {@link TextExtractor} based on this segment. * @see #getRenderer() */ public TextExtractor getTextExtractor() { return new TextExtractor(this); } /** * Returns an iterator over every {@linkplain Tag tag}, {@linkplain CharacterReference character reference} and plain text segment contained within this segment. *

    * See the {@link Source#iterator()} method for a detailed description. *

    *

    *
    Example:
    *
    *

    * The following code demonstrates the typical usage of this method to make an exact copy of this segment to writer (assuming no server tags are present): *

    *
    	 * for (Iterator<Segment> nodeIterator=segment.getNodeIterator(); nodeIterator.hasNext();) {
    	 *   Segment nodeSegment=nodeIterator.next();
    	 *   if (nodeSegment instanceof Tag) {
    	 *     Tag tag=(Tag)nodeSegment;
    	 *     // HANDLE TAG
    	 *     // Uncomment the following line to ensure each tag is valid XML:
    	 *     // writer.write(tag.tidy()); continue;
    	 *   } else if (nodeSegment instanceof CharacterReference) {
    	 *     CharacterReference characterReference=(CharacterReference)nodeSegment;
    	 *     // HANDLE CHARACTER REFERENCE
    	 *     // Uncomment the following line to decode all character references instead of copying them verbatim:
    	 *     // characterReference.appendCharTo(writer); continue;
    	 *   } else {
    	 *     // HANDLE PLAIN TEXT
    	 *   }
    	 *   // unless specific handling has prevented getting to here, simply output the segment as is:
    	 *   writer.write(nodeSegment.toString());
    	 * }
    *
    *
    * @return an iterator over every {@linkplain Tag tag}, {@linkplain CharacterReference character reference} and plain text segment contained within this segment.
    */
    public Iterator<Segment> getNodeIterator() {
        // Every node (tag, character reference or plain text) is handed back as a Segment.
        return new NodeIterator(this);
    }

    /**
    * Returns a list of all {@link Tag} objects that are {@linkplain #encloses(Segment) enclosed} by this segment.
    *

    * The {@link Source#fullSequentialParse()} method should be called after construction of the {@link Source} object * if this method is to be used on a large proportion of the source. * It is called automatically if this method is called on the {@link Source} object itself. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method.
    *
    * @return a list of all {@link Tag} objects that are {@linkplain #encloses(Segment) enclosed} by this segment.
    */
    public List<Tag> getAllTags() {
        // A null tag type places no restriction on the type of tag returned.
        return getAllTags(null);
    }

    /**
    * Returns a list of all {@link Tag} objects of the specified {@linkplain TagType type} that are {@linkplain #encloses(Segment) enclosed} by this segment.
    *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Specifying a null argument to the tagType parameter is equivalent to {@link #getAllTags()}. * * @param tagType the {@linkplain TagType type} of tags to get. * @return a list of all {@link Tag} objects of the specified {@linkplain TagType type} that are {@linkplain #encloses(Segment) enclosed} by this segment. * @see #getAllStartTags(StartTagType) */ public List getAllTags(final TagType tagType) { Tag tag=checkTagEnclosure(Tag.getNextTag(source,begin,tagType)); if (tag==null) return Collections.emptyList(); final ArrayList list=new ArrayList(); do { list.add(tag); tag=checkTagEnclosure(tag.getNextTag(tagType)); } while (tag!=null); return list; } /** * Returns a list of all {@link StartTag} objects that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * The {@link Source#fullSequentialParse()} method should be called after construction of the {@link Source} object * if this method is to be used on a large proportion of the source. * It is called automatically if this method is called on the {@link Source} object itself. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @return a list of all {@link StartTag} objects that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllStartTags() { StartTag startTag=checkEnclosure(StartTag.getNext(source,begin)); if (startTag==null) return Collections.emptyList(); final ArrayList list=new ArrayList(); do { list.add(startTag); startTag=checkEnclosure(startTag.getNextStartTag()); } while (startTag!=null); return list; } /** * Returns a list of all {@link StartTag} objects of the specified {@linkplain StartTagType type} that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Specifying a null argument to the startTagType parameter is equivalent to {@link #getAllStartTags()}. * * @param startTagType the {@linkplain StartTagType type} of tags to get. * @return a list of all {@link StartTag} objects of the specified {@linkplain StartTagType type} that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllStartTags(final StartTagType startTagType) { if (startTagType==null) return getAllStartTags(); StartTag startTag=(StartTag)checkTagEnclosure(Tag.getNextTag(source,begin,startTagType)); if (startTag==null) return Collections.emptyList(); final ArrayList list=new ArrayList(); do { list.add(startTag); startTag=(StartTag)checkTagEnclosure(startTag.getNextTag(startTagType)); } while (startTag!=null); return list; } /** * Returns a list of all {@linkplain StartTagType#NORMAL normal} {@link StartTag} objects with the specified {@linkplain StartTag#getName() name} that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. *

    * Specifying a null argument to the name parameter is equivalent to {@link #getAllStartTags()}, which may include non-{@linkplain StartTagType#NORMAL normal} start tags. *

    * This method also returns {@linkplain Tag#isUnregistered() unregistered} tags if the specified name is not a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name}. * * @param name the {@linkplain StartTag#getName() name} of the start tags to get. * @return a list of all {@linkplain StartTagType#NORMAL normal} {@link StartTag} objects with the specified {@linkplain StartTag#getName() name} that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllStartTags(String name) { if (name==null) return getAllStartTags(); final boolean isXMLTagName=Tag.isXMLName(name); StartTag startTag=checkEnclosure(StartTag.getNext(source,begin,name,StartTagType.NORMAL,isXMLTagName)); if (startTag==null) return Collections.emptyList(); final ArrayList list=new ArrayList(); do { list.add(startTag); startTag=checkEnclosure(StartTag.getNext(source,startTag.begin+1,name,StartTagType.NORMAL,isXMLTagName)); } while (startTag!=null); return list; } /** * Returns a list of all {@link StartTag} objects with the specified attribute name/value pair that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param value the value of the specified attribute to search for, must not be null. * @param valueCaseSensitive specifies whether the attribute value matching is case sensitive. * @return a list of all {@link StartTag} objects with the specified attribute name/value pair that are {@linkplain #encloses(Segment) enclosed} by this segment. * @see #getAllStartTags(String attributeName, Pattern valueRegexPattern) */ public List getAllStartTags(final String attributeName, final String value, final boolean valueCaseSensitive) { StartTag startTag=checkEnclosure(source.getNextStartTag(begin,attributeName,value,valueCaseSensitive)); if (startTag==null) return Collections.emptyList(); final ArrayList list=new ArrayList(); do { list.add(startTag); startTag=checkEnclosure(source.getNextStartTag(startTag.begin+1,attributeName,value,valueCaseSensitive)); } while (startTag!=null); return list; } /** * Returns a list of all {@link StartTag} objects with the specified attribute name and value pattern that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, * without regard to the attribute value. This will also match an attribute that {@linkplain Attribute#hasValue() has no value} at all. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param valueRegexPattern the regular expression pattern that must match the attribute value, may be null. * @return a list of all {@link StartTag} objects with the specified attribute name and value pattern that are {@linkplain #encloses(Segment) enclosed} by this segment. * @see #getAllStartTags(String attributeName, String value, boolean valueCaseSensitive) */ public List getAllStartTags(final String attributeName, final Pattern valueRegexPattern) { StartTag startTag=checkEnclosure(source.getNextStartTag(begin,attributeName,valueRegexPattern)); if (startTag==null) return Collections.emptyList(); final ArrayList list=new ArrayList(); do { list.add(startTag); startTag=checkEnclosure(source.getNextStartTag(startTag.begin+1,attributeName,valueRegexPattern)); } while (startTag!=null); return list; } /** * Returns a list of all {@link StartTag} objects with the specified class that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This matches start tags with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple * class names separated by white space in the attribute value. *

    * See the {@link Tag} class documentation for more details about the behaviour of this method. * * @param className the class name (case sensitive) to search for, must not be null. * @return a list of all {@link StartTag} objects with the specified class that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllStartTagsByClass(final String className) { return getAllStartTags("class",getClassPattern(className)); } /** * Returns a list of the immediate children of this segment in the document element hierarchy. *

    * The returned list may include an element that extends beyond the end of this segment, as long as it begins within this segment. *

    * An element found at the start of this segment is included in the list. * Note however that if this segment is an {@link Element}, the overriding {@link Element#getChildElements()} method is called instead, * which only returns the children of the element. *

    * Calling getChildElements() on an Element is much more efficient than calling it on a Segment. *

    * The objects in the list are all of type {@link Element}. *

    * The {@link Source#fullSequentialParse()} method should be called after construction of the {@link Source} object * if this method is to be used on a large proportion of the source. * It is called automatically if this method is called on the {@link Source} object itself. *

    * See the {@link Source#getChildElements()} method for more details.
    *
    * @return a list of the immediate children of this segment in the document element hierarchy, guaranteed not null.
    * @see Element#getParentElement()
    */
    public List<Element> getChildElements() {
        if (length()==0) return Collections.emptyList();
        final List<Element> childElements=new ArrayList<Element>();
        int pos=begin;
        while (true) {
            final StartTag childStartTag=source.getNextStartTag(pos);
            // stop once no further start tag begins inside this segment
            if (childStartTag==null || childStartTag.begin>=end) break;
            if (!Config.IncludeServerTagsInElementHierarchy && childStartTag.getTagType().isServerTag()) {
                // server tags are left out of the hierarchy; resume scanning right after the tag
                pos=childStartTag.end;
                continue;
            }
            final Element childElement=childStartTag.getElement();
            childElements.add(childElement);
            // NOTE(review): the result is discarded; presumably called so the child resolves its own children up front - confirm against Element.getChildElements().
            childElement.getChildElements();
            // skip the whole child element so that only immediate children are collected
            pos=childElement.end;
        }
        return childElements;
    }

    /**
    * Returns a list of all {@link Element} objects that are {@linkplain #encloses(Segment) enclosed} by this segment.
    *

    * The {@link Source#fullSequentialParse()} method should be called after construction of the {@link Source} object * if this method is to be used on a large proportion of the source. * It is called automatically if this method is called on the {@link Source} object itself. *

    * The elements returned correspond exactly with the start tags returned in the {@link #getAllStartTags()} method. *

    * If this segment is itself an {@link Element}, the result includes this element in the list.
    *
    * @return a list of all {@link Element} objects that are {@linkplain #encloses(Segment) enclosed} by this segment.
    */
    public List<Element> getAllElements() {
        // Converts the start tags found by getAllStartTags() into their elements.
        return getAllElements(getAllStartTags());
    }

    /**
    * Returns a list of all {@link Element} objects with the specified name that are {@linkplain #encloses(Segment) enclosed} by this segment.
    *

    * The elements returned correspond with the start tags returned in the {@link #getAllStartTags(String name)} method, * except that elements which are not entirely enclosed by this segment are excluded. *

    * Specifying a null argument to the name parameter is equivalent to {@link #getAllElements()}, which may include elements of non-{@linkplain StartTagType#NORMAL normal} tags. *

    * This method also returns elements consisting of {@linkplain Tag#isUnregistered() unregistered} tags if the specified name is not a valid {@linkplain Tag#isXMLName(CharSequence) XML tag name}. *

    * If this segment is itself an {@link Element} with the specified name, the result includes this element in the list. * * @param name the {@linkplain Element#getName() name} of the elements to get. * @return a list of all {@link Element} objects with the specified name that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllElements(String name) { return getAllElements(getAllStartTags(name)); } /** * Returns a list of all {@link Element} objects with start tags of the specified {@linkplain StartTagType type} that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * The elements returned correspond with the start tags returned in the {@link #getAllTags(TagType)} method, * except that elements which are not entirely enclosed by this segment are excluded. *

    * If this segment is itself an {@link Element} with the specified type, the result includes this element in the list. * * @param startTagType the {@linkplain StartTagType type} of start tags to get, must not be null. * @return a list of all {@link Element} objects with start tags of the specified {@linkplain StartTagType type} that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllElements(final StartTagType startTagType) { if (startTagType==null) throw new IllegalArgumentException("startTagType argument must not be null"); return getAllElements(getAllStartTags(startTagType)); } /** * Returns a list of all {@link Element} objects with the specified attribute name/value pair that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * The elements returned correspond with the start tags returned in the {@link #getAllStartTags(String attributeName, String value, boolean valueCaseSensitive)} method, * except that elements which are not entirely enclosed by this segment are excluded. *

    * If this segment is itself an {@link Element} with the specified name/value pair, the result includes this element in the list. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param value the value of the specified attribute to search for, must not be null. * @param valueCaseSensitive specifies whether the attribute value matching is case sensitive. * @return a list of all {@link Element} objects with the specified attribute name/value pair that are {@linkplain #encloses(Segment) enclosed} by this segment. * @see #getAllElements(String attributeName, Pattern valueRegexPattern) */ public List getAllElements(final String attributeName, final String value, final boolean valueCaseSensitive) { return getAllElements(getAllStartTags(attributeName,value,valueCaseSensitive)); } /** * Returns a list of all {@link Element} objects with the specified attribute name and value pattern that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * The elements returned correspond with the start tags returned in the {@link #getAllStartTags(String attributeName, Pattern valueRegexPattern)} method, * except that elements which are not entirely enclosed by this segment are excluded. *

    * Specifying a null argument to the valueRegexPattern parameter performs the search on the attribute name only, * without regard to the attribute value. This will also match an attribute that {@linkplain Attribute#hasValue() has no value} at all. *

    * If this segment is itself an {@link Element} with the specified attribute name and value pattern, the result includes this element in the list. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param valueRegexPattern the regular expression pattern that must match the attribute value, may be null. * @return a list of all {@link Element} objects with the specified attribute name and value pattern that are {@linkplain #encloses(Segment) enclosed} by this segment. * @see #getAllElements(String attributeName, String value, boolean valueCaseSensitive) */ public List getAllElements(final String attributeName, final Pattern valueRegexPattern) { return getAllElements(getAllStartTags(attributeName,valueRegexPattern)); } /** * Returns a list of all {@link Element} objects with the specified class that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This matches elements with a class attribute that contains the specified class name, either as an exact match or where the specified class name is one of multiple * class names separated by white space in the attribute value. *

    * The elements returned correspond with the start tags returned in the {@link #getAllStartTagsByClass(String className)} method, * except that elements which are not entirely enclosed by this segment are excluded. *

    * If this segment is itself an {@link Element} with the specified class, the result includes this element in the list. * * @param className the class name (case sensitive) to search for, must not be null. * @return a list of all {@link Element} objects with the specified class that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllElementsByClass(final String className) { return getAllElements(getAllStartTagsByClass(className)); } /** * Returns a list of all {@link CharacterReference} objects that are {@linkplain #encloses(Segment) enclosed} by this segment. * @return a list of all {@link CharacterReference} objects that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getAllCharacterReferences() { CharacterReference characterReference=getNextCharacterReference(begin); if (characterReference==null) return Collections.emptyList(); final ArrayList list=new ArrayList(); do { list.add(characterReference); characterReference=getNextCharacterReference(characterReference.end); } while (characterReference!=null); return list; } /** * Returns the first {@link StartTag} {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllStartTags()}.iterator().next(), * but does not search beyond the first start tag and returns null if no such start tag exists. * * @return the first {@link StartTag} {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. */ public final StartTag getFirstStartTag() { return checkEnclosure(source.getNextStartTag(begin)); } /** * Returns the first {@link StartTag} of the specified {@linkplain StartTagType type} {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllStartTags(StartTagType) getAllStartTags(startTagType)}.iterator().next(), * but does not search beyond the first start tag and returns null if no such start tag exists. * * @param startTagType the StartTagType to search for. * @return the first {@link StartTag} of the specified {@linkplain StartTagType type} {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. */ public final StartTag getFirstStartTag(StartTagType startTagType) { return checkEnclosure(source.getNextStartTag(begin,startTagType)); } /** * Returns the first {@linkplain StartTagType#NORMAL normal} {@link StartTag} {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllStartTags(String) getAllStartTags(name)}.iterator().next(), * but does not search beyond the first start tag and returns null if no such start tag exists. *

    * Specifying a null argument to the name parameter is equivalent to {@link #getFirstStartTag()}. * * @param name the {@linkplain StartTag#getName() name} of the start tag to search for, may be null. * @return the first {@linkplain StartTagType#NORMAL normal} {@link StartTag} {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. */ public final StartTag getFirstStartTag(String name) { return checkEnclosure(source.getNextStartTag(begin,name)); } /** * Returns the first {@link StartTag} with the specified attribute name/value pair {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllStartTags(String,String,boolean) getAllStartTags(attributeName,value,valueCaseSensitive)}.iterator().next(), * but does not search beyond the first start tag and returns null if no such start tag exists. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param value the value of the specified attribute to search for, must not be null. * @param valueCaseSensitive specifies whether the attribute value matching is case sensitive. * @return the first {@link StartTag} with the specified attribute name/value pair {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. * @see #getFirstStartTag(String attributeName, Pattern valueRegexPattern) */ public final StartTag getFirstStartTag(String attributeName, String value, boolean valueCaseSensitive) { return checkEnclosure(source.getNextStartTag(begin,attributeName,value,valueCaseSensitive)); } /** * Returns the first {@link StartTag} with the specified attribute name and value pattern that is {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllStartTags(String,Pattern) getAllStartTags(attributeName,valueRegexPattern)}.iterator().next(), * but does not search beyond the first start tag and returns null if no such start tag exists. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param valueRegexPattern the regular expression pattern that must match the attribute value, may be null. * @return the first {@link StartTag} with the specified attribute name and value pattern that is {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. * @see #getFirstStartTag(String attributeName, String value, boolean valueCaseSensitive) */ public final StartTag getFirstStartTag(final String attributeName, final Pattern valueRegexPattern) { return checkEnclosure(source.getNextStartTag(begin,attributeName,valueRegexPattern)); } /** * Returns the first {@link StartTag} with the specified class that is {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllStartTagsByClass(String) getAllStartTagsByClass(className)}.iterator().next(), * but does not search beyond the first start tag and returns null if no such start tag exists. * * @param className the class name (case sensitive) to search for, must not be null. * @return the first {@link StartTag} with the specified class that is {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. */ public final StartTag getFirstStartTagByClass(final String className) { return checkEnclosure(source.getNextStartTagByClass(begin,className)); } /** * Returns the first {@link Element} {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllElements()}.iterator().next(), * but does not search beyond the first enclosed element and returns null if no such element exists. *

    * If this segment is itself an {@link Element}, this element is returned, not the first child element.
    *
    * @return the first {@link Element} {@linkplain #encloses(Segment) enclosed} by this segment, or {@code null} if none exists.
    */
    public final Element getFirstElement() {
        // Walk the enclosed start tags in order; the first one whose whole element
        // ends within this segment is the answer.
        for (StartTag candidate=checkEnclosure(StartTag.getNext(source,begin));
                candidate!=null;
                candidate=checkEnclosure(candidate.getNextStartTag())) {
            final Element enclosedElement=candidate.getElement();
            if (enclosedElement.end<=end) return enclosedElement;
        }
        return null;
    }

    /**
    * Returns the first {@linkplain StartTagType#NORMAL normal} {@link Element} with the specified {@linkplain Element#getName() name} {@linkplain #encloses(Segment) enclosed} by this segment.
    * <p>

    * This is functionally equivalent to {@link #getAllElements(String) getAllElements(name)}.iterator().next(), * but does not search beyond the first enclosed element and returns null if no such element exists. *

    * Specifying a null argument to the name parameter is equivalent to {@link #getFirstElement()}. *

    * If this segment is itself an {@link Element} with the specified name, this element is returned. * * @param name the {@linkplain Element#getName() name} of the element to search for. * @return the first {@linkplain StartTagType#NORMAL normal} {@link Element} with the specified {@linkplain Element#getName() name} {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. */ public final Element getFirstElement(String name) { if (name==null) return getFirstElement(); final boolean isXMLTagName=Tag.isXMLName(name); StartTag startTag=checkEnclosure(StartTag.getNext(source,begin,name,StartTagType.NORMAL,isXMLTagName)); while (startTag!=null) { final Element element=startTag.getElement(); if (element.end<=end) return element; startTag=checkEnclosure(StartTag.getNext(source,startTag.begin+1,name,StartTagType.NORMAL,isXMLTagName)); } return null; } /** * Returns the first {@link Element} with the specified attribute name/value pair {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllElements(String,String,boolean) getAllElements(attributeName,value,valueCaseSensitive)}.iterator().next(), * but does not search beyond the first enclosed element and returns null if no such element exists. *

    * If this segment is itself an {@link Element} with the specified attribute name/value pair, this element is returned. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param value the value of the specified attribute to search for, must not be null. * @param valueCaseSensitive specifies whether the attribute value matching is case sensitive. * @return the first {@link Element} with the specified attribute name/value pair {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. * @see #getFirstElement(String attributeName, Pattern valueRegexPattern) */ public final Element getFirstElement(String attributeName, String value, boolean valueCaseSensitive) { StartTag startTag=checkEnclosure(source.getNextStartTag(begin,attributeName,value,valueCaseSensitive)); while (startTag!=null) { final Element element=startTag.getElement(); if (element.end<=end) return element; startTag=checkEnclosure(source.getNextStartTag(startTag.begin+1,attributeName,value,valueCaseSensitive)); } return null; } /** * Returns the first {@link Element} with the specified attribute name and value pattern that is {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllElements(String,Pattern) getAllElements(attributeName,valueRegexPattern)}.iterator().next(), * but does not search beyond the first enclosed element and returns null if no such element exists. *

    * If this segment is itself an {@link Element} with the specified attribute name and value pattern, this element is returned. * * @param attributeName the attribute name (case insensitive) to search for, must not be null. * @param valueRegexPattern the regular expression pattern that must match the attribute value, may be null. * @return the first {@link Element} with the specified attribute name and value pattern that is {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. * @see #getFirstElement(String attributeName, String value, boolean valueCaseSensitive) */ public final Element getFirstElement(final String attributeName, final Pattern valueRegexPattern) { StartTag startTag=checkEnclosure(source.getNextStartTag(begin,attributeName,valueRegexPattern)); while (startTag!=null) { final Element element=startTag.getElement(); if (element.end<=end) return element; startTag=checkEnclosure(source.getNextStartTag(startTag.begin+1,attributeName,valueRegexPattern)); } return null; } /** * Returns the first {@link Element} with the specified class that is {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is functionally equivalent to {@link #getAllElementsByClass(String) getAllElementsByClass(className)}.iterator().next(), * but does not search beyond the first enclosed element and returns null if no such element exists. *

    * If this segment is itself an {@link Element} with the specified class, this element is returned. * * @param className the class name (case sensitive) to search for, must not be null. * @return the first {@link Element} with the specified class that is {@linkplain #encloses(Segment) enclosed} by this segment, or null if none exists. */ public final Element getFirstElementByClass(final String className) { StartTag startTag=checkEnclosure(source.getNextStartTagByClass(begin,className)); while (startTag!=null) { final Element element=startTag.getElement(); if (element.end<=end) return element; startTag=checkEnclosure(source.getNextStartTagByClass(startTag.begin+1,className)); } return null; } /** * Returns a list of the {@link FormControl} objects that are {@linkplain #encloses(Segment) enclosed} by this segment. * @return a list of the {@link FormControl} objects that are {@linkplain #encloses(Segment) enclosed} by this segment. */ public List getFormControls() { return FormControl.getAll(this); } /** * Returns the {@link FormFields} object representing all form fields that are {@linkplain #encloses(Segment) enclosed} by this segment. *

    * This is equivalent to {@link FormFields#FormFields(Collection) new FormFields}({@link #getFormControls()}). * * @return the {@link FormFields} object representing all form fields that are {@linkplain #encloses(Segment) enclosed} by this segment. * @see #getFormControls() */ public FormFields getFormFields() { return new FormFields(getFormControls()); } /** * Parses any {@link Attributes} within this segment. * This method is only used in the unusual situation where attributes exist outside of a start tag. * The {@link StartTag#getAttributes()} method should be used in normal situations. *

    * This is equivalent to source.{@link Source#parseAttributes(int,int) parseAttributes}({@link #getBegin()},{@link #getEnd()}).
    *
    * @return the {@link Attributes} within this segment, or {@code null} if too many errors occur while parsing.
    */
    public Attributes parseAttributes() {
        // Delegate to the source document, restricted to the span of this segment.
        final Attributes parsedAttributes=source.parseAttributes(begin,end);
        return parsedAttributes;
    }

    /**
    * Causes this segment to be ignored when parsing.
    * <p>

    * Ignored segments are treated as blank spaces by the parsing mechanism, but are included as normal text in all other functions. *

    * This method was originally the only means of preventing {@linkplain TagType#isServerTag() server tags} located inside * {@linkplain StartTagType#NORMAL normal} tags from interfering with the parsing of the tags * (such as where an {@linkplain Attribute attribute} of a normal tag uses a server tag to dynamically set its value), * as well as preventing non-server tags from being recognised inside server tags. *

    * It is not necessary to use this method to ignore {@linkplain TagType#isServerTag() server tags} located inside normal tags, * as the attributes parser automatically ignores any server tags. *

    * It is not necessary to use this method to ignore non-server tags inside server tags, or the contents of {@link HTMLElementName#SCRIPT SCRIPT} elements, * as the parser does this automatically when performing a {@linkplain Source#fullSequentialParse() full sequential parse}. *

    * This leaves only very few scenarios where calling this method still provides a significant benefit. *

    * One such case is where XML-style server tags are used inside {@linkplain StartTagType#NORMAL normal} tags. * Here is an example using an XML-style JSP tag: *

    <a href="<i18n:resource path="/Portal"/>?BACK=TRUE">back</a>
    * The first double-quote of "/Portal" will be interpreted as the end quote for the href attribute, * as there is no way for the parser to recognise the i18n:resource element as a server tag. * Such use of XML-style server tags inside {@linkplain StartTagType#NORMAL normal} tags is generally seen as bad practice, * but it is nevertheless valid JSP. The only way to ensure that this library is able to parse the normal tag surrounding it is to * find these server tags first and call the ignoreWhenParsing method to ignore them before parsing the rest of the document. *

    * It is important to understand the difference between ignoring the segment when parsing and removing the segment completely. * Any text inside a segment that is ignored when parsing is treated by most functions as content, and as such is included in the output of * tools such as {@link TextExtractor} and {@link Renderer}. *

    * To remove segments completely, create an {@link OutputDocument} and call its {@link OutputDocument#remove(Segment) remove(Segment)} or * {@link OutputDocument#replaceWithSpaces(int,int) replaceWithSpaces(int begin, int end)} method for each segment. * Then create a new source document using {@link Source#Source(CharSequence) new Source(outputDocument.toString())} * and perform the desired operations on this new source object. *

    * Calling this method after the {@link Source#fullSequentialParse()} method has been called is not permitted and throws an IllegalStateException. *

    * Any tags appearing in this segment that are found before this method is called will remain in the {@linkplain Source#getCacheDebugInfo() tag cache}, * and so will continue to be found by the tag search methods. * If this is undesirable, the {@link Source#clearCache()} method can be called to remove them from the cache. * Calling the {@link Source#fullSequentialParse()} method after this method clears the cache automatically. *

    * For best performance, this method should be called on all segments that need to be ignored without calling
    * any of the tag search methods in between.
    *
    * @see Source#ignoreWhenParsing(Collection segments)
    */
    public void ignoreWhenParsing() {
        // Register the span of this segment with the source document as a region to ignore.
        this.source.ignoreWhenParsing(this.begin,this.end);
    }

    /**
    * Compares this Segment object to another object.
    * <p>

    * If the argument is not a Segment, a ClassCastException is thrown. *

    * A segment is considered to be before another segment if its begin position is earlier, * or in the case that both segments begin at the same position, its end position is earlier. *

    * Segments that begin and end at the same position are considered equal for * the purposes of this comparison, even if they relate to different source documents. *

    * Note: this class has a natural ordering that is inconsistent with equals. * This means that this method may return zero in some cases where calling the * {@link #equals(Object)} method with the same argument returns false. * * @param segment the segment to be compared * @return a negative integer, zero, or a positive integer as this segment is before, equal to, or after the specified segment. * @throws ClassCastException if the argument is not a Segment */ public int compareTo(final Segment segment) { if (this==segment) return 0; if (beginsegment.begin) return 1; if (endsegment.end) return 1; return 0; } /** * Indicates whether this segment consists entirely of {@linkplain #isWhiteSpace(char) white space}. * @return true if this segment consists entirely of {@linkplain #isWhiteSpace(char) white space}, otherwise false. */ public final boolean isWhiteSpace() { for (int i=begin; iwhite space. *

    * The HTML 4.01 specification section 9.1 * specifies the following white space characters: *

      <ul>
    *  <li>space (U+0020)
    *  <li>tab (U+0009)
    *  <li>form feed (U+000C)
    *  <li>line feed (U+000A)
    *  <li>carriage return (U+000D)
    *  <li>zero-width space (U+200B)
    * </ul>
    * <p>

    * Despite the explicit inclusion of the zero-width space in the HTML specification, Microsoft IE6 does not * recognise them as white space and renders them as an unprintable character (empty square). * Even zero-width spaces included using the numeric character reference &#x200B; are rendered this way. * * @param ch the character to test. * @return true if the specified character is white space, otherwise false. */ public static final boolean isWhiteSpace(final char ch) { for (char whiteSpaceChar : WHITESPACE) if (ch==whiteSpaceChar) return true; return false; } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { final StringBuilder sb=new StringBuilder(50); sb.append('('); source.getRowColumnVector(begin).appendTo(sb); sb.append('-'); source.getRowColumnVector(end).appendTo(sb); sb.append(')'); return sb.toString(); } /** * Returns the character at the specified index. *

    * This is logically equivalent to toString().charAt(index) * for valid argument values 0 <= index < length(). *

    * However because this implementation works directly on the underlying document source string,
    * it should not be assumed that an IndexOutOfBoundsException is thrown
    * for an invalid argument value.
    *
    * @param index the index of the character, relative to the beginning of this segment.
    * @return the character at the specified index.
    */
    public char charAt(final int index) {
        // Translate the segment-relative index into a position in the source document.
        final int sourcePos=begin+index;
        return source.charAt(sourcePos);
    }

    /**
    * Returns a new character sequence that is a subsequence of this sequence.
    * <p>

    * This is logically equivalent to toString().subSequence(beginIndex,endIndex) * for valid values of beginIndex and endIndex. *

    * However because this implementation works directly on the underlying document source text, * it should not be assumed that an IndexOutOfBoundsException is thrown * for invalid argument values as described in the String.subSequence(int,int) method. * * @param beginIndex the begin index, inclusive. * @param endIndex the end index, exclusive. * @return a new character sequence that is a subsequence of this sequence. */ public CharSequence subSequence(final int beginIndex, final int endIndex) { return source.subSequence(begin+beginIndex,begin+endIndex); } /** * Collapses the {@linkplain #isWhiteSpace(char) white space} in the specified text. * All leading and trailing white space is omitted, and any sections of internal white space are replaced by a single space. */ static final StringBuilder appendCollapseWhiteSpace(final StringBuilder sb, final CharSequence text) { final int textLength=text.length(); int i=0; boolean lastWasWhiteSpace=false; while (true) { if (i>=textLength) return sb; if (!isWhiteSpace(text.charAt(i))) break; i++; } do { final char ch=text.charAt(i++); if (isWhiteSpace(ch)) { lastWasWhiteSpace=true; } else { if (lastWasWhiteSpace) { sb.append(' '); lastWasWhiteSpace=false; } sb.append(ch); } } while (i getAllElements(final List startTags) { if (startTags.isEmpty()) return Collections.emptyList(); final ArrayList elements=new ArrayList(startTags.size()); for (StartTag startTag : startTags) { final Element element=startTag.getElement(); if (element.end<=end) elements.add(element); } return elements; } private StartTag checkEnclosure(final StartTag startTag) { if (startTag==null || startTag.end>end) return null; return startTag; } private Tag checkTagEnclosure(final Tag tag) { if (tag==null || tag.end>end) return null; return tag; } private CharacterReference getNextCharacterReference(final int pos) { final CharacterReference characterReference=source.getNextCharacterReference(pos); if (characterReference==null || characterReference.end>end) 
return null; return characterReference; } } jericho-html-3.1/src/java/net/htmlparser/jericho/StartTagTypeMasonComponentCall.java0000644000175000017500000000227511204550410030713 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; final class StartTagTypeMasonComponentCall extends StartTagTypeGenericImplementation { protected static final StartTagTypeMasonComponentCall INSTANCE=new StartTagTypeMasonComponentCall(); private StartTagTypeMasonComponentCall() { super("mason component call","<&","&>",null,true); } } jericho-html-3.1/src/java/net/htmlparser/jericho/LoggerProviderDisabled.java0000644000175000017500000000225611204550410027224 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. 
The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.logging.*; final class LoggerProviderDisabled implements LoggerProvider { public static final LoggerProvider INSTANCE=new LoggerProviderDisabled(); private LoggerProviderDisabled() {} public Logger getLogger(final String name) { return null; } } jericho-html-3.1/src/java/net/htmlparser/jericho/EndTag.java0000644000175000017500000002522711204550410024007 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; /** * Represents the end tag of an * {@linkplain Element element} in a specific {@linkplain Source source} document. *

    * An end tag always has a {@linkplain #getTagType() type} that is a subclass of {@link EndTagType}, meaning it * always starts with the characters '</'. *

    * EndTag instances are obtained using one of the following methods: *

      <ul>
    *  <li>{@link Element#getEndTag()}
    *  <li>{@link Tag#getNextTag()}
    *  <li>{@link Tag#getPreviousTag()}
    *  <li>{@link Source#getPreviousEndTag(int pos)}
    *  <li>{@link Source#getPreviousEndTag(int pos, String name)}
    *  <li>{@link Source#getPreviousTag(int pos)}
    *  <li>{@link Source#getPreviousTag(int pos, TagType)}
    *  <li>{@link Source#getNextEndTag(int pos)}
    *  <li>{@link Source#getNextEndTag(int pos, String name)}
    *  <li>{@link Source#getNextEndTag(int pos, String name, EndTagType)}
    *  <li>{@link Source#getNextTag(int pos)}
    *  <li>{@link Source#getNextTag(int pos, TagType)}
    *  <li>{@link Source#getEnclosingTag(int pos)}
    *  <li>{@link Source#getEnclosingTag(int pos, TagType)}
    *  <li>{@link Source#getTagAt(int pos)}
    *  <li>{@link Segment#getAllTags()}
    *  <li>{@link Segment#getAllTags(TagType)}
    * </ul>
    * <p>

    * The {@link Tag} superclass defines the {@link Tag#getName() getName()} method used to get the name of this end tag. *

    * See also the XML 1.0 specification for end tags.
    *
    * @see Tag
    * @see StartTag
    * @see Element
    */
    public final class EndTag extends Tag {
    // The type of this end tag; assigned once in the constructor and returned by
    // both getEndTagType() and getTagType().
    private final EndTagType endTagType;

    /**
    * Constructs a new EndTag.
    *
    * @param source the {@link Source} document.
    * @param begin the character position in the source document where this tag {@linkplain Segment#getBegin() begins}.
    * @param end the character position in the source document where this tag {@linkplain Segment#getEnd() ends}.
    * @param endTagType the {@linkplain #getEndTagType() type} of the end tag.
    * @param name the {@linkplain Tag#getName() name} of the tag.
    */
    EndTag(final Source source, final int begin, final int end, final EndTagType endTagType, final String name) {
        // Source, span and name are handled by the Tag superclass; only the end tag type is kept here.
        super(source,begin,end,name);
        this.endTagType=endTagType;
    }

    /**
    * Returns the {@linkplain Element element} that is ended by this end tag.
    *

    * Returns null if this end tag is not properly matched to any {@linkplain StartTag start tag} in the source document. *

    * This method is much less efficient than the {@link StartTag#getElement()} method. *

    * IMPLEMENTATION NOTE: The explanation for why this method is relatively inefficient lies in the fact that more than one * {@linkplain StartTagType start tag type} can have the same * {@linkplain StartTagType#getCorrespondingEndTagType() corresponding end tag type}, so it is not possible to know for certain * which type of start tag this end tag is matched to (see {@link EndTagType#getCorrespondingStartTagType()} for more explanation). * Because of this uncertainty, the implementation of this method must check every start tag preceding this end tag, calling its * {@link StartTag#getElement()} method to see whether it is terminated by this end tag. * * @return the {@linkplain Element element} that is ended by this end tag. */ public Element getElement() { if (element!=Element.NOT_CACHED) return element; int pos=begin; while (pos!=0) { StartTag startTag=source.getPreviousStartTag(pos-1); if (startTag==null) break; Element foundElement=startTag.getElement(); // this automatically sets foundElement.getEndTag().element cache if (foundElement.getEndTag()==this) return foundElement; // no need to set element as it was already done in previous statement pos=startTag.begin; } return element=null; } /** * Returns the {@linkplain EndTagType type} of this end tag. *

    * This is equivalent to (EndTagType){@link #getTagType()}. * * @return the {@linkplain EndTagType type} of this end tag. */ public EndTagType getEndTagType() { return endTagType; } // Documentation inherited from Tag public TagType getTagType() { return endTagType; } // Documentation inherited from Tag public boolean isUnregistered() { return endTagType==EndTagType.UNREGISTERED; } /** * Returns an XML representation of this end tag. *

    * This method is included for symmetry with the {@link StartTag#tidy()} method and simply
    * returns the {@linkplain Segment#toString() source text} of the tag.
    *
    * @return an XML representation of this end tag.
    */
    public String tidy() {
        // No transformation is applied: the result of toString() is returned as-is.
        final String sourceText=toString();
        return sourceText;
    }

    /**
    * Generates the HTML text of a {@linkplain EndTagType#NORMAL normal} end tag with the specified tag {@linkplain #getName() name}.
    * <p>

    *

    *
    Example:
    *
    *

    * The following method call: *

    * EndTag.generateHTML("INPUT") *
    * returns the following output: *
    * </INPUT> *
    *
    *
    * * @param tagName the {@linkplain #getName() name} of the end tag. * @return the HTML text of a {@linkplain EndTagType#NORMAL normal} end tag with the specified tag {@linkplain #getName() name}. * @see StartTag#generateHTML(String tagName, Map attributesMap, boolean emptyElementTag) */ public static String generateHTML(final String tagName) { return EndTagType.NORMAL.generateHTML(tagName); } public String getDebugInfo() { final StringBuilder sb=new StringBuilder(); sb.append(this).append(' '); if (endTagType!=EndTagType.NORMAL) sb.append('(').append(endTagType.getDescription()).append(") "); sb.append(super.getDebugInfo()); return sb.toString(); } /** * Returns the previous end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position. *

    * Called from {@link Source#getPreviousEndTag(int pos, String name)}. * * @param source the {@link Source} document. * @param pos the position to search from. * @param name the {@linkplain #getName() name} of the tag including its {@linkplain TagType#getNamePrefix() prefix} (must be lower case, may be null). * @param endTagType the {@linkplain EndTagType type} of end tag to search for. * @return the previous end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position, or null if none is found. */ static EndTag getPrevious(final Source source, final int pos, final String name, final EndTagType endTagType) { if (name==null) return (EndTag)Tag.getPreviousTag(source,pos,endTagType); if (name.length()==0) throw new IllegalArgumentException("name argument must not be zero length"); final String searchString=endTagType.START_DELIMITER_PREFIX+name; try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.lastIndexOf(searchString,begin); if (begin==-1) return null; final EndTag endTag=(EndTag)source.getTagAt(begin); if (endTag!=null && endTag.getEndTagType()==endTagType && name.equals(endTag.getName())) return endTag; } while ((begin-=1)>=0); } catch (IndexOutOfBoundsException ex) { // this should never happen during a get previous operation so rethrow it: throw ex; } return null; } /** * Returns the next end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position. *

    * Called from {@link Source#getNextEndTag(int pos, String name, EndTagType endTagType)}. * * @param source the {@link Source} document. * @param pos the position to search from. * @param name the {@linkplain #getName() name} of the tag including its {@linkplain TagType#getNamePrefix() prefix} (must be lower case, may be null). * @param endTagType the {@linkplain EndTagType type} of end tag to search for. * @return the next end tag matching the specified {@linkplain #getName() name} and {@linkplain EndTagType type}, starting at the specified position, or null if none is found. */ static EndTag getNext(final Source source, final int pos, final String name, final EndTagType endTagType) { if (name==null) return (EndTag)Tag.getNextTag(source,pos,endTagType); if (name.length()==0) throw new IllegalArgumentException("name argument must not be zero length"); final String searchString=endTagType.START_DELIMITER_PREFIX+name; try { final ParseText parseText=source.getParseText(); int begin=pos; do { begin=parseText.indexOf(searchString,begin); if (begin==-1) return null; final EndTag endTag=(EndTag)source.getTagAt(begin); if (endTag!=null && endTag.getEndTagType()==endTagType && name.equals(endTag.getName())) return endTag; } while ((begin+=1)",null,false); } } jericho-html-3.1/src/java/net/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent.java0000644000175000017500000000256111204550410033022 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. 
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details.
package net.htmlparser.jericho;

/**
 * End tag type whose description is "/mason component called with content".
 * <p>
 * The class is final with a private constructor, and its single instance is exposed through {@link #INSTANCE}.
 * Its {@linkplain #getCorrespondingStartTagType() corresponding start tag type} is
 * {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}.
 */
final class EndTagTypeMasonComponentCalledWithContent extends EndTagTypeGenericImplementation {
	/** The single instance of this tag type. */
	protected static final EndTagTypeMasonComponentCalledWithContent INSTANCE=new EndTagTypeMasonComponentCalledWithContent();

	/**
	 * Creates the single instance; not callable from outside this class.
	 * <p>
	 * The meaning of the arguments is defined by the EndTagTypeGenericImplementation constructor, which is not shown in this file.
	 */
	private EndTagTypeMasonComponentCalledWithContent() {
		// NOTE(review): the second argument is an empty string literal in this copy of the file.
		// Other markup-like text appears to be missing from this copy, so delimiter literal(s)
		// may have been lost here - confirm against the upstream source before relying on this call.
		super("/mason component called with content","",true,true);
	}

	/**
	 * Returns the start tag type that this end tag type corresponds to.
	 * @return {@link MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT}.
	 */
	public StartTagType getCorrespondingStartTagType() {
		return MasonTagTypes.MASON_COMPONENT_CALLED_WITH_CONTENT;
	}
}
jericho-html-3.1/src/java/net/htmlparser/jericho/NodeIterator.java0000644000175000017500000000743111204550410025241 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.util.*;
/**
 * Iterates over the "nodes" in a segment.
 *

    * Every object returned is a Segment. All tags found with the Segment.getAllTags() method are included, as well as segments representing the plain text in between them, * and character references within the plain text are also included as separate nodes. */ class NodeIterator implements Iterator { private final Segment segment; private final Source source; private int pos; private Tag nextTag; private CharacterReference characterReferenceAtCurrentPosition=null; private final boolean legacyIteratorCompatabilityMode=Source.LegacyIteratorCompatabilityMode; public NodeIterator(final Segment segment) { this.segment=segment; source=segment.source; if (segment==source) source.fullSequentialParse(); pos=segment.begin; nextTag=source.getNextTag(pos); if (nextTag!=null && nextTag.begin>=segment.end) nextTag=null; } public boolean hasNext() { return pos=segment.end) nextTag=null; if (posfield in an HTML form, * a field being defined as the group of all {@linkplain FormControl form controls} * having the same {@linkplain FormControl#getName() name}. *

    * The {@link #getFormControls()} method can be used to obtain the collection of this field's constituent * {@link FormControl} objects. *

    * The {@link FormFields} class, which represents a collection of FormField objects, provides the highest level * interface for dealing with form fields and controls. For the most common tasks it can be used directly without * the need to work with its constituent FormField or {@link FormControl} objects. *

    * The FormField class serves two main purposes: *

      *
    1. * Provide methods for the modification and retrieval of form control submission values * while ensuring that the states of all the field's constituent form controls remain consistent with each other. *

      * The methods available for this purpose are:
      * {@link #getValues() List getValues()}
      * {@link #clearValues() void clearValues()}
      * {@link #setValues(Collection) void setValues(Collection)}
      * {@link #setValue(String) boolean setValue(String)}
      * {@link #addValue(String) boolean addValue(String)}
      *

      * Although the {@link FormControl} class provides methods for directly modifying the submission values * of individual form controls, it is generally recommended to use the interface provided by the {@link FormFields} class * unless there is a specific requirement for the lower level functionality. * The {@link FormFields} class contains convenience methods providing most of the functionality of the above methods, * as well as some higher level functionality such as the ability to set the form * submission values as a complete field data set * using the {@link FormFields#setDataSet(Map)} method. *

    2. * Provide a means of determining the data structure of the field, allowing a server receiving a * submitted * form data set * to interpret and store the data in an appropriate way. *

      * The properties available for this purpose are:
      * {@link #allowsMultipleValues() boolean allowsMultipleValues()}
      * {@link #getUserValueCount() int getUserValueCount()}
      * {@link #getPredefinedValues() Collection getPredefinedValues()}
      *

      * The {@link FormFields#getColumnLabels()} and {@link FormFields#getColumnValues(Map)} methods utilise these properties * to convert data from a form data set * (represented as a field data set) into a simple array format, * suitable for storage in a tabular format such as a database table or .CSV file. *

      * The properties need only be utilised directly in the event that a * form data set is to be converted * from its normal format into some other type of data structure. *

    * A form field which allows user values normally consists of a single * user value control, * such as a {@link FormControlType#TEXT TEXT} control. *

    * When a form field consists of more than one control, these controls are normally all * predefined value controls of the same * {@linkplain FormControlType type}, such as {@link FormControlType#CHECKBOX CHECKBOX} controls. *

    * Form fields consisting of more than one control do not necessarily return {@linkplain #allowsMultipleValues() multiple values}. * A form field consisting of {@link FormControlType#CHECKBOX CHECKBOX} controls can return multiple values, whereas * a form field consisting of {@link FormControlType#RADIO RADIO} controls returns at most one value. *

    * The HTML author can disregard convention and mix all types of controls with the same name in the same form, * or include multiple user value controls of the same name. * The evidence that such an unusual combination is present is when {@link #getUserValueCount()}>1. *

    * FormField instances are created automatically with the creation of a {@link FormFields} collection. *

    * The case sensitivity of form field names is determined by the static
    * {@link Config#CurrentCompatibilityMode}.{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} property.
    *
    * @see FormFields
    * @see FormControl
    * @see FormControlType
    */
    public final class FormField {
        private final String name;
        private int userValueCount=0;
        private boolean allowsMultipleValues=false;
        private LinkedHashSet<String> predefinedValues=null; // null if none
        // Must be an ordered collection: getFirstFormControl() relies on insertion order.
        private final LinkedHashSet<FormControl> formControls=new LinkedHashSet<FormControl>();
        private transient FormControl firstFormControl=null; // this field is simply a cache for the getFirstFormControl() method
        int columnIndex; // see FormFields.initColumns()

        /** Constructor called from FormFields class. */
        FormField(final String name) {
            this.name=name;
        }

        /**
         * Returns the control name shared by all of this field's constituent {@linkplain FormControl controls}.
         * <p>
         * If the static {@link Config#CurrentCompatibilityMode}.{@link Config.CompatibilityMode#isFormFieldNameCaseInsensitive() isFormFieldNameCaseInsensitive()}
         * property is set to true, the grouping of the controls by name is case insensitive
         * and this method always returns the name in lower case.
         * <p>
         * Since a form field is simply a group of controls with the same name, the terms control name and
         * field name are for the most part synonymous, with only a possible difference in case differentiating them.
         *
         * @return the control name shared by all of this field's constituent {@linkplain FormControl controls}.
         * @see FormControl#getName()
         */
        public String getName() {
            return name;
        }

        /**
         * Returns a collection of all the constituent {@linkplain FormControl form controls} in this field.
         * <p>
         * An iterator over this collection returns the controls in the order of appearance in the source.
         *
         * @return a collection of all the constituent {@linkplain FormControl form controls} in this field.
         * @see #getFormControl()
         * @see #getFormControl(String predefinedValue)
         */
        public Collection<FormControl> getFormControls() {
            return formControls;
        }

        /**
         * Returns the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}.
         * <p>
         * Specifying a predefined value of null returns the first control without a predefined value.
         *
         * @param predefinedValue the predefined value of the control to be returned, or null to return the first control without a predefined value.
         * @return the constituent {@link FormControl} with the specified {@linkplain FormControl#getPredefinedValue() predefined value}, or null if none exists.
         * @see #getFormControl()
         * @see #getFormControls()
         */
        public FormControl getFormControl(final String predefinedValue) {
            if (predefinedValue==null) {
                for (FormControl formControl : formControls) {
                    if (!formControl.getFormControlType().hasPredefinedValue()) return formControl;
                    // SELECT controls are excluded here because they are queried through getPredefinedValues() (plural) below.
                    if (!isSelect(formControl) && formControl.getPredefinedValue()==null) return formControl;
                }
                return null;
            }
            for (FormControl formControl : formControls) {
                if (isSelect(formControl)) {
                    if (formControl.getPredefinedValues().contains(predefinedValue)) return formControl;
                } else if (predefinedValue.equals(formControl.getPredefinedValue())) {
                    return formControl;
                }
            }
            return null;
        }

        /**
         * Returns the first {@link FormControl} from this field.
         * @return the first {@link FormControl} from this field, guaranteed not null.
         * @see #getFormControl(String predefinedValue)
         * @see #getFormControls()
         */
        public FormControl getFormControl() {
            return formControls.iterator().next();
        }

        /**
         * Indicates whether the field allows multiple values.
         * <p>
         * Returns false in any one of the following circumstances:
         * <ul>
         *  <li>The field consists of only one control (unless it is a
         *   {@linkplain FormControlType#SELECT_MULTIPLE multiple select} with more than one option)
         *  <li>The field consists entirely of {@linkplain FormControlType#RADIO radio buttons}
         *  <li>The field consists entirely of {@linkplain FormControlType#isSubmit() submit} buttons
         * </ul>
         * If none of these three conditions are met, the method returns true.
         *
         * @return true if the field allows multiple values, otherwise false.
         */
        public boolean allowsMultipleValues() {
            return allowsMultipleValues;
        }

        /**
         * Returns the number of constituent user value controls in this field.
         * This should in most cases be either 0 or 1.
         * <p>
         * A value of 0 indicates the field values consist only of
         * {@linkplain #getPredefinedValues() predefined values}, which is the case when the field consists only of
         * predefined value controls.
         * <p>
         * A value of 1 indicates the field values consist of at most one value set by the user.
         * It is still possible in this case to receive multiple values in the unlikely event that the HTML author mixed
         * controls of different types with the same name, but any other values would consist only of
         * {@linkplain #getPredefinedValues() predefined values}.
         * <p>
         * A value greater than 1 indicates that the HTML author has included more than one
         * user value control with the same name.
         * This would nearly always indicate an unintentional error in the HTML source document,
         * in which case your application can either log a warning that a poorly designed form has been encountered,
         * or take special action to try to interpret the multiple user values that might be submitted.
         *
         * @return the number of constituent user value controls in this field.
         */
        public int getUserValueCount() {
            return userValueCount;
        }

        /**
         * Returns a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field.
         * <p>
         * All objects in the returned collection are of type String, with no null entries.
         * <p>
         * An iterator over this collection returns the values in the order of appearance in the source document.
         *
         * @return a collection of the {@linkplain FormControl#getPredefinedValue() predefined values} of all constituent {@linkplain FormControl controls} in this field, or an empty collection if none.
         * @see FormControl#getPredefinedValues()
         */
        public Collection<String> getPredefinedValues() {
            if (predefinedValues==null) return Collections.emptySet();
            return predefinedValues;
        }

        /**
         * Returns a list of the field submission values in order of appearance.
         * <p>
         * The term field submission values is used in this library to refer to the aggregate of all the
         * submission values of a field's constituent {@linkplain #getFormControls() form controls}.
         * <p>
         * All objects in the returned list are of type String, with no null entries.
         * <p>
         * The list may contain duplicates if this field has multiple controls with the same value.
         *
         * @return a list of the field submission values in order of appearance, guaranteed not null.
         */
        public List<String> getValues() {
            final List<String> values=new ArrayList<String>();
            for (FormControl formControl : formControls) formControl.addValuesTo(values);
            return values;
        }

        /**
         * Clears the submission values of all the constituent {@linkplain #getFormControls() form controls} in this field.
         * @see FormControl#clearValues()
         */
        public void clearValues() {
            for (FormControl formControl : formControls) formControl.clearValues();
        }

        /**
         * Sets the field submission values of this field to the specified values.
         * <p>
         * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)} for each
         * value in the specified collection.
         * <p>
         * The specified collection must not contain any null values.
         *
         * @param values the new field submission values of this field.
         * @see #addValue(String value)
         */
        public void setValues(final Collection<String> values) {
            clearValues();
            addValues(values);
        }

        /**
         * Sets the field submission values of this field to the single specified value.
         * <p>
         * This is equivalent to calling {@link #clearValues()} followed by {@link #addValue(String) addValue(value)}.
         * <p>
         * The return value indicates whether any of the constituent form controls "accepted" the value.
         * A return value of false implies an error condition as the specified value is not compatible with this field.
         * <p>
         * Specifying a null value is equivalent to calling {@link #clearValues()} alone, and always returns true.
         * <p>
         * See the {@link #addValue(String value)} method for more information.
         *
         * @param value the new field submission value of this field, or null to {@linkplain #clearValues() clear} the field of all submission values.
         * @return true if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise false.
         * @see FormFields#setValue(String fieldName, String value)
         */
        public boolean setValue(final String value) {
            clearValues();
            return value==null || addValue(value);
        }

        /**
         * Adds the specified value to the field submission values of this field.
         * <p>
         * This is achieved internally by attempting to {@linkplain FormControl#addValue(String) add the value} to every constituent
         * {@linkplain #getFormControls() form control} until one "accepts" it.
         * <p>
         * The return value indicates whether any of the constituent form controls accepted the value.
         * A return value of false implies an error condition as the specified value is not compatible with this field.
         * <p>
         * In the unusual case that this field consists of multiple form controls, but not all of them are
         * predefined value controls, priority is given to the predefined value controls
         * before attempting to add the value to the user value controls.
         *
         * @param value the new field submission value to add to this field, must not be null.
         * @return true if one of the constituent {@linkplain #getFormControls() form controls} accepts the value, otherwise false.
         * @throws IllegalArgumentException if the specified value is null.
         */
        public boolean addValue(final String value) {
            if (value==null) throw new IllegalArgumentException("value argument must not be null");
            if (formControls.size()==1) return getFirstFormControl().addValue(value);
            List<FormControl> userValueControls=null;
            for (FormControl formControl : formControls) {
                if (!formControl.getFormControlType().hasPredefinedValue()) {
                    // A user value control has been found, but is not the only control with this name.
                    // This shouldn't normally happen in a well designed form, but we will save the user value control
                    // for later and give all predefined value controls first opportunity to take the value.
                    if (userValueControls==null) userValueControls=new ArrayList<FormControl>();
                    userValueControls.add(formControl);
                    continue;
                }
                if (formControl.addValue(value)) return true; // return value of true from formControl.addValue(value) means the value was taken by the control
            }
            if (userValueControls==null) return false;
            for (FormControl userFormControl : userValueControls) {
                if (userFormControl.addValue(value)) return true;
            }
            return false;
        }

        /**
         * Returns a string representation of this object useful for debugging purposes.
         * @return a string representation of this object useful for debugging purposes.
         */
        public String getDebugInfo() {
            final StringBuilder sb=new StringBuilder();
            sb.append("Field: ").append(name).append(", UserValueCount=").append(userValueCount).append(", AllowsMultipleValues=").append(allowsMultipleValues);
            if (predefinedValues!=null) {
                for (String predefinedValue : predefinedValues) sb.append(Config.NewLine).append("PredefinedValue: ").append(predefinedValue);
            }
            for (FormControl formControl : formControls) sb.append(Config.NewLine).append("FormControl: ").append(formControl.getDebugInfo());
            sb.append(Config.NewLine).append(Config.NewLine);
            return sb.toString();
        }

        /**
         * Returns a string representation of this object useful for debugging purposes.
         * <p>
         * This is equivalent to {@link #getDebugInfo()}.
         *
         * @return a string representation of this object useful for debugging purposes.
         */
        @Override
        public String toString() {
            return getDebugInfo();
        }

        /** Adds each of the specified values in turn; a null collection is ignored. */
        void addValues(final Collection<String> values) {
            if (values!=null) for (String value : values) addValue(value);
        }

        /** Adds each of the specified values in turn; a null array is ignored. */
        void addValues(final String[] values) {
            if (values!=null) for (String value : values) addValue(value);
        }

        /**
         * Registers a constituent control and updates the derived field properties.
         *
         * @param formControl the control to add.
         * @param predefinedValue the predefined value contributed by the control, or null if a user value is being added.
         */
        void addFormControl(final FormControl formControl, final String predefinedValue) {
            // predefinedValue==null if we are adding a user value
            if (predefinedValue==null) {
                userValueCount++;
            } else {
                if (predefinedValues==null) predefinedValues=new LinkedHashSet<String>();
                predefinedValues.add(predefinedValue);
            }
            formControls.add(formControl);
            allowsMultipleValues=calculateAllowsMultipleValues(formControl);
        }

        /** Recalculates the allowsMultipleValues property after newFormControl has been added. */
        private boolean calculateAllowsMultipleValues(final FormControl newFormControl) {
            // false if only one control (unless it is a multiple select with more than one option),
            // or all of the controls are radio buttons, or all of the controls are submit buttons
            if (allowsMultipleValues || userValueCount>1) return true;
            if (userValueCount==1) return predefinedValues!=null;
            // at this stage we know userValueCount==0 && predefinedValues.size()>=1
            if (predefinedValues.size()==1) return false;
            final FormControlType newFormControlType=newFormControl.getFormControlType();
            if (formControls.size()==1) return newFormControlType==FormControlType.SELECT_MULTIPLE;
            // at this stage we know there are multiple predefined values in multiple controls.
            // if all of the controls are radio buttons or all are submit buttons, allowsMultipleValues is false, otherwise true.
            // checking only the first control and the new control is equivalent to checking them all because if they weren't all
            // the same allowsMultipleValues would already be true.
            final FormControlType firstFormControlType=getFirstFormControl().getFormControlType();
            if (newFormControlType==FormControlType.RADIO && firstFormControlType==FormControlType.RADIO) return false;
            if (newFormControlType.isSubmit() && firstFormControlType.isSubmit()) return false;
            return true;
        }

        /** Returns the first control added to this field, caching it after the first call. */
        FormControl getFirstFormControl() {
            // formControls must be ordered collection for this method to work.
            // It has to return the first FormControl entered into the collection
            // for the algorithm in calculateAllowsMultipleValues() to work.
            if (firstFormControl==null) firstFormControl=formControls.iterator().next();
            return firstFormControl;
        }

        /**
         * Merges the properties, predefined values and controls of the specified field into this field.
         * Only called from FormFields class.
         *
         * @param formField the field whose contents are to be merged into this one.
         */
        void merge(final FormField formField) {
            if (formField.userValueCount>userValueCount) userValueCount=formField.userValueCount;
            allowsMultipleValues=allowsMultipleValues || formField.allowsMultipleValues;
            if (formField.predefinedValues!=null) {
                // Copy the OTHER field's values into this field's own set. The previous code iterated over
                // this field's own set (a no-op that dropped the other field's values), and when this field
                // had no set yet it shared the other field's set instance instead of copying it.
                if (predefinedValues==null) predefinedValues=new LinkedHashSet<String>();
                predefinedValues.addAll(formField.predefinedValues);
            }
            formControls.addAll(formField.formControls);
        }

        /** Indicates whether the specified control is based on a SELECT element. */
        private static boolean isSelect(final FormControl formControl) {
            // equals() rather than == so the check does not depend on the element name being the same String instance as the constant.
            return HTMLElementName.SELECT.equals(formControl.getFormControlType().getElementName());
        }
    }

jericho-html-3.1/src/java/net/htmlparser/jericho/CharSequenceParseText.java0000644000175000017500000001154111204550410027045 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2.
The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; class CharSequenceParseText implements ParseText { private final CharSequence charSequence; CharSequenceParseText(final CharSequence charSequence) { this.charSequence=charSequence; } public final char charAt(final int index) { final char ch=charSequence.charAt(index); return (ch>='A' && ch<='Z') ? ((char)(ch ^ 0x20)) : ch; } public final boolean containsAt(final String str, final int pos) { for (int i=0; igetEnd() ? getEnd() : breakAtIndex); try { for (int i=(fromIndex<0 ? 0 : fromIndex); ilastPossibleBreakAtIndex) ? lastPossibleBreakAtIndex : breakAtIndex; outerLoop: for (int i=(fromIndex<0 ? 0 : fromIndex); igetEnd() ? 
getEnd() : fromIndex); i>breakAtIndex; i--) if (charAt(i)==searchChar) return i; return -1; } public final int lastIndexOf(final String searchString, final int fromIndex) { return lastIndexOf(searchString,fromIndex,NO_BREAK); } public final int lastIndexOf(final String searchString, int fromIndex, final int breakAtIndex) { if (searchString.length()==1) return lastIndexOf(searchString.charAt(0),fromIndex,breakAtIndex); if (searchString.length()==0) return fromIndex; final int rightIndex=getEnd()-searchString.length(); if (breakAtIndex>rightIndex) return -1; if (fromIndex>rightIndex) fromIndex=rightIndex; final int lastCharIndex=searchString.length()-1; final char lastChar=searchString.charAt(lastCharIndex); final int actualBreakAtPos=breakAtIndex+lastCharIndex; outerLoop: for (int i=fromIndex+lastCharIndex; i>actualBreakAtPos; i--) { if (charAt(i)==lastChar) { final int startIndex=i-lastCharIndex; for (int j=lastCharIndex-1; j>=0; j--) if (searchString.charAt(j)!=charAt(j+startIndex)) continue outerLoop; return startIndex; } } return -1; } public final int length() { return charSequence.length(); } public final CharSequence subSequence(final int begin, final int end) { // doesn't have to be efficient because it is not actually used anywhere internally. 
return substring(begin,end); } public final String toString() { return charSequence.toString(); } protected int getEnd() { return charSequence.length(); } protected String substring(final int begin, final int end) { return charSequence.subSequence(begin,end).toString().toLowerCase(); } } jericho-html-3.1/src/java/net/htmlparser/jericho/StreamedSource.java0000644000175000017500000011371011204550410025565 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.Iterator; import java.util.NoSuchElementException; import java.io.Closeable; import java.io.Reader; import java.io.Writer; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.CharBuffer; import java.nio.BufferOverflowException; import java.net.URL; import java.net.URLConnection; /** * Represents a streamed source HTML document. *

    * This class provides a means, via the {@link #iterator()} method, of sequentially parsing every {@linkplain Tag tag}, {@linkplain CharacterReference character reference} * and plain text segment contained within the source document using a minimum amount of memory. *

    * In contrast, the standard {@link Source} class stores the entire source text in memory and caches every tag parsed, * resulting in memory problems when attempting to parse very large files. *

    * The {@link #iterator() iterator} parses and returns each segment as the source text is streamed in. * Previous segments are discarded for garbage collection. * Source documents up to 2GB in size can be processed, a limit which is imposed by the java language because of its use of the int data type to index string operations. *

    * There is however a significant trade-off in functionality when using the StreamedSource class as opposed to the {@link Source} class. * The {@link Tag#getElement()} method is not supported on tags that are returned by the iterator, nor are any methods that use the {@link Element} class in any way. * The {@link Segment#getSource()} method is also not supported. *

    * Most of the methods and constructors in this class mirror similarly named methods in the {@link Source} class where the same functionality is available. *

    * See the description of the {@link #iterator()} method for a typical usage example of this class. *

    * In contrast to a {@link Source} object, the Reader or InputStream specified in the constructor or created implicitly by the constructor * remains open for the life of the StreamedSource object. If the stream is created internally, it is automatically {@linkplain #close() closed} * when the end of the stream is reached or the StreamedSource object is {@linkplain #finalize() finalized}. * However a Reader or InputStream that is specified directly in a constructor is never closed automatically, as it can not be assumed * that the application has no further use for it. It is the user's responsibility to ensure it is closed in this case. * Explicitly calling the {@link #close()} method on the StreamedSource object ensures that all resources used by it are closed, regardless of whether * they were created internally or supplied externally. *

    * The functionality provided by StreamedSource is similar to a StAX parser, * but with some important benefits: *

      *
    • * The source document does not have to be valid XML. It can be plain HTML, can contain invalid syntax, undefined entities, * incorrectly nested elements, {@linkplain TagType#isServerTag() server tags}, or anything else that is commonly found in * "tag soup". *
    • * Every single syntactical construct in the source document's original text is included in the iterator, including the * {@linkplain StartTagType#XML_DECLARATION XML declaration}, {@linkplain CharacterReference character references}, {@linkplain StartTagType#COMMENT comments}, * {@linkplain StartTagType#CDATA_SECTION CDATA sections} and {@linkplain TagType#isServerTag() server tags}, * each providing the segment's {@linkplain Segment#getBegin() begin} and {@linkplain Segment#getEnd() end} position in the source document. * This allows an exact copy of the original document to be generated, allowing modifications to be made only where they are explicitly required. * This is not possible with either SAX or * StAX, which to some extent provide interpretations of the content of the XML * instead of the syntactical structures used in the original source document. *
    *

    * The following table summarises the differences between the StreamedSource, StAX and SAX interfaces. * Note that some of the available features are documented as optional and may not be supported by all implementations of StAX and SAX. *

    * * * * * * * * * * * * * * * * * * * * *
    FeatureStreamedSourceStAXSAX
    Parse XML
    Parse entities without DTD
    Automatically validate XML
    Parse HTML
    Tolerant of syntax or nesting errors
    Provide begin and end character positions of each event1
    Provide source text of each event
    Handle {@linkplain TagType#isServerTag() server tag} events
    Handle {@linkplain StartTagType#XML_DECLARATION XML declaration} event
    Handle {@linkplain StartTagType#COMMENT comment} events
    Handle {@linkplain StartTagType#CDATA_SECTION CDATA section} events
    Handle {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} event
    Handle {@linkplain CharacterReference character reference} events
    Allow chunking of plain text
    Allow chunking of comment text
    Allow chunking of CDATA section text
    Allow specification of maximum buffer size
    * 1 StAX optionally reports the "offset" of each event but this could be either byte or character position depending on the source. *

    * Note that the {@link OutputDocument} class can not be used to create a modified version of a streamed source document. * Instead, the output document must be constructed manually from the segments provided by the {@link #iterator() iterator}. *

    * StreamedSource objects are not thread safe. */ public final class StreamedSource implements Iterable, Closeable { private final StreamedText streamedText; private final StreamedParseText streamedParseText; private final Source source; private final Closeable closeable; // internally created closeable object should be cleaned up internally. private final boolean automaticClose; private boolean coalescing=false; private boolean handleTags=true; private boolean isInitialised=false; private Segment currentSegment=null; private Segment nextParsedSegment=START_SEGMENT; private boolean isXML; private static final boolean assumeNoNestedTags=false; private static final Segment START_SEGMENT=new Segment(-1,-1); private StreamedSource(final Reader reader, final boolean automaticClose, final String encoding, final String encodingSpecificationInfo, final String preliminaryEncodingInfo) throws IOException { closeable=reader; this.automaticClose=automaticClose; streamedText=new StreamedText(reader); streamedParseText=new StreamedParseText(streamedText); source=new Source(streamedText,streamedParseText,encoding,encodingSpecificationInfo,preliminaryEncodingInfo); } private StreamedSource(final EncodingDetector encodingDetector, final boolean automaticClose) throws IOException { this(encodingDetector.openReader(),automaticClose,encodingDetector.getEncoding(),encodingDetector.getEncodingSpecificationInfo(),encodingDetector.getPreliminaryEncoding()+": "+encodingDetector.getPreliminaryEncodingSpecificationInfo()); } /** * Constructs a new StreamedSource object by loading the content from the specified Reader. *

    * If the specified reader is an instance of InputStreamReader, the {@link #getEncoding()} method of the
    * created StreamedSource object returns the encoding from InputStreamReader.getEncoding().
    *
    * @param reader the java.io.Reader from which to load the source text.
    * @throws java.io.IOException if an I/O error occurs.
    */
    public StreamedSource(final Reader reader) throws IOException {
        // Delegates to the private constructor with automaticClose=false.
        // The encoding and its description are only supplied when the reader is an InputStreamReader;
        // for any other Reader both are passed as null, as is the preliminary encoding info.
        this(reader,false,(reader instanceof InputStreamReader) ? ((InputStreamReader)reader).getEncoding() : null,(reader instanceof InputStreamReader) ? "InputStreamReader.getEncoding() of constructor argument" : null,null);
    }

    /**
    * Constructs a new StreamedSource object by loading the content from the specified InputStream.
    *

    * The algorithm for detecting the character {@linkplain #getEncoding() encoding} of the source document from the raw bytes * of the specified input stream is the same as that for the {@link Source#Source(URLConnection) Source(URLConnection)} constructor of the {@link Source} class, * except that the first step is not possible as there is no * Content-Type header to check. *

    * If the specified InputStream does not support the mark method, the algorithm that determines the encoding may have to wrap it
    * in a BufferedInputStream in order to look ahead at the encoding meta data.
    * This extra layer of buffering will then remain in place for the life of the StreamedSource, possibly impacting memory usage and/or degrading performance.
    * It is always preferable to use the {@link #StreamedSource(Reader)} constructor if the encoding is known in advance.
    *
    * @param inputStream the java.io.InputStream from which to load the source text.
    * @throws java.io.IOException if an I/O error occurs.
    * @see #getEncoding()
    */
    public StreamedSource(final InputStream inputStream) throws IOException {
        // The EncodingDetector works out the encoding from the raw bytes and supplies the reader.
        // automaticClose is passed as false for a caller-supplied stream.
        this(new EncodingDetector(inputStream),false);
    }

    /**
    * Constructs a new StreamedSource object by loading the content from the specified URL.
    *

    * This is equivalent to {@link #StreamedSource(URLConnection) StreamedSource(url.openConnection())}.
    *
    * @param url the URL from which to load the source text.
    * @throws java.io.IOException if an I/O error occurs.
    * @see #getEncoding()
    */
    public StreamedSource(final URL url) throws IOException {
        // The connection is opened here rather than by the caller, so automaticClose is passed as true.
        this(new EncodingDetector(url.openConnection()),true);
    }

    /**
    * Constructs a new StreamedSource object by loading the content from the specified URLConnection.
    *

    * The algorithm for detecting the character {@linkplain #getEncoding() encoding} of the source document is identical to that described in the * {@link Source#Source(URLConnection) Source(URLConnection)} constructor of the {@link Source} class. *

    * The algorithm that determines the encoding may have to wrap the input stream in a BufferedInputStream in order to look ahead
    * at the encoding meta data if the encoding is not specified in the HTTP headers.
    * This extra layer of buffering will then remain in place for the life of the StreamedSource, possibly impacting memory usage and/or degrading performance.
    * It is always preferable to use the {@link #StreamedSource(Reader)} constructor if the encoding is known in advance.
    *
    * @param urlConnection the URL connection from which to load the source text.
    * @throws java.io.IOException if an I/O error occurs.
    * @see #getEncoding()
    */
    public StreamedSource(final URLConnection urlConnection) throws IOException {
        // The reader over the connection is opened by the private constructor via the EncodingDetector,
        // so automaticClose is passed as true.
        this(new EncodingDetector(urlConnection),true);
    }

    /**
    * Constructs a new StreamedSource object from the specified text.
    *

    * Although the CharSequence argument of this constructor apparently contradicts the notion of streaming in the source text, * it can still provide benefits over the equivalent use of the standard {@link Source} class. *

    * Firstly, using the StreamedSource class to iterate the nodes of an in-memory CharSequence source document still requires much less memory * than the equivalent operation using the standard {@link Source} class. *

    * Secondly, the specified CharSequence object could possibly implement its own paging mechanism to minimise memory usage. *

    * If the specified CharSequence is mutable, its state must not be modified while the StreamedSource is in use. * * @param text the source text. */ public StreamedSource(final CharSequence text) { closeable=null; automaticClose=false; streamedText=new StreamedText(text); streamedParseText=new StreamedParseText(streamedText); source=new Source(text,streamedParseText,null,"Document specified encoding can not be determined automatically from a streamed source",null); } /** * Specifies an existing character array to use for buffering the incoming character stream. *

    * The specified buffer is fixed for the life of the StreamedSource object, in contrast to the default buffer which can be automatically replaced * by a larger buffer as needed. * This means that if a {@linkplain Tag tag} (including a {@linkplain StartTagType#COMMENT comment} or {@linkplain StartTagType#CDATA_SECTION CDATA section}) is * encountered that is larger than the specified buffer, an unrecoverable BufferOverflowException is thrown. * This exception is also thrown if {@link #setCoalescing(boolean) coalescing} has been enabled and a plain text segment is encountered * that is larger than the specified buffer. *

    * In general this method should only be used if there needs to be an absolute maximum memory limit imposed on the parser, where that requirement is more important * than the ability to parse any source document successfully. *

    * This method can only be called before the {@link #iterator()} method has been called. * * @param buffer an existing character array to use for buffering the incoming character stream, must not be null. * @return this StreamedSource instance, allowing multiple property setting methods to be chained in a single statement. * @throws IllegalStateException if the {@link #iterator()} method has already been called. */ public StreamedSource setBuffer(char[] buffer) { if (isInitialised) throw new IllegalStateException("setBuffer() can only be called before iterator() is called"); streamedText.setBuffer(buffer); return this; } /** * Specifies whether an unbroken section of plain text in the source document should always be coalesced into a single {@link Segment} by the {@linkplain #iterator() iterator}. *

    * If this property is set to the default value of false, * and a section of plain text is encountered in the document that is larger than the current {@linkplain #getBufferSize() buffer size}, * the text is chunked into multiple consecutive plain text segments in order to minimise memory usage. *

    * If this property is set to true then chunking is disabled, ensuring that consecutive plain text segments are never generated, * but instead forcing the internal buffer to expand to fit the largest section of plain text. *

    * Note that {@link CharacterReference} segments are always handled separately from plain text, regardless of the value of this property. * For this reason, algorithms that process element content almost always have to be designed to expect the text in multiple segments * in order to handle character references, so there is usually no advantage in {@linkplain #setCoalescing(boolean) coalescing} plain text segments. * * @param coalescing the new value of the coalescing property. * @return this StreamedSource instance, allowing multiple property setting methods to be chained in a single statement. * @throws IllegalStateException if the {@link #iterator()} method has already been called. */ public StreamedSource setCoalescing(final boolean coalescing) { if (isInitialised) throw new IllegalStateException("setPlainTextWriter() can only be called before iterator() is called"); this.coalescing=coalescing; return this; } /** * Closes the underlying Reader or InputStream and releases any system resources associated with it. *

    * If the stream is already closed then invoking this method has no effect. * * @throws IOException if an I/O error occurs. */ public void close() throws IOException { if (closeable!=null) closeable.close(); } /** * Returns the character encoding scheme of the source byte stream used to create this object. *

    * This method works in essentially the same way as the {@link Source#getEncoding()} method. *

    * If the byte stream used to create this object does not support the mark method, the algorithm that determines the encoding may have to wrap it * in a BufferedInputStream in order to look ahead at the encoding meta data. * This extra layer of buffering will then remain in place for the life of the StreamedSource, possibly impacting memory usage and/or degrading performance. * It is always preferable to use the {@link #StreamedSource(Reader)} constructor if the encoding is known in advance. *

    * The {@link #getEncodingSpecificationInfo()} method returns a simple description of how the value of this method was determined.
    *
    * @return the character encoding scheme of the source byte stream used to create this object, or null if the encoding is not known.
    * @see #getEncodingSpecificationInfo()
    */
    public String getEncoding() {
        // The encoding is held by the internal Source object.
        final String encoding=source.getEncoding();
        return encoding;
    }

    /**
    * Returns a concise description of how the {@linkplain #getEncoding() encoding} of the source document was determined.
    *

    * The description is intended for informational purposes only.
    * It is not guaranteed to have any particular format and can not be reliably parsed.
    *
    * @return a concise description of how the {@linkplain #getEncoding() encoding} of the source document was determined.
    * @see #getEncoding()
    */
    public String getEncodingSpecificationInfo() {
        // The description is held by the internal Source object.
        final String description=source.getEncodingSpecificationInfo();
        return description;
    }

    /**
    * Returns the preliminary encoding of the source document together with a concise description of how it was determined.
    *

    * This method works in essentially the same way as the {@link Source#getPreliminaryEncodingInfo()} method. *

    * The description returned by this method is intended for informational purposes only.
    * It is not guaranteed to have any particular format and can not be reliably parsed.
    *
    * @return the preliminary encoding of the source document together with a concise description of how it was determined, or null if no preliminary encoding was required.
    * @see #getEncoding()
    */
    public String getPreliminaryEncodingInfo() {
        // The preliminary encoding info is held by the internal Source object.
        final String preliminaryInfo=source.getPreliminaryEncodingInfo();
        return preliminaryInfo;
    }

    /**
    * Returns an iterator over every {@linkplain Tag tag}, {@linkplain CharacterReference character reference} and plain text segment contained within the source document.
    *

    * Plain text is defined as all text that is not part of a {@link Tag} or {@link CharacterReference}. *

    * This results in a sequential walk-through of the entire source document. * The {@linkplain Segment#getEnd() end} position of each segment should correspond with the {@linkplain Segment#getBegin() begin} position of the subsequent segment, * unless any of the tags are enclosed by other tags. * This could happen if there are {@linkplain TagType#isServerTag() server tags} present in the document, or in rare circumstances where the * {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} contains {@linkplain StartTagType#MARKUP_DECLARATION markup declarations}. *

    * Each segment generated by the iterator is parsed as the source text is streamed in. Previous segments are discarded for garbage collection. *

    * If a section of plain text is encountered in the document that is larger than the current {@linkplain #getBufferSize() buffer size}, * the text is chunked into multiple consecutive plain text segments in order to minimise memory usage. * Setting the {@link #setCoalescing(boolean) Coalescing} property to true disables chunking, ensuring that consecutive plain text segments are never generated, * but instead forcing the internal buffer to expand to fit the largest section of plain text. * Note that {@link CharacterReference} segments are always handled separately from plain text, regardless of whether {@linkplain #setCoalescing(boolean) coalescing} * is enabled. For this reason, algorithms that process element content almost always have to be designed to expect the text in multiple segments * in order to handle character references, so there is usually no advantage in {@linkplain #setCoalescing(boolean) coalescing} plain text segments. *

    * Character references that are found inside tags, such as those present inside attribute values, do not generate separate segments from the iterator. *

    * This method may only be called once on any particular StreamedSource instance. *

    *

    *
    Example:
    *
    *

    * The following code demonstrates the typical (implied) usage of this method through the Iterable interface * to make an exact copy of the document from reader to writer (assuming no server tags are present): *

    *
    	 * StreamedSource streamedSource=new StreamedSource(reader);
    	 * for (Segment segment : streamedSource) {
    	 *   if (segment instanceof Tag) {
    	 *     Tag tag=(Tag)segment;
    	 *     // HANDLE TAG
    	 *     // Uncomment the following line to ensure each tag is valid XML:
    	 *     // writer.write(tag.tidy()); continue;
    	 *   } else if (segment instanceof CharacterReference) {
    	 *     CharacterReference characterReference=(CharacterReference)segment;
    	 *     // HANDLE CHARACTER REFERENCE
    	 *     // Uncomment the following line to decode all character references instead of copying them verbatim:
    	 *     // characterReference.appendCharTo(writer); continue;
    	 *   } else {
    	 *     // HANDLE PLAIN TEXT
    	 *   }
    	 *   // unless specific handling has prevented getting to here, simply output the segment as is:
    	 *   writer.write(segment.toString());
    	 * }
    *

    Note that the last line writer.write(segment.toString()) in the above code can be replaced with the following for improved performance:

    *
    	 * CharBuffer charBuffer=streamedSource.getCurrentSegmentCharBuffer();
    	 * writer.write(charBuffer.array(),charBuffer.position(),charBuffer.length());
    *
    *
    *

    * The following code demonstrates how to process the plain text content of a specific element, in this case to print the content of every paragraph element: *

    *
    	 * StreamedSource streamedSource=new StreamedSource(reader);
    	 * StringBuilder sb=new StringBuilder();
    	 * boolean insideParagraphElement=false;
    	 * for (Segment segment : streamedSource) {
    	 *   if (segment instanceof Tag) {
    	 *     Tag tag=(Tag)segment;
    	 *     if (tag.getName().equals("p")) {
    	 *       if (tag instanceof StartTag) {
    	 *         insideParagraphElement=true;
    	 *         sb.setLength(0);
    	 *       } else { // tag instanceof EndTag
    	 *         insideParagraphElement=false;
    	 *         System.out.println(sb.toString());
    	 *       }
    	 *     }
    	 *   } else if (insideParagraphElement) {
    	 *     if (segment instanceof CharacterReference) {
    	 *       ((CharacterReference)segment).appendCharTo(sb);
    	 *     } else {
    	 *       sb.append(segment);
    	 *     }
    	 *   }
    	 * }
    *
    *
    * @return an iterator over every {@linkplain Tag tag}, {@linkplain CharacterReference character reference} and plain text segment contained within the source document.
    */
    public Iterator iterator() {
        if (!isInitialised) {
            // Mark as initialised before constructing the iterator, so the flag stays set even if construction fails.
            isInitialised=true;
            return new StreamedSourceIterator();
        }
        throw new IllegalStateException("iterator() can only be called once");
    }

    /**
    * Returns the current {@link Segment} from the {@linkplain #iterator()}.
    *

    * This is defined as the last {@link Segment} returned from the iterator's next() method. *

    * This method returns null if the iterator's next() method has never been called, or its
    * hasNext() method has returned the value false.
    *
    * @return the current {@link Segment} from the {@linkplain #iterator()}.
    */
    public Segment getCurrentSegment() {
        return this.currentSegment;
    }

    /**
    * Returns a CharBuffer containing the source text of the {@linkplain #getCurrentSegment() current segment}.
    *

    * The returned CharBuffer provides a window into the internal char[] buffer including the position and length that spans the * {@linkplain #getCurrentSegment() current segment}. *

    * For example, the following code writes the source text of the current segment to writer: *

    * CharBuffer charBuffer=streamedSource.getCurrentSegmentCharBuffer();
    * writer.write(charBuffer.array(),charBuffer.position(),charBuffer.length()); *

    * This may provide a performance benefit over the standard way of accessing the source text of the current segment, * which is to use the CharSequence interface of the segment directly, or to call {@link Segment#toString()}. *

    * Because this CharBuffer is a direct window into the internal buffer of the StreamedSource, the contents of the
    * CharBuffer.array() must not be modified, and the array is only guaranteed to hold the segment source text until the
    * iterator's hasNext() or next() method is next called.
    *
    * @return a CharBuffer containing the source text of the {@linkplain #getCurrentSegment() current segment}.
    */
    public CharBuffer getCurrentSegmentCharBuffer() {
        // Request the window of the internal buffer that spans the current segment.
        final Segment segment=currentSegment;
        final int windowBegin=segment.getBegin();
        final int windowEnd=segment.end;
        return streamedText.getCharBuffer(windowBegin,windowEnd);
    }

    /**
    * Indicates whether the source document is likely to be XML.
    *

    * The algorithm used to determine this is designed to be relatively inexpensive and to provide an accurate result in * most normal situations. * An exact determination of whether the source document is XML would require a much more complex analysis of the text. *

    * The algorithm is as follows: *

      *
    1. If the document begins with an {@linkplain StartTagType#XML_DECLARATION XML declaration}, it is an XML document. *
    2. If the document begins with a {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} that contains the text * "xhtml", it is an XHTML document, and hence * also an XML document. *
    3. If none of the above conditions are met, assume the document is normal HTML, and therefore not an XML document. *
    *

    * This method can only be called after the {@link #iterator()} method has been called.
    *
    * @return true if the source document is likely to be XML, otherwise false.
    * @throws IllegalStateException if the {@link #iterator()} method has not yet been called.
    */
    public boolean isXML() {
        // The flag is only available once iterator() has been called.
        if (isInitialised) {
            return isXML;
        }
        throw new IllegalStateException("isXML() method only available after iterator() has been called");
    }

    /**
    * Sets the {@link Logger} that handles log messages.
    *

    * Specifying a null argument disables logging completely for operations performed on this StreamedSource object. *

    * A logger instance is created automatically for each StreamedSource object in the same way as is described in the
    * {@link Source#setLogger(Logger)} method.
    *
    * @param logger the logger that will handle log messages, or null to disable logging.
    * @see Config#LoggerProvider
    */
    public void setLogger(final Logger logger) {
        // The logger is stored on the internal Source object.
        source.setLogger(logger);
    }

    /**
    * Returns the {@link Logger} that handles log messages.
    *

    * A logger instance is created automatically for each StreamedSource object using the {@link LoggerProvider} * specified by the static {@link Config#LoggerProvider} property. * This can be overridden by calling the {@link #setLogger(Logger)} method. * The name used for all automatically created logger instances is "net.htmlparser.jericho". * * @return the {@link Logger} that handles log messages, or null if logging is disabled. */ public Logger getLogger() { return source.getLogger(); } /** * Returns the current size of the internal character buffer. *

    * This information is generally useful only for investigating memory and performance issues.
    *
    * @return the current size of the internal character buffer.
    */
    public int getBufferSize() {
        // Reports the length of whichever buffer the streamed text currently holds.
        final char[] currentBuffer=streamedText.getBuffer();
        return currentBuffer.length;
    }

    /**
    * Returns a string representation of the object as generated by the default Object.toString() implementation.
    *

    * In contrast to the {@link Source#toString()} implementation, it is generally not possible for this method to return the entire source text.
    *
    * @return a string representation of the object as generated by the default Object.toString() implementation.
    */
    public String toString() {
        // Explicit override that just defers to the superclass, so the documented contrast with Source#toString() has a home.
        return super.toString();
    }

    /**
    * Called by the garbage collector on an object when garbage collection determines that there are no more references to the object.
    *

    * This implementation calls the {@link #close()} method if the underlying Reader or InputStream stream was created internally. */ protected void finalize() { automaticClose(); } StreamedSource setHandleTags(final boolean handleTags) { this.handleTags=handleTags; return this; } StreamedSource setSearchBegin(final int begin) { if (isInitialised) throw new IllegalStateException("setSearchBegin() can only be called before iterator() is called"); final int segmentEnd=begin-1; nextParsedSegment=new Segment(segmentEnd,segmentEnd); return this; } private void automaticClose() { if (automaticClose) try {close();} catch (IOException ex) {} } private static boolean isXML(final Segment firstNonTextSegment) { if (firstNonTextSegment==null || !(firstNonTextSegment instanceof Tag)) return false; Tag tag=(Tag)firstNonTextSegment; if (tag.getTagType()==StartTagType.XML_DECLARATION) return true; // if document has a DOCTYPE declaration and it contains the text "xhtml", it is an XML document: if (tag.source.getParseText().indexOf("xhtml",tag.begin,tag.end)!=-1) return true; return false; } private class StreamedSourceIterator implements Iterator { private final boolean coalescing; private final boolean handleTags; private Segment nextSegment; private int plainTextSegmentBegin=0; private final char[] charByRef=new char[1]; // used to pass a single character by reference public StreamedSourceIterator() { coalescing=StreamedSource.this.coalescing; handleTags=StreamedSource.this.handleTags; loadNextParsedSegment(); isXML=isXML(nextParsedSegment); } public boolean hasNext() { if (nextSegment==Tag.NOT_CACHED) loadNextParsedSegment(); return nextSegment!=null; } public Segment next() { if (!hasNext()) throw new NoSuchElementException(); final Segment result=nextSegment; nextSegment=(result==nextParsedSegment) ? 
Tag.NOT_CACHED : nextParsedSegment; streamedText.setMinRequiredBufferBegin(result.end); // guaranteed not to be discarded until next call to loadNextParsedSegment() currentSegment=result; return result; } public void remove() { throw new UnsupportedOperationException(); } private final void loadNextParsedSegment() { nextParsedSegment=findNextParsedSegment(); final int plainTextSegmentEnd=(nextParsedSegment!=null) ? nextParsedSegment.begin : streamedText.length(); nextSegment=(plainTextSegmentBegin=source.fullSequentialParseData[0]) { // do not handle character references inside tags or script elements final CharacterReference characterReference=CharacterReference.construct(source,i,Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL); if (characterReference!=null) return characterReference; } } else if (handleTags && ch=='<') { final Tag tag=TagType.getTagAt(source,i,false,assumeNoNestedTags); if (tag!=null && !tag.isUnregistered()) { final TagType tagType=tag.getTagType(); if (tag.end>source.fullSequentialParseData[0] && tagType!=StartTagType.DOCTYPE_DECLARATION) { source.fullSequentialParseData[0]=(tagType==StartTagType.NORMAL && tag.name==HTMLElementName.SCRIPT) ? Integer.MAX_VALUE : tag.end; } return tag; } } i++; } if (i * This class is only of interest to users who wish to create custom tag types. *

    * The only external difference between this class and its abstract superclass {@link StartTagType} is that it provides a default * implementation of the {@link #constructTagAt(Source, int pos)} method. *

    * Most of the predefined start tag types are implemented using this class or a subclass of it. * * @see EndTagTypeGenericImplementation */ public class StartTagTypeGenericImplementation extends StartTagType { final boolean nameCharAfterPrefixAllowed; /** * Constructs a new StartTagTypeGenericImplementation object with the specified properties. *
    (implementation assistance method) *

    * This is equivalent to calling *
    new {@link #StartTagTypeGenericImplementation(String,String,String,EndTagType,boolean,boolean,boolean) StartTagTypeGenericImplementation}(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false).
    *
    * @param description a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
    * @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
    * @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
    * @param correspondingEndTagType the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
    * @param isServerTag indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
    */
    protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag) {
        // Convenience form: hasAttributes and isNameAfterPrefixRequired both default to false.
        this(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false);
    }

    /**
    * Constructs a new StartTagTypeGenericImplementation object with the specified properties.
    *
    (implementation assistance method)
    *
    * @param description a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
    * @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
    * @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
    * @param correspondingEndTagType the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
    * @param isServerTag indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
    * @param hasAttributes indicates whether the new start tag type {@linkplain #hasAttributes() has attributes}.
    * @param isNameAfterPrefixRequired indicates whether a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}.
    */
    protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag, final boolean hasAttributes, final boolean isNameAfterPrefixRequired) {
        super(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,hasAttributes,isNameAfterPrefixRequired);
        // A name character may directly follow the prefix unless the prefix itself ends in a letter.
        final String prefix=getNamePrefix();
        final int prefixLength=prefix.length();
        if (prefixLength==0) {
            nameCharAfterPrefixAllowed=true;
        } else {
            nameCharAfterPrefixAllowed=!Character.isLetter(prefix.charAt(prefixLength-1));
        }
    }

    /**
    * Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
    *
    (default implementation method) *

    * This default implementation performs the following steps: *

      *
    1. * If a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}, search for a valid * {@linkplain Tag#isXMLName(CharSequence) XML tag name} directly after the * {@linkplain #getNamePrefix() name prefix} using the {@link Source#getNameEnd(int pos)} method. * If one is found, set the {@linkplain Tag#getName() name} to include it, otherwise return null. *
    2. * If the last character of the {@linkplain #getNamePrefix() name prefix} is a letter * (indicating that the prefix includes the full {@linkplain Tag#getName() name} of the tag), * and the character following the prefix in the source text is also a letter * or any other valid {@linkplain Tag#isXMLNameChar(char) XML name character}, * return null. *
      Example: the source text "<?xmlt ?>" should not be recognised as an * {@linkplain #XML_PROCESSING_INSTRUCTION XML processing instruction}, which has the prefix "<?xml". *
    3. * If the tag type {@linkplain #hasAttributes() has attributes}, call * {@link #parseAttributes(Source,int,String) parseAttributes(source,pos,name)} to parse them. * Return null if too many errors occur while parsing the attributes. *
    4. * Find the {@linkplain Tag#getEnd() end} of the tag using the {@link #getEnd(Source, int pos)} method, * where pos is either the end of the {@linkplain StartTag#getAttributes() attributes} segment or the end of the * {@linkplain Tag#getName() name} depending on whether the tag type {@linkplain #hasAttributes() has attributes}. * Return null if the end of the tag can not be found. *
    5. * Construct the {@link StartTag} object using the * {@link #constructStartTag(Source,int,int,String,Attributes) constructStartTag(Source, int pos, int end, String name, Attributes)} * method with the argument values collected over the previous steps. *
    *

    * See {@link TagType#constructTagAt(Source, int pos)} for more important information about this method. * * @param source the {@link Source} document. * @param pos the position in the source document. * @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or null if it does not meet the criteria. */ protected Tag constructTagAt(final Source source, final int pos) { final ParseText parseText=source.getParseText(); final int nameBegin=pos+1; String name=getNamePrefix(); int nameEnd=nameBegin+getNamePrefix().length(); if (isNameAfterPrefixRequired()) { final int extendedNameEnd=source.getNameEnd(nameEnd); if (extendedNameEnd==-1) return null; name=source.getName(nameBegin,extendedNameEnd); nameEnd=extendedNameEnd; } else if (!nameCharAfterPrefixAllowed && Tag.isXMLNameChar(parseText.charAt(nameEnd))) { return null; } int end; Attributes attributes=null; if (hasAttributes()) { // it is necessary to get the attributes so that we can be sure that the search on the closing delimiter doesn't pick up // anything from the attribute values, which can legally contain ">" characters. attributes=parseAttributes(source,pos,name); if (attributes==null) return null; // happens if attributes not properly formed end=getEnd(source,attributes.getEnd()); // should always return a valid end } else { end=getEnd(source,nameEnd); if (end==-1) { if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("StartTag ").append(name).append(" at ")).append(" not recognised as type '").append(getDescription()).append("' because it has no closing delimiter").toString()); return null; } } return constructStartTag(source,pos,end,name,attributes); } /** * Returns the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document. *
    (implementation assistance method) *

    * This default implementation simply searches for the first occurrence of the * {@linkplain #getClosingDelimiter() closing delimiter} after the specified position, and returns the position immediately * after the end of it. *

    * If the closing delimiter is not found, the value -1 is returned. * * @param source the {@link Source} document. * @param pos the position in the source document. * @return the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document, or -1 if the end of the tag can not be found. */ protected int getEnd(final Source source, final int pos) { final int delimiterBegin=source.getParseText().indexOf(getClosingDelimiter(),pos); return (delimiterBegin==-1 ? -1 : delimiterBegin+getClosingDelimiter().length()); } } jericho-html-3.1/src/java/net/htmlparser/jericho/NumericCharacterReference.java0000644000175000017500000003154311204550410027701 0ustar twernertwerner// Jericho HTML Parser - Java based library for analysing and manipulating HTML // Version 3.1 // Copyright (C) 2004-2009 Martin Jericho // http://jericho.htmlparser.net/ // // This library is free software; you can redistribute it and/or // modify it under the terms of either one of the following licences: // // 1. The Eclipse Public License (EPL) version 1.0, // included in this distribution in the file licence-epl-1.0.html // or available at http://www.eclipse.org/legal/epl-v10.html // // 2. The GNU Lesser General Public License (LGPL) version 2.1 or later, // included in this distribution in the file licence-lgpl-2.1.txt // or available at http://www.gnu.org/licenses/lgpl.txt // // This library is distributed on an "AS IS" basis, // WITHOUT WARRANTY OF ANY KIND, either express or implied. // See the individual licence texts for more details. package net.htmlparser.jericho; import java.util.*; import java.io.*; /** * Represents an HTML Numeric Character Reference. *

    * A numeric character reference can be one of two types: *

    *
    Decimal Character Reference *
    A numeric character reference specifying the unicode code point in decimal notation.
    * This is signified by the absence of an 'x' character after the '#' (e.g. "&amp;#62;"). *
    Hexadecimal Character Reference *
    A numeric character reference specifying the unicode code point in hexadecimal notation.
    * This is signified by the presence of an 'x' character after the '#' (e.g. "&amp;#x3e;"). *
    *

    * Static methods to {@linkplain #encode(CharSequence) encode} and {@linkplain #decode(CharSequence) decode} strings * and single characters can be found in the {@link CharacterReference} superclass. *

    * NumericCharacterReference instances are obtained using one of the following methods: *

      *
    • {@link CharacterReference#parse(CharSequence characterReferenceText)} *
    • {@link Source#getNextCharacterReference(int pos)} *
    • {@link Source#getPreviousCharacterReference(int pos)} *
    • {@link Segment#getAllCharacterReferences()} *
    *
    * @see CharacterReference
    * @see CharacterEntityReference
    */
public class NumericCharacterReference extends CharacterReference {
    /** Notation of this reference: true for hexadecimal, false for decimal. Set once by the constructor. */
    private final boolean hex;

    /**
    * Creates a numeric character reference; the first four arguments are passed straight to the superclass constructor.
    *
    * @param source  passed to the superclass constructor.
    * @param begin  passed to the superclass constructor.
    * @param end  passed to the superclass constructor.
    * @param codePoint  passed to the superclass constructor.
    * @param hex  true if the reference uses hexadecimal notation, false if it uses decimal notation.
    */
    private NumericCharacterReference(final Source source, final int begin, final int end, final int codePoint, final boolean hex) {
        super(source,begin,end,codePoint);
        this.hex=hex;
    }

    /**
    * Indicates whether this numeric character reference specifies the unicode code point in decimal format.
    * <p>
    * A numeric character reference in decimal format is referred to in this library as a
    * decimal character reference.
    *
    * @return true if this numeric character reference specifies the unicode code point in decimal format, otherwise false.
    * @see #isHexadecimal()
    */
    public boolean isDecimal() {
        return !hex;
    }

    /**
    * Indicates whether this numeric character reference specifies the unicode code point in hexadecimal format.
    * <p>
    * A numeric character reference in hexadecimal format is referred to in this library as a
    * hexadecimal character reference.
    *
    * @return true if this numeric character reference specifies the unicode code point in hexadecimal format, otherwise false.
    * @see #isDecimal()
    */
    public boolean isHexadecimal() {
        return hex;
    }

    /**
    * Encodes the specified text, escaping special characters into numeric character references.
    *

    * Each character is encoded only if the {@link #requiresEncoding(char) requiresEncoding(char)} method would return true for that character. *

    * This method encodes all character references in decimal format, and is exactly the same as calling * {@link #encodeDecimal(CharSequence)}. *

    * To encode text using both character entity references and numeric character references, use the
    * {@link CharacterReference#encode(CharSequence)} method instead. *

    * To encode text using hexadecimal character references only, * use the {@link #encodeHexadecimal(CharSequence)} method instead. * * @param unencodedText the text to encode. * @return the encoded string. * @see #decode(CharSequence) */ public static String encode(final CharSequence unencodedText) { if (unencodedText==null) return null; final StringBuilder sb=new StringBuilder(unencodedText.length()*2); for (int i=0; idecimal character references. *

    * Each character is encoded only if the {@link #requiresEncoding(char) requiresEncoding(char)} method would return true for that character. *

    * To encode text using both character entity references and numeric character references, use the
    * {@link CharacterReference#encode(CharSequence)} method instead. *

    * To encode text using hexadecimal character references only,
    * use the {@link #encodeHexadecimal(CharSequence)} method instead.
    *
    * @param unencodedText  the text to encode.
    * @return the encoded string.
    * @see #decode(CharSequence)
    */
    public static String encodeDecimal(final CharSequence unencodedText) {
        // Pure alias: all of the work is delegated to encode(CharSequence).
        return encode(unencodedText);
    }

    /**
    * Encodes the specified text, escaping special characters into hexadecimal character references.
    *

    * Each character is encoded only if the {@link #requiresEncoding(char) requiresEncoding(char)} method would return true for that character. *

    * To encode text using both character entity references and numeric character references, use the
    * {@link CharacterReference#encode(CharSequence)} method instead. *

    * To encode text using decimal character references only, * use the {@link #encodeDecimal(CharSequence)} method instead. * * @param unencodedText the text to encode. * @return the encoded string. * @see #decode(CharSequence) */ public static String encodeHexadecimal(final CharSequence unencodedText) { if (unencodedText==null) return null; final StringBuilder sb=new StringBuilder(unencodedText.length()*2); for (int i=0; i * The returned string uses the same radix as the original character reference in the source document, * i.e. decimal format if {@link #isDecimal()} is true, and hexadecimal format if {@link #isHexadecimal()} is true. *

    * Note that the returned string is not necessarily the same as the original source text used to create this object. * This library recognises certain invalid forms of character references, * as detailed in the {@link #decode(CharSequence) decode(CharSequence)} method. *

    * To retrieve the original source text, use the {@link #toString() toString()} method instead. *

    *

    *
    Example:
    *
    CharacterReference.parse("&amp;#62").getCharacterReferenceString() returns "&amp;#62;"
    *
    *
    * @return the correct encoded form of this numeric character reference.
    * @see CharacterReference#getCharacterReferenceString(int codePoint)
    */
    public String getCharacterReferenceString() {
        // Re-encode with the same notation that this reference was created with.
        if (hex) return getHexadecimalCharacterReferenceString(codePoint);
        return getDecimalCharacterReferenceString(codePoint);
    }

    /**
    * Returns the numeric character reference encoded form of the specified unicode code point.
    *

    * This method returns the character reference in decimal format, and is exactly the same as calling * {@link #getDecimalCharacterReferenceString(int codePoint)}. *

    * To get either the character entity reference or numeric character reference, use the
    * {@link CharacterReference#getCharacterReferenceString(int codePoint)} method instead. *

    * To get the character reference in hexadecimal format, use the {@link #getHexadecimalCharacterReferenceString(int codePoint)} method instead. *

    *

    *
    Examples:
    *
    NumericCharacterReference.getCharacterReferenceString(62) returns "&amp;#62;"
    *
    NumericCharacterReference.getCharacterReferenceString('>') returns "&amp;#62;"
    *
    *
    * @return the numeric character reference encoded form of the specified unicode code point.
    * @see CharacterReference#getCharacterReferenceString(int codePoint)
    */
    public static String getCharacterReferenceString(final int codePoint) {
        // Decimal notation is the default form, so this simply delegates to the decimal variant.
        return getDecimalCharacterReferenceString(codePoint);
    }

    /**
    * Attempts to construct a numeric character reference from the source text starting at <code>begin</code>.
    * <p>
    * The digits following "&amp;#" (or "&amp;#x" for hexadecimal notation) are scanned up to a terminating
    * semicolon. A reference is treated as unterminated if a non-digit character is met first, or if the last
    * position of the source is reached while still reading digits.
    * <p>
    * Returns null if:
    * - the reference is unterminated and the applicable maximum code point setting is INVALID_CODE_POINT,
    * - no digits are present,
    * - the reference is unterminated and its value exceeds the applicable maximum code point setting, or
    * - the reference is unterminated and its digits can not be parsed as an int.
    * <p>
    * A terminated reference whose value can not be parsed, or is greater than <code>Character.MAX_CODE_POINT</code>,
    * is still returned, but with a code point of INVALID_CODE_POINT.
    *
    * @param source  the source document containing the character reference.
    * @param begin  the position of the "&amp;" character that starts the character reference.
    * @param unterminatedCharacterReferenceSettings  supplies the maximum code points accepted for unterminated decimal and hexadecimal references.
    * @return the constructed character reference, or null in the cases listed above.
    */
    static CharacterReference construct(final Source source, final int begin, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
        // only called from CharacterReference.construct(), so we can assume that first characters are "&#"
        final ParseText parseText=source.getParseText();
        int codePointStringBegin=begin+2;
        boolean hex;
        // An 'x' directly after "&#" selects hexadecimal notation, in which case the digits start one position later.
        if (hex=(parseText.charAt(codePointStringBegin)=='x')) codePointStringBegin++;
        // Pick the limit for unterminated references that matches the notation in use.
        final int unterminatedMaxCodePoint=hex ? unterminatedCharacterReferenceSettings.hexadecimalCharacterReferenceMaxCodePoint : unterminatedCharacterReferenceSettings.decimalCharacterReferenceMaxCodePoint;
        final int maxSourcePos=source.end-1;
        String codePointString;
        int end;
        int x=codePointStringBegin;
        boolean unterminated=false;
        // Scan forward one character at a time until the reference is terminated, accepted as unterminated, or rejected.
        while (true) {
            final char ch=parseText.charAt(x);
            if (ch==';') {
                // Properly terminated: the end position includes the semicolon, the digit string excludes it.
                end=x+1;
                codePointString=source.substring(codePointStringBegin,x);
                break;
            }
            if ((ch>='0' && ch<='9') || (hex && ((ch>='a' && ch<='f') || (ch>='A' && ch<='F')))) {
                // We have a valid decimal digit (if hex is false), or a hexadecimal digit (if hex is true)
                if (x==maxSourcePos) {
                    // We are at the last position in the source text without the terminating semicolon.
                    unterminated=true;
                    x++; // include this digit
                }
            } else {
                // We don't have a valid digit, meaning the character reference is unterminated.
                unterminated=true;
            }
            if (unterminated) {
                // Different browsers react differently to unterminated numeric character references.
                // The behaviour of this method is determined by the settings in the unterminatedCharacterReferenceSettings parameter.
                if (unterminatedMaxCodePoint==INVALID_CODE_POINT) {
                    // reject:
                    return null;
                } else {
                    // accept:
                    end=x;
                    codePointString=source.substring(codePointStringBegin,x);
                    break;
                }
            }
            x++;
        }
        // No digits at all means this is not a numeric character reference.
        if (codePointString.length()==0) return null;
        int codePoint=INVALID_CODE_POINT;
        try {
            codePoint=Integer.parseInt(codePointString,hex?16:10);
            // An unterminated reference is only accepted up to the configured maximum code point.
            if (unterminated && codePoint>unterminatedMaxCodePoint) return null;
            if (codePoint>Character.MAX_CODE_POINT) codePoint=INVALID_CODE_POINT;
        } catch (NumberFormatException ex) {
            // This should only happen if number is larger than Integer.MAX_VALUE.
            if (unterminated) return null;
            // If it is a terminated reference just ignore the exception as codePoint will remain with its value of INVALID_CODE_POINT.
        }
        return new NumericCharacterReference(source,begin,end,codePoint,hex);
    }

    /**
    * Returns a debugging description of this numeric character reference.
    * <p>
    * The description consists of the encoded form of the reference in double quotes (using the same notation as this reference),
    * the output of <code>appendUnicodeText</code> for the code point, and the debug info of the superclass, separated by spaces.
    *
    * @return a debugging description of this numeric character reference.
    */
    public String getDebugInfo() {
        final StringBuilder sb=new StringBuilder();
        sb.append('"');
        try {
            if (hex)
                appendHexadecimalCharacterReferenceString(sb,codePoint);
            else
                appendDecimalCharacterReferenceString(sb,codePoint);
            sb.append("\" ");
            appendUnicodeText(sb,codePoint);
        } catch (IOException ex) {throw new RuntimeException(ex);} // never happens
        sb.append(' ').append(super.getDebugInfo());
        return sb.toString();
    }
}
jericho-html-3.1/samples/0000755000175000017500000000000011167436712015354 5ustar twernertwernerjericho-html-3.1/samples/webapps/0000755000175000017500000000000011167436712017015 5ustar twernertwernerjericho-html-3.1/samples/webapps/JerichoHTML/0000755000175000017500000000000011167436712021065 5ustar twernertwernerjericho-html-3.1/samples/webapps/JerichoHTML/WEB-INF/0000755000175000017500000000000011214132426022100 5ustar twernertwernerjericho-html-3.1/samples/webapps/JerichoHTML/WEB-INF/web.xml0000644000175000017500000000037710553424710023413 0ustar twernertwerner Jericho HTML Parser Sample Web Applications jericho-html-3.1/samples/webapps/JerichoHTML/WEB-INF/lib/0000755000175000017500000000000011214132426022646 5ustar 
twernertwernerjericho-html-3.1/samples/webapps/JerichoHTML/WEB-INF/lib/jericho-html-3.1.jar0000644000175000017500000053775511214132416026255 0ustar twernertwernerPK!M: META-INF/PKPK!M:META-INF/MANIFEST.MFMLK-. K-*ϳR03r.JM,IMu *h)f&W+x%irrPK 6OGGPK M:net/PK M:net/htmlparser/PK !M:net/htmlparser/jericho/PK M:&net/htmlparser/jericho/Attribute.classVSW]HKX#>PAj&>E!MB,a1l0 A;я@==s=;1& r8'GH'8yAqcq.&P1, F F88`Tq0%b+uNU .($'b '1YE'Mqg*S Ȩ2=52lg֕15&T`h6bƕLVʹ->nAPRKŰO`.mY!/f <3 D"^`@W$-E0/p$D  飡H(WM46!$6G<& nM׌NҹL\m>K *\Io۲ d`XfsūmK8d+. ؊q涆`%c Znu( i2>ꉘcyKN0 PJ-[l/{,n$ɸ:nh԰ޤcų\9-?ȩ:]=;}ñ;tPn$AP[x[;`‰{ x_PGXi S5+j;LЇu aI44$4+.YVk3X&kL2xl!Njثlk#; Dn#y{{J$["G%$?X"?D%rɝ%tT pA[[돦 ʾ<*ꝿML4z'p8z 9rЍjV|˶6dvQU@ž fƶ6אY 1 G%gUê8q ȂgAX0mn4rڦKUsTȫWUЪ@,*\E--7+ň,<+-`kU&V>lٗe! L)0'!6]G}xXW~<-Wp<{,+,a436@%8껁MF \OmEk(8IH+ih_ϙbus1lWlo$J*J~$ǻ2/]ulWPFo 3?PK28 PK M:)net/htmlparser/jericho/Attributes$1.classn@NbZrXbh¡n@ !-.h*vE/jGƮMy$Pٴg>hw~yNp*.ɻ,RI.冂 n3<3o\{阧ƈ$%DN{gmw~ơxɐo +ɐ6g؏"H wf4p#3#/33ՐGC;MTPʢS M55<]aT ;y=I,xD&zjo-sngZ۴~ZIa{ַzbj˶ζ;˛0TŻ֛ܢh^*=N<'_o}_; (bBXP%T2,V/(a:(& [ x'dk66u>Y isj*IW)jPK iPK M:4net/htmlparser/jericho/Attributes$ParsingState.classTmOP=B (L׹ tso00luiT!߱1k2-[칷ɢ%9s{s}?R""aYœ$d^1cH3~*c3S˘f1ø ᅀAeKWwjVU^ovi74t,e۶ڛ-ӱ:+'Yܡns㴛] );U])vuU z/'5]k{JeL&ksqU*WJNoy>H5W_?\luV3L7?P$^eʡybZi4n7UhMg]@z잀lئm  ?3IG&XG1\ؐPtQ܅EYHE1 `i3Ha[@:ilϖCmo$k8Oty|qnH\g >O<3c)A b1wH%q!|C_S$?q;=S$!3 -,:nZ^/=B4F93\s r1`!.b\\Ĺytsi.c#b@3B.4Cr1fhhL;poD]"*PK@pPK M:'net/htmlparser/jericho/Attributes.class9 X[uHW+qy,l9!F0q*1`'`G Ž8؉t9 KW~eZצoNMvNGh" dǺefnݺsuIFq[O99|͗[ILvyxQC_f4I0+/Kp / *¯;/o8hr |_3o ;No;u;"1/HMH?v& g9b7_0%#Io,w% U?$O oῘ3C2~OoB9"Ak9 h% EQ"#]B!`+R*ư3xZFwXb"j&b=,v!o(JDK Sj|Hߗo, 8D#Ԇ1%Ճʙx<%5e-Bc=KCjW PX-Bo,L̎ 6IOHVVt_W:Ymڕ(h'v*Nި$[Qzu5J2N)QWЄ^oqEu˗!Bim$N)HorJ?U'U%G,neڭ¢XURq%cInfi-!崉L[-J= m9sHt(1QJ,@˲lIw-]Tl* *S9hъ#-u]yJD# VGm,F[mTP(# Lg(؏pJ1+PT-a cC.%HpGߙ:GX WY"^$(,517@|ݑXrX1JRo /N(AdR g*#dVm~,AEͦ-GT!"$#|j|Ya>;= &lᬭp{FhD+VmR,?Iet)znvS 'e 87X\C{B$QU'ҼQ$z3k0Jd 
&Ef4luH=݋ϲPGi9R\v*Vs(c=0%HpPρc=)⣕`MTB|ꚈN&NSш(a^dHWAi/qHotwlI?4F3J >b{ϱU }\,kG_cǦ\N^%Aɫeܤ2%ZȠr}7yfQɀ #*h*}*ejʫJ49m$g!ل{S(:u=8?qs8}fL˯YEh +6TRK`>V4Iފ^'zLicq@=MœwK*AtÛԵLcuo7bEo~ gph;kYto,Ԇ3WY6o)\{Hz]` NbUgo 6ki:g< @͵ֆY@[x ,>% .cm;8}Nal-(qas&Fi^sJ3`_6r'``K cc2g s-;~[| |.r*n1 q.#-Ay=e~%נrd:˭~ٗ Z;k,~տu UJ)π+{F%m]"33Pߖ[>j ],5wG{& nYu je5j񹜮YX?YY\kCe{xDDaϢBƄ{siCaEf;Z{rA?B%e@6b`}en1~m mFAxȩ J&c;e:Wyӈ! p¡2 Fh5YI3Joi48wj<,L{ILx2D|9Q ~s6[S A:2I갡Z]( *?PKf6}&PK!M:4net/htmlparser/jericho/AttributesOutputSegment.classUmSU~nlXZRզ-ƪ -DmCe6]?oGf7G9] &gys{ݿw7`xwtLc^KXn :G[l?uB y. R(3eEVuL}aMú O@ҫذ/wbXY* GG Ϟ,Ђ9jQ`,{R6% _\RJ淳5^, =9;n=eK0ifCX*QBپRQr`F/n/WȦr|EfSfRmU]ɹ̚[iXJ*6VC UYzw)?@>jho} (ࡁ)p$ҶܥSǺIOWZFRx7煮\"geb e2b`  ^ˡd 0W<|MstRoMQeK]OB%,o[TuiΟVwfѓyK 0DQߨgt)'(ً9gNSO/ _ۨIKH"D_4OgUJk䙤q\>AaP!F=Vz}xއ+c$b5z;^rfA`~˟֏Z#{]\'wǰ 4+H "^nFYo6XX'.1l<֙(GbcZ\Cj)>1<@:}!uN!& m\|J=bI&]PK.S PK!M:.net/htmlparser/jericho/BasicLogFormatter.classSmOA~-8 (E*] wii-Tc,Ǒ㮹^/b$Q;Knfggggf,Ve`J5\q74 21#CB1Ib%pG¸$0Jx$6N ' ^94mΐ5.6K -}G׺iicmhˎ{wK i*Tʂe Oye[.LkzƖnKYjrKV\ܐ M)}A~ 1]\"3[+3iU2Ck;9VŮKLPC0%L\nlIƙZeӕUSw3.VέlU.э}|肀FL2[/ ,5 _ŲF5Đ LHiXaT*ekt>Nmkqa6iG8a)*gdB'ʌ)^̿52A IVu[1Wn޺\#Դ~Tcz1fGB;ҖK>Hq0Gt م 0pL!ߛjVw=]G^N|;;طw<8QzTZE\DmE&zH{,Ř-"SlVĻ??\E"Nz8Io>x@Uf!l"_6LeiEk=&4әȩ͜HG{1uPK5cPK M:"net/htmlparser/jericho/Cache.classVweMtt(%4-)} Qi)R0hZ4ҁ|ǂݸ#"G.\txfq﾿&57wa-v {x3&eneSu؋}Lߍ:`A!b4s."3,1Las:"Q>8&"ɒLRL"2ntb7O3q:r"NpFψ8)6i1E@{$}dFֲwTl/j( X.'rbTFFs3#rl:Y*dZ|V@a65512:=6;(?q ƝetZ%!SOGuMMu5S>SJu/&sQR }V@+K*J1])/)`,iЃ Wú$ ;ЇuUi 9 +ˇ"{!/IxHxJxy 1 a&u!M&CxKxG»"=  #  [͖ I!wzkNMMK_IJ p hXtk2\,L\BiHpwt(gʠ R] i҇B:觓r\y,,E|Q˂m` Ÿx# G p?F8"p<¿"p"I'#p*i#p&Yg#p.y#,EB.FXp rK.C W"\p5JkV!F:5k!Ga 7"܄p3-"܆p;w"lD n{E~DxaGEx q'Dx igExy^Dx eW^Ex u7Dx mwEx}>DcO>E s/D koE{~D؄6@5hE[m ZD[ m1]vCm9h{텶7 }n/~h+n?h mAhF5mA-ڡhA;vhGhwDڑhGhcvWXvO{xv"Z$uhG{hGۀv*ߡv:hg=,vߣ=\6mBی6v>hhA{(6ihCEہm .B.od;M./]o{Dp{4Xzq%0x[" zN>$ >"OޙM[ƆAtcG{ L3<0%AdbQS\)dq!ggrfVAܦ\6H:X811ey 7ґhFs\~i_2l|sWa +N:#WpU\lWqxx5g4kJL{*&cFkbU$FX}ƛr2ײjK6eeqx6b ؖlnMӺɝX\. 
liYP2r2yX.!=r|K6@zo~7%MndY2o)˂՝jSIQބmŲ" ]Uk]}F}% YWW|+EgX;cZxeCGE|lȷd>]4D]<%;mVOIɎT,g:yFdg|'q3V璳 %_3&5Lg"%$E>^Y.ȨW%p#qץdA!*񦼧D˷E|&]}$>ޓ5XBdW+&Ltʤ~̵҅e" m~mfm~._ȫu%5'$5+)Zx_zFdo\\\rq\Z3YH~䒋XOgx O)ʻY$:EH~6Q'Hi)ask; wr9Qm鲢Z3ɨ8:?9,0Sr<<%9yA6=5UrZy 8gF\ںqViNOдI_*k[EZLn~dnntgNv]'6w,]^0RY'[YzTa6TMɴ$o"xsw3gK-bS:wr9s\=efƵ-4hNV]Oz<|i ӨҴ- w[=TZ\*tN5NJ\X=MpR3)vQ+6=Via&?y2Qd1V9#%͜Sлof68JFqt8FV*NDt"]ˤv❃9eTf}~*#35e 貕;Q^ Ӈȗ,tW,/ng$]^fu^< dU?\#U)%#%{N6h̫M[Z$XN,ArHddT(# vUPk%g$ڤwO)sg4̝0vԆsؖ1%NgRt3E03Cҝ =>N?ԫ. NthlKF֑Ȏ8$M6f69[G}f[KȶkmM[WW7@z2t/%-z9:C' Eޛ䦧fhs7i+0\"ԙNĶ=,ݔT+݌LI0v)%FLL4Z3a0K:bDǿx)Lw AYD{g2U_ 0.z80J6pLK vb_$ag]bMf=f { aO'dokYbz̞dO09 LuSbTY^R9IoKCa rsriB&jψALػĞUbfsc;-R{A^h/T] &V#;2? 6wGmc%"Cه|7\?/bCQ2i_LCUQhC-LjH4(֏3T}^(,s]%&̟_~x!.GZ-F4}맕|bdD?PB?p"ߗ|6a F*/T| Qŵ&jMjVj EZ\uXܐ/ߐ/!?/K3UgF]ԱsAWA|xD<44ƣ *77:Oh|d@2*ԉHtMɗg1echzLusVYr'X/1$IʁrT'ӊT`Z)&Gr~r N{ǔAIj%:hJpMXMTdf#C4N_:8s?h:֖hX.ѱjhIXS0ǚ)G1Q4Nt0vnˠ3:hK $]!'>|Ԥ*M1 ->Tq$)K^ZRQ>rQdҬVPw%ތW>UztS·y|u*Ga{5\=pQΎHVX#N1z-k;ŏEx16+7G<6XO葁g#l|@-aocR_e{E̮Wثbj2fqI>ݚB,K av$:bv5(irUٵ@ڰtRl]$mhf g$+uݯ}3Zs3:tà^_;}CNDٿZe54wr@^7l=)m3к}7TОSϟo\#Dt^eXoILw+^q甜5Q/,EiXo{" C~ oK>lϨ1{}kOApt5'~nv ǒ=HHq]Kz_GGzHHOvn =ѿ#=IpL:z=G?:z.ynttfG'Hwt VG'ICIF: vGF:ҝΑ^腤9z1%?;/I'}$}AhGC>q>oqIIISII3IIsII/u/tE/v2җ8z9K}}UJG_CzWב^赤9z= M-mFGEnGC^GG~G?@AG?DaG?BQG?FqG?AIG?EiG?CYG?GyG@EGDeGBUGFuGAMGEmGC]GG}G@CGDcGBSGFsGAKGEkGC[GG{G@GGo `6Tf H;4:t9+nN=Htt/ҽ]A HW:zK=@GoEуHW;z0 !ޖPG#]ᤷswp;:z'#=ΎMzw!ٍ&7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!7!p p p p p p QA!3a]H QBHnDTbOR[E/z ?® š .} =AQ͵RJj`s>QvH6 l嚠(_bLJ9^׬JLpKT֬ JlW,(6gm ʆ ʖ֬[[nh<]twX[еg(\#V< Ѽqri3,DrDm>noe҆A˫g ױ޳ǚ^޳]lnm˫pڠY[Ivà yCj}MPr26[w\/M:ԔYli&(+Bon *P CC AqЅ+ hW1cJk*pM՘KHeKhy1_L([ :+(cJXQTQV os%HMm.ȡq.e%oC0xv͖%ekqV_ţ;VdK. 
zUϹ8(,_XZ aLz9zmW$7a`VhavC{KC ŰFBn"nbpG(ă;*\^ /({pgWxpJ*jpwWx*\q W{pzpOyp/k<µpQރ^ 7xV WOMf)Ń+Ճ(̓) `;=ؠp*˃Sx)ǃ L{@xp=x‡<8[^ _`=ت%&CC<ئu7=خ-mfzS{)|߃ ~ ?"yp=D'O=gy/ ?_~~)ʃWPTR{GS&+0<Ƀ =xB+<ŃQx4t L,l\<|ЏRKg+Csѹ <~htB?4Z.PF*㐋q 8dB?D,W!*q 8 ~B\ЏCR!W+㐕 8~JV!*q~VS!q 8dBoP }׾I`UoV8ă([m 'zzN=?TG){T=ЏXx ƃO*>ȃO+,3 у*,s xy=B_R_VXW z5U|]|CPpRX; w y=|_$~p?TG =„?Qx?Ux?Sx?WxPxT!_)CBV`~_+, BoR=^a t{} 2ϧFiOAi§V|1-a!>,8·(pOj:ͧeJҟWLiܧ].i7fO+}ާ.Ƨ=O{*-i/1VէJ>Bi/Uۧi~>RiO+Jtҁ>JiO ;Zn>twntOŷyMm^M|}Xap@p083؋ۇ-Gpj)ۭ w0(|6;2/k|MThp(0Y\PK?]WJPK M:Dnet/htmlparser/jericho/CharacterReference$EncodingFilterWriter.classTR@=KbcbDEiEdvqtabZ>O8᷎Gg|Իp3sO{￾|09 ) FTtl8'yQ.@c*2ȊAWW11:MmmxeDo^ywYdr+ZF>dJ3/Eˌ)XzjVPV2bel IR(*l@CNj&4kn!aj8%bwpW>YGF9zU 䁱)W lCZOy}ZS)G| i<0=; yHUT%[tbwN Gjqu}cٚ%XV1"C<"2vj%{y3۞{MX&KαNĐ8s(BԨuPAnˠAo?'g&wrԿ =T|e24h5XO7H# y#X bN&@ ǤH7j wbjȺn?дO,=TL:iz2- N)9%C>2`a0r!)jjy8aVi?e PK8.gPK M:/net/htmlparser/jericho/CharacterReference.classX \_%SC4J( "~Ucňm6N@+1|BlHhԾlm>6έZĺvl=ݺ]~}_B+ss9<ڣX%Xm4:j{$y)n>tp;qLg 9" .j Zk#hwmQPHKI%L)j7 &.MR'Y WuVܦ^O$%c-#hpU/ '{vCX7L Ru59vf|AntIX$Kpq50}0D]ݧƙ΄*ˈ&5 Fr$KCaG48C,[3W1/@4'ãb%~U9ͭ'HGq5 u r3E$"{/38cT]'&by.r;fpʄQCWRpdO.l2ɶz 2c:3 Or؎h28&ψ"'Xt_bv?W&U-igag id5y388U{X7Y#7#2zIeea inc8ҥ%!Šn#l,ˢOEkvih2ܫ,cIEd܉dD,戹A $BX:2E(nwvo;d#mE1MvŠle>Hf.4jwMʢ|un/CM5Q.,_QY&73[K2IˢB,EXNW~e!2Je9ӵXCT O?ڥڟܺoc?ڕ8$Rx%Q-qJ.ɸU&,Y+:JղX#J^Eb=(3l\1r]U48ƓXt#)}%1ײ8I0HG6m}}O6:ئ} e:~=p9$4̠!y"h$|aD5ڝU[W9ZTf,L=tZdTY ֪)唑grͩXj3T UcGG| hRnVu9gTlIba?{IhZnnmܿF}@QNNO4.zYC^5jYFXk` ' {רavOkU9ˈ41)΃^I=m\kopj"eD`I "Ud|޾`27!["0ܪcʖ-ڣfvi_ՙ[isTh]=Ȓi${빴M C=s+UOjn+G0Ӓpos,9XO0պsznn4/EoO nz+hݰrEb3B>.QU3򩟫xKJO"Oq!Ƞ+^P阱L4-|^H$]0)0D.!i@5;gl A,Cyi|aPFao*- bQZ++/"/F18< Uc U)O!_])̣Ds9z vyO8I)np3LS#)PEq8ưrzޮһTuz,Ƈ_UuZedRheg*(xNzBUC}jS bY'yG],Qߍw&kxɒb a!zY'/8VNJ{&i٥Z5xtGdMD׷NvכO` ʼg!ߝ?-.K7O`}+a/y^2) ) Sjf oqp?AC׏MGJMS盎M9l=Qli7[N#oiJF-b{m=6AQm{(tΠ 1*ܶQ0\p)l&'b=F%f yYqiAc4lma *иp4!:Fb_ƐP?PKrM {PK!M:.net/htmlparser/jericho/CharOutputSegment.class}SkS@=ۖ QTy[)PPeGƏia4ʏcrPh'{ι&66T$cs)1'N?➊x IX1,iG )rV߷4 5H31fqSxॆ 
'VV]i0;.+jx!ےT2g4P7]JA, f``u[_O(Mc>$0#!gN`B4yqʏ#Evv!B>9ˌ I 8@+O',*9c)R9>B՞p7} ש҄wh&ہ! z-#R˒R;b!lH/ZdpqOџ: o%`/PKtPK M:2net/htmlparser/jericho/CharSequenceParseText.classTMoU=xNz&5{`h@]X"מ&¸%/$0 HHl] ½8vH,?7^P'VM5B[ǻR:,\DZ d,"!;? j{{UIXol!AYpBy 5 %%\ ~l߬W'z|hs돶*2S&HT  R#C_H]ϲ2'? yf)LgHn'~fXH%G҇cXk`N> ;.p"zPbCumgmkROey 7mLjL*Ȗ>ۇѡVP9_C2O)W7}MJBzD58<,0٥Ss\O[Up-Ԭp^zp)v`uP6]Оb:.xQ a ](Їj]XmZ{&^[Qi&ƈ7I>))Bq:,2S鵕f#ݾ#D;[b۱`3-%_<۫zI6 ]e҄vQk^\b +hh&u\{m&F7C'/Ha螈0s<}}X!$}"'SY} "cfȇPKMvPK M:-net/htmlparser/jericho/CharStreamSource.class]N@ECxEZQB(Q 7^޵6cȷQ|bb=3 G1 7p+qg~>FMέg1NCݶlWp%zvQLb-tMIɜm!%a7Ig᜸xc YI'KˢJiV5{U7yM}xĺYLٲ\BFO~(Q{} {H?QPKʻFPK!M:1net/htmlparser/jericho/CharStreamSourceUtil.classSKsP=GCSRjG@X_:SBGV-܁0!a'ܺv[7beF(cE|9_~ :8K2B,Ã+3k_ aU.|CBB"cI&&Ab+,[-j|E+Z~^ʾ.zjb~fspu!z:JUd-wm2 M6=jv7ږkSmF˱,,v:%("xB5[x<DQø= "V\BZ­ T&ƈzwp`HP7XI|2\c'X;e\7ND{K߷t3HQ .bRΚ%wn=Q/EJ^M"Ύt`#iLkc(#!۴EE]oWboZ7g,"YW7PKK-PK M:5net/htmlparser/jericho/Config$CompatibilityMode.classV]We~dIXZCh |H4$(%ydJz7xs?Jl(BC.fvvf^, aɇpKMtSOYw>c9/҇f,Xc Xf[`[^2\h=< uNƋFC E2Y 9&s#W"NPPT3E0aꖾC9ҪѢaBV74K[ZAK 6DA)(,Lb"%Lԕ,A(D3i 2g↰"[V6 (DEAOm"ќovK8V_ɢUhrjzm>K,-M4fj´4rZG%i!$4Ħ%EsټfzFҔ0Ow*;\Og%ϸn GܳLU^9_ VPO5^͙39 ko:;1` 圶cUN&& IvVc\zD)ҳZ>d+mDYL5ϝP)^܌9 +Zb4x*0De/"1t㎂zѧ. ,+(A'>Y3iA[W`X0xTU ,+x;tN4{=[̪):<I*:TU^o(WVBPU-Y-du!73k ,py"g{B- |  C&1mm}DOt݀ ɦ;z#nX:?d݄p>o JMtGx=Ý{  s$染FZ"WUܞ_$OAO8H*Sv펿),5||\~}ٚ0*00*0ׄQ*0ԆQ*0-P dto[Jh;d&\^.;%;wtU9);qwh HmtW1NK",~8@`iá?"2u^wu:/mÎaǩ6^냚a7[x*c|aDcn?@PK_-( PK M:Jnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings.classTRPNZP#Z.R(^Jcg 2ˉ N;iT|/PGf|JӦ:dfwϞowob98 "K.U)1AH#+}CM붊*=̇ ø0aUL״<. 
?+Q--<V`,q6lwSb#9X^{*W,2Lܶuy(5^}\Vmfufk[j֨MCOM Vh >zcΐ{5{mQQP> ;Eѿ׍(4~9T\鴞QF,*g Jo/PKAb#PK M:#net/htmlparser/jericho/Config.classSRA=(P!\d^eZym DOGI-зTI8_&@'PKdaPK M:$net/htmlparser/jericho/Element.classW[~C+Y;)㤭q(`S4dAdUV+۴i#ML[Ii$ =M^i;=~_vfw%A<1;732wql<ʰ>ļײS Ŧ5]1R$ ec3{ֹ%S(%!=6qLlUɮ_$=mа:b>nZ3Sԙ58├LRqctCnԝ!"/a H{D\p/J8JD\Boc@o,x@֞0Lƴl59Q-L#vvL9ȆiE|&r.K*(>H6% 2 1G~6u5$FHZf*7>+glVUYUKV䩜Y)VN+YyB%IWMJ k~h3TIgHhQШ3ɅM:)NٜV'm ½<'~{]Rf K9JX/DR+x@¯pJ¯;XjQt,U^Bzr(n5Lk14 %'YAm/_$43lf30$Ȉo^*u ӌ}K*M 5QTTZʖ6-87Rb\N-[x7*$ħF˭/뱕Z F w2q*d+V GjEM:w]@rI%&ćV#ls0]cYRCNSӪ af*i\C U:4 OB*3 e>%qR_ڦKͩuv]9%y}Wōi浟U LB~Ri){\-ZRirWZ87QsUۜ=7gzpx>GH$(TT?0i߷cA+!v,r UaՑ[X qWܷ9#Eڳ_O3q]x Ccjqwws`A}C͡=*zEu( Kb"CyYH4V`EAXnu o,g$q 颗)80u$(:`{&Bj9GCGfth;0?z_NޝY7rjLw2bX ,(Y|nPKfpUBPK M:-net/htmlparser/jericho/EncodingDetector.classW[ , QWP!Ny&ɚd`p̪6m4}Mbn0i>c?Jsfvgs{^O¿˜@Y:O jfVEB".؜u &do1v=>;,z`&ߋY<#؆Y<"~Ə"؁ w^~ x/xY/W̜grIįE"U=%5xCě"e]oN ǻ,^M/TYM3yS3:gO "yEuz>RPڤUl!`CTڬ+\܀۔:qΫ'DO?ɌO 4e+}ZZP ԍLI O,Dˁ"bTTgcC-⌀9>#[Uu|認:za(YʵqgN1y n2eer^V/ZVs,z&%bӭb`vŽr$XLOQ튄dHV,9KY.lˊ9* X$ Oq$,5dS< i&9a Q STM5` (tj[WIMAvc\@aYU|*aeF$:ng a+WH]Fg-["y.VB&F^_տ1hgDSDg=sC U] gxHV95WLd N<[AP\f|ba+ |7XCR.Z,]$k J -E3I hcYS \I9[6Me,| 7੢huSǩbe~þmqEbgY~F|53NβF)P%T`_a. 
[Ҭ> (]jb4pOͫ~owUb99 4TiIzИ=V+"CU]烪%QSͼ@3]SYT$m:LR8DO/n7j=G_57agr[uty|7ؐ#{]4?雯Z Gӊ}eD 5%~_ǜ mU0^p udN(aM/a223%DKhJDX}7%_ԲҍfʲeKo6&.!BæKf:@qh qa[lcN,a틐ERZDK5gh 9Xlab0q;te(XGvWA-DVofoY \n"[FI\2Ͼؑa+9^;0q=sDؤ f@"҉(B3OpDˎ| ?d\넗 ϳ"[d/ɤy죱j }Rd!V%Ia Nb%ƱU$c_.ʉ`)Wa\ H&6Ӱ{]|Fݠ::z8*Vh+(E.G(Pġm]5jM5^}VXq,ydm[ QtvR•UgPKvF[PK M:#net/htmlparser/jericho/EndTag.classV[wUN$a `Hj&TE)ho6ZTpڜL&\A]B苮7LBtC3g_n(spU,hsB: vBxޅᢃN'ItЅnE-2.AD,rHK C^ ∌"*D4$"2,t2FB!$Q%)cŵhDEqm7ꇍĘ^?p- 5ǵ߷~8yS(#ZPX}Z5%D7|kV_:LX,iGYH9Zj:SyW{qJrq_f Jk\W `˓2v~>$*zxh4ϕˢpr|G6{bi7*o#$Ur]&}]GOiE,oiFP k( BFcf w4 ] gjY_'C+qa-v;y*Y<~ SLJrq% t8I hEgxҰx()(MY81L8R8'l/yBRIuxKRۅOFF0;$/܁%&YC+Q4|vB)Y!V zo;m)ٞE?[.C ;FGp>$MuRv/("ID{Gt&{67@Z6]V6a01$ʤ##C'eٟSdY*e'6_@BڟɶJ G0Q(mZˎOd ҋa^ 0>ad ?~PKr$PK!M:<net/htmlparser/jericho/EndTagTypeGenericImplementation.classU[wUNd&a%4@AҤ1Ph RNNd&Lx|/>ZZ]G??PΤˬgg_WIs$Ʊ$-ȨxS:IL>˫ftvbJ8ɨYS:N'Q:d윎3 :ˠsg I\ļvIe o ~`5xSH0o ^Q >m;vp\yIv/ DKnU ?)0_`֩VYIC}˨)GyfNW l\@xfQA4-W<۪yYX EpE`烞OFUy|`*1.8g #k=~cBu&" *%>ᚆ7 ]òHwI喲UK4I$4P>(Q鸁'vKڸ&?,e}%@ھlھOȠpUհv-˸mC|dc|SsҡJө>lP:mir|R b:VoU |)09Ga VC5rUfFirT'"n>_=p7n$ӗ<<=d/ y$磊Z&N $=ygʩZPXFvihF;^)}q4i;>>5+o[7(.8*s= ö_vY\l(S"*vӹDc(|<&QBS=:H8pxY6S'oyv+dwԦHG%{ԙefrFe/2.SɍaAS54_摝ewo˽86c/rye3KMbyv2v$dz|:MVDahhV?"7x(IAr8{ߠE 9}FD6F2BrkmI:!swz=$#G6dwz6DYB8M$IN:HKm>S1#Zhnbyul7&?:Hq 5lOP 8 t;R;y]7h#wqu\z;ĝ\:;D>XǞ~$Nտ K;w_t3GGcb-olXZcaUPKY PK!M:Fnet/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent.classR]OA=#[X?Vkhї/@K  L`,Y&>hGהּy;s{go|GDĂ SA+iھ9-Uh,"BD^oZUw9 >P l"Đ,{S a}:H)AKHMHw^ ћ?)P&+'ߋH"QAY:6۫"iWNX-"lP2 ~-dJoA:= eccFtvqt۵+zۮ?u\أEo{SSzmnr øS0? V+d(C?0)PKPK!M:6net/htmlparser/jericho/EndTagTypeMasonNamedBlock.classRN1=2>Ee! 
p!$b"R&&&~el7D좷sz;S 6lVۈv#Pjz]+U:g6t-sL ss֪ߦØ5:Xlڽ<`p-x3k_3䀹qdl-:8];Q+9'P5KOJ~ZA#8!AJd}4_2.}5kl2q Sͤ| 8Bw~O#[wv9 (zW/5rY"H/MԃoHG UqK86zգz})|Od 4ie+H\@~f o|PKwPK!M:-net/htmlparser/jericho/EndTagTypeNormal.classQ[KA5wW.M.ҋ"HAJoA˃PGE3&K<̙9|99qh6n`# &EES^;:v 5fL8#1M&,q,iazQҠJu\%H7!#(yL4$a8C{]A1,;QA0/;|V UsH݂= 8 6`y Y.\2|0Xȸ8A{{DJ.Ք/G^\5: gr8AA/ )%Vz(DM씭i'`Yܾ{Ͽ\F`ulY\MR(Ims .OkήX4Vmgk6Jfn`Z0Mc W76k [Bm݉~}(0h+" +XDX4K CŐ.`8(h QuՋQFAgi=غ68ᚅ2uT8> rxsr|M9hXñ86CGߍ|r:eZ/"6w+2P=90tU# Z{pyVp(ZH4Huv|Yγ s VW=rj2-]Ss/,(o }\5c GQgHX_Q ),ӤzϨ#![=F#stqn0Xl709"EᶈwxޑNO0 c;L42oCV3jLh4j=0U>uvsH{[=*ב["Њ=&iBS׌ݮDrDbgqoHYbhEl6T:d0rv =K7Ü:R33H'C$/Xn,J ̗{ eӚú^!Χw1jO3}dƗ 3yOY?w1 F..q gNfq*wzwqf ֯gl,`'zPKfr8PK M:*net/htmlparser/jericho/FormControl$1.classmOPwOf'26ATx1[' :7֮]J;gc w~!9`ҽϯsOソ<J1U%n0NKp6,3,Xf%I 4n)(nr(v"eDJ7aD?r2{@{rp@Z`a='qÁ u ܗ1E,xCR}+W7Cv( #*=bi#9 @H-N0UPKwhPK M:9net/htmlparser/jericho/FormControl$ElementContainer.classV[WWN3arLfBRAڢئU;$0a&LX?>`.'\>̅sٳogO*p &*nfS*nᶴ|!W*1#/U&q_)+{2'r@?ۯV%EXbS>-iU n~Cfq?< wժ/EгaloZy2GײY _ge2V,qT\Qk-J+U%t#C|ƴMa4sBv!6甈,He-ju,? ZE!WHƧ“ge#嵂/w[榏BsK vIhλw=+*QY~wUyAkN\3e´#oC998  'd"FLXFyOD!%T"N#q8F󸾏PHj9bHƶ~G#dt; N'1B =EN8;!TBAt`7`魳p2g{OObZ CbzIhNRl JCQNDOX 2r"]aTP$H.1HahGcU)8PKÄp PK M:?net/htmlparser/jericho/FormControl$ImageSubmitFormControl.classSN@=I)Rh) }@A x!W^GgP"Q1AH هz̙ٹ]X>?n DbPP 9}GW4LhfH: 1L;JqTe`{[ CDѷ%CMGv!34d9fap|DOXPek? U^?u\[;b9,,hXHsaaE_Nnd06kg -z #5F=Qr,ͨ'#{ jBɰZ'Ҧ/iuCuZ{6mldlʝф^{rU-ժŖl9ڒ`xI4 c 2Iwȣ"SQB* s|R|=?p8~x@xGU9q죮C!/908L49 İ>B9a[s!ڶ@. 
U`1rlo`h=RU(%9 ypQ6Kur(Aңdȣzr1jUv*CO{e`0R 9iRru"1ZL܉lXR(P#2PN&,0վQ(.dr|J*66f3 L*( C&rLvZNT_ܥ[2D}lj^{ _M>P)Y*|юtaԝJp$w2O/ӹ|Nuc[BvphI> < I$~"mCy]E[ou"1厐 ~S!vG6}`N1oc98_,T+m(y4iȰzVFmGN}" , PKi PK M:;net/htmlparser/jericho/FormControl$PositionComparator.classRMO@}8)B 1 (RZ)**HHa9PJ= ($R+>myzf\]7\,x(3ϱbɄ_xb*aj7nJ+ncAp;=x/Ot?@.'sO*뷔ԙJ"MxgqƲ|뉎nGW"kqS߉飤+c}jL|ﯧ:IkU[G3ᑏ5\5ppD'JF4F^r+TjྊL8a BDm27wՆeiZoOp6~ s)sdq}svsky5`&OQؼScX$X 4`anΣX]Ȩهe,PK ޫuPK M:Anet/htmlparser/jericho/FormControl$RadioCheckboxFormControl.classWWW"gŸ[C mZ!@MKę J]ǶTOK%{s~޻;"&q;pЍ;=8fZF+DKݻd‹w)nt cn#ݢ 7z'8Nx/^< l‹$ -GU!eXhmSMQIR=մz ji>`1|O>%1|K _K ck1 e0|'Ïg~ Mn%0`nBS,܌jpN#?IWekwe /rQE%imrg5IeM)Lw+*aޢe93AK/kҌtZVXaXZ1=eʀ&"˒Vu R֙vqbS.Qx2H ՗: L]}+O@DlIG Nh/9C)2L{!hAKW2хPn(IX <*D!US*sA/d{H>==ux\;+^%UQS+_A|-qEo,(=Rb}lR[HC iqtnHhw[gW_yx-@%/TȕL*L1I֒^) :Ok%Њji([U#SQrj s*םbڝ!NHs`!vُXH)UՖHOOF;wKPIp(@ Mb"%Z>Ϩ Ob$BEq銊<<{b9;'\x!zN@]xRPuOҲ ,ŋX_PKltB~PK M::net/htmlparser/jericho/FormControl$SelectFormControl.classXcWMv7L6MH$G0ViCe.A6 &Uqؽlgfjkj->ZժaZW}/~ϝn]vs{p[B#Uq {^ qJ"!"Yd* wӠa ESSB ,6dp\% BCKgx ˏ ۏ'(>g4? g,=΅?^q?/ O XgK szm]g栦X mЇ46t;b3S Sjxs+*G=IZZ1-fcH=EWw-PKU_@9 .W_pLwH2)f4YM䘢eh@0zL9h>MUӒ my⌭j=a"s>N銝1Ifw]o'6 $g{Ƀ]Yvo ,(8A$J2Ok NӐF p7>+o$#w}*Ӓ\rXW,) ч3v:c5nPY)1+^$eVx fi/K",i<#+| _'ōo"Șny!ce_ol+ XA6`G36v)ǔ+QBYWHdS/o*AWl4ƜGFG"C:x.seR!'G7EE('J6Tl |îB}\ڮF&cFRTy*. z_kG56N"}Y,mYl>ʹYw#[y@Ь~Wy6Q܏?PKa PK M::net/htmlparser/jericho/FormControl$SubmitFormControl.classT[OA-&xRqE/E)4@,6CYmvgIC }GlנBmM܇9gg}"1(C7b[Q:ݎ&n*iH1)Y̅1FE7u2U2PEhjmzR7 uߴF2m'MKWB"p8*iǚjhf]- [7C\b2Սn$'Su % XUp+-$၄/PS6|o|+$ᑂ)x -Mh1p2=mym&'/pCT)Yw溜puTPuWT[[/L JY `p7&5]箛37ǐQ]چT-^"*z[px&@mT x)LY/ՋFH3uGT0ȅw=~ت&=W(UsrS`å{wR>>~N-ͩ)][ 'Co,ҡG=8ɟo@? G'>|TSY=y=e57GͿ|~샿O"3´? o6OO/?OE?~ W_y8*Z}pk >؅A'HqɌ+`"6`/y*Ws?kE1yc np"7'@qs5n˛ElBĭ ؎:06`nvw;} oc>N} {b/#cbVf{k1S"̒nq@^# x x, 8( E4M1Ri*&B@JQ4O,YAT-UlȖNMJJ[tcI =62#'Z<ѷnDNzߴdR?Q4*z E.mhL1ʩVl6 G5ꚶfRĩ]GCO]) h G䮔%F-CՒ4֨h ۖ+E|Z)Z 4ށ>2t(=?pP4vd7zhcZfɩ"A:}Ã{Gzc#%tKMu-m֏IM21IS{1Ty02q @ pUXڝ>=AjC^ҝcv=XO\aȶ4Æ|DZ:? 
1E,=nksk؛9wxݻbo+xB^\#XHZɌr|F`EݾT=,O5USL) `K}5]ԫ풸m䠡$): o*$>=žV,3}?/UM6-%e)E6rX$3'.:šiQNdmU͘-"ㆽ<9c J*aXW3ʞzSr\hsƂe!)mܯLfmJwgm FIJ蛖Qьىxے)/225')>ڑU,=b~5 { Cy'RtV!Kr8"!N:.ܗt6PC^0}owUlbm;m[jOٗ7%6|GG%| 8*I8Pr8&a hy$;+`cy4'>(ku>Y Nb\ <'&%FrϨb0Q03v"Cqgq՚d4CIM5DeJ!AKk"b:ԯ&wԢȔHpJGѐDA!@1 f{DwViϨsn%R`чaN28BKs5&S,ȿ\ǩ-l7F OI.x2 \3Λ%xS3[O| IV|%}L:NsŪ;W.a-5δ+Z``,2<p &>_ %ܨD·bӪH:d3A(P&e YKFz#u%|xN  ߉)|7]|Η9  Coy/'eaV2OJpzc>-i}~XHQxFIq>'Ic,'𰀟S'gsLzWd@ F2(AVz$$pY%<~A8^X>_x2}NҪ l&Ϥ3)S7f/r#` O({ CLeZH_ړKm?Vs}ʆI 9"l7]٢ KdD#q7zE[*u ! 83TCIYSFҵbk#NЛM@7\Rz2%e6U. hJR%Ĉ)hctpj2b~a \.鴢%8BUnaIҍ,(kKEKų5xfo$Q lӔn[|$ P59eg>|[QRS$HR O60ņVA[1;zKUG{Wob2 J&p:eZPջcW.J%ȃLښU"ŏ| ;FkKJfSyZM}W^&;P5pumduMŒμWg'N"vnsvi⼢OidӲ9Dq)?E񃟫=ڙ+e/p뉓/+8/K=ޯ0s,z( C2,Q9F$2FJZ~:|,C bӆ~QCa}&z?jΛkʹ"(~v*z2_Tɼjζ-]I{f8~.)kK~Mz|Ri "|>D=OjBMj!i1+"&gaEa(|T]qjq>xV Єpp\ϟUaDkcuL,-U@Cgp6-`_?7ѮmZmZDpn/$k-B-A[}>ﰛAny6pIhBA n8j{ž`@??-v< ^] py<CY=§b)BM{'K6c9g=ۙz>iBk0DD],=_i\c3Ŕ,E"XI9ÆtZtҹߟ\@sw;QmS5*J뵫dBhj `Vx&¿5$ P'9jQj[u_߲%!o5cMd^z#3'g]TfXTdJJ囨 PfBH[Ty PK_' $PK M:Fnet/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValue.classSmOP~`k) 8/b\2ĤJ]],܍ئ9>Y\ļpDA㢁%wM,X1q/ `U{x^^xnĐN;RrOtxU1}~ɃE-)"Poy[09%3L rKNJ2F<pD?w]:ĺNjwT*0j-NKܖA؇~ZϔW C]a7lf.v 7q!Sm& az+cb]D?kv!{dj0jCՆ7aRI EZ9< LPK'܁-CPK M:3net/htmlparser/jericho/FormControlOutputStyle.classSn@= ܸ%5!mP*!Ab r!FFfjTZUu!iygsfx6ϢxM(Vny^Hs㴗 )" "bU1yG3:ܰ,ݩdOǭJ@iZ:UNkDwS[yg;#:ٞԋ,jw˵Y~T-f܍J3);G U ;&i P;SJsE ˠGAD5F/@5d:hX} J zr_OGElx{elP<;.$_RHܕHPU#Fzmv}2{uv{[\K 㝥Y.GʞPN 8m1Fi6@pY)|Dg W յ#S=.k)! 
sRGM@n_#|, vl yP '_8߼pBa< 0 |*7OsS ӇyE/qQg 3q|!Eq/1D80 w_e8:$7[[; { 3|!G 3$>2|㳊J  Z6 UTJC+ZI.R!R4R1\ZՆȔ99:-%RMLH%“D,JL5lQ2ٸY6#ǒ<;)H Ц嵂fI WP^5dKU`Y-OZnNW(R'rebY%CERR(i)O ^ӫz~hJ-/'U餚#F'%T2M$2TRIke&Fw"edIiLL=̶Wͅ㳒,`/}9#}+Z0lj jktt,iqvZM#8@clN7%FKBXxz9Y46.աH6o P;7IF01D|/o.Q<c"N"."M)-Ίߋ,HװP1͢1GMKţ"Np3Mqn"<]W NID.jISEL ԴANe-{nxQ&oQm\N3xuQ%5)+?\ǭA|h +pAUZ14xVejı6wӲA[Wu|8sh]XvE 8"jj.7v ӿXp6qƈ6J6Fmq#ѵ„|!`5Mpc{!ߋiIӪֱ&Xk/=3vL7p&-EA}pM34:ܡC u::]beԚy28ko#UxjMm~Zn{k FvUBh[h!3V^먋-b$܄*VGt^Ƕ5,b3鰈Nrv8jDEt7PJMcEf=qPk1w={lfg1 ~953o lf/MvV)]Utiw=rS.9ͪ#D% n}* h57puZn#pYǑSܿd T?D=t?Fk~{G,8wؓSPKgi PK M:&net/htmlparser/jericho/FormField.classVWó>|w|_/%~l$+|؂ooX$S~ld e)͘;<ׇ||Qq pه,1962a5#9 ~`Fli ]B 6Mjmf )OBm&hfFNf,[PfT/cƄiP7aqw؆3z|m0F-Τ~Jz{;AgՑd*ѝT$3+tmNZiSXp>X7bvѮ9+f8ifVԊ%cX!!#dVFu?Ps#ޒ 70F"DԚ]_]BV 5k֓eD# QZFcȔjqJƌ--\fԵE.R5P^keYEKsqW ͧBECѨ5$t?Wk LZR*)j XlӰfgLֻ*6Vr{JoBORnkfQqqا*~ -T~}Wх* ?U&NEN*9ઊ<-u.aPţ8*Ẋ_ຂ_~M1<" y}yf7T,ປf\OQ?g=]`靾BK/S.7=^,kA kζOrvM@J'/\ ~ߏ5%Nѕ2l=_~w)/cZ_?mKJ۽χK xxUuNLKetSg8R`y-GQv _3tYe^?kA>=1aKH.ɰPi,;]ڷ)&&t+^躥 &#CћLiPG J~.n'=X`PGj}уL@f(aI߹-ZUq3=H%Ecx{`\Sg hu<4 =f!=65P/E< 4K4{ozF>nXqĎkGia(iGzX5_UHZӵ9<3κ.,fv,eh}K*H%:aw$7q\QSXhta]ϴTWGVlG^neq#ǡE[籁+ACQ"9| H1K$kC&0%۲q9cDH3acmRdht`Cnu#7Pe7+KK^/Kc˱@p_w\C@[>xpu]n]ֲylmki.&If<]FZ^4e0$a qc b? ~r̊6f2Eto MLE^W@\Sq1ow?v2‡i`rZ~~sobg6+\>-wCTĒ]h,&J9H0vмFZuxM A 7yY.$ qIe ʏS\Boy3Dpd*ϊryW=Pd"gکCص}QﻼI%y?mL,?1r*hRO򉫲} h &n$qe##^!K|hg O?=SI~Dh[e#7\Ϧz]'堣Mf߮PK,pWPK M:.net/htmlparser/jericho/FormFields$Column.classPMK@״Z=TP#  A衷ٶ[I__ Ń\fރfp.UTC#]{ Qbg]%uL8El¦FSipD?1I@Bmne,Gȸ/BxfdV1ʨp.vWʝ$pF*<EE'ыvCd-TT3Ν V]x<ߊ9⊉o3xk2s3WK%z;?`+hb/PK kPK M:'net/htmlparser/jericho/FormFields.classW x\Uo23/3s$46.idi M$)5yI^ 3oZ Z@) ¸N ZDE W܍%}Ӽ6|_s9?=: `h2ܦ!T }  >Çĝ!,] GPǘLO|)>B>8>󏉟}Rx'Qp+:*30J8`$9<|c 6%^=!46FyQ6+XX7}Y7x&rKO|gބwA==}>ydV?Տ)~O3?W@5* d{:ZSj>*լ@cKm*zCO_:`ޟVBN|2!_ӑ֌c0Usy-״S=LnMRk֓tGˤȜKCĶkT2M%Jyy>Y *:Xuc=s'rJ1Lz͊K|^oiȂ55SH؊vׯ!ɕ/ 1&T#[2'O)m? 
w*p85xZӝgO#X[j7R]fg!hpOJSs] Pޥ&v:X{iLmmtH2cõCݡ[y_/zœQN:Qme&.A-O]:{MX%ߞ:Bm22֚|M.F>BÄ́%I[Ӳ)GkOo.قq^0q9TO9)f9ǘf=sjD/vA'y,$r9%]ux( ~)+<#,X25`.(ɟN?J_Mu7Ef&%Y\K>-Ŀ$HnIF2}E9KQ礨 H#)|/qH\۰[ ETI!)B:軎m(^-' 1Y7l!5RRD-1h@7-R#D/ Ħ@b D=k](vzHq:هw2")E,b)nX<&S+%׳Lf{GP] L֗6?;ITeDt1@rۺHlQ$7 ө"k-rj;n4[j'KK^ä#dyD4=?q7yrywCtdvSI^O]4:-Eׁ7f JȘ4C9 M3^Sأ=rv'@AtjW=-Guq5\úNϒ-7Mq3f;ךou7] B)kXnV.):dRtiSB:]K%%" YT>/c`"t%BѨ#!):iCiU)$FV0(}i(1*ql,W T f$Rz>Ke(42==RFVgQE\P9Mxi/Р _A^JLyz*z72dlD[hTDěR{ZuE4FJC%ޥ~Q?MPC5͠i̡Op%>Edڛ}39\( /4Ǜ}˔2>[0۟DޯՒt SVe;l_4\?F#/N7FϩϑJb~AZ#9SOa+ң`wh8DY 8X?J ,i71YLv G)eJvRpA+וL|]lb;J夒D0EiFb8Vr,c@.N=>%TrIfL4VJNqKe46а<;n v=M?ݵ歺-Ÿ\"[J緪-ӨV f`5RL`#TV0XM$S ;*5ZgׯkM~m!*J+ }٫V#th4kU' 5tUE~CjZ 5ۥvУ v>-ԠՠcfThu,TGLf*5ӨN |7l[5St8vѢ;p>P0#b>>X8OOb>>T43τb>>\< //bR2+bZ:oobV6ъPKJP#L PK!M:/net/htmlparser/jericho/HTMLElementNameSet.classSnP=7/aiB Jh0)J)Ұ"Ls8rl8bOe-l"b͏(sCHĖϜ3gEC* 7H;ɰuɐ!*{t#O˹)9Z[T(!Bk7PKu.PK!M:)net/htmlparser/jericho/HTMLElements.classw\ǟ!t`ن3`&`̲18 1l!a$S=81vY."` q̓6W3tj2N0䧝o G|_,2ppH ߥ"'XNCzXLI"L{K+ffcG?G}pq!}p/B]UfL p_bv<Z FQ"K":FҲ .3+XeYa{uCo{3{+f!už3Q!C@>dSR ccgl`ldlb;,dj c DtT5ǔI'icISA!BaS!3 ,1% NJ(T0e5<2ڐM|5:yK65FYḩH0̍ T0#9.xU^^ v vx#,shʕh7R̢OZ2 -̔Ȝ197s24 dhoPi_(jB|?k+x*OnQrOy.@qM _Jr4[ռ_t##ViJm@HG-LfHU+f;RS aeY'pa]Ҧg JoVb, uR@x2TWêxyg+^/N}?KOuzq?/U_4CTˣVMr@18q-WudA˜ ȆL+K$YNg9tVVOʚnu&mGYVOʞn |4n2غ48+>^I,عFاF̝>b{|>B ߂1_5B9U?U"x1c |q X3M?/fl)Y@awn}Eg wO".IX"&`TmMUrEv J&"aUcRjScΎYIcA9%˓Wf*yE\Qerز"^̦,NLVǠS4S5S5e6ݬugZ_V&"9h9'ۜv\kOVvP9'O;nKtM6>~NыS# *)ʃk1;IY$v+,mHC^GUM |՗v ga~;͏]r ̏F8#σ/$+]e 4u~-ӝ)nltgVOMyltfDCI̖"Αn]N<8_n .b}|X ˈr?q<@,ŲHU%qb'52@Ljk NqH eI0^'n2J* 6#n;d q%#y Q 6Qb'-r U^Fl)nyqW;D,%&b\G+!vD#ze=q_n!!y4aH7XEZjY-=Y-jUj+ܠSj92rRKRKRKRKT%Vj92Ԣ+*VjiL›0vs`s`)Kv Wv ,QQ9T`-*f؀ MfJƝJ'J'R2Zr%%~%〒%k*S2o&k&Ήĝx''H%TJ<?1fkctɍCj(Knet/htmlparser/jericho/HTMLElementTerminatingTagNameSets.classQMO@} Q&^j7cP}MYnͲ<qƃ̾y3o("Z;K~"6l2F7\!<0 ?soad葡Pe@ -6 \o4(*p&'XH(CRIch7F M<zAGl p& ̈́i 
?NMh=9Cu]q&4l5BfQKB("v60[.+d9%m`b{PKj/MPK!M:3net/htmlparser/jericho/IntStringHashMap$Entry.classJ@46Fm7`Bp?֡I2> |(&.\8 zp&lUҮdî=A0YI2ƙX 57Fjz`+lhf VJa.B |yBRLtx̏ť(+1?=OUj.F%8a%@~U{0 2d29[?pB!ADEJnHI7[A bX#:A7PK٣PK!M:-net/htmlparser/jericho/IntStringHashMap.classVSemM†!,iE) K@ҦXun f<8NEA%pȌN9Œ'?Ѓxf|&H 9<}'?!Qw53j7ԲckZ_-2#rK5VgՍ+qQD1W˚Z*.sKrZ͛29P0U!DA/TT8O2 ӜaRb^0w,}{E33]GI_8HYSXQWb xsq9oQ-nݸCR΍M y9o3h/ZJPVf!_TꎯȚ+ ^àW!Kns$! f^pI %0iY s# b 򾄫qWBT G5d?`MSÜYzgu׵mB=鳆qN/l_vB>TK#E#U?,xݠKIA{!7uXI~n}hs-lh<0}c_A+um Re¹>m j {\Px㳑#ȑ}|71ioR{hmmmoȞzi[+3k=:U=#xƕxr? >aPV~ƥ#UHAa_j Gh) ԶE*`o!ۛ]mN C3f{8" X_5dcsL)nFB< 0t\D*at1À;^HRЕ WUjuXKX7Sf\̢nLј+ctX'VGrHQKbYX{ԳݵC1f+ZU.WT䨑[x9=ueR=l]Q } F/[ wUoN= =: +G/FzPK8PK M:#net/htmlparser/jericho/Logger.classEο 0/Z.:gW1TҪPUBr<_| Z2Lfa*g(+6j93xa )DF70źʭ204g#C]7kȒKv0WJUM(qf`hzȖt}PKe PK!M:+net/htmlparser/jericho/LoggerDisabled.classMO@-ԖOAEďTH0X˒5۪ʋ@?eZO%n2;yffߝϯ'CGV9 0l1 $Ypʚs%&3xUWJ[H2d َ7 :WS `n?ږkKJH=J֣)?8 e¾G3oJ¿$Kqں$a'n LLʟO8%U5!p6e U3a(~Q9J:b-FXyRHPxv@QPKGPK!M:*net/htmlparser/jericho/LoggerFactory.classT[S`=&K 7ViDKTQQT644?? gkR.̴wvn~ 2t!@'C80o<.".#~B혈k^1@< Lᦈ[<)bH -Nf5mfO,9ycY 4[O/YNoT7ug6cKXռ áTN]UC5ʬcfv$~;=*a7n'n[3?"]nj42A*<86ʪII  +A$ǬGB! ܋ khXz~و:]2IT$`MiDzICn ab*#ΘHUsXjzI|2 Qp=M3hG x*3'-y]/xu(KҍrvB ir4C {d[oٷyl,䴴<14V ;@xGSV]s \Iu\>53KMMr$EWQD47sB|Ezkё;sRM͊jVJ4բeZV]|GHONL| ~zzGPS>P71=LBl C(M?W"|w#_ 2|13evelu. 
(7EY.?[Jh@G6FMcqGLǣl )TZu!6u劍PKP9K#PK!M:+net/htmlparser/jericho/LoggerProvider.classKO@UVENcFҀ}I-A0\Qƙt6ߙɹ=.60-P0ja?T 0* OH^)ڳ5wwPӇŝ'gfwsMr ĪV0Y-)pU4{Ifq7,%\fj}s`i:?'q۽-rWvc$U#Dł䩊䙊EIMEZ\L#${N#f4RPT?U1 !+9d7[4sq*͉ecM!Ԛ #`j1c."T╰,5A1aGÔG1aƇXPKPDxPK!M:3net/htmlparser/jericho/LoggerProviderDisabled.classJ0ƿӮϪT@ La"xٵKdaϥx1P|(S|_zpZ.,|^:.]b4"F4bjXϤ*V"L[]U*u1P2ۻ'4U* ~&(<E\fbl*~>sAR>Z]Y<)d…Gh=|3ebkqpq㹿qlvs`9!6-Vբ:W'@++PK3SPPK!M::net/htmlparser/jericho/LoggerProviderJava$JavaLogger.class]SP!-UZhQb-ZSRzhO'&I/(]b/xswU \uY`r%C^yr厁e ]g׽v[jLaC5K{AGebݕAw>ڗԕZwV{=ՒzⶲrU&&}x-)0.S>؄>.xDd-P~tdK ˿+*5;ɩo0(XB <i Y2,O6*as^`YGK3[9X5וؾ/}_/],?]0iIMl)!WKSug[ݮ ̞6Lfa*`Ks$O[nZccFA!SI@`  H&yMŨ>%戈R* ),!|k#=SF1Ɔl(2>D2 E!22C8;F#Е0hrZ I1YE~PKSM!})PK!M:/net/htmlparser/jericho/LoggerProviderJava.class[KAmu--KPE)AH!ʶH[K39s\||8Ǝ9 + VWAABQA$k%l0(WhZw.jycyvp{ woDr,!Z I('ƀzcVĕ];$X"\ NK"͠P"͐&~}7]ao.C!t}-[鉊' 9!#ȼ2qD v1'JB-&MnYd<{F)~9#)":B)4KɇF@jxg +EB2BМ~PK{CfPK!M:8net/htmlparser/jericho/LoggerProviderJCL$JCLLogger.class]o0_wm+i-J ! ;75iԞlSL~Դs b}]_^n .6(c' hqg;3}D*HzD}^!Q !D PK_AqPK!M:<net/htmlparser/jericho/LoggerProviderLog4J$Log4JLogger.classn@MӚ$[h m90%ZMQPTAP;'YGƮ&wE3S[fW7x4S8y\p%K)\]QCC5׷W6|VZ`k۴NO7KkS拱T@r*IO[f5LlYbo-QrW>]~c wqPj cBnj%*OPK =PK!M:0net/htmlparser/jericho/LoggerProviderLog4J.class]KAku+-K-(R#(Q. aeەUta$Qq"Á993gޙWQ #+aUr "K((#$.aCB!+2& ju;sAnڷ%~.|.4 { UDr!Z*wbu&>5GwM5ĕkU,blMlM( )*T,0z0ayn֦)ɌrX,wpD/,DYtM_"J_tHV|2{d0 .a9<#:B! 
Ʌ5@jxw)*EV!N(PKȾbPK!M:<net/htmlparser/jericho/LoggerProviderSLF4J$SLF4JLogger.class]O@߳|V\eTTݮXI7oH0h&u١tSfZOqC"1xG)e55vg:ϴӷ6Y*lܷkGǂuS3 m -l$:v P0ӍMe%0:ah}0aBLg:z upFs꣘0ٓy{Gj@k?GGUј:^8] Y9hM, +X𒰭U'g$xCe8&> [o3fe6rVf'D%zP(ngW58.>g$"U^VRܯ s% -wc_P\c[Q*|4QHRiMd,]"\R3U8eNpVʜZtʽ\~PK8PK!M:0net/htmlparser/jericho/LoggerProviderSLF4J.classRJ@=ӋiV^K EA)*T>vMSb"P*?JVEE3ٳ3gV5V0ficzͼDrT8x%$A?v]!ˎ'ǧՋ!ah7έ)}!vWϥ׶B #%۵}B4$^CF- ^VZf4ӵj m*g^TwBf UPڷ&,HaDzBϢC!UUKP$'-w ~Ȭ#l)O8.3U2ɟ Cg8S=;qp Ehb$bDZ#PT WA{PKA@G9ePK!M:1net/htmlparser/jericho/LoggerProviderSTDERR.classR]KA=GfڇI[Q=EJúпAH~$3{{cPQUG *܈p!`CM[ |WRx~p˅ªmMMayն);c57AN2zZhZI mPmX2C ’\Lj%,[~GIոA$÷,)\*K XdM\?4xU2buԕEqA>iwM3,=y47җU0h3S: n"=샽W$*+< @`҇ #PK 1xPK!M:*net/htmlparser/jericho/MasonTagTypes.classT]O`~^֭ta7LA@` U l dzTVa8:5&'%QnKԘx/P;&`Л9o?1 A/qGH`'J=~ J?$$?MIxi~z(⑈Za=D|%Z2c1e-{'kf!m;il&vM FKEjI1 PV(]W2g]}'|Z8N$XBYfh;at2 }/.?f5< (P_07E,0U24p5 M f"{qlv#Wn"~]q ~2"}"(2nED;Qцv*w,D;O0pIAyJټm19 &^n6^`h5uxBcUwviي [dXSTZ.E+zRMBWU'TWJ9Bn;ts 53mB5x)}<R:hʖRwqu$|d>}$.Jz~uˁY\O|6:vɹ M`9F!)[8w} %0%@xo3o! 
*?p<u)AtP_[BFJhˁr;hSԛ"T^B" PKʹ'PK!M:.net/htmlparser/jericho/MicrosoftTagTypes.classR]OP~nVpL!YR>ekb)k0ngKגx/BnLLWoaD8xsϱLG1"x,$<(aaP~k궪նhij/녢Q,9楒 ߱L˽l{V}V| e|k #fV+ 5~dr,AHļ SV7: ˷\Ǵn$\jZCl6.d=û T/zݾȫ` *H!MUIr*lèQg e(Yu~)X7鴲m^SuC&TZ&W|rZtU>h0mԮ^1rz^exss5zcB KTGh  *|>bǣ3&O#q`P8ye&H5q9x&a Ď~yG=Zha DӖ7Dˉbf@=O~9N B!#@"|i?PK]e;%PK!M:)net/htmlparser/jericho/NodeIterator.classUSU,YVj(hBcVbJ(l%$ n o}Q}2TfQ9 ̈=s{9g\Ŷp#^|$>YA1ų)6nz;&xIZ 3 I^sYs g2Ha>Sh>oܖBe,) 6]2 D [B!X.䋚UVl]Xz:gR.bLBdum,% RN0Ė=e%tyY.\I4KKº+ք%efYm4$ y9&=y Zlӊfkz^g ]EZ{nOH ޓ;rN+%nt yzd~$[4 y}D҆^78H ,Q07Cbmjfdc"ewܕcpc31"0,cECb+SUhXUFDXSxs*;DfM6ENulȫ(lelJb+ X0JbѴl-錓TkN pIhgT% Wm_+)L7ҵ!̿*:vV=s.yR #Ca{ WJ;;5E'PFFlͮQDTo [nzQ35=۷`N#2g٦mFʱ@5aŠRILSs+ny6B=m%*s~܎N|}uY9ͫZ\jA륏,5^}mzH6pq$uGRrI.hJ D!E {hGy( Y>+P@4Sq>^"??]:V"fs⺐}OkjDݨj m$5y;h懸f]89%ѺCdIލ1оxw6bnUn{Y[ ɑcQG'9ucxq~+>\w2u/Bc\h(ڇG?Dop"9>8y7ŽEõ'[{QU:tn h'"ZI2OECgERxPKtAD PK M:net/htmlparser/jericho/nodoc/PK M:Bnet/htmlparser/jericho/nodoc/SequentialListSegment$SubList$1.classU[S@ mV(rjZ/) b*Zo/ݡaBR/3}G9FP Bf{/_ i f4}я܌%p <125j ؏{&4E&܈# n)yciS} JmuW n+1vѰҦ]K()-Ϙ#m[yO׫U3\Otrm|^oKrnF iFmcv3"NT oZ.Qa-vc2[=Lw$|`{e|#[ 11Ll3lV a^ C|0Q^w9QO J;۲kЫC*NYqO QnInk {I+A`aryjGQLTѫU\TzAԒeqg] 7Žwd6uN;FZvͮf+Pkn~g$6]ѫ^_sWqMŢEom/.õ>(v[n}nY5w^7=öVce5NBǾ*j"@]BC]&.j(inn8(;OQST@@SVFFE gwq"QC!3 WhArJt77NlrVV9>OM<<ͤwwIѳ|H]gqiqHW?Rt'2hiбE^95;Du~l*q?gژH+D:ڐc'P':sKq "}P"yLgX,ƞ}qfc1X&#'ħFUP[Ʃ]/f"x19Q8ЕÇQWo>\%K9zIwas$t0'¾Ʈv oyLP$q Mq9 ~~t_19LX8zzEpvׯiZ;1bQ_#${T|sum`=/./PKslPK M:8net/htmlparser/jericho/nodoc/SequentialListSegment.classV[WU $C/@Z=$ZM Rh mx4`0̤3 /׶rP_e[O>Ľ'09oog~[wA3A³!v Nl bj+% /36>?,hd@jLg9۲djJ,YLXKҏ<Lj1/%Ƕ7{s~odldN@&ʴ>Cd4E\3:mb[99Zb/Z]Pni(؛dd$jiӅj\F%-C&rj&$嚝=;<+=$CiU%%|>'">ϟK_ Y{Nif. KIg98dqt_IxG$|f|ë+*.K{qF@gkzZO/ma;atbiV;ijʬ޽UfBH \VUn*K/lP ETkKiR6{m+Mݸj~ʦF*+Pƹ}ĝ!)iZp{"k4xQu^H@MPfT=g5Dp)D:RҲ#? 
d}4LMc~h,u_Rgj<@uCC=hK݄OET."0*j-@MPc+PG{1ihQ3ϖп#nr5, ~_F Xu>}=4q*Z,|Bh'/Qv0F4'v+=I[P#櫨__?txJ<ޚ ~B>.$s(󿑏q$\H_6u m/ZvZzx 9}QBvBUYaEAc c땥aE1GK'(g~[HVVouѷOr* ԇ'$!u6rƁ:*ܸhg\#&+-bh%nVO#Bs.PK2c PK!M:6net/htmlparser/jericho/NumericCharacterReference.classV[WWN2$ A[U 6hTXAKEh/ᐌ N&{ eeZcWS*gf (t-_>sgG WЉ0/ha@oqrBFZƐ'Xb +8u8aby7LgbDE.YY1*=7&PpU"e| g\2Tfسlyj[֔nI&%"s[_)fT,0:gT|+xm|ΰ{żU(ܠ m:zcJ!8~uYFlkX&狆I`TĐ-9v8TzQ3P]J3)kVtսϳu3z%z-6KwhfG4=ܫGg2)'#fgK!df4>U_nxiЎ:Wm횴 xő,I2݅,U!̺kfY@x= H[xX$@~ a6G @2YN VI5Su$Xu"(AFy-cۂׁWKyEfFX? zJKMkxU㐱dg|;ζMz>D*s^RYg$TCB&є R]DJ֤OhR55?&MqMҔE Ua=($̣S@&iGh$1x-$NshRu3 "HnmZ7')O⇅ʝ" _Jm0H@DYF Gw@ԨZW/NcZKxieuDG,Hџ#鿥U1K3t޺jH]zhbE;HWFF3R%xр~ xqa)אB?PK]m M| PK M:+net/htmlparser/jericho/OutputDocument.classW XT/#q05@ڒX1RLt'Ζ7jtOӅM%]ҊUFm&Mtߒ6%% w=9,w.? ME(x)B7ިV'6>ɉ7A[VoSi}Jw0"vTQw1x?S~KϝLuT܍{T= >?'|D*|cT| 'IψO8UuWqa1g r'F,τ}Z9S*@1exЉ .J~~^C_dȗxGْGqى/+xя()$bn!s)7c =<,$hՇCz؈ TX #ljt~#F֍#cq=ִ_G"M=Z6,Gk 06 b5o#`k ҕih&^j'9&]ɰnh>w1=9H6TyR#KG\4=9দS@4F Mnj`[l83p[m(w(7D0z%%_ t ZFyeZ#pʄR?ǻW La&'ŭDN;]+kruޕw4|ޗBGڸc@c'Ps2E飍u_0SqDʂr0)PԸ%ysQZR`'S"t1ӵmWiCfDEd~M{掹8UXuד\ҷdmhV ZݞA, qt\xb .,-:1*@וɎ;o?B}?ov߃KO&X靄GA$ {%aw9&\9c潒ܔ}gvyHF9Qj |D3(=%. Жb9|^.hڽg<%7θQ)$&i)T) t5LB`HM?m1[ c@ꪖ!t-鯮nmH.&PE&P=K i+p_"Dm$t8| \9ɆdܤW3f\S)or\`l3YA!<YU$VxCO]IW)Um5}Iʲ9WѿgR׆ &}5+0L9 )g#.j($zlREl7J5]=K^_!$s< aGp䝴Ǽ5ShofQxk-f{]feޙQz_pM?1s<7̠ړtmqI :%Y6 KX帄UX-%lR*l |)<}3c(rP9n,@66V{BN[{F.sdJ f&Gt7جk f dd26X]wA G팆tO6$ѢXy\C e )MdYhvPM2 \[x]B("(1K=Wek4]@;>_i x}*cXsm'IZ'vEEW zGlUe*hWXZ>z1}i"I_W20#IE5aiYU^PK PK M:*net/htmlparser/jericho/OutputSegment.classR[kA=lnդ&/.>'bC eKZ8ٌ 3tV[⃠GI^"b!Y9{wx %q;=O3o'&V@G17ja(MeM`w;VºKD\ 㮄R^bNE e;=2+¾T [@P-FZ1 _v/RtvZ]9xANūݭl/ͪ(mK8r(3TPoqib&Oy?L{l#Er|Cz*?.# [_3PKKhPK!M:4net/htmlparser/jericho/OutputSegmentComparator.classKK@ѱmGK0n)(Ҕ6)qݸQt!QP̹/G! q3Haccc!|OJ8  ԭt|!I=v7H;l&sU*scwHMI l%i7-PB6 I-X1'.?3,B +H88qF"|ҩ@;ݬ==@}Lqί/ ggh f߅{X/!v3?y~U~VѠs6X! 
PK`ݑNPK M:&net/htmlparser/jericho/ParseText.classmMO0eSe vZ``!C+dTw<-E<*zC#ʤ,g]r_r9*Otz v"bPwУQ/ǐ4S^pJjYx)ñK!*qNP~9 3'QXЭۿaLDˇX_ѢEff5ԉC6Z5Zۆ;mMxxx]ivEPK:U-PK!M:(net/htmlparser/jericho/PHPTagTypes.classS]OQ=vˢ-"Zhh-,F6݆ЬZ`!ۍ _I^&jLW|s׊/33w~ <#AG LF(M~0-ns/-#{Y 9 3fz|uazc:ڎr횶tǰѮK G[/Wk /T7*ȵF^) .'QhX*l3ͶuLAj/eH.aVi!T=Qp cD`QqJW; .#*.)>(Ba .>: m贵]Ps>ӋbQ(omK >XTN˰[t%>BaϨ"Ӱ>;64SOxӶ(~ @Ӕɋ{vȳ|9k2W3&]gJb15Py(N~|MU4ɦ=[Vcd E5.EP<=U8D撆UDԏ$@H+\9$dB^]z=b@!K0~PK5QjPK!M:0net/htmlparser/jericho/RemoveOutputSegment.class}RO@~e6;_ IcC"F;K.~(M4hC[oj@hr{>\X/bðe(șrP5p<spe^a9{2ߗa <7ݷ^M? "蕽FhRP'x1;NG;bGD+ F?zi`3}lbMbUx"+&p g6;A+G|unXEM5[>5[  4.?I'f.)l}ޚnivrxbKB-re򚞵%c$SrNh $^SIiN42iSVat8>)a`Vm!7:.: iHW RYV ]u+E+S+](Z\yPTPKB?-PK M:@net/htmlparser/jericho/Renderer$Processor$A_ElementHandler.classU[OA.,lG,7TRʶ "j-(Z&l_' 4j2)"adϙ9s͜3~s mЉR Kp4F@i J1*EH1) )*"Z1BbB b:W1вl3\. 1шdʻ+J;.Sob6Go=hWYQO2ҍ#`{u~Erp`PH+hGS: }֔B]O=h}x׃ zP!S? ZR7q ĮtEvju7W~PK4PK M:Anet/htmlparser/jericho/Renderer$Processor$BR_ElementHandler.classSkKA=׍dZchZ F E'na K*GD сwvsw{ ` E F$R(f9LL L2z*0CF8vMkU7ULBdՍ@Vs\-7U4U{stCE**uJeBz~MNJPѺcݫ_W- Su,0/F sr=:{Jٜ, M|o*/!l@yRlB]AB uu--vЧ_cQc"j =x&_yĹwQ`Ylq+WY`a;bEF%z isc)ixQ_<&q[PKhcrPK M:>net/htmlparser/jericho/Renderer$Processor$ElementHandler.classNNBA''DΆ !Ā?x/݅m~e\Pd|}|ÕB&:cTϿB{kbH'L+[9CY,~a7'[^K+!\;ɾ$m<7,U;neF[zVq CdIphl2)sAܦp- 8í0ZPKFTPK M:Gnet/htmlparser/jericho/Renderer$Processor$FontStyleElementHandler.classTmoP~.0 ^ؘ :T^ ̗9V10Nl_Znҵm&?5M(C4Ysι<=?~~2&"k2.L 7E֒U@Q@IBY2\o?߬kՃ- Ok; JͲrajCnyuir_}bqOmyfWw\>V; Y[& /xX%&7lGڶteCxm PV&gnj UNK]•p!f} lnG<!.V5+@a {d[޶w=qʌpQA f$b!_목[7<: rgf۶Zk :H ) KUnwuYT"e)-7B0kAhXtQLiJ'pcthW鳟D8vdOE3 /`|.#E3"N6HvC+dBGbv »'p8qj0x4=!bBbd7Db$'﫾|_w?~"x0Iq^6IMc*,~nq,BPKsXOPPK M:Anet/htmlparser/jericho/Renderer$Processor$HR_ElementHandler.classSn@=S'qbJ uiҨ8Uy@HRTO묨#w^xF<4"B$39svz.}`UE,0̊27u2Rmk: vǝFS]Bi"I`q#D$f(΅L[솂㏞HgyDnOāGBvE,bgT['\yA5 +SzoCֺb0o|OH&:6~ 6-Ö2*wk.{ \'=熞|: фݵ8+M߷qWq49nYmbWMi[MX['#i 32ZHri0vLJo"xoo:'a!R3ILP l(XBR(Ş(+x$ VTPD8硈Rp 
c!骥oQ>C#ƐښaatT}V2*nNȠҜQcȾ0}Vd=s]"sQvp 9 hmJthB|>D$.n z9w [ŰU B>8D0?Dh?W: w[CHE:ѡY7Dn"uou+Sl`*RH.jE\#nđPK@}z&PK M:Anet/htmlparser/jericho/Renderer$Processor$LI_ElementHandler.classTjPn5mstnjfK͵.CR00:Q$%% + >%ueS?~P4paa6 e{*kаT 0T WO;5(tenZ2 gN%wɑ?pXFf_F{/dГ`\K ^D@' CS<g]9H0U@ yFۮm;0h|#~ZTt?N9}/g &rZY]DkGOXAA̓6KRY= LnTH6Cd2't!ݪ2 9Z*'ie) qsyBW1_濜zET"_\_@PKowPK M:Bnet/htmlparser/jericho/Renderer$Processor$PRE_ElementHandler.classSmOA~"/"E@pAmCb&61s^7rq]?0S c&;33<7?M y-(,/⁈%-dQ̳X1j!j{h({q$bj$:!L5}vWoC:QsDNWǾw9:XǥaMl2ш:`.vy쵿^քjxQx{*<32* k1O~4Bӄ{:~uvt^o3Ml*# sr?rZÞp\OJ*-io R+7Ǥ?uUa_+<y0wQ^' dfXɵ37"<{z6e|.wy`9r,pY~:y5E.I1n%{;+ë PKYoPK M:Dnet/htmlparser/jericho/Renderer$Processor$RemoveElementHandler.classR[KQۮj-˂zA{Yz!D( MTֳrvEBA?Z Et̙ong^ߞ^bBy 1B6  l̳I2P"/ 5>SusBUxꪞb?>W:p]vݾtXX./bkT}aToc1@ڕ~sZ27Ki#|~' 6auN\sսsq7nH8 K {b_? Q#q\D"5$#0p/$ a&AYvcxaᑆ UY[_Ujjx,{5jy>[]W%ndK}/lH/te`N[7ߨz֥/t'Yl҅jᴜag L_$ qOLc` &tYI뙃PK S PK M:Anet/htmlparser/jericho/Renderer$Processor$TD_ElementHandler.classTaO@~^6֭됉Nĩ+E퓱DcAH60~`27Pq{={^}@M:*:J`YmwhXաR`XpOCtw[m (DH)VƱ s@ te/d[>IN""~%dOD"NĪsBn˗~򔐩 Vmp tkӫ;32$~(c [^0zW`:,xd`: Oʗ (;\yl /!<_?54l(ڝ- f^'l/?KE߾pc/|y\5FzTqILb8$i2)'+ c>/8.g0ʒgS#q]?}jXMj1qb cf8{`m ;g-q2j,|Պ_! 
q+/>xT|NEJ PKzPK M:Anet/htmlparser/jericho/Renderer$Processor$TR_ElementHandler.classS]kA=UG7Dc[kbZCPDP4$SٮC\Ygew [J Q%w m {Μsaϯ+E&P4D1aK`[`i/ڍ@= WFQR2lNɈPzJv?#'dhd>'CVb߹uBS^`T/ FГ11ul޺r{O] ֑{*({OWYb87;|H7&/ K is/[j@V#xkj^(`y%|G $C [ ιCc i5C>dcnd!Yd+ߐ#_M42\1 PKH'FPK M:/net/htmlparser/jericho/Renderer$Processor.class9 XTYޛK4cEb8j$Iuy0C .I,jmH$P6Kۘ4iӴYMӦKҴ=Λ13 -|sz9wy\~{`4ףI\]M W|n0,bfs%,fss n]FW9 'xYͥܬe /iqRFn:l㦝nb͸xn-N+dAnֱqq4}NB15GƠ1ĝˁz/ZX4,c }\Ţsb?f{ڧL_r!'^V#Q#~'~ ?.'Љ(I܉vS ~3 ~V9 W L_rP:WUR_WPno*x7(x#soR𐂇<2RVfE[(xw8fT Sq{BAp T𔂧KpS8{x{OeckhDGPᰦ7|Ѩ;ڶyBZ!MGL^E~dc'~[YI%?Dk1#ʢ>P#̧B?A&̟"T&P0%(BY=P4Z8ܝb=POjzNM{"mr=klaZ> CH$ZBѴhXz䰶i<;}}!_=ݤR Cm>ML%Nhi<c?ޭHI|J{Ϭq]<^ăWӈE!LBC!-F 4JҚP]|1^N__92^sF<^}Omؑ⏄hdբֈ4.A)B)Zӻa-}#b1-@Ҷ5-=&¦UGtƂD{]q~@)6*LM(v$٘RΗ`ՙؔk~-je;QMXH (¼1:L |޴kWka:t,;sgGxycԮc$c15ƜGC@N/b?Ǥn/kƼ 22Vcf/#gK2ZHAzdbbAr7Bj躱 Mq,uxyRbL^=Xbgai&4'ubNVmi* n@u7 *<ͽ_}*<Lw-ecܜT!k7.UI=͋7cwd|L=l?CV,l|P% XqfU*tÊ8|1}cRb8gHLSLtq">q[#TRV?TThT=K H.Y4&*;Z( .|Veg*MVzY"n.榞${4_ɑ-. R0+a? q+uu~=GB4@=ݞdڣZHNLi\u.+,x8.{ѯJ~bzQ=^Nvq~f=M{B赳 4xKŘ~YczAX6z푯(+ayAX3v_r= 4PR3ZTLl](7,D(JlDWR=gdG %KQ9Ï6/FJECy/PIO총_ÄFMSn643נӨ%T mËNǰeՙCr_bReܱNʌQQJݚ.Rx[;V6{h//uL~ҁ?2C)ϬtP9SEPSLbLϥ E5'U*U?:,JhvP);?)ͼ%nggo'bʾnJQ#/e7j0nnx:n r^*0? s=tD4KHxEIrU{s6!2o hѠRNN(_)KrQi5Eh mS8,_ojOkGQiO =Jd0Tnjśi%7$7|nJ(z%\HuTH%TAC!Ry##BO"GG_F 7ܓ|ʀO!7~IWkc\;?[<'sjHeS OD!h&/!Bwu*.]!p]T]T]!.wTwyqOdG+f(xn L `ay.L(d/FJAX ,7y5{E܌FvKCP=m)cԞ0+qjYٷ0g5P@~̪< YU(rGu+Xk#֚PK"[4О8Gk>[fwY탰0>@@I~ ӰP GgpY++ItKt-% @%ðqXv,ˉpVH[c3 {V3FUfFpqE#4j.Yf- ʭ *&͍gtWM{9ez2BqEQ(<[Oš{+8G@aP!&+bb2A{`Zxzr1GN;$H[)*nJ٩z=zZ3 @G: -6y$8` a͘j^owYtzN*,no|I([j2Ԉf'uq$O UB)'e 9 `ڒB.ƞR,tXWqhn@l Fi (rPI  5%~%!Nl]րu; ÅZڴ7nIP@J3ymK8'-.u|$Lar*nɄUDJM [jXoіXl ҇2f 8߲{2L6~ENSkspٴ|-u KcmDN]--'oj"ߟmSvkᝤF4 2cz*nO+v:+<;p% gk)T( SSӷH$`%[:Zʑ椤"F 4 >}9Mŝn|fg]8kd~eWƴFqV-VaXʅ#5`+y^ChK3 UX5[tYO X{^ +(0^bz{I-iEA6$=',ɖQNcY8i*5&+hLTI˅Xddl!1l|޸R '${1]#]..L]#V#sMgAv'a!>&'td V BIQU95%0%>ksq|I&K Jƾf¾oq8ƾ/YDEp=7]Sv%m7+vace=X2GGFzFUUWW3?w\ (vX,J1<|Eʼn(N>F! 
X< Ox,a,4y/j4DW (^:YC?5l7XaqVśQoxGŻ*S;y@:2W鹮̉BQ8RArr`Fq a)JVꈓ< 1-ϯ'L^_ax2Cz<5Jj]XlOmr P\y&LKq nqv؅*ܬc/1Hɘ!;t*>Ww lip#ÈqL踏,&yaE3:f$!:cS#TOL8 |IOLIl1Uø'X^lD5shc'Q6\:~[:·籡$h9{Ò4[+$H=CmoI'6V`OlbYUl;rZǛk'C)VZ?x)?",Σt#"ƆYwgqKS2$%%9ښa8'Q$iܣnc< F#Lҏ"@Vg\=q5Mt^WMy2鸾G!mȏg~vrz A@"-?|܁?`ZPNwZwcSē3&O"}c56oxLv4D,=p3e?} 1b=8!קv_AbQ*dd7I9 zSD0DdҩMr0"r_ Y(2/ϳ>F#GO>.jw܉I ^MW{/\&tԯrF8C *OG,[-G>pɧp(~,Q'8)'yPK=열6PK M:$net/htmlparser/jericho/Segment.classY xTo2Y C $Dv45 H1$y&c'3qfP.Zk[b7k6*N(`m}{k}Oy7|_ws{N,*'Bny)j"zJ0 gFbS,K\Q2Xȿ.,E=7 fylk6$`PABw!Hh<< \T#/Q7aHGlp]ޱggk[@m$G7 h#`i *KV\ϭ^s i U |J]z/w9 7AnlK#ȈnJI@ GGO BĖf:<usEC=Ѱ?8,0{RkZ a&:5D2.٭} gLBtIw:jا WGfp/.ڔ94Hck 7L+OG(3+1h!vS7fгQO' G-|2ƴ>{dIXi=?̪O 6\V`?1,̬QXV4Eig39h"_)`[mj-kMQ*4uq˺'2AF"uuMPssU'w5KN S[ _tyM+v85ܟx2PB2?ڌU w~s[Sjg1"WZ`Iv;˔GqEq?$;(m q Z|ϒdj`eu6yGܒqIΡj*VFNiec04J2PA%U?7⏪=c>6~(4mpGz",QK#=T:a8d՜m@7Quv/q:]:Ɨ^j+,cD)ϡROR p*CuڝA+M9/NސΪ M#@`9Y.TMyHG$|kE@S0)": Y: ǰ6t]Oܣ㑨;Twp< ZB7}aE41q+zP8Y`BLv4M9I͔43ܴTߨ:d܇8Ux5]\.yQ_UUXRm&V*bXsx&(b-nZ /EAWgYEl dl30v"6M6Ѣb S\JҢAN3 §n&kS6nqSMtK ދϰ]F\r';w)<`WNnM >ʊv(bR`qG$3*v)8*z(N(8 ^UYR-G"v^E}6q"X᩿&} E\^ 61ASr1ug ˴xϼsS8# :$Idaଧq*'9"|ghSPGǢZ!m.DNU691*)|B>a7{rHPҽcWWk'Pnɿ\h!]:3џbGvY8`aĝ1YJi԰+);)Ppѡ/ȋu\iA L&}eIenh-J,jEK˵I^-W^_C7U/9.LCH/FnMqȈeQb[?U&6=]"o~x/ON#-A?lbaE \\OT鹐+z@6Do(&DM1{M(=)< QXg0{K`;> CQ=!))Eϔb&YI7JdS\<g7sX͌5(L9 *$](1bYS}&8n"s"V7gpPwJ.xr{N(#S(j$ZS<ebYeJbfNN`o`nɈ2,|9^!1rIjxKQpi/I;KRJ-R,U}e,st ziaΤ vGy?\:ܚǐ z KT2dy#M4tnraA"k;bǘwUSXxiTOG y1\)-D|i@+a${ ;ɼ]LH.чg~טHVmvw;=1,L5rپ̄*kzzTJ~S_W0LLnJֶRei1(,3WYUCSNTг+6ZLZeڪ,$WYսKSwTw%͓gڞ.ocg~Vsq̗3ϋ86$U2}J_J"ڞU^Β&f&h'i}Ud51LOH%;eږ!WFsYLONB-[RW4XvMwS| tFY'f.s{/jC0mG{?-)\fţi%@J3 !´DvڧE銏/pgQƈ%1<&iWJeLISZZKqV3*Od!Zşx<>N͵5[feab~,7˳2m;~ת)twz]1瘝lWy]M xww]k ɾP6Ȫӌ +NY[d-o⸭!Kg]')'c G3=EМFǜCOY+4x:X*,t<bTX<14?OKy-boM|O }}WQC.pѯGa>[K<jk׵Mǰ^q VMM *+3& >k&l=RrMڣG6~OgO_H;(ޟM|)j}Ѭu_b#g͸ZK-rWbr[]zK\~Kq>NcPK>}*PK M:#net/htmlparser/jericho/Source.class tXJ:e;c;QnG>rH#;`; %(V%#r-@P M!;$%#Io'wgWxe+)͋ϿϬ%\gvlu'v p Dfٻ٭7x6꾝z>@7԰>$a&z8=Ը~vfes!A=J<{XbxAj~a/{>JSNx 
+٧V~Ɇ)z=dy0nuH08dO83Z I'{9gsy;EoK,a _%5RDu'F.&|f/%} ].">D~~~·~B 沟W~QIz.57$&!dI?#?3 _7z;'7dgPəs}A rM"$rr7u.K?uRbjbj4Z@B3X|4" G{{H5M, '#[.5X,ӑ&B>]D`F:1"'J]8ܾo@uɠ5G MIú)UUT r9{B#- tB7{B 54W[7T1] Ҧc~Y$42"X)A SLrl8cޖ@s=PhkǶ?5N8"hr[!ooаI v J,H@[ C꾦h7rp+$J7Vk֯l C;J]$UAX.EC[Ե J+E­$6_UEZWTGݰK"П`7\@Dץ$UcJ_NXΙeig1i@s.6T H-Z_> 50/HrZ֫Y&URZ/lRtޜ.+gۛdU,ČFT32 NJ8IJ޽tP4Wơ6 :K=NBhFڔn%N:zdhWLgQ)-u6э̀UszVĔ^e/r䚑Ld+u[u%4|qdz"@zX[k9)Bx*& t_l֘qY.!ҭ-ex džFqץgEAg8y۹,3e[F%Q mѪ(1*|Gc$dLX͡Lz:3*hq "јOP)4c%gĊ"l)jj+'܅^8:N[YFЛ"~6Oa[:ꕝ Qi%P'F[?GM?$O4eIi\ؔPRTaӖLNJWbٞ`|؊Ώ<:*e!ChFUبU@6߶dEd\s#;P8`%R9Vq/SdB2C72 /">eXW2^~ + o2 ޔx@fel ߆Ȭulێ‚T2(6Y>a%x˟.%#+d y#oBV2of%ނfXgrXxKXv|e2oEE6xU2)7؜t$K6f ]$My5reVɼ_.q\XN| ˨j nj\kVq/U<&dv5 cNiz 8o82zCq'IޝJ-eKd]r/ $.-swKG潼Of= e3Y繱'b2 jsJ%~̦#֨ot+{[jKϺh"7od2^0Ҁ7n%iQ*&9oƿWDҵLpO!]?I >o4T|DoG$Hx\*9h^csdVfޛJ5: Z~_/" e6S7f~o2;QGf pz^RL}2?q)2[H`E&F)C>AoKсbKobubeVM~[!ߪeHKv/p Q/7y?U(Uw28>ʸe7qnzf'7U;o$C4$2 Kamb(>&1 ?)ͬU⟖.12bE1E oĐ1ʠ6(qT"%փ~A{μiO|K)Rո1/qD<0 Nu1 | %?*$m $>,>"ci#(7Xrq2K%fQZC_:jF^gyN\xFsǍY5YL!gd ?)gej1ұVwx]WYk< Fb+Rf=GK4mTHWʩ"ӻaQ;]Likomkjֲ߰uSCk(KPANhnkk7h4 }2E17bcӑ8ΑeǻKҎ|fp M;uu큖fAF !:0w-;uurhH){ΜKa``Ҭ#=FsQRmb.rVR螷vB0rB㷅\Ek9-MuJ:$W6F(1>];ؒrdw>bN9DDm6s0;0ogMx#jB9pZ|?^-ڝAcHp'czw:^gz'M E3\!M"[yi9+2v)Bj_Ev5 c=/ .hہ/xZ e;uN;9K""iɖpп/]W{ZuIN c:a <`:x8]Bk%x~_<?Ԟ?{~_3kKx]{ ~=߀7QhE;KKQm7mt >U,ln?,fF?[?Xg slܪi-$f$}VDRuEiV}G l̇gJC4_TgV56 g+h]D8hr b; (:A 44 Dwod{ e%2rH(`,oCFDwxCː/+ +OA 8s} p!Y NJ( ê!g 4y)$7% a,4DF- D. g6ÅH _ ! 
50TB/PO,Y<뫎x h+[lN{r%qV2T- eՖ҇-*=8)w?i8 M CeyBf:!;=̓1 a8fj@/\fZcv!h:B|T6M#=]$&XbIZve:+(JG!8l$6ݧ1^1slpfl7(3E/#a 70 ܎Ni$cˏη#w%]d;2#TrZ%ȿ@&Id^)K]@m?em6#D0bG lɧ5g޹ 3@jGjq@A4UAXԛ< jFz[PXy1LU"겊9ŜfV:E^c|rQ㻕k,4IO&$5jĝ!N;XW <|Y\ܯM_I_:g4gv"Zi g׈\K%T"% Cģl2u*Vm!s]# \ IPb#'LJ ,A` ,gpUbR + 3-G(ӡLP.7)JIx=G7Rle/M+TÒps 0GDT䐄[hEXQmdXv gAI!_({zE{>& ʦ#vu1U%:${*A3SښF4Ix*GàpX(V齶2ղ5v_j|",¹q8I-s)!V;9D u"ݑ"R]z>™H;SLvٚܺFC):PӄPshO[GRYF@TwNjGQ~⏥'L0'Υf+N8&3(G)[<>kMkZ֣ ^U9%9= 9%9uqtǍi7ֱYV>Om]`\VdO çrƤyzvę 'WiYzE5(/8wCݱ7c:2xuV"mlL(3߇T99xN; lӦDǴNiR/gW \fTw6GEcٖHu4K¶ E!֨c&͵TO0oT} db 8iXF>hzO±ax8Vn$3 i&,tʎ,бW&ccM{¬3bl=WLMyAB&#]d"Puy*8yz.9Ь%IoO!cպ/3]6s$W&5t[CP0!&FNs=E$5<´ah`Zc S0aO4,`QS Y }_:Q)%vjWɅFj)ҴYeOiLZfgHoNl# Uq9 .k3CRQ'1|i( a&LƏ٘Q?Hvd;@/g/+'^A=JPH6, Sh?Ds fŸ! 5v@HUT\!d t : 6»P"U42TE*Q ."`4Ǥ{ڇAL R4_pSp! 1>yxHiyK<|F4q#7b́ܟqL(Dza  =iq;㫼5zځl3<x~ëy$?x{ Oyxs~,ɫ7s+WaF{%-o;;OT'F3og&TӁ*bR@]=z(*01G=>8v@GwoUZw/i C]UnE_uB B])أνh'`ۺQߡ zc vA.,:hr[ 0*CA(J.ZDLS2=$w@b PdC ¤:lk@VtVjvMݮ\@v vC^W0~O'ESIZhPTzY` G@$[]!?LΛ&FWךT` d|QF3đ*n2Z ɘBpgTCC{GW-[jk!wA0')¦NwhJEhnH!e+zNNGwr@nHJ@.@K%])$PODٰ2,gqt.Tt'|d+-t;{=h!De,=N-äZ n{A)EfP0qBPpl[dtp5>x*&Ej5CC:5<䡋n| WQeJxY;5a&DphGC?sxD=ثdיp7Y-T1G@)PvA\CAp&3%R@DdVhaE&\b*J5M *ib> +X$b*j Q pp3i JL][5qb߫b;tWY8s1 -nAGq&eOӪy| fD&VkXʵ%ʍXNPBM\ z '5#׊ ^j"Q .6CT']A+fxSCDzÔ5)k!þ _5yɠhޱK$)[ުD -;f\dהuo>/,F/ ņp"Nsл$dXϣ?5rlAH's~\&0+}GټEi >!Oݥ6iW](T4H7n*X)"'~ HgT/DسSpZ| w i}[7_XI` yؚ7ok֍Iϰ:ۄkoDs72~l߹R礹Egm{I3;M)1l:q}e4n?Mawtrd=0QCK#xnT4,!-XzE:7nP&}cND,<%K3:T_ƍMoC]kݕ->#2@TP᷂a(QvB$H;*!dRx >(e%V()1;E.z$;<%Y?􆻂׹瞄V܉ș9S?"gݜws9ws@gq"g:LW}#jitM% oCջl 6g ש @UbP,c{1d{18Nu 9^} 'E?fPh41\(¬q6/1cͼ1BL೛ q8PGTi~ {JNc+Pc(Գ[8<5Vue6tn&lISy]VO.7]q,Q]jpдAI$EHޒĞAhA8eʓ ,&xKx:lkOjL?їBsg8GeGP aZxê ¦l4TsˏaYug|Lmo%I>,6Ԍk>eV8,ŊJ1|cJ@Y1R76 sff, DbD-bTʁ$MPrMYces%T$)3i9c6S zri%S3(Q{y mNee{^"^)'&|TaĄ ,dAejkȥ:KE!Tv˳;x] A`fJN1?fh/A?ݙP 4wO5LE wKٴNhi;!oqA%)<Պ/:]zQl6F.B)Q6X+epWe|^dSZ }tj@H."@PaL@r@@iu9? 
ճld$q٨c:]Ul,ʚUgdLAߊ)h93{pU4s8.2^enʲb3lQ&ߚJf7B{+215*`Ĝ}:NRMFzƵj8-4-7s's9/aѻ&pqQ|,4nRDsZ&Eu=(zw4:hކ/PK6 0PK M:,net/htmlparser/jericho/SourceFormatter.classUkSF=By`phHRG:#<@ i Ч7,δ3<:L13Qk 3{=w޼W%UgQd5CC9oX?ZgMG\iڊ^'}oEtұ eYyT|IqIG1ʐe1qt|TxΚ+GKͥN;鸁17qK) Å~Qpa熋#,әGթG#[-SLȌ̊ȸ #ȋRECHz( w[Yez >"Z ]asܦ"9ğ9_ :Í{"{"mG}?^&{0Ç"8w(GS"Taӧ,}y(;P*QBsvhΣ \8M^L1=}A]4m(Fk1#n8Cj(HM$| ĩЄf䷠mq&Uԋ!:&+K8N4‘F?osr YShe[[Hǫx c!cfR pg3xI1/%T8VRTe\/wxY ֶVl r+xl [\_Up-1LZäZ~ԒX$΁ؤ I p 7f^[(U\FN z?n.܆JUvU906,emj{hѲrTinUXD+Ę*E&i.aA*$2b:_ѴD±*%&A܊`_u5%)eEfyEM̈́vKe!)JQ%ܺbnBUFRf pjuHn*BqIi* 8EUR#d&\,TbªԬ*RhdR8 Eʹ1s#8*¾ 5ӰOI$pahKGd Kj&7G2VŢQn5b1FX]J$:# )J0(rͱ -q%ʙ;Im GV%ڀMU~1V9,G%Uax#qU IV-٨%5ܘVT#S"*yG&! ]pPѲyhkJ((GH s!o=9tk(ѰD;ZH\mņ6pC؛Q+$FLR(T&Q[UrHiWXl[w+8%i װ"Ty_):Q/[֎koFLEUyozjJyʍۺŰ$mLϥȱ95s:q߄o8 'ElƠ2=,0{*b 6w E cD]xQ5#EYanJ:9QVq͏=T=p4Uj,}9iDe&=M{n"ކp.{2'WD*<'~c">'|Pćh |$5ģ"<O"t 1$f8:S>. z$D| :EtmzΣ7;5ÛBT}P l Cb—28> nͥL'g;Pż-Gx]w9cC3xz| `[¦tx8(-6zs\2!ke7ٺO em;F?f$>X 93@haQ!33yA:_:QUPd?B{/xM/B%\/Y3\ EL,0#e\Ae%@1N3- 5D}D̿BCEK쁓eAi`˺11erïXޡAwy7^Ο2WnR~_ɴ]p9Ljgz\K"3\mEC0oˡ  XI2ra@/\u1OG21?o [@ߴ9bp7 +'B!E>e΀*`rMp؆ Ub*̤R*N_LcˌUXj՛}cfUu,ܧ "j-(W'D}ULCV\2Kgj9ciYlh1prKXv-&fxoԩY^WкyZ ׀+tQ( @^Ϣ{- ߤGmnyQo@F4p*o-MFG٘QaV48lfp}7P#@ߪv2/}6]s)vzN*Ex/ػ9lQp$j)KcnI:l _,ӶD+T`(QTH鱀J?[Zz$T-HxӚT; /,g{~`m Z-ZOR[ Pyql E;aM<,R),vwVp ver!_8wy"usqfmPcN_M' JҪXpm]ܙ-&; |v{Y.pP1P ҶiSovr`1QP@Be8Eʾƺ nmҭce)ef&N/Zs Qk&!pdilۇ~h9Tv=*˯l^,9~uh==2 s,pϕA6u p;htnޤpn=}J݄٭}9`<K|\D +^ziyAM˥fAd\Xo& ,x?,MC$`=߮#r;mcD:"4a"bgD"v AG-٪!L{󰀋0e< &kػcV 6\i8 }Ƀg"Z6x@/`j]^v  ~P}+eh9uv晁-6cZ+Un ߖŭ8܂ So#N6lD$:Cf4d>gk{YNvm8'PKLOi-PK M:)net/htmlparser/jericho/StartTagType.classWWW]y"AEF RT6MVW>5,a5ݍbk}o~~Tl9Q=JRV̽3sܻǟFAEHy7gZL甅|h,ÆDOb(&x:O2BsHN*dG䯐jA71-J0ǂ 2 0$X =}!?$Cp|q_20l#x/&ȱt0HƧ?9¾NiڙӔ(r29-&O}R)(ʥ4zR4C_ܜ ZݒAѹsiyԒ'iX+eUcizK1M4M&4%UOĪѬ )R]4 Lά: >p/Fm  s{)> W\ _ W\Z8·p;.|/ ~aZpW[ []C e/CgA AN5Ntj&Y-~I?a m 0ƧsM}K^ϻJN .Pj 5CR t= O撅`VA}U5ǭpqqgZk1#O49T(~EEj'VB&! &UK95e챠W%2 [qmjQ @șIf r#BfԌS4RĔ愾߂y@:nw!!`321++,n-!kjiכXDI9"hYF7!4Tg킙`7}Ԭ? 
swAW&=% ~ >~W^3~tܤLzФԎVzHLad':/ms"Z]kv7>]/O_58<ڮnnhW8<`9=p6ÃP@w;ֶCL~JoڮcUSSQSrvogoiq jD2\/26TM-eW*Ve T-PK?*2PK!M:5net/htmlparser/jericho/StartTagTypeCDATASection.classKk@>;nAA$⠑88JB Q7tWw`sߜ.'82 q^K>{3v ݑZud+k%0Xn,ODb?ҽs|Ǔ7PC>Cݙ0\ҥvfCžsŔ٬_C )V^TH"K3kVIXP OBa9Pa3A?04ӨhQrRtB:~c23gY~ 8kVŇJ(gCPK6#PK!M:0net/htmlparser/jericho/StartTagTypeComment.classPMK@}ۏdۦ~ď֣7! HRPKlB\x((QoBga}޼a>ܠqqqm⤉:NL3t6#]4E, w-8\eLBE4eTc0XzP [J7-zqZ0 * {*q' g7Y&BE/by{!Xrg=r:9C\ =ڜ6.`P6+vU7 ?ZDe>Qx%PKNɅPK!M:;net/htmlparser/jericho/StartTagTypeDoctypeDeclaration.classRkPn&i]fsӮ·U!P!QhH.mF{׿a>P??J=+:v˹s׷Slypy\‚ؕ4,r>W4tmu w7#0 ԶyxIu_TnA j# [G J;9y<݈|ek*sxv)ixfޏ!ҲUc6# i,4T ,cEUp9X k8c{'`#u Ϭm[ ݨOIyFN ڽ^Oty=e0E_ +Vuq7pm˩CwJ/*O/3N$ZI((T,ˇP>A,V$kݛ_uEgnBPWM- Z 1Տc y a$"net/htmlparser/jericho/StartTagTypeGenericImplementation.classVSg}d.qEW J.bzTHmZ %H&qQ 7^`glvЗN u/|s~_"~`y +DU|% g5뼼8)A,vao2yoE` m0Lif 2}%2{qQ%z,3E_7%{yTrȼXiݶR%)|v Q駊~TR7gnj[^_P2n^ \a@9ש^-ø/0k%<ּLgHCbT> l)c W}઎k :☊<%ta븉UTu8KTx:Y0NWQq`Yӱ:[Ky!w:|P:>ǔM DMMO܂cWe޴O4t|CY4m,ҳ㚹[%(3/Kv ,_{Sb;/f˕,KǷ|u(;M:(o}42[N/ b,]s)T2J,<r䨀fW'8*LB̪~ˤ}0ֲqQ^=ѹVu^7m&Ԁ&?ns;p-mW/_ & ݩ(}ҾXeU`OCIʥmg-cX1rx[&β[(pR9`Hd\:Kc-8YN-E]bJZOy*ybBnNc$lgHTMς$ojγ+n"`le=h*7{WUEY*Z M+!v9èyl'/y6CS0}b?tR7Qt녆0흉bBk? N4|ħ~g u mD:Dܔ|UYjSep'Ц# JHN+L:"#vq?i(Z.bVG«MC jGgK$Z!ܘ7sJ0%a:^a܁*!&aF¬aq 9|$M0z.ϐ,86OJfسݒ[MlpY4j龬Vll&/U/WWfl:ErKN;sUg4u³Q6(k<ϫUjd[zky\I%o]:<ҾMT%nVh߆k쟽c `;2TiX\DU14?֮z(Z,W zw0U!c 7HA${-=t5|CIyF)QS oZ `~f6r;"$  >dF36N9['A+I!هd` 3ye@x Q8@gG#or^vwDx {3=/PKFUPK!M:;net/htmlparser/jericho/StartTagTypeMasonComponentCall.classQJ@=#Vࢫv]BKABBbH4%t,qQP(;x.wq|}pc]=8l %P+R;pz.GJjg0JR [GIҮ. 
T V{Ɛw$nZޏᑐ55QgW~&TPDj 2t%C V R< ''~(G@7 g✾4[&mNHn'0iنc5ȊcV'QV70_`}PK'PK!M:Hnet/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent.class]KAQswղJ!f.P$F1 tЕe?U]~Tt>f`>99g vt,Xё3 kH!A˦-&iK e<=ZO] QMԡ_m`US%kKd`nSdJO<7ɳAFz2701̏+PbVV;sO5ӵ&IM8+WdތPK BiPK!M:/net/htmlparser/jericho/StartTagTypeNormal.classRMo@}㘏6@K `!E Y ) .+ǎ-\ Q dx<{۸cYNJ*`.XCKc]0Yǖ {z7Q< E0uD~0^oFI$Z g(b'}" >aزY4I3gdڅ5nif:lghㆆ&neȹ-,*83 @=zqG<҄ac4i.<VA2p Y  eN&}_ZU 怋GS>gqͽ~R )Mj{NSR2J&2='c 0J(RQƢ*tRbVR>AUTK)ʳһ]V}}F\w(Wߢ\Q-RoÔCv+\^0F PK#PK!M:2net/htmlparser/jericho/StartTagTypePHPScript.classSmOP~.++B6slY,IB⾘Zn_l&o&()N1JO<{?(c[U\OcK*fq#ޖe(XU!㦂5U2r*2ȧi+`PNn0l\.J}1p|#yP:mRGVuEaj1r}psCDfL Z.Q؋WR j:c'Wꯈdlj(bKE4\P~pe2q ׊ ˸x!1l\.iΰ}.wc9>0 `L>{Ro1%$":4^Q*!]r.l^߀E]e$xȐ%IZ%?7^?AUd8ar7OѭS=-:MRy[B0WЙ>C/)2Q%f #L}.~$;ҕi]bzm  vZ xMB3p~PK  PK!M:1net/htmlparser/jericho/StartTagTypePHPShort.classP]KQ=WW}l&좄H ""p˺]Kϊ~@?*%-fN8v98Mp8%(L娄c7N᢯vz%"]]4cwE"?'rx7T{ E?Tn1 щghD2vM!緩[*24aI, e [n2R*-t+z! ? UmaUИ g٩(rBlNQ<30^/J,*(ܯQ|E釄oPK;8PK!M:4net/htmlparser/jericho/StartTagTypePHPStandard.classQ]KQ=^ʶ zQ{h-P)ADP.e]ٽ.%gEBA?͒\f|~}©@E;28s: fC0axbgbX9#c39R1ڕ"d۾M7frlσ-^a~mԌv7ZDTSBU|U< J' *mWX.t=ǾZ;!X5l=4fh l{C~GPKAn,PK!M:5net/htmlparser/jericho/StartTagTypeUnregistered.classTkS@=K %TyI-+PV+ ]Kl:V͟Sr) Ivs}=w0^i8ᜆ N|p  d62b8f\pGF[;x#}Teғda8#-ɕbt\Ėcج45 ُ%>'^\34Y’ @82Ϡ$),b'3EPC8SY<4[nZS$Y]Ho%$"~#h'D{5i+bMa4Bә73:_<^$J~;Bfca4io^11qP J7i}h@d飿h4ZC,T @  G׀рz_G+oM*o6@3`OyRY% 2|B̛G *змІ8jP:D{j` n4mcas;;PK5, ~PK!M:7net/htmlparser/jericho/StartTagTypeXMLDeclaration.classQ]KA=]uCMz %LB0_)_bAWvGن=~Ttz ǹ9||,!fG98$n0;Vƛ0X^EBEdFFf(gç]I #i2 _I*n/mLwXr<ҍcRf;b9 n TIUz @#_!Xv41҂P.Ń3ŧ#ΉKnʵȲ5g%C,,ҷkd^& PKc?$PK!M:Anet/htmlparser/jericho/StartTagTypeXMLProcessingInstruction.classQ]KA=;~}()!IH::NA?+z .=Bw̹s~}@ıDZe sL3n0; W=%= ?Q,#{.#ϝ@Ht]2=5XGO0dtG v0$p5~BjaW_:CѢjQ@dD [̹*AЗ X9zYؾPSH C&kAg<G8-pZC%1KNN.YJY3W$i,-2Kd`PKtL. 
PK M:.net/htmlparser/jericho/StreamedParseText.classN@H, z0&Lg⁄ (VE}(hM<>q94F#ogz),lͦ<x)rp!AדZ wz1Ő:q=W2RKdH*Ju  aȄ+ %өwV8 u ʷ8,ls1Qac y5p@#Lz}ëvl83WtOA Yw;EH)ڿIP)2(%UF.m4_$ORoh33A[jkiZ8PK1R)FHPK!M:Bnet/htmlparser/jericho/StreamedSource$StreamedSourceIterator.classW%,Bqib[CIeAe شM.J:#߉SH(Zސ@i0`ӖBl <;tbavgn;܌уd0̳{01> &L?=PA 7͏8@ ifL[,ɱ S dI>@i&M&&aA1#x88 09p؏_L~-xŋ<пV{~RoN)SK;Y1IfrL+ yii8 t|AliuMIKO.WW`m!Xժ_fuTNȌ@1nuKj2=j$|_n^pG6 xcfZ (9<TQbtt.0%#dJkfa6H_Jڌn\I|LiGfddQzМE.W(KJ8[.Խ*~;TDPы>c?xϨ؍[U<˳xv7kRW^T#Q𒊗i竁 T.-~4% n*^k*O  ƛEvP0-G^ ܃To8A믾Xo^*f1b{L)83 >R1*bILj|3|w%Xz4'or-g'9\~הgy^("\p]qj9u|I[BWY%8P-!tuuAw6 s./LsO Bq,TWKpg*\(GB3->b|+Q,kmg˷-bMm'͍q[=;.vKq7_{jЧ7Nb,a[Ym"]ǎ=!<:kAO "R#fQd~SgҺc-%$y~Rmu`K,˺#+3q3)DĿKZYG`q-U4r4S5,WR*mMQFZPKt/]PK!M:+net/htmlparser/jericho/StreamedSource.classWxS$#Y~6،`0ASdČ(vH@vRc(r 6IӳdRFnҽ#M$[Mwҽ;9oIXI}:{>sC>`V ap/ >h> 18lj f~?Nь ^Qbf0Gh}<( A91?>~|74>ӇXg ADE^R_nv<̒O3dׂXqQO&fbo3|<>%?Q=q=~L~Γ_0xį~N`^4TyLM&29CQjf⺮ͪYfu)OWQs,=.Yܫ2Lu 4dͦLG[%3^RYZ3YUN쾽mr錹$̘lj* JFNYES4dZSds뚩i-&\ 55F@N6;@';,E:-{>mnLY\f e)N9St@M;$z?@^H7Pjt iYOCɪZV:v b$9%ػ_QMXfcdDڣB)512c!9}bVQ鴚 S6b!XLщbiXj-|e=V_ [ FleM2qGyi B.WY$K/?jLjIjf\GTQ9Kjzф^KB/}K `1wAZ*:! 
H^ϸh~!IQ4ћ&' ̺:N>Ʀ캢c;Ԫ+|چyz03RS]7Gz@"b HN16$fF 5}7FNW@ft5ֺNsx&}PR (?1e?^hvDx?9Dx+k>PK ,PK!M:)net/htmlparser/jericho/StreamedText.classV wFGllp$ LB6&1zJa,m<8BB閦mB7 4 0IӤm/;Fzst{w}nIx|1xΝ; s ǯ(*e>~7M}e"; q&Wy.1UxMW2~ s-]Flk47#g<_͞JWd\Uo3F|,u nȸ)cV- 5iL[Š)+t1ۈ6VgZp/19JcGC<s"m&!S?4͓mzКؗNt{v]=}G{Fv<:8G.lO[ig8 !ԛMX~7pǽFY;i؞K.oT25҆mJ.6o:~2Ig*3iw7tr25PSfj؜v( FoQނ$Yv(D%lKh; )dLk™ qsIN  I v1|q6CN9'XޱUTlSu'OBlۼJXFnV*AVIB ڗ4]Xa"aSL;Q"m䤙<6ks6pʴ3ӄNs$M= T:IINڻ8ɘLuN\ql۴7k9t 'Qw۶mD'3 Q*FB0gЃ^pYTBx2~6N8*>1zH^,P{|HtazU@FG +_岶crm6 - Cq$5J^i:juZAߩ-=vjNǦ1W)ͻJ哠USj1-*I?U[I iʬRVWUTP1InmjC8L?Fj 7jZ&ΟW4c ml_<1qiRGe9NCޛ>Py..vj`xΞWu"?ӱW5u Nu_}Q!Vְ.dY [D^h%3h @j/)r#u:Uh-~ukE4qo 9=J 4.YȷpLKiiS;Qo l 7CnIΆ;bknV^YsQsES>>RO <^|܇O^|J>3Yn+ E/ ,AzHkV$6uEDVMiZ*ijg :隩]T%Hg$4₪;cF>dHrjr\G]7y*Y$p<쥹z{:A>ⱁĀĔCJeс)K]ۮ:y#~ !;jM<]MjOTFPRjy/^P*X= wBww*]J%F>CaC9YUWi5G1#rx~,x~$yN1#>lٻŊI^#!bZB_?<99[rfʣj~o+~쬚SӮ0~,^][3[3&z$/ZMlș_G*%~wx4 Ybb%z?e{G\(V.2ELPB!%SbCa{Gą2EU=%vCqeh݈M=P2if톔x\io/Y83Ĉۜ9r_B)a PVΘ9tk#Z6BVAK60QaA8*|.bTz3H$)c!KGRglK!wy%csaƊHUG+wY_K̼fSZΒ. EFE](aQ!m)AGwvm0HRdFe\#Y}RIKx=H zEwn=DO~']t)!n!qJiiM}>{h^<*: )"-gn*_CU_E5x+>Fy#R繅%AS$ _d Uӫ˻VC FnoF+ oP3]M}!,ljWF`Fm͎{: Zv\CD}ݰ%.!.+Ւxs%Ğx(pYy .0s5fr``-<jx3-QÐvј5ҌT:*i8Pe\m0{4i(+*"Uvu3Q{#L5V4P,_>fYW;?;wZJ r/`)ĿPKm/0PK!M:0net/htmlparser/jericho/StringOutputSegment.class}TmSQ~.oje,j%fVQ%n,K-}Qje3`{}s='9#.  2n } ²?mq7%X="ݘ@U!dM!VC $<`fx01x,~`1{;=V|lWmYg-a0D;XQs!&Mn VXѫU#[(<_D"繵f'VÔ "S}հN_rlWت OAW,lKxBLzBWS!Wf=SN}(mFVJլJJr3oy:gPçl+J!  +7XLੂa(a\BRL*R+xi7c:dfs敢KjExn fpE+$“b3hѠt_Tfg4g%I#whP 2̪mgug[;ikT=ujNjtv:a^h6FP}bB&iFHN. <Ǵc0m.mn- r_!~6LNn=Z>zcML;>4jp}r-RGͮ!TrrJwa<~Q'E 0|섧 8TA̞ M_'x^q:A{~{M,u\l͵PKwPK M:0net/htmlparser/jericho/SubCache$CacheEntry.classS]SP=&M R|( iDVfđ@/mvҴ ^|:c[Gg|u_/(/ݛ={ǧ/Ʊ! 
] 0\pI@BeI * cWqEU4\DŽ8Ljšִ[ 0y'wX!P,=+[tyvrKMnq7+$Zi+B6rKd `P`lZh 3Vd&jͰTC 5(?y 5Gq+D])P*ffK_P y&}Ph CR;jUr(Qǩ ~cDlBv)CwBROF?cI59%(ٙ̄ߠ#"iW_{սV#ү 5.`?PK͖?PK M:Dnet/htmlparser/jericho/SubCache$CacheEntryMissingInternalError.classS]OA=cK.["!ˆ l|\!in6S~/K?eS&瞝s=wf;:Xt𴈲Kls+Xlͅu #c*iBRN? Z׺A9jӧbzAH_aҕ +O(J~֪5NNy8o|uITF*y#E]>+o^$tD$7a֌cέ%B#Z0 {j3~|DN&lh_画񘴋3T,[\sd&t\/{aLN3GIU/%YJ5GSL$)9WZ +pS#~8Y1VRͭ)L2#,iZmu??gx؏_⠆5' Zh{lU=<=;W4^Qw+ PKC!|PK M:1net/htmlparser/jericho/SubCache$TagIterator.classS]OA=mtR(n-- M&C;i)0)b47M|wʒFMv9s?B&HaFyykʻc@3 (J pS-s:CAwŞ| vCo7TMl8A0/Fg{WBבK @tCt٫ a -Zv{Bq6_xFlɖkk'RvrݺUI9 r)iJG:-"ϥ/O҂dMbyML)o>s7۞/Emx]٫rt6qwMM ÃNfHh#0%[ I3JHځD- 32g!K[ER[U̸DM*KllU{]?@<f(?g?s+~G)vq'-O8ф'xC˨*_w)|>XJa^.GQ&1|ItSxi?ͷ|1_y9x!b0/nюt&Ҩ+@k{o[ֶ@g%-n$S!#u(K6 Tֺf-!)lujL%ĉ0ӱcP7@b .`-  'DRF"qV!8pƵ9 x4&ؔ )"h5ǔC]AH4N vI~"%+L3bU]}H1-8F(ND/;&8B`  eC?EĮ֍9P̀ 2'NdLJCa Z+\Z(FD?q``BZcTII8'6mDe ] f3\͒*ެaA Хxb Mô/j؋%j5|/k^v~Sv$4-PÏ?TC/ӐFJC~ư>C$_qT}TUbXӟ+#Q 8BEB6j~˫Jï 49 dc{Y?5 f¶UmM]Lg3E p'4aLxƅ\[Z; 'ybO/O&`iӇ1HڣvTQ@|punĦ_TBPR v$._ ԚTɠB'9ȎeQ#KGA$|dqŅKh69fYrЏG j\i@ Usg.#,cۛ%&m)422(L'Xy%eV͌C3R#ri%ט3Ķ q>p7 򩿈|&DCu1r.Wq>q_}5ނv(/`8HC<Sy+wu7aq(6uݶ"fg%A&QqaR*$"8Ծě8'4%Mxs)p|C8,o"wz>iP{ԴD52Vz$ߠRn|+%ˍ$AvN: r}n 8{ [T:a&b+8|LgWެPFyʹ5E6j{]vTwe'Ej-4P#֙\4wX!$E(ylƹh*01OʹiqnڥZgIJwh"4ѕ3x,Ejf #B)ϊT(5ݔ`kM]IJ얨vOIz@K""g_ՆI/F:5{HL\)LE<.Jd 24eÜ,zNX#o= /AKLgq%ح>Ӄ3^_wy=3ԣ~GCa=Ԅ G/rnxv^pоx%DKK zUdvxJ^^,^ nt(CBu5l^| Owe(ΠVQ;XJv`1_?y|L7NWgj.$oO˯cs7+@ %1Ԟd2|IfIx \zsfuxS,*J)3ףZnFqӵ,מ|k9.Z+=f9FehCM`_TwM;K f_XҏH2@'n (6La{h'+}lOk~ bK`%ۧDI'lͬkKzjVs *&^vY2Hr h/Z5Z2S3HN<'D`EX#Yj%\Kd׿,1跣xHFlofĽ!\9'F ՘Sl6-*/hd}dHAKvdXaSّ-^h]1;KpO`QYBr ;]»]/oG PiEڛidJ{ ;&w PKCM fPK M: net/htmlparser/jericho/Tag.classX \K> QC+46㰢m|B\Hhnzް]ʺuX{ok޺=wTB^pem.'2iV)~&}Ѿħe*/_c |SƷ\Xvߋoie1.?ӏx1 S^~m7NMMǝRB#̤bᴖjQ3 e- j@u$e2!5EDTo Y60h(]K~.2然7#ڻzC͡EmHwK#|ykgW#nmCD:&2[f)e 4rILbrT(%G2a͗MwuSYk(B~7Iap;3F(=8BEs 7 <3JbDJ돥3ZJ꾥IXt@a,uSpo5թ]7%(ZLP̨ ÉK İ,̸.5U1u-jpDϛ4d;OJcpTkMtjjo`.V~a9L7w'Ծut^+Q+.3my-f.ry߶r{ Xf>>5\ElH=X]?5ҪZ`3^2~ xU W_AkoTU;^b2O3 E_ )Cw?\= 
b)7|f_K2^}ioa-ב8lWѵ܈O7I؅C"Pު͠bT3]ZQmop}ץD4ۧ ebIri4)ɷT*I OQ9֢ձUф,Rv"\$ToBz~Ŭ߭_1w"Jr\J XQFOW`87RlEy ށw*Bx*s"X(TOQ%)b1/.Q%xIKeY/<Ֆّ{-uL2ܣBjKM6EL.iZykY}':hͭuW9[xl0Fq+UtWPʜ4Qӗ FgԸ^Bj<|TORueJ,JIꌥ#ZF?ZTK?Qf|5msY+?,뚜[ ^v+N!3ExD N kQ wuVȉ?]i,χCo!3FE YQf1ʦ%(d|'Uʋxݽ <# y%;x> vI`;)aSC6vhRrT6:L,zr\;g{ ̎\`Wczy&YriVQ`,f3jY>X 5=ճ9E;ZHt[{Ǧ戴~鷙Lhh#Mo h:wݕ|LWdϸ)%na16KL&GLxda᫉I&RJI;&Q vy8ђ3SG`5 ,|*YFITdB#_ 45< o$h;h;Ă|}&hbl~]E/&fXGi +ݍ!Kcl,Ʌ&r"_{x*ݫ$YB7N} v1^S/>I$UI$I!BN$xQ,: m|:nBA TK=2|%o(\t!O>Kb<+fpB;Qж7c( &b$I#yEHh7IRHmi#O"(AHٚGMF>Q8i#5JNauOy \=ovX*\ X,)/:D>2IA#pQ|oᤑw;S, j69߻vQei`fuY A8LPKJ1> PK M:Cnet/htmlparser/jericho/TagType$TagTypesIgnoringEnclosedMarkup.classQMK@gZzB=l{HY%MID?z(qSxi݅7zp<4,hXQD) K:XPְFPlHؽ yb`D̅w{0![C? d u9!Wu1o yl 6P1t /z wL?%0 XP'f,] ao섉d"OЬvYo9iv-˚Shp"dczuS&CJIz zF:r(@ses/P 2%f *PK? PK M:$net/htmlparser/jericho/TagType.classW_;,+$dYlPk%WMe8,Vڇ-5ؖZaIF[~Dsfw>ù3{y|9_p;> m8!x$o,4~`!bE6DsS'\b&z1^`pq\vbΆ8;;!D=gzZcOxJyG xg8x.|/ ^ e~)W\/$aӺ+l MMS 3*Yay ҸM[LsZ48-a ̌oj;-A2BBYy>SY*L6 VКsucp~TϚI[01N3㜰Q4d|fZ7z7\)("zaCnԝӚgFhyСx%F])ִ4J񶳵%MNmM5mO[ڹ4+q%t׌^rJ8PMuDMе Dؕ{kꌊѣ8+, ~WxUů]<}̫XoUǫTuox$'Ot2MT U?ƈ㴊o`0DV3zlnZn\Zxb SG =ՏTXGl˙iWE(EӚkQ 21*Ń*.=bC*F1Fk3#sI+gfMn籪ಊ+}\; l ܘqMG*>?$DblN)ӜKSMsDLwcSVLXT*TP{N '7`Bk>fj0BRݜ܄SZ㩲m;pRe2jsql1mR wR¶~3NR`h`Yddc5jҕHGs.75jzbG;-*ǗzJ`MgrЯٶvH{⩚'Xz&zDQvN cT$+KY/\hWO䡬 WP.FF2B%dYGЋ>ypeS?>H%P\* JWr k _ ,,¯ '6 Iq4q=ps >̣>Kt[Ҟ7ȋ}K)[ڛ8[wt [GXDc$ͣ KkKEä:c~ ^Ŀp;ϬbG;-Y/)nIR2 l=*g(MR 5o-t汧.|g;{t.\ELb*v9tb`{'M~y D"#. 3*iƿWA^N+ڬLv/;BKMB3.vPKQn)PK!M:Gnet/htmlparser/jericho/TagTypeRegister$ProspectiveTagTypeIterator.classT[OA²\ ʥB-`&6"%& kK 6[&rQ|້?B<.B$КI;w̜!{+˖A5i8X,Ơ}P^wg0DѬvR8I +)QK 'KD^cA=_9u@ϩ=QCV .SfyH5=0ݿ_.>QIy=('|؇2&ʇ2 G? 
dG!zi.ѯHSTqU$'ƭ^2(¨<,ԀiOeieY~b$_bxqn!T}A'k RwQ["z0YBPKe\4PK!M:,net/htmlparser/jericho/TagTypeRegister.classVS[Jĵ-0-+$(8*BvT At vlk+KTNM~iS'OiN;/}L%@W@uI4bʁO(,i&<91.Wy|Fgch>_/T_d ̰P`J/Wd|_י|7|ɷ|w|?ď zYOe"P7TT)sQK$'HI+$KB(,}*Yr%_4&K)jZ!̷DHQ,IHMfih>} r#p4:z2犥dt"֬ Gc౱PX iCFB2h=094>3]_VR5dt)mcf&rtLeT}Лr17r}0zmg?jNODrednM-2 }ƷԔKo)4:((Bq+kub3}wfris嶶?"X:F5ّLb]{ʞSvYӷrNq|-~CǷ!6'/N|DAz'X _G1]VpO*2~UBR)x Ns3LNb.  K|0(c3^ػiW4u&o(ct¯eFoqM)<#heNy& L\gL,37eN w( 8ס=O/@p{jTM\-% 4d oJU*(MÛ=5, 4slpm9hL'ֻR';1YL2SL"~3ݢ]PrL{6є׌m]ڀQc𱈚@(=6F Dhp` K`X8 ƃ`x``"8C\@tl(> j$vl,Sasq/nvWfٵI ӰE-mܪun_ Â,<tNA4?סu 9Na(\^71@?iXuO)׻xي=<oeȌeuniwq@JSrIYZ\R(6#Q-lH97B~8QCnak]ƾ% gi![鿗GiG ~홥-~Do2u<DF#,ZW:%kE[=.[Gm8Ж(?%wtn9JFf߁r9hi61+󇵃* ykax.wJ!s~U.It)7*n~nϢ ^jA"7?Ts8e$'s]ϒtbWQ54)k5פzBL '׀!6vTb,@K!PN:qSgr3=YY{=o j\PXN+짋hrCF0F-F-zc,W򎱰_Vݭ}{yͫtSc:)obJcf7-f4:P5 wa}(܄cثBx Ǒ]"ؼX)<59d@fC^axuiT)݃z~0B7-O屐uPK8$oPK M:Bnet/htmlparser/jericho/TextExtractor$AttributeIncludeChecker.classOP=c~B6"Yٽ<_d~pu9 &E#t ̻繪dE5Z{V8[XLΒ4jE0mLǻ1a UW봔˶?f;>{EaXSX0$g;!@( <:uIh# PK7PK M:4net/htmlparser/jericho/TextExtractor$Processor.classVWW=dxj6@n-VZiCJ03vv푸c??o{޹s6 qvIhw{%!O{O*}ۯ"nxPIy(HညxHBT+^pѧ Q**x F)Ո C ) 5'M*1$cR`#/5p\Ƞ";,LaMN" Ͳ G'`X2kc&oI=Ўf6]2f` u3cnDZ#ĨK#=ɽfv&^f{ŨwԱ;uSwaHFoMd[3~ɧo3˰4ڒFDq'&lV4±8c1,- )ag)FKg8sCWZ%%+xUkKoं79Tc8帊'2^P@cp|8> J3ѯEQ@լ=QsJTy:CRiaS&J@ai3)-~JBo`O8Գ;ɰ35;e.a dosVԔUeh[t#)r(Stt#mԅ3)4h:n@C49o]vwn2y:hwrB-ai77~PVDn Y)a(WezfrNj¤}7ܶk"4tS$ň0fy񌼥LÂ/Ԟ:$ KzD xPdhEt C+آ/G%r70Cghf6soZhzw($5`:IVtGc` 'PouTb ucT4$j{Σ W]XXbZjcmWE|e,.#\BTK:_De,q+c.X~g"h쨋#næ t6Y+ci4(aEt_ªvzRq)%5en\:Jh=U4w( %,o F^FEƯ-}l ư"Z]ҨZA$ƯJ*YNvh#PK * PK M:*net/htmlparser/jericho/TextExtractor.classVSW.!lXVwjľT(mXQV,dq77}}OؙV+2o&Œӝs{^sOIE1F+*k\QLx\4 9i)HbJ2BUIf" r 5my=,p37ޒV}=34yebaKya[Dל) 0ˇ3ܸIQtۂ'nsΈMDG m!}{~ؙz(7\S,t))sjjt9a:v^ =4 V/R3/Ns 3 >;/Bv rvZd貂ɝpJegh>z y.kHopwCŕҏDPSأ-מ*pw2]|"n/2.$Zѻҵ$ H; ш#rZT1`òe8]% Rp[Ç8!,I$%9"8# #! 
>)>S/% eb8!.ɸ$𕆧𴂯5|##|;|0$0QjQL~a kx'4EnI-d{VZ@Iϸ|NC7\?6{m#44163 P Rp=p AV 01es&AKgo^yT9'|jm%c8V~D ȰLέUp1kNZh[$&;Ni8SBAPy9'"uzU7{$H rNx{xrHY(Of;utw>|!}KGï"Ui3RC}P t^4_"!0x)u6MCaM TTcdHM6ӻ,%ڼDP?/PKKV PK!M:!net/htmlparser/jericho/Util.classuTmSF~ٖ,cҼَWphcZ%!V5W#$|nt23Nͤڙ~(ݓPl3ݻ}vo//\C4dUt(tqIj)ޓ$`Ru|H >ND(JqUÌOk*fzgW_ f8U p|xu8a`~)x]XoO3$ŜXc-:yٌs2wx8= H/WUR*JTOPպ/ì NYp30FVq|^c8 UOݨ8*80tL3 &]@+V7FavuDue_qMdT Qh6X6p )+` CK5^uBG~&3\vĞG;5vyVvªp=qu_`8_yCRTsdDSr\cMWGZk"xŔA+3/2S=JW g$H$TL6bi%ݾI-v";W$MD3B_\D<ʹmC;~M=0PD.z%zo`u&ΒM =A1v"V"!3'$6b?9"~W([r 9o{!)# $7NŬéx#Pޣ"1;Q ]k%~@k%hc\ώ^Y+Ӣ^`J7`X8TaI+#):-.VVҋ2i}1m =ce#Z_P.luqz 'YrJYE:s&+wAix=h,483RlPK?>PK!M:)net/htmlparser/jericho/WriterLogger.classTRP]iӖRDRR/@An- J8i/uw?qcvk}%=܏8сLކF c{8 (NGcb&G(19 ((ю )W\Bd1]ÑpxjMM;];dK_7$$z*UNϺiɧc;YK_y c4֊R2rǴLaPa{s ce-zi$}QلE6ڒdN Dl&B(GuhnSjDMD xm՘PaVG ɱwBHPj ^x*zdXPqgUcbf8*N1 RB;r1Sȩ%sy9z<`Q+ TpQ2~ٷ%kT厽{6,\7v|CbeUwZw^37|.D_Z3J}AY{)Q`u&3<.2Yojkѐ^5Kt뮘Ȋx0_ X6<]|dW*U jͭ:& g_ ]QMOnet/htmlparser/jericho/HTMLElementTerminatingTagNameSets.classPK!M:٣3net/htmlparser/jericho/IntStringHashMap$Entry.classPK!M:8-net/htmlparser/jericho/IntStringHashMap.classPK M:e # net/htmlparser/jericho/Logger.classPK!M:G+ net/htmlparser/jericho/LoggerDisabled.classPK!M:P9K#*net/htmlparser/jericho/LoggerFactory.classPK!M:PDx+=net/htmlparser/jericho/LoggerProvider.classPK!M:3SP3net/htmlparser/jericho/LoggerProviderDisabled.classPK!M:SM!}):unet/htmlparser/jericho/LoggerProviderJava$JavaLogger.classPK!M:{Cf/net/htmlparser/jericho/LoggerProviderJava.classPK!M:h <8net/htmlparser/jericho/LoggerProviderJCL$JCLLogger.classPK!M:_Aq.net/htmlparser/jericho/LoggerProviderJCL.classPK!M: =<net/htmlparser/jericho/LoggerProviderLog4J$Log4JLogger.classPK!M:Ⱦb02!net/htmlparser/jericho/LoggerProviderLog4J.classPK!M:8<"net/htmlparser/jericho/LoggerProviderSLF4J$SLF4JLogger.classPK!M:A@G9e0 %net/htmlparser/jericho/LoggerProviderSLF4J.classPK!M: 
1x1&net/htmlparser/jericho/LoggerProviderSTDERR.classPK!M:ʹ'*(net/htmlparser/jericho/MasonTagTypes.classPK!M:]e;%.+net/htmlparser/jericho/MicrosoftTagTypes.classPK!M:tAD )q.net/htmlparser/jericho/NodeIterator.classPK M:p3net/htmlparser/jericho/nodoc/PK M:0xB3net/htmlparser/jericho/nodoc/SequentialListSegment$SubList$1.classPK M:sl@K7net/htmlparser/jericho/nodoc/SequentialListSegment$SubList.classPK M:2c 8;net/htmlparser/jericho/nodoc/SequentialListSegment.classPK!M:]m M| 6Anet/htmlparser/jericho/NumericCharacterReference.classPK M: +Gnet/htmlparser/jericho/OutputDocument.classPK M:Kh*{Qnet/htmlparser/jericho/OutputSegment.classPK!M:`ݑN4aSnet/htmlparser/jericho/OutputSegmentComparator.classPK M::U-&Unet/htmlparser/jericho/ParseText.classPK!M:5Qj(Vnet/htmlparser/jericho/PHPTagTypes.classPK!M:B?-09Ynet/htmlparser/jericho/RemoveOutputSegment.classPK M:4@[net/htmlparser/jericho/Renderer$Processor$A_ElementHandler.classPK M:hcrA^net/htmlparser/jericho/Renderer$Processor$BR_ElementHandler.classPK M:FT>`net/htmlparser/jericho/Renderer$Processor$ElementHandler.classPK M:sXOPG?bnet/htmlparser/jericho/Renderer$Processor$FontStyleElementHandler.classPK M:ڰAenet/htmlparser/jericho/Renderer$Processor$HR_ElementHandler.classPK M:@}z&BAgnet/htmlparser/jericho/Renderer$Processor$ListElementHandler.classPK M:owAinet/htmlparser/jericho/Renderer$Processor$LI_ElementHandler.classPK M:YoBElnet/htmlparser/jericho/Renderer$Processor$PRE_ElementHandler.classPK M:8FFD~nnet/htmlparser/jericho/Renderer$Processor$RemoveElementHandler.classPK M:nfp#K6pnet/htmlparser/jericho/Renderer$Processor$StandardBlockElementHandler.classPK M: S Lsnet/htmlparser/jericho/Renderer$Processor$StandardInlineElementHandler.classPK M:zAunet/htmlparser/jericho/Renderer$Processor$TD_ElementHandler.classPK M:H'FAmwnet/htmlparser/jericho/Renderer$Processor$TR_ElementHandler.classPK M:S8#,/wynet/htmlparser/jericho/Renderer$Processor.classPK M:}*$net/htmlparser/jericho/Segment.classPK 
M::iNx@#net/htmlparser/jericho/Source.classPK!M: ,net/htmlparser/jericho/SourceCompactor.classPK M:6 06net/htmlparser/jericho/SourceFormatter$Processor.classPK M:g7  ,net/htmlparser/jericho/SourceFormatter.classPK M:LOi-%tnet/htmlparser/jericho/StartTag.classPK M:?*2)Gnet/htmlparser/jericho/StartTagType.classPK!M:6#5net/htmlparser/jericho/StartTagTypeCDATASection.classPK!M:NɅ0net/htmlparser/jericho/StartTagTypeComment.classPK!M:w;net/htmlparser/jericho/StartTagTypeDoctypeDeclaration.classPK!M:0 >znet/htmlparser/jericho/StartTagTypeGenericImplementation.classPK!M:FU:net/htmlparser/jericho/StartTagTypeMarkupDeclaration.classPK!M:';net/htmlparser/jericho/StartTagTypeMasonComponentCall.classPK!M:K̢UH$net/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent.classPK!M:0s8net/htmlparser/jericho/StartTagTypeMasonNamedBlock.classPK!M: BiUmnet/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalComment.classPK!M:#/net/htmlparser/jericho/StartTagTypeNormal.classPK!M:  2!net/htmlparser/jericho/StartTagTypePHPScript.classPK!M:;81net/htmlparser/jericho/StartTagTypePHPShort.classPK!M:6"4net/htmlparser/jericho/StartTagTypePHPStandard.classPK!M:g!5 net/htmlparser/jericho/StartTagTypeServerCommon.classPK!M:An,< net/htmlparser/jericho/StartTagTypeServerCommonEscaped.classPK!M:5, ~5 net/htmlparser/jericho/StartTagTypeUnregistered.classPK!M:c?$7net/htmlparser/jericho/StartTagTypeXMLDeclaration.classPK!M:tL. 
Anet/htmlparser/jericho/StartTagTypeXMLProcessingInstruction.classPK M:1R)FH.7net/htmlparser/jericho/StreamedParseText.classPK!M:t/]Bnet/htmlparser/jericho/StreamedSource$StreamedSourceIterator.classPK!M: ,+net/htmlparser/jericho/StreamedSource.classPK!M:٘)&net/htmlparser/jericho/StreamedText.classPK!M:m/03/net/htmlparser/jericho/StreamEncodingDetector.classPK!M:w07net/htmlparser/jericho/StringOutputSegment.classPK M:͖?0:net/htmlparser/jericho/SubCache$CacheEntry.classPK M:jUD>net/htmlparser/jericho/SubCache$CacheEntryMissingInternalError.classPK M:Cu uI.@net/htmlparser/jericho/SubCache$FoundCacheEntryMissingInternalError.classPK M:C!|JAnet/htmlparser/jericho/SubCache$SourceCacheEntryMissingInternalError.classPK M:PY15Cnet/htmlparser/jericho/SubCache$TagIterator.classPK M:CM f%Enet/htmlparser/jericho/SubCache.classPK M:J1>  Pnet/htmlparser/jericho/Tag.classPK M:? C\net/htmlparser/jericho/TagType$TagTypesIgnoringEnclosedMarkup.classPK M:Qn)$]net/htmlparser/jericho/TagType.classPK!M:e\4Gdnet/htmlparser/jericho/TagTypeRegister$ProspectiveTagTypeIterator.classPK!M:6EXQ,Rhnet/htmlparser/jericho/TagTypeRegister.classPK M:Nf,qnet/htmlparser/jericho/TextExtractor$1.classPK M:8$o,srnet/htmlparser/jericho/TextExtractor$2.classPK M:7B!net/htmlparser/jericho/Util.classPK!M:L/:D)onet/htmlparser/jericho/WriterLogger.classPK7jericho-html-3.1/samples/webapps/JerichoHTML/index.html0000644000175000017500000000073111167321520023051 0ustar twernertwerner Jericho HTML Parser - Sample Web Applications Jericho HTML Parser

    Jericho HTML Parser - Sample Web Applications

    jericho-html-3.1/samples/webapps/JerichoHTML/images/0000755000175000017500000000000011167436712022332 5ustar twernertwernerjericho-html-3.1/samples/webapps/JerichoHTML/images/warning.gif0000644000175000017500000000024110400614236024447 0ustar twernertwernerGIF89a! ,f   XH!D&Xh2V|!Ȍ I\($Ñjdr̆7a@)&C{nˡ7)T PB=8ի*;jericho-html-3.1/samples/webapps/JerichoHTML/css/0000755000175000017500000000000011167436712021655 5ustar twernertwernerjericho-html-3.1/samples/webapps/JerichoHTML/css/jericho.css0000644000175000017500000000200210472573172024004 0ustar twernertwernerbody,table {font-family: Arial,sans-serif; font-size: 10pt} h1 {font-size: 12pt; font-weight: bold; margin-bottom: 10px; margin-top: 0} th {text-align: left} th,td {vertical-align: middle; border-width: 1px; padding: 0} table {border-width: 1px} table.bordered {border-collapse: collapse} table.bordered td, table.bordered th {border-style: solid; border-color: black; padding: 0px 15px 0px 15px; vertical-align: middle} pre {margin-top: 0px; margin-bottom: 0px} p {margin-top: 10px} legend {color: black; font-size: 8pt; font-weight: bold} #SourceText {font-size: 8pt; border-style: solid; border-width: 1px; border-color: black} #ParserLogFieldset {overflow: hidden} #ParserLog {font-size: 8pt; font-family: Arial,sans-serif; overflow: auto; margin-left: 4px} .Shaded {background-color: #f6f6f6} .LabelColumn {white-space: nowrap; text-align: right; vertical-align: baseline; padding-right: 10px} .Button {cursor: pointer} .HorizontalCheckboxOption {white-space: nowrap; padding-right: 10px; cursor: default}jericho-html-3.1/samples/webapps/JerichoHTML/samples/0000755000175000017500000000000011167436712022531 5ustar twernertwernerjericho-html-3.1/samples/webapps/JerichoHTML/samples/FormatSource.jsp0000644000175000017500000001330611204216170025645 0ustar twernertwerner<%-- - Author: Martin Jericho - Created: 2006-08-19 - Last Modified: 2009-05-18 - Description: Demonstration of the SourceFormatter class of the Jericho HTML 
Parser --%> <%@ page info="Jericho HTML Parser - Source Formatter" %> <%@ page import="net.htmlparser.jericho.*" %> <%@ page import="java.util.*" %> <%@ page import="java.io.*" %> <%@ page import="javax.servlet.*" %> <%@ page import="javax.servlet.http.*" %> <%! private static final String[] INDENT_TYPES=new String[] {"tab","1","2","4","8"}; private static final String[] INDENT_STRINGS=new String[] {"\t"," "," "," "," "}; private static final String[] INDENT_DESCRIPTIONS=new String[] {"Tab character","1 space","2 spaces","4 spaces","8 spaces"}; private static final String DEFAULT_INDENT_STRING=INDENT_STRINGS[3]; private static final HashMap indentStringMap=initIndentStringMap(); private static String getIndentString(String indentType) { String indentString=(String)indentStringMap.get(indentType); return indentString!=null ? indentString : DEFAULT_INDENT_STRING; } private static HashMap initIndentStringMap() { HashMap map=new HashMap(); for (int i=0; i <% Writer responseWriter=response.getWriter(); String output=""; String parserLog=""; String sourceText=request.getParameter("SourceText"); boolean initialise=sourceText==null; String indentString; boolean tidyTags; boolean collapseWhiteSpace; boolean indentAllElements; if (initialise) { indentString=DEFAULT_INDENT_STRING; tidyTags=true; collapseWhiteSpace=false; indentAllElements=false; } else { indentString=getIndentString(request.getParameter("IndentType")); tidyTags=request.getParameter("TidyTags")!=null; collapseWhiteSpace=request.getParameter("CollapseWhiteSpace")!=null; indentAllElements=request.getParameter("IndentAllElements")!=null; Source source=new Source(sourceText); Writer logWriter=new StringWriter(); source.setLogger(new WriterLogger(logWriter)); String rawOutput=source.getSourceFormatter().setIndentString(indentString).setTidyTags(tidyTags).setCollapseWhiteSpace(collapseWhiteSpace).setIndentAllElements(indentAllElements).toString(); output=CharacterReference.encode(rawOutput); 
parserLog=CharacterReference.encodeWithWhiteSpaceFormatting(logWriter.toString()); } %> Jericho HTML Parser - Source Formatter
    Jericho HTML Parser

    Jericho HTML Parser - Source Formatter

    Enter HTML to format below:

    Indent String: <% for (int i=0; i /> <%=INDENT_DESCRIPTIONS[i]%> <% } %>
    options: /> Tidy tags /> Collapse white space /> Indent all elements
    Parser Log:
    <%=parserLog%>
    jericho-html-3.1/samples/webapps/JerichoHTML.war0000644000175000017500000050015611214132426021573 0ustar twernertwernerPK%M: META-INF/PKPK%M:META-INF/MANIFEST.MFMLK-. K-*ϳR03r.JM,IMu *h)f&W+x%irrPK 6OGGPK I:css/PKr5css/jericho.cssRj@}/j4HOm!@+HZ2YvGHT)v[b˙sLNa#SIR7M.&-35}Tw>~cy,V5*'4jtKrbf>A&qN N4ʥ !aN/lcAd@0,lJm0sMAG3.klP=#D>GBkfFA A]g6Ѩysuj10]8LKPpF} Br'pcn݅#x5"q~걼lFTVH>YQo饝J':Oe %sH>\S9GDgVckiPK)e+PK I:samples/PKS:samples/FormatSource.jspXmo9$)9KJ ǕrUBd ۗC7ٖTwvv74%&Hs%"D*B2oLRTЅ.לpleK!UtJ% sRgӀ`Ixn?n_(Z_!UM7g%mƷl.=mfb:WLp.V.~gvn^ W嬄[T8] JrujSQ㊔Zo#T~^ fo3vx끻e=K*i >?"7+_[g*-s+<#~5 v1TT=LSζ#~ӑ6!J! C H *mؐ17S(BI$Yp*,<4)U)WVp01%]x8ԳQ3f:*ݛ.Dtc5C'L(66|"O.{(6Μ4K7aT;SRjXrA`[rW;> DtG3ɨrc@G-D-w"V۲A/Y1\\MvT C GA ܴ5ܲuvg{(a9ރ.E0ZO az堿b?mU6~ ,;IqݳSH9פY<-UZvB_bm@(IgJ0j?ZtyVu洴PH/c~[S[4fSK1QEB)]WlF. /` bFZ3s'0YBm O6LN?Z2LGڹM~ðF0n+{"*2DI#EVL9Ԃt-t}K*S TَVѿe7]j@%TWm@vuu2vs5c05rqVTA@ѓlD#P._/'.AKN3beZ'Z{RRݼ0&̋4Ts?亽3 )A('{?nte}7yoP) F9啘T&d@LGt./5e:tDnM&5 @<6GFi\Cɬ%K=$t`vVpEW;VRTƿΒkkQ^A1mhyV1a]2">SM/h߈$ގ)3\Gprf`WAƈExa^]\Up6yĊ-VDo98/AD T͘rHq,5w s^T Iux1.G\L1غ,C ǣxX[Mo:_hյI):/EjUq~JXq@Sg7Ű2s?en bj `MqgUB[Vb);2䖞m'R;3 ғ|¢ȖҷOtk(?Kғ&fN.{kL8j%+Cʆw@M.3MADW@Q.\dEe%~d~]=lant@8-sɵ1dpQلdǪ mE_ JV$&k|Y  ;圙H 3.۴kꭵ̔7ds|G%?#gD֛5#KoVJMNK`|rh^xYj-pnA\{pUhCv͖xfp4eҶf7( )Z̞'+G(lWdLPDP=hr=vp8<;씮坏&뱼qHU=Y`v=mtcŌpFiCwWei'%?wZ߳Ƽ#7Ow QF\[ +GWPy#Eo9. 
x];qpd̺pu9)I"#KٌPh7fMܖ]2u_d룝87,qoznYU[ c}F_m,\@@!;WP GJRv3Ծ@1=C8ce5`&[΋ ,N1|i쩢诅Քǯw/ ypLhNJv,{V>츂V!]oH>e#edb87`M#>w`cvKF <[`h9,PI$Ca2a=ٚV_qHoXIwX?#XrI/h*(prSq?@ո|z]qOU7េFKpQei^Rtb[9.ڛMة3|Ζ&VZ;^'[>gZ>yS/:&jTWq~"g;Wα 3-!2fiC_)A RR yaBgr# D4'UYP+աx~ c)ua=)Ml88 p]h4Wn F0Пw@@V Fl#)ceojGrT6SD` dV]Qs\J:♶lݬ^~C\ 2۰|u6_ /$sqkK T"I%3C&K6 ֯a5FdEcxϊ7n)ʵ*aۄXU!Mԙ !|R7a|+CL`6M7)U1}ؿ9fnXMSSI-Z'q(:\[nb (&+fCtC@l?Zu+0ƯvRN%wy% ΆhͺyMͺvGh2NO+i^ {)Hh|S/O|Ls|))YZFi^ޓ>L;.Z|A.f7e`Z ;xT|޳gw^]۔ABɻG9o`A>727 /]ݓnZZX0݇]W/P"~7Lo`}A7Q4»˟N;eאâ7 " 7%j09x i$a{p>mHQgG!&R} ^qN3_&'⥘< Ѿ*"g#h4gRHXm8=T$\̫D\YQ_i>H7q d48DR_dirƠuj'UP|v` `` c|(wdbsi1{k_Iݴ%Ԅ:X*!HDҥefr堉O>rDTP(md[ ~,Qk "EAA(:٩m%u`PQcuW-䍔 &V*컣r dk:`%Jn,5ԯ_OJ)s8N=nis"Vvdsm҂jj1t:~jBQ53r`{ip#zd㦈4YTndt~23qS`IW-*q%itI;B MR} z襁4%s4*PY]K! niz^bA7ꀤC`;: >9g<PxMn 8Gi\#6fZcRUli%Q]IGnF*";dgYvBsf4c<ս2 /:) Dz>" Q|loog,QndPSO3.23O0v۴}fOв=3*YM-k4fM1OZ{{E{GrvlCM.P=6TϲhN)}{OǸh}+} QfS:nϼ,N@FPnP>/2QqrƱnMygk XtCNM^'=r%ְ5ʉ)x+BM ب-w0 C&;;画{gH-tԙ_sSBMZ)礱GO%ģJfɏxFZKCpe!4Q=?H{$|Húykmo-Rl< IRC{}0Wj_hŜG*zm]JBM!WM$%!ˊ&m/2YFAۿAݠxٯ.t!ZERS~2soqWهl0_&/vB+C\MwH'22L)6; y Ҕ/t /SWhH [ S"rĬ:ZE7փtIF=77s[y*)\xxta>/Q0;%d&`ɣN0MJoC;|z sxv*fsMYP94n [a|qHi px-kh!16G|߆WRXssŬoQ~8'O*݋8m?Ѱg8x|+%NMMgQO0ΒMt $2%5Uƣ ֳH2oQ&oDDjy)s[$7Ib@L~,w{:8zK0a{7ekEF?[cwOJA}}E)rA"(9I*-xT#FH5UVe{N3Q@Rl7ۃpoIƥ>yjе~EtX'6MRݸQ{}r܏Jbs$|DO%Ү?N/s؈1RK4UE?Ji1l;aD`{*[~GA!>.@sKvq˾38}USCuZ 69JFFلʷ+H'!~bYcoWƩfYГ37wKO_p/6ZWtKx`;@ &V'8?j6tq|RS2$K* B@K`{AΦA, 26`9mRx,pCw{VBGt<Үz 淿NLx l/S[^@6\($ѭ]Y v08Vf) eML0V XZ,]y~u2;c3?81^{Ń'_w6Ũz1]]VY׈kClw֭uAs ¶;_ YŐV:isV+ $V#ݛD=@{V|!OnבFv4ʪڬ#0i C:>tV19{?rή5{˜dy>iƩ½oϮ\J ,ӵ0[A9)@S}6.r tFȭרKrDl<`MK$^xiv2e@td?iz>?tr>{0, Zlr~"ٲo̵P U>"%0!aƙ jnHL:oM8o\|4Eκ;Fb*@ N:+N0gk<ȩ~# _.RJ+hxd'yYT? JJvj)_&Gq[fW`h[J !ek'ѷQ#<ZOƾb8y1@ }U:W荇uz9]`d,T'KT//RYw ~L"G+m#=*3Ɵ/.p!RQJU?kjv*jhAl5SK'NBF*`7ꠞDk8:36fW9$="nY`PDAW/;8e*V3Qu7T #PM B` ku շUx Z4p}BA l+\! 2e>Gu>yA \vN2XmtIk#"8=4&YCt~3f}QrzɻQ8Fh7O:n.(- cD_S-Op(z\⮶-;WYl#Uz*&I2/f`QQրs%}hsQ|x+T34%ܘܗ fH*p?ԧ]SPWIK{eBfͫm.hxY+s2g%n|jXcG/Jv: 6ZrdI? 
~jDBСCLٖo%# j53|^䠄n4ᰦ]-ј쾈Kidw4{H"wN,Pz]7 s""TTY*wӸ8ЖMvs@ړC>om~;B+$ۓL|9G 3`#*r2ڇC*kOTu$\-Ǎ$zäk~Ttia 8{S0WA >2K>h!B:RcU!".> >V ОX+uRޥ8 dRrVijkǣ-~N_1$Ō;`zr`sw[^7[;݈$֏WYBm! z Dg_G 8l7Y,Z2=잵\n1VyڏS\);!a}XXxݏq^=gMB!WYlz劘@PXl$F{@":_.^x!5<+ =%R6-Uu=_=(Ȑ?N}R76G'dN8~o`tJW`f`5,?K{%Via%( ,YTۯ>䖁/O_rlHuGS9Ӝ9?/=%ÐI¡Jtp4Dx[ 5uu-\8"{ѣhRUf&`Û6 tϛemJ@PGRRebǭeԫuC! a䚩:T1v57 -вZ%fUri-=Tf:"O׵jY=6[Ly=UI[Q`]`hV' mSdʞ> da,Dj+ۧ`7nŒ`3Pݦ96KnGqHk98Eyt6@OqmP<4 h5I,&Nпeg^y.&(QuaՈOЭm$]jgd *»]5)# 锘r4v-NZI xbrE@%JΏR@B\>𾬵zr)!zj5-?6QxK^$_ Гau ơ&{sH=8x{~JS3!~%I^75=+cCOQ4v'bgt$cÇz ?҂}㟴W'4$}ZX&=P~\]KK (YQ'+3%bXj#!Ma/ n & t1趻#dw%zƚ;~n~;x aDv ɺObfh'TÃgx/;D|k1@J(kt3Eҟ=$EA2x(,@bjͩZVL"*q0Eϑ5c,.CVrqo*:fhM@l*1.Jlz3Ecz>.o4ÓW*W<">!^r!6jP rEzεl8,C@їZᢉ9HxpJeouj͓F;@Xc|@t\l_FkY2qߖ␓!ɓ0`H{IqiUQ;MܑyE;YhMB~+‚M6ބGq|= 9/K%Q?cvM燸XE&'7Ǝ񙘬!"XB傮 .D+@s)̜{p\GxA}cHg8D4r:QFt ͋0Z6d={ ={L;vGE(]+ 'Ld?EE/G!)3 \#9=&< &E%dsEXc|ԝrS;II-!`oL@d<"5Wxʒ ]QWv%Ed1k;䅵Bke ޚ{(їg;WZ7_  CJE x#e宾xZts÷WUۜ|~<# Ezp ^`p`;C G|7pGN5nhA`Ǣ5gb/6_2~%:B^} OmpAR &+<1mTGn]W*fXYZ`ErgGji̯y` 9Id ĜdՒ5kT4ꧩyOK4345x=0Tm|AK&qsz[ ѹvoݺXH0?<\xcf4MKArܼ:MrW4eVYuӨOUg57*FedAWK3AFF&/]\͜Mb2!=U6Foo(H "T`5 ňI:BM]YP)70V{$Xyy#iS}jp1 }iWY;ƤXd8FoX%Ń^ϤYNf[Ϙ0W<-EQ$}D慐`ҹ2X_#vv2B* *O)1oi= н ;͡=M'm]wU٩AWUk]QbTQb攂#B"&D'x'ʯ%tIjM?p@eJËGvsO E٬G%\I᭭Qp+g(ufNNos;Un8TvƛHc<2]+CuWW2Jܢ|bQ ;6eV&%ǞNFWət8zBL5M%NJ>K,5D|y8s}$ 'B )+{[Y QYC /!FLF9tIkFK_<1 ݢr ?d+ٱشֈY9e:j`f>Upټ='dFb+\`$_n00,3y㏪lʢ2u7r*Ja]52+,Jɶ++Z+*MOuqIըɳH}9j-* h|UVhӍ ;Jn^SU\V')wڀW44[m{T)}nv`ʙcwY v)Á~^K-jg&ߑhQntbM+g}dXȁy[ /T>+[5Vf^b%a,,Aټ;T7tq|Noe*F4)iNnl[GCJȻ eڕ>bƛ⅀* ] S|' 稴e3jq6 5U7;dg)cL^fn;yDg;Z@*3,1/,Ķ,ۉ#Qȇ+OqXho'wgLuaKMڐlC{ SS-Ce^\ES4&,_ZH!BaU] y)TAy/Ob6}O˺[eYi2ְ``#n/QDul' N-j/Ap;ܒ5j51?"~Q @K覹ϸ !,O f:Ηx`}TЛ^qyxGƾ`˽e)y@M7 Q<m#B! W06{4iQl`2a9}w%{^^}M٣HLF)5;Yr1\0-@; Hg4㬰59eR4ʙ\ʑ\$’yi(_xdž=˪T˪yMԐzz*)˂yM{ůL@q=%sjۙ+dX2;9̕C[ߥ˥30 $ $LTB?!Pɨ⻀eДM.=wF5nz]Lxs4 @o9ַϐFYG9JKz$J6ٰwxPyb=.&9֣A1=o$%rqċZ|̳[-.M{Y[D=략 IMo\Uf|&!hf=F>Iag/-QJ3ْ;_=M%_sP#6qⱸ_xoExf8t uB{Fu&/ʑWf/(>S5uk/֝,NRxۙ"tSg82eMFd^gg(pߠ~{d-ٽؙ;>TE˽` (! 
0|(%>RA+@}zE>n^9@WC  }>L"* _/|~> F4չYs(ILގ`6d׌Xsy~WWWƅn|=ᎸV<2 ALı;ODRóVb'^oR|ȧX9"X÷^_ 9pޟaJ?ĬPFgz0J?"b :OPp50 ~5oH/EJUW?Wֆ./J}50ǙT e(_hUҲ2Ia+UetTeUvp+U::X|cz:X Hss WT㤫qN.`?络rXbĜ2rV?B/['+W-WgqvG3/wS!l>vꆧ1+wȯMB}ґvG  Jm_Z|YB-/ñSlwfiI(mfH8{\JF" ܿ "q٥iVD&Z]:v mmzYɌ~kEXc2q[Nolx A4b.2 㡥j6N9ߝmܭYDv޿kcաu%e=~!!Qz<tƽj?҇1gpLzϼj֙4#'~.0?aAjN|6Q?0_NUT/jll\ )*fF(` V TKvB/T ?#W S[ ]>L0pX1p.;6x\3I-J W?LhXF"4q5!SxB>mMf-P!SZ!yNC;h'YG!ƣ5,f16iRτ/T-46?D[(fت~dW}kF"7vw ʑ:R$:_րj)>R Rݺٟ뼴BP{Ut{&|HfI(}kaMc%Ӹn;9 tbY+1}.5M-[^q)(xSY=ԟiS!H5fN(Dڳi= :o(ͷs[t!:\ 'im}nZe硢Ly^p,u(u'<"2ei^:FB$I=v %K% rّfg, $̅k;7nT&ʅT Ga&_&͒*ϊU v+xq |Eު4c# E:@N&F4h*3l%~ji瓕q͒`]}q8Nsbd$\!&czs x /Nx懽ec{J"h Y6`,M-^Dx%=,&(<ݬhD_@K,:2P 00MvcyA3e+(6x5/׈nf..vMN-i4U4f 1{ܟ? 7 _7 ,A_c+0_ 78Cfp{a'$OR/dTY\츙Q/2l稕?\9h_ #f<dzBA,[U%Fy1fZ8  Fx)h7QX+2cqvTc%KUcG␄CA]?DCДgic!d!<,buT(v;nüb{un0zHđgfs6kj|4W\x+{d%j:.%A] lN ?2,Rt)bE~f/,f9u]"0 тkEkk9m&kI\dJ8i v :-HZi ̈:M%|}sA1cKӇB0te]C'=5,6ܡMRj_r:.)R&Io!T__$nbuf<~ܩoudlĞAC($d>FhkYqjx:OWܐ tc7+.ڦZuKeԇqNGP@V91j)&)>qevw6RFIR|,KB{]D) /Mvv+?؞Oxi[kkV4,X/7N}3Ǩ ӭJ8f'ny%x:3uFxT~UAO6b&D3QC B5DSڭ.C*xWi| ;d,~cqj`1Mg~* E@$>Ɋ,ǻU~H?ęVUz̎6T {>ǨCj b3aҍ=ƽ Q]GWeffӮؾ1ѓp@0CRmiNca)10ggm$P_1:ijlRQ[2}OF$4,~Yizk)G8G84=Ԣl.YLsgڨBQȞ~`(8{Y)Y|? 
&g$ghg僫ϫT3F@l97F=Q :댂AuY)U7EmP*cBlHZFiχc0^ <m4ǖtMJ"UK&tKXBCRp_qBq/&3nA bsZB9VnP3 28TH^v .Y,֔ʓ3?xfQr"Pjٲ^Y].{]W`XDm[Sk<0 `Gd f97M̬ptH}0a}lms4ĸ7|#@L=^(6{5T6QꅶUU<׾E::lVmfllfKxD!_"&rۋeہ L SBbK"yz~;}"hh0׮%#V%:47EUjQ6~hS{2VHfCPZl\rsxQä|g$oNM*,3z񋺋R;C_ZiCϔ}cW83V2U%6E #N GbotŽ'lQ"ʝXΝj%j (dLGL#wYz=뎞f33 ½OV2*@WUy 1Eۭxwj( 0):c4 \SOSçGZ9n8 ű,f IH˩gĤqeل'eEG9qJGN.?g3 JζvzV2 |!JZ})6kL5R7Q` :#Ǐlgibtql:YlTV,21(dwId S~Cu;bC!l圧?_tcWEYS\ԑ' /B[I6JmhUj -rY eIhR`ux?ζ9y<2H'7?&| r̳zگ398`5Hiq`#橉o6n5rQ98o˺lJ*XJyIM;:fCVMFklMcsY{&C;NaOW[SO_ϐ;dq&sXs̔ysW{o;yXR_[fCTd4>5l/A2]F:vFK0V~dm8ɠ1G2w~]+Y{J{t͍v:u ;CM6)aOA b([lҪKҊrC@p^TTI>^ąmty6+I7z&*v-%)!wp8ԕN lQ!Q̐8*!y;E!p'kcz!V]L S ,'l-z_B>7讁)CW >eҳÃ$}] ՜/֘y  T"i'y9l&hRb mC;6[DZXR1@yhba4n(BD)|4 ty|.n?hKM2_&sO8}Kϐ>,)ʧgPN~*+ KBQ1Js:IM| yNq)[OOM4,*Xk D0iиO9E"xzu,<ͩN@e,$v)@5%.ȟ$Bh&6xǀM{c@k9ϐy8C{U3=q(@%,9t`+`mvuf8L67HrJzl"wLɊHJ+zYƟDN_?Mi_gEC^챸-\|tLE*h-xκSе 1:jS# 7o^9%IԸcynЮUzm3Ƌ< aJ7pFpzTA!3zA:%׵JVgCaqmR(IOŽB6wN^=^8h_pH]v%NQo\4rDze *@Z~JtK\KNg+ގ>^K@v[vC0BNնJq-]̃UB)G+WIA^Zsz2bx#Y8uͩ{,UҺ{]g2uܶIɻoV !/:!3*J`<4ضPew&p Rg=#/ꨞ!$'O_iHBƟqa=OtxY$ZŠ ~ƿBG{;s+ 2{[#+c++O9{QkT 3@S[IJ21gL 㽇3`W|뼒s#)t p`yvZ?Vϓ>+h(HKĞ VE{c w$c\9ݨJ)qq:#+ :',!|pQppACPǫC"0MMb,Z myΤ#S'$ 򊊪bL.|pauN6m!N"Q!,Z*ٱy>|F8$:p*Qt+Xueb*&1!C<{E(D.u|ʏdK'k/[l:*}`s~|X pe2b1s/ùD& ; `7E7zpecrI"t8e޼F,Dؚٞ~g*&zd+9O>*1HRbb̦q5y c)Pu]S긹z,lZgFsY2EZ(l\n$ڜl:|c5[&$3Fh3=HDt@$/ْHxX{ZrJlV+!7zMײE4dm&U5 \;]6G<7D5/>_Rg7i>c(<juGTŠjYZ|="0q_X}N^-żi,!ChQ%N*Hً41r3/Vѳmyk^ _?;anp oS3v7W%^'MqǣlQ:XJڢNAh$2Ȑ}q0ܞٰm/eiBG_zFH?{~M ^Y!O`Gje@4R,sWVobt̅g I{r8*v흞|!͖RdGyռf3C#A,! p/_! ,9l⎱mtZzĴFLςF#~"8R1"!prZR۳⫤+ЄL,+5 "Gd'XI!b8ohBs7DpޟO!_,aLQ>!M>f}^zzlx;YMV:P4d+ 6gv/GR!G s6d*,z!鱀kh3hvhU/'}[]WFBO>BASD#5G2+c189t9='=MҪh΢آ%'Z)v97u}oQ&sec$Õ((Of7P|0^`ǴN `553dx=P5#ܖOuS 6c=n΄g9k% *n"{L3l-t!uew|aFT?ʵ|n) .I)CJ5Fşۜ7ro!6W$݀W6b12cqRI3Z槧ȝ/JB10\>UՖ_-w%ފF}q)nw_2hx}_d5WTJl`> 6(}ږnMnj#;y1Θ<`KKO?'kyVgy^qRAU {\dM-*+pCž5=y &IS6wxt|;zC-&&GBwLxgco7,4T- GH#K(޶oR]9='G+ )!u~VU*ˀ?,YkjO^ _ZYZz|5^tpRDh{HQZMAJ?#qЩ U6L"LWiI6du6CɈ.f2y. 
fk|ۼÜgI_],AJCBA'lnAb:<<n@}Bߟ= aP :Ҧo Sbn‰p|b?慄}B /3æf12u1ޜO.u ]r v -kA fL&Dy},ȶ@]@A~2%~ gB4޵tj/ %/ ,h˵ˊ'{Jp6aA )u~*פC< Vgˢ#+)5h!HShp>XMJCI-RL:,1$N?miP!3H{ ;5\{#zX35Wgq{FK}DMR!%1bJAJ1M= pc+XߣؽrsishO^<>x5KdԦ:X2˓4@XM@@r]AJ517XîǬL67kfŠ\ꌪQ&)aO k`yL 6;^b쁲ɘK]~BV31 ̓dKn4WL40!0{ZP=KLuC 6ܿdIo1Kɧ#!Vb->8N 2)IS4T0HI@40,  5eWD6I1^4Z 5:5̪q^<2궋guyZ-OarTV 9g~kRyJ3.qXGcr<ԫuhꔶ0}|{%(~!ާz][/02UҚ}?op]cKW1eJ.gv1]y%;QڥGldg%GORka2@ۘ$2=Gw'6wG|eiw`a؋UUǒ1A9o8uuaM6 OpT.9kym1X+ɏ,w]v&}:>j T I&S>ZG?ԭ׾ziT.tOFix<9Sz%zvSK̀D_Xo(Fiv/| DE6FU:+Ȅa g)N{DFlwUGO|RSS[2iKlAd%nXϙ _xО^Ӛ' "}V.rz!x! M֝e[{47-=#Y+ggM>fdޟ}uKMhGV`:BOAH0λ0G1I4'qu bN-խg:}n {݋g ʺ"J˨8c3~lIO˚w6/07!;Pg۔P- ~Nd??q=!ۦllnt|s +n1.Nyhtv}?>}^M)@uDרl4G]r(9zLjL/ܝ/ A|7E*&.ip'bWTD'2$"Z39ql3xbU`Bv0txdg߻OSPԠ\Ld@XCﰭwd*acvu: s'KΧf&V^쭬l-v'Nwa1&>]mj N&5s13qD$He/x8% dYWyO@,& |Cթ8t69;;[J+pܠEu|ugsdul}Ϗmj=mqZ< Qа 1u44a̰g׭7xm[{7J^Mh/û{l->̡'ҕ#(?Xg_Qa;X~SȌZ/wͧj=l~=vH -}nK9d:8δ|K]҅}*Yg_WcEVy̻_/( VЂ:qjZUC9#\1B X*4kc%䢬r?w ;n=nZ6BaqlK|~FvNF=2B!te[d֧~=- 2[pـfeGQHFhbNkDsO  ά#('!OlH”ߤށGOGzlگ#$#|Ao̠Da >q ‚3KmV\c6Sݢ3 LXI&hrWT6T/EW"퇟,Am?ʄكpP"d0 ˬ G6&t(+v,xƻ9$t4BI94Y;B5^*?*-*4cqb ϭid4ctUˑ uijFSm\O(!֭έhϡM4԰.KtWJ->o5Ӽ#sV;J!-$s3T¬h y1>)+0K$:GG3|1Mc[P`ɷj &L *s Fn˟u%(8k"Ğc{y#(?oG||{ԼVkjB}7synx*S!hm[ЖQ514HP).`@z!+XɎtQi:0J.n}Ij_$3U5/X;ڧ|lQdޮu:Xy:uќ=GZfqi,EnPَ^I$bpD Id!"M8O8eWdj szk`n9-ӡ@-˲Y( Sc 9$O)2z>ädctBA$ |z/,( diFFF20B"|y.LKPNNm;h)-by=;ZVҢ/PE_0@>RݜǕBOz֢ب2]QTɎM.ZŹ[egOp&ݨhAV$wXܕvEL 5Oʷeļ ߸7'!NpQ?u5N/ WN>Y܆~i.f_<䙣0;r`%RiGS2 P I¡酈!PBS}ffJ4@2.ةFĖF=f)%&~:.պ%G+оn0Y "dݓ^SThhD(8M]q*@oZҹ3PX5È^v1ۅ&wߢR};ڀ!wFF K"?[f ?5S8;bnRWXi+邖ڋW".a5KMǠKoW,ך8258%܁͂=*Y-|:i<ߡYtÛ s؊c6ptL!_ٗ`IObTᗕ",OKX<4_&D>Ee~@Y/Xgk=O#PЂ9_FzhbrTZe'B4 i?f|p~8E($Zq;ME3xQ&I[ڷ?v6nѼ͎Wi\`qbi {,5{+b.r?j&azDe6<͚wNd ;'T͊sY\fu_/+4ء R%OAxА¶!'h]QSnq{QؠH.QioE9F1UF|BOt\2Xk8לad`d`2ٟ. 
4aOe/6~T!{b#Iz^ok||\HȽ?H ˨Wz:)~bX/DR'}#nPϔmᲚj,)XȠ/c'HM]^LM7_tJ0bB3Sr[0gExBw|SbҘP1a׮la칹 rph\%cc1ܻ*#/i:(t; ' 3vYڝu C}cds{  ]@Wåex=`ʤ0 Qɣ5rxDH{NՖW66S&o Dc4iWr;)Or)Cst肎ʤ1T2xgsely:Ac=u$ov*qg`\X] 3t6hi HX)@ ܽಁXUGpU' c)l1Wg' "ԣp*jTNk+Y+d] Hi\YXKʵcI&4Q8O$?CnԠC+P*bj.!B%cL8 x9Z}ʆT~`pnI*$V/ᵥ[&&GA$6asۺ$+'\m۶mɊضbmZY{ԩ:z5k_Z룏>zrF1 8Zc:?TnMٝaxqoT@g_>ԕܧp!_ TwɎΎ zo4˸DH:QLCR"LaנkFmFb!(7ޒ/> /Di=`:eY߀od '_SH\A9dIM4FV^j6XFd}BL0GIӲ ziA&R-<lX|\kHĒ?tuM΁ԓ%uu%zT#d0'+nqQ]ݍѤ9˴Z:bU)FpM/cY']-tb#c[ݥBzIbt -+}qwOzyraW9mybL cה.Ok5JuE,2ϗטmEj/4V* `ť[xHh+ݨ jҪ$ '-Q$8WJ\˷pz閾nMb㗉7rL@@2L`(.|WX ֬״ 9 !Uq"SM(UrM-1ho"lr]f|r<^+ŬŏJpvGl,xɱsFm.˽omkdݮ鮉oʷ=jDr~[ĞB}; {3exc{y Ls8_*F| #x;􇾖_"l)qj鯽hV1Yթ{qC| i{8'|ͦ p^yh('ExS/|^kڊ7Vݻ<- T~N?xgNchښqt69(C9ۨ][ S7Fum)U"{OcEMP) iJ*H|`܇#ɞZ*)^4d>fmխf io*1F)}B*lW 9'fͨݝx& νg^Xy=fP,2rGoiG^̏ ruX5UFC(ğ^8~{ QJ 9s`p=aW"3 ;۴|B&r{+&#`7fok 7qHO%se*L=$IږU{f-ܹ8;/VPNF%`Z~*(p¢(xuqZw {;lUUof>f..x rg.`gEMdHu ˹f6T(܈xY_״Z䙟H!t\ P_m顠aTl!T۔*.U5`QNE%-񅰨ښǟ.T~|"ErYxZy(i@Q_7bd4U͸̝$C\QbX5 Na$rA=RK `US.uqg8k8 1.I8߶K99 8]$h3Μ7>QҀ xɒk{ӺkEF3͓SFMv#jQ^R闘CO0oy)أRg;pcF0-sKiSf;dc-߈H'DLj733u0w@yR "dY̌e()z̘W ?@NA#6C;j'@ <UDmDڻRoF-ۢݕE6ƥ %z{vk4y0t蘻Yv7qՓe`%0ci./d3IoG&4BMg[–~ybl!qg>"^iS? Lfz?6[(/m_žC8=~~x4?s݅|_l``rj'dX;Z w#Y*$Ή]TMWepjeyk, ZEeз6C%Z]C FIGLsCc~Cn9i>8\EH:!bQb2XqTB|Nw*wk0dɲސ-(^ RLE&em<*~OIih(λ;,] u>_:&Xn;$G ,kQH(jvzw5hɭX>7b8jC/8Cha]+FDͱ8NM9xOÉtXLd ]6nQ88 MN!$*2uԣZrju3 CHs^9#ֆWa:I>"#=b4?F?W )WMHbK28>hǙҶoþBcS_~bv% kNˡ7+cNRUKUڹe^$+wLheuY,n|a6o7$^e$1˂oѨ`9 IۊGhIJлRNs=4 *pQ)RQ4O_YjȉBqچe94M>U4塰egl"]Bճ Cʝ±\S'qv`qלmOp;kMw'/4`Im,ҞQ^m1}?9ٔT +CNڪ U`n\;٩hP#q(^`_٧C'0 ylQ08I⍏i00 -F; r1G!0ޙ{D Kglk5pl۬tĿд\XUbFAIL,^$mtvXfYi*c{OR5zR3޻u~ d|]JS㧹oت]g}zvBOd^$ʵYdb%-dP# ]=i}GK+ &tH(zTr$ـq3qk/f5Y8cE|ٞ LCw@HawI `W$O" W?@"{lpSY:h,FȨ]A۳B녫j.wTZ|4;H~;APќ3S9 $o=b<AiѽwZi3w-t xZ`+m3pbXijA=XU3ϭO:=*ڔIJ]+ID|GSX]CO+[i;cF%2[h`3"50%G~N $ŵZl51x `? KYLwxuP*i嵦U(9Tze1h"qhDrep7NSyFҎBY SĂ8n 2=ktM{I!!Kc0rr>dW C d0HX8mreQ/Qݤc:暼c477UfxWe_xDI.N0F_N}"K^_ a҆e~@t˚؃Ա! 
#o=P&9Be_Vr[sG.pZI"ƊMH!Җ`9'ЦdFaODVʔhc}D*cFw@.bW<)'R&#S_㣇c8AԢr?5V(v,HZsKHK%1u偗ۧg~oK~仅-_yCL LZps- 00h0~/h'hCL|S77BVZ-!GaRbxݵUkKĖOX*A*3 H~_ >XkA/5VTāws'=*kn.Y=$ .;cy,kε]}_@꒓pO!z+}cf527r-B 4kltu QFxF_\/%.FFG9b7iJo?y)| ̱}'dO#xJjC7rwݮjt6 ;voT7mwnd* T@leTY2DIm㷤)Ĩ8׶dL>|7;f;)rl#$hB1]ZAZ[3' NZ 2}5MB$g1|5~$epئK32_EsMjxd'e$n8v1clJT7Elm3J5ZLk]wSqBqR~lao>'K5կ@KvDUavCgXhșv[1)[-2ODVbtSlrvVv5 %/PJW5TPt1v^HOk6_YoiZ0# *SheRź-|.p$HyО fs$yX?o)kS$WD <#c>뢖{/I`Hg3@S%-0V?k {Q#IbxP~-OQzXe~]@kYS_3;Rާ\jyEu 6wbL;;=1\‡ &?Gy'a졫}NܻKC39ox}<,@ii)"z6.jOт12 ?Łð髱H+'!96&Kuܽ-U-F%V9 }+?LMY!&f:/X"3!;pD)(e&ox j넲+^J~Ub8F*k1(}t2xXԄm"#DEyBNY:It"2_o 46B%nˢG FјsM=a߰gycdTyIwpJ†F \P 59v5lQck'wdF@wRDA}.PO-A-spݗz(: a(4 6_o dV&1M^"l͢})"o a["['+ Ec4XW-،nr?!k#H?wj|x`ƚ!08G)MZMn;C[DI~RnylP- lXd2@m@ ]W+@4*4+̚&ȉ'As`3[*E<%E/sɞ녔U(.٦^HHB蒏`b]^j,d4pB7(4pzEsVT#]O~1-NHm.g-mbϻuy'KMU묬WrNc K,psȍ|AmܥS6N~EW$)ur б0 K"cXVK:eJ,'?4%Q7rg*d_\ "nFZ[QE`(9[l nz>xkx.i+[Z87sC-/6lJZY YA8+0

    ylz  R]vJf=H)U}LqF$D/Km>uɾuf9G*?P _&(.~U6*M6WHרXCq2Ro?\`0Z^L~$@"an馳)y:lU{C(_+cL4T1Ӟt)y'wM/W={9ǰj`o=B:ZGEHk-2bӏ:ĆM }O)Uf+'ix;v{x{Wk0 I.9(qtI7jGټm#Tcs=.#dOz%gfZĨwK f&S$PGC?bFk4dޕ1'~zsE q֍!7􂏢8BB&"4>q En!""SR$Xd0-]2,(HC8#Vhy?Y dBQ`.1RpZ9v%G-/X癠50߀H{592f7m] ok^k`|ɹs$BI6mI0Y@] Ð9v#LZ[4[7k/U })yЮ[fq<>Q}݅$g&w8Ї#LVIp3rTxs0*-X+5n[i~0~:8BQf)*|ئ 0-JK鋣YU?LaΡM T[25Lag@Rnwɸqkwy؄A/y|ӡa3N 0( 82o.޳v c(zM?fX#YyPߡH,n\GO]K-hk3љ?㙔fJ4<.;e: \0̠T愬Zrf@AFoYGH%V9B̴D;jk`KDYdAob>:wx^ʒ֔{ %4s L"BzqxЫ>4~ȟ$~ !o 3ΚYT0sa,&7lE-(*|&,} <ÏVƣ1O؟0SgV1/A 2=V$4mq1Yc`^h{D&Xfa/iw}1+4| '^A1P*ܥjYntvğ;N| `tZ`V?MV谫$-a2[Ne׻`_`#m[ <  / 0É2RABI#yRxHq{qRy 7Mx\j:-כd-b!5brgRݻwop0b zA)Tn#>K5G$ f2@-a[Q56ZR9-#.A U$VyA{!\mTjF 2jǾ`->~_7> 8깰C-P1!mXsfɡ_4F@㼗կ6I&*R[>קRXC@d151l)do hLJbz? &i\j5 t́!2U.O{B=l]AEG~/glYAh>UDItJŽ!g}ޢ\.58G/$]xD-uZ6IwtuW\2fIj$.ATB@Mkٸ: m]tz f[Eslݞt~%eDQbׄbi魔Ґi!}ktkMĕsTkk&gUxcVmeId^oT#BjAfj( [[=@! Ӟ\˔aY>.Hc-l\w`ɛ̨zbxG6Xj46޵X ] lB_wL4fJL!V8[=C!1Sd u)3b9Yk䖮ݒut[ct?`vZ ?F#Usy :$CMIcؼ cLy1~a ~!Ma DgOj8#H qq)sƾ՝=›X~Á$ctp'$1;Y5k&l7T4FƋ<uf;$!2 5sRX~ɍ&[TE3ބ &nĦ.|hV 07ɘ\No]p.{Zy6w?n N)2j]6Si[ E5Yd_#ixA/)m ߯1v $\éVQ@B,{l9"yqF Yhe _NR^곩urxv.l !:W^J'$AL2f mo"7p{艽M*lH&Es{`3`Yr pM;z&tAU3)sr/ml 9!DGG#N PXgP'H %CYEmB`6"pI\#jB$)j{ѩ )qG^g4; sL"<l%@IϚ d:PG,o5ԆE;IY>Y݇ 9bҐ4+m)n"2EJH@}Oj}ohlN؟woܴ^/=vg$j wnymX qW ;?&s3pq-OG{+t;zK7n㝍kDW/oΏ0=;w&0Z{N$8$Bw¦¦~[Z5 0ڼ8q+l1/a"WC6o~ PH$9}90, !fc5@JXfjDZ1Gu%trQ8c֧Lf٢al U#?a!s2Fb̸z9mw ʃv}? 
@I7f 6\_@l<2|O%; }*DDAG!A$|E܍.*`EM.| (ZBU%Jd;*#W$-jB cg&H:=k/iB7UPbm.ͮF?L^Np{kj?P5s2DsM5RqQuPZX }Ewܣ0LIb|6.6Y?<<:7h<ۅ6އt!I47$ǣK҃3} ŢzjޕV*R?B]-V^%E&SqO$8P_ֶׄ $<'_ӲwC 3dɱ'ґS;kDPdsd>[Ua*ڍJݼ#>vr-}G,6j` .*(Vq֍Eyv 2#Îxܬ5ODhIAOj8JQ:.^?HpVFHrUßұPKY\+7V u_GNT-spμ|#޾M$~3ѕQ [ˣY ȶi8ӎNV^A+'A I/XIn totċb7$ r \ &nwlUX/$b,[<9z<ĘqZ 01X iɢi)єU$BJ-E4 I $c@lݸX EX8,9(= 1?+:/GtY.xztSk#:^h5Sm_C i:ft)|+ )FBW >3[^b3MhP{1" uKY KP2E եs[З 7-;L??%]-9\q KiDdžxT=DӤmbH̋Bɏd=lLt1; x ]m KdI} TK\do'< "NM-//tZ ^I7PgP/@3mHfiLj40Q0~=(+w}q/J}qR/N R&{uyɷ*/!y@ .m{iř3K)ߜ Wxw@vO:j#{!4Vww9prgx \Y7(?_!Gǖ#m=43nƮPƮTB>&+.MPNFM]=l g}Bʲ`F+pFa=Fo2u^@axG ]{f%~Z*%/R+HV¹j-Kܺ(EQj[/ %Lsb.8jYYnڇ sF=Ƅ+~ςs##J#ˆQtX}E"7-O-{7vP2Ź=t젮 Tn8xچxOf]gqP8Z Xs+u%- -u ]/Iq2a6U_p_EO%m]G^(GmsQ؇YAd:"B;3֞ߒ*T C1r}'/?> RCe#rPiNMlᢹ:%JP߳gp77)["׹uVR NHK[N5р![2m.~Hy!tYmѶm۶m۶m۶ٶm۶{ύc7*2ɑ9Ftq3َJOJOɦ1vŶqV9›Юıv9nCσ'v x?Fɇ I&[vƔ_ "6j6QZ続5&h"ed Bk-6}PTynngSkM#KaPCvq 7hv(f};x`]HubZB1Zjc^} ztH҄V\v) K4O`i_xQc !3 V 7$Iy )$D+Audj+M<~ [n~ ʡnh/HF9 Tt9*)(?+WG>'vEQ:_w27}Z+=+T] 4upEdlt['oVFob`%rzXҸ7pa8w?0\&~#3-φV0@N͆w݆/4Vi9k`*sS9؈YDrUxJq;+}mɕKm/lGlu,b;Z]DaSR"*$evP&#fֆ8sWV=vEIAOG`#-Mz5IcZjlgt4ız+f!Oe!Lb3och[)RT9%7aebA\SC(!CƒqklcOE%CasmowccjhW#pA'vj ûll]Yf1,hbP3T1Լg2v9;p^41Qg]4gkdcFkZ+` ٻpPo1h12Q bvG MYeEJZKr=pXPeJNGk9~,33fT)#RVF t I&OjZ97& E)b )Dx41)5;7R #.m9,]9bH}BA55Ԥ[zI@V>)[NnZB)|ɴ ? 
h\TXYIiܒ2 BzMo*-=*{bAf yq&Xf9׸ 4 SRW0);̊TYtac" O_w^N1'0u\J ͍<6*Uu) K+D eicl"1S ;ДFG/NM& 60(Q!I{DIz'{:!{4Ac"9":~Y$J{ڙWݨOz~<@sOp""9,G;" RzH9-%L+IeQR&FuSZ/š2{E۶g9f?PIER2tU{jvtAuy (0أ*^ՔiF|Пka@rSCWl %Eޛ+8insP+DVI8Vtnа}yK#l-[#1lRlbM-w!ޡxe5^oIQxQ?o+p4ySd؏[B/ɲi?E}dۘY> W ]BtWL2=#8'퉏jT=rdmOkyYo¬v⪾ ]ZhY_aV׋,5@# {>Suma1d<:suiraNj:{V5N2 EsLVضft 1zi[4g|y{aMBc޳1.vS,UutGuP㜮\#.&%L١zyB?f`wG;p +>)ȱ#vOMP͋\/7Z*R] p{9p]KR;Aky%( ߥpOb6&YS^EfTOJ\_K 0mf BAo&X,t8KmiHp0mwUrM'rޝn!wz~}{m W?nr_QWg&ӎ!x *Mor;=p O.)dힴ+oצ;$ $gbphpk-U#Df/xo7Cڂ?v{v_YF5QxRuviCWs [h hc +DV+ke[jF9-/dKVSka&v+gFPaAo1\q:lpnx9?\˻7{6AvzxRW*r |Nӕ s5~K\:Vd%ȝFCMQ|mpM'uReiF6aa qݍI6k'sQqN"{[CgRiq,H\oRfnʖPKZ$+J15Z"XE*qXcTHt`Rsr(k"SCt:th ˹p|9k"IDzħ5LN̸͜bJ'[dZ4ӛWڶ|Ԧ]Y##ZY!x7ސ/dk N/ϙ|H7U,(=\ymW$gW^edXm+]{dpG)U~G^G~+,#:ϥTI8jox!I#Ƒbb[3v%6祠D`$7G1xM/\B" OO5@Pvi 3yEI.W\݋T<[$!z$]el:=^T>@kkAH먅y'7Y\a{rT}zA_8VR9&ޒq_Dԏ%|IwJzc}~G<0i컽 ՜d$iv<^Füi Kc`㖇4ܺIh dd.zg%qkP8dYQq2]!@ 0Rtov+kO V/>A2OfkOZKOϼ'^x:_1a<Š,a!ȩfala 8)<0P,,^5b-? q)N-KBƀ~iO.Ii#5/6p.t0t7ܙwCtN -XHQ]TIq!&`仓g9꼶++jXNeǣ6!ĩ -و?:njD wiXAV#&|`B蟗&.Z~"CCdxFD/# lq,0y]`^14NP2Hv5 w%fW|j7e>KˤG^1-ת%"pS]V·WոڥkiSl{kmXw@w@\ 0'GKLKoŔ(?I8L rY\U3C4ɦttALr!QdyQ %TXΔF|bI 9 D-e*a S y"$oP̟ yC\|> mU$ vA4=R9pq&gy@Mfxߣg7 2ўyú ə D]:y`4Ww}S?ay+5> D;4:LΝ$,9l!t3&5X$MϬzS0ZPZ{5Pm r0)Z"øà)ð[1]>]l 7ݷγ/k~\P%L[{݄*{--{>P;a^ Priu!wn ă_BFӏe^{)Ppp9N3"}B=nO3/09 HAb8AߊxsÌB>Hx & Y<1پ45z'_n}rF5=^ɲ?CQ7vog\ ~} {>ī,eS" 35 &Qxqk)[R/W 4uHRaׯxέ;Z 0hSC^fiF#,)ν_LX.4fHQu]mR<(m]UsYϫhJ,oo(jPYc%L[+|Bר*s5Gv>'wd|5) ]xcj1XZĪ,}閘ZG8D?sYHX8 9[{a{<M~Җf`ݽ"ʁCk1\>hXC;) ˊ=HI׬<ТA^JyFw (Ea-D]& doE~8tYx E{`zڳ.VqW@bW)h> "ݷ |.^)xDKn$ФmJl|&]X_XYYPGǒtRTfqky{:@G2rGΈ<y2JΥ1lt3^g@z$ArOCNExu)V~aJ8rU3=Q$Qڡi+S6ʄ=xb*)N(֦ U!ǽiڙʾ j&&4AR%~,M )E" oZ"0XhlYޤ%$\>8~ غ^$Toϐ_K?TpﱘFgI򉸜Lfzkӎc٩CXNtA%PV]~LF4MI>Hy?bهO8k@#FA`M0*"T7^X( (2h?v@-}K~F,?9lLDkeObĺܐq#,3mrSr,hL?_MKE7j:.Nb*M-;6Ps3h> J#Wyv2`UǛGv,nH0MbaßFIƒt]˾ߕnlA?n鉙"* iN?m ^,W2CF7`ŪU1 VQc֐%Q^}Cb2Z;eV [kKHU!E ^Fb\^ȂmJUꂫ+$kȺY6WYG۱olhzD3t${|Iu$t0x::LNx}}7Z7Ĝ5 99J;# 
]TTqzX88^n^qzZ^rzت^'R#]e%=Xt#O'O'O'XcT֠6=pkcKHv򔥆MẈL['ic}9#>R#dZuszkNaT;zu4<# ܣGz3 >ڣ >oKN>p:|0_+>qz\p_+>r|&_ț+>sl6_؛+>t:}0QFO+>uz]pQVO+>v}QfOț+|?SR  ܁Ã}x(gfnQ0rqNK˂^2 "aZ9hZt&rh:WhDb(w6ٜkuҟ 4a0-Sr5UWnTNv˝Fm2~7i8tShC%' [Neo݌SlՖv9ɦ\w8$BGGMwJ4nc$T{-9__77>k^=VG7?49=S0Y@^'cWL&6 Rs&gkb/$L7ET3<>0ogEm׹Y{V2> +x`r\bge4^=VCHrwJI&-\xa\ľ(%1 MCBC! >CiOGM>>Ѹ9hP4H6=i:6=jT.>jThz!>e kZ61fKZ6=lP;6M>Z z6=M6=X lP6H35zU>N3599z=] ^n-; ε0|H&/`T,<(LX+zp5lf~-s+gwLĨ3Ŏ֌|grK8,#[?*0a*qKx;Y¿,;O#6tgҀ nٚ4ubmfe J~dڑ(K ƵN ܜGm^_߻mgU_ZQiŅ́N|_,`15 UuSj^ THRseB!!qWs*kԭn໰tR2|ӁΙ)>,+āc{]CMH&N;< 6*h ~]/d&ߑ[-+h,r"C[FZA+hG\E󓎨q*r[jI,RW&;Azfe?h\C O8 $ ƨ4s[sJ~|/1{qZ]FovݏLв*lKWgeXð=G.˰7Ch=`7Co,>cq悙C:Gs}†>AzK]Ub=mz?.}#7QB{A2v`/9g{A\j?P̉ڄhQ`y(Y+nmӐeQ.%mnANTCg]aqSg_ޣӷi1}KLC5PbXrشzvi@K$+H#N6̩Fm"9V Veo>-l0۸3 "ilLe% rjjsxn$Ѓ?}V8~"@.qo*O ɒQ(ƙ!Bʄؐ040rSR * ?( FjW"-|W42,NGNl7mky47/N*yn;J2YA+XexH4{ʉ eB6~/M7j8[4a=qUYN崪#m-C߫d8]y8u z\j-S<_چJ\*{q5VώSm-aɩT!sB]4 4}; :''$1!@ '~Dᆬ6TdI! 8ɢ!"j=#p8 :< B%~2I>+T!V&pcr`G[[UئmY B`0Ҫ"M`˪IoZI!8&3*ߢ#t&eוڦoG?qv@Yte#ˮq.ZeI[IU5Y5Xxp4Oa9#vaTbkćh!!q<iN݈2ZaܣaLh?: ˮl9#ΐJ3UGJ1Žo%vIx[cMuizqlg|K#;'?В_][hI\0?JQV~@:A%L/*^a'DVsnᵗ;[X(p#AS=ysDaZkCDlp68L8!b}:Y`V1lp fjKɠrS-ao?vd-xs9]uOFXlYҋ+V(G躩M8Imj; nސ0_J ^ρG}tVV8= TQE/oH:[:ڹ8y?-5(2&݌h[]^b R ȄŗEvMqOF# %cCd-Pm[\mZǶ!Kv RVzA<`4`Mn.8 C.\Z4 !t,Zs ..h{gL☀;ԇk_qq ΝL$09Z$.%==TVL)Z^̯(p![&Tw LՉB+s+.vq9 >SW%m:4vVr i9ڒNygV(%_qyVZtckx}a 'iV5@ߎp]"{ꓙФ/J8VU WEӓTT8:K+2a vi|DZtő$9@$24[4ՍaF/=:8i*Ajfbd<9쇘,JAbkZT.b2Q`N QI !b$BI,4!ۈ`z0PՅU`YL_ d'5Cj~䘻F6 7:\n -DHWClo v{nKKf .x"i|HtZ8cvXKd'p ?({1)'7j AZ\܎6a6y 6"6qK{nvI >٫CЫ|+Q}# LǤ%DӁv>+dH:s#`⧱&o`vF˸p%u@ [ۏr]uͧ\_xwu9ּ\#){Q;N@!|c pj#%GĪq= _V~ K-ET13 KYϺrЕ3Ÿf^gD4kXMŒe )Āx:b\~누ܣQN0s5I(ͥ@GBySGvՐ^ՈgJT6n9l*O7v WwBP6JDxZj nemF=Gjf|^~[JzҷJQy‰G''ok(i{"LX vW%{*}~ 7/Wg0AH78 ~p-~77L/VO%] :np`P^j cBa7>7 !MEu .!l4eٹ0pDML8S#p~p̪n%{rudM%XbޤjK|jk#Ԃ&E㺙E/ܸ\?>H 0I6Sl!blhd:KCqE*8x Xݨ~Q}+ZvLff9uk}jxXX`-W _g6k80$՚;Τd̳X֎%L2`cُa'aO 7RG@$PSxץV.(9`L_zrKř1z H7:I [+q`>~edB'BK8JI/UY:LPTǙ灮@ ) Q2N|9qkJ*hŎ5"S=cߑZ~/+eDL]>K@(xu +?&okQ ]SĨh+/ 
⧑$HC,Y\H#6(*t%/Aã׿ͼX+с&L5u\}0b ' s2LكнEM]" EM\[o#3 !"&없kPeK@cDbML~\`Ngdpzk6CghkNA٫GKk`-we 骢 6NT_tNp7R fO5#}i3n%Ŝ겳`9a~ľj^~ d<l.|!G+ ފ"^_׈>.>UYTt!Bݿw4d05,[i9ڰ+.E5㵥,scn@Ŏb'dqţ~ǍEQu4\ke^`GU J Nn&%(W G|/-H 2*dθf\~"/9ʒ~M۶mgm۶mUڶm۶YJFe;vcLj{b9c_Ztlu ! !|0 8TrI{ٺ o;Xݚ.fCS^Lڝd° *U4hO(?UPY$LiHr^M-}-Bs~͓;,not:aLG$GHȈhi#%I|Uzu#'\Ư]פכD6+ˇ)o0OS'rcj89d QMDɄ.e[N 3.сf!)U%Ѱ D1,M &Ŷ 3^ hರoE7??J2.*^L ʑxv,%k^uz,'x&48ԑݐ='DŨtdk~:Wjt q4Bz.j|_G'^Ql,E1ًF{Huz5^~j@=O:VF.fij-`zI[_ClO?Y+a 3#Cu5cG}KN;>ͯ$>8SY lk-B5$t026iӓUY})#|Z?mjO/4"Dޏg/N$3a<dxQNI'Pb ~"Paz[(uߠ1YS0 3 !CD22 }6>%zЁN<` XxW.!7d}(?N׾ͪ -VH7x!F@l&D vcfnI C~A֑T0s(̵wO7 LHj(x5Av\+Rk 9q@M7:˖]MKFm^xoߡ+7VwQCig"00#Zv<'ST^+uH*O!A/eʉ"OMOMǥс :z ֓?omI~WENб[d@ sϷ8,~/ؠ l̗$ bgXɲ~Wڔh]jrZ_ (1|my=no IAaE^m1f1spXX*ɚme}2) 6?v2+!.u).vLAч.U5%NH2[|c aKsF1a;'[ݿS.F#i=]Ki|[^ ]N[Y#EÒ&~0$ĵYC3X&1tB!!#m¹L.hge0 ̸ V%z-axRvK;g7.1#5Zw%4u_ *y`j0wri$-3&z> i9G8fmsIcM>F4.ƔQfSXJBؿ;wt m%LGpEV.R\T ?za'mpW]LT H5d:!f_?wP:H(E4 0!5@X$SziUj,RpC#1UhuqTqh{\vR*c[X)"R^Sav{f(ed[6:T6=ԬuYh#a=UAØ1 _Af!A(!;@pGȁ݊[ 831bg^Ʈ+g;EW!vuXW 'K T;A$/R@di4JB#f\Ig#>9TeH>C Xwy'̦?r#,W,$!_2$Z8P6c Ť8qy2 K\8b#5+dan{` "{;J@KZ9kkj_q#,hJqRG =(șof=诜7.Ngig`A k^bV|WAb+b*x7]iwFts58!@epi S@՛nUpK~L y&xxײ[e65bskR5%K& - Zx+4yR cFkFMS  YKhR"iE3H}0άmD^u"Ks׹h;M{7ߡGsߦApPܐGY XR~]Cޓ$͈ W%CLZ|*_|Y#""uSkfO}peiq+ Q!SaFI]A͎SHI\u'cNFK# "g)}S\O1fW[ I7}Y2;I&tDiatצU5M7Xp, lV>tMq.`UlҜ6bH-ɓE@WUt[V7sDW?ͬts+!7vHQc9x;BY" wa[MpXy0ew/ 0 a߆PYXDQ?0[y*+MC%2@x@"E$wK; jzO :}ISӽ6fW]T#* SmU ȀjA`z +V絶/5,_`L Tȴ$Fb/-/$ U\UާXEOCk-<+)jSNOyl+ޥ, 6Nu+I>r6br9;F$2Ӫދ&ޛV0 ]0g [4~ŭC];0JלڣM?H-4p~Y'сǶVg v@*7MynU#P'TJ8S[l$(EAl^aPh^vLRުol765>>cO*=4 +kvy:j:boި׿򶶚3l/2:zꚸ;%>;a?z^ AuH5]+^ 7AQbxGDvc3  8aF'pS9Q1NSQep@0u&ƣbnP-24- !q2|4ޭ?&>\o5.Đ,A Z/aqME0_aoWgw?vO%lO YK, sDAeU"C <Ժvqێ57@;z[&e lO3Nȩql^+֟-a(C$GYAÍl\UdOwPPNKNPkf]LaJ旼v)xh&4'#l:)QL=I-0ZM3t*H:Ꞝf#t҂y)Mqs. 
64 C)<&t,RR+AQMiM&OY 6[,$0d< +֋VfS0:"^!r0ۉ;\)Ԥkc1wu$Z,O>{8% !Yzz Ai؃ jl,L+ω.1JL"m&7T5G13a0 uR(݂ċ cBM+&Ǧ:Qmp% 'Vd5'avDXUR|(@F5le0B \ yh8dN{7}8i'l/ajLڔ [tdG}|Oy[&; 睰sl.2r39>յA5ziZqXykutZ-@!kuDaeP~LTzFa^xOS S9gU3@vl$\4ec YQ"Bzd_:qqփK0t xfLw匏tyq, .܈OhOͦ8 bSD|8]Hy:$`0XE+dN)o-8|A!4jRC0.$_Axvl_x *;n^GܮկvK2zWVg\+GmuexyA:e8HB;R%R.3bzbc ZlTlP4^`㍚p9p\B;}yް{F]ޞo!B)vC:g~! -mps ҽ_>h}*I*~@`G)}l&VR欨d1x c^D]K)4 Q.r)d]D14=o#{.P[ohg33? UZYFWuII-VRZGl TpDlԤ܁- 3 д+7TLLm=ɤycދyY*4mzxξ88zٱ5e܌"f+=bzc߱ā<@ E>2xɒ'BF eKPYa4?wcqTrsճ5$ye` 4:*unD'^ ᱲ$u6/l68F9ecyLnma3@Y[yqyަt/1[7 Q_BhY68\NN€>m(niQ+t;3DV?4%$e#9DTyYuc1rZ'{e/R2L\¦nJY!8h1Q^k]+Eá{ ԛJiUŷ0b*RhTM%|xn``*Y![F!=tWGf}Ez8xu s*FSИCOgo LJXF?Yx>O6Q1:[V2v$HENV}Q{){d JgCFq^p4ߏ*ʮSN@Q r1sDAha ya-$q`Pn>s!/~(ߩ|;a2'(V]v| ~7DOpTS>QCum69 yGisYJµ R.2\ [#ON^cL&Cp }%P V f(f ҃v?sn:yX,7@|Wȿ6 RO';Uu6QxIR e-;ؾ_\xl3(y֯aJr`&Kحmg2U[>JuL @e0c[jPL3w@LTB/EH쐼*R[f-u/w>R}7Oƨ̐sFTGGI)t;DBS]FKxN]7F$h4ɛ"SNnWF#~r苫Oڈ&Y (l<7YW&f*d&ޅJ!T$YRDh[ [ h/ ld}!Yڦj^  JveO%N 7c6C٧8.ǾMS_hB^1-u@a3oxO Vuݾ-f$lO:zNj25;"v6tͼ_Z۶>n<-GeF&iEKc]5=T;D:BC:%dOtQ%w!ߢS(PRTEEEaR!̏%qН_23eʶ+Gn2TT9ryQ dR0C$.~aAc`\9$|Z8lO}*:ϐn1[Ջ4Bprm .D>X=Qm}8񙭧 ~{Z3J$G"O_qMw$8!O'ĕ]7JP)9u0=0JbT'K/%#*UO)oС7$JuއgD;*P65|:֎_;TjS$ҙ|a䇇|8tLo%#*:go(u"h#\cY(Y z9ωI0"nǼ]@Amy z 5oAcJ (1GK)$D>)_󄇤ZsR`D֦SajyzmϱF^/ \kίL|lګ8Hv {W%$wED;d, e0 Gv a =mes,ŏLYhK3xn5b4.&&.n]2$BIjaŋ"5ίy&BCپ5$6((N liiЧ yVD" .]ookaz' 4trSI'LvraJhfLvoy-!,-EIi-ɪ1H3y~gz]Ի<]$BAHu*Q;IaAH#D7R/@zP?42Re-L2a$8;;#I#xOzVRWSIx;aZyjTdq8t69jܸ³V h-V1E^ מS jPٳݣ^= V~mJPd qkHv)f0;Q!Z>IE}"?3LXDa1aEdޫ!/"oc*|Ub?@)ι;vА1xwN\7yYmS2/(cYœp )DoG(?E=\i/gf\ uy+H/:_D9* Cf8NZ~[9"1ͼ&Hy6e~qjWQ4/#5v^z`?;\;(6O:˒r&(%wy-fB/QrW_ ph/*oh"FUFŽ8"bRWfZ=?U~ع̷!\:RA/֑.]m]l/9/sRnj/Q7v}϶@U#tΣkEG2&^=P(?-a3!sG#gGESG[6Vǀ5YqA$-(6tsuura)bszݸHY2,޹':sK#w7%-3S\uE{GG#֕eViJmI)Se2OΧ 3eo!zU7!E-ő_Dphmp"C[eFxS yDhh(N$":o,t3mow63u^գ]*]Yy06ICF/|\4EsԻȎ#C$@>h9\){(3cvJ1wPf[:/w6#亄2{v{^ЮfI&u=`lVabഖ̀2uQv$&?6K7 \t#[qϫaA$D !\f4MW޻jy$푔rTi܏66Fi).“+L夻?bFRq-T)H6 ,R'hʺJ&_S< $rqbwCA3UOfQ)xoNOK"Q0d UI$]mf9N^QK)6}4Gq 'iΐ/^/! 
}(TƿtY}TF$k1W6$n?vi"$Fs >Kr[{ˆ$9+9lRM 8}}[K\OtJ Q▴w_A~q'xGbs #[dY\VTW^Eƃ&΃rf.Xya [OhPgTtN5nٱ1,\$SUOCf^&g3CULczO5;3{_ | {` vَ*yLE>ox8;˽*OdQӮ+赲Bְ;| >\W*ߵҍ/ lKj|EYf2(o@4T>rToa*hlweo-[09o*A.jX?A" Dwhg|4?ӆ8@jgӐsMƎ¡N$}}u^R "wݣc)^*j)_:cŒ~59~Ró7ǾuMCpBDD, yVVo$F 2KZ3-]HfҤ &F$SɖH#fE]us`x:mWX*&~!ݢ؜L7;ђLg$<-CMC.$-1,H&z͡ujۺ @05'F;!iPsɎn`yˮfT26(Zļj\8&rN$E6[A_!;| $Yn(ϳZbѡH~ sHsL8+kĊ61#%xඝ|X%FQ2ݶ+ppZ/wl6.S)(7wvv<{wIqۖϊ}zk S߳Ce7 ffkD/yBPgxc+#/4pE'RKKzp%Ƥ7?a{jQi\n`{<ܾSQvLcV2cz;4[!:uWcxKzsȗyhlHTuHu8x;nծ*~^˛xcE %^ 2}rB?bAA${Lb DK&" ŀJ륦 0K}G-Q#iED"AYd|dƢV^BmN*.s\}#63hP&YD"hkK*}6YM<puʡl WpQ}#Kv[bR~v`OzOv~be| ]& *=zGJ'6RB'0M`E@U;w"F)cE$°Hq̊QLE4uD7Vc4)L'ĭ˽VPD4Wng]NpFI6㕻!srN m{&s#[ ZjxCd38)  kwA 82$젛1/ف#/ ck7|훷OW!ͤt"/ :M[}t[GphQǺN:?WY >U[N yD n &yk04'5nsJI%&Ʌ'#OQikFֺ. сʑ!&P/ ъZV<#ֶEedt SB&RHo֢2!;'U?vo$zt 5~rx͌G~rLzp3_rs.l70QB~Er/Ⱦih@!c&Vƥ:!, _ةk?$+iD 6Ѓ U?$O5+ɸӼ2WJ8~2^. YpTMp@e9}?f,RY"ʰu{d\ ? )zFKMCq)v;Jװ \$d5SuuUEۃCQ$wr`>ec obΦ$B#8<(Dxa*]\G:4ך5wC'_?]/33_(J$r忉gSt߽m*W*{:l)( #@q(R k>^Lƶ_'BWVT90d%E.y˵`>H>F _<]v&ϊhCA_seOX B&sR|ʎw:a4vy(PZus2:'0$u>F`M lN[_//66)E>xA~KqwL.z?G]$[޸Ch"B9?WV?L-}0٣9Ltv`9zBf\_ *5~yNl_ߊO/mW/XulOF 0`~(!Djr;  +7V3{%KēM^Jmζ~kp/ۏ3ط|wu'3oSvy DƧ "( I1yvkux;pF|q%6|w +<@S_;F^gQ-O ΪH?HJw "Y;zszĐ-Y䘅g'Wz]V5( Uכ/ 6M6g] Ged=/9B  usaZW&[*cs? .J:r <De杴4;E)asl[!EosU)ѐ׿ $u7,.ZE/sZgn|Jgv;}C7c22ݲ")`N_f"^Jwd0wbq>OxZ<9SP`:Uc$P%V j2vC0O(:+Zٓ{Sy'ޫKf ;d)F^u:-gllR xٲ/ź^LwfVP,98x9hEI^cc`;@!V(cLjOp_cAt@r&$ /0Y3$PFXMl\M7pE?E~ s6. P@,a= #0!"\\.u_ x=!̟Zv^ϼ}xu&ǻc過)W߹ќMt 8B~ r Tݗ@W!k9$2Oml _@hs{%񹺵߉/p;էK 7*ӹDK mpm6ev@&F:I|6oT'w/~fڕk2q}p E@p)zyi<}xcm+e85NDLvdo%P92@7"2c." VZ] fx2)7/oQ"<є#y:g;޶1bCA.#$+(`$,_ Q?|6.DWiH")RtB)M`9z/˛A7j? 
!8g2]Tg7L0xc~fƤ*F7X78Y-]vٶm۶mۧ.evٶm۶]]f"&;&8'k;smR=Wg14e1*4zwځŞ@5e( V^+tHfS3'3'RE'{3gg{'R!4=,5!zZ[ Z K&e5k:HJmFیˡIp(Xl$bpM4.L._^g8;̹peXef q$+i&j1&*X>jblmacA|[ W͐p Ow@{BHgRtd rDhvquPtcvnmobs4Ӫ̖uѽQۂvg2*ENMN,؆0Q%8 mU@:EbpW2biGn+~?#gD_+U;9ET_zFƌThnU#SuLFf[dm\c]g_^x"ZMSϙo|MAq6{-;!?*~d [J&$(%[n/]aHՌKo}ʆVz%?)3ֵ)໱gU UCtr%z"(,y&9ZSa˞G]c'\eE\pꢚey$xЎ^^OP:p^^t zu  8oS?w>DZ(D%!yhsPR tYRꅡqxސ_PXNUM6ܒ pqTh2j/wW-q3*:;F4!`K?Mُ|LWi|KuקN7$7b,3p# тIO"B\,.V>#rIc*,f|?ӋNP qhqGd"|VmWmy!dLJTw]7cp%K(9.%nBM p笩Gɂ俓dT(3Na\J8< ʂ~I)"(<(!q[EyAeG8Y!&h`9rv l"FX[17>igý5vz?%2'|jXWac!#C(iЩ9TNEH~ld|ũ@aUBMBu7Vhۯ*“?oׇ}R1cϬB$97֛o*^o@4 <;Fr03u닻 ^;7:. uqH!~SP2guw٠(ް]IX{\uđ-tqX !jR(׫0=9OCo@5%ǤhUӵekTX/ z#>o'h^16{HJQ`y_Cn f{?=цOZG!t5yE~_zЅ#Gq|.%rPr$fmN2kS/QHr|o˨W㇇XoI7C1A;,G[hIuVg?ސUCRnr;8Ȓ0MdFoT;u7m6ren7;j\:.=r5o^'^4B 2Jus25џu(>H`wY$7OݦgdsQˉ68u57ru$"}5. %q >/T ~%CiiXg&+)Mz^0f|^4ϒilSfdVm#-HI #[#@j2E|E Z ̚ifޏkM}e3SJXR¨Z}p95SwOb݅2̌޲t`!?sO 8-5t4^ᗦx7f:Lb|ؾὮ a9`)f5X]e B/HJocse~dcoAа|-Dt^ ^-:76\P6yi'b3y./<  9o?i`CWJIA7NO*;yPFxK_qUbn pENZz_ah7? Em%5B&e2B^`?Ade"9q.Due\wgk`M@VfTpΙL9Gݙ ^be;? tc v{?h8PQY(o~S11`yq:9;Sp>Ӥ?81F\(lsrfizo5r0t- {ad(tJL{HD_mHwގy18Dl q܀'?5ECOfE51޸/N4 l*r fø 葂<.kz=R^RS={sA&yYH'M BIoo6q0|i?[Kc =-"5I>3v{"u/V.tףM p룚'& 7Ow% @H/1(磍,3#ށ^쯩5o#af> o )E-y8@\8NZ#܁lbRLZ}o󿛨Z~xs)::2JCUt^7BZ^D0rKNn~bBhb.;ii loȞ `0LppHOR̊ ۇ7l*΁?@o<ʲ5p?jB* 6]45~v~7 DV /6 `D\y1ɜX!cEWsswD^wyّ z]{X&bl)u]qvTWmד{nA/99Ҟ(}%鑇C\d2Ubx_q<$'VFN¶&6x/m2G1?~f^&8+QΌjV%% 9״n=ۘ{DR.2׎PoxHI`rȅRM\(^m/悭w2x75*h΋ݩq:hַZ~#z Y4P4h.yR岭WB2U Ƕ!"bg ȰQ rlSlb{`ɞi )|:VZWcU?]=;| mn1 [ TD>Y^1i:{ʮ=zF1Ɠ;&@i\K\ZbFW@巘5 H,4~'g7&}Eʧ8%#8 Ž(x!1n> $鐳S pEl]jsʇPt2PMf@႗! ɗĒVMy4Cm-%(V;jF=EGL >%JV}3eMEdv&fxS 7>Qʵ2TIjCb@N򏥡ХVCTgbm2WU-M!tB,Y J`Ҷ.Nw+tN8g-$DG1u5cxy $KdDXn;o}y^uIIA aVZvcD)ܧ9aGO"{iBE܎/w{B# :j42'^0ҫ8EaJUSDTPuF%MvOwDiQ4?ev;~l#@̵|\5_9(&Ov{Oc y@_{'P6@= 9Aȑ {`$*J\eW6r|ģ ;O1vW ^eKK<-7_UQ ZUdF_/UDžȣ҈]&֊! 
IDn%ϱmu32wql(;>&xooM{wzGn)4sG\S6ԜwPwcI {"IK}g6 R0:1 v|D K ׽u&*u; S|JӪVWP@]ly_YO.1I})M~o3zhpӂx4sCM=o)$bR],z^w\0~B-Qee:/[4ԧ- |4K@Fl!BMCQjJ8stŊb!>gu>>jR2ˎ^`13ExQq#C8F"8> q%DdefyѰCŲ|D!7W(SKӓ֏2 ,utۭ¯ *Ɇ P3LLgwgoU[mN z~0K7Ԕgޟ w~Z#q81ؒ_ Po2PV 5:Yւ%$+_{6֧#( d>p o ռ{ܘ"}аs:pS"&PQVl"{i&S"-|lGS>a9tH*fʢG[#S#I .kcCk+c2* k X 10',5kg ,TJC[v"⟪q=jACnHfc"x*㐉gykYOПuh9]jRJo@.Ec|XAwdl\AA{hZv5zLc0\ʭ~ı] |]ǜ8@P!/om٦tngc9hfAJf@ {^5Ow/QlZ@Ej$3ȱoJy {nͽywb%X*dsnD ;4x9| RROF4/Q"ϐ&έֶ+wNd^2\X[_L[ID==%KM6WӔ̄nA8;b:E>M .jx1C9uy}q| Yyi$QQiR;PerIZ_i6Y,Q)R7.&`,0\H7g a356wz#>i:sv\R~5_MC4K0"bp5W:h{@ /︢e (A4N?#۞~zԼKve6dҮBd9ˑF+U3yÁml#|g0$Pw$񎉯[MRa⃛K[1yhP*?`3[EKap{S8 ❤/9'泲O`Kv@F@uY0+aKTnWլ3+ MA5^neA_/OZ9zh%$ʾxGb?AM?_«4TN1 M+AM*ׅ'C.9S-^Z Mx]un 2O _:) إ?lӇ;ZɅrqk٧Jl|cd VIBNBijj%(p $0}~{kjc(95O6%!zb?Y$1(ԛh|=r#ͤ#+k/pk`B-fZ}llPU/]ԦzXNLjþc2(Q%x}y6ǔXAs>(9u`!V%Tik|)/|!MvjJYZ0œD!M7'MB֘-+lZx/2Y[tJ=Q"i[V e> FAq2s%XޒfAi[^4OE  Y +;;Ts@췬,˹%dv50FEƜ @7jcWFǢ”K6\kd}gpEb+.\&NGU"UJ&1de a(: ~ pK[λ)QPLvt,,.b «(@ʍǺJ$yq ef]K5֌饀J/0 ›~2C@5K=( ̰dGBX38;Q3B3=02lnr|'+թSŎR[s%/`\sk"p%+™#r4iSBո_Q B2TR^|Ҕ{!^d͐?T?!%ɢ~~AƼ"Q$!V#3ث}?"A HsEā+dgҗ;/>p6M|a%b6Ԣ'Aye IʀtGzQ IAڮDЪwڦ7ڪCGً߫Ct`%Q F|r#L(lhL/KYaP {!-yKɖ-u,nw LeHhҐhv(;Q 4:V[@y*J4j9J>(Cz: E枿D]k2qb?C/O0Jelif"HPA&GKL8D2:2i `o#nK:&,x߼qY#fH~Zck5gÌU֔%@]QgLFXh˭zsyna/Gv;TlQK4Q$"kKm*Aj41,cVX .;oX 14 ߙtʔ!'C*YCE] ]--YWU>{#(= KȞRvtK ae3:noᆰx=.OM ضYYթGyf_3hq}p$RxI&\ AQC`ҀwAS[Yl uJ= L岊_h9h]{N[tRb#Ax6CF$ߏjÕ?b H;.Z`\JB0]N"vՇTȉ. p>t&*8'Gcݺi?3i{ך4luPù]7썲|!ԊY+&ݬmUCw-cf.)V@FtLaoM\_1n26eU) l\M (P@\v^qx"macQ?[EhkYܐL,C{9'b,)aRW`VR,ZlaN1"Y}NT  ,b-S_1WV.^spMEK*j左~jul#R-: e qWTjXF-0P;iE;h(vq|>&5y;et,ZnTxij-zc[eҽ[4ZR @tlDQz:Vz(rBnFDi Ǩ.0RJj\lddϼ#M Uk! 
d6j EŊAvzö́ bPÙq>rN Ϗs:"Oupx{ЇgrD ;̭p'Ϙ:[*zkrdz.aϥٌj9k7Rk:WR؍Sk3jRkk~uț0y>)%.Բ>940@%A'D<9;`H]G_vN AIvuuԂ8ruE0hnV‹9-.r] >«4C⓽pnߨ4#Cm lyD88P@#sZz]=™A8?5yM%U#I;ak M iʨdD)ZgH|X+b\3(y̟XV$A"V|\|QMMCÏE[U;vՠ.,HM>ef 4/dlzX`8|^>Z]ΰ&BGT9YmGFH}T#X`gańL6ٌT\`\dWzv\!T BO*^E-1 ASq\w 4imsnZ ZJùplL8PMW^.Ȩq'VޣAM\}x )qR"}VKh>-1vIpetK7L؜Ai^6mF ^L`^M=-Uj>k7˥8Hvsi6K\?b1U1 ޔme 8踁:2]Ybf*u~Ve{8ZDX l!dF6R>;b{?:bk!xgl3}^3ν# 9Zt ^wRx*/*oդH|ifY|7&FU q/peYtI,1&+ fCbы26\Ѯ0 1ՖnQޣC-w R sL]Cj-xlKνge}&wJQgܾH~:q{}_AkOǵy u1\! s:4|/"53vR굧_' UZ'j|U&xQW0%ٳ/ _è6;"T hgCTM7s韟mS~UwuݖR*oNw,Dq^⯈[}R]nfn] RIM&k,+[}A){1f_]7cxџE4kobj8>::||%q{-exژZX[(JHLϔ gH{)!FFԱV8˴x/LR%= %d s|E$ȩ14tDgtT7fN#FfY>$$)uђ-KTufD0`mfNe%-3q0.v۰e2*_S-x]00PS4XU5rUVtZ+ z*3}F"aqꩽfƢ=H)aL[%5uj'\ )@Яɫ&u{Fr-WLP===oUX7Z4:)<~+IW-.$?SB-# dTHQȦҍ9(H6&Kf.y Idb! pգRz-tPb\@g40=O[@򊛐f$NOFb{,}2@ քfS1Rc?Q|^6k r  ϱܑÇsLr!TӦXb{ȧT#"{4ɺIi_fXP_@N~2q-! +QsJqe{Ū$2bF,쀘Ε.;d"nu2@*dnnaL_&Y8S}EL/>8v/?Jiǔ8NˎcЅ't[AvzLOY8Vh-|#; JZu-vHIUB:<}"7t'l37jAy.}3'jǁB@&L AIB2zP8Rl_׶Y_3@ia4k,\m]a *8j Gڱ%Ӧ?%O b솖cz7udH5o8 ܗ"uyʗ2ݪkgۗ.i_\&A~ 9}ؙG)O(/h ݾݾ6/`[;lu[3l,kH TZҬtI^F~ "RIuG{b5 <5f^vEIH@ZJ^BD\@yN7o+r6l'-clEr omr#in6QfJ&PK .XRܿW4%>srcf0 ג2X%Mlk9d?iN$~PV0ZUpg߭))TQH>F!'f% b]`[˒#^ P;6k5 h.t^r:,čuI;h/MZMv\8Xa 6Ç|V\yTԜs!.*j͓\ ' )Eja#$CE׏!$tX'14j&iCY!`O\ƒH]+OpHuБ+0V sE0]8\-IK y2uc?e@ut#g@"g᎚.g2ҨZޥr]K?:Og5%Jc:F*ζvn55XEx2ōmi- [81{쯰Ic]_?3*YӮ`;6*u4G,|×T9TܾK=` qT['Iw.C*y@XqqֽD/i ' <%g>~75L-e:TbH} ?_4wKzuC NO±3 ]a!^91ZL74zLw8 ?>Cr~ pJ=5Tw^9RQfr5$'XX,(J Ҟ'UjGs.*QO}As󣟠>#Y8ܨID1xDCÆbQl 0ڭ{hI}2=h,VuߡpYby|b4}Ĥ%mZP=A3qqJœF500麪b%T!J~L<,peFYR)Mk[L&`;.C*\>+NPʠ\#1vv_L=@D9A "Z19cvd|.6xZEW§Gi+L=@'t.Qg;dd( ;PzG$8KP.jII<**7 T-w B 5a:IIӂ1{\ȐMdK*%J!s:)8\+W Cڿ40xRE1-ڼsdlW!8c $|㌟wZ* G8 'MMAN׉K{$[*֤E]*SOfYZ;fE{Ѱ fZ.}"G')k P)WN)Neix埴j1i"]2^` ­Їw" @@CJx Tqoç Ayu2v{|ʱRfNcÌ&m?zx;SCܱT4W=T%.#׮0=;tscߛ_D}iVnjHC#y:G;d%;gC-|S%ԣRyfWhdQϜV&&ީGߛmTz}Uf V5eeKMfVsL-شo,Zj"*m9)34U׃Y&*RhHӮ5P;eTܛU&-ֲsP"M,|3yӟSf{T$HJ9ML3T|]l`1\9itfFAQ0i+V oյb^W_WTέ\2,LXȝ<8R 3}c/r34qIv$ y2>@>lӭӚ{!:,r;H^ôWʦ~W_٘[' sc_DVƸn wHO[sg lɎm۶m۶۶mgǶm}{VT֪^9(4 
Ã=;"-\ ?zq3{FIV%,>Ut|@T2l&8*o˱~Y.FjL?225Ԍ6jҥIj3+LP)iF*z&^DƩ̋|\P.zU=\ˉX&0Y/]}&R[7^m>7? Yz]ʖx y /`[t.[l4[8Mi |E~R}^E=a琑?T^zs^G%X]QΡln^s/W{Ky lx[v>Q1a^MVjf?UAQv~ gMf[-ؾQ$^/ot+Ꮷ/}5ݮ%M>~cն{)%}~6$gw >.?T%|Ghr~xzj00Vs 3#xP'%qC,pTҬ| #5NMB:G|.iO^R<ٻn~aJM9{3{:d+,(Cͼ)$ڭ!= $; rKd }k=#ohPqvћ'ϾTԞGҀeHuSDZλԞֹ疗}koL׵r%{:x0_15 _/heH=07/u,1G^V0Ⱦ=2 ; asm,Z@:fqb"q|5p!I5˹ns -j_}u% KPnSOzǪ(J)FIpaF_~~M; {EG 707w4ІՐ3w twҘנ~~2wzR BzzDd d'y!#t_+,Fȏ ڧ_<=7\ ~<#97wK LhdɉsNG )ۧNwI_"?|.GVHQdѓ2 ap܉pmM9''b}0(0R73WXST/cyF@uH $*ۇ *4QCĀ+1 Iݯ`^QUwc:eE(B''ӓU‡ĝ'3ܕzB/yD_tߠ==r$E)kp⍅z~D-z9y Hgv=H\hl!>sj +J ("2~$'lG8 dU h.⢐uZɨp|ȊE B@Y#>ևa J;#8\tAsX;#S=`r1-Lo]Wd@ )@Jc^ ke<BNZ2dRgE_%/'C301 -K\(:+࠘E]I1zVfܸrO 쥖Y7gJt cBWծ=J}ݿdE,q=`}I-QSC@S_c󰒾>8 $\5}'<8ws{nvf4 0&k?*W>Q0REߟLkLȜyaq3PY >%uL'h>F>B2?r1՘|(ȆrzԊ|*b+4EkFN[3Hl.(=oM>릶): 7Pc"%J<ߒT|RL_|V?$[%ҴHR'}x-IaZI6 =q1U& OnKXz-4h={-r& czuPWLw\@mդ>aJ~ JQ3)1wǔ&*ZXS 2grh;}pʨ0;^;[7@>7/ ^^>:T[ }_Wl>NM#.v:u{3v C_UOh6tI4ׄFHڄD|Em Il/VdoDtfk^3Wqu'ݯQfd/*zzpʹ;@ɾJ"fV8 H+{ d66ޝO稳,W43YRtg9gmi]HHa c,ga%XXB$Y%Ak*mDEHC.-~-o [aƬ{ѐBdm5#E2UHZw>օ[Jzӂh)| "InwTWJZ aMEi0OK~E2p^,3M[̏!Ig_?ȂR.b?!^K&1sBitwqkl6c!33uOD/x۔-_Ґ}{{M bq׏җ ҙ/?#?\YJ&m|1sRv)A׍+ާ]/!LdiVI|eJ︡?~:*x-R3brNW䥮-\L12TE0|3vuM䷍Q|9 ztGt[OHQ*-cLm]#0W:N0DAge2nv=AS=4=YgSi88.\3I)j=sYu{V!ox̋9h6iʤ>'SޛY1Eo/45֕? 
@-yL趷Ek^,Z|FT7@,c.Q(gI-H,cwMT-v~g*rXZۖ[[ί4,UԺ* [PI~(dG aZYYG?Xv|3~0^cׅ=/&p$vI/9@\p_`Ÿָ^- B|Dpe09ND40w2RT߷aڿȕ\ jfzs5|#/LDE[coкtv=8&z]: 5K;Na68ѹut7)/Vp`q[$u-5nv4 2,ȿ3lGDԞlR=Dvl^OiL \Ht)&1tAr0dlM^s<:-XĞI4ERV{d?"J_w]-T' 9+Ə`)uڶt9 )u=w嶹 #3; LOZpNȺI!rǰR0r?%;̹eʡrV<^dt]ڲ93s1'zƜZw qE9\k9MC٘EIz\|ZyA5@w89 90rJ׻^4*!Ok>b9!|C&Z,mi~Lg F)d7Zԍ5θjbb3 O u5@#c;xSwu)<Тr(K#JY69Hv#ؾ'C DcqC+ pz6sRVsx`8)aMܩKޘ۾F׎L1R +^\عEa._UrHB1͐\ѳXd;rЭJH$$Yf$j"J퉖jM6Sa}Rx',uY8*7a@jlԹA"xENhpFmȠ\H ,,d {O"gs &ލ1LENHY4shCgb"x1PXp_98g$:j4KEP V晷A|~TX#`~yEA<#ń{^QLfEѰ nтta7b VKGBǟ_}KR⹬Ӷq"uVcR3]N=x\ ,⨦tϽ―6}D5XB"nBK1 אc J^LxЂ IȧkjnRM6Ww`ZHR+ ĔbL*5Zæ㙄mQ٦a҆Yab6vm+#I2< 7fNd\?*L>{ IL_t/ BSil??}dגHS&zMmٲnDϺ F6kG~Xw:Э*q_YTsقaKY!QN {AH])`4,Hed$VQC9r۰/<>{k:8[u1s{i\U5,׈MnDZ۫I:[*_sD(٤!)CEX6SL5Tjκ%P`O:(p)Ƚ OFaKoH049ː[4-E F J i_;_2 tQRےiJl&Ld{W*{ "*G!M52C $ ){FP^D*'J>VTI׹"HQߕ72X$=f#4({ZYR|j0hVs:ֆ9%iC#( 9wD`0]_+vt2$/e3Q:)qu_es!`c<*,(4-G?n[ȃTִi+b/ḧ6 LDd\]>P>JFkdpj^?QfJ2 +8@ɘ)ރ)AϵxKe Zw)I x+EpzQ-%˼kx.u_uE{&`j9pө ۛeyYR0 [-h؛&G.:l@BFc,ٛV510lu u&pewN9#Ş8W^n(hruKM\6ѫ"V ilQ톱5\vc4.&]68[  0rNHN9n̊%zsu#@3w6qQȶ ץ5:0ԁVQ•rct(4{Z_QƘq SK H>cx/C _ h;0MKى{su2cclYnjDSf j=ԕIYgQ!2b*Z>34=*DO:ιꃯbғ9IW/SUk*%TcZgPIܩdz wOοOQw().r<].}b97LG^aMͮ1MC x0$>Py\(;:/gN/bXtjxh nUj>Аhn ]Wr@"b2ߊ7JeOf\N_~>>p0RU\ Xng $LHZJHt,*}h5wa^WTngqX\QôSg^juEjPnfk}-EEbsN0X9XR b{"=&&D9T3Wq8oyS7 = K1B8ML¾jzM&vnzN&E:$\+` .E&|/!ga˝̵l$%8d.89X %dbrμ $<)1~cte:ÿ,4p9#[g\qxYxYn9VbVe ?ҶHyT:*~ss{qb_ζm8:kfaEo6hGS0)dq2?#qn&9܈'[%e9z8kR l4ʗ "Y 0f!Ky=n*:ň&p.X\p#l] D,L*ѩT8}%tD,ިaq2J!,'<ѓ< B#xH:ԃz-1+T +wE{ g lXbN..gN0/n#0Mab a"8T+G`dPMM -m+k+oK5¦g^[TwƮ}SAf>g\޾*,B݁ww.W[fm7gb>P/Oluao~yO఼;g<~*v;L͈bQߴw_}*+b ?xd*_T?EFJ1g^T ū:x9 êUϯj>-w?uU{Eu# #ASƏԆXov(>l{!\TA vaT#\Р]f^q Z8iÝ2P_^. 
~PH%5H)WnfK,oBP`"L0Pfc \ Ь墖c#]ۮ#p&t\'C.,ueѻⵥc@*_9"@xߨ K/S=D$Vz\`@`(<@l40C0.l",5tu{$\ڮN{ܭm5gOX%AN9PZ/~ ^7m!%Zaa'A8 eZ_H}%&7 $`w#!u"@, 9GZފ1K9Jr-%<^tMc8BܻB>hRIM<=cD)bO3v+G;XU`] TH-j(gpoJ8Qp+)F1ɩ;pYv`+ȣ !k EjGd}kb=:* QW4f5D$:[iu,wU t%&Ůf|8WGjf>0# P*]l B8E 1o~TL) YFFRI&CQXAQfjUEPZ*F@/; ,;@0wYbFJ5 kN:eVE1]kՇ5+9cYpXv/sId3+C>pfU"1djM-夗'b25F7f&"du.YW# Pѭ ꟒؄Uuq!mq"^ɬ5( (; ҷ$ځ;Y8{]33dWW/>z1u|Xa221ꦜs\0IMpIlQ5?kfSNQ oSz%$ v [&e:. KJ{>?&7Z =Z˹wZU^0Ei|YDNs/Q.v;2_Ԍ cY5Rif.Ǒgٚf{a]g>^&RlЬz N,oE)!~XTL *ŒX\SK̶zX$pSR5^ JFnQ3/+숶װ/?!hj?JHm݉F >ԅQMn%+6g3"8sl,Z#F+Y4YM-忓IY{@*$C̖l{5P^NC/װ}rL;rުc(7>*C!Y4 MLRq.ZZJrqE U0*(M(тIfRrfH3<1FE2wtA||k|#ּ»G"{\]m1z ,<'Ĩ$* dogdKW3BJ[ P fKBaL0s d"nlZ}#dm[T冝V-(o\4Ld6:3g%JT$'!L/E?5]Lb_.]9Q:v6%cJ058ZxKZCTeb۶3㰘%I,dmĿ@I/F#k/riˤ H-RU֤ȑjz83sLoJX[< Wu=THQ3WhkQisd6L䀱l ɺ[0v[c7yByPpB~AuьF1Vh~0a<4hU)M}Nm杻z8K<(Z궑U_BAyZ#o Dޔl8=FG7~;;z(Lt_8oi[@bp+k䴇oͲ#ON}6DֵJ9l1L@cV<̷{p.3w&# 㞕3'|,gmMh_(|G<KdzB\åhS͆'FW72?R&DQ9öA7` ⠎7jzir]U9fX/&AG$Ta(#p4#)f۾a3kB^n[[:lsM0mn2 "Iq};eqpPAǤkb7hlP8!͔2 djfbg:1nZJDK{TZimdeuJ%u~;7< !wpTe{)C-Rڼq"-^uj-wbԼџW:M&ܟ? /S_U7Px0օ< ,$SΉy., i:gye>3v'W6xOozoSv?7ܐީ"+ޱ G 8cw<4 <cwณCo@oy2 &*Hs*q^u4wKh `P% })Dj*k'}tP*=a k'۴RLMUx [ Y)/VG&'imdMT^<:SG3Z ؠ\͖cZ0GWfu”Λ ܠIMjbKfB:Y8q&=tP.,,X -9 %[bpj$ɍ3Y[)xcGNd͍么? },<Eu}Zٖ_VW ʶy敬F-ϡo۶h BC,xi&Xf皊$.8FXMd+? Ə}^GV:FNQ,LkXvJIp@U6*\f1S wÁ-.+xrmK!ViHaFȒx%E Z 1;j˟?u;#;QZ"j&|>OA;IbH_^uvF#T1֖gLalSVw8c$,]n|NTmK7*Ҿ":@ $rf/~y#]AV[9<Rx+vW4UZw}dOH@(lk`c!e+ 4˹3B;v?/s] \T { kBgLdl @b$m5A#.7l ˜" Nu:77o׮յZ^Y- 6K2ټJWoG3w&u)c676)mOҗ2:8[(h#G23NzY,+9KEԈl`WCz;u77l/V/nw mQ̭wT2qծ ?*,PM9`E+bym98*quO^d$pܢ^Z!!*ȤFڊ] ImqPюՈ1"}Ez 5&DCW8rxW" i֑t"[2VXtb#@V5oiJ '8jogE-;*TʲdZ4zy3$+]mlBtJ(iSl XA{@B|TоȚc^$!{FkI"Fvmx <0uJ J!&ԵEQKV$a>bNAUc99ZOߪO˅x H[21f݅a/#3t'Z ieՔ܃(R<ӽx/!=-kh,fuFr !gݣd2imgBEt?@X5 W,.a%4W,Q wܚUM@t cQE<1 MJ M"E9Ǵܢ!ގx~%&|0碑|Q!!Q C ^{5TD&uGuvU;0 FRkUwGw@"͵vX&! 
7<ʲ|gzpo2eZ$Q8}(")~S}~"Q8?!]uiqnmO6I`]fR }<񁤌B#s`ܞcd5UA (@WIw/0S3*ZGPݧ]-9#ܯ@/z=ZX"f|`c=~ghK1L gOCNͮ'AElZ9=7- 0q> 'P7$GC)A&=л91󃙕+ez  Rx֙"vn t!P_%QX0ڠ@fd#t ^zLyU^}?Bpw)ޙǢ"ɓ>p:ŢЫ̥YR~|GE,& m2 2)z'|_Ai-w[I$d'sC]m"!3 ~r:~-&WIjɷB V"7,"P1V,ik &b5NA;H:P<UGDtӢ@ҏ F9tQyly"<|$uPȩB¢qWIহH5!Nh=s كux;uuz3y]L@vJ~Ӻ^:AD: `gX_pIPLNϩ:+ {} LipU/ F[0UcOBaw|G3uT]N1k1##b1*y}nohU D,ߎ/k3|AW28Ul"+ZҤuR|ߓ$մ7 N-cQs+NcKM 5DO'i7چe{;nd UV*Ώ>O^*Հsqξs=V,U@Bo8˧q8UqI[MVXr{@XKE=zaZ6K!YhoB>neլ&/*nv"Jf+%׿q:%=ƃҏ?k]{}kK@6k$Vύnxb*3Dx&sA\$*1m1`h[s2D#*o* RcZ^ c$u*5q9x*p4޸~PWZYwZ_(J!ktYm۶m۶m۶mFe.Wab"31kek' c_q&=JpEgF }]tȉPԽ/asjy 6WaxA.OV`0\jgJնX7i+f`gǭػVvd/!es9E!s1]ai6}-bRy QAu^t\_[6In:B`rF#:#s-9nC3&l g99G3A<Щ x .ة\ΰ,-%6-XLe t!5Yg!%iTRƄ2ÏEӞwaeTMs0>x uv -Ӕ{0Et2`# 8Q.4x0yRE 9*qO+A Fc)d H&`  &^p_Bl~,DIpG}&XFez2F$ɟC}/wkr鏥EK)'N7Ld}eJca{JXz*YsKo7`4jWm' /t687Xz.'%UVYjPdw4TY{}I3ʲȞ.VT-Q* KKMԤ- -7_}a<>hpyIW:Io[J;d)³UZ 4+K< IRZG`u*+xmޘkG;-kFbyT3 ?יQ+r5vBh2D)iNz_=L0ڎV79%fݫP#NBY f(K15(,J a'!}A39˧\vi-̥Lԗ4v/?i ,W5D$'68,5* S7Yƨ \e!n_.ׄFCE8߹ ©]~>au%_3%elx͗W-ǩyI &}<ޙs]]:vơiZ-6_r^=>n ~ Xdse-l6sn9t <(< #^i+([=>˺{/xߨ3VUVQTnqJw7?\sgA/.abSihE}Lm 5}TohZS 2Qi(EE{*t??(ܟ.,""ljbioDi!йD% AJ-D0f)tNZnܱ3<} @3CS14 WxU# Ƈ%ڄٚ4AÎF>Zh|g<9j\Ipv&֌b7 3Ѿ<0є}aTXJB''*ܺCE"ifxID,y3ܒCN^ÛFf%~c E'.]B\~L'~CЋo)j?qGq)̅_{lGĀgB`o_ycd|7M6FW0 % nJlt@<((^?B 3|vz 'G@mJ"Sfero\ê#6tY&Ehi9tLeDYgLJ+U6>4F&~ôhKeO±Ӧ{}",8i1uLﶮ0y+` 8ymHm]ۈ*סve80VA-a8=y#|L{=ܪ(-=Cv{s *T~O}XѤ:lpjXr `ytﻆ'@O"g>jCoWȿJ"B,K)Փw5e/VszvDmɪfwHFElvoOX`.Y #ђEݢh*RIɤ W S!+ZO8p3KJUP=*Ӫfo Ue;OECRt![.']#$17a|T"Ifx/af;&nj[I[} UmS嗯$(:lhWJd騄2tm: Yc2ޛM[Yգ9|$k}OCiŮ?-_@%'4bQb5vj1i/y1kEO = ͩ!=Ӄ'A e& oIܔw()j:n5 })$gDOD~#&>rVvveɉϜ9"yy_JcOգ{ivk7j^>/|C4,\7=ӥz4kRӕZ ,#}zjcH\_\8뎺08h88RËHxW{iv0)!u%ͽF.fiFBwPC .{Y|H'[A;-Ue,.>pU 'EӚU,4͇YIڤnOW4g/H/Xg/iށʵFhcefSI4>{kE\/'@Hk,LIrP!Fw4[*LR4spUP7P+eSq_Weo)PmR'cr2X\DUmnaZtFjCg<^b^xFP,.um*VDiFtEYG0\>BMq=}J ]w,/l~\|ozuű&iՒ?.qv"!1iX?V̜X|,C1 = +*\:Y:VY _enD( j%k)c6JhHl$E٦QWmeb;|Y/V$Z9S939_KR) \1z[m[TdbC5!݆#[-CgC>m$ M^ts 4lRf]mkIڲ6Ka,pVOU}5j66]d(G](OfE6\ߜ㸝UvY\(x$$EiY-^IGaX& SL=1=L8lڴiKG7@l0# 
7#(S`bbILc"ę2U,U Ds"4F11i0ƕBGZ4UZhx}/Z{nHflB0}1P6zNpmr8$OIBWpyfOShE s3b%lD=FR-t^%BC殶O@ӛARClV5t뢯o] lhc\P@Mn|<&V+^tC֍J*+-3g! qAqTT.s;u|}w3ӍDⰁ'EQVnq?|à2/,f@<*U* lȍD̃KnJ)Y ^V)xTKp?$CJډOL#P]P"GFEߍz/~$0cxϢJ g+'jG` Tkz?7e|Fv̎r o|2nR{Z@SuK a{;\&J *:o&D2S( zcpO^7ō*WՅ SqY. 53<4;<<tsXHdnjE^ * >k%2KxɈi.(Y}XMٰ2TJT0XQb$'趛OPE S :$<F4Q0pDxhBGl "wҁe|`n lAێKf mXxn }E3׈2fV&R/ب) w]33;#=_PJOUOPr_@)ghkj"dcol=I-nGޠUKWĺ$TXڳ2q,@?1܊7srg^{>010:Sڸi :RG Ъ&;r''n{3\v)Z[-5D4o' c^Sr$Ś/}˝\qPK܇7`8Uq  &w[*ܤւ G$ %ДhP%ERl2+3S} H#7st 0 yhY n|zPA_2*,ɭCKc'{g{3{w;S7S?Rbb1Z]kdA/6sEjV"zDW)ƒ vzSeQR9Pφfnnf}fOs3c]486Vzh[s уvmpݵ^P|jȅSE_DZ=? B[9Pxy+4+߅^c{p׋$)GK$JkyqyaM+`9L.{2NJGS T |+\8 V+1>'ɪ>qqFU({ؾZ^-[ch8zُ=X̧灎D*3S:WN ʂPG~S *]A6P@UU,¤`aNDJ>}YO̫kaq:~~|:Z(Q\{Y~uuv/ilAKv :jCd'&3u)dyeg˿C/vwaF|<ۼ47@B$Ѩ"(M("D>CHڠ| 8$T;ňuTFi׋Mx͎bc@T{(tX­5ƺyXu ia0]޹a^CQxߴSk#JS9酪Ph'6m29Q1M[RO(d]ɵ]2شI2:Cx*p>*ca==ݸN<VYuU08U:a.9 #TBi)E%g(Zq~anbcEa$=,¢34|ﷻn=Qw=QOh5sDg< w@ny˷Wr S l SڞdXC:sC=BŢ]ڡHEؠ[DwdsW3*paJ10Cw)EڹߊÞ_я0|!japt"|P;1 K%$Ƞ>WX#T |@O|̤HfM,E7J½z`kqe wUE!/t _^[!7G?6GN4 {o9[kWL(D(aaiGwEz~. /$O* t xUÍIց'bA"l;f=X.} HwrYw%gؽDjɃgPF_Ў"2x!< m"xSM6:𴽲Ea$ i̪]#%*Z q/¼lHT~X=uo70NB6Yr%p s'G9<+ z RE4T7[>!wqσbhgbd++zࡽn-hR(EBMu{.HE Qgtk^_ʁ>ѽ@a &o\!`œfYµ1 FF}̩;J݌D&}O uf Ns6sz~1nZ"!*BCǿ|U+CxB.pKڥbPlKԳn5鎓]*yO 5ZYmxY,ѡy4SQwr'@Smݲj`kI("M ]t8F;G*::%sUQzwþ8j=:+j"J!EG, KhJtE|kn+2u/% s lw=>CS򷽆hzh):3e슋օJԕy" z?j#0B$n^Oc:5%쉃T譲Lhh C.co1M Am<+q3}@&گՌu *:P,Nrdw;9[Dv~rŔI]PW13J!o\! 
: 9jD?bz/V@(w2?fc cSDP KuCc3 Un*\v:8s%~~Aa!-qFMlp܌}_IH`՗+_5Y0kGjdjnbpX+/ JCB xJӢRGU+TQ|4J7pMݖ+e5;PdJ9?_~}0YrT ;4B# @ǚD3uewTqD,9lcufck=Eq\{l/&7MDrwNM\u]հfBǓ5L3h%A p0\s$6R:na"ﺭOypcOZ'>S240z!sy%(8ʏTg'IƉ{BUMyI*gg4QBGStr::icnֶ]"e,3kj:~'<c$L9#`j7݊~DJcHwQ] <6u˕CXGOG%us#-'ĉ:@ebj@xj|cx҅dfz  r?嫍ֈeY[aNCtSszENYyDZWɞ _P qEH7%Qil\᯸-2TLR$yK\O8FBcSP1SMFãɁuXp5fH!LN#ׄch#z.ړO0M| *\)X Éfa0Kj9hlK' Sa=Yp/~8-$F|<v`Voq o@(K5%rJd6a<1S--9a]s}m,]e%叧ԋ l 4W雹' }5N,3'/]=^J[-1Zզ۝:>CziAQp̓涨+3䓖7HEX?IK-lHXt'D)~{өSHGHq,.,Ц[VITXCm7XVjpcDWSS#TMJ<,UKc!â+D?Eni.֨@+^ `+[ԾcR?3OW;ʌ(n[+;#O14jHrt<ŧ٧'Զ#1HY0[sTFLkxL/O jjY KOMLf+܎¥_mJ@?|k5m%_,z{@l(pZT6\sjvP3EnvX+`m̟%eø\. h64TB޾^~ ij,Ă <]ETTh3ֹ?=Ttg#c]6i/<5 xZꜫɩ\ep],Fdtju:m-?RNf|=(oRTP[}M xAǍė&z֋#Qq허F]y}|n&08SuNUW60QU e$*0.zWM5T?Mll8cDƑ& wU1>+ O&~. Q,,}οnrYٸ Ӆc}:_, )OT&#x %r'FoFۃ `Kmw9 $cy!wkC*h剷58Aa3B{%S!%H2#p@k^=&gO> b^Y7uB NT[xh#Y-~cXazePН-9^Y~FfW)wzDjACy1su83(B>#q@Elk+[l̩vZA[XG` mkC=`= ώ~a8FH?~9HwE#8<"r3q Q_W \=IN/F~>=d?$p-s|=hA;Q7%}7$y TA}{d/fF1/p></|_//~A+K2~ %#/w~rzC,*2dLŠeVfc ]uX-93UYf= 534tؚ϶eR \!YM61gbeĵGH􀢫4!BK5U䐟.zmc:CHN3lZ@5EY0Sh&(8^dUNZVELbUeC TTɱq s'6qz/\=Vk&Aǚ>P^q* DGr=xnl 蝯H4D]+XC[j7 OViqAepu+>"h ֙י$lgU Q.ST?y̶ڲrkl ANsEҭ.~NWvS;Wur5!ʼn{*n;aIMCNdHyMsFDXU4{ NA-%cFDل$MUX.\GƓ5޲'sNe)6)aa4f[ج"= O4D5BZ;M:õtR w$-ei*s["Wfcpq K+xciIم'l) 3VRKPUgs E5_Qי|:їꌦ\݇ʄY9Cjc>S_`+ϹsOXj{S qW1Dn@{uҔ^Ek~]3X3,R0J<;X;G cp2->2. xW/O}yX>P[l(قV!ҁVj*LH- κNuKjOXӬy̒ϗ|LysyF&7QdJ&% XRHPȶT|XLƄGmFjI-YIKlߍ  5:&eR"'F,5p'1&h}BJLsyվ`> lMeOn0>oc=-!@0sk/Zկ[5߻I>ۥ_Rod$2̒t NQ#T^9*BJҥ Zj/}0a SAQʓn#3!!$@*7:,%Yȡ^?.7}6V\gSwD[' *Wp?䢭 ]$YG)V02:zk7jF;V`a{kƌb ghōgw ~!ۋpYFܻ?)órt]7zf tcOpo4 hT,+j/=S#cF$oUtMmmǃTG{ lF\\b`aGlpJVehk*l[+jCnaUFtE6ܒ16v<'v:.|W\7ފؼ6K㰏=9,?Q wG?z*H/J !WBnzJ>cgdmT :7J%VE=QC>fx@mb)N4>yP< s]ly QYW᪼?8fnA#bx 66$ِpx2HBhi#ld^R4 Ǚ `1iu@Qm?X؉3"?x'^Jf1>1MtTPq|hC/\Ʀ`&:(h!Y,Kd(s\7#0͵3801J~9"(wgϳ$ؖyUa&gcȹ<_y?n~{\cpp,k[=~^_q.gf"a%fj,uf;t5hr[I٫$0/M.Ieѵs-2IS%3_X_81m)<*YQ&+]tfhQ=vd\JwenYupCSXe6>9]8IAMBJv@/m07? 
$8> geq i4.L*O n,Ki٩Q "H LAVo&h*˴"VnpwD+&dY;g9|kg^_ s>#ͩ%Pzq#Y 9,{Իc`2C&>=7ҵGNgfݚӅ|J}vr_V~*OLhl|>X!ALR(?j#*>ɯM#/zKbT/f/"'NBCݑ(q2Yl3B&q Mp)0JZ sȔ=X'Xi櫳@bto43:/)Γm[.۶m۶m.muٶm[{}݇o}KC32ǘ-E i*Eo qmYJ9-x^ o)m6()'OFӹǚn$ɂ w[ G Wn0 wn`~|yV(~bJ3Q= |e))V.+&~U_:#x_+@:+R޷8 pAp"~86쿽-51dʟ@8DS;/ҴFF7ζȤG޽^,ŒZaM=OXњ|ANk b z_BC08 >yϢvUyd *n㯥B} aďY,Gv貎벮Szeߐ]2ȟ -3ܑL!UwtI V`oB~ -'gBLJv!Kxq19ϷPU Kє:5SV|KUXcv+PF65$m P|ZiFci:}5ʖTCwH_L캔rI8R8|&7adDͻ5/KڔXV2@2g]l驋HʁԜd-vcBad}ŴNtb_o\cף3.D$[\K\+4Où͓THOGM; 6U\Iow:E :Gr+*) aiSٹO|nwkbm vyJN89aά-{Qx\<ŘqƢF˳ fm5lRV^ׇc Yn5{LKȒW68'8o_sCu_8n5\׌%XTmH9$oܩPTtv> KbJ rTN 2æ[W:=2 c22ĕ͢> ?; d.%K%=#홤zUދ~Ij U4#W֎JzNK=G_yO{ 9`V#_)RFRc'G;;cn,z]瓪~^'o u/TNtjuNNlK88JҖ8>ZVb|\}x={!8K?OԔ2ȴ;8)IKƋvlաd fZJܿ/p!ʽ)n+Zv~e`s ḏHՅؔx&ѯ(->ξYsq>F툧.lU)\/mt!>&?6d_cKa6׶No'Mϐ]zQ[/>^!4L x?eQ7)qX'wvޗc.xXx.,˅ XT{7&E&B'B sS-Pǣf^b'x~b,n$l#EN;y7ê) ^Xd:uX8*a̯\V-$2uL٧\]M. 9PB[3w^wCg~7lj |n`阀q?hzo=baBC\\U-@?5{Ue4Nf:Tr7 e,ԅQp:7wzLlکǢisLVplii?(pw=1"8@uF4D @9s k"!JO:U'^6XE'9'Kitk̈́U9Io˯8kםiM`yK[j\S)ƫ\M\C!0 Q5WхqNK5:g_~#FrlAy1\EzTQ`،\},IWdw-)"]"Jko c RBCfJfJꈎS+,KiՎ]>朖S Mn"8 k -XOD%[f)d[%3׬ͥki80H6F6Il ɤhc.5Ewe@#y4)NsN/g%+ _nHsgj;n6 1E6 ww<ƩSHV{J1~HA&"8!MyR s p|ţMLP PﰀS.Cb9y~C$ rml96N~>wx> qwPzɴbtL$6LHzή ?2-RxD&ː!`x1a\}KC"Jς8% a+ y:P7=;枳ݵɻ=: `U2&c1.͙ Gx.VLq-N4b@aesA\PDC \ԡfF}j57yԬ`4d##Gg U#،T,; d$'X~*uR1Mq߆RwŬ!+ܓh5"t "gͩ::Jyb?ŴXʮVZ',)E BSHYnaԝ|R7kM u Vh}I#nwTx OⅼG]vŕʶ2D@& }̂[]:4Nr42F wf]S渼\{X|~h6E4b`j5MA"`gB\XDŽ@V)Bt} Wsۄ.AwBx /J=0ľ7OmV,*_>U=$S"A38E&^Vs>r)4Eywi2X 7 wS_RB)aCi#9l{3G!Hz6DV+x `ZzgOo-01i@ٙR.6Gƙmsj9-4wkX/ {R '~m$dfkJi܀{Mj9_H iR'HHpts0OҬ«8ȄGo4vJVmHoFd9giM|Ddq7I2 >)-X!blP\>~Yy,kqvS+ԱyN 槤]oO3|QJʠ='_Rp2lxn[X^`wl؅*R_jsyZsB'W/ȧy\G|m4 OА(UkkV.F>-/{ko,݈I`coQעSn?_#[Y:abJChSوŋ 6֢h`~ċd06_t X$w}ݥ7EѡNQr;f"I7O{Qe]VOc$ƒaT;&DYuR%fָS鞪OXłJJlM6c6x|+([i5ﹿox@cCj73{fУ1wۅ2}4C2cA%qӵsI% ba>dQ*=b7Z׺s:y.ڄmSc<k2""B ?h@(Tb6v`].t`h]wͣ˱Zܜsw6BG̢ťUrix" UDļOt9Y 08ΨUjge:XcR}ա؎35*0&W(\J$_2V ITHwS& H涍6gA~}r:A1;sC/_g ?I\K8T"`hv[W0[?On7C=Mocx`$y )+I&rݤ04iV@rz8VD[KhY] j]u61yY;_>8i}B|'=HoT햂'.ǃ{?^%)B(=E'&QiG o<} h!2 K*S3_tHczZ 
N52yqNx1:rS.Vd'b8%0*а}+2J %p{||3S!*Ñہ֤,=r?$Z(=yoma±6vJnl#H>4PozuT)dgDpV] {L)V=$AAU^:甅= Y5q[A wNMbžt^13\'Z/.Ŗ"OB>K+ϊh17+UΜ5CϕQ,AcC1ockHdĨ==|RM?Vqx96A dGOBJ43+0Qc>\oZ;*p 1Ag]#> o bFz=q r&rlukUlI~T":ȑejmcOsq\UIbD xz`S4a@U`%}&/E򃠫M6ѐm{-6쾘8}?UOo6?o@oQ?D ~JȈ - >>8C媛h-Ѷ.# +s 67/r,ټMT"| ߰}rBhOXL}|6˵ZɯFā[v£d><2+9  Oyqi˨CQcm~^(^t厣 "TNpZ7TjH&ޮvힳL\`ָP85*B;\OVr0plڝ՟̝{Wχ(M@l|$|CM^6pIפuѢe")F9}"5! UJӲVyT(kJ2#'JU.:kP ْS'< }Eoo?{Ƃw@=av;*~y (ԅ(Y*_1djG/H 'pg13Ǎ[V ɰEVP%0D%H;rf3Dm7Z J5 bJYPWl1Ae6gK;oQ.ȗY!ugJ>Mb,gd;h*|"8-ͥxdZ,NIKsn-Y~a ߱@xgJNRJchgk߼x\g(m%DUm+$w Fj}`RJst3x⎮b QԛЂWsE7DmDY,ܮBTog^稵iwS05EϦD7uDL&WDU:wkwv5nUIt3͜|^8nPѹ^d <԰rthEr˖0I'ncM*B=6IǾld+ Q(-s~Z>q쮸W#]5$t.K҂Lby`&Ș;4rr'#%7^6Aq.& VfvmN[4=}MЙE8 x"*/gLZ-S]:e`UUXCRF_ )jB+9h_q(t1 ̉d㜮($xsp po3&f^ ҩ q+<( 5t>1-/D)`o/A@gwS?,/ugs}䃆дoWuUOk}' {Hp!:IxhƞSA^}w4DElsb-f$O3^S2ysNgs\9} MdY9$K1SgZ#6!TU>,BY?pHIH gizTAFx}l[kg5f5=XOkE^"eX60B{SsuI@ \r s|x$0m*-P `f@bfbyHۖJ|xJx /w5eLu s^$ \~RZL"UG&U:C5OȘMu)ñ}BJGN)wrn0Rhnn]}ٍة1Rl|&={7^Q4IV旣C1bugF^ۛkWٚAV -L֓b{vzyݫN 'HkB@pCowsMX4 bCC_ii׮NWAm>hA>LY:LmkIpҝ-xoc-A? 
˟WV ;Lp.COxjp;>Atx~gW΀4D._;CAYc(kGc(.`;$hYPъ ҫ $مJPۿvMXuP?JVJ7M/Ulc{j4p:* BpjT/ICEv /<*Id%1z280AZS͊F\+e*=Pv^0ہ4"`4.'q$QZT] j!&̌ Ȳw(踅im|Kk W׍>nr,E%"6r EU:^ǡk8]~ň3pW b}o:ȃU؃=Upx8)?gM7.H\S*&>rl1|1^JN̗Rg<:aɳd;tc"{Rt/jJ, KGk3=WdTiE(U6Ȯf= >^[¿Ƚ ɛ٩!3Zzq֚!meOiyǥP>YHU޳eg9YN&`&C'͜PQT|0g\MqN( @#R@tB Uq qGaUlbw~/ΈDqpd՝t{-i]C I,kcln,b @RhXIUpH)9ЇYlYʸ,&}_G.O&M|}*Ҧ0NyLi˓ۜTNQ:TnUL(7ܑ}k$l.I&m}EfRUnjӳ{cncF.oXrl$g);_"d҇^)(uRV *'R)=)co$TRk -sy+\]w[OX2 n^_zXSZ4zU7Z7tUmk|o ޙ)xnci{; ӉzEhWK†ǤLW;YCK5Qhɫ2K]qrxc[fkyxE8&Hj*,Gw±D.t`lDu7l\5qf4q=m%|4rTssߧg%UsЏ'*^@lB ڙ0 "D)Xp{pW9_U7܁fLnZ p8Y(Sl-䵦 iô {nN%ACE"Q]qPüYkSFS\#WkFnyMl [M1!q,:oMW2sk˻\gPC3۱& m6*"'|3Vx'mO"2G\o=@塔~ sɍ|m 3C?j9O"m!$qS;GshwOX_AthٖI-:" #sE"| c 'P9F{Zorv6L]O|z9EUYi-*Q4h-+-I(e Ivx<'M x$1`#b> !@Gqk3;{ <_@p(1e]&~A3Ƅz?{dm "|c2h=+]6RZ@=N>uB;^D(V7j;r[ETlH]ju"^13~wkLR}A.P}a?LBaw2*4@W[o?8# 0?z aHDKfVjdL#ETe1b'YQ1hI8jyTI0E.Qӹ3x;I6t-1o./?Ǝ]-bL?ױ$'%'WȠ'CD/0̾7ugF&ڧ^?#&_ e ;.gz^⥤ +b5L,$ ˀ{Lnim3f㖀B|UB*|+ͥq|[ hd]sB6| zc^zfY χa/dd\j\HH}mL"Nj3YW9XLG WPuOKʋKSl=Sfɏ :sz7T$޸ƢYm9 V^9MlK2yw-ͅ wl+1Q%i.<%y71fHxk|ι }Z6}MO;\t}>?QJ>Gϸ᛹JB[H=7prO2JEW4 zr&34Y'wxEC@GG-&FW>(wϳwC'Ө^7pb yC$פ {xpqO4y#ȈG1BhO׫rxE**ZdW{EX_ܿhRv8G $qT0_!~ww@^ xHF$?(vame/9:un SQ}vv;#EDf3'xw<'|< SSY,GqהAs@pPGg?UFUGRe*%işIߌito?qnjܿ;ɨ:d J8֑əMUͺ0.a{MWOWaOcfR!1zԏ]'0Nϰ:?Q eBnpjCIgs-*7KE]C6[9cm%""&-7]HZ[Z~η, &'Ѕ{͆r:C0L钬)03&ʙ4|%F,WMF v& 㾆a&딌KdB4|Q W2|83|Z3|<2|^2>Q?a_!GͻH8~c5Q(˒Q6IIӒɾ@;jʢh.G= 6d+KYl( my!Nʜl@_+RFF)hǴhJ,(|G>:`x`a"Dոd,W5#_##HF*iyS7;d)}}kag`(gHmXzRs|v8lJdoYdAw 4}WC+̜0'pЃ?,CmNW' kf,~#}5ɩeۻCl<@ E)iTT%w(P"S崶qn&19~~ңA]Tb!&n5-wmąV@G0}h+;1G7ec[07zv_D\a3"+>ͮH?<'NOV9d ̘i*j簵Ͼ:t!`$0#UiJIk@D_cs+17?Ln#2F@ [;p9$Ŭd">_ogG#L{~E-M1"*  {yr!sCQE3vqdtpC{*Xk&,E\|Vд`XK480CLJp |&RBG]~ϧ*"#Ht8xI[69{k3:mb]0c!oT^mx{raMPt}N\Jo;bna__D Iz}me#y҈Zi#M{6mɛkL6~0Dqi`>|`Q{,ɽ%DAWxB3JސvbcAX1^ug9. NU鄵[JgZTSW?0ys7'C5 fUvJ682 7gi詃1"Hy|zen޴P^7Ɇ*"`E1󏐕?,nqo(|Ӡb1Qet:o#%qF\^q. 
je .2 r a,R <-9_# o𲧱sf 3h2)_Jhxv;2gk9Tfq]J~0GjVav_j^3SYIKqAry^m5^+k?%jFo:.b{U=((_)4=LN73;*Xq;̾3) LuՖЧ%5N[`/۫9x#hv\sҗߗI-iv=({-,5J.:|5li){w4\747u5Y!d&e2f'^.]}5HR<`гO<WěK6]0Ʉs`Wbq;>|nV#87kt0CN1*8<ǬŘ,8x\x/Kj\c〉}BڠzМLڱD 6$,WWC=O .[~qERЄTpǩ(u{xz@'6M}TDQ+74\ f W._?UZG"V=WsX&cgʹ[bW}#CRMw 6=Y~,[ZO*4'4g53C3#7yl3e8 qװs/}gmnWVԯ<{edԔєi]<>^0 "'ngER\c u/X.p3:w2Vr~~^~Tn9.KbsO޹< %vװy˹XrgԋUAG\!N$Yz^䃗B=Օ =0Lw-s ~k4AOU ɡjoCvT> F8Fθ`Q0Uo,;E$];\BuڙtH_gaHHmbdyP\QIע6anb0&:GjWʃlF?4 ~Ƨ.MAQx4Y7C)RǺ{ee.sL~U H|w1Պ&\yrpHy=ɌxݽO`dO-,3-#K \z/}ڔVtjOLʱ$|")H]4~qa#@@zԱ^#أ8䡒&]x1Xo~eI}0C[Oܗ9{?@QpOg/jEyI7δ<"rm_N 𸙑 \9)n*bҜ/S̚ƺ~/vgPn+$*-xCS?yEèrtF^2Iz6C>:h N~5畐kߦ,}9_MlC 1%Ybh Շ{]YM2ĭ7NCdEsW  h7Io G۽IvQP?V4'c[U4ګ\]hJ):uMϧ1?Rhr0Q)uJ{K@?4۴mSJgNJd2^q!O]NWk3GBIበVԳSE( u9ɋ:|vz+6W$]g˽d%~"DP|Pp!XBi3]\9vYaK- N5^ɘB:8>~hCtEw%?* !.Neh{ﮣN{IC?v嗏օ}(_:cc{ֻ3Cy[\ɸ^|n;?S̔$4vXO_ICt03K X 0w5KgDkB6)P݋+Ou'%v"or}@Wqu*{׊O KtlGe)lsNv_T=8FhP}Tca~>U).e]Pǒ}XIy]b#8D,vI*}3o2[p !(2g@dZL<4Wzk'}PaTYrB#MWޗ4j7|E'=J0GuIYdh+Z̏ugYHL2D oZj1 (ds(yU}ɝ IMcN!8~oE;9e9>V䜘WM'(eKzBDatC--7i(:1{NE7RM^G5}JTFXj P` >u':g8^jtk=1dr=4a1`cIuoˡ:!{OF(a3#ʇn9kmhKu87uȯ** ӄR=DSr/-r-B${LZ[i{br JWKԷ;BQegZyH8s(En9[, =`^uv2箃?.qdI'|6_%9hݩ3vgi232x^4" s=FhF3gG$0J@73zgtjM}16Sm^!8 ~Խ9?HwQmW.cD[1e08;OS-_vLWbMɹvS-JRU.^7sFFdُa{YEKv~'xrО1ޔߕ==?}+C;} iO}>] -{ۓ;A5QYfwVgRHtb߱vOlƴKW:M|lho-%'~2:aZ#8w.7.:c2t_\ѓ҃{;Ve+~ﻗmݻQ$ZX;nlfn eyLE-EPoEnw3r4UՓp F>^Fq󵩳] /1!19g)7OEŵHh\|nFswIkns+uG.~o!8hrg4>=@!q|nku/JFa.9ɰ3{2]S,[$HtP[be_^|g^mQ!"_k\[ҪW8_R)5$kʣ"b8"1e?eс#Ysz_u؛t4B%F쀜"H~&\;FU򼜑ӻˊ +2rlkoܔl>kɇ3U6bEnCҢ‚lZn4.棌U4S.)\6;q?뻄֙8v[:ưnZ3R[i("-#Wd~ gL2io0iEFMx@'e_'8ΰbRpÂw Eh^{CPDUe\7m}odɞֲxĐEAs9lrJFghyADQEڋA++zP$ שJF¨llBO$ûֺשUà ss{N:V 6 +d;c-JPp:wx#yE,Z!6Ւ@?8PDßnvؐ&Є?ݢ&b(&xN%+LJjH#ߡ0D5Sh,lynP ;4T"37 .,p>+oȴ|?\{w7T;pH>l7~5w:j!֌|{`J/8$`i<-80Eͷ6a hX1aLŸn jyӭ% &FMߏ oK:p-2?? 
l5pH'-2FX!cAZSŇ+Vi$~?;Qc*9"?i!hpvk(f|\ր6Jƫ 8U}Q,;vs 7">KOwyhQE<Ng;~3l݌=0bi8Y('Ni8:ohFLS9\1x up["vqΆDg2)/.0%$D3!؄c̹[ M>Wu~h6vB: ڤ<2bbd;=O (uWxVL,1,\Mp Z̷P 0`y,xh8BmnVoMB A.~dN,#M0ہLLJ&>^4zHWf6t.x 'm W"*ċj@<`Q.`juXtmxo'1r2셷M`@K}k}}TRy S yEv˥x`B)E >ّɂ5`-S.?Ӂ*|F=2B"d ӊꯐوTP/0Ȣ4YJ mɹ5[q(7~`Y|4z\ݶjh ~ǝ1];9n1cF-ȱCx`o=jqqX`LU7!0rRZi^Gpmz3D:'`W 5q,.'{\@K qm:N̳0j|G*AC"Ǜ`k=é<çqBoNЃ$ (^ pf@GA 0z?.!78KXi | 4`9pPHw5.M}4Yڍ'1<1I u`lץ]>6ՄO/x9_ _pemv"LY(V,E`o>2 Ϩ\9bpQΎ'6&ϯIg +@DL~?ô5i X9.< 9~~NEӵ?;(Ҏbt60+ (T` V>4[+޵PKoPK%M: META-INF/PK%M: 6OGG=META-INF/MANIFEST.MFPK I:css/PKr5_Xøcss/jericho.cssPK I:images/PK}[4$ʛimages/warning.gifPKfK:)e+ index.htmlPK I:samples/PKS: 6samples/FormatSource.jspPK %M: WEB-INF/PK %M: $ WEB-INF/lib/PK!M:˲ n N WEB-INF/lib/jericho-html-3.1.jarPKbv16oF|WEB-INF/web.xmlPK H}jericho-html-3.1/samples/console/0000755000175000017500000000000011214132424017000 5ustar twernertwernerjericho-html-3.1/samples/console/SplitLongLines.bat0000644000175000017500000000004510021752254022401 0ustar twernertwerner@call bat_lib\run SplitLongLines %* jericho-html-3.1/samples/console/ConvertStyleSheets.bat0000644000175000017500000000005110021751736023311 0ustar twernertwerner@call bat_lib\run ConvertStyleSheets %* jericho-html-3.1/samples/console/StreamedSourceCopy.bat0000644000175000017500000000005111213757072023257 0ustar twernertwerner@call bat_lib\run StreamedSourceCopy %* jericho-html-3.1/samples/console/data/0000755000175000017500000000000011167436712017727 5ustar twernertwernerjericho-html-3.1/samples/console/data/form.html0000644000175000017500000000544011171401016021543 0ustar twernertwerner Personal Details

    Personal Details

    Name:
    Title:
    Email Address:
    Password:
    Member:
    Address:
    Mailing List Subscriptions: Announcements
    General
    Cheap Viagra Offers
    Anatomical Enlargements
    Favourite Fare: Spam Rhubarb Honey Rum
    Favourite Sports:
    (hold down CTRL key to select multiple items)

    jericho-html-3.1/samples/console/data/main.css0000644000175000017500000000102610514126556021361 0ustar twernertwernerbody,table,textarea,input {font-family: arial, sans-serif; font-size: 10pt} h1 {font-family: arial, sans-serif; font-size: 14pt} h2 {font-family: arial, sans-serif; font-size: 12pt; margin-bottom: 0.8em} h3 {font-size: 10pt; margin-bottom: 0.5em} td,th {vertical-align: top; padding-right: 1.5em} th {padding-top: 0.5em; padding-bottom: 0.5em} th {text-align: left} table {margin-top: 1em; margin-bottom: 1em} .control {width: 100%} div.control {border-style: ridge; border-width: 2px; overflow: auto} jericho-html-3.1/samples/console/data/test.html0000644000175000017500000002240711164140156021570 0ustar twernertwerner <%@ page language="java" %><%@ taglib uri="/WEB-INF/struts-i18n.tld" prefix="i18n" %> "> ]> Jericho HTML Parser Test Document

    Test HTML Document

    This document contains many elements with optional end tags, server-side tags and some common illegal HTML constructs to demonstrate how they are interpreted by the parser.

    Table Example
    First ColumnSecond Column

    Cell 1

    This is a table within the table
    Second row of inner table
    Third row of inner table

    Note that the parser does not consider this text to be a part of the paragraph started before the table because according to the HTML specification a TABLE, being a block-level element, must terminate the P element. See the documentation in HTMLElementName.P for more information, including instructions on how to make this parser compatible with the default behaviour of all major browsers in HTML transitional mode.


    The following text demonstrates the use of a CDATA section which has limited browser compatability

    example of markup that is not to write with < and such. ]]>
    This is preformatted text
    whose formatting should not be altered.

    This paragraph contains a comment. This text is a continuation of the paragraph before the one that is commented out.

    • item 1
    • item 2
      • subitem 1
      • subitem 2
    • item 3
    • item 4
      1. Ordered list within an unordered list
      2. Second item of ordered list

      paragraph within a list item

    This text contains incorrectly nested formatting tags which is

    quite commonly generated by HTML editors.

    This section demonstrates the consequences of illegally nesting block-level elements inside inline-level elements, which is a very common situation caused by the misuse of FONT elements by HTML editors.

    This paragraph starts inside the Arial FONT element. This text occurs after the Arial FONT end tag, but is still considered to be part of the same paragraph.

    • This entire list is surrounded
    • by an Arial font element.

    Limitations when dealing with tags located inside other tags

    This section demonstrates the limitation of the library in distinguishing whether a tag is located inside another tag without a full sequential parse. When a full sequential parse hasn't been performed, the H2 element in the following button's onclick attribute erroneously terminates the current paragraph, and is also returned by tag search methods. See parsing rule 2(i) in the documentation of the Tag class for an explanation.

    This anchor element demonstrates that a tag ending in /> is not considered an empty element tag if it has a name that requires an end tag. In this case the final '/' is included in the href attribute value instead of being interpreted as the end of the tag.

    The same goes for tags that have an optional end tag like this paragraph, which has a grey background despite the fact that the p element is syntactically an empty element tag.

    Microsoft Conditional Comments

    This paragraph is inside a non-validating downlevel-revealed conditional comment which only appears in browsers other than IE because they ignore the invalid tags surrounding it.

    This is an example of a validating downlevel-revealed conditional comment, which hides the invalid conditional tags inside HTML comments. This form must be used if the condition can be true in some IE browsers.

    This is an example of a slightly simplified validating downlevel-revealed conditional comment that can be used only for the condition !IE (to display in any browser except IE).

    This demonstrates the use of nested downlevel-revealed conditional comments.

    Microsoft pseudo-HTML generated by Word

    This section was generated by MS-Word and contains messy and invalid HTML.

     

    Server Tag Examples:

    This paragraph is ignored during a full sequential parse

    '; ?> <%= $variable %> <%=var%> <%abc=def%> <% for (int i=0; i<10; i++) { document.write("This is indented server code"); } %> <%@ include file="relativeFragment.jsp" %>

    This paragraph has a dynamic id attribute

    These checkboxes have dynamic code determining whether they are checked: checked="checked"<% } %>/> />

    The following is Mason server code sampled from the Mason book, chapter 2, section 3.4.9:

    <& menu &> <&| /i18n/itext, lang => $lang &> %# The bits in here will be available from $m->content in the /i18/text Hello, <% $name %>. These words are in English. Bonjour, <% $name %>, ces mots sont franE<#xC3>E<#xA7>ais. Ellohay <% substr($name,2) . substr($name,0,1) . 'ay' %>, esethay ordsway areyay inyay Igpay Atinlay. <%def .make_a_link> <% $text %> <%args> $path %query => ( ) $text <%init> my $url = ... ... <*abc def="ghi"> This is an example of an element from a hypothetical server language whose tag formats have not been registered with the TagTypeRegister class jericho-html-3.1/samples/console/RenderToText.bat0000644000175000017500000000004310570335642022067 0ustar twernertwerner@call bat_lib\run RenderToText %* jericho-html-3.1/samples/console/FormatSource.bat0000644000175000017500000000004310573115072022105 0ustar twernertwerner@call bat_lib\run FormatSource %* jericho-html-3.1/samples/console/FindSpecificTags.bat0000644000175000017500000000004710212355424022642 0ustar twernertwerner@call bat_lib\run FindSpecificTags %* jericho-html-3.1/samples/console/HTMLSanitiser.bat0000644000175000017500000000004411166751032022124 0ustar twernertwerner@call bat_lib\run HTMLSanitiser %* jericho-html-3.1/samples/console/ExtractText.bat0000644000175000017500000000004210351115336021747 0ustar twernertwerner@call bat_lib\run ExtractText %* jericho-html-3.1/samples/console/FormControlDisplayCharacteristics.bat0000644000175000017500000000016710212576626026337 0ustar twernertwerner@call bat_lib\run FormControlDisplayCharacteristics %* @echo Opening new form in web browser... 
@start NewForm.html jericho-html-3.1/samples/console/Encoding.bat0000644000175000017500000000003710633602104021217 0ustar twernertwerner@call bat_lib\run Encoding %* jericho-html-3.1/samples/console/DisplayAllElements.bat0000644000175000017500000000005110021752222023216 0ustar twernertwerner@call bat_lib\run DisplayAllElements %* jericho-html-3.1/samples/console/FormFieldCSVOutput.bat0000644000175000017500000000013510160652176023145 0ustar twernertwerner@call bat_lib\run FormFieldCSVOutput %* @echo Opening FormData.csv... @start FormData.csv jericho-html-3.1/samples/console/FormFieldSetValues.bat0000644000175000017500000000015010160425340023170 0ustar twernertwerner@call bat_lib\run FormFieldSetValues %* @echo Opening new form in web browser... @start NewForm.html jericho-html-3.1/samples/console/CompactSource.bat0000644000175000017500000000004410751663714022255 0ustar twernertwerner@call bat_lib\run CompactSource %* jericho-html-3.1/samples/console/FormFieldList.bat0000644000175000017500000000004410212600766022177 0ustar twernertwerner@call bat_lib\run FormFieldList %* jericho-html-3.1/samples/console/src/0000755000175000017500000000000011213753550017577 5ustar twernertwernerjericho-html-3.1/samples/console/src/FindSpecificTags.java0000644000175000017500000001006311032257400023577 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class FindSpecificTags { public static void main(String[] args) throws Exception { String sourceUrlString="data/test.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; MicrosoftTagTypes.register(); MasonTagTypes.register(); Source source=new Source(new URL(sourceUrlString)); System.out.println("\n*******************************************************************************\n"); System.out.println("XML 
Declarations:"); displaySegments(source.getAllTags(StartTagType.XML_DECLARATION)); System.out.println("XML Processing instructions:"); displaySegments(source.getAllTags(StartTagType.XML_PROCESSING_INSTRUCTION)); PHPTagTypes.register(); // register PHPTagTypes after searching for XML processing instructions, otherwise PHP short tags override them. StartTagType.XML_DECLARATION.deregister(); // deregister XML declarations so they are recognised as PHP short tags, consistent with the real PHP parser. source=new Source(source); // have to create a new Source object after changing tag type registrations otherwise cache might contain tags found with previous configuration. System.out.println("##################### PHP tag types now added to register #####################\n"); System.out.println("H2 Elements:"); displaySegments(source.getAllElements(HTMLElementName.H2)); System.out.println("Document Type Declarations:"); displaySegments(source.getAllTags(StartTagType.DOCTYPE_DECLARATION)); System.out.println("CDATA sections:"); displaySegments(source.getAllTags(StartTagType.CDATA_SECTION)); System.out.println("Common server tags: (eg ASP, JSP, PSP, ASP-style PHP or Mason substitution tag)"); displaySegments(source.getAllTags(StartTagType.SERVER_COMMON)); System.out.println("Tags starting with <%="); displaySegments(source.getAllStartTags("%=")); System.out.println("Tags starting with <%=var"); displaySegments(source.getAllStartTags("%=var")); System.out.println("HTML Comments:"); displaySegments(source.getAllTags(StartTagType.COMMENT)); System.out.println("Elements in namespace \"o\" (generated by MS-Word):"); displaySegments(source.getAllElements("o:")); System.out.println("Tags starting with segments) { for (Segment segment : segments) { System.out.println("-------------------------------------------------------------------------------"); System.out.println(segment.getDebugInfo()); System.out.println(segment); } 
System.out.println("\n*******************************************************************************\n"); } } jericho-html-3.1/samples/console/src/RenderToText.java0000644000175000017500000000124111166472664023042 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class RenderToText { public static void main(String[] args) throws Exception { String sourceUrlString="data/test.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; Source source=new Source(new URL(sourceUrlString)); String renderedText=source.getRenderer().toString(); System.out.println("\nSimple rendering of the HTML document:\n"); System.out.println(renderedText); } } jericho-html-3.1/samples/console/src/CompactSource.java0000644000175000017500000000124611030434310023177 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class CompactSource { public static void main(String[] args) throws Exception { String sourceUrlString="data/test.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; MicrosoftTagTypes.register(); PHPTagTypes.register(); MasonTagTypes.register(); Source source=new Source(new URL(sourceUrlString)); new SourceCompactor(source).writeTo(new OutputStreamWriter(System.out)); } } jericho-html-3.1/samples/console/src/StreamedSourceCopy.java0000644000175000017500000000366611213760104024226 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class StreamedSourceCopy { public static void main(String[] args) throws Exception { String sourceUrlString="data/test.html"; if (args.length==0) 
System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; StreamedSource streamedSource=new StreamedSource(new URL(sourceUrlString)); Writer writer=null; try { writer=new FileWriter("StreamedSourceCopyOutput.html"); System.out.println("Processing segments:"); int lastSegmentEnd=0; for (Segment segment : streamedSource) { System.out.println(segment.getDebugInfo()); if (segment.getEnd()<=lastSegmentEnd) continue; // if this tag is inside the previous tag (e.g. a server tag) then ignore it as it was already output along with the previous tag. lastSegmentEnd=segment.getEnd(); if (segment instanceof Tag) { Tag tag=(Tag)segment; // HANDLE TAG // Uncomment the following line to ensure each tag is valid XML: // writer.write(tag.tidy()); continue; } else if (segment instanceof CharacterReference) { CharacterReference characterReference=(CharacterReference)segment; // HANDLE CHARACTER REFERENCE // Uncomment the following line to decode all character references instead of copying them verbatim: // characterReference.appendCharTo(writer); continue; } else { // HANDLE PLAIN TEXT } // unless specific handling has prevented getting to here, simply output the segment as is: writer.write(segment.toString()); } writer.close(); System.err.println("\nA copy of the source document has been output to StreamedSourceCopyOuput.html"); } catch (Throwable t) { if (writer!=null) try {writer.close();} catch (IOException ex) {} } } } jericho-html-3.1/samples/console/src/FormFieldList.java0000644000175000017500000000137511166370232023152 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class FormFieldList { public static void main(String[] args) throws Exception { String sourceUrlString="data/form.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else 
sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; Source source=new Source(new URL(sourceUrlString)); FormFields formFields=source.getFormFields(); System.out.println("The document "+sourceUrlString+" contains "+formFields.size()+" form fields:\n"); for (FormField formField : formFields) { System.out.println(formField.getDebugInfo()); } } } jericho-html-3.1/samples/console/src/FormFieldSetValues.java0000644000175000017500000000331711031454410024140 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class FormFieldSetValues { public static void main(String[] args) throws Exception { String sourceUrlString="data/form.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; Source source=new Source(new URL(sourceUrlString)); FormFields formFields=source.getFormFields(); formFields.clearValues(); // clear any values that might be set in the source document formFields.addValue("Name","Humphrey Bear"); formFields.addValue("Title","Prime Minister"); formFields.addValue("Member","on"); formFields.addValue("Address","The Lodge\nDeakin ACT 2600\nAustralia"); formFields.addValue("MailingList","A"); formFields.addValue("MailingList","B"); formFields.addValue("FavouriteFare","honey"); formFields.addValue("FavouriteSports","BB"); formFields.addValue("FavouriteSports","AFL"); OutputDocument outputDocument=new OutputDocument(source); outputDocument.replace(formFields); // adds all segments necessary to effect changes // modify stylesheet link since the output file is in a different directory to the input file int cssPathPos=source.toString().indexOf("main.css"); outputDocument.insert(cssPathPos,"data/"); Writer out=new FileWriter("NewForm.html"); outputDocument.writeTo(out); out.close(); System.err.println("\nThe 
form containing new default values has been output to NewForm.html"); System.err.println("This will open automatically in a web browser after you press a key."); } } jericho-html-3.1/samples/console/src/Encoding.java0000644000175000017500000000220711166464446022203 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class Encoding { public static void main(String[] args) throws Exception { String sourceUrlString="data/test.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; System.out.println("\nSource URL:"); System.out.println(sourceUrlString); URL url=new URL(sourceUrlString); Source source=new Source(url); System.out.println("\nDocument Title:"); Element titleElement=source.getFirstElement(HTMLElementName.TITLE); System.out.println(titleElement!=null ? titleElement.getContent().toString() : "(none)"); System.out.println("\nSource.getEncoding():"); System.out.println(source.getEncoding()); System.out.println("\nSource.getEncodingSpecificationInfo():"); System.out.println(source.getEncodingSpecificationInfo()); System.out.println("\nSource.getPreliminaryEncodingInfo():"); System.out.println(source.getPreliminaryEncodingInfo()); } } jericho-html-3.1/samples/console/src/SplitLongLines.java0000644000175000017500000000511011032241604023334 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class SplitLongLines { private static final int MAX_LENGTH=70; private static int col; public static void main(String[] args) throws Exception { String sourceUrlString="../../doc/index.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; URL 
sourceUrl=new URL(sourceUrlString); BufferedReader reader=null; try { reader=new BufferedReader(new InputStreamReader(sourceUrl.openStream())); String line; while ((line=reader.readLine())!=null) { if (line.length()<=MAX_LENGTH) { println(line); continue; } line=line.trim(); if (line.length()<=MAX_LENGTH) { println(line); continue; } Source source=new Source(line); int pos=0; for (Tag tag : source.getAllTags()) { if (pos!=tag.getBegin()) print(line.subSequence(pos,tag.getBegin())); // print the text between this tag and the last printTag(tag,line); pos=tag.getEnd(); } if (pos!=line.length()) print(line.subSequence(pos,line.length())); // print the text between the last tag and the end of line println(); } } finally { if (reader!=null) reader.close(); } } private static void println() { System.out.println(); col=0; } private static void println(CharSequence text) { System.out.println(text); col=0; } private static void print(CharSequence text) { print(text,true); } private static void print(CharSequence text, boolean splitLongText) { if (splitLongText && text.length()>MAX_LENGTH) { String[] words=text.toString().split("\\s"); for (int i=0; i0 && col+text.length()>MAX_LENGTH) println(); System.out.print(text); col+=text.length(); } private static void printTag(Tag tag, String line) { if (tag.length()<=MAX_LENGTH || tag instanceof EndTag) { print(tag); return; } StartTag startTag=(StartTag)tag; Attributes attributes=startTag.getAttributes(); if (attributes!=null) { print(line.substring(startTag.getBegin(),attributes.getBegin())); for (Attribute attribute : attributes) { print(" "); print(attribute); } print(line.substring(attributes.getEnd(),startTag.getEnd())); } else { print(startTag); } } } jericho-html-3.1/samples/console/src/HTMLSanitiser.java0000644000175000017500000004246711166751732023114 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; /** * Provides facilities to sanitise HTML containing unwanted or invalid tags into clean HTML. *

    * The sanitation process consists of the following steps: *

      *
    • * Find all potential HTML tags in the input text. For each tag: *
        *
      • If it is one of the allowed tags * (<br>, <p>, <b>, <i>, * <ol>, <ul>, <li>, <a>) then: *
          *
        • If a matching end tag is required, check that the end tag exists and is correctly nested. If not, reject the tag. *
        • Check that the element is in a valid position (e.g. <li> elements must be inside <ul> or <ol> elements). If not, reject the element. *
        • Keep only the allowed attributes (id, class, href, target, title) and strip any others. *
        • Ensure all attributes are XHTML compliant (all values enclosed in double quotes and fully encoded) *
        • Ensure tags are XHTML compliant (convert to lower case and add closing slash to empty element tag, e.g. <br />) *
        *
      • *
      • If it is not one of the allowed tags or was rejected for any reason: *
          *
        • If the method strips invalid markup, completely remove the tag or element from the output, * otherwise encode it so that it renders verbatim. *
        *
      • *
      *
    • *
    • * If the formatWhiteSpace option is enabled: *
        *
      • Line breaks, being Carriage Return (U+000D) or Line Feed (U+000A) characters, and Form Feed characters (U+000C) * are converted to "<br />". CR/LF pairs are treated as a single line break. *
      • Multiple consecutive spaces are converted so that every second space is converted to "&nbsp;" * while ensuring the last is always a normal space. *
      • Tab characters (U+0009) are converted as if they were four consecutive spaces. *
      *
    • *
    • Ensure all remainding text is fully encoded. *
    */ public class HTMLSanitiser { private HTMLSanitiser() {} // not instantiable // list of HTML elements that will be retained in the final output: private static final Set VALID_ELEMENT_NAMES=new HashSet(Arrays.asList(new String[] { HTMLElementName.BR, HTMLElementName.P, HTMLElementName.B, HTMLElementName.I, HTMLElementName.OL, HTMLElementName.UL, HTMLElementName.LI, HTMLElementName.A })); // list of HTML attributes that will be retained in the final output: private static final Set VALID_ATTRIBUTE_NAMES=new HashSet(Arrays.asList(new String[] { "id","class","href","target","title" })); private static final Object VALID_MARKER=new Object(); /** * Returns a sanitised version of the specified HTML, encoding any unwanted tags. *

    * Calling this method is equivalent to {@link #encodeInvalidMarkup(String,boolean) encodeInvalidMarkup(pseudoHTML,false)}. *

    *

    *
    Example:
    *
    * * * * *
    Method call:
    HTMLSanitiser.encodeInvalidMarkup("<P><u>Line   1</u>\n<b>Line   2</b>\n<script>doBadStuff()</script>")
    Output:
    <p>&lt;u&gt;Line   1&lt;/u&gt;\n<b>Line   2</b>\n&lt;script&gt;doBadStuff()&lt;/script&gt;</p>
    Rendered output:

    <u>Line 1</u> Line 2 <script>doBadStuff()</script>

    * In this example: *
      *
    • The <P> tag is kept and converted to lower case *
    • The optional end tag </p> is added *
    • The <b> element is kept *
    • The unwanted <u> and <script> elements are encoded so that they render verbatim *
    *
    *
    * * @param pseudoHTML The potentially invalid HTML to sanitise. * @return a sanitised version of the specified HTML, encoding any unwanted tags. */ public static String encodeInvalidMarkup(String pseudoHTML) { return encodeInvalidMarkup(pseudoHTML,false); } /** * Returns a sanitised version of the specified HTML, encoding any unwanted tags. *

    * Encoding unwanted and invalid tags results in them appearing verbatim in the rendered output, * helping to highlight the problem so that the source HTML can be fixed. *

    * Specifying a value of true as an argument to the formatWhiteSpace parameter * results in the formatting of white space as described in the sanitisation process in the class description above. *

    *

    *
    Example:
    *
    * * * * *
    Method call:
    HTMLSanitiser.encodeInvalidMarkup("<P><u>Line   1</u>\n<b>Line   2</b>\n<script>doBadStuff()</script>",true)
    Output:
    <p>&lt;u&gt;Line &nbsp; 1&lt;/u&gt;<br /><b>Line &nbsp; 2</b><br />&lt;script&gt;doBadStuff()&lt;/script&gt;</p>
    Rendered output:

    <u>Line   1</u>
    Line   2
    <script>doBadStuff()</script>

    * In this example: *
      *
    • The <P> tag is kept and converted to lower case *
    • The optional end tag </p> is added *
    • The <b> element is kept *
    • The unwanted <u> and <script> elements are encoded so that they render verbatim *
    • The line feed characters are converted to <br /> elements *
    • Non-breaking spaces (&nbsp;) are added to ensure the multiple spaces are rendered as they appear in the input. *
    *
    *
    * * @param pseudoHTML The potentially invalid HTML to sanitise. * @param formatWhiteSpace Specifies whether white space should be marked up in the output. * @return a sanitised version of the specified HTML, encoding any unwanted tags. */ public static String encodeInvalidMarkup(String pseudoHTML, boolean formatWhiteSpace) { return sanitise(pseudoHTML,formatWhiteSpace,false); } /** * Returns a sanitised version of the specified HTML, stripping any unwanted tags. *

    * Calling this method is equivalent to {@link #stripInvalidMarkup(String,boolean) stripInvalidMarkup(pseudoHTML,false)}. *

    *

    *
    Example:
    *
    * * * * *
    Method call:
    HTMLSanitiser.stripInvalidMarkup("<P><u>Line   1</u>\n<b>Line   2</b>\n<script>doBadStuff()</script>")
    Output:
    <p>Line   1\n<b>Line   2</b>\n</p>
    Rendered output:

    Line 1 Line 2

    * In this example: *
      *
    • The <P> tag is kept and converted to lower case *
    • The optional end tag </p> is added *
    • The <b> element is kept *
    • The unwanted <u> and <script> elements are stripped from the output *
    *
    *
    * * @param pseudoHTML The potentially invalid HTML to sanitise. * @return a sanitised version of the specified HTML, stripping any unwanted tags. */ public static String stripInvalidMarkup(String pseudoHTML) { return stripInvalidMarkup(pseudoHTML,false); } /** * Returns a sanitised version of the specified HTML, stripping any unwanted tags. *

    * Stripping unwanted and invalid tags is the preferred option if the output is for public consumption. *

    * Specifying a value of true as an argument to the formatWhiteSpace parameter * results in the formatting of white space as described in the sanitisation process in the class description above. *

    *

    *
    Example:
    *
    * * * * *
    Method call:
    HTMLSanitiser.stripInvalidMarkup("<P><u>Line   1</u>\n<b>Line   2</b>\n<script>doBadStuff()</script>",true)
    Output:
    <p>Line &nbsp; 1<br /><b>Line &nbsp; 2</b><br /></p>
    Rendered output:

    Line   1
    Line   2

    * In this example: *
      *
    • The <P> tag is kept and converted to lower case *
    • The optional end tag </p> is added *
    • The <b> element is kept *
    • The unwanted <u> and <script> elements are stripped from the output *
    • The line feed characters are converted to <br /> elements *
    • Non-breaking spaces (&nbsp;) are added to ensure the multiple spaces are rendered as they appear in the input. *
    *
    *
    * * @param pseudoHTML The potentially invalid HTML to sanitise. * @param formatWhiteSpace Specifies whether white space should be marked up in the output. * @return a sanitised version of the specified HTML, stripping any unwanted tags. */ public static String stripInvalidMarkup(String pseudoHTML, boolean formatWhiteSpace) { return sanitise(pseudoHTML,formatWhiteSpace,true); } private static String sanitise(String pseudoHTML, boolean formatWhiteSpace, boolean stripInvalidElements) { Source source=new Source(pseudoHTML); source.fullSequentialParse(); OutputDocument outputDocument=new OutputDocument(source); List tags=source.getAllTags(); int pos=0; for (Tag tag : tags) { if (processTag(tag,outputDocument)) { tag.setUserData(VALID_MARKER); } else { if (!stripInvalidElements) continue; // element will be encoded along with surrounding text outputDocument.remove(tag); } reencodeTextSegment(source,outputDocument,pos,tag.getBegin(),formatWhiteSpace); pos=tag.getEnd(); } reencodeTextSegment(source,outputDocument,pos,source.getEnd(),formatWhiteSpace); return outputDocument.toString(); } private static boolean processTag(Tag tag, OutputDocument outputDocument) { String elementName=tag.getName(); if (!VALID_ELEMENT_NAMES.contains(elementName)) return false; if (tag.getTagType()==StartTagType.NORMAL) { Element element=tag.getElement(); if (HTMLElements.getEndTagRequiredElementNames().contains(elementName)) { if (element.getEndTag()==null) return false; // refect start tag if its required end tag is missing } else if (HTMLElements.getEndTagOptionalElementNames().contains(elementName)) { if (elementName==HTMLElementName.LI && !isValidLITag(tag)) return false; // reject invalid LI tags if (element.getEndTag()==null) outputDocument.insert(element.getEnd(),getEndTagHTML(elementName)); // insert optional end tag if it is missing } outputDocument.replace(tag,getStartTagHTML(element.getStartTag())); } else if (tag.getTagType()==EndTagType.NORMAL) { if (tag.getElement()==null) 
return false; // reject end tags that aren't associated with a start tag if (elementName==HTMLElementName.LI && !isValidLITag(tag)) return false; // reject invalid LI tags outputDocument.replace(tag,getEndTagHTML(elementName)); } else { return false; // reject abnormal tags } return true; } private static boolean isValidLITag(Tag tag) { Element parentElement=tag.getElement().getParentElement(); if (parentElement==null) return false; // ignore LI elements without a parent if (parentElement.getStartTag().getUserData()!=VALID_MARKER) return false; // ignore LI elements who's parent is not valid return parentElement.getName()==HTMLElementName.UL || parentElement.getName()==HTMLElementName.OL; // only accept LI tags who's immediate parent is UL or OL. } private static void reencodeTextSegment(Source source, OutputDocument outputDocument, int begin, int end, boolean formatWhiteSpace) { if (begin>=end) return; Segment textSegment=new Segment(source,begin,end); String decodedText=CharacterReference.decode(textSegment); String encodedText=formatWhiteSpace ? 
CharacterReference.encodeWithWhiteSpaceFormatting(decodedText) : CharacterReference.encode(decodedText); outputDocument.replace(textSegment,encodedText); } private static CharSequence getStartTagHTML(StartTag startTag) { // tidies and filters out non-approved attributes StringBuilder sb=new StringBuilder(); sb.append('<').append(startTag.getName()); for (Attribute attribute : startTag.getAttributes()) { if (VALID_ATTRIBUTE_NAMES.contains(attribute.getKey())) { sb.append(' ').append(attribute.getName()); if (attribute.getValue()!=null) { sb.append("=\""); sb.append(CharacterReference.encode(attribute.getValue())); sb.append('"'); } } } if (startTag.getElement().getEndTag()==null && !HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName())) sb.append(" /"); sb.append('>'); return sb; } private static String getEndTagHTML(String tagName) { return "'; } ////////////////////////////////////////////////////////////////////////////////////// // THE METHODS BELOW ARE USED ONLY FOR DEMONSTRATING THE FUNCTIONALITY OF THE CLASS // ////////////////////////////////////////////////////////////////////////////////////// // See test/src/samples/HTMLSanitiserTest.java for a comprehensive test suite. public static void main(String[] args) throws Exception { System.out.println("Examples of HTMLSanitiser.encodeInvalidMarkup:"); System.out.println("----------------------------------------------\n"); displayEncodeInvalidMarkup("ab & c","encode text"); displayEncodeInvalidMarkup("abc def geh"," element not allowed"); displayEncodeInvalidMarkup("

    abc","add optional end tag"); displayEncodeInvalidMarkup("","remove potentially dangerous script"); displayEncodeInvalidMarkup("

    abc

    ","keep approved attributes but strip non-approved attributes"); displayEncodeInvalidMarkup("

    abc

    ","tidy up attributes to make them XHTML compliant"); displayEncodeInvalidMarkup("List:
    • A
    • B
    • C
    ","inserts optional end tags"); System.out.println("Examples of HTMLSanitiser.stripInvalidMarkup:"); System.out.println("---------------------------------------------\n"); displayStripInvalidMarkup("ab & c","encode text"); displayStripInvalidMarkup("abc def geh"," element not allowed"); displayStripInvalidMarkup("

    abc","add optional end tag"); displayStripInvalidMarkup("","remove potentially dangerous script"); displayStripInvalidMarkup("

    abc

    ","keep approved attributes but strip non-approved attributes"); displayStripInvalidMarkup("

    abc

    ","tidy up attributes to make them XHTML compliant"); displayStripInvalidMarkup("List:
    • A
    • B
    • C
    ","inserts optional end tags"); displayStripInvalidMarkup("List:
  • A
  • B
  • C","missing required
      or
        element"); displayStripInvalidMarkup("List:
        • A
        • B
        • C
        ","
      1. is invalid as it is not directly under
          or
            "); System.out.println("Examples of HTMLSanitiser.stripInvalidMarkup with formatWhiteSpace=true:"); System.out.println("------------------------------------------------------------------------\n"); displayStripInvalidMarkup("abc\ndef",true,"convert LF to
            "); displayStripInvalidMarkup(" abc",true,"ensure consecutive spaces are rendered"); displayStripInvalidMarkup("\tabc",true,"convert TAB to equivalent of four spaces"); } private static void displayEncodeInvalidMarkup(String input, String explanation) { display(input,explanation,HTMLSanitiser.encodeInvalidMarkup(input)); } private static void displayStripInvalidMarkup(String input, String explanation) { display(input,explanation,HTMLSanitiser.stripInvalidMarkup(input)); } private static void displayStripInvalidMarkup(String input, boolean formatWhiteSpace, String explanation) { display(input,explanation,HTMLSanitiser.stripInvalidMarkup(input,formatWhiteSpace)); } private static void display(String input, String explanation, String output) { System.out.println(explanation+":\ninput : "+input+"\noutput: "+output+"\n"); } } jericho-html-3.1/samples/console/src/ExtractText.java0000644000175000017500000000620211166464546022734 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class ExtractText { public static void main(String[] args) throws Exception { String sourceUrlString="data/test.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; MicrosoftTagTypes.register(); PHPTagTypes.register(); PHPTagTypes.PHP_SHORT.deregister(); // remove PHP short tags for this example otherwise they override processing instructions MasonTagTypes.register(); Source source=new Source(new URL(sourceUrlString)); // Call fullSequentialParse manually as most of the source will be parsed. source.fullSequentialParse(); System.out.println("Document title:"); String title=getTitle(source); System.out.println(title==null ? 
"(none)" : title); System.out.println("\nDocument description:"); String description=getMetaValue(source,"description"); System.out.println(description==null ? "(none)" : description); System.out.println("\nDocument keywords:"); String keywords=getMetaValue(source,"keywords"); System.out.println(keywords==null ? "(none)" : keywords); System.out.println("\nLinks to other documents:"); List linkElements=source.getAllElements(HTMLElementName.A); for (Element linkElement : linkElements) { String href=linkElement.getAttributeValue("href"); if (href==null) continue; // A element can contain other tags so need to extract the text from it: String label=linkElement.getContent().getTextExtractor().toString(); System.out.println(label+" <"+href+'>'); } System.out.println("\nAll text from file (exluding content inside SCRIPT and STYLE elements):\n"); System.out.println(source.getTextExtractor().setIncludeAttributes(true).toString()); System.out.println("\nSame again but this time extend the TextExtractor class to also exclude text from P elements and any elements with class=\"control\":\n"); TextExtractor textExtractor=new TextExtractor(source) { public boolean excludeElement(StartTag startTag) { return startTag.getName()==HTMLElementName.P || "control".equalsIgnoreCase(startTag.getAttributeValue("class")); } }; System.out.println(textExtractor.setIncludeAttributes(true).toString()); } private static String getTitle(Source source) { Element titleElement=source.getFirstElement(HTMLElementName.TITLE); if (titleElement==null) return null; // TITLE element never contains other tags so just decode it collapsing whitespace: return CharacterReference.decodeCollapseWhiteSpace(titleElement.getContent()); } private static String getMetaValue(Source source, String key) { for (int pos=0; pos elementList=source.getAllElements(); for (Element element : elementList) { System.out.println("-------------------------------------------------------------------------------"); 
System.out.println(element.getDebugInfo()); if (element.getAttributes()!=null) System.out.println("XHTML StartTag:\n"+element.getStartTag().tidy(true)); System.out.println("Source text with content:\n"+element); } System.out.println(source.getCacheDebugInfo()); } } jericho-html-3.1/samples/console/src/ConvertStyleSheets.java0000644000175000017500000000332311031454244024254 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class ConvertStyleSheets { public static void main(String[] args) throws Exception { String sourceUrlString="data/form.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; URL sourceUrl=new URL(sourceUrlString); Source source=new Source(sourceUrl); OutputDocument outputDocument=new OutputDocument(source); StringBuilder sb=new StringBuilder(); List linkStartTags=source.getAllStartTags(HTMLElementName.LINK); for (StartTag startTag : linkStartTags) { Attributes attributes=startTag.getAttributes(); String rel=attributes.getValue("rel"); if (!"stylesheet".equalsIgnoreCase(rel)) continue; String href=attributes.getValue("href"); if (href==null) continue; String styleSheetContent; try { styleSheetContent=Util.getString(new InputStreamReader(new URL(sourceUrl,href).openStream())); } catch (Exception ex) { System.err.println(ex.toString()); continue; // don't convert if URL is invalid } sb.setLength(0); sb.append("\n").append(styleSheetContent).append("\n"); outputDocument.replace(startTag,sb.toString()); } System.err.println("Here is the document "+sourceUrlString+" with all external stylesheets converted to inline stylesheets:\n"); outputDocument.writeTo(new OutputStreamWriter(System.out)); } } jericho-html-3.1/samples/console/src/FormControlDisplayCharacteristics.java0000644000175000017500000000551311166464644027306 0ustar 
twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class FormControlDisplayCharacteristics { public static void main(String[] args) throws Exception { String sourceUrlString="data/form.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; Source source=new Source(new URL(sourceUrlString)); FormFields formFields=source.getFormFields(); // disable some controls: formFields.get("Password").getFormControl().setDisabled(true); formFields.setValue("MailingList","C"); formFields.get("MailingList").getFormControl("C").setDisabled(true); formFields.get("MailingList").getFormControl("D").setDisabled(true); // remove some controls: formFields.get("button1").getFormControl().setOutputStyle(FormControlOutputStyle.REMOVE); FormControl rhubarbFormControl=formFields.get("FavouriteFare").getFormControl("rhubarb"); rhubarbFormControl.setOutputStyle(FormControlOutputStyle.REMOVE); // set some controls to display value: formFields.setValue("Address","The Lodge\nDeakin ACT 2600\nAustralia"); formFields.get("Address").getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE); formFields.setValue("FavouriteSports","BB"); formFields.addValue("FavouriteSports","AFL"); formFields.get("FavouriteSports").getFormControl().setOutputStyle(FormControlOutputStyle.DISPLAY_VALUE); OutputDocument outputDocument=new OutputDocument(source); outputDocument.replace(formFields); // adds all segments necessary to effect changes // also need to remove label for the removed "rhubarb" radio button: // label segment begins at the end of the rhubarb control, and ends at the start of the next control: Segment rhubarbLabelSegment=new Segment(source,rhubarbFormControl.getEnd(),source.getNextTag(rhubarbFormControl.getEnd()).getBegin()); outputDocument.remove(rhubarbLabelSegment); // also need 
to remove instructions for favourite sports control which has been set to output display value: Segment instructionsSegment=source.getFirstElement("class","instructions",false).getContent(); outputDocument.replace(instructionsSegment,"A comma separated list of favourite sports is shown above"); // modify stylesheet link since the output file is in a different directory to the input file int cssPathPos=source.toString().indexOf("main.css"); outputDocument.insert(cssPathPos,"data/"); Writer out=new FileWriter("NewForm.html"); outputDocument.writeTo(out); out.close(); System.err.println("\nThe form containing new default values has been output to NewForm.html"); System.err.println("This will open automatically in a web browser after you press a key."); } } jericho-html-3.1/samples/console/src/FormFieldCSVOutput.java0000644000175000017500000000413111031454374024105 0ustar twernertwernerimport net.htmlparser.jericho.*; import java.util.*; import java.io.*; import java.net.*; public class FormFieldCSVOutput { // newValuesMap is designed to emulate the data structure returned by the // javax.servlet.ServletRequest.getParameterMap() method. 
private static Map newValuesMap=new LinkedHashMap(); static { newValuesMap.put("Name",new String[] {"Humphrey Bear"}); newValuesMap.put("Title",new String[] {"Prime Minister"}); newValuesMap.put("Member",new String[] {"on"}); newValuesMap.put("Address",new String[] {"The Lodge\nDeakin ACT 2600\nAustralia"}); newValuesMap.put("MailingList",new String[] {"A","B"}); newValuesMap.put("FavouriteFare",new String[] {"honey"}); newValuesMap.put("FavouriteSports",new String[] {"BB","AFL"}); } public static void main(String[] args) throws Exception { String sourceUrlString="data/form.html"; if (args.length==0) System.err.println("Using default argument of \""+sourceUrlString+'"'); else sourceUrlString=args[0]; if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString; Source source=new Source(new URL(sourceUrlString)); FormFields formFields=source.getFormFields(); Writer out=new FileWriter("FormData.csv"); Util.outputCSVLine(out,formFields.getColumnLabels()); Util.outputCSVLine(out,formFields.getColumnValues(newValuesMap)); out.close(); System.err.println("\nThe following form submission data has been output to the CSV file \nFormData.csv, based on the data structure defined in the HTML document \n"+sourceUrlString+'\n'); System.err.println(format(newValuesMap)); System.err.println("The FormData.csv file will open automatically after you press a key."); } private static String format(Map valuesMap) { StringBuilder sb=new StringBuilder(); for (Map.Entry entry : valuesMap.entrySet()) { sb.append(entry.getKey()).append(":\n"); for (String value : entry.getValue()) sb.append("- ").append(value).append('\n'); sb.append('\n'); } return sb.toString(); } } jericho-html-3.1/samples/console/logging.properties0000644000175000017500000000021211030434222022534 0ustar twernertwernerhandlers=java.util.logging.ConsoleHandler .level=INFO java.util.logging.ConsoleHandler.formatter=net.htmlparser.jericho.BasicLogFormatter 
jericho-html-3.1/samples/console/bat_lib/0000755000175000017500000000000011167436712020412 5ustar twernertwernerjericho-html-3.1/samples/console/bat_lib/set_package_name.bat0000644000175000017500000000004411214132426024332 0ustar twernertwerner@set package_name=jericho-html-3.1 jericho-html-3.1/samples/console/bat_lib/run.bat0000644000175000017500000000045111167327044021703 0ustar twernertwerner@if "%1"=="" goto error @call bat_lib\set_package_name java -classpath classes;../../dist/%package_name%.jar -enableassertions -Djava.util.logging.config.file=logging.properties %* @goto end :error @echo You must specify the name of a sample program to run on the command line :end @pause jericho-html-3.1/samples/console/classes/0000755000175000017500000000000011214132426020437 5ustar twernertwernerjericho-html-3.1/samples/console/classes/DisplayAllElements.class0000644000175000017500000000506111214132426025223 0ustar twernertwerner1 &BC DEF BG H I J KL MNO PQ RQ RS TU VQWX Y Z [ \] ^_ ^`a Dbc d ef g hij k lmn()VCodeLineNumberTableLocalVariableTablethisLDisplayAllElements;main([Ljava/lang/String;)Velement Lnet/htmlparser/jericho/Element;i$Ljava/util/Iterator;args[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; elementListLjava/util/List;LocalVariableTypeTable2Ljava/util/List; Exceptionso SourceFileDisplayAllElements.java '(data/test.htmlp qrjava/lang/StringBuilderUsing default argument of " st su vwx yz{ |}file:~ (  (net/htmlparser/jericho/Source java/net/URL 'z '   net/htmlparser/jericho/Element rO------------------------------------------------------------------------------- w XHTML StartTag:  Source text with content: s 
wDisplayAllElementsjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(net/htmlparser/jericho/MicrosoftTagTypesregister"net/htmlparser/jericho/PHPTagTypes PHP_SHORT%Lnet/htmlparser/jericho/StartTagType;#net/htmlparser/jericho/StartTagType deregister$net/htmlparser/jericho/MasonTagTypes(Ljava/net/URL;)VgetAllElements()Ljava/util/List;java/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;out getDebugInfo getAttributes%()Lnet/htmlparser/jericho/Attributes; getStartTag#()Lnet/htmlparser/jericho/StartTag;net/htmlparser/jericho/StartTagtidy(Z)Ljava/lang/String;-(Ljava/lang/Object;)Ljava/lang/StringBuilder;getCacheDebugInfo!%&'()/**+ ,- ./)L*$Y+" *2L+: Y + L YY+M,N-:h:  $Y ! Y"# ,$ *J  ) - KNQWZjo+>V01wo234567j89o:;< o:=>?@Ajericho-html-3.1/samples/console/classes/FormFieldList.class0000644000175000017500000000352011214132426024171 0ustar twernertwerner1y 89 :;< 8= > ? 
@ AB CDEFG H I J :KLM NO PQ NR ST SUV WXY()VCodeLineNumberTableLocalVariableTablethisLFormFieldList;main([Ljava/lang/String;)V formField"Lnet/htmlparser/jericho/FormField;i$Ljava/util/Iterator;args[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; formFields#Lnet/htmlparser/jericho/FormFields; ExceptionsZ SourceFileFormFieldList.java  data/form.html[ \]java/lang/StringBuilderUsing default argument of " ^_ ^` abc def ghfile:net/htmlparser/jericho/Source java/net/URL e i jk l] The document  contains m no ^p form fields: qrs tu vw net/htmlparser/jericho/FormField xb FormFieldListjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(Ljava/net/URL;)V getFormFields%()Lnet/htmlparser/jericho/FormFields;out!net/htmlparser/jericho/FormFieldssize()I(I)Ljava/lang/StringBuilder;iterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object; getDebugInfo! !/*"# $% &'!9L*$Y+" *2L+: Y + L YY+M,NY+- -:: ߱".   
) - K[`#> ()$*+,-./[Z01`U234567jericho-html-3.1/samples/console/classes/ExtractText$1.class0000644000175000017500000000165011214132426024074 0ustar twernertwerner14   !"# $ %&'(#(Lnet/htmlparser/jericho/Segment;)VCodeLineNumberTableLocalVariableTablethis InnerClassesLExtractText$1;x0 Lnet/htmlparser/jericho/Segment;excludeElement$(Lnet/htmlparser/jericho/StartTag;)ZstartTag!Lnet/htmlparser/jericho/StartTag; SourceFileExtractText.javaEnclosingMethod) *+ , -.pcontrolclass /01 23 ExtractText$1$net/htmlparser/jericho/TextExtractor ExtractTextmain([Ljava/lang/String;)Vnet/htmlparser/jericho/StartTaggetName()Ljava/lang/String;getAttributeValue&(Ljava/lang/String;)Ljava/lang/String;java/lang/StringequalsIgnoreCase(Ljava/lang/String;)Z    :*+ 2 Q++ 3 jericho-html-3.1/samples/console/classes/FindSpecificTags.class0000644000175000017500000001045611214132426024641 0ustar twernertwerner1 Kkl mno kp q r s tu vwx yz {z|} ~  m  J z          { {  {    F t()VCodeLineNumberTableLocalVariableTablethisLFindSpecificTags;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; ExceptionsdisplaySegments(Ljava/util/List;)Vsegment Lnet/htmlparser/jericho/Segment;i$Ljava/util/Iterator;segmentsLjava/util/List;LocalVariableTypeTable3Ljava/util/List<+Lnet/htmlparser/jericho/Segment;>; Signature6(Ljava/util/List<+Lnet/htmlparser/jericho/Segment;>;)V SourceFileFindSpecificTags.java LMdata/test.html java/lang/StringBuilderUsing default argument of "   file: Mnet/htmlparser/jericho/Source java/net/URL L L Q ******************************************************************************* XML Declarations: ]^XML Processing instructions:  M LP##################### PHP tag types now added to register #####################  H2 Elements:h2 Document Type Declarations: CDATA sections: OCommon server tags: (eg ASP, JSP, PSP, ASP-style PHP or Mason substitution tag) Tags starting with <%=%= Tags starting with <%=var%=varHTML Comments: 1Elements 
in namespace "o" (generated by MS-Word):o:7Tags starting with ,?@ ,A,B O3  ) - KNQaiq{!"%&()+,./12"4*537;8D<L=V?^@hBpCzEFHIKLNOQRP UVWXalYZ[\ ]^N?*CL+D)+EFMG ,H ,IԲ OUV"W,X6Z>[P _`/ab?cde ?cfghijjericho-html-3.1/samples/console/classes/ExtractText.class0000644000175000017500000001044611214132426023752 0ustar twernertwerner1 ?lm nop lq r s t uv wxy z{ |{ |} ~ {    n >V >W  I & & t   1U     ()VCodeLineNumberTableLocalVariableTablethis LExtractText;main([Ljava/lang/String;)VhrefLjava/lang/String;label linkElement Lnet/htmlparser/jericho/Element;i$Ljava/util/Iterator;args[Ljava/lang/String;sourceUrlStringsourceLnet/htmlparser/jericho/Source;title descriptionkeywords linkElementsLjava/util/List; textExtractor&Lnet/htmlparser/jericho/TextExtractor;LocalVariableTypeTable2Ljava/util/List; ExceptionsgetTitle3(Lnet/htmlparser/jericho/Source;)Ljava/lang/String; titleElement getMetaValueE(Lnet/htmlparser/jericho/Source;Ljava/lang/String;)Ljava/lang/String;startTag!Lnet/htmlparser/jericho/StartTag;posIkey SourceFileExtractText.java @Adata/test.html java/lang/StringBuilderUsing default argument of "   file: A  Anet/htmlparser/jericho/Source java/net/URL @ @ Document title: `a(none) Document description: cd Document keywords: Links to other documents:a   net/htmlparser/jericho/Element   <I All text from file (exluding content inside SCRIPT and STYLE elements):  Same again but this time extend the TextExtractor class to also exclude text from P elements and any elements with class="control":  ExtractText$1 InnerClasses @  name  metacontent  ExtractTextjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(net/htmlparser/jericho/MicrosoftTagTypesregister"net/htmlparser/jericho/PHPTagTypes PHP_SHORT%Lnet/htmlparser/jericho/StartTagType;#net/htmlparser/jericho/StartTagType 
deregister$net/htmlparser/jericho/MasonTagTypes(Ljava/net/URL;)VfullSequentialParse()[Lnet/htmlparser/jericho/Tag;outgetAllElements$(Ljava/lang/String;)Ljava/util/List;java/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;getAttributeValue&(Ljava/lang/String;)Ljava/lang/String; getContent"()Lnet/htmlparser/jericho/Segment;net/htmlparser/jericho/SegmentgetTextExtractor(()Lnet/htmlparser/jericho/TextExtractor;$net/htmlparser/jericho/TextExtractorsetIncludeAttributes)(Z)Lnet/htmlparser/jericho/TextExtractor;#(Lnet/htmlparser/jericho/Segment;)VgetFirstElement4(Ljava/lang/String;)Lnet/htmlparser/jericho/Element;)net/htmlparser/jericho/CharacterReferencedecodeCollapseWhiteSpace,(Ljava/lang/CharSequence;)Ljava/lang/String;length()IgetNextStartTagI(ILjava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/StartTag;net/htmlparser/jericho/StartTaggetNamegetEnd!>?@AB/*CD EF GHB L*$Y+" *2L+: Y + L YY+M,W ,N--  ,:  ,:  ,!":#:$T%&:'(:  )*+: Y , > - ,./+ 0 1Y,2:/+ C"  ) - KNQWZjow| !#$%&')*A+D-L.]0e1o6~7Dz 9IJ $KJ BLM[NOPQ|RJjST|UJVJWJXYoZ[\ X]^_ `aBQ*34L++)5C:; =DSTbM cdB4=*6+*7+8N--9: -;<-==C"A BCD#E*F/G2HD*ef0gh4ST4iJjk 1jericho-html-3.1/samples/console/classes/FormControlDisplayCharacteristics.class0000644000175000017500000000775511214132426030332 0ustar twernertwerner1 Cde fgh di j k l mn opqrs t u vw xy z{ |}~ x z |  x ' ' | * '  ' l o ' ;t ' ()VCodeLineNumberTableLocalVariableTablethis#LFormControlDisplayCharacteristics;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; formFields#Lnet/htmlparser/jericho/FormFields;rhubarbFormControl$Lnet/htmlparser/jericho/FormControl;outputDocument'Lnet/htmlparser/jericho/OutputDocument;rhubarbLabelSegment Lnet/htmlparser/jericho/Segment;instructionsSegment cssPathPosIoutLjava/io/Writer; Exceptions SourceFile&FormControlDisplayCharacteristics.java DEdata/form.html java/lang/StringBuilderUsing default argument of "   
file:net/htmlparser/jericho/Source java/net/URL D D Password    MailingListC Dbutton1  FavouriteFarerhubarbAddress%The Lodge Deakin ACT 2600 Australia FavouriteSportsBBAFL %net/htmlparser/jericho/OutputDocument D net/htmlparser/jericho/Segment  D class instructions  9A comma separated list of favourite sports is shown above main.css data/ java/io/FileWriter NewForm.html  EG The form containing new default values has been output to NewForm.htmlDThis will open automatically in a web browser after you press a key.!FormControlDisplayCharacteristicsjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(Ljava/net/URL;)V getFormFields%()Lnet/htmlparser/jericho/FormFields;!net/htmlparser/jericho/FormFieldsget6(Ljava/lang/String;)Lnet/htmlparser/jericho/FormField; net/htmlparser/jericho/FormFieldgetFormControl&()Lnet/htmlparser/jericho/FormControl;"net/htmlparser/jericho/FormControl setDisabled(Z)VsetValue'(Ljava/lang/String;Ljava/lang/String;)Z8(Ljava/lang/String;)Lnet/htmlparser/jericho/FormControl;-net/htmlparser/jericho/FormControlOutputStyleREMOVE/Lnet/htmlparser/jericho/FormControlOutputStyle;setOutputStyle2(Lnet/htmlparser/jericho/FormControlOutputStyle;)V DISPLAY_VALUEaddValue"(Lnet/htmlparser/jericho/Source;)Vreplace&(Lnet/htmlparser/jericho/FormFields;)VgetEnd()I getNextTag(I)Lnet/htmlparser/jericho/Tag;net/htmlparser/jericho/TaggetBegin$(Lnet/htmlparser/jericho/Source;II)Vremove#(Lnet/htmlparser/jericho/Segment;)VgetFirstElementG(Ljava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/Element;net/htmlparser/jericho/Element getContent"()Lnet/htmlparser/jericho/Segment;;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)V(Ljava/lang/String;)Iinsert(ILjava/lang/CharSequence;)VwriteTo(Ljava/io/Writer;)Vjava/io/Writerclose!BCDEF/*GH IJ KLFz vL*$Y+" *2L+: Y 
+ L YY+M,N--W----:- !W- "-#$W-#%&W-#"'Y,(:-)*Y,+,+,-.:/,0123:45,67869:;Y<=:  > ?@ A G!  ) - K[`mv #$#&1':*E+N-Y.`/e0m1u2Hf vMNsOP[QR`STUV{WXZYZ1E[ZE1\]Y^_ `abcjericho-html-3.1/samples/console/classes/FormatSource.class0000644000175000017500000000335211214132426024102 0ustar twernertwerner1p 23 456 27 8 9 : ;< =>? @A BA CADE F GH IJ K LM 4N O PQR()VCodeLineNumberTableLocalVariableTablethisLFormatSource;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; ExceptionsS SourceFileFormatSource.java  data/test.htmlT UVjava/lang/StringBuilderUsing default argument of " WX WY Z[\ ]^_ `afile:b c denet/htmlparser/jericho/Source java/net/URL ^ f&net/htmlparser/jericho/SourceFormatter g hi jkjava/io/OutputStreamWriter lV m no FormatSourcejava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(net/htmlparser/jericho/MicrosoftTagTypesregister"net/htmlparser/jericho/PHPTagTypes$net/htmlparser/jericho/MasonTagTypes(Ljava/net/URL;)V#(Lnet/htmlparser/jericho/Segment;)VsetIndentString<(Ljava/lang/String;)Lnet/htmlparser/jericho/SourceFormatter; setTidyTags+(Z)Lnet/htmlparser/jericho/SourceFormatter;out(Ljava/io/OutputStream;)VwriteTo(Ljava/io/Writer;)V! !/*"# $% &'!L*$Y+" *2L+: Y + L YY+MY,Y".   ) - KNQTd# ()*+d,-./01jericho-html-3.1/samples/console/classes/FormFieldCSVOutput.class0000644000175000017500000000724711214132426025144 0ustar twernertwerner1 <kl mno kp q r s tu #vwxy z { |}~ z  ;   ;  ! 
!K (k  newValuesMapLjava/util/Map; Signature6Ljava/util/Map;()VCodeLineNumberTableLocalVariableTablethisLFormFieldCSVOutput;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; formFields#Lnet/htmlparser/jericho/FormFields;outLjava/io/Writer; Exceptionsformat#(Ljava/util/Map;)Ljava/lang/String;valuearr$len$Ii$entryEntry InnerClassesLjava/util/Map$Entry;Ljava/util/Iterator; valuesMapsbLjava/lang/StringBuilder;LocalVariableTypeTable;J(Ljava/util/Map;)Ljava/lang/String; SourceFileFormFieldCSVOutput.java ABdata/form.html java/lang/StringBuilderUsing default argument of "  file:net/htmlparser/jericho/Source java/net/URL A A java/io/FileWriter FormData.csv  =>  B The following form submission data has been output to the CSV file FormData.csv, based on the data structure defined in the HTML document VWDThe FormData.csv file will open automatically after you press a key.   java/util/Map$Entry java/lang/String: - java/util/LinkedHashMapName Humphrey Bear TitlePrime MinisterMemberonAddress%The Lodge Deakin ACT 2600 Australia MailingListAB FavouriteFarehoneyFavouriteSportsBBAFLFormFieldCSVOutputjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)VindexOf(I)I(Ljava/net/URL;)V getFormFields%()Lnet/htmlparser/jericho/FormFields;!net/htmlparser/jericho/FormFieldsgetColumnLabels()[Ljava/lang/String;net/htmlparser/jericho/Util outputCSVLine&(Ljava/io/Writer;[Ljava/lang/String;)VgetColumnValues$(Ljava/util/Map;)[Ljava/lang/String;java/io/Writerclose java/util/MapentrySet()Ljava/util/Set; java/util/Setiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;getKeygetValueput8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;!;< =>?@ABC/*DE FG HICBL*$Y+" *2L+: Y + L YY+M,NY:--Y+    D>)-K[`kt 
!"#$E4JKLM[]NO`XPQkMRSTU VWC!YL*M,b, !N+-"#$W-%&:66!2:+' W+ W+ D'('):*r+|-ER[XME-YKJ(Z[M%\['R]`h\ab>ycde'R]fb@?ghBC(Y)*#Y+S,W-#Y.S,W/#Y0S,W1#Y2S,W3#Y4SY5S,W6#Y7S,W8#Y9SY:S,WD&  2 FZsij_ !^ jericho-html-3.1/samples/console/classes/FormFieldSetValues.class0000644000175000017500000000504011214132426025170 0ustar twernertwerner1 4PQ RST PU V W X YZ [\]^_ ` a b cdef cghijklmnopqrstuv $w $x Xy [z{ $|}~ ,` $ ()VCodeLineNumberTableLocalVariableTablethisLFormFieldSetValues;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; formFields#Lnet/htmlparser/jericho/FormFields;outputDocument'Lnet/htmlparser/jericho/OutputDocument; cssPathPosIoutLjava/io/Writer; Exceptions SourceFileFormFieldSetValues.java 56data/form.html java/lang/StringBuilderUsing default argument of "   file:net/htmlparser/jericho/Source java/net/URL 5 5  6Name Humphrey Bear TitlePrime MinisterMemberonAddress%The Lodge Deakin ACT 2600 Australia MailingListAB FavouriteFarehoneyFavouriteSportsBBAFL%net/htmlparser/jericho/OutputDocument 5 main.css data/ java/io/FileWriter NewForm.html  6G The form containing new default values has been output to NewForm.htmlDThis will open automatically in a web browser after you press a key.FormFieldSetValuesjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(Ljava/net/URL;)V getFormFields%()Lnet/htmlparser/jericho/FormFields;!net/htmlparser/jericho/FormFields clearValuesaddValue'(Ljava/lang/String;Ljava/lang/String;)Z"(Lnet/htmlparser/jericho/Source;)Vreplace&(Lnet/htmlparser/jericho/FormFields;)V(Ljava/lang/String;)Iinsert(ILjava/lang/CharSequence;)VwriteTo(Ljava/io/Writer;)Vjava/io/Writerclose!34567/*89 :; <=7L*$Y+" *2L+: Y + L YY+M,N--W-W-W-W-W-W- W-!"W-!#W$Y,%:-&,'()6*+,Y-.:/01 2 8n  ) - 
K[`dmv!"#$%&9H>?@A[BC`DEBFG1HIJKLMNOjericho-html-3.1/samples/console/classes/StreamedSourceCopy.class0000644000175000017500000000456311214132426025256 0ustar twernertwerner1 %IJ KLM IN O P Q RS TUVWX Y Z[\ Y K]^ _ `a `bc d efg Q hi hjklmno()VCodeLineNumberTableLocalVariableTablethisLStreamedSourceCopy;main([Ljava/lang/String;)VtagLnet/htmlparser/jericho/Tag;characterReference+Lnet/htmlparser/jericho/CharacterReference;segment Lnet/htmlparser/jericho/Segment;i$Ljava/util/Iterator;lastSegmentEndIexLjava/io/IOException;tLjava/lang/Throwable;args[Ljava/lang/String;sourceUrlStringLjava/lang/String;streamedSource'Lnet/htmlparser/jericho/StreamedSource;writerLjava/io/Writer; Exceptionsp SourceFileStreamedSourceCopy.java &'data/test.htmlq rsjava/lang/StringBuilderUsing default argument of " tu tv wxy z{| }~file:%net/htmlparser/jericho/StreamedSource java/net/URL &{ &java/io/FileWriterStreamedSourceCopyOutput.html sProcessing segments:  net/htmlparser/jericho/Segment x net/htmlparser/jericho/Tag)net/htmlparser/jericho/CharacterReference { 'N A copy of the source document has been output to StreamedSourceCopyOuput.htmljava/lang/Throwablejava/io/IOExceptionStreamedSourceCopyjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(Ljava/net/URL;)Voutiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object; getDebugInfogetEnd()Ijava/io/Writerwriteclose!$%&'(/*)* +, -.(L*$Y+" *2L+: Y + L YY+MNYN 6,:[: 6 : :-- ! 
:- - :]"#)f  ) - K[]gor&(),*+-*p /012I34xb56rt789: ;<=>?@[AB]CDEFGHjericho-html-3.1/samples/console/classes/HTMLSanitiser.class0000644000175000017500000002045311214132426024120 0ustar twernertwerner1 | } } }      } }    }     }    } }  }    /     4 4  4   } ; ; ;  4   } !"#$%&'()*+, }-./012345 }6789: }; }< }=>?@ABCDEFG HI nJKLMNOPQVALID_ELEMENT_NAMESLjava/util/Set; Signature#Ljava/util/Set;VALID_ATTRIBUTE_NAMES VALID_MARKERLjava/lang/Object;()VCodeLineNumberTableLocalVariableTablethisLHTMLSanitiser;encodeInvalidMarkup&(Ljava/lang/String;)Ljava/lang/String; pseudoHTMLLjava/lang/String;'(Ljava/lang/String;Z)Ljava/lang/String;formatWhiteSpaceZstripInvalidMarkupsanitise((Ljava/lang/String;ZZ)Ljava/lang/String;tagLnet/htmlparser/jericho/Tag;i$Ljava/util/Iterator;stripInvalidElementssourceLnet/htmlparser/jericho/Source;outputDocument'Lnet/htmlparser/jericho/OutputDocument;tagsLjava/util/List;posILocalVariableTypeTable.Ljava/util/List; processTagF(Lnet/htmlparser/jericho/Tag;Lnet/htmlparser/jericho/OutputDocument;)Zelement Lnet/htmlparser/jericho/Element; elementName isValidLITag(Lnet/htmlparser/jericho/Tag;)Z parentElementreencodeTextSegmentL(Lnet/htmlparser/jericho/Source;Lnet/htmlparser/jericho/OutputDocument;IIZ)Vbeginend textSegment Lnet/htmlparser/jericho/Segment; decodedText encodedTextgetStartTagHTML;(Lnet/htmlparser/jericho/StartTag;)Ljava/lang/CharSequence; attribute"Lnet/htmlparser/jericho/Attribute;startTag!Lnet/htmlparser/jericho/StartTag;sbLjava/lang/StringBuilder; getEndTagHTMLtagNamemain([Ljava/lang/String;)Vargs[Ljava/lang/String; ExceptionsRdisplayEncodeInvalidMarkup'(Ljava/lang/String;Ljava/lang/String;)Vinput explanationdisplayStripInvalidMarkup((Ljava/lang/String;ZLjava/lang/String;)Vdisplay9(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Voutput SourceFileHTMLSanitiser.java net/htmlparser/jericho/Source S TU%net/htmlparser/jericho/OutputDocument V WXY Z[\ ]^ _`net/htmlparser/jericho/Tag ab cd ef gf hi ji ~k lm nop qr stu vwx yz {wli |} ~  q t 
`ulolnet/htmlparser/jericho/Segment    java/lang/StringBuilder    net/htmlparser/jericho/Attribute  i i=" /def
            geh element not allowed

            abcadd optional end tag#remove potentially dangerous script0

            abc

            :keep approved attributes but strip non-approved attributes

            abc

            /tidy up attributes to make them XHTML compliant"List:
            • A
            • B
            • C
            inserts optional end tags-Examples of HTMLSanitiser.stripInvalidMarkup:.--------------------------------------------- List:
          1. A
          2. B
          3. C%missing required
              or
                element)List:
                • A
                • B
                • C
                8
              1. is invalid as it is not directly under
                  or
                    HExamples of HTMLSanitiser.stripInvalidMarkup with formatWhiteSpace=true:I------------------------------------------------------------------------ abc defconvert LF to
                     abc&ensure consecutive spaces are rendered abc(convert TAB to equivalent of four spaces  : input :  output:  java/util/HashSetjava/lang/Stringbrpbia  idclasshreftargettitlejava/lang/Object HTMLSanitiserjava/lang/Exception(Ljava/lang/CharSequence;)VfullSequentialParse()[Lnet/htmlparser/jericho/Tag;"(Lnet/htmlparser/jericho/Source;)V getAllTags()Ljava/util/List;java/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object; setUserData(Ljava/lang/Object;)Vremove#(Lnet/htmlparser/jericho/Segment;)VgetBegin()IgetEndtoString()Ljava/lang/String;getName java/util/Setcontains(Ljava/lang/Object;)Z getTagType"()Lnet/htmlparser/jericho/TagType;#net/htmlparser/jericho/StartTagTypeNORMAL%Lnet/htmlparser/jericho/StartTagType; getElement"()Lnet/htmlparser/jericho/Element;#net/htmlparser/jericho/HTMLElementsgetEndTagRequiredElementNames()Ljava/util/Set;net/htmlparser/jericho/Element getEndTag!()Lnet/htmlparser/jericho/EndTag;getEndTagOptionalElementNamesinsert(ILjava/lang/CharSequence;)V getStartTag#()Lnet/htmlparser/jericho/StartTag;replace;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)V!net/htmlparser/jericho/EndTagType#Lnet/htmlparser/jericho/EndTagType;getParentElementnet/htmlparser/jericho/StartTag getUserData$(Lnet/htmlparser/jericho/Source;II)V)net/htmlparser/jericho/CharacterReferencedecode,(Ljava/lang/CharSequence;)Ljava/lang/String;encodeWithWhiteSpaceFormattingencodeappend(C)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder; getAttributes%()Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/AttributesgetKeygetValuejava/lang/SystemoutLjava/io/PrintStream;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/util/ArraysasList%([Ljava/lang/Object;)Ljava/util/List;(Ljava/util/Collection;)V!}|~/*-  0*]  ;* 0*  ;* N Y*N-WY- :- :6 : J :-6--: !@JU\cq{\ @8*Q vp!m p **M,*Z*N, -3 ,%,! *"-+-#,$%+*-&'(3*)'*,! 
*"+*,$(B".7CReq~*"O u5**L++&++,- +,.5- 6/Y*0:1: 23:+(-5R66666$-  4Y5L+<6*78W*9:M, N, ;N<-=2+ 6->8W-?+@8W+-?38W+"6W*A *7 +B8W+>6W+2 1@NU\ h r*1>T C4Y5C8*8>6D  GEFGEHGIJKLMKNOKPQKRSKTUKVWKEXGEYGIJZLMZNOZPQZRSZTUZVWZ[\Z]^ZE_GE`Gabcdecfgcj!"#%$,%3&:'A)I*Q,X-_.f/m0t1{234679:;<  B *+*hi ? @  B *+*ji C D  M *,*i G H    n,E4Y5+8k8*8l8,8m8DG K+L ,,,qnYoYpSYqSYrSYsSY.SY-SY!SYtSuvnYoYwSYxSYySYzSY{Suv<|Y0<<f@jericho-html-3.1/samples/console/classes/ConvertStyleSheets.class0000644000175000017500000000674411214132426025316 0ustar twernertwerner1 5_` abc _d e f g hi jklm no pq rs t uv wx wyz {C |}~ jE   $g  |   a 0 ()VCodeLineNumberTableLocalVariableTablethisLConvertStyleSheets;main([Ljava/lang/String;)VexLjava/lang/Exception; attributes#Lnet/htmlparser/jericho/Attributes;relLjava/lang/String;hrefstyleSheetContent typeAttribute"Lnet/htmlparser/jericho/Attribute;startTag!Lnet/htmlparser/jericho/StartTag;i$Ljava/util/Iterator;args[Ljava/lang/String;sourceUrlString sourceUrlLjava/net/URL;sourceLnet/htmlparser/jericho/Source;outputDocument'Lnet/htmlparser/jericho/OutputDocument;sbLjava/lang/StringBuilder; linkStartTagsLjava/util/List;LocalVariableTypeTable3Ljava/util/List; Exceptions SourceFileConvertStyleSheets.java 67data/form.html java/lang/StringBuilderUsing default argument of "   file: java/net/URL 6net/htmlparser/jericho/Source 6%net/htmlparser/jericho/OutputDocument 6link   net/htmlparser/jericho/StartTag   stylesheet java/io/InputStreamReader 6 6 java/lang/Exception   Here is the document @ with all external stylesheets converted to inline stylesheets: java/io/OutputStreamWriter 6 
ConvertStyleSheetsjava/lang/Objectjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(Ljava/net/URL;)V"(Lnet/htmlparser/jericho/Source;)VgetAllStartTags$(Ljava/lang/String;)Ljava/util/List;java/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object; getAttributes%()Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/AttributesgetValue&(Ljava/lang/String;)Ljava/lang/String;equalsIgnoreCase(Ljava/lang/String;)Z#(Ljava/net/URL;Ljava/lang/String;)V openStream()Ljava/io/InputStream;(Ljava/io/InputStream;)Vnet/htmlparser/jericho/Util getString$(Ljava/io/Reader;)Ljava/lang/String; setLength(I)Vget6(Ljava/lang/String;)Lnet/htmlparser/jericho/Attribute;3(Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;replace;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)Vout(Ljava/io/OutputStream;)VwriteTo(Ljava/io/Writer;)V!45678/*9: ;< =>8iL*$Y+" *2L+: Y + L Y+MY,NY-:Y:-::::  :   :  Y Y, !"#: :  % &'W ():    *W+ ,W -IY.+/ 0Y123$9~  ) - KT]gpx !"#$,%8&;'Y(h):?@ AB CD {ED ZFD 0GH IJKLiMNfODTPQ] RSgTUpVWxXYZ xX[\$]^jericho-html-3.1/samples/console/classes/CompactSource.class0000644000175000017500000000307311214132426024240 0ustar twernertwerner1f /0 123 /4 5 6 7 89 :;< => ?> @>AB C DE FG 1H I JKL()VCodeLineNumberTableLocalVariableTablethisLCompactSource;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; ExceptionsM SourceFileCompactSource.java data/test.htmlN OPjava/lang/StringBuilderUsing default argument of " QR QS TUV WXY Z[file:\ ]^_net/htmlparser/jericho/Source java/net/URL X `&net/htmlparser/jericho/SourceCompactor ajava/io/OutputStreamWriter bP c de 
CompactSourcejava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(net/htmlparser/jericho/MicrosoftTagTypesregister"net/htmlparser/jericho/PHPTagTypes$net/htmlparser/jericho/MasonTagTypes(Ljava/net/URL;)V#(Lnet/htmlparser/jericho/Segment;)Vout(Ljava/io/OutputStream;)VwriteTo(Ljava/io/Writer;)V!/* !" #$zL*$Y+" *2L+: Y + L YY+MY,Y.   ) - KNQTdy z%&w'(d)*+,-.jericho-html-3.1/samples/console/classes/SplitLongLines.class0000644000175000017500000000747011214132426024404 0ustar twernertwerner1 =rs tuv rw x y z {| }~     } < }     } < <  <  t { < { < =z } {  3 3 }  3 MAX_LENGTHI ConstantValueFcol()VCodeLineNumberTableLocalVariableTablethisLSplitLongLines;main([Ljava/lang/String;)VtagLnet/htmlparser/jericho/Tag;i$Ljava/util/Iterator;sourceLnet/htmlparser/jericho/Source;poslineLjava/lang/String;args[Ljava/lang/String;sourceUrlString sourceUrlLjava/net/URL;readerLjava/io/BufferedReader; Exceptionsprintln(Ljava/lang/CharSequence;)VtextLjava/lang/CharSequence;print(Ljava/lang/CharSequence;Z)Viwords splitLongTextZprintTag1(Lnet/htmlparser/jericho/Tag;Ljava/lang/String;)V attribute"Lnet/htmlparser/jericho/Attribute;startTag!Lnet/htmlparser/jericho/StartTag; attributes#Lnet/htmlparser/jericho/Attributes; SourceFileSplitLongLines.java CD../../doc/index.html java/lang/StringBuilderUsing default argument of "  ^ file: java/net/URL Cjava/io/BufferedReaderjava/io/InputStreamReader C C ^_ net/htmlparser/jericho/Source C_   net/htmlparser/jericho/Tag b_ hi ^D D B? 
^ bc\s  bnet/htmlparser/jericho/EndTagnet/htmlparser/jericho/StartTag   net/htmlparser/jericho/AttributeSplitLongLinesjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStream(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I openStream()Ljava/io/InputStream;(Ljava/io/InputStream;)V(Ljava/io/Reader;)VreadLinelength()Itrim getAllTags()Ljava/util/List;java/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;getBegin subSequence(II)Ljava/lang/CharSequence;getEndcloseout(Ljava/lang/Object;)Vjava/lang/CharSequencesplit'(Ljava/lang/String;)[Ljava/lang/String; getAttributes%()Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/Attributes substring(II)Ljava/lang/String;!<=>?@A B?CDE/*FG HI JKE" 0L*$Y+" *2L+: Y + L Y+MNYY,N-Y:F :F λY:6:9: !"#6 !$U--%: --% V""$"Fn  )-KTVis}!"#$%&()*,/.G\ 'LM@NOkPQhR?pST0UV-WTTXYVZ[\] ^DE+ &'(F12 3 ^_E> &*)(F67 8G  `a b_E4**F ;<G `a bcEi;*+F0*,-.M>,,2*,d/!㱲((*+`F$&*0(*+`(F. ?@A!B(C5A;E<GTH[IhJG* d?#eVi`aifg hiEr*1F *2*!*3M,4N-M+,5-67!-8:9:/!!+-:,;7!,!F6 MNOQRS#T2UNVSW[Ym[q]G>N jk8#NOrLMrSTXlmSnopqjericho-html-3.1/samples/console/classes/Encoding.class0000644000175000017500000000360611214132426023221 0ustar twernertwerner1u 89 :;< 8= > ? 
@ AB CDE :FGH IJ KLM N OP Q@RS TU VW XYZ()VCodeLineNumberTableLocalVariableTablethis LEncoding;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;urlLjava/net/URL;sourceLnet/htmlparser/jericho/Source; titleElement Lnet/htmlparser/jericho/Element; Exceptions[ SourceFile Encoding.java !"data/test.html\ ]^java/lang/StringBuilderUsing default argument of " _` _a bcd efg hifile: j^ Source URL: java/net/URL !fnet/htmlparser/jericho/Source !k Document Title:title lmn opq(none) Source.getEncoding(): rc' Source.getEncodingSpecificationInfo(): sc% Source.getPreliminaryEncodingInfo(): tcEncodingjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)Iout(Ljava/net/URL;)VgetFirstElement4(Ljava/lang/String;)Lnet/htmlparser/jericho/Element;net/htmlparser/jericho/Element getContent"()Lnet/htmlparser/jericho/Segment;net/htmlparser/jericho/Segment getEncodinggetEncodingSpecificationInfogetPreliminaryEncodingInfo! 
!"#/*$% &' ()#eL*$Y+" *2L+: Y + L  + Y+MY,N  -:   -  -  - $N  ) - KSZclt|%4*+,-ch./l_01|O234567jericho-html-3.1/samples/console/classes/RenderToText.class0000644000175000017500000000256111214132426024061 0ustar twernertwerner1W +, -./ +0 1 2 3 45 6789: ; < = >3 -?@AB()VCodeLineNumberTableLocalVariableTablethisLRenderToText;main([Ljava/lang/String;)Vargs[Ljava/lang/String;sourceUrlStringLjava/lang/String;sourceLnet/htmlparser/jericho/Source; renderedText ExceptionsC SourceFileRenderToText.java data/test.htmlD EFjava/lang/StringBuilderUsing default argument of " GH GI JKL MNO PQfile:net/htmlparser/jericho/Source java/net/URL N R STU VF( Simple rendering of the HTML document:  RenderToTextjava/lang/Objectjava/lang/Exceptionjava/lang/SystemerrLjava/io/PrintStream;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;java/io/PrintStreamprintln(Ljava/lang/String;)Vjava/lang/StringindexOf(I)I(Ljava/net/URL;)V getRenderer#()Lnet/htmlparser/jericho/Renderer;net/htmlparser/jericho/Rendererout!/*  sL*$Y+" *2L+: Y + L YY+M,N - *   ) - K[ckr*s !p"#[$%c&#'()*jericho-html-3.1/classes/0000755000175000017500000000000011214132414015326 5ustar twernertwernerjericho-html-3.1/classes/net/0000755000175000017500000000000011214132414016114 5ustar twernertwernerjericho-html-3.1/classes/net/htmlparser/0000755000175000017500000000000011214132414020275 5ustar twernertwernerjericho-html-3.1/classes/net/htmlparser/jericho/0000755000175000017500000000000011214132416021722 5ustar twernertwerner././@LongLink0000000000000000000000000000014500000000000011565 Lustar rootrootjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$StandardBlockElementHandler.classjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$StandardBlockElementHandler.class0000644000175000017500000000244311214132414033774 0ustar twernertwerner1F * + , - . 
/ 0 12 4 5 6 7 8 9:; INSTANCE_0_0= Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler; INSTANCE_1_1 INSTANCE_2_1INSTANCE_0_0_INDENTINSTANCE_1_1_INDENT topMarginI bottomMarginindentZ(IIZ)VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions>()V !)    ?@ AB C% DBEnet/htmlparser/jericho/Renderer$Processor$StandardBlockElementHandlerStandardBlockElementHandler !"     java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandlerE)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$400/(Lnet/htmlparser/jericho/Renderer$Processor;I)V access$508.(Lnet/htmlparser/jericho/Renderer$Processor;)I access$000 access$510net/htmlparser/jericho/Renderer  !"# ****$%#:.+**+W+,*+W+*&'()#NB Y Y Y Y  Y < 3 jericho-html-3.1/classes/net/htmlparser/jericho/TagTypeRegister.class0000644000175000017500000001052111214132416026032 0ustar twernertwerner1 h i j k 2l m n o #p qr qst l u v w xy l z {|} l ~    # r   ProspectiveTagTypeIterator InnerClassesparent(Lnet/htmlparser/jericho/TagTypeRegister;chCchildren)[Lnet/htmlparser/jericho/TagTypeRegister;tagTypes![Lnet/htmlparser/jericho/TagType; NULL_CHAR ConstantValueDEFAULT_TAG_TYPESroot()VCodeadd$([Lnet/htmlparser/jericho/TagType;)V#(Lnet/htmlparser/jericho/TagType;)VremovegetList()Ljava/util/List; Signature4()Ljava/util/List;addTagTypesToList(Ljava/util/List;)V5(Ljava/util/List;)V getDebugInfo()Ljava/lang/String;toStringappendDebugInfo5(Ljava/lang/StringBuilder;I)Ljava/lang/StringBuilder;getChild+(C)Lnet/htmlparser/jericho/TagTypeRegister;addChild+(Lnet/htmlparser/jericho/TagTypeRegister;)V removeChildindexOfTagType#(Lnet/htmlparser/jericho/TagType;)I addTagType removeTagType access$000*()Lnet/htmlparser/jericho/TagTypeRegister; access$100S(Lnet/htmlparser/jericho/TagTypeRegister;C)Lnet/htmlparser/jericho/TagTypeRegister; access$200K(Lnet/htmlparser/jericho/TagTypeRegister;)[Lnet/htmlparser/jericho/TagType; 
access$300R(Lnet/htmlparser/jericho/TagTypeRegister;)Lnet/htmlparser/jericho/TagTypeRegister; 67 <= VW B7 CD 89 :; FH R &net/htmlparser/jericho/TagTypeRegister XY ]H ^H ZYjava/util/ArrayList NO Fjava/lang/StringBuilder TU SR R,  [\net/htmlparser/jericho/TagType  A= FGjava/lang/ObjectAnet/htmlparser/jericho/TagTypeRegister$ProspectiveTagTypeIteratorgetStartDelimiterjava/lang/Stringlength()IcharAt(I)Cjava/util/List(Ljava/lang/Object;)Zappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;getDescription setLength(I)Vnet/htmlparser/jericho/ConfigNewLineLjava/lang/String;#net/htmlparser/jericho/StartTagType UNREGISTERED%Lnet/htmlparser/jericho/StartTagType;NORMALCOMMENTMARKUP_DECLARATIONDOCTYPE_DECLARATION CDATA_SECTIONXML_PROCESSING_INSTRUCTIONXML_DECLARATION SERVER_COMMONSERVER_COMMON_ESCAPED!net/htmlparser/jericho/EndTagType#Lnet/htmlparser/jericho/EndTagType;0 26789:;<=>9?@A= B7CDE%******FGE!<**2)FHE^RL* M>, <, 6+: Y :++L+*)IHEh\L* M>, !, 6+:L+*+!+++++Lޱ JKEYK**LMNOE]Q*"*d=+*2W*&*M,>6,2:+LPQREYSRE*YTUE>+W*+* W*I+(W*N-66-2:+W++d +)W+!W*1`>*:662:+W+VWEu***2 *2=*d>=`z6*26 `= d> *2XYE{o** Y+S[*` M>*"*2+,*2S,+S,,*d2S*,ZYE^R* **d M>6*&*2+>,`*2S*,[\E4(*=**2+]HEz*+"=P**#Y+S[*`#N-+S6*-`*2S*-$>**d2S*+S^HEoc*+"=* **d#N6-*2S6--*`2S*-_`EabE*cdE*efE*gDEsg #Y$SY%SY&SY'SY(SY)SY*SY+SY,SY -SY .SY /S0 Y 015 3 4jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$A_ElementHandler.class0000644000175000017500000000327511214132414031604 0ustar twernertwerner1Q ! " # $ %& '( )* + , - . / 01 ! 
345INSTANCE6 Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions7  8 9: ;<= >?@ ABC DE FG HG IJ KL MN OP:net/htmlparser/jericho/Renderer$Processor$A_ElementHandlerA_ElementHandler java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$000 access$600.(Lnet/htmlparser/jericho/Renderer$Processor;)Z access$700N(Lnet/htmlparser/jericho/Renderer$Processor;)Lnet/htmlparser/jericho/Renderer;net/htmlparser/jericho/Element getStartTag#()Lnet/htmlparser/jericho/StartTag;net/htmlparser/jericho/RendererrenderHyperlinkURL5(Lnet/htmlparser/jericho/StartTag;)Ljava/lang/String;java/lang/Stringlength()I access$800.(Lnet/htmlparser/jericho/Renderer$Processor;)I access$900 access$1000/(Lnet/htmlparser/jericho/Renderer$Processor;I)V access$300Y(Lnet/htmlparser/jericho/Renderer$Processor;C)Lnet/htmlparser/jericho/Renderer$Processor; access$1100j(Lnet/htmlparser/jericho/Renderer$Processor;Ljava/lang/String;)Lnet/htmlparser/jericho/Renderer$Processor; access$202/(Lnet/htmlparser/jericho/Renderer$Processor;Z)Z *\P+,++,N--`6+`+ + + W+- W+ W  Y' 2 jericho-html-3.1/classes/net/htmlparser/jericho/FormControl$PositionComparator.class0000644000175000017500000000156511214132414031043 0ustar twernertwerner1&     ()VCodecompareK(Lnet/htmlparser/jericho/FormControl;Lnet/htmlparser/jericho/FormControl;)I'(Ljava/lang/Object;Ljava/lang/Object;)I  InnerClasses)(Lnet/htmlparser/jericho/FormControl$1;)V SignatureNLjava/lang/Object;Ljava/util/Comparator; !"# $%"net/htmlparser/jericho/FormControl 5net/htmlparser/jericho/FormControl$PositionComparatorPositionComparatorjava/lang/Objectjava/util/Comparator$net/htmlparser/jericho/FormControl$1 getElement"()Lnet/htmlparser/jericho/Element;net/htmlparser/jericho/ElementgetBegin()I0   *  /#+>,6A   *+,  
*jericho-html-3.1/classes/net/htmlparser/jericho/EndTag.class0000644000175000017500000000630711214132414024117 0ustar twernertwerner1 +B C D EF G H IJ KL EM KG NO P NQ NRS T U V NW XY +Z P +[\ ]^_` ab Ic de If g h ]ij +k dl Im +n +op endTagType#Lnet/htmlparser/jericho/EndTagType;Y(Lnet/htmlparser/jericho/Source;IILnet/htmlparser/jericho/EndTagType;Ljava/lang/String;)VCode getElement"()Lnet/htmlparser/jericho/Element; getEndTagType%()Lnet/htmlparser/jericho/EndTagType; getTagType"()Lnet/htmlparser/jericho/TagType;isUnregistered()Ztidy()Ljava/lang/String; generateHTML&(Ljava/lang/String;)Ljava/lang/String; getDebugInfo getPreviousv(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/EndTagType;)Lnet/htmlparser/jericho/EndTag;getNextA(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/EndTag; .q ,- rst us vw xyz {|} 12 ~ - : - ;<java/lang/StringBuilder . : ) =: net/htmlparser/jericho/EndTag "java/lang/IllegalArgumentException%name argument must not be zero length .(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;1+,- ./0*+*120TH***<,*dM,,N- *-, <*Z340*560*780* 9:0* ;<0 *=:0MAYL+* W* +(*W+*W+>?0, *-, YY-W,:*:6 6*!:"-,#$:6S%T{%|%@?0, *-&, YY-W,:*:6'6*!:"-,#$*(:6S%T{%|%>A0**)M,,,@A0***M,,,jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderJava.class0000644000175000017500000000130211214132416026501 0ustar twernertwerner1      JavaLogger InnerClassesINSTANCE'Lnet/htmlparser/jericho/LoggerProvider;()VCode getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger; 4net/htmlparser/jericho/LoggerProviderJava$JavaLogger  )net/htmlparser/jericho/LoggerProviderJava java/lang/Object%net/htmlparser/jericho/LoggerProviderjava/util/logging/Logger.(Ljava/lang/String;)Ljava/util/logging/Logger;H(Lnet/htmlparser/jericho/LoggerProviderJava;Ljava/util/logging/Logger;)V0  * Y*+ Y  jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeComment.class0000644000175000017500000000072311214132416026671 0ustar twernertwerner1   
INSTANCE,Lnet/htmlparser/jericho/StartTagTypeComment;()VCodecomment *net/htmlparser/jericho/StartTagTypeComment 8net/htmlparser/jericho/StartTagTypeGenericImplementation](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V0    *   Yjericho-html-3.1/classes/net/htmlparser/jericho/Cache.class0000644000175000017500000000700211214132414023751 0ustar twernertwerner1 *N 'OP Q 'R 'S 'T U VW VXY Z [ \] ^ _ ` a b c d e f g hg i hj kl m n op !N q !r \s tuv 'N 'wxsourceLnet/htmlparser/jericho/Source;allTagTypesSubCache!Lnet/htmlparser/jericho/SubCache; subCaches"[Lnet/htmlparser/jericho/SubCache;STREAMED_SOURCE_MARKERLnet/htmlparser/jericho/Cache;"(Lnet/htmlparser/jericho/Source;)VCode()VcleargetTagAt (IZ)Lnet/htmlparser/jericho/Tag;getPreviousTag(I)Lnet/htmlparser/jericho/Tag; getNextTag?(ILnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag;addTagAt getTagCount()IgetTagIterator()Ljava/util/Iterator; Signature4()Ljava/util/Iterator; loadAllTagsR(Ljava/util/List;[Lnet/htmlparser/jericho/Tag;[Lnet/htmlparser/jericho/StartTag;)Vp(Ljava/util/List;[Lnet/htmlparser/jericho/Tag;[Lnet/htmlparser/jericho/StartTag;)VtoString()Ljava/lang/String;getSourceLengthgetSeparatelyCachedTagTypes#()[Lnet/htmlparser/jericho/TagType; 36 +,net/htmlparser/jericho/SubCache 3y -. 
KL /0 ABz {| }~net/htmlparser/jericho/Tag 6 76 89 :; <; > @ |net/htmlparser/jericho/StartTag 6java/lang/StringBuilder HI  Lnet/htmlparser/jericho/Cache 12java/lang/ObjectA(Lnet/htmlparser/jericho/Cache;Lnet/htmlparser/jericho/TagType;)Vjava/util/IteratorhasNext()Znext()Ljava/lang/Object;orphannet/htmlparser/jericho/SourceuseAllTypesCacheZgetTagAtUncached?(Lnet/htmlparser/jericho/Source;IZ)Lnet/htmlparser/jericho/Tag;tagType Lnet/htmlparser/jericho/TagType;getPreviousTagUncached_(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/TagType;I)Lnet/htmlparser/jericho/Tag;getNextTagUncached (ILnet/htmlparser/jericho/Tag;)V getTagType"()Lnet/htmlparser/jericho/TagType;sizejava/util/List bulkLoad_Init(I)Vget(I)Ljava/lang/Object;isUnregistered bulkLoad_SetbulkLoad_AddToTypeSpecificCache(Lnet/htmlparser/jericho/Tag;)V"bulkLoad_FinaliseTypeSpecificCacheappendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;endInet/htmlparser/jericho/TagType!getTagTypesIgnoringEnclosedMarkup0'*+,-./012345]Q**+*Y*M*,`**S>,*`Y*,2S365 ****765G;*L+ +  <**2 895, ** *:;5 *<;5 *:=5NB*>*!,*2*2*,<=5NB*>*!,*2*2*,>95g[*N -*---:6*&*2*2---?@5 *dAB5*CDEF5 +6*666z+ : ,S-S*6  *** 2* 2 ҄6**2 CGHI55)!Y"L=**2+#W+$J@5*% KL5&M65 'Y()jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$ListElementHandler.class0000644000175000017500000000223111214132414032167 0ustar twernertwerner1= ! " # $ % & ' () + , -./ INSTANCE_OL1 Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler; INSTANCE_ULinitialListBulletNumberI(I)VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions2()V   34 56 78 96 : ;6+*W+W+,+W+W+ # Y Y 0 * jericho-html-3.1/classes/net/htmlparser/jericho/Attributes$1.class0000644000175000017500000000155111214132414025224 0ustar twernertwerner11         "9$SwitchMap$net$htmlparser$jericho$Attributes$ParsingState[I()VCodeEnclosingMethod#$ &'  () *+java/lang/NoSuchFieldError ,) -) .) 
/) 0)#net/htmlparser/jericho/Attributes$1 InnerClassesjava/lang/Object!net/htmlparser/jericho/Attributes.net/htmlparser/jericho/Attributes$ParsingState ParsingStatevalues3()[Lnet/htmlparser/jericho/Attributes$ParsingState;IN_VALUE0Lnet/htmlparser/jericho/Attributes$ParsingState;ordinal()IIN_NAME AFTER_NAMEBETWEEN_ATTRIBUTES START_VALUEAFTER_TAG_NAME  e OKOKOKOK OK OK #&'256ADEPST`c! %@jericho-html-3.1/classes/net/htmlparser/jericho/RemoveOutputSegment.class0000644000175000017500000000205511214132416026754 0ustar twernertwerner1> ' ( ) *( *) + ,-. '/ 0 12 3 4567beginIend(II)VCode#(Lnet/htmlparser/jericho/Segment;)VgetBegin()IgetEndwriteTo(Ljava/io/Writer;)VappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String; getDebugInfo 8  9  !java/lang/StringBuilder Remove: (p :; :<-p := $%*net/htmlparser/jericho/RemoveOutputSegmentjava/lang/Object$net/htmlparser/jericho/OutputSegment()Vnet/htmlparser/jericho/Segmentappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;0 *** *++***+ ! "# $%&%4( Y  *  * )jericho-html-3.1/classes/net/htmlparser/jericho/SubCache$SourceCacheEntryMissingInternalError.class0000644000175000017500000000117411214132414033663 0ustar twernertwerner1   `(Lnet/htmlparser/jericho/TagType;Lnet/htmlparser/jericho/Tag;Lnet/htmlparser/jericho/SubCache;)VCode&cache entry no longer found in source: Dnet/htmlparser/jericho/SubCache$SourceCacheEntryMissingInternalError$SourceCacheEntryMissingInternalError InnerClasses>net/htmlparser/jericho/SubCache$CacheEntryMissingInternalErrorCacheEntryMissingInternalErrorr(Lnet/htmlparser/jericho/TagType;Lnet/htmlparser/jericho/Tag;Lnet/htmlparser/jericho/SubCache;Ljava/lang/String;)Vnet/htmlparser/jericho/SubCache  *+,-    jericho-html-3.1/classes/net/htmlparser/jericho/FormControl$SubmitFormControl.class0000644000175000017500000000271711214132414030637 0ustar twernertwerner1A      ! 
"# $ % &'*K(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;)VCodesetValue(Ljava/lang/String;)Z addValuesTo(Ljava/util/Collection;)V Signature-(Ljava/util/Collection;)VaddToFormFields&(Lnet/htmlparser/jericho/FormFields;)VreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)V - ./0 12 345 64 789 :; <4 => ?4net/htmlparser/jericho/FormControl$SubmitFormControlSubmitFormControl InnerClasses"net/htmlparser/jericho/FormControl@r(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;ZLnet/htmlparser/jericho/FormControl$1;)VgetPredefinedValue()Ljava/lang/String;!net/htmlparser/jericho/FormFieldsadd'(Lnet/htmlparser/jericho/FormControl;)V outputStyle/Lnet/htmlparser/jericho/FormControlOutputStyle;-net/htmlparser/jericho/FormControlOutputStyleREMOVE getElement"()Lnet/htmlparser/jericho/Element;%net/htmlparser/jericho/OutputDocumentremove#(Lnet/htmlparser/jericho/Segment;)V DISPLAY_VALUE setDisabled(Z)V+replaceAttributesInOutputDocumentIfModified$net/htmlparser/jericho/FormControl$1   *+,  *+*6**+*** *+ ) (+ jericho-html-3.1/classes/net/htmlparser/jericho/MicrosoftTagTypes.class0000644000175000017500000000204511214132416026400 0ustar twernertwerner1-    !" 
#$%&&DOWNLEVEL_REVEALED_CONDITIONAL_COMMENT%Lnet/htmlparser/jericho/StartTagType; TAG_TYPES![Lnet/htmlparser/jericho/TagType;()VCodeisConditionalCommentIfTag(Lnet/htmlparser/jericho/Tag;)ZisConditionalCommentEndifTagregisterdefines#(Lnet/htmlparser/jericho/TagType;)Z ' ()![if![endif  * +, net/htmlparser/jericho/TagType(net/htmlparser/jericho/MicrosoftTagTypesjava/lang/Objectnet/htmlparser/jericho/TaggetName()Ljava/lang/String;Onet/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalCommentINSTANCEQLnet/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalComment;1  * * * )K*<=*2N- /#L+=>+2:*  YSjericho-html-3.1/classes/net/htmlparser/jericho/TextExtractor$1.class0000644000175000017500000000073711214132414025723 0ustar twernertwerner1   ()VCodeincludeAttributeF(Lnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/Attribute;)ZEnclosingMethod &net/htmlparser/jericho/TextExtractor$1 InnerClassesjava/lang/Object()VCoderegisterdefines#(Lnet/htmlparser/jericho/TagType;)Z isParsedByPHP   / 0 1 23 4 25 6 27 net/htmlparser/jericho/TagType"net/htmlparser/jericho/PHPTagTypesjava/lang/Object#net/htmlparser/jericho/StartTagType SERVER_COMMON.net/htmlparser/jericho/StartTagTypePHPStandardINSTANCE0Lnet/htmlparser/jericho/StartTagTypePHPStandard;+net/htmlparser/jericho/StartTagTypePHPShort-Lnet/htmlparser/jericho/StartTagTypePHPShort;,net/htmlparser/jericho/StartTagTypePHPScript.Lnet/htmlparser/jericho/StartTagTypePHPScript;1 * )K*<=*2N- /#L+=>+2:*  * *8,  YSY SY Sjericho-html-3.1/classes/net/htmlparser/jericho/CharOutputSegment.class0000644000175000017500000000243711214132416026400 0ustar twernertwerner1M - . / 0 1. 
1/ 2 34 567 -8 9 :;< = >?@AbeginIendchC(IIC)VCode$(Lnet/htmlparser/jericho/Segment;C)VgetBegin()IgetEndwriteTo(Ljava/io/Writer;)V ExceptionsBappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String; getDebugInfo C   D &'E FGH *Ijava/lang/StringBuilder Replace: (p FJ FK-p) FL *+(net/htmlparser/jericho/CharOutputSegmentjava/lang/Object$net/htmlparser/jericho/OutputSegmentjava/io/IOException()Vnet/htmlparser/jericho/Segmentjava/lang/Appendableappend(C)Ljava/lang/Appendable;java/lang/Character(C)Ljava/lang/String;-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;0  ****&**+*+* *! *"#*+$%&' +*W$%() *+* ,+;/ Y  * * *jericho-html-3.1/classes/net/htmlparser/jericho/Util.class0000644000175000017500000000301611214132416023666 0ustar twernertwerner1] *+, * -. / 0 -1 23 24 56 57 8 9 :; :< :=> ?@AB BUFFER_SIZEI ConstantValue CSVNewLineLjava/lang/String;()VCode getString$(Ljava/io/Reader;)Ljava/lang/String; ExceptionsC outputCSVLine&(Ljava/io/Writer;[Ljava/lang/String;)VoutputValueEscapeQuotes%(Ljava/io/Writer;Ljava/lang/String;)VgetConcatenatedCharArray((Ljava/lang/String;Ljava/lang/String;)[C java/lang/StringBuilderD EF GH IJ KL M NO PQ PR %& S TU VW XYline.separatorZ [\net/htmlparser/jericho/Utiljava/lang/Objectjava/io/IOExceptionjava/io/Readerread([CII)Iappend([CII)Ljava/lang/StringBuilder;toString()Ljava/lang/String;closenet/htmlparser/jericho/ConfigColumnValueTrueColumnValueFalsejava/io/Writerwrite(Ljava/lang/String;)V(I)Vjava/lang/Stringlength()IcharAt(I)CgetChars(II[CI)Vjava/lang/System getProperty&(Ljava/lang/String;)Ljava/lang/String;1*  aE*MYN*,Y<-,W-:*:*5<<><!" #$]Q=+D+2N-*- - *- *" *- *" + *, * !" %&3'=++>* "* ߱!"'(3'*+`M**,++,*,) jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderJava$JavaLogger.class0000644000175000017500000000226711214132416030522 0ustar twernertwerner1C ! 
" # $% $& $' $( )* $+ ), )- ).034 javaLoggerLjava/util/logging/Logger;this$0+Lnet/htmlparser/jericho/LoggerProviderJava;H(Lnet/htmlparser/jericho/LoggerProviderJava;Ljava/util/logging/Logger;)VCodeerror(Ljava/lang/String;)VwarninfodebugisErrorEnabled()Z isWarnEnabled isInfoEnabledisDebugEnabled  5 6 7 8  9: ;< => ?< @< A<B4net/htmlparser/jericho/LoggerProviderJava$JavaLogger JavaLogger InnerClassesjava/lang/Objectnet/htmlparser/jericho/Logger()Vjava/util/logging/Loggerseverewarningfinejava/util/logging/LevelSEVERELjava/util/logging/Level; isLoggable(Ljava/util/logging/Level;)ZWARNINGINFOFINE)net/htmlparser/jericho/LoggerProviderJava  *+**, *+ *+ *+ *+ *  *  *   * 2  /1jericho-html-3.1/classes/net/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings.class0000644000175000017500000000244311214132414033760 0ustar twernertwerner1E ( ) * + ,- ) ./ 01 234 5678 9:; <=$characterEntityReferenceMaxCodePointI%decimalCharacterReferenceMaxCodePoint)hexadecimalCharacterReferenceMaxCodePoint ACCEPT_ALL&UnterminatedCharacterReferenceSettings InnerClassesFLnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;()VCode(III)VtoString()Ljava/lang/String;getDescription(I)Ljava/lang/String; "    java/lang/StringBuilder> ?@ AB Character entity reference: %&! Decimal character reference: % Haxadecimal character reference: #$NoneAll0xC #DDnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings java/lang/Objectnet/htmlparser/jericho/ConfigNewLineLjava/lang/String;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;java/lang/Integer(II)Ljava/lang/String; AAA  !*"! 
****#$!YMY  **   **   ** %&!6*Y  ' !Y .jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeCDATASection.class0000644000175000017500000000075011214132416027430 0ustar twernertwerner1   INSTANCE1Lnet/htmlparser/jericho/StartTagTypeCDATASection;()VCode CDATA section /net/htmlparser/jericho/StartTagTypeCDATASection 8net/htmlparser/jericho/StartTagTypeGenericImplementation](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V     *   Yjericho-html-3.1/classes/net/htmlparser/jericho/TagType$TagTypesIgnoringEnclosedMarkup.class0000644000175000017500000000101211214132414032375 0ustar twernertwerner1    array![Lnet/htmlparser/jericho/TagType;()VCode net/htmlparser/jericho/TagType    =net/htmlparser/jericho/TagType$TagTypesIgnoringEnclosedMarkupTagTypesIgnoringEnclosedMarkup InnerClassesjava/lang/Object#net/htmlparser/jericho/StartTagTypeCOMMENT%Lnet/htmlparser/jericho/StartTagType; CDATA_SECTION0    *  YSYS jericho-html-3.1/classes/net/htmlparser/jericho/CharSequenceParseText.class0000644000175000017500000000337311214132414027163 0ustar twernertwerner1D - . /0 12 10 0 3 45 6 7 8 /2 9 : /; 1<=>? charSequenceLjava/lang/CharSequence;(Ljava/lang/CharSequence;)VCodecharAt(I)C containsAt(Ljava/lang/String;I)ZindexOf(CI)I(CII)I(Ljava/lang/String;I)I(Ljava/lang/String;II)I lastIndexOflength()I subSequence(II)Ljava/lang/CharSequence;toString()Ljava/lang/String;getEnd substring(II)Ljava/lang/String; @ A B $%  *%#java/lang/IndexOutOfBoundsException " # #" +, () &' C),net/htmlparser/jericho/CharSequenceParseTextjava/lang/Object net/htmlparser/jericho/ParseText()Vjava/lang/CharSequencejava/lang/String toLowerCase  **+-!*=AZ  /#>++*`* _C * *66*:5? 6<? 
!*+ " +*+++6*+d`6 66>*-6++*` #* # 9-* *6*#!*+ #" +*+ +*+d6=+d6+6`6`6D*3d6 d6  + *  `  $% * &'*()**% * +,*jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderLog4J.class0000644000175000017500000000122711214132416026545 0ustar twernertwerner1      Log4JLogger InnerClassesINSTANCE'Lnet/htmlparser/jericho/LoggerProvider;()VCode getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger; 6net/htmlparser/jericho/LoggerProviderLog4J$Log4JLogger  *net/htmlparser/jericho/LoggerProviderLog4J java/lang/Object%net/htmlparser/jericho/LoggerProviderorg/apache/log4j/Logger-(Ljava/lang/String;)Lorg/apache/log4j/Logger;(Lorg/apache/log4j/Logger;)V0  * Y+ Y  jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeMasonComponentCall.class0000644000175000017500000000076311214132416031027 0ustar twernertwerner1   INSTANCE7Lnet/htmlparser/jericho/StartTagTypeMasonComponentCall;()VCodemason component call<&&> 5net/htmlparser/jericho/StartTagTypeMasonComponentCall 8net/htmlparser/jericho/StartTagTypeGenericImplementation](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V0    *   Yjericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeDoctypeDeclaration.class0000644000175000017500000000156711214132416031053 0ustar twernertwerner1)       INSTANCE7Lnet/htmlparser/jericho/StartTagTypeDoctypeDeclaration;()VCodegetEnd#(Lnet/htmlparser/jericho/Source;I)Idocument type declaration !" 
#$% &' (5net/htmlparser/jericho/StartTagTypeDoctypeDeclaration  8net/htmlparser/jericho/StartTagTypeGenericImplementation_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)Vnet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseTextcharAt(I)C()I0  *y+N66-6"O6ID"5>,[;]A`6 66+ Y jericho-html-3.1/classes/net/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent.class0000644000175000017500000000133611214132416033135 0ustar twernertwerner1     INSTANCEBLnet/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent;()VCodegetCorrespondingStartTagType'()Lnet/htmlparser/jericho/StartTagType;$/mason component called with content  @net/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent 6net/htmlparser/jericho/EndTagTypeGenericImplementation;(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZZ)V$net/htmlparser/jericho/MasonTagTypes#MASON_COMPONENT_CALLED_WITH_CONTENT%Lnet/htmlparser/jericho/StartTagType;0    *  Yjericho-html-3.1/classes/net/htmlparser/jericho/OutputDocument.class0000644000175000017500000001331311214132414025747 0ustar twernertwerner1 O  N  N  N A  N   N    $  N N  0  0 0 0 0  9 N  9 9 A  9 O  9 sourceTextLjava/lang/CharSequence;outputSegmentsLjava/util/ArrayList; Signature=Ljava/util/ArrayList;"(Lnet/htmlparser/jericho/Source;)VCode#(Lnet/htmlparser/jericho/Segment;)V%(Lnet/htmlparser/jericho/ParseText;)V 
getSourceText()Ljava/lang/CharSequence;remove(Ljava/util/Collection;)V<(Ljava/util/Collection<+Lnet/htmlparser/jericho/Segment;>;)Vinsert(ILjava/lang/CharSequence;)Vreplace;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)V(IILjava/lang/CharSequence;)V(IIC)V'(Lnet/htmlparser/jericho/FormControl;)V&(Lnet/htmlparser/jericho/FormFields;)V5(Lnet/htmlparser/jericho/Attributes;Z)Ljava/util/Map;[(Lnet/htmlparser/jericho/Attributes;Z)Ljava/util/Map;5(Lnet/htmlparser/jericho/Attributes;Ljava/util/Map;)V[(Lnet/htmlparser/jericho/Attributes;Ljava/util/Map;)VreplaceWithSpaces(II)Vregister)(Lnet/htmlparser/jericho/OutputSegment;)VwriteTo(Ljava/io/Writer;)V Exceptions(Ljava/io/Writer;II)VappendTo(Ljava/lang/Appendable;)V(Ljava/lang/Appendable;II)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String; getDebugInfogetRegisteredOutputSegments()Ljava/util/List;:()Ljava/util/List; Wjava/util/ArrayList ST"java/lang/IllegalArgumentException source argument must not be null W QR!segment argument must not be null net/htmlparser/jericho/Segment W ^Z *net/htmlparser/jericho/RemoveOutputSegment WZ op  *net/htmlparser/jericho/StringOutputSegment We ce(net/htmlparser/jericho/CharOutputSegment Wf .net/htmlparser/jericho/AttributesOutputSegment W Wk)net/htmlparser/jericho/BlankOutputSegment Wn vw vx   $net/htmlparser/jericho/OutputSegment yz {java/lang/StringBuilder ~Replace with Spaces: Remove:  Replace: net/htmlparser/jericho/Source   v(p -p {| ...   
%net/htmlparser/jericho/OutputDocumentjava/lang/Object'net/htmlparser/jericho/CharStreamSourcejava/io/IOException()V(Ljava/lang/String;)VsourceLnet/htmlparser/jericho/Source;beginI$(Lnet/htmlparser/jericho/Source;II)Vendjava/util/Collectioniterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;getBegin()IgetEnd"net/htmlparser/jericho/FormControlreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)V!net/htmlparser/jericho/FormFields'(Lnet/htmlparser/jericho/Attributes;Z)VgetMap()Ljava/util/Map;add(Ljava/lang/Object;)Zjava/io/Writerflushjava/lang/CharSequencelengthisEmptyjava/lang/Appendableappend2(Ljava/lang/CharSequence;II)Ljava/lang/Appendable; COMPARATORLjava/util/Comparator;java/util/Collectionssort)(Ljava/util/List;Ljava/util/Comparator;)V(C)Ljava/lang/Appendable;+net/htmlparser/jericho/CharStreamSourceUtil=(Lnet/htmlparser/jericho/CharStreamSource;)Ljava/lang/String;java/util/List-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;getRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;&net/htmlparser/jericho/RowColumnVector4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;java/lang/String substring(II)Ljava/lang/String;net/htmlparser/jericho/ConfigNewLineLjava/lang/String;1NOPQRSTUVWXY/#**Y+ Y*+WZYk_**Y+ Y + M*,+ * Y,+ +,* Y,+, W[Y!**Y*+\]Y*^ZY *Y+^_Y/#+M,, N*-U`abY*Y,cdY*++,ceY*Y-cfY*YcgY+*chY+*ciY  Y+!N*--"UjckY* Y+,#UlmnY*$Y%opY *+&WqrY0*+'+( M+(, stquY4*+)+( :+(stvwY*+**)stvxY *++*,W6*-.*/:0:112221m2+*2,W2-$%16+ 3W+4162+*,WstyzYcW**@*/N-0-0:12d65ea@ 6{|Y*8}|Y 9Y:L*;<M,,0N-$ +=>W- +?>W +@>W*AD*A:+(BW-2C+DW+-BW-1C+DW+)BW&+E>-2FG>-1F)BW+ BW-H:I +>W+J>K>W+L>W+M~Y*-.*Ujericho-html-3.1/classes/net/htmlparser/jericho/TagTypeRegister$ProspectiveTagTypeIterator.class0000644000175000017500000000306411214132416033336 0ustar twernertwerner1D ! " #$ %& ' () %*+ %, %-. ! 
/034cursor(Lnet/htmlparser/jericho/TagTypeRegister; tagTypeIndexI#(Lnet/htmlparser/jericho/Source;I)VCodehasNext()Znext"()Lnet/htmlparser/jericho/TagType;remove()V()Ljava/lang/Object; SignatureHLjava/lang/Object;Ljava/util/Iterator;  5 678 9: ; <= >?#java/lang/IndexOutOfBoundsException @A BC'java/lang/UnsupportedOperationException Anet/htmlparser/jericho/TagTypeRegister$ProspectiveTagTypeIteratorProspectiveTagTypeIterator InnerClassesjava/lang/Objectjava/util/Iteratornet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText;&net/htmlparser/jericho/TagTypeRegister access$000*()Lnet/htmlparser/jericho/TagTypeRegister; net/htmlparser/jericho/ParseTextcharAt(I)C access$100S(Lnet/htmlparser/jericho/TagTypeRegister;C)Lnet/htmlparser/jericho/TagTypeRegister; access$200K(Lnet/htmlparser/jericho/TagTypeRegister;)[Lnet/htmlparser/jericho/TagType; access$300R(Lnet/htmlparser/jericho/TagTypeRegister;)Lnet/htmlparser/jericho/TagTypeRegister;0ua**+N*6*-`: *ܧ:* ** Z?B *NB* L+*2M*Y`Z+$*** * * , Y A*  2 %1jericho-html-3.1/classes/net/htmlparser/jericho/EncodingDetector.class0000644000175000017500000001032611214132414026171 0ustar twernertwerner1j k Al mn Ao pqrs t uv w x Ayz A{ | } ~   A A A Bt A A  m A A # A = =  = = =~  > = inputStreamLjava/io/InputStream;encodingLjava/lang/String;encodingSpecificationInfopreliminaryEncoding$preliminaryEncodingSpecificationInfoalternativePreliminaryEncodingPREVIEW_BYTE_COUNTI ConstantValueUTF_8 ISO_8859_1(Ljava/net/URLConnection;)VCode Exceptions(Ljava/io/InputStream;)V*(Ljava/io/InputStream;Ljava/lang/String;)V2(Lnet/htmlparser/jericho/StreamEncodingDetector;)VD(Lnet/htmlparser/jericho/StreamEncodingDetector;Ljava/lang/String;)VN(Ljava/io/InputStream;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)VgetInputStream()Ljava/io/InputStream; getEncoding()Ljava/lang/String;getEncodingSpecificationInfogetPreliminaryEncoding'getPreliminaryEncodingSpecificationInfo openReader()Ljava/io/Reader; 
setEncoding'(Ljava/lang/String;Ljava/lang/String;)ZdetectDocumentSpecifiedEncoding()ZgetPreviewSource3(Ljava/lang/String;)Lnet/htmlparser/jericho/Source;-net/htmlparser/jericho/StreamEncodingDetector QR QX QV#preliminary encoding set explicitly QZ $java/io/UnsupportedEncodingExceptionjava/lang/StringBuilder Q 6 specified as preliminaryEncoding constructor argument ^ Q fg ISO-8859-1 QY [\ ]^ _^ g g HF IF de EF GF gjava/io/BufferedInputStream CD JFA specified as alternativePreliminaryEncoding constructor argumentjava/io/InputStreamReader QW: hi  gAlternative encoding 2 substituted for unsupported preliminary encoding ^ gUTF-8Emandatory XML encoding when no BOM or encoding declaration is present!no encoding specified in document encoding ' specified in document is not supportedUnsupported encoding 3 specified in document, using preliminary encoding  insteadFalternative encoding substituted for unsupported preliminary encoding , net/htmlparser/jericho/Sourcejava/io/ByteArrayInputStream Q Q'net/htmlparser/jericho/EncodingDetectorjava/lang/Objectjava/io/IOExceptionjava/nio/charset/Charset isSupported(Ljava/lang/String;)Z()Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString(Ljava/lang/String;)V isDifinitive#isDocumentSpecifiedEncodingPossiblejava/io/InputStream markSupportedmark(I)Vreset getLogger!()Lnet/htmlparser/jericho/Logger; setLogger"(Lnet/htmlparser/jericho/Logger;)Vnet/htmlparser/jericho/Logger isWarnEnabledwarngetDocumentSpecifiedEncodingisXMLread()I([BII)V%(Ljava/io/Reader;Ljava/lang/String;)V0AB CDEFGFHFIFJFKLMNOFM1PFMQRS *Y+TUQVS *Y+TUQWS=1*+,,Y Y ,  *WTUQXS*+TUQYSA5*+++,+ +***W*WTUQZSk_****++ Y+ *,*-*!'Y Y  " TU[\S* ]^S*_^S*`^S*a^S*bcS`T*#Y* $*(Y Y * % * #Y* *$TdeS *+*,fgS* &* *L4*!(Y Y * % * *!L*+'M* (,)N,**+=-+4- Y , + - * % * .,/,0 *123:j,/*,/,4 Y 5 ,/ 6 :-+-- Y 7 ,/ 8 + 9 .*+5*+ Y : * % * ;  ** Y * ;  TUhiST HM>!* 
<6,T޻=Y#Y>Y,?+$@TUjericho-html-3.1/classes/net/htmlparser/jericho/HTMLElementTerminatingTagNameSets.class0000644000175000017500000000111511214132414031321 0ustar twernertwerner1    TerminatingStartTagNameSetLjava/util/Set; Signature#Ljava/util/Set;TerminatingEndTagNameSetNonterminatingElementNameSet0(Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;)VCodel(Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;)V    8net/htmlparser/jericho/HTMLElementTerminatingTagNameSetsjava/lang/Object()V0          **+*,*- jericho-html-3.1/classes/net/htmlparser/jericho/LoggerDisabled.class0000644000175000017500000000122311214132416025616 0ustar twernertwerner1   INSTANCE'Lnet/htmlparser/jericho/LoggerDisabled;()VCodeerror(Ljava/lang/String;)VwarninfodebugisErrorEnabled()Z isWarnEnabled isInfoEnabledisDebugEnabled %net/htmlparser/jericho/LoggerDisabled java/lang/Objectnet/htmlparser/jericho/Logger0   *               Yjericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderJCL$JCLLogger.class0000644000175000017500000000167211214132416027777 0ustar twernertwerner12    ! " # $ % & '),-jclLog Lorg/apache/commons/logging/Log;#(Lorg/apache/commons/logging/Log;)VCodeerror(Ljava/lang/String;)VwarninfodebugisErrorEnabled()Z isWarnEnabled isInfoEnabledisDebugEnabled . / 0 0 0 0    12net/htmlparser/jericho/LoggerProviderJCL$JCLLogger JCLLogger InnerClassesjava/lang/Objectnet/htmlparser/jericho/Logger()Vorg/apache/commons/logging/Log(Ljava/lang/Object;)V(net/htmlparser/jericho/LoggerProviderJCL    **+ *+ *+ *+ *+ * * *  * +  (* jericho-html-3.1/classes/net/htmlparser/jericho/Attributes.class0000644000175000017500000002331711214132414025103 0ustar twernertwerner1 p 9 9 t 9 9 t   t t q t 9   9 9 !    ! t  9  9 t 9 9 9 ! 9 ! ! !  9  9   !  N N p  9    ! N 9   ^ ^ ! ! 9 d 9  N N! 
N"# $%& '() InnerClasses* ParsingState attributeListLjava/util/LinkedList; Signature:Ljava/util/LinkedList;(containsServerTagOutsideOfAttributeValueZdefaultMaxErrorCountI;(Lnet/htmlparser/jericho/Source;IILjava/util/LinkedList;Z)VCode_(Lnet/htmlparser/jericho/Source;IILjava/util/LinkedList;Z)V construct|(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/StartTagType;Ljava/lang/String;)Lnet/htmlparser/jericho/Attributes;(Lnet/htmlparser/jericho/Source;IIILnet/htmlparser/jericho/StartTagType;Ljava/lang/String;I)Lnet/htmlparser/jericho/Attributes;G(Lnet/htmlparser/jericho/Source;III)Lnet/htmlparser/jericho/Attributes;(Lnet/htmlparser/jericho/Source;Ljava/lang/String;Lnet/htmlparser/jericho/Attributes$ParsingState;IIILnet/htmlparser/jericho/StartTagType;Ljava/lang/String;I)Lnet/htmlparser/jericho/Attributes;reachedMaxErrorCountI(ILnet/htmlparser/jericho/Source;Ljava/lang/String;Ljava/lang/String;II)ZisInvalidEmptyElementTagm(Lnet/htmlparser/jericho/StartTagType;Lnet/htmlparser/jericho/Source;ILjava/lang/String;Ljava/lang/String;I)Zget6(Ljava/lang/String;)Lnet/htmlparser/jericho/Attribute;getValue&(Ljava/lang/String;)Ljava/lang/String; getRawValuegetCount()Iiterator()Ljava/util/Iterator;:()Ljava/util/Iterator; listIterator(I)Ljava/util/ListIterator;?(I)Ljava/util/ListIterator; populateMap!(Ljava/util/Map;Z)Ljava/util/Map;m(Ljava/util/Map;Z)Ljava/util/Map; getDebugInfo()Ljava/lang/String;getDefaultMaxErrorCountsetDefaultMaxErrorCount(I)V generateHTML#(Ljava/util/Map;)Ljava/lang/String;I(Ljava/util/Map;)Ljava/lang/String; appendHTML((Ljava/lang/Appendable;Ljava/util/Map;)V ExceptionsN(Ljava/lang/Appendable;Ljava/util/Map;)V 
appendTidyJ(Ljava/lang/Appendable;Lnet/htmlparser/jericho/Tag;)Ljava/lang/Appendable;getMap(Z)Ljava/util/Map;8(Z)Ljava/util/Map;log`(Lnet/htmlparser/jericho/Source;Ljava/lang/String;Ljava/lang/CharSequence;ILjava/lang/String;I)V_(Lnet/htmlparser/jericho/Source;Ljava/lang/String;Ljava/lang/CharSequence;ILjava/lang/String;)V()VXLnet/htmlparser/jericho/nodoc/SequentialListSegment; ~+ vw z{StartTag ,- |} Attributes for StartTag .- Attributes/ 012 34 56java/util/LinkedList ~7 89 :;< =>? @A B- C- D} EF G HInet/htmlparser/jericho/Segment JKL MN4terminated in the middle of a quoted attribute value  net/htmlparser/jericho/Attribute ~O PQ R=rejected because of '<' character in unquoted attribute value S T ~U V- WI3rejected because of '<' character in attribute name .contains attribute name with invalid character6rejected because of '<' character after attribute name3has missing whitespace after quoted attribute value XI!rejected because of '<' characterxcontains a '/' character before the closing '>', which is ignored because tags of this name cannot be empty-element tags 4contains attribute name with invalid first character Y-*has missing attribute value after '=' signDrejected because of '<' character at the start of an attribute value7rejected because the name contains an invalid character!net/htmlparser/jericho/Attributes ~#java/lang/IndexOutOfBoundsException0rejected because it has no closing '>' character,rejected because it contains too many errors Z [ \ ]6 ^N _` a b cN de fg hijava/lang/StringBuilder Attributes jk : lNEMPTYm no java/io/IOException pqrjava/util/Map$EntryEntry \ejava/lang/CharSequence e s tjava/util/LinkedHashMap ~u vw ~ jx jy at z {| at position 
}~2net/htmlparser/jericho/nodoc/SequentialListSegment#net/htmlparser/jericho/Attributes$1.net/htmlparser/jericho/Attributes$ParsingState$(Lnet/htmlparser/jericho/Source;II)VAFTER_TAG_NAME0Lnet/htmlparser/jericho/Attributes$ParsingState;BETWEEN_ATTRIBUTES#net/htmlparser/jericho/StartTagTypeNORMAL%Lnet/htmlparser/jericho/StartTagType;java/lang/Stringlength#net/htmlparser/jericho/HTMLElementsisClosingSlashIgnored(Ljava/lang/String;)Znet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText;atEndOfAttributes$(Lnet/htmlparser/jericho/Source;IZ)Z net/htmlparser/jericho/ParseTextcharAt(I)Cnet/htmlparser/jericho/TaggetTagAt?(Lnet/htmlparser/jericho/Source;IZ)Lnet/htmlparser/jericho/Tag; START_VALUEIN_VALUEend9$SwitchMap$net$htmlparser$jericho$Attributes$ParsingState[Iordinal isWhiteSpace(C)ZloggerLnet/htmlparser/jericho/Logger;net/htmlparser/jericho/Logger isInfoEnabled()Z(Lnet/htmlparser/jericho/Source;Ljava/lang/String;Lnet/htmlparser/jericho/Segment;Lnet/htmlparser/jericho/Segment;Lnet/htmlparser/jericho/Segment;)Vadd(Ljava/lang/Object;)ZgetEndtoString toLowerCaseT(Lnet/htmlparser/jericho/Source;Ljava/lang/String;Lnet/htmlparser/jericho/Segment;)V AFTER_NAME isXMLNameCharisXMLNameStartCharIN_NAMEsize(I)Ljava/lang/Object;getKeyequalsIgnoreCasehasValuegetValueSegment"()Lnet/htmlparser/jericho/Segment;()Ljava/util/ListIterator;java/util/IteratorhasNextnext()Ljava/lang/Object;getName java/util/Mapput8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;isEmptynet/htmlparser/jericho/ConfigNewLineLjava/lang/String;entrySet()Ljava/util/Set; 
java/util/Set^(Ljava/lang/Appendable;Ljava/lang/CharSequence;Ljava/lang/CharSequence;)Ljava/lang/Appendable;P(Ljava/lang/Appendable;Lnet/htmlparser/jericho/Tag;)Lnet/htmlparser/jericho/Tag;(IF)VgetRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;(C)Ljava/lang/StringBuilder;3(Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;&net/htmlparser/jericho/RowColumnVectorappendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;info(Ljava/lang/String;)V19pvwxyz{ |}~ *+**x *,- *  *   96 *` `6  6 66 Y: 6 *: 6 6::666* 6 6<;*:-,6 6MY6 6,6 ,.;'([F  Y*Y::]P**+*+ Y*:Y*d:)6Y*:Y*d`: !Y*"#W$6 M><7 0**+%= MY*:&': !Y*(#W6 = )M*<**++*+,}**+-*+ S=6. !Y*(#W$6  M= M<**+.  6 4**+/*+ 0<**+1*+,\ (**<*+23/**+4*+ 5M66c**+6*+ Y*: !Y*"#W6 M' " 63l<**+7 6M@;/*+,!**+8 MT9Y*   ::* *+<3 _a;b;{;|;';(s;t;;L;M;;; , + +,-=3 8,* *+++-23=1*>=*>*?!N-@+A-*+BM,,C)*+BM, ,D ,E&*F*Gx *HxG;*IN-J.-K!:+ @LCMW+xnbNYOL+PQ*RQSQW*T +UQW4+VQW*IM,J,K!N+WQ-XQW+Y   *NYOL+*ZM+Y [xF:+\]M,J',K^N*-_`-a`bWֱ[x2&*IN-J-K!:+,cM+[ *dY*eh fgx UI**h*hNYȷi+Q j,klQm jQnQmYo G;**hNYȷi+Q j,klQm jQYoxsq9t9u@^ jericho-html-3.1/classes/net/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValue.class0000644000175000017500000000166311214132414033200 0ustar twernertwerner1B %& '( )*+,-. /0 1 23 4 5 6 79<MultipleValueSeparatorLjava/lang/String; ElementNameAttributeNamesLjava/util/List; Signature$Ljava/util/List; EmptyHTML PasswordCharC CheckedHTML UncheckedHTML()VCode !", div java/util/ArrayListjava/lang/Stringidclassstyle= >? !@      A@net/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValueConfigDisplayValue InnerClassesjava/lang/Objectjava/util/ArraysasList%([Ljava/lang/Object;)Ljava/util/List;(Ljava/util/Collection;)V-net/htmlparser/jericho/FormControlOutputStyle1IIIIIII !"#*$"#I=YYSY SY S *; 8:jericho-html-3.1/classes/net/htmlparser/jericho/LoggerFactory.class0000644000175000017500000000344311214132416025524 0ustar twernertwerner1d . / 01 23 1 45 6 7 89 :; 0<= 0>? 
0@AB CD EF GHI 0JK 2LMNOdefaultLoggerProvider'Lnet/htmlparser/jericho/LoggerProvider;()VCode getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger;2(Ljava/lang/Class;)Lnet/htmlparser/jericho/Logger;getLoggerProvider)()Lnet/htmlparser/jericho/LoggerProvider;getDefaultLoggerProviderdetermineDefaultLoggerProviderisClassAvailable(Ljava/lang/String;)Z !" '(P $%Q RST U )(  *(!org.slf4j.impl.StaticLoggerBinder +,!org.slf4j.impl.JDK14LoggerFactory V !org.slf4j.impl.Log4jLoggerFactory W org.slf4j.impl.JCLLoggerFactory X org.apache.commons.logging.LogtestY Z[ \]+org.apache.commons.logging.impl.Jdk14Logger^ _`+org.apache.commons.logging.impl.Log4JLogger a org.apache.log4j.Logger bcjava/lang/Throwable$net/htmlparser/jericho/LoggerFactoryjava/lang/Object%net/htmlparser/jericho/LoggerProviderjava/lang/ClassgetName()Ljava/lang/String;net/htmlparser/jericho/ConfigLoggerProviderJAVALOG4JSLF4J%org/apache/commons/logging/LogFactorygetLog4(Ljava/lang/String;)Lorg/apache/commons/logging/Log;getClass()Ljava/lang/Class;java/lang/Stringequals(Ljava/lang/Object;)ZJCLforName%(Ljava/lang/String;)Ljava/lang/Class;0  !"#* $%# * $&#* '(#  )(#  *(#zn '     -K* *  +,# *WL-"#jericho-html-3.1/classes/net/htmlparser/jericho/Element.class0000644000175000017500000001076211214132414024346 0ustar twernertwerner1 vw vx yx 0z @{ @| @} @~ @  @ y @ 0 @   v @ @x   yw v v  & & @ & @ @ v v @ v v P v @ & & v 0 @startTag!Lnet/htmlparser/jericho/StartTag;endTagLnet/htmlparser/jericho/EndTag;content Lnet/htmlparser/jericho/Segment; parentElement Lnet/htmlparser/jericho/Element;depthI childElementsLjava/util/List; Signature2Ljava/util/List; NOT_CACHED0INCLUDE_INCORRECTLY_NESTED_CHILDREN_IN_HIERARCHYZ ConstantValueb(Lnet/htmlparser/jericho/Source;Lnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/EndTag;)VCode()VgetParentElement"()Lnet/htmlparser/jericho/Element;getChildElements()Ljava/util/List;4()Ljava/util/List;(I)Ljava/util/List;5(I)Ljava/util/List;getDepth()I 
getContent"()Lnet/htmlparser/jericho/Segment; getStartTag#()Lnet/htmlparser/jericho/StartTag; getEndTag!()Lnet/htmlparser/jericho/EndTag;getName()Ljava/lang/String;isEmpty()ZisEmptyElementTag getAttributes%()Lnet/htmlparser/jericho/Attributes;getAttributeValue&(Ljava/lang/String;)Ljava/lang/String;getFormControl&()Lnet/htmlparser/jericho/FormControl; getDebugInfo getContentEnd K K U FG PI HI JK LM k'java/lang/UnsupportedOperationException4Elements are not supported when using StreamedSource U BC a DE UX kjava/lang/IllegalStateExceptionPThis operation is only possible after a full sequential parse has been performed k|This operation is only possible if a full sequential parse was performed immediately after construction of the Source object [\ [^ \java/util/ArrayList  k Z  kjava/lang/StringBuilderChild si extends beyond end of parent i  YZnet/htmlparser/jericho/Segment ta hi lk de mn op Element jk net/htmlparser/jericho/Elementnet/htmlparser/jericho/StartTagbeginendnet/htmlparser/jericho/EndTag$(Lnet/htmlparser/jericho/Source;II)Vnet/htmlparser/jericho/Source isStreamed(Ljava/lang/String;)VlengthsourceLnet/htmlparser/jericho/Source;wasFullSequentialParseCalled isOrphanedjava/util/Collections emptyListgetNextStartTag$(I)Lnet/htmlparser/jericho/StartTag; getTagType"()Lnet/htmlparser/jericho/TagType;net/htmlparser/jericho/TagType isServerTag getElementloggerLnet/htmlparser/jericho/Logger;net/htmlparser/jericho/Logger isInfoEnabledappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringinfojava/util/Listadd(Ljava/lang/Object;)Z"net/htmlparser/jericho/FormControl constructF(Lnet/htmlparser/jericho/Element;)Lnet/htmlparser/jericho/FormControl;appendDebugTag4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;3(Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;appendDebugTagType1@0BCDEFGHIJKLMNOPIQRSTUVWeY*+,- ,-**** + Y *,*- --UXW1%***** **YZWZN*B* Y* Y*W***[\W * * *N][^W** ** * `=*Y *>* * *6* :!" 
>#:*A*$%2*$&Y'()*)+)**),-W** .W>U* N_`aW+**/W***bcW2&**0Y***1*deW*fgW*hiW*2jkW **1lkW*3mnW*45opW *4+6qrW*7siWj^*8&Y'L+9)W*+:W*; +-W+*?)W+,taW"* **uXW @YAjericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderJCL.class0000644000175000017500000000126411214132416026237 0ustar twernertwerner1!      JCLLogger InnerClassesINSTANCE'Lnet/htmlparser/jericho/LoggerProvider;()VCode getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger; 2net/htmlparser/jericho/LoggerProviderJCL$JCLLogger   (net/htmlparser/jericho/LoggerProviderJCL java/lang/Object%net/htmlparser/jericho/LoggerProvider%org/apache/commons/logging/LogFactorygetLog4(Ljava/lang/String;)Lorg/apache/commons/logging/Log;#(Lorg/apache/commons/logging/Log;)V0  * Y+ Y  jericho-html-3.1/classes/net/htmlparser/jericho/IntStringHashMap.class0000644000175000017500000000407011214132416026135 0ustar twernertwerner1U 9 : ;< = >?@ ? @ A B C D E F G HIJKEntry InnerClassesDEFAULT_INITIAL_CAPACITYI ConstantValueDEFAULT_LOAD_FACTORFentries0[Lnet/htmlparser/jericho/IntStringHashMap$Entry;size threshold loadFactorbitmask(IF)VCode(I)V()V()IisEmpty()ZgetIndex(I)Iget(I)Ljava/lang/String;getEntry2(I)Lnet/htmlparser/jericho/IntStringHashMap$Entry; containsKey(I)Zput'(ILjava/lang/String;)Ljava/lang/String;increaseCapacityremoveclear containsValue(Ljava/lang/String;)Z "&  -net/htmlparser/jericho/IntStringHashMap$Entry  ! 
"#  *+ L MN OP ./ "Q 4&R ST'net/htmlparser/jericho/IntStringHashMapjava/lang/ObjectkeyvalueLjava/lang/String;next/Lnet/htmlparser/jericho/IntStringHashMap$Entry;E(ILjava/lang/String;Lnet/htmlparser/jericho/IntStringHashMap$Entry;)Vjava/lang/Stringequals(Ljava/lang/Object;)Z0 !"#$<0**$> x>*$j**d"%$*"&$ *'$* ()$ * *+$*~,-$2&** 2M,, , , M./$-!** 2M,, , M,01$*23$oc* >*2:&  :,  :*Y,*2S*Y Z` **4&$ *<*M*x**d,N-66B-2:1 :* 6*2 *S:Є***j5-$cW* =N*2:C .-* S - *Y d  YN :6&$(*<*S* 78$oc+1*=%*2N-- - Nݧ2*=)*2N-+- - N jericho-html-3.1/classes/net/htmlparser/jericho/Renderer.class0000644000175000017500000000727411214132414024527 0ustar twernertwerner1 0k /lm /n /o /p qr /s /t /u /vw /x /y /z {|} /~ / / / / / / / /  z     'k ' ' ' , Processor InnerClasses rootSegment Lnet/htmlparser/jericho/Segment; maxLineLengthInewLineLjava/lang/String;includeHyperlinkURLsZdecorateFontStylesconvertNonBreakingSpacesblockIndentSizelistIndentSize listBullets[CtableCellSeparator#(Lnet/htmlparser/jericho/Segment;)VCodewriteTo(Ljava/io/Writer;)V ExceptionsappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String;setMaxLineLength$(I)Lnet/htmlparser/jericho/Renderer;getMaxLineLength()I setNewLine5(Ljava/lang/String;)Lnet/htmlparser/jericho/Renderer; getNewLinesetIncludeHyperlinkURLs$(Z)Lnet/htmlparser/jericho/Renderer;getIncludeHyperlinkURLs()ZrenderHyperlinkURL5(Lnet/htmlparser/jericho/StartTag;)Ljava/lang/String;setDecorateFontStylesgetDecorateFontStylessetConvertNonBreakingSpacesgetConvertNonBreakingSpacessetBlockIndentSizegetBlockIndentSizesetListIndentSizegetListIndentSizesetListBullets%([C)Lnet/htmlparser/jericho/Renderer;getListBullets()[CsetTableCellSeparatorgetTableCellSeparator C 67 89 :; <; ; =; >7 ?7 @A B9 45 JK )net/htmlparser/jericho/Renderer$Processor RS VO YZ ^Z `Z bS dS gh jO C S N  Ohref #  javascript: java/lang/StringBuilder NO"java/lang/IllegalArgumentException?listBullets argument must be an array of at least one character 
Cnet/htmlparser/jericho/Rendererjava/lang/Object'net/htmlparser/jericho/CharStreamSourcejava/io/IOException()Vnet/htmlparser/jericho/ConfigConvertNonBreakingSpacesjava/io/Writerflushp(Lnet/htmlparser/jericho/Renderer;Lnet/htmlparser/jericho/Segment;ILjava/lang/String;ZZZII[CLjava/lang/String;)Vnet/htmlparser/jericho/Segmentlength+net/htmlparser/jericho/CharStreamSourceUtil=(Lnet/htmlparser/jericho/CharStreamSource;)Ljava/lang/String;sourceLnet/htmlparser/jericho/Source;net/htmlparser/jericho/SourcegetBestGuessNewLinenet/htmlparser/jericho/StartTaggetAttributeValue&(Ljava/lang/String;)Ljava/lang/String;java/lang/Stringequals(Ljava/lang/Object;)Z startsWith(Ljava/lang/String;)Zappend(C)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;(Ljava/lang/String;)V!/01 456789:;<;=;>7?7@AB9CDE_S**L***** * *Y*UYoUY+UY#U * *+FGE *++HIJKEA 5Y***********+HILME *NOE*PQE**RSE*TUE*+*VOE&*** *WXE**YZE*[\ED8+!"M,,#$ ,%&'Y(<),*>)+]XE**^ZE*_XE**`ZE*aQE* *bSE* cQE* *dSE* efE&++ ,Y-.*+ *ghE* iUE*+ *jOE* 3 /2././@LongLink0000000000000000000000000000015700000000000011570 Lustar rootrootjericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalComment.classjericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalCom0000644000175000017500000000162711214132416034307 0ustar twernertwerner1*    !" 
# $%INSTANCEQLnet/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalComment;IFLjava/lang/String; ConstantValueENDIF()VCodeconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;0Microsoft downlevel-revealed conditional comment & ' ()![if![endifOnet/htmlparser/jericho/StartTagTypeMicrosoftDownlevelRevealedConditionalComment  8net/htmlparser/jericho/StartTagTypeGenericImplementation_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)Vnet/htmlparser/jericho/TaggetName()Ljava/lang/String;0  *1%*+N--: - Y jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeUnregistered.class0000644000175000017500000000317611214132416027734 0ustar twernertwerner1[!"# $ %& '() * %+ ,- ./ %01 23 4 567 8 ,9: ; <=INSTANCE1Lnet/htmlparser/jericho/StartTagTypeUnregistered;()VCodeconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag; unregistered<> >? @AB CD EF GHI JKL MN OPjava/lang/StringBuilder Q!Encountered possible StartTag at RST UV7 whose content does not match a registered StartTagType WX YZ/net/htmlparser/jericho/StartTagTypeUnregistered  #net/htmlparser/jericho/StartTagType_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)Vnet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseTextindexOf(CI)IconstructStartTagy(Lnet/htmlparser/jericho/Source;IILjava/lang/String;Lnet/htmlparser/jericho/Attributes;)Lnet/htmlparser/jericho/StartTag;loggerLnet/htmlparser/jericho/Logger;net/htmlparser/jericho/Logger isInfoEnabled()Znet/htmlparser/jericho/TaggetBegin()IgetRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;(I)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;&net/htmlparser/jericho/RowColumnVectorappendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;toString()Ljava/lang/String;info(Ljava/lang/String;)V0*k_+>`>*+`:+ /+ + Yȷ  
Yjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$RemoveElementHandler.class0000644000175000017500000000125511214132414032516 0ustar twernertwerner1   INSTANCE Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V >net/htmlparser/jericho/Renderer$Processor$RemoveElementHandlerRemoveElementHandler  java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler)net/htmlparser/jericho/Renderer$Processornet/htmlparser/jericho/Renderer   *  Y     jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProvider.class0000644000175000017500000000142011214132416025700 0ustar twernertwerner11      ! " # $ % & '()DISABLED'Lnet/htmlparser/jericho/LoggerProvider;STDERRJAVAJCLLOG4JSLF4J getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger;()VCode* + , - . / 0 %net/htmlparser/jericho/LoggerProviderjava/lang/Object-net/htmlparser/jericho/LoggerProviderDisabledINSTANCE+net/htmlparser/jericho/LoggerProviderSTDERR)net/htmlparser/jericho/LoggerProviderJava(net/htmlparser/jericho/LoggerProviderJCL*net/htmlparser/jericho/LoggerProviderLog4J*net/htmlparser/jericho/LoggerProviderSLF4J 1% jericho-html-3.1/classes/net/htmlparser/jericho/StreamedSource$StreamedSourceIterator.class0000644000175000017500000000753511214132416032334 0ustar twernertwerner1 3O 4P 3Q 3R ST 3U SV 3W 3X SY SZ S[ 3\ ]^ 3_` P Sa b cd Sef P 3g Sh i cjk Sl m n co cp cq rs tu vw xy ]z ]{ ]b |} |~ ] S 3 coalescingZ handleTags nextSegment Lnet/htmlparser/jericho/Segment;plainTextSegmentBeginI charByRef[Cthis$0'Lnet/htmlparser/jericho/StreamedSource;*(Lnet/htmlparser/jericho/StreamedSource;)VCodehasNext()Znext"()Lnet/htmlparser/jericho/Segment;remove()VloadNextParsedSegmentfindNextParsedSegment()Ljava/lang/Object; SignatureHLjava/lang/Object;Ljava/util/Iterator; ?@ AI ;< => 67 87 JI 9: DE java/util/NoSuchElementException < 
'java/lang/UnsupportedOperationException KG < net/htmlparser/jericho/Segment A     E  script java/nio/BufferOverflowException B#java/lang/IndexOutOfBoundsException FG?@ABCB6*+****+*+* ++ WDEC'* * * FGCUI* Y* L*+*  * *+*+W+HICYJIC**W* *  *<**Y** * * "** ** KGC`$* `<** *!=*">&.*#.*$%:z*s<m*&:Y'Q(:)*#.3*+*#+,-.)OD* Y**L*0+L*0j/k/ /j1k1 1AFLC*2MN3Stjericho-html-3.1/classes/net/htmlparser/jericho/Config$CompatibilityMode.class0000644000175000017500000000605011214132414027560 0ustar twernertwerner1}S T U %T V W X Y Z [ \ ]^ T_ ` a Rbcd ef g hi4 j kl mn o9 p qrnameLjava/lang/String;formFieldNameCaseInsensitiveZ:unterminatedCharacterReferenceSettingsInsideAttributeValue&UnterminatedCharacterReferenceSettings InnerClassesFLnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;;unterminatedCharacterReferenceSettingsOutsideAttributeValueCODE_POINTS_ALLI ConstantValueCODE_POINTS_NONEIECompatibilityMode1Lnet/htmlparser/jericho/Config$CompatibilityMode;MOZILLAOPERAXHTML(Ljava/lang/String;)VCode(Ljava/lang/String;ZLnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;Lnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;)VgetName()Ljava/lang/String;isFormFieldNameCaseInsensitive()ZsetFormFieldNameCaseInsensitive(Z)V3getUnterminatedCharacterEntityReferenceMaxCodePoint(Z)I3setUnterminatedCharacterEntityReferenceMaxCodePoint(ZI)V4getUnterminatedDecimalCharacterReferenceMaxCodePoint4setUnterminatedDecimalCharacterReferenceMaxCodePoint8getUnterminatedHexadecimalCharacterReferenceMaxCodePoint8setUnterminatedHexadecimalCharacterReferenceMaxCodePoint getDebugInfotoString)getUnterminatedCharacterReferenceSettingsI(Z)Lnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;()VsDnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings :Q := &' () *- .- NO t0 u0 v0java/lang/StringBuilder"Form field name case insensitive: wx wy z'8Maximum codepoints in unterminated character references: Inside attribute values: w{ Outside 
attribute values: M? >?/net/htmlparser/jericho/Config$CompatibilityMode :| 46Mozilla 76Opera 86 :; 96java/lang/Objectnet/htmlparser/jericho/Config$characterEntityReferenceMaxCodePoint%decimalCharacterReferenceMaxCodePoint)hexadecimalCharacterReferenceMaxCodePointappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(Z)Ljava/lang/StringBuilder;NewLine-(Ljava/lang/Object;)Ljava/lang/StringBuilder;(III)V1% &'B()@*-@.-/01201346768696:;<!*+YY:=<&**+**-*>?<*@A<*BC<*DE< * FG< *  HE< * IG< *  JE< * KG< *  L?<RF Y***M?<*NO< **PQ< YYYYYYY Y>Y!Y"#$,R+R5jericho-html-3.1/classes/net/htmlparser/jericho/StartTag.class0000644000175000017500000002655111214132414024511 0ustar twernertwerner1 a a a  a a  V  V       a a a a a a  ) a  a        a  a    a  >    a a  a  a   a !" # $ % &'() !* V+ , a-. / 01 023  04 56 e78 9: a; < a= > ?@ A aB C aD aEF aG H ?I J K L e M N OPQR S TU VW XY aZ[ a\ V VD V] V^ _`a attributes#Lnet/htmlparser/jericho/Attributes; startTagType%Lnet/htmlparser/jericho/StartTagType;$assertionsDisabledZ~(Lnet/htmlparser/jericho/Source;IILnet/htmlparser/jericho/StartTagType;Ljava/lang/String;Lnet/htmlparser/jericho/Attributes;)VCode()V getElement"()Lnet/htmlparser/jericho/Element;isEmptyElementTag()ZisSyntacticalEmptyElementTaggetStartTagType'()Lnet/htmlparser/jericho/StartTagType; getTagType"()Lnet/htmlparser/jericho/TagType; getAttributes%()Lnet/htmlparser/jericho/Attributes;getAttributeValue&(Ljava/lang/String;)Ljava/lang/String;parseAttributes&(I)Lnet/htmlparser/jericho/Attributes; getTagContent"()Lnet/htmlparser/jericho/Segment;getFormControl&()Lnet/htmlparser/jericho/FormControl;isEndTagForbiddenisEndTagRequiredisUnregisteredtidy()Ljava/lang/String;(Z)Ljava/lang/String; generateHTML6(Ljava/lang/String;Ljava/util/Map;Z)Ljava/lang/String; Signature\(Ljava/lang/String;Ljava/util/Map;Z)Ljava/lang/String; 
getDebugInfoappendDebugTag4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;appendDebugTagTypegetEndTagInternal!()Lnet/htmlparser/jericho/EndTag;getOptionalEndTag[(Lnet/htmlparser/jericho/HTMLElementTerminatingTagNameSets;)Lnet/htmlparser/jericho/EndTag;getStartDelimiter getPreviousz(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/StartTagType;)Lnet/htmlparser/jericho/StartTag;{(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/StartTagType;Z)Lnet/htmlparser/jericho/StartTag;getNextC(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/StartTag;h(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/StartTag;n(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Ljava/util/regex/Pattern;)Lnet/htmlparser/jericho/StartTag; getEndTagD(Lnet/htmlparser/jericho/EndTag;ZZ)[Lnet/htmlparser/jericho/Segment;f(ILnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/EndTag;ZZ)[Lnet/htmlparser/jericho/Segment; b cd ed net/htmlparser/jericho/Element fg hi jkl m no pq rsjava/lang/StringBuilder tEnd tag uv uw at x y! 
terminates more than one element z {| }~   q       net/htmlparser/jericho/Segment    o     u ~  java/io/IOExceptionjava/lang/RuntimeException   /  />   )       StartTag at O missing required end tag - invalid nested start tag encountered before end tagnet/htmlparser/jericho/EndTag     missing required end tag   net/htmlparser/jericho/StartTag "java/lang/IllegalArgumentException+searchName argument must not be zero length |<searchName argument "" must not start with '/'       #java/lang/IndexOutOfBoundsException        : StartTag with attribute ="F" ignored during search because its case does not match search value "     java/lang/AssertionError   net/htmlparser/jericho/Tag6(Lnet/htmlparser/jericho/Source;IILjava/lang/String;)Velement Lnet/htmlparser/jericho/Element; NOT_CACHEDsourceLnet/htmlparser/jericho/Source;b(Lnet/htmlparser/jericho/Source;Lnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/EndTag;)Vnet/htmlparser/jericho/SourceloggerLnet/htmlparser/jericho/Logger;net/htmlparser/jericho/Logger isInfoEnabledequals(Ljava/lang/Object;)ZbeginIgetRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;(I)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;3(Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;&net/htmlparser/jericho/RowColumnVectorappendTotoStringinfo(Ljava/lang/String;)VnameLjava/lang/String;#net/htmlparser/jericho/HTMLElementsisClosingSlashIgnored(Ljava/lang/String;)Z#net/htmlparser/jericho/StartTagTypeNORMALendcharAt(I)C!net/htmlparser/jericho/AttributesgetValuegetDefaultMaxErrorCount()IgetClosingDelimiterjava/lang/StringlengthisXMLNameStartChar(C)Z construct(Lnet/htmlparser/jericho/Source;IIILnet/htmlparser/jericho/StartTagType;Ljava/lang/String;I)Lnet/htmlparser/jericho/Attributes;$(Lnet/htmlparser/jericho/Source;II)VgetCorrespondingEndTagType%()Lnet/htmlparser/jericho/EndTagType;getEndTagForbiddenElementNames()Ljava/util/Set; java/util/SetcontainsgetElementNames()Ljava/util/List;java/util/ListgetEndTagRequiredElementNames 
UNREGISTERED(containsServerTagOutsideOfAttributeValue(C)Ljava/lang/StringBuilder;startDelimiterPrefix getNextTag()Lnet/htmlparser/jericho/Tag; appendTidyJ(Ljava/lang/Appendable;Lnet/htmlparser/jericho/Tag;)Ljava/lang/Appendable;(Ljava/lang/Throwable;)VgetEndTagOptionalElementNames appendHTML((Ljava/lang/Appendable;Ljava/util/Map;)VisEmptygetNameSegmentgetDescriptiongetTerminatingTagNameSetsN(Ljava/lang/String;)Lnet/htmlparser/jericho/HTMLElementTerminatingTagNameSets;!net/htmlparser/jericho/EndTagType getEndTagName getNextEndTagW(ILjava/lang/String;Lnet/htmlparser/jericho/EndTagType;)Lnet/htmlparser/jericho/EndTag;&END_TAG_REQUIRED_NESTING_FORBIDDEN_SET+Lnet/htmlparser/jericho/HTMLElementNameSet;)net/htmlparser/jericho/HTMLElementNameSetgetNextStartTag6(ILjava/lang/String;)Lnet/htmlparser/jericho/StartTag;#Lnet/htmlparser/jericho/EndTagType;Y(Lnet/htmlparser/jericho/Source;IILnet/htmlparser/jericho/EndTagType;Ljava/lang/String;)V isXMLName(Ljava/lang/CharSequence;)Z>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;8net/htmlparser/jericho/HTMLElementTerminatingTagNameSetsTerminatingEndTagNameSetLjava/util/Set;NonterminatingElementNameSetTerminatingStartTagNameSetgetPreviousTag?(ILnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag; getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseText lastIndexOf(Ljava/lang/String;I)IgetTagAt?(Lnet/htmlparser/jericho/Source;IZ)Lnet/htmlparser/jericho/Tag;isNameAfterPrefixRequiredgetName isXMLNameCharindexOfgetPreviousStartTag#()Lnet/htmlparser/jericho/StartTag; toLowerCasegetEnclosingTag(I)Lnet/htmlparser/jericho/Tag;equalsIgnoreCaseget6(Ljava/lang/String;)Lnet/htmlparser/jericho/Attribute; net/htmlparser/jericho/Attributejava/util/regex/Patternmatcher3(Ljava/lang/CharSequence;)Ljava/util/regex/Matcher;java/util/regex/Matchermatches 
getEndTagTypev(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/EndTagType;)Lnet/htmlparser/jericho/EndTag;java/lang/ClassdesiredAssertionStatus1a$"*+*******L*Y* *+ +l+ Z* K*+ =* * +Yȷ++* *#**."** *d /*** * *+!*"#fZ****$%d=*&`*%`>*  ' * *&**(5))Y* *&`*%`**$%d**+,MA*-*-./*01*2*UI*-*-.3*01*2**4*5* *6*7Y8M,<9W*,*W7*&*:%`>*%`6,*  9W*,*;Y-?*#*+@A*0 ,BW,*$W,s= UAY8N-<9*W-+C:>Y? -DW ->9W-=5)Y8L*+EW+ 9W*+FW+*GW+XL**HI +*W0+<9*J 9W* +/9W+*$W+."*+(9*KLW+}q<*.M*K*MN- *-N/*03*0< * ,* *,*O,PN-*Q*R* **S:&--* 4* * *&YȷTUVY* &&W*X*-*YZ: 2V* 4* * *&YȷT[*=* \* ]N--V-^*-V+_:4+`:#-^0-a+:b=+c:,-^0VY* -d-dW*X-d`=_VY* * \* \W*XaU*% eYfgY8h*L+h%i/#eYY8j*kg+#*,-- ,Yl , *-ma,n:*o:6p6*qa:^---NIr>-s0t%,%!,,%di6  : uw:1v2vv#*,-- ,Yw , *-xa,n:*o:6y6*qa:^---NIr>-s0t%,%!,,%di6  : u*\s:1v2vv)*zM,,a,a,{)*]M,,a,a,| - ,% eY}-%-%,%-,:*o:6*\~y6*: a da:  H| H,!:  l-  - W * C* *Yȷ, -"96, , ,% eY},:*o:6*\~y6*: a dDa:H5H,:  %- :  - 6sma+ Y* ***w:*%* **w:**+z n-)Y-SY,S:,,&-,-Z:2V:* --: *2a ajericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeMarkupDeclaration.class0000644000175000017500000000235311214132416030675 0ustar twernertwerner1A"#$ % & '()*+, -. /0 -12 3 45INSTANCE6Lnet/htmlparser/jericho/StartTagTypeMarkupDeclaration;ELEMENTLjava/lang/String; ConstantValueATTLISTENTITYNOTATION()VCodeconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;getEnd#(Lnet/htmlparser/jericho/Source;I)Imarkup declaration 6 7 89!element!attlist!entity !notation: ;<= >? @4net/htmlparser/jericho/StartTagTypeMarkupDeclaration  8net/htmlparser/jericho/StartTagTypeGenericImplementation_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)Vnet/htmlparser/jericho/TaggetName()Ljava/lang/String;net/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseTextcharAt(I)C()I   *?3*+N--:  - PD+ N6- 6"6> `+ ! 
Yjericho-html-3.1/classes/net/htmlparser/jericho/FormFields.class0000644000175000017500000001407511214132414025010 0ustar twernertwerner1$ X  W  W W  #   W #   W ?ffffff  W     W  + +    W G  G 6 6 6 6 G  # W W     G  c  6 W W W   Column InnerClassesmapLjava/util/LinkedHashMap; SignatureOLjava/util/LinkedHashMap; formControlsLjava/util/ArrayList;;Ljava/util/ArrayList;columns+[Lnet/htmlparser/jericho/FormFields$Column;(Ljava/util/Collection;)VCode?(Ljava/util/Collection;)VgetCount()Isizeget6(Ljava/lang/String;)Lnet/htmlparser/jericho/FormField;iterator()Ljava/util/Iterator;:()Ljava/util/Iterator; getValues$(Ljava/lang/String;)Ljava/util/List;8(Ljava/lang/String;)Ljava/util/List; getDataSet()Ljava/util/Map;8()Ljava/util/Map; clearValues()V setDataSet(Ljava/util/Map;)V9(Ljava/util/Map;)VsetValue'(Ljava/lang/String;Ljava/lang/String;)ZaddValuegetColumnLabels()[Ljava/lang/String;getColumnValues$(Ljava/util/Map;)[Ljava/lang/String;K(Ljava/util/Map;)[Ljava/lang/String; initColumnsgetFormControls()Ljava/util/List;merge&(Lnet/htmlparser/jericho/FormFields;)V getDebugInfo()Ljava/lang/String;toStringadd'(Lnet/htmlparser/jericho/FormControl;)V9(Lnet/htmlparser/jericho/FormControl;Ljava/lang/String;)VaddNameK(Lnet/htmlparser/jericho/FormControl;Ljava/lang/String;Ljava/lang/String;)VreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)VBLjava/util/AbstractCollection; dwjava/util/LinkedHashMap [\java/util/ArrayList _` bc mn "net/htmlparser/jericho/FormControl i ji hi  k net/htmlparser/jericho/FormField kl p d java/lang/String vw java/util/Map$EntryEntry  [Ljava/lang/String;  { } w      java/lang/StringBuilder           st  (net/htmlparser/jericho/FormFields$Column d i      d !" 
#!net/htmlparser/jericho/FormFieldsjava/util/AbstractCollectionjava/util/Collectionjava/util/IteratorhasNext()Znext()Ljava/lang/Object;getNamelengthaddToFormFields(Ljava/lang/Object;)Znet/htmlparser/jericho/ConfigCurrentCompatibilityModeCompatibilityMode1Lnet/htmlparser/jericho/Config$CompatibilityMode;/net/htmlparser/jericho/Config$CompatibilityModeisFormFieldNameCaseInsensitive toLowerCase&(Ljava/lang/Object;)Ljava/lang/Object;values()Ljava/util/Collection;(I)Vjava/util/ListisEmptytoArray(([Ljava/lang/Object;)[Ljava/lang/Object;put8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object; java/util/MapentrySet()Ljava/util/Set; java/util/SetgetKeygetValue addValues([Ljava/lang/String;)V(Ljava/lang/String;)Z formField"Lnet/htmlparser/jericho/FormField;getFirstFormControl&()Lnet/htmlparser/jericho/FormControl;predefinedValueLjava/lang/String;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;ColumnValueFalse isBooleanZ columnIndexIequalsColumnValueTrueColumnMultipleValueSeparatorallowsMultipleValuesgetPredefinedValues8(Lnet/htmlparser/jericho/FormField;ZLjava/lang/String;)VgetUserValueCount%(Lnet/htmlparser/jericho/FormField;)V-(Ljava/lang/Object;)Ljava/lang/StringBuilder;getPredefinedValuename(Ljava/lang/String;)VaddFormControl%net/htmlparser/jericho/OutputDocumentreplace1WX[\]^_`]abcdefh\**Y*Y*+ M, /,  N- - -**-Wα]ghif*jif*klf&+L*+mnf *]opqf*+M,,]rstfh\Y*oL*M, =, N-: +-!"#$%W+]uvwf/#*&L+ +  M,'xyfaU*(*+)*M, 6, +N-,#:*:--./DZ]z{|f *+N--,0}|f *+N--,1~fi]*2*#L=*F*2N-34 :+-5!6Y78.9-58:S+fE9*2*#M;&>**2< ,;S+)*N- - +:,#:*:=6-.:6 6    2: 6  ** 2:  3u 5 5 >W, ?SS < F, ?S<, 2 ,  S,, 6Y7, 28@8 8:S q Y,]f **ABwf*YL*M, , N-+C=-D-EF&+GY--EHIWM-E : " #:+GY-IW-J+GY-IWe*++CGKLf*fQE+M, 9, N-!:*:*-!-%W -Mıf:.6Y7L*M, , N+-NW+:f*Of *++PQf *+,+RSf*+,SfMA-N*-:Y-T:*!%W+,Uf0$*&M, ,  N+-V]ZGWY + jericho-html-3.1/classes/net/htmlparser/jericho/OutputSegment.class0000644000175000017500000000122411214132414025571 0ustar twernertwerner1"   
COMPARATORLjava/util/Comparator; Signature>Ljava/util/Comparator;getBegin()IgetEndwriteTo(Ljava/io/Writer;)V Exceptions appendTo(Ljava/lang/Appendable;)VtoString()Ljava/lang/String; getDebugInfo()VCode.net/htmlparser/jericho/OutputSegmentComparator ! $net/htmlparser/jericho/OutputSegmentjava/lang/Object'net/htmlparser/jericho/CharStreamSourcejava/io/IOException     Yjericho-html-3.1/classes/net/htmlparser/jericho/StreamedParseText.class0000644000175000017500000000111011214132414026344 0ustar twernertwerner1      streamedText%Lnet/htmlparser/jericho/StreamedText;((Lnet/htmlparser/jericho/StreamedText;)VCodegetEnd()I substring(II)Ljava/lang/String;      (net/htmlparser/jericho/StreamedParseText,net/htmlparser/jericho/CharSequenceParseText(Ljava/lang/CharSequence;)V#net/htmlparser/jericho/StreamedTextjava/lang/String toLowerCase()Ljava/lang/String;0    *+*+  *  *jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$BR_ElementHandler.class0000644000175000017500000000156211214132414031724 0ustar twernertwerner1'     INSTANCE  Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions!  "# $%;net/htmlparser/jericho/Renderer$Processor$BR_ElementHandlerBR_ElementHandler java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler&)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$1200.(Lnet/htmlparser/jericho/Renderer$Processor;)V access$400/(Lnet/htmlparser/jericho/Renderer$Processor;I)Vnet/htmlparser/jericho/Renderer  * ++ Y       jericho-html-3.1/classes/net/htmlparser/jericho/HTMLElementNameSet.class0000644000175000017500000000231411214132416026304 0ustar twernertwerner15      ! 
"# $%&()VCode([Ljava/lang/String;)V(Ljava/util/Collection;)V Signature-(Ljava/util/Collection;)V(Ljava/lang/String;)Vunion?(Ljava/lang/String;)Lnet/htmlparser/jericho/HTMLElementNameSet;C(Ljava/util/Collection;)Lnet/htmlparser/jericho/HTMLElementNameSet;W(Ljava/util/Collection;)Lnet/htmlparser/jericho/HTMLElementNameSet;minus'Ljava/util/HashSet; ' ()* +,  -./ 01 23java/lang/String 4))net/htmlparser/jericho/HTMLElementNameSetjava/util/HashSet(I)Vadd(Ljava/lang/Object;)Zjava/util/Collectionsize()Iiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;remove0  * +*+h=+*+2W *+h*+W  **+W*+W*1%+M,,N*-W**+ W*1%+M,,N*- W*jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypePHPShort.class0000644000175000017500000000073011214132416026734 0ustar twernertwerner1   INSTANCE-Lnet/htmlparser/jericho/StartTagTypePHPShort;()VCode PHP short tag +net/htmlparser/jericho/StartTagTypePHPShort 8net/htmlparser/jericho/StartTagTypeGenericImplementation](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V0    *   Yjericho-html-3.1/classes/net/htmlparser/jericho/SourceCompactor.class0000644000175000017500000000271211214132416026063 0ustar twernertwerner1M ' ( ) * +,- . 
/ 0 1 * 23 45 26 789:;segment Lnet/htmlparser/jericho/Segment;newLineLjava/lang/String;#(Lnet/htmlparser/jericho/Segment;)VCodewriteTo(Ljava/io/Writer;)V Exceptions<appendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String; setNewLine<(Ljava/lang/String;)Lnet/htmlparser/jericho/SourceCompactor; getNewLine =   > ?=&net/htmlparser/jericho/SourceFormatter  @A $B CAD EFG "H IJK L#&net/htmlparser/jericho/SourceCompactorjava/lang/Object'net/htmlparser/jericho/CharStreamSourcejava/io/IOException()Vjava/io/Writerflush setTidyTags+(Z)Lnet/htmlparser/jericho/SourceFormatter;<(Ljava/lang/String;)Lnet/htmlparser/jericho/SourceFormatter;setRemoveLineBreaksnet/htmlparser/jericho/Segmentlength()I+net/htmlparser/jericho/CharStreamSourceUtil=(Lnet/htmlparser/jericho/CharStreamSource;)Ljava/lang/String;sourceLnet/htmlparser/jericho/Source;net/htmlparser/jericho/SourcegetBestGuessNewLine1***+ *+++Y**  +  ! * "#* $%*+*&#&****jericho-html-3.1/classes/net/htmlparser/jericho/HTMLElementName.class0000644000175000017500000000611411214132416025632 0ustar twernertwerner1ALjava/lang/String; ConstantValueABBRACRONYMADDRESSAPPLETAREABBASEBASEFONTBDOBIG BLOCKQUOTEBODYBRBUTTONCAPTIONCENTERCITECODECOLCOLGROUPDDDELDFNDIRDIVDLDTEMFIELDSETFONTFORMFRAMEFRAMESETH1H2H3H4H5H6HEADHRHTMLIIFRAMEIMGINPUTINSISINDEXKBDLABELLEGENDLILINKMAPMENUMETANOFRAMESNOSCRIPTOBJECTOLOPTGROUPOPTIONPPARAMPREQSSAMPSCRIPTSELECTSMALLSPANSTRIKESTRONGSTYLESUB SUP TABLE TBODY TD TEXTAREATFOOTTHTHEADTITLETRTTUULVAR&net/htmlparser/jericho/HTMLElementNamejava/lang/Objectaabbracronymaddressappletareabbasebasefontbdobig blockquotebodybrbuttoncaptioncentercitecodecolcolgroupdddeldfndirdivdldtemfieldsetfontformframeframeseth1h2h3h4h5h6headhrhtmliiframeimginputinsisindexkbdlabellegendlilinkmapmenumetanoframesnoscriptobjectoloptgroupoptionpparampreqssampscriptselectsmallspanstrikestrongstylesubsuptabletbodytdtextareatfootththeadtitletrttuulvar[      
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$ElementHandler.class0000644000175000017500000000062711214132414031342 0ustar twernertwerner1  process Processor InnerClassesN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions8net/htmlparser/jericho/Renderer$Processor$ElementHandlerElementHandlerjava/lang/Object)net/htmlparser/jericho/Renderer$Processorjava/io/IOExceptionnet/htmlparser/jericho/Renderer    jericho-html-3.1/classes/net/htmlparser/jericho/EndTagTypeGenericImplementation.class0000644000175000017500000000462311214132416031165 0ustar twernertwerner1 #01 2 3 4 "5 "6 "7 #8 9:; <= "> "? @A 9B CD 9E FGH IJK "LM CN 9O 9P @Q RSTU "VWX staticStringLjava/lang/String;;(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZZ)VCodeisStatic()Z getEndTagName&(Ljava/lang/String;)Ljava/lang/String; generateHTMLconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag; &Yjava/lang/StringBuilder &Z [\ ]^ $% )* _^ -,` abE(ILjava/lang/String;Lnet/htmlparser/jericho/IntStringHashMap$Entry;)VCode   -net/htmlparser/jericho/IntStringHashMap$Entryjava/lang/Object()V'net/htmlparser/jericho/IntStringHashMap0  ***,*-  jericho-html-3.1/classes/net/htmlparser/jericho/StreamedSource.class0000644000175000017500000001445411214132416025706 0ustar twernertwerner1 M M M M M M M M M N M M M M   M  $ $ $  $  $  M ! 
$ M $  +  P    5 @ @     N @ M D D  D D StreamedSourceIterator InnerClasses streamedText%Lnet/htmlparser/jericho/StreamedText;streamedParseText*Lnet/htmlparser/jericho/StreamedParseText;sourceLnet/htmlparser/jericho/Source; closeableLjava/io/Closeable;automaticCloseZ coalescing handleTags isInitialisedcurrentSegment Lnet/htmlparser/jericho/Segment;nextParsedSegmentisXMLassumeNoNestedTags ConstantValue START_SEGMENTJ(Ljava/io/Reader;ZLjava/lang/String;Ljava/lang/String;Ljava/lang/String;)VCode Exceptions-(Lnet/htmlparser/jericho/EncodingDetector;Z)V(Ljava/io/Reader;)V(Ljava/io/InputStream;)V(Ljava/net/URL;)V(Ljava/net/URLConnection;)V(Ljava/lang/CharSequence;)V setBuffer+([C)Lnet/htmlparser/jericho/StreamedSource; setCoalescing*(Z)Lnet/htmlparser/jericho/StreamedSource;close()V getEncoding()Ljava/lang/String;getEncodingSpecificationInfogetPreliminaryEncodingInfoiterator()Ljava/util/Iterator; Signature8()Ljava/util/Iterator;getCurrentSegment"()Lnet/htmlparser/jericho/Segment;getCurrentSegmentCharBuffer()Ljava/nio/CharBuffer;()Z setLogger"(Lnet/htmlparser/jericho/Logger;)V getLogger!()Lnet/htmlparser/jericho/Logger; getBufferSize()ItoStringfinalize setHandleTagssetSearchBegin*(I)Lnet/htmlparser/jericho/StreamedSource;#(Lnet/htmlparser/jericho/Segment;)Z access$000*(Lnet/htmlparser/jericho/StreamedSource;)Z access$100 access$202+(Lnet/htmlparser/jericho/StreamedSource;Z)Z access$300I(Lnet/htmlparser/jericho/StreamedSource;)Lnet/htmlparser/jericho/Segment; access$400 access$500N(Lnet/htmlparser/jericho/StreamedSource;)Lnet/htmlparser/jericho/StreamedText; access$602i(Lnet/htmlparser/jericho/StreamedSource;Lnet/htmlparser/jericho/Segment;)Lnet/htmlparser/jericho/Segment; access$302 access$700H(Lnet/htmlparser/jericho/StreamedSource;)Lnet/htmlparser/jericho/Source; access$800*(Lnet/htmlparser/jericho/StreamedSource;)V[Ljava/lang/Object;Ljava/lang/Iterable;Ljava/io/Closeable; [w WX `a ST c ba c\ ^\ ]\ hw _\ ga YZ [\#net/htmlparser/jericho/StreamedText 
hm(net/htmlparser/jericho/StreamedParseText h UVnet/htmlparser/jericho/Source h xy zyjava/lang/StringBuilder y : y y hijava/io/InputStreamReader7InputStreamReader.getEncoding() of constructor argument'net/htmlparser/jericho/EncodingDetector hn hl hp hqVDocument specified encoding can not be determined automatically from a streamed sourcejava/lang/IllegalStateException:setBuffer() can only be called before iterator() is called h rCsetPlainTextWriter() can only be called before iterator() is called vw {y"iterator() can only be called onceisXML() method only available after iterator() has been called ?setSearchBegin() can only be called before iterator() is callednet/htmlparser/jericho/Segment hjava/io/IOExceptionnet/htmlparser/jericho/Tag     xhtml    %net/htmlparser/jericho/StreamedSourcejava/lang/Objectjava/lang/Iterablejava/io/Closeable((Lnet/htmlparser/jericho/StreamedText;)V{(Ljava/lang/CharSequence;Lnet/htmlparser/jericho/StreamedParseText;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V openReader()Ljava/io/Reader;getPreliminaryEncodingappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;'getPreliminaryEncodingSpecificationInfo java/net/URLopenConnection()Ljava/net/URLConnection;(Ljava/lang/String;)V)([C)Lnet/htmlparser/jericho/StreamedText;getBeginendI getCharBuffer(II)Ljava/nio/CharBuffer; getBuffer()[C(II)V getTagType"()Lnet/htmlparser/jericho/TagType;#net/htmlparser/jericho/StartTagTypeXML_DECLARATION%Lnet/htmlparser/jericho/StartTagType; getParseText$()Lnet/htmlparser/jericho/ParseText;begin net/htmlparser/jericho/ParseTextindexOf(Ljava/lang/String;II)I1MNOP STUVWXYZ[\]\^\_\`abac\d\efga%hiji]* * ** ** *+ **Y+*Y**Y**-kChlj;/*+++Y++ kChmj3'*++! 
+!"+!# kChnj*$Y+%&kChoj*$Y+'(&kChpj*$Y+(&kChqjeY* * ** ** * **Y+)*Y**Y+**rsj(* +Y,-*+.W*tuj$* +Y/-* *vwj* * 0kCxyj*1zyj*2{yj*3|}j+* +Y4-* 5Y*6~j*j"**7*89cj"* +Y:-*j *+;j*<j *=yj*>wj*uj**j0$* +Y?-d=*@YA*[wj$* *BL C cjH<* *D*DL+EF+GHI+J+KLj* j*j*Zj*j*j*j*+Zj*+Zj*j*wj @YA ~R 5MQjericho-html-3.1/classes/net/htmlparser/jericho/TagType.class0000644000175000017500000000745111214132414024333 0ustar twernertwerner1 1Z 0[ 0\ 0] 0^ _` _a 0b 0c de df dg 0h ij klm no pq 0r 0s 2tu Z kv wx 0yz | } ~ k 0 0 k k ' '  ' TagTypesIgnoringEnclosedMarkup InnerClasses descriptionLjava/lang/String;startDelimiterclosingDelimiter isServerTagZ namePrefixstartDelimiterPrefixL(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZLjava/lang/String;)VCoderegister()V deregistergetRegisteredTagTypes()Ljava/util/List; Signature4()Ljava/util/List;getDescription()Ljava/lang/String;getStartDelimitergetClosingDelimiter()Z getNamePrefixisValidPosition%(Lnet/htmlparser/jericho/Source;I[I)Z!getTagTypesIgnoringEnclosedMarkup#()[Lnet/htmlparser/jericho/TagType;!setTagTypesIgnoringEnclosedMarkup$([Lnet/htmlparser/jericho/TagType;)VconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag; tagEncloses#(Lnet/htmlparser/jericho/Source;I)ZtoStringgetTagAt@(Lnet/htmlparser/jericho/Source;IZZ)Lnet/htmlparser/jericho/Tag; =A 56 76 86 9: ;6 <6 D 9K  ?9-**+*,*-**,* @A?* BA?* CD? 
EFGH?*IH?*JH?*9K?*LH?*MN?* -C-..*+ -O* -O-.:6.2:*+OP?QR?* Y*STUV?-!+d*N--WH?*XY?Y*:: ** !*":F:*$%8*$*&'Yȷ()*+,**-*./v>NR#4203d{jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent.class0000644000175000017500000000126411214132416033524 0ustar twernertwerner1    INSTANCEDLnet/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent;()VCode#mason component called with content<&|&>  Bnet/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent 8net/htmlparser/jericho/StartTagTypeGenericImplementation@net/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContentBLnet/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent;](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V0   *  Yjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$FontStyleElementHandler.class0000644000175000017500000000242611214132414033211 0ustar twernertwerner1@ $ % & ' ( ) *+ - . / 0 123 INSTANCE_B5 Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler; INSTANCE_I INSTANCE_U INSTANCE_CODEdecorationCharC(C)VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions6()V #  78 98 :; <= >Anet/htmlparser/jericho/Renderer$Processor$FontStyleElementHandlerFontStyleElementHandler     java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler?)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$100.(Lnet/htmlparser/jericho/Renderer$Processor;)Z access$200 access$300Y(Lnet/htmlparser/jericho/Renderer$Processor;C)Lnet/htmlparser/jericho/Renderer$Processor; access$202/(Lnet/htmlparser/jericho/Renderer$Processor;Z)Z access$000net/htmlparser/jericho/Renderer  **NB+8++ W+W+*W+,++*W+, !"#=1Y* Y/ Y_ Y| 4 , jericho-html-3.1/classes/net/htmlparser/jericho/CharStreamSource.class0000644000175000017500000000050611214132414026162 0ustar twernertwerner1 writeTo(Ljava/io/Writer;)V 
ExceptionsappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String;'net/htmlparser/jericho/CharStreamSourcejava/lang/Objectjava/io/IOException  jericho-html-3.1/classes/net/htmlparser/jericho/NodeIterator.class0000644000175000017500000000510411214132416025350 0ustar twernertwerner1y !? @ AB C D E E AF G H AI J KG L M KN KL OP ? QL AR ST UV QW QGX YZ ? [\]^segment Lnet/htmlparser/jericho/Segment;sourceLnet/htmlparser/jericho/Source;posInextTagLnet/htmlparser/jericho/Tag;#characterReferenceAtCurrentPosition+Lnet/htmlparser/jericho/CharacterReference;legacyIteratorCompatabilityModeZ#(Lnet/htmlparser/jericho/Segment;)VCodehasNext()Znext"()Lnet/htmlparser/jericho/Segment;nextNonTagSegment$(II)Lnet/htmlparser/jericho/Segment; skipToPos(I)Vremove()V()Ljava/lang/Object; SignatureHLjava/lang/Object;Ljava/util/Iterator; /; +,_ `. -. #$ %& ab c( '( de )*f g( 67 dh 23 java/util/NoSuchElementExceptioni jkl mnp st uvnet/htmlparser/jericho/Segment /w'java/lang/UnsupportedOperationException 45#net/htmlparser/jericho/NodeIteratorjava/lang/Objectjava/util/Iteratornet/htmlparser/jericho/SourceLegacyIteratorCompatabilityModefullSequentialParse()[Lnet/htmlparser/jericho/Tag;begin getNextTag(I)Lnet/htmlparser/jericho/Tag;net/htmlparser/jericho/Tagend()Lnet/htmlparser/jericho/Tag;)net/htmlparser/jericho/CharacterReference getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseTextindexOf(CII)IxDnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings&UnterminatedCharacterReferenceSettings InnerClasses ACCEPT_ALLFLnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings; construct(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;)Lnet/htmlparser/jericho/CharacterReference;$(Lnet/htmlparser/jericho/Source;II)Vnet/htmlparser/jericho/Config !"#$%&'()*+,-./01k_****+*++* *W*+ *** * * +* 231'* * * 451* <* ]* ** * M**  * * ** * , *, ,* Y**671**N-**- -*:&6[*:5* 
* *Y** &`6Y**Z 891'* * ** :;1YA4<1*=>r Uoqjericho-html-3.1/classes/net/htmlparser/jericho/Attributes$ParsingState.class0000644000175000017500000000237711214132414027477 0ustar twernertwerner1B . /0"2 3 4 5 6 7 8 9 : ; <=AFTER_TAG_NAME ParsingState InnerClasses0Lnet/htmlparser/jericho/Attributes$ParsingState;BETWEEN_ATTRIBUTESIN_NAME AFTER_NAME START_VALUEIN_VALUEAFTER_VALUE_FINAL_QUOTE$VALUES1[Lnet/htmlparser/jericho/Attributes$ParsingState;values3()[Lnet/htmlparser/jericho/Attributes$ParsingState;CodevalueOfD(Ljava/lang/String;)Lnet/htmlparser/jericho/Attributes$ParsingState;(Ljava/lang/String;I)V Signature()VBLjava/lang/Enum; !"" >?@.net/htmlparser/jericho/Attributes$ParsingState &A () ()       java/lang/Enumclone()Ljava/lang/Object;!net/htmlparser/jericho/Attributes5(Ljava/lang/Class;Ljava/lang/String;)Ljava/lang/Enum;@0@@@@@@@ !"#$%  &'% *()%*+*+,+%Y Y  Y  YYYYY SY SY SYSYSYSYS*- 1@jericho-html-3.1/classes/net/htmlparser/jericho/CharStreamSourceUtil.class0000644000175000017500000000163311214132416027024 0ustar twernertwerner16   ! "#$ % "&'( ) *+,'DEFAULT_ESTIMATED_MAXIMUM_OUTPUT_LENGTHI ConstantValue()VCode getReader;(Lnet/htmlparser/jericho/CharStreamSource;)Ljava/io/Reader;toString=(Lnet/htmlparser/jericho/CharStreamSource;)Ljava/lang/String; java/io/StringReader  -. 
/0java/lang/StringBuilder 1 23java/io/IOExceptionjava/lang/RuntimeException 4 5+net/htmlparser/jericho/CharStreamSourceUtiljava/lang/Object(Ljava/lang/String;)V'net/htmlparser/jericho/CharStreamSourcegetEstimatedMaximumOutputLength()J(I)VappendTo(Ljava/lang/Appendable;)V(Ljava/lang/Throwable;)V()Ljava/lang/String;1*  Y* L8*@@ Y N*- :Y-$' jericho-html-3.1/classes/net/htmlparser/jericho/EndTagTypeNormal.class0000644000175000017500000000116011214132416026124 0ustar twernertwerner1     INSTANCE)Lnet/htmlparser/jericho/EndTagTypeNormal;()VCodegetCorrespondingStartTagType'()Lnet/htmlparser/jericho/StartTagType;/normal  'net/htmlparser/jericho/EndTagTypeNormal 6net/htmlparser/jericho/EndTagTypeGenericImplementation;(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZZ)V#net/htmlparser/jericho/StartTagTypeNORMAL%Lnet/htmlparser/jericho/StartTagType;0    *  Yjericho-html-3.1/classes/net/htmlparser/jericho/SubCache$CacheEntryMissingInternalError.class0000644000175000017500000000152511214132414032502 0ustar twernertwerner1+       r(Lnet/htmlparser/jericho/TagType;Lnet/htmlparser/jericho/Tag;Lnet/htmlparser/jericho/SubCache;Ljava/lang/String;)VCodejava/lang/StringBuilder 6INTERNAL ERROR: Inconsistent Cache State for TagType " !" !#" - !$% &' (' )*>net/htmlparser/jericho/SubCache$CacheEntryMissingInternalErrorCacheEntryMissingInternalError InnerClassesjava/lang/AssertionError()Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;-(Ljava/lang/Object;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;net/htmlparser/jericho/Tag getDebugInfo()Ljava/lang/String;toString(Ljava/lang/Object;)Vnet/htmlparser/jericho/SubCache  C7*Y+ , -    jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderSTDERR.class0000644000175000017500000000122511214132416026627 0ustar twernertwerner1$      INSTANCE'Lnet/htmlparser/jericho/LoggerProvider;()VCode getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger; #net/htmlparser/jericho/WriterLoggerjava/io/OutputStreamWriter ! 
" #+net/htmlparser/jericho/LoggerProviderSTDERR java/lang/Object%net/htmlparser/jericho/LoggerProviderjava/lang/SystemerrLjava/io/PrintStream;(Ljava/io/OutputStream;)V%(Ljava/io/Writer;Ljava/lang/String;)V0   *YY+ Y jericho-html-3.1/classes/net/htmlparser/jericho/CharacterEntityReference.class0000644000175000017500000004527211214132416027673 0ustar twernertwerner1 5T #U #V #W &X #Y Z[\ 0] ^ #_` a b #cd e #f #ghi j 5k l ml no np #q rs rt ru #v #wx #Ty %z{ | Z}~      !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz Z{?@ &z Z| }~   . 0 . & #nameLjava/lang/String;_nbspC ConstantValue_iexcl_cent_pound_curren_yen_brvbar_sect_uml_copy_ordf_laquo_not_shy_reg_macr_deg_plusmn_sup2_sup3_acute_micro_para_middot_cedil_sup1_ordm_raquo_frac14_frac12_frac34_iquest_Agrave_Aacute_Acirc_Atilde_Auml_Aring_AElig_Ccedil_Egrave_Eacute_Ecirc_Euml_Igrave_Iacute_Icirc_Iuml_ETH_Ntilde_Ograve_Oacute_Ocirc_Otilde_Ouml_times_Oslash_Ugrave_Uacute_Ucirc_Uuml_Yacute_THORN_szlig_agrave_aacute_acirc_atilde_auml_aring_aelig_ccedil_egrave_eacute_ecirc_euml_igrave_iacute_icirc_iuml_eth_ntilde_ograve_oacute_ocirc_otilde_ouml_divide_oslash_ugrave_uacute_ucirc_uuml_yacute_thorn_yuml_fnof_Alpha_Beta_Gamma_Delta_Epsilon_Zeta_Eta_Theta_Iota_Kappa_Lambda_Mu_Nu_Xi_Omicron_Pi_Rho_Sigma_Tau_Upsilon_Phi_Chi_Psi_Omega_alpha_beta_gamma_delta_epsilon_zeta_eta_theta_iota_kappa_lambda_mu_nu_xi_omicron_pi_rho_sigmaf_sigma_tau_upsilon_phi_chi_psi_omega _thetasym_upsih_piv_bull "_hellip &_prime 2_Prime 3_oline >_frasl D_weierp!_image!_real!_trade!"_alefsym!5_larr!_uarr!_rarr!_darr!_harr!_crarr!_lArr!_uArr!_rArr!_dArr!_hArr!_forall"_part"_exist"_empty"_nabla"_isin"_notin" _ni" _prod"_sum"_minus"_lowast"_radic"_prop"_infin"_ang" _and"'_or"(_cap")_cup"*_int"+_there4"4_sim"<_cong"E_asymp"H_ne"`_equiv"a_le"d_ge"e_sub"_sup"_nsub"_sube"_supe"_oplus"_otimes"_perp"_sdot"_lceil#_rceil# _lfloor# _rfloor# 
_lang#)_rang#*_loz%_spades&`_clubs&c_hearts&e_diams&f_quot"_amp&_lt<_gt>_OEligR_oeligS_Scaron`_scarona_Yumlx_circ_tilde_ensp _emsp _thinsp _zwnj _zwj _lrm _rlm _ndash _mdash _lsquo _rsquo _sbquo _ldquo _rdquo _bdquo _dagger _Dagger !_permil 0_lsaquo 9_rsaquo :_euro _apos'NAME_TO_CODE_POINT_MAPLjava/util/Map; Signature6Ljava/util/Map;CODE_POINT_TO_NAME_MAP)Lnet/htmlparser/jericho/IntStringHashMap;MAX_NAME_LENGTHI%(Lnet/htmlparser/jericho/Source;III)VCodegetName()Ljava/lang/String;(C)Ljava/lang/String;(I)Ljava/lang/String;getCodePointFromName(Ljava/lang/String;)IgetCharacterReferenceStringgetNameToCodePointMap()Ljava/util/Map;8()Ljava/util/Map; getDebugInfo&(Ljava/lang/String;)Ljava/lang/String;appendCharacterReferenceString@(Ljava/lang/Appendable;Ljava/lang/String;)Ljava/lang/Appendable; Exceptions constructN(Lnet/htmlparser/jericho/Source;II)Lnet/htmlparser/jericho/CharacterReference;isValidReferenceNameChar(C)Z()V <= ?B 67 89 B 45 java/lang/Integer @  EJjava/lang/StringBuilder <S  KL"  ; java/io/IOExceptionjava/lang/RuntimeException < I@ @   :; ;   PQ CD/net/htmlparser/jericho/CharacterEntityReferencejava/util/HashMap <nbsp < 
iexclcentpoundcurrenyenbrvbarsectumlcopyordflaquonotshyregmacrdegplusmnsup2sup3acutemicroparamiddotcedilsup1ordmraquofrac14frac12frac34iquestAgraveAacuteAcircAtildeAumlAringAEligCcedilEgraveEacuteEcircEumlIgraveIacuteIcircIumlETHNtildeOgraveOacuteOcircOtildeOumltimesOslashUgraveUacuteUcircUumlYacuteTHORNszligagraveaacuteacircatildeaumlaringaeligccedilegraveeacuteecirceumligraveiacuteicirciumlethntildeograveoacuteocircotildeoumldivideoslashugraveuacuteucircuumlyacutethornyumlfnofAlphaBetaGammaDeltaEpsilonZetaEtaThetaIotaKappaLambdaMuNuXiOmicronPiRhoSigmaTauUpsilonPhiChiPsiOmegaalphabetagammadeltaepsilonzetaetathetaiotakappalambdamunuxiomicronpirhosigmafsigmatauupsilonphichipsiomegathetasymupsihpivbullhellipprimePrimeolinefraslweierpimagerealtradealefsymlarruarrrarrdarrharrcrarrlArruArrrArrdArrhArrforallpartexistemptynablaisinnotinniprodsumminuslowastradicpropinfinangandorcapcupintthere4simcongasympneequivlegesubsupnsubsubesupeoplusotimesperpsdotlceilrceillfloorrfloorlangranglozspadesclubsheartsdiamsquotampltgtOEligoeligScaronscaronYumlcirctildeenspemspthinspzwnjzwjlrmrlmndashmdashlsquorsquosbquoldquordquobdquodaggerDaggerpermillsaquorsaquoeuroapos'net/htmlparser/jericho/IntStringHashMap     java/util/Map$EntryEntry InnerClasses java/lang/String    ;)net/htmlparser/jericho/CharacterReferenceget java/util/Map&(Ljava/lang/Object;)Ljava/lang/Object; toLowerCaseintValue()Iappend(C)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder; codePointappendUnicodeText/(Ljava/lang/Appendable;I)Ljava/lang/Appendable;(Ljava/lang/Throwable;)VtoStringjava/lang/Objectjava/lang/Appendable(C)Ljava/lang/Appendable;0(Ljava/lang/CharSequence;)Ljava/lang/Appendable;net/htmlparser/jericho/SourceendcharAt(I)C subSequence(II)Ljava/lang/CharSequence;(IF)V(I)Vput8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;sizeentrySet()Ljava/util/Set; 
java/util/Setiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;getKeylengthgetValue'(ILjava/lang/String;)Ljava/lang/String;MAX_ENTITY_REFERENCE_LENGTH!#56789:;<9:=>9:?@9:AB9:CD9:EF9:GH9:IJ9:KL9:MN9:OP9:QR9:ST9:UV9:WX9:YZ9:[\9:]^9:_`9:ab9:cd9:ef9:gh9:ij9:kl9:mn9:op9:qr9:st9:uv9:wx9:yz9:{|9:}~9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:  9:  9: 9:9:9:9:9:9:9:9:9: 9:!"9:#$9:%&9:'(9:)*9:+,9:-.9:/09:129:349:569:789:9:9:;<9:=>9:?@9:AB9:CD9:EF9:GH9:IJ9:KL9:MN9:OP9:QR9:ST9:UV9:WX9:YZ9:[\9:]^9:_`9:ab9:cd9:ef9:gh9:ij9:kl9:mn9:op9:qr9:st9:uv9:wx9:yz9:{|9:}~9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:9:  9:  9: 9:9:9:9:9:9:9:9:9: 9:!"9:#$9:%&9:'(9:)*9:+,9:-.9:/09:129:3 4567 89 :;<=>*+*?@>* ?A> ?B> CD>A5*L+* M,*,L+ + E@>* EB>& L+ +  FG>6HI@>\H YL+"W+*W+W+*W MY,+ *W+(+ EJ>- Y*LY+KL>"*&+;MNO> `6`6*d666 *6  ;`6* NL ! 
6  6  6* N-"6    #Y* $PQ>*AzZ aRS>ui%Y &'Y()W*Y()W+Y()W,Y()W-Y()W.Y()W/Y()W0Y()W1Y()W2Y()W3Y()W4Y()W5Y()W6Y()W7Y()W8Y()W9Y()W:Y()W;Y()W<Y()W=Y()W>Y()W?Y()W@Y()WAY()WBY()WCY()WDY()WEY()WFY()WGY()WHY()WIY()WJY()WKY·()WLY÷()WMYķ()WNYŷ()WOYƷ()WPYǷ()WQYȷ()WRYɷ()WSYʷ()WTY˷()WUY̷()WVYͷ()WWYη()WXYϷ()WYYз()WZYѷ()W[Yҷ()W\Yӷ()W]YԷ()W^Yշ()W_Yַ()W`Y׷()WaYط()WbYٷ()WcYڷ()WdY۷()WeYܷ()WfYݷ()WgY޷()WhY߷()WiY()WjY()WkY()WlY()WmY()WnY()WoY()WpY()WqY()WrY()WsY()WtY()WuY()WvY()WwY()WxY()WyY()WzY()W{Y()W|Y()W}Y()W~Y()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY()WY·()WY÷()WYķ()WYŷ()WYƷ()WYǷ()WYȷ()WYɷ()WYѷ()WYҷ()WYַ()WY "()WY &()WY 2()WY 3()W»Y >()WûY D()WĻY!()WŻY!()WƻY!()WǻY!"()WȻY!5()WɻY!()WʻY!()W˻Y!()W̻Y!()WͻY!()WλY!()WϻY!з()WлY!ѷ()WѻY!ҷ()WһY!ӷ()WӻY!Է()WԻY"()WջY"()WֻY"()W׻Y"()WػY"()WٻY"()WڻY" ()WۻY" ()WܻY"()WݻY"()W޻Y"()W߻Y"()WY"()WY"()WY"()WY" ()WY"'()WY"(()WY")()WY"*()WY"+()WY"4()WY"<()WY"E()WY"H()WY"`()WY"a()WY"d()WY"e()WY"()WY"()WY"()WY"()WY"()WY"()WY"()WY"()WY"ŷ()WY#()WY# ()WY# ()WY# ()WY#)()WY#*()WY%ʷ()WY&`()WY&c()WY&e()WY&f()WY"()WY&()WY<()WY>()W YR()W YS()W Y`()W Ya()W Yx()WYƷ()WYܷ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()WY ()W Y !()W!Y 0()W"Y 9()W#Y :()W$Y ()W%Y'()W&Y'(n )*+K*,?*-.L+/0M,1 ,1+2 ,3W`4 .Z jericho-html-3.1/classes/net/htmlparser/jericho/FormControlType.class0000644000175000017500000000535611214132414026066 0ustar twernertwerner1 i jkVl ;m ;n o p q r st ,u v .w<x y z>{ |? }@ ~A B C D E F G H  , . 
,M .BUTTON(Lnet/htmlparser/jericho/FormControlType;CHECKBOXFILEHIDDENIMAGEPASSWORDRADIOSELECT_MULTIPLE SELECT_SINGLESUBMITTEXTTEXTAREA elementNameLjava/lang/String;hasPredefinedValueZsubmitINPUT_ELEMENT_TYPE_MAPLjava/util/HashMap; SignatureOLjava/util/HashMap;#NON_FORM_CONTROL_TYPE_ATTRIBUTE_SETLjava/util/HashSet;'Ljava/util/HashSet;$VALUES)[Lnet/htmlparser/jericho/FormControlType;values+()[Lnet/htmlparser/jericho/FormControlType;CodevalueOf<(Ljava/lang/String;)Lnet/htmlparser/jericho/FormControlType;*(Ljava/lang/String;ILjava/lang/String;ZZ)V(Ljava/lang/String;ZZ)VgetElementName()Ljava/lang/String;()ZisSubmitgetFromInputElementTypeisNonFormControl(Ljava/lang/String;)Z()V:Ljava/lang/Enum; UVV &net/htmlparser/jericho/FormControlType Z \ IJ KL ML NO ` RS button \] <=input >= ?= @= A= B= C=select D= E= F= G=textarea H=java/util/HashMap \java/util/HashSetcheckbox filehiddenimagepasswordradiotext resetjava/lang/Enumclone()Ljava/lang/Object;5(Ljava/lang/Class;Ljava/lang/String;)Ljava/lang/Enum;(Ljava/lang/String;I)Vjava/lang/String toLowerCaseget&(Ljava/lang/Object;)Ljava/lang/Object;contains(Ljava/lang/Object;)Z(IF)Vput8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;add@1;@<=@>=@?=@@=@A=@B=@C=@D=@E=@F=@G=@H=IJKLMLNOPQRSPTUV WXY  Z[Y *\]Y$*+*-** P^_`Y*KaY*baY* c[Y * deY * fgYYYYYYYYY !"Y#!$Y% &Y' (Y) *+ YSYSYSYSYSYSYSY"SY$SY &SY (SY +S,Y - .Y / 01W 21W 31W 41W 51W 61W 7&1W 8(1W 9W :9WPhjericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeGenericImplementation.class0000644000175000017500000000472611214132416031560 0ustar twernertwerner1 #/ $/ #0 12 13 45 #6 78 #9 7: 7; <3 => #? 
#@ AB #C 7D EF 7GH IJ KL MNO #PQ R ES #T #U <VWXnameCharAfterPrefixAllowedZ](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)VCode_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)VconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;getEnd#(Lnet/htmlparser/jericho/Source;I)I '* YZ[ \] ^_` ab %&c de fg hi jklm nb og pqr -] -. stu vg wxjava/lang/StringBuilder 'y StartTag z{ at | }~ not recognised as type ' Z%' because it has no closing delimiter Z Z 8net/htmlparser/jericho/StartTagTypeGenericImplementation#net/htmlparser/jericho/StartTagType getNamePrefix()Ljava/lang/String;java/lang/Stringlength()IcharAt(I)Cjava/lang/CharacterisLetter(C)Znet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText;isNameAfterPrefixRequired()Z getNameEnd(I)IgetName(II)Ljava/lang/String; net/htmlparser/jericho/ParseTextnet/htmlparser/jericho/Tag isXMLNameChar hasAttributesparseAttributesW(Lnet/htmlparser/jericho/Source;ILjava/lang/String;)Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/AttributesloggerLnet/htmlparser/jericho/Logger;net/htmlparser/jericho/Logger isInfoEnabledgetRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;(I)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;&net/htmlparser/jericho/RowColumnVectorappendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;getDescriptiontoStringinfo(Ljava/lang/String;)VconstructStartTagy(Lnet/htmlparser/jericho/Source;IILjava/lang/String;Lnet/htmlparser/jericho/Attributes;)Lnet/htmlparser/jericho/StartTag;getClosingDelimiterindexOf(Ljava/lang/String;I)I!#$%&'()*+,-'*)E9*+,-****d+,) +N`6*:*`6* $+ 6+ :6*-  :*#*+:*+6^*+6O+A++Yȷ**+ -.)."+*!"> *!`jericho-html-3.1/classes/net/htmlparser/jericho/EndTagType.class0000644000175000017500000000335511214132414024761 0ustar twernertwerner1T )*+ , - )./0 12 3 4 5 6 78 9 :; < => ?@ASTART_DELIMITER_PREFIXLjava/lang/String; ConstantValue 
UNREGISTERED#Lnet/htmlparser/jericho/EndTagType;NORMAL:(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)VCodegetCorrespondingStartTagType'()Lnet/htmlparser/jericho/StartTagType; getEndTagName&(Ljava/lang/String;)Ljava/lang/String; generateHTMLconstructEndTagT(Lnet/htmlparser/jericho/Source;IILjava/lang/String;)Lnet/htmlparser/jericho/EndTag;()VB CD()VCode %&, = >?    @    line.separatorA BC !net/htmlparser/jericho/Configjava/lang/Object/net/htmlparser/jericho/Config$CompatibilityModeDnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettingsjava/lang/BooleantoString(Z)Ljava/lang/String;IEjava/lang/System getProperty&(Ljava/lang/String;)Ljava/lang/String;1        !"#$%&'*(&'7+    jericho-html-3.1/classes/net/htmlparser/jericho/FormField.class0000644000175000017500000001076011214132414024622 0ustar twernertwerner1 @v ?w ?x ?yz v ?{ ?| ?} ~    1 ~  v ? ? ?   ?  %v ~ )v ) ) )  ) ? ~  ? ?nameLjava/lang/String;userValueCountIallowsMultipleValuesZpredefinedValuesLjava/util/LinkedHashSet; Signature-Ljava/util/LinkedHashSet; formControls?Ljava/util/LinkedHashSet;firstFormControl$Lnet/htmlparser/jericho/FormControl; columnIndex(Ljava/lang/String;)VCodegetName()Ljava/lang/String;getFormControls()Ljava/util/Collection;>()Ljava/util/Collection;getFormControl8(Ljava/lang/String;)Lnet/htmlparser/jericho/FormControl;&()Lnet/htmlparser/jericho/FormControl;()ZgetUserValueCount()IgetPredefinedValues,()Ljava/util/Collection; getValues()Ljava/util/List;&()Ljava/util/List; clearValues()V setValues(Ljava/util/Collection;)V-(Ljava/util/Collection;)VsetValue(Ljava/lang/String;)ZaddValue getDebugInfotoString addValues([Ljava/lang/String;)VaddFormControl9(Lnet/htmlparser/jericho/FormControl;Ljava/lang/String;)VcalculateAllowsMultipleValues'(Lnet/htmlparser/jericho/FormControl;)ZgetFirstFormControlmerge%(Lnet/htmlparser/jericho/FormField;)V Pd CD EF GHjava/util/LinkedHashSet KH MN AB  [ "net/htmlparser/jericho/FormControl  [ Tselect T ^V  java/util/ArrayList f cd mf 
ji"java/lang/IllegalArgumentExceptionvalue argument must not be null PQ ] sZjava/util/LinkedList java/lang/StringBuilderField: , UserValueCount= , AllowsMultipleValues= java/lang/String BPredefinedValue:  FormControl: kT lT qr [ UV net/htmlparser/jericho/FormFieldjava/lang/Objectiterator()Ljava/util/Iterator;java/util/IteratorhasNextnext()Ljava/lang/Object;getFormControlType*()Lnet/htmlparser/jericho/FormControlType;&net/htmlparser/jericho/FormControlTypehasPredefinedValuegetElementNamegetPredefinedValuejava/util/Collectioncontains(Ljava/lang/Object;)Zequalsjava/util/CollectionsemptySet()Ljava/util/Set; addValuesTosizejava/util/Listaddappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;(Z)Ljava/lang/StringBuilder;net/htmlparser/jericho/ConfigNewLineSELECT_MULTIPLE(Lnet/htmlparser/jericho/FormControlType;RADIOisSubmit1?@ABCDEFGHIJKHILMNODPQR5)*****Y**+ STR* UVR*IWXYR+E* M, 1,  N--- --̧I* M, 8,  N--+-+--XZR*  E[R*\]R*^VR**I_`aR9-YL* M, ,  N-++IbcdR/#* L+ +  M,efR **+IghiR*+ *+jiR+ Y !*" *#+$M* N- ?-  :, %Y&M,'W+$,,(N- -  :+$kTR)Y*L++,* ,-,*./,*0W*2* M, !, 1N+2,3,-,W* M, $,  N+2,4,-5,W+2,2,W+6lTR*7mfR4(+&+8M, , 1N*-WIgmnR2&+$+M,>6,2:*WopRK?,*Y`**Y*,9W*+9W**+:qrRz* ****"+M*",;*#N,< -<,= -=sZR+***  *tuR+* *+** +**+1+** M, , 1N*-9W+>8M, ,  N*-9Wjericho-html-3.1/classes/net/htmlparser/jericho/MasonTagTypes.class0000644000175000017500000000321511214132416025510 0ustar twernertwerner1I $ % & '( ) *+ , -. 
/ 01 2 34 5 67 89:;MASON_COMPONENT_CALL%Lnet/htmlparser/jericho/StartTagType;#MASON_COMPONENT_CALLED_WITH_CONTENT'MASON_COMPONENT_CALLED_WITH_CONTENT_END#Lnet/htmlparser/jericho/EndTagType;MASON_NAMED_BLOCKMASON_NAMED_BLOCK_END TAG_TYPES![Lnet/htmlparser/jericho/TagType;()VCoderegisterdefines#(Lnet/htmlparser/jericho/TagType;)ZisParsedByMason   < = !> ?@ A ?B C ?D E ?F G ?H net/htmlparser/jericho/TagType$net/htmlparser/jericho/MasonTagTypesjava/lang/Object#net/htmlparser/jericho/StartTagType SERVER_COMMON5net/htmlparser/jericho/StartTagTypeMasonComponentCallINSTANCE7Lnet/htmlparser/jericho/StartTagTypeMasonComponentCall;Bnet/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContentDLnet/htmlparser/jericho/StartTagTypeMasonComponentCalledWithContent;@net/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContentBLnet/htmlparser/jericho/EndTagTypeMasonComponentCalledWithContent;2net/htmlparser/jericho/StartTagTypeMasonNamedBlock4Lnet/htmlparser/jericho/StartTagTypeMasonNamedBlock;0net/htmlparser/jericho/EndTagTypeMasonNamedBlock2Lnet/htmlparser/jericho/EndTagTypeMasonNamedBlock;1* )K*<=*2N- !/#L+=>+2:* "! 
* *#PD YSY SY SY SYSjericho-html-3.1/classes/net/htmlparser/jericho/CharacterReference.class0000644000175000017500000001657311214132414026476 0ustar twernertwerner1: U 7 U U U U U U U ,  U   U U U U U U U U  U U U U U  , U U U  4 U 7 7 U U U ,    N N N N  7EncodingFilterWriter InnerClasses codePointIINVALID_CODE_POINT ConstantValueMAX_ENTITY_REFERENCE_LENGTH TAB_LENGTH%(Lnet/htmlparser/jericho/Source;III)VCode getCodePoint()IgetChar()C appendCharTo(Ljava/lang/Appendable;)V Exceptions(Ljava/lang/Appendable;Z)V isTerminated()Zencode,(Ljava/lang/CharSequence;)Ljava/lang/String;(C)Ljava/lang/String;encodeWithWhiteSpaceFormattingdecode-(Ljava/lang/CharSequence;Z)Ljava/lang/String;.(Ljava/lang/CharSequence;ZZ)Ljava/lang/String;decodeCollapseWhiteSpacereencodegetCharacterReferenceString()Ljava/lang/String;(I)Ljava/lang/String;"getDecimalCharacterReferenceString&getHexadecimalCharacterReferenceStringgetUnicodeTextappendUnicodeText/(Ljava/lang/Appendable;I)Ljava/lang/Appendable;parseE(Ljava/lang/CharSequence;)Lnet/htmlparser/jericho/CharacterReference;(getCodePointFromCharacterReferenceString(Ljava/lang/CharSequence;)IrequiresEncoding(C)ZgetEncodingFilterWriter"(Ljava/io/Writer;)Ljava/io/Writer; appendEncode/(Ljava/lang/Appendable;C)Ljava/lang/Appendable;G(Ljava/lang/Appendable;Ljava/lang/CharSequence;Z)Ljava/lang/Appendable;(appendEncodeCheckForWhiteSpaceFormatting(Ljava/lang/Appendable;CZ)Z getPreviousM(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/CharacterReference;getNext&UnterminatedCharacterReferenceSettings(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;)Lnet/htmlparser/jericho/CharacterReference;)appendHexadecimalCharacterReferenceString%appendDecimalCharacterReferenceString construct appendDecodeI(Ljava/lang/Appendable;Ljava/lang/CharSequence;IZZ)Ljava/lang/Appendable;getHighSurrogate(I)CgetLowSurrogate access$000 c [\ jm   hi  \ java/lang/StringBuilder g c   
zjava/io/IOExceptionjava/lang/RuntimeException c `\ tv wu   tu pq y{ |{ c }{ ~{ U+    znet/htmlparser/jericho/Source c  fg r >net/htmlparser/jericho/CharacterReference$EncodingFilterWriter c net/htmlparser/jericho/Segment g
                      '   !"# $% &%&#x&#  { '\ (#java/lang/IndexOutOfBoundsException )+, -.%net/htmlparser/jericho/StreamedSource c/ 01 23 456 7o 89)net/htmlparser/jericho/CharacterReferenceDnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings$(Lnet/htmlparser/jericho/Source;II)Vnet/htmlparser/jericho/ConfigConvertNonBreakingSpacesZjava/lang/CharacterisSupplementaryCodePoint(I)Zjava/lang/Appendableappend(C)Ljava/lang/Appendable;sourceLnet/htmlparser/jericho/Source;endcharAtjava/lang/CharSequencelength(I)Vjava/lang/ObjecttoString(Ljava/lang/Throwable;)VappendCollapseWhiteSpaceL(Ljava/lang/StringBuilder;Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;/net/htmlparser/jericho/CharacterEntityReference0net/htmlparser/jericho/NumericCharacterReference()V0(Ljava/lang/CharSequence;)Ljava/lang/Appendable;java/lang/Integer(II)Ljava/lang/String;java/lang/String toUpperCase(Ljava/lang/CharSequence;Z)V ACCEPT_ALLFLnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;getNameIsApostropheEncoded(Ljava/io/Writer;)VgetBeginappendCharacterReferenceString@(Ljava/lang/Appendable;Ljava/lang/String;)Ljava/lang/Appendable; isWhiteSpace getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseText lastIndexOf(CI)IindexOf$characterEntityReferenceMaxCodePointN(Lnet/htmlparser/jericho/Source;II)Lnet/htmlparser/jericho/CharacterReference;CurrentCompatibilityModeCompatibilityMode1Lnet/htmlparser/jericho/Config$CompatibilityMode;/net/htmlparser/jericho/Config$CompatibilityMode)getUnterminatedCharacterReferenceSettingsI(Z)Lnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;(Ljava/lang/CharSequence;)V setHandleTags*(Z)Lnet/htmlparser/jericho/StreamedSource;setSearchBegin*(I)Lnet/htmlparser/jericho/StreamedSource;iterator()Ljava/util/Iterator;java/util/IteratorhasNextnext()Ljava/lang/Object;!U7[\]\^_`\a\^b*cde*+*fge*hie*jke *+ljmeZN*"+*W+* W'* >+ W +Wlnoe$* * d ; pqe<(*Y*h*LY+ pre0YLY+ sqe<(*Y*h*LY+ tqe * tue 
*tveaM*>*9*&'Y**:Y*56 wqe*wue#Y** xqe *yz y{e$L'L+L+|ze* |{e-Y!"LY+}ze*# }{e-Y!$LY+~ze*% ~{e-Y!&LY+eC7*'(W)*M,+d>*0W*,(W*l e,Y*-./ e*0L+ +1e+2' 3 e 4Y*5 e*6**le! +*>+6+7 +7:86>`69L366+6*6`6  : -  +  *:(Wx66 *+ 6    * W/p * W*;(W d6,*lej^2N-/' 3*<(W>*W3*-=W* *"W >*Wle *.?e *.@ eA5*AN-&B<"*,/:-&dB< eA5*AN-&C<"*,/:-&`C<e'*D()(;le%*E(F(;leT8*AG&*A`G# *,H *,IJN5K45K en bLM:6NY+OPQ:R:S4T7:  U U** V(W*l e W z` e X~`e*lZ4UY*jericho-html-3.1/classes/net/htmlparser/jericho/SubCache.class0000644000175000017500000001454611214132414024436 0ustar twernertwerner1 9 9 : 9 9  9 9    9 9   9 9 9 9 9 9 9  9  9     1 3 9 9 9 9 CacheEntry InnerClasses TagIterator#FoundCacheEntryMissingInternalError$SourceCacheEntryMissingInternalErrorCacheEntryMissingInternalErrorcacheLnet/htmlparser/jericho/Cache;tagType Lnet/htmlparser/jericho/TagType;bof,Lnet/htmlparser/jericho/SubCache$CacheEntry;eofarray-[Lnet/htmlparser/jericho/SubCache$CacheEntry;INITIAL_CAPACITYI ConstantValue@A(Lnet/htmlparser/jericho/Cache;Lnet/htmlparser/jericho/TagType;)VCodesize()Iclear()V bulkLoad_Init(I)V bulkLoad_Set (ILnet/htmlparser/jericho/Tag;)VbulkLoad_AddToTypeSpecificCache(Lnet/htmlparser/jericho/Tag;)V"bulkLoad_FinaliseTypeSpecificCachegetTagAt (IZ)Lnet/htmlparser/jericho/Tag;addTagAtgetPreviousTag(I)Lnet/htmlparser/jericho/Tag; getNextTaggetTagIterator()Ljava/util/Iterator; Signature4()Ljava/util/Iterator;toString()Ljava/lang/String;appendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;w(Lnet/htmlparser/jericho/SubCache$CacheEntry;ILnet/htmlparser/jericho/SubCache$CacheEntry;)Lnet/htmlparser/jericho/Tag;addPreviousTag addNextTagcompactadd(Lnet/htmlparser/jericho/SubCache$CacheEntry;Lnet/htmlparser/jericho/SubCache$CacheEntry;Lnet/htmlparser/jericho/SubCache$CacheEntry;)V getIndexOfPos(I)IgetNextZ(Lnet/htmlparser/jericho/SubCache$CacheEntry;)Lnet/htmlparser/jericho/SubCache$CacheEntry; getPrevious lastIndexinsert/(Lnet/htmlparser/jericho/SubCache$CacheEntry;)VremovedoubleCapacity 
access$000$(Lnet/htmlparser/jericho/SubCache;)I access$100P(Lnet/htmlparser/jericho/SubCache;)[Lnet/htmlparser/jericho/SubCache$CacheEntry; IJ vS OU*net/htmlparser/jericho/SubCache$CacheEntry BC DE O FG S HG L  L zU qr L  _^ ut op `k lY st bk mY+net/htmlparser/jericho/SubCache$TagIterator Ojava/lang/StringBuilder ij ghCache for TagType :   _Y  Dnet/htmlparser/jericho/SubCache$SourceCacheEntryMissingInternalError OCnet/htmlparser/jericho/SubCache$FoundCacheEntryMissingInternalError nW wx yx RSnet/htmlparser/jericho/SubCachejava/lang/Object>net/htmlparser/jericho/SubCache$CacheEntryMissingInternalError#(IILnet/htmlparser/jericho/Tag;ZZ)Vnet/htmlparser/jericho/CachegetSourceLengthindex nextCachedZpreviousCachednet/htmlparser/jericho/TagbeginpostagLnet/htmlparser/jericho/Tag; getTagType"()Lnet/htmlparser/jericho/TagType;net/htmlparser/jericho/TagType isServerTag()ZincludeInSearch$(Lnet/htmlparser/jericho/SubCache;)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;-(Ljava/lang/Object;)Ljava/lang/StringBuilder;net/htmlparser/jericho/ConfigNewLineLjava/lang/String;sourceLnet/htmlparser/jericho/Source;getPreviousTagUncached_(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/TagType;I)Lnet/htmlparser/jericho/Tag;net/htmlparser/jericho/SourceuseAllTypesCachegetNextTagUncached isRedundantremovedjava/lang/Mathmin(II)I`(Lnet/htmlparser/jericho/TagType;Lnet/htmlparser/jericho/Tag;Lnet/htmlparser/jericho/SubCache;)V09:BCDEFGHGIJKLMNOPQU I**@*+*,**YZS**Y+ Z SRSQ * `TUQ/#* *  *  ** SVWQE9*`**S* ** `Z * S*  XYQ& `>*Y,,SZ[QL @* =** `**Y++S* Y ` \UQ-!* *  ** * S]^Qnb* * *>*2: *_YQU I*>*2:*:*Y,`d`aQk_* * *=*2N----**--:*baQ{o* * *=*2N-)---*-*-:**--:*cdQ Y*efghQ* Y!"#ijQC7+$%*&'%W=*+*2&'%W+`kQzn- 8*(*+):*(**++*++++d=*+YNLbkQzn+ 8*(*-,:*(**+-* ----`=*-YLNlYQ@4, *,>*6*2:-6& .r/ 06] U**W *+**Y62:# ./ 06*2:H+1Y*,*2  / 063Y*,*4 *5mYQMA, * ,>*6*2:-6& .v/ 06a* U**W *+**Y62:# ./ 06w*2:H+1Y*,*2  / 063Y*,*4 *5nWQE9*=>/*2:/ *dZ SұopQI=,.*,6, + +.*+7, - -.*-7qrQ=*>z6*2:#*:   =;/*:   >`z6stQ 
*+ `2utQ *+ d2vSQ* wxQQE+ =**8**>!*2:*`Z S*+SyxQ:.*=+ >!*`2:*Z SzUQ5)*xL*=+*2S*+{|Q*}~Q*<*9;9=39> 19? @9A jericho-html-3.1/classes/net/htmlparser/jericho/TextExtractor$2.class0000644000175000017500000000132011214132414025711 0ustar twernertwerner1"   ()VCodeincludeAttributeF(Lnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/Attribute;)ZEnclosingMethod   name !&net/htmlparser/jericho/TextExtractor$2 InnerClassesjava/lang/Object#(Lnet/htmlparser/jericho/Element;)VCodegetPredefinedValue()Ljava/lang/String;getPredefinedValues()Ljava/util/Collection; Signature,()Ljava/util/Collection;getOptionElementIterator()Ljava/util/Iterator;8()Ljava/util/Iterator;setValue(Ljava/lang/String;)ZaddValue(Ljava/lang/String;Z)Z addValuesTo(Ljava/util/Collection;)V-(Ljava/util/Collection;)VaddToFormFields&(Lnet/htmlparser/jericho/FormFields;)VreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)VgetOptionLabel4(Lnet/htmlparser/jericho/Element;)Ljava/lang/String; multiple  ?option  3net/htmlparser/jericho/FormControl$ElementContainer <> I net/htmlparser/jericho/Element ?  'java/lang/UnsupportedOperationException;Use getPredefinedValues() method instead on SELECT controls ?java/util/LinkedHashSet ? Jnet/htmlparser/jericho/FormControl$SelectFormControl$OptionElementIterator ? MN selected L    java/lang/StringBuilder ? 
VW   Ulabel 4net/htmlparser/jericho/FormControl$SelectFormControl"net/htmlparser/jericho/FormControl getAttributes%()Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/Attributesget6(Ljava/lang/String;)Lnet/htmlparser/jericho/Attribute;&net/htmlparser/jericho/FormControlTypeSELECT_MULTIPLE(Lnet/htmlparser/jericho/FormControlType; SELECT_SINGLEr(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;ZLnet/htmlparser/jericho/FormControl$1;)VgetAllElements$(Ljava/lang/String;)Ljava/util/List;java/util/Listsize()Iiteratorjava/util/IteratorhasNext()Znext()Ljava/lang/Object;$(Lnet/htmlparser/jericho/Element;Z)VpredefinedValueLjava/lang/String;element Lnet/htmlparser/jericho/Element; getContent"()Lnet/htmlparser/jericho/Segment;)net/htmlparser/jericho/CharacterReferencedecodeCollapseWhiteSpace,(Ljava/lang/CharSequence;)Ljava/lang/String;(Ljava/lang/String;)V(IF)Vadd(Ljava/lang/Object;)Z_(Lnet/htmlparser/jericho/FormControl$SelectFormControl;Lnet/htmlparser/jericho/FormControl$1;)VformControlType setSelected((Ljava/lang/String;Ljava/lang/String;Z)ZgetBooleanAttribute access$200+(Ljava/util/Collection;Ljava/lang/String;)V!net/htmlparser/jericho/FormFields9(Lnet/htmlparser/jericho/FormControl;Ljava/lang/String;)V outputStyle/Lnet/htmlparser/jericho/FormControlOutputStyle;-net/htmlparser/jericho/FormControlOutputStyleREMOVE getElement"()Lnet/htmlparser/jericho/Element;%net/htmlparser/jericho/OutputDocumentremove#(Lnet/htmlparser/jericho/Segment;)V DISPLAY_VALUE(I)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;@net/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValueConfigDisplayValueMultipleValueSeparatorlengthjava/lang/String setLengthgetDisplayValueHTML-(Ljava/lang/CharSequence;Z)Ljava/lang/String;replace;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)V+replaceAttributesInOutputDocumentIfModifiedgetAttributeValue&(Ljava/lang/String;)Ljava/lang/String;$net/htmlparser/jericho/FormControl$1078<> ?@A*++ +M*,  >, : A: Y:* SBCA 
YDEA=1Y* h L=* +* 2W+FGHIA Y*FJKLA*+MLA!*+*MNA6*>6* * 2+ >OPA9-=* $* 2!+* 2"ٱFQRSA, =* +** 2#TUA*$%+*&'*$(l)Yd*M>* 0* 2!,* 2+,W,-,W,.,,.-/d0+*&*,12#*+3=* * 2+4 VWA!*56L++*:*7897; 8=8jericho-html-3.1/classes/net/htmlparser/jericho/BlankOutputSegment.class0000644000175000017500000000243211214132416026545 0ustar twernertwerner1I * + , -. -/ 0 1 234 5 6 7 *8 9 :;<=>beginIend(II)VCode#(Lnet/htmlparser/jericho/Segment;)VgetBegin()IgetEndwriteTo(Ljava/io/Writer;)V Exceptions?appendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String; getDebugInfo @  A    #$B CDjava/lang/StringBuilder E CF '(Replace with Spaces: (p CG CH-p)net/htmlparser/jericho/BlankOutputSegmentjava/lang/Object$net/htmlparser/jericho/OutputSegmentjava/io/IOException()Vnet/htmlparser/jericho/Segmentjava/lang/Appendableappend(C)Ljava/lang/Appendable;(I)V(C)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;0 *** *++** *+!"#$)*=*+ W!"%& **d'(<0 Y**d L*=*+ W+ )(4( Y **) jericho-html-3.1/classes/net/htmlparser/jericho/HTMLElements.class0000644000175000017500000001650111214132416025215 0ustar twernertwerner1                         !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX YZ C [ \ ] ^ _ ` abcALLLjava/util/List; Signature$Ljava/util/List;BLOCK+Lnet/htmlparser/jericho/HTMLElementNameSet;INLINEEND_TAG_FORBIDDEN_SET_UL_OL_DD_DT_THEAD_TBODY_TFOOT_TR_THEAD_TBODY_TFOOT_TR_TD_TH 
DEPRECATEDSTART_TAG_OPTIONAL_SETCONSTANT_NAME_MAPLjava/util/HashMap;9Ljava/util/HashMap;TERMINATING_TAG_NAME_SETS_MAPaLjava/util/HashMap;END_TAG_OPTIONAL_SETLjava/util/Set;#Ljava/util/Set;END_TAG_REQUIRED_SETCLOSING_SLASH_IGNORED_SET&END_TAG_REQUIRED_NESTING_FORBIDDEN_SET&END_TAG_OPTIONAL_NESTING_FORBIDDEN_SETNESTING_FORBIDDEN_SET()VCodegetElementNames()Ljava/util/List;&()Ljava/util/List;getBlockLevelElementNames()Ljava/util/Set;%()Ljava/util/Set;getInlineLevelElementNamesgetDeprecatedElementNamesgetEndTagForbiddenElementNamesgetEndTagOptionalElementNamesgetEndTagRequiredElementNamesgetStartTagOptionalElementNamesgetTerminatingStartTagNames#(Ljava/lang/String;)Ljava/util/Set;7(Ljava/lang/String;)Ljava/util/Set;getTerminatingEndTagNamesgetNonterminatingElementNamesgetNestingForbiddenElementNamesgetConstantElementName&(Ljava/lang/String;)Ljava/lang/String;isClosingSlashIgnored(Ljava/lang/String;)ZgetTerminatingTagNameSetsN(Ljava/lang/String;)Lnet/htmlparser/jericho/HTMLElementTerminatingTagNameSets;buildTerminatingTagNameSetsMap()Ljava/util/HashMap;c()Ljava/util/HashMap; buildTagMap;()Ljava/util/HashMap; d e f ghjava/lang/String ij 8net/htmlparser/jericho/HTMLElementTerminatingTagNameSetsjava/util/HashMap kbody)net/htmlparser/jericho/HTMLElementNameSethtml l mn o pqcolgroup rtabledd dldtheadframesetli optionoptgroupselectp msthtd captionlegendtbodytfoottheadtrt uvw xy z{!element!attlist!entity !notation![if![endifjava/util/ArrayListaabbracronymaddressappletareabbasebasefontbdobig blockquotebrbuttoncentercitecodecoldeldfndirdivemfieldsetfontformframeh1h2h3h4h5h6hriiframeimginputinsisindexkbdlabellinkmapmenumetanoframesnoscriptobjectolparampreqssampscriptsmallspanstrikestrongstylesubsuptextareatitlettuulvar| }~   s 
#net/htmlparser/jericho/HTMLElementsjava/lang/ObjectTerminatingStartTagNameSetTerminatingEndTagNameSetNonterminatingElementNameSetget&(Ljava/lang/Object;)Ljava/lang/Object;contains(Ljava/lang/Object;)Z(IF)V(Ljava/lang/String;)Vunion?(Ljava/lang/String;)Lnet/htmlparser/jericho/HTMLElementNameSet;0(Ljava/util/Set;Ljava/util/Set;Ljava/util/Set;)Vput8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;(Ljava/util/Collection;)VC(Ljava/util/Collection;)Lnet/htmlparser/jericho/HTMLElementNameSet;java/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;java/util/ArraysasList%([Ljava/lang/Object;)Ljava/util/List;([Ljava/lang/String;)VkeySetminus1*         * L++  * L++  * L++  !*L++** * 0 $Y K*YYYYW* YY!" Y# Y#W*$YY%"Y&$Y&W*'YY%"Y&'Y&W*(YY)Y(YW*YYYYW**YY*Y+"*Y+"W*,YY,-Y.,YW*/YY"%012*Y"%03045YW*6YY678Y#6Y#W*2YY3"Y!"#2Y#W*7YY678Y#7Y#W*1YY3"Y!"#1Y#W*8YY678Y#8Y#W*9YY!"Y!"#Y#W* vjY K:L+;+<M*,,W*==W*>>W*??W*@@W*AAW*BBW*CY[YDSYESYFSYGSYHSYISYJSYKSYLSY MSY NSY OSY SY PSYQSY4SYRSYSSYTSYUSY SY$SYVSYWSYXSYYSY&SY'SYZSY[SY\SY]SY ^SY!)SY"_SY#`SY$aSY%bSY&cSY'dSY((SY)eSY*SY+fSY,gSY-hSY.iSY/jSY0kSY1lSY2mSY35SY4*SY5nSY6oSY7pSY8qSY9rSY:sSY;tSY<uSY=-SY>,SY?/SY@vSYAwSYBxSYCySYDzSYE{SYF.SYG|SYH}SYI~SYJSYKSYLSYMSYN#SYO6SYP2SYQSYR7SYS1SYT8SYUSYV9SYWSYXSYYSYZSYY/SY_SY`SYaSYbSYcSYdSYSYuSY XSY pSY wSY &SY YSYRSYsSYrSYOSY]SYkSYeSY#SY[SYGSY(YSYfSYJSYSYySY~SYNSY|SYZSY SY WSY TSY zSY lSYSYSSYESYFSYDSYhSYHSYtSY\SYLSYPSY{SYoSYxSYSYSY}SYMSY gSY!iSY".SY#SY$mSY%QSY&jSY'VSY YISYKSYLSYPSYUSY^SYeSYhSYiSY kSY nSY qSY vSYu+Y$'%Y8679!Y!"213YHLRX\kpy~Y(6 Y0Y00YDGHQ4]gm5-{.Y (,/Y000jericho-html-3.1/classes/net/htmlparser/jericho/StartTagType.class0000644000175000017500000000732311214132414025347 0ustar twernertwerner1 IJK 'L &M INOP Q &R &S &T UV &W XYZ [ \] ^_ &` ab &c de &f gh &i jk &l mn &o pq &r st &u vw &x yz &{|}correspondingEndTagType#Lnet/htmlparser/jericho/EndTagType; hasAttributesZisNameAfterPrefixRequiredSTART_DELIMITER_PREFIXLjava/lang/String; ConstantValue 
UNREGISTERED%Lnet/htmlparser/jericho/StartTagType;NORMALCOMMENTXML_DECLARATIONXML_PROCESSING_INSTRUCTIONDOCTYPE_DECLARATIONMARKUP_DECLARATION CDATA_SECTION SERVER_COMMONSERVER_COMMON_ESCAPED_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)VCodegetCorrespondingEndTagType%()Lnet/htmlparser/jericho/EndTagType;()ZatEndOfAttributes$(Lnet/htmlparser/jericho/Source;IZ)ZconstructStartTagy(Lnet/htmlparser/jericho/Source;IILjava/lang/String;Lnet/htmlparser/jericho/Attributes;)Lnet/htmlparser/jericho/StartTag;parseAttributesW(Lnet/htmlparser/jericho/Source;ILjava/lang/String;)Lnet/htmlparser/jericho/Attributes;()V~ < ; "java/lang/IllegalArgumentException1startDelimiter of a start tag must start with "<" ; () *+ ,+  net/htmlparser/jericho/StartTag ;  01 21 31 41 51 61 71 81 91 :1#net/htmlparser/jericho/StartTagTypenet/htmlparser/jericho/TagTypejava/lang/String toLowerCase()Ljava/lang/String;L(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZLjava/lang/String;)VgetStartDelimiter startsWith(Ljava/lang/String;)Z(Ljava/lang/String;)Vnet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText;getClosingDelimiter net/htmlparser/jericho/ParseText containsAt(Ljava/lang/String;I)Z~(Lnet/htmlparser/jericho/Source;IILnet/htmlparser/jericho/StartTagType;Ljava/lang/String;Lnet/htmlparser/jericho/Attributes;)V!net/htmlparser/jericho/Attributes 
construct|(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/StartTagType;Ljava/lang/String;)Lnet/htmlparser/jericho/Attributes;/net/htmlparser/jericho/StartTagTypeUnregisteredINSTANCE1Lnet/htmlparser/jericho/StartTagTypeUnregistered;)net/htmlparser/jericho/StartTagTypeNormal+Lnet/htmlparser/jericho/StartTagTypeNormal;*net/htmlparser/jericho/StartTagTypeComment,Lnet/htmlparser/jericho/StartTagTypeComment;1net/htmlparser/jericho/StartTagTypeXMLDeclaration3Lnet/htmlparser/jericho/StartTagTypeXMLDeclaration;;net/htmlparser/jericho/StartTagTypeXMLProcessingInstruction=Lnet/htmlparser/jericho/StartTagTypeXMLProcessingInstruction;5net/htmlparser/jericho/StartTagTypeDoctypeDeclaration7Lnet/htmlparser/jericho/StartTagTypeDoctypeDeclaration;4net/htmlparser/jericho/StartTagTypeMarkupDeclaration6Lnet/htmlparser/jericho/StartTagTypeMarkupDeclaration;/net/htmlparser/jericho/StartTagTypeCDATASection1Lnet/htmlparser/jericho/StartTagTypeCDATASection;/net/htmlparser/jericho/StartTagTypeServerCommon1Lnet/htmlparser/jericho/StartTagTypeServerCommon;6net/htmlparser/jericho/StartTagTypeServerCommonEscaped8Lnet/htmlparser/jericho/StartTagTypeServerCommonEscaped;!&'()*+,+-./012131415161718191:1;<=C7*+,-* Y* * * >?=* *@=* ,@=* AB=+ * CD=Y+*EF=+*-GH=I= !"#$%jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$TD_ElementHandler.class0000644000175000017500000000217211214132414031726 0ustar twernertwerner12        "#$INSTANCE& Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions'  () *+ ,- ./ 0;net/htmlparser/jericho/Renderer$Processor$TD_ElementHandlerTD_ElementHandler java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler1)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$2000.(Lnet/htmlparser/jericho/Renderer$Processor;)Z 
access$2100?(Lnet/htmlparser/jericho/Renderer$Processor;)Ljava/lang/String; access$1100j(Lnet/htmlparser/jericho/Renderer$Processor;Ljava/lang/String;)Lnet/htmlparser/jericho/Renderer$Processor; access$202/(Lnet/htmlparser/jericho/Renderer$Processor;Z)Z access$000net/htmlparser/jericho/Renderer    *(+ ++W+W+, Y  %   ! jericho-html-3.1/classes/net/htmlparser/jericho/SubCache$CacheEntry.class0000644000175000017500000000227711214132414026446 0ustar twernertwerner1N ( ) * + , - ./ ( 0 12 34 56 7 89 : 8;=@indexIpostagLnet/htmlparser/jericho/Tag;previousCachedZ nextCachedremoved#(IILnet/htmlparser/jericho/Tag;ZZ)VCode isRedundant()ZtoString()Ljava/lang/String;pad(II)Ljava/lang/String; A      java/lang/StringBuilder &' BC BDnullE F% $%G HI J KLM*net/htmlparser/jericho/SubCache$CacheEntry CacheEntry InnerClassesjava/lang/Object()Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;net/htmlparser/jericho/Tag getDebugInfojava/lang/StringvalueOf(I)Ljava/lang/String;(I)Vlength()Inet/htmlparser/jericho/SubCache0 !1%*****-**"#!'***$%!{oY **  **  *|-  *|-  * * &'!C7NY:-d6 W- W? 
<>jericho-html-3.1/classes/net/htmlparser/jericho/Attribute.class0000644000175000017500000000660511214132414024721 0ustar twernertwerner1 $Y !Z ![ !\ $] $^ $_ $` !a bc $d efg h i !jk $lm n o pqr a st suv wx !x wy !z ${| wz b}~keyLjava/lang/String; nameSegment Lnet/htmlparser/jericho/Segment; valueSegmentvalueSegmentIncludingQuotesCHECKED ConstantValueCLASSDISABLEDIDMULTIPLENAMESELECTEDSTYLETYPEVALUET(Lnet/htmlparser/jericho/Source;Ljava/lang/String;Lnet/htmlparser/jericho/Segment;)VCode(Lnet/htmlparser/jericho/Source;Ljava/lang/String;Lnet/htmlparser/jericho/Segment;Lnet/htmlparser/jericho/Segment;Lnet/htmlparser/jericho/Segment;)VgetKey()Ljava/lang/String;getNamegetNameSegment"()Lnet/htmlparser/jericho/Segment;hasValue()ZgetValuegetValueSegmentgetValueSegmentIncludingQuotes getQuoteChar()C getDebugInfo appendTidyP(Ljava/lang/Appendable;Lnet/htmlparser/jericho/Tag;)Lnet/htmlparser/jericho/Tag; ExceptionsappendTidyValue1(Ljava/lang/Appendable;Ljava/lang/CharSequence;)V appendHTML^(Ljava/lang/Appendable;Ljava/lang/CharSequence;Ljava/lang/CharSequence;)Ljava/lang/Appendable; @C @ %& '( )( *( E  java/lang/StringBuilder @ PE,name= IJ,value=  & ,NO VALUE =" UVnet/htmlparser/jericho/Segment  net/htmlparser/jericho/Attributecheckedclassdisabledidmultiplenameselectedstyletypevaluejava/io/IOExceptiongetBegin()IgetEnd$(Lnet/htmlparser/jericho/Source;II)VtoString)net/htmlparser/jericho/CharacterReferencedecode-(Ljava/lang/CharSequence;Z)Ljava/lang/String;sourceLnet/htmlparser/jericho/Source;net/htmlparser/jericho/SourcecharAt(I)C()Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;3(Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;net/htmlparser/jericho/ConfigNewLinejava/lang/Appendable(C)Ljava/lang/Appendable;0(Ljava/lang/CharSequence;)Ljava/lang/Appendable;net/htmlparser/jericho/TagbeginI getNextTag()Lnet/htmlparser/jericho/Tag;end 
appendEncodeG(Ljava/lang/Appendable;Ljava/lang/CharSequence;Z)Ljava/lang/Appendable;1$!%&'()(*(+&,-.&,/0&,12&,34&,56&,78&,9:&,;<&,=>&,?@AB *+,-@CB=1*+- -*,*-**DEB*FEB* GHB*IJB *KEB * LHB*MHB*NOB)** * * PEBui Y***L*.+*"*"W+W+QRB+ *W*+W,,* ,M,,*+* *>,j,*\+!Y* , ,"*%+!Y* ,*Y>W+,W,">,M*+!Y* * +"W,ST UVB *+ #WSTWXB:.* +W,*W*,#W*"W*STjericho-html-3.1/classes/net/htmlparser/jericho/SubCache$FoundCacheEntryMissingInternalError.class0000644000175000017500000000116511214132414033476 0ustar twernertwerner1   `(Lnet/htmlparser/jericho/TagType;Lnet/htmlparser/jericho/Tag;Lnet/htmlparser/jericho/SubCache;)VCode!missing cache entry for found tag Cnet/htmlparser/jericho/SubCache$FoundCacheEntryMissingInternalError#FoundCacheEntryMissingInternalError InnerClasses>net/htmlparser/jericho/SubCache$CacheEntryMissingInternalErrorCacheEntryMissingInternalErrorr(Lnet/htmlparser/jericho/TagType;Lnet/htmlparser/jericho/Tag;Lnet/htmlparser/jericho/SubCache;Ljava/lang/String;)Vnet/htmlparser/jericho/SubCache  *+,-    jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$TR_ElementHandler.class0000644000175000017500000000150611214132414031744 0ustar twernertwerner1&     INSTANCE  Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions!  "# $;net/htmlparser/jericho/Renderer$Processor$TR_ElementHandlerTR_ElementHandler java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler%)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$400/(Lnet/htmlparser/jericho/Renderer$Processor;I)V access$000net/htmlparser/jericho/Renderer  *++,+ Y       jericho-html-3.1/classes/net/htmlparser/jericho/Segment.class0000644000175000017500000002517711214132414024365 0ustar twernertwerner1f c         c         $  $  $ $ $     $ $ $                 E      M M   M e e M M ! 
"# $ %  & '( ')  *+,-beginIendsourceLnet/htmlparser/jericho/Source; WHITESPACE[C$(Lnet/htmlparser/jericho/Source;II)VCode(I)V()V(II)V getSource!()Lnet/htmlparser/jericho/Source;getBegin()IgetEndequals(Ljava/lang/Object;)ZhashCodelengthencloses#(Lnet/htmlparser/jericho/Segment;)Z(I)ZtoString()Ljava/lang/String; getRenderer#()Lnet/htmlparser/jericho/Renderer;getTextExtractor(()Lnet/htmlparser/jericho/TextExtractor;getNodeIterator()Ljava/util/Iterator; Signature8()Ljava/util/Iterator; getAllTags()Ljava/util/List;0()Ljava/util/List;2(Lnet/htmlparser/jericho/TagType;)Ljava/util/List;P(Lnet/htmlparser/jericho/TagType;)Ljava/util/List;getAllStartTags5()Ljava/util/List;7(Lnet/htmlparser/jericho/StartTagType;)Ljava/util/List;Z(Lnet/htmlparser/jericho/StartTagType;)Ljava/util/List;$(Ljava/lang/String;)Ljava/util/List;G(Ljava/lang/String;)Ljava/util/List;7(Ljava/lang/String;Ljava/lang/String;Z)Ljava/util/List;Z(Ljava/lang/String;Ljava/lang/String;Z)Ljava/util/List;=(Ljava/lang/String;Ljava/util/regex/Pattern;)Ljava/util/List;`(Ljava/lang/String;Ljava/util/regex/Pattern;)Ljava/util/List;getAllStartTagsByClassgetChildElements4()Ljava/util/List;getAllElementsF(Ljava/lang/String;)Ljava/util/List;Y(Lnet/htmlparser/jericho/StartTagType;)Ljava/util/List;Y(Ljava/lang/String;Ljava/lang/String;Z)Ljava/util/List;_(Ljava/lang/String;Ljava/util/regex/Pattern;)Ljava/util/List;getAllElementsByClassgetAllCharacterReferences?()Ljava/util/List;getFirstStartTag#()Lnet/htmlparser/jericho/StartTag;H(Lnet/htmlparser/jericho/StartTagType;)Lnet/htmlparser/jericho/StartTag;5(Ljava/lang/String;)Lnet/htmlparser/jericho/StartTag;H(Ljava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/StartTag;N(Ljava/lang/String;Ljava/util/regex/Pattern;)Lnet/htmlparser/jericho/StartTag;getFirstStartTagByClassgetFirstElement"()Lnet/htmlparser/jericho/Element;4(Ljava/lang/String;)Lnet/htmlparser/jericho/Element;G(Ljava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/Element;M(Ljava/lang/String;Ljava/util/r
egex/Pattern;)Lnet/htmlparser/jericho/Element;getFirstElementByClassgetFormControls8()Ljava/util/List; getFormFields%()Lnet/htmlparser/jericho/FormFields;parseAttributes%()Lnet/htmlparser/jericho/Attributes;ignoreWhenParsing compareTo#(Lnet/htmlparser/jericho/Segment;)I isWhiteSpace()Z(C)Z getDebugInfocharAt(I)C subSequence(II)Ljava/lang/CharSequence;appendCollapseWhiteSpaceL(Ljava/lang/StringBuilder;Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;getClassPattern-(Ljava/lang/String;)Ljava/util/regex/Pattern;"(Ljava/util/List;)Ljava/util/List;g(Ljava/util/List;)Ljava/util/List;checkEnclosureD(Lnet/htmlparser/jericho/StartTag;)Lnet/htmlparser/jericho/StartTag;checkTagEnclosure:(Lnet/htmlparser/jericho/Tag;)Lnet/htmlparser/jericho/Tag;getNextCharacterReference.(I)Lnet/htmlparser/jericho/CharacterReference;(Ljava/lang/Object;)IbLjava/lang/Object;Ljava/lang/Comparable;Ljava/lang/CharSequence; mq"java/lang/IllegalArgumentException fg hg source argument must not be null m. ijnet/htmlparser/jericho/Source mr /'java/lang/UnsupportedOperationException8Source object is not available when using StreamedSourcenet/htmlparser/jericho/Segment net/htmlparser/jericho/Renderer m0$net/htmlparser/jericho/TextExtractor#net/htmlparser/jericho/NodeIterator 1 23 4 5java/util/ArrayList 6y 27 89 : net/htmlparser/jericho/StartTag ;<= >? 
8@ :A :Bclass {v :C DEF G HIJ &startTagType argument must not be null K :L :M NM O PQ!net/htmlparser/jericho/FormFields mR S r kljava/lang/StringBuilder mp TU VWX YZ.*(\s|^) T[(\s|$).*\ ]^ _ `v ab c de java/lang/Objectjava/lang/Comparablejava/lang/CharSequence(Ljava/lang/String;)V isStreamed#(Lnet/htmlparser/jericho/Segment;)Vnet/htmlparser/jericho/Tag getNextTag^(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag;java/util/Collections emptyListadd>(Lnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag;getNextC(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/StartTag;getNextStartTag isXMLName(Ljava/lang/CharSequence;)Z#net/htmlparser/jericho/StartTagTypeNORMAL%Lnet/htmlparser/jericho/StartTagType;{(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/StartTagType;Z)Lnet/htmlparser/jericho/StartTag;I(ILjava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/StartTag;O(ILjava/lang/String;Ljava/util/regex/Pattern;)Lnet/htmlparser/jericho/StartTag;$(I)Lnet/htmlparser/jericho/StartTag; getTagType"()Lnet/htmlparser/jericho/TagType;net/htmlparser/jericho/TagType isServerTag getElementjava/util/Listnet/htmlparser/jericho/Element)net/htmlparser/jericho/CharacterReferenceI(ILnet/htmlparser/jericho/StartTagType;)Lnet/htmlparser/jericho/StartTag;6(ILjava/lang/String;)Lnet/htmlparser/jericho/StartTag;getNextStartTagByClass"net/htmlparser/jericho/FormControlgetAll2(Lnet/htmlparser/jericho/Segment;)Ljava/util/List;(Ljava/util/Collection;)V'(II)Lnet/htmlparser/jericho/Attributes;append(C)Ljava/lang/StringBuilder;getRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;&net/htmlparser/jericho/RowColumnVectorappendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;-(Ljava/lang/String;)Ljava/lang/StringBuilder;java/util/regex/Patterncompile.(Ljava/lang/String;I)Ljava/util/regex/Pattern;isEmptysizeiteratorjava/util/IteratorhasNextnext()Ljava/lang/Object;!cdefghgijkl=mnoE9*  Y**+ Y*+mpo#***** 
mqo* mro ****sto%* Y *uvo*wvo*xyoL@*++ ++M,*,*,*zvo **`{vo **d|}o(*+*+|~o"**o***o Y*o Y*o Y*o*oC7***+M,YN-,W*,+M,-oA5*** !L+YM,+W*+"!L+,oRF+*#***+$M,YN-,W*,+%$M,-oeY+*#+&=***+'(!N-Y:-W**-)`+'(!N-oYM***+,*!:Y:W**)`+,*!:oQE***+,+!N-Y:-W**-)`+,+!N-o *,+-.oqe*/YL*=*0N-A-)*3-12 -3=-4:+5W6W7=+o **#8o **+98o$+ Y:**+;8o **+,<8o **+,.8o **+=8o:.**>L+YM,+W*+?>L+,o***0!o***+@!o***+A!o***+,*!o***+,+!o***+B!o@4*** !L+!+4M,7*,*+"!LodX+*C+&=***+'(!N-2-4:7***-)`+'(!NoWK***+,*!:34:7***)`+,*!:oPD***+,+!N-/-4:7***-)`+,+!NoK?***+B!M,+,4N-7*-**,)`+B!Mo*Do EY*FGo***Hqo***IoI=*+*+*+*+*+o1%*<**JKo/#LL+=>+46oPDMY2NL+(OW**P+QW+-OW**P+QW+)OW+Ro**`Jo **`*`ol`+S=>6*+TK +T6K 6 * OW6*OW*o*MYUVW*WXWR YoaU+ZY+[\M+]N-^+-_$:4:7* ,W,o++3*+o++`*+o(*aM,,?*,Ao *+bqo3'Y UY UY UY UY UY ULjericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderSLF4J$SLF4JLogger.class0000644000175000017500000000160511214132416030417 0ustar twernertwerner11    ! " # $ % & '),- slf4jLoggerLorg/slf4j/Logger;(Lorg/slf4j/Logger;)VCodeerror(Ljava/lang/String;)VwarninfodebugisErrorEnabled()Z isWarnEnabled isInfoEnabledisDebugEnabled . /        06net/htmlparser/jericho/LoggerProviderSLF4J$SLF4JLogger SLF4JLogger InnerClassesjava/lang/Objectnet/htmlparser/jericho/Logger()Vorg/slf4j/Logger*net/htmlparser/jericho/LoggerProviderSLF4J    **+ *+ *+ *+ *+ * * *  * +  (* jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeMasonNamedBlock.class0000644000175000017500000000216311214132416030264 0ustar twernertwerner19     !" 
# $ !%& ' ()INSTANCE4Lnet/htmlparser/jericho/StartTagTypeMasonNamedBlock;()VCodeconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;mason named block<%>* + , - ./0 12 34 56 782net/htmlparser/jericho/StartTagTypeMasonNamedBlock  8net/htmlparser/jericho/StartTagTypeGenericImplementation0net/htmlparser/jericho/EndTagTypeMasonNamedBlock2Lnet/htmlparser/jericho/EndTagTypeMasonNamedBlock;_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)Vnet/htmlparser/jericho/TaggetEnd()Inet/htmlparser/jericho/SourcecharAt(I)CgetName()Ljava/lang/String;getCorrespondingEndTagType%()Lnet/htmlparser/jericho/EndTagType; getNextEndTagW(ILjava/lang/String;Lnet/htmlparser/jericho/EndTagType;)Lnet/htmlparser/jericho/EndTag;0 *A5*+N-+-d%+-- * - Y jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor.class0000644000175000017500000002631311214132414026466 0ustar twernertwerner1 * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A BC C D E F G H I J BK BL BM N O P Q 2R 2S 2T UL VW 2M XY Z[ Z\] 2L 2K ^ _ >` Ua bc d e 2f ghi d jk lK m lL no E@ p q jr st uv uw sx jw Byz u{ | }~ }  w  Z@ d g    d       d d  d d d  Processor InnerClassesTR_ElementHandlerTD_ElementHandlerPRE_ElementHandlerLI_ElementHandlerListElementHandlerHR_ElementHandlerBR_ElementHandlerA_ElementHandlerStandardBlockElementHandlerFontStyleElementHandlerStandardInlineElementHandlerRemoveElementHandlerElementHandlerrenderer!Lnet/htmlparser/jericho/Renderer; rootSegment Lnet/htmlparser/jericho/Segment;sourceLnet/htmlparser/jericho/Source; maxLineLengthInewLineLjava/lang/String;includeHyperlinkURLsZdecorateFontStylesconvertNonBreakingSpacesblockIndentSizelistIndentSize listBullets[CtableCellSeparator appendableLjava/lang/Appendable; renderedIndex atStartOfLinecolblockIndentLevellistIndentLevelblockVerticalMargin preformattedlastCharWhiteSpaceignoreInitialWhitespacebulletlistBulletNumber NO_MARGIN ConstantValueUNORDERED_LISTELEMENT_HANDLERSLjava/util/Map; 
Signature]Ljava/util/Map;$assertionsDisabledp(Lnet/htmlparser/jericho/Renderer;Lnet/htmlparser/jericho/Segment;ILjava/lang/String;ZZZII[CLjava/lang/String;)VCodeappendTo(Ljava/lang/Appendable;)V Exceptionsreset()VappendElementContent#(Lnet/htmlparser/jericho/Element;)V$appendSegmentProcessingChildElements(IILjava/util/List;)V7(IILjava/util/List;)VgetElementHandler\(Lnet/htmlparser/jericho/Element;)Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;appendSegmentRemovingTags(II)V appendSegmentappendPreformattedSegmentappendNonPreformattedSegmentisStartOfBlock()ZappendBlockVerticalMargin blockBoundary(I)V startNewLine appendIndentappend.(C)Lnet/htmlparser/jericho/Renderer$Processor;?(Ljava/lang/String;)Lnet/htmlparser/jericho/Renderer$Processor;(Ljava/lang/CharSequence;II)V access$000N(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V access$100.(Lnet/htmlparser/jericho/Renderer$Processor;)Z access$200 access$300Y(Lnet/htmlparser/jericho/Renderer$Processor;C)Lnet/htmlparser/jericho/Renderer$Processor; access$202/(Lnet/htmlparser/jericho/Renderer$Processor;Z)Z access$400/(Lnet/htmlparser/jericho/Renderer$Processor;I)V access$508.(Lnet/htmlparser/jericho/Renderer$Processor;)I access$510 access$600 access$700N(Lnet/htmlparser/jericho/Renderer$Processor;)Lnet/htmlparser/jericho/Renderer; access$800 access$900 access$1000 access$1100j(Lnet/htmlparser/jericho/Renderer$Processor;Ljava/lang/String;)Lnet/htmlparser/jericho/Renderer$Processor; access$1200.(Lnet/htmlparser/jericho/Renderer$Processor;)V access$1300 access$1400 access$1402/(Lnet/htmlparser/jericho/Renderer$Processor;I)I access$1508 access$1510 access$1408 access$1602 access$1700 access$1802 access$1900 access$1902 access$2000 access$2100?(Lnet/htmlparser/jericho/Renderer$Processor;)Ljava/lang/String;            net/htmlparser/jericho/Element      8net/htmlparser/jericho/Renderer$Processor$ElementHandler  java/lang/AssertionError      From      .  
net/htmlparser/jericho/Renderer  java/util/HashMapa  address appletb  blockquote brbuttoncaptioncenter code dd dir divdtem fieldsetformh1 h2h3h4h5h6headhrilegendlimenumapnoframesnoscriptol pprescriptselectstrongstyletextareatdthtru ul)net/htmlparser/jericho/Renderer$Processorjava/lang/Object;net/htmlparser/jericho/Renderer$Processor$TR_ElementHandler;net/htmlparser/jericho/Renderer$Processor$TD_ElementHandlernet/htmlparser/jericho/Renderer$Processor$RemoveElementHandlerjava/io/IOExceptionnet/htmlparser/jericho/SegmentbeginendgetChildElements()Ljava/util/List; getContentEnd()IisEmpty getStartTag#()Lnet/htmlparser/jericho/StartTag;net/htmlparser/jericho/StartTagjava/lang/Mathmax(II)Ijava/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNextnext()Ljava/lang/Object;processgetStartTagType'()Lnet/htmlparser/jericho/StartTagType;#net/htmlparser/jericho/StartTagType isServerTagINSTANCE:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;getName()Ljava/lang/String; java/util/Mapget&(Ljava/lang/Object;)Ljava/lang/Object;net/htmlparser/jericho/Source getNextTag(I)Lnet/htmlparser/jericho/Tag;net/htmlparser/jericho/Tag subSequence(II)Ljava/lang/CharSequence;)net/htmlparser/jericho/CharacterReferencedecode.(Ljava/lang/CharSequence;ZZ)Ljava/lang/String;java/lang/StringlengthcharAt(I)CdecodeCollapseWhiteSpace-(Ljava/lang/CharSequence;Z)Ljava/lang/String; isWhiteSpace(C)Z startsWith(Ljava/lang/String;I)Zjava/lang/Appendable0(Ljava/lang/CharSequence;)Ljava/lang/Appendable;(C)Ljava/lang/Appendable;java/lang/IntegertoString(I)Ljava/lang/String;java/lang/CharSequencejava/lang/ClassdesiredAssertionStatusput8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object; INSTANCE_0_0 INSTANCE_BINSTANCE_1_1_INDENT INSTANCE_1_1 INSTANCE_CODEINSTANCE_0_0_INDENT INSTANCE_UL INSTANCE_I INSTANCE_2_1 INSTANCE_OL INSTANCE_U0 0X L**+*,*,* ****** * * /#* *+!**"*#*$%?3*&*'****(***Z*;/+)=+* *&++,>**&-+.%{o6-/:0O12:34*456*7*&3-6 *5 9-*+89:;*<=>L++?C7>*@:"A*ABC>*By]D EYF*&*&<* *G 
*H*&*&N*&*&-&<LLMLD EYFD*& EYF** *I*JN6-KT-L6 * 8 ** `6-K!-L  *WeYD EYFD*& EYF*I*MN-K*** * &**NO* * W66**Z-KK-L 7`-K-`L>`-K-P`Q *`d`* %*** * W* * * W*-R-K*6K**dNO*(0$D*( EYF**( *(*(*(F:*'`=>*!*SW*'**Z**!*SW*'**Z **h<*! TW**d*h<*! TW*E*d<*! TW*!**d*p4T TW?*UL*+Kdd=*! TW*!+SVSW*#**h<*! TW***h**h`*'."*'**!TW*Y`*1%*'**!+SW*Y+K`*E9*'*6*!+WTW*Yd`*+***  *Z  *  *YZ` *YZd****  * *+ * * **Z *YZ` *YZd  *YZ`! *Z"*# *Z$*% *Z&*'(*)XYDZY[;;\]^W;_`^W;a:^W;bc^W;de^W;fg^W;h:^W;i`^W;jk^W;lm^W;no^W;pq^W;r`^W;s`^W;tu^W;vk^W;wk^W;xy^W;zy^W;{y^W;|y^W;}y^W;~y^W;:^W;^W;u^W;`^W;^W;q^W;:^W;:^W;:^W;^W;k^W;^W;:^W;:^W;c^W;:^W;:^W;^W;^W;^W;^W;q^WrX > jericho-html-3.1/classes/net/htmlparser/jericho/ParseText.class0000644000175000017500000000077611214132414024700 0ustar twernertwerner1NO_BREAKI ConstantValuecharAt(I)C containsAt(Ljava/lang/String;I)ZindexOf(CI)I(CII)I(Ljava/lang/String;I)I(Ljava/lang/String;II)I lastIndexOflength()I subSequence(II)Ljava/lang/CharSequence;toString()Ljava/lang/String; net/htmlparser/jericho/ParseTextjava/lang/Objectjava/lang/CharSequence        jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderLog4J$Log4JLogger.class0000644000175000017500000000207511214132416030613 0ustar twernertwerner1=   ! " # $ %& ' %( %) %*,/0 log4JLoggerLorg/apache/log4j/Logger;(Lorg/apache/log4j/Logger;)VCodeerror(Ljava/lang/String;)VwarninfodebugisErrorEnabled()Z isWarnEnabled isInfoEnabledisDebugEnabled 1 2 3 3 3 34 56 78 96 :6 ;6<6net/htmlparser/jericho/LoggerProviderLog4J$Log4JLogger Log4JLogger InnerClassesjava/lang/Objectnet/htmlparser/jericho/Logger()Vorg/apache/log4j/Logger(Ljava/lang/Object;)Vorg/apache/log4j/LevelERRORLorg/apache/log4j/Level; isEnabledFor(Lorg/apache/log4j/Priority;)ZWARNINFODEBUG*net/htmlparser/jericho/LoggerProviderLog4J   **+ *+ *+ *+ *+ * *  *  * .  
+- jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeXMLDeclaration.class0000644000175000017500000000075511214132416030102 0ustar twernertwerner1   INSTANCE3Lnet/htmlparser/jericho/StartTagTypeXMLDeclaration;()VCodeXML declaration 1net/htmlparser/jericho/StartTagTypeXMLDeclaration 8net/htmlparser/jericho/StartTagTypeGenericImplementation_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)V0   *   Yjericho-html-3.1/classes/net/htmlparser/jericho/EndTagTypeMasonNamedBlock.class0000644000175000017500000000123211214132416027671 0ustar twernertwerner1     INSTANCE2Lnet/htmlparser/jericho/EndTagTypeMasonNamedBlock;()VCodegetCorrespondingStartTagType'()Lnet/htmlparser/jericho/StartTagType;/mason named block  0net/htmlparser/jericho/EndTagTypeMasonNamedBlock 6net/htmlparser/jericho/EndTagTypeGenericImplementation;(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZZ)V$net/htmlparser/jericho/MasonTagTypesMASON_NAMED_BLOCK%Lnet/htmlparser/jericho/StartTagType;0    *  Yjericho-html-3.1/classes/net/htmlparser/jericho/TextExtractor$Processor.class0000644000175000017500000000535711214132414027545 0ustar twernertwerner1 .? 
/@ .A BC .C .D .E .FG BH IJ K L MN O PQ R ST T U VWX YZ[ \] ^_ `a R b cd eL efg \h i $j k lm ^n oa pqrusegment Lnet/htmlparser/jericho/Segment;sourceLnet/htmlparser/jericho/Source;convertNonBreakingSpacesZincludeAttributesexcludeNonHTMLElementsthis$0&Lnet/htmlparser/jericho/TextExtractor;L(Lnet/htmlparser/jericho/TextExtractor;Lnet/htmlparser/jericho/Segment;ZZZ)VCodetoString()Ljava/lang/String; 89 :v 01w 23 45 65 75java/lang/StringBuilder xy :z#net/htmlparser/jericho/NodeIterator :{ |} ~net/htmlparser/jericho/Tag  }  y z net/htmlparser/jericho/StartTag scriptstyle     ~ net/htmlparser/jericho/Attribute  >br  .net/htmlparser/jericho/TextExtractor$Processor Processor InnerClassesjava/lang/Object()Vnet/htmlparser/jericho/Segmentlength()I(I)V#(Lnet/htmlparser/jericho/Segment;)VhasNext()Znext"()Lnet/htmlparser/jericho/Segment; getTagType"()Lnet/htmlparser/jericho/TagType;net/htmlparser/jericho/TagType isServerTag getElement"()Lnet/htmlparser/jericho/Element;net/htmlparser/jericho/ElementgetEnd skipToPos#net/htmlparser/jericho/StartTagTypeNORMAL%Lnet/htmlparser/jericho/StartTagType;nameLjava/lang/String;$net/htmlparser/jericho/TextExtractorexcludeElement$(Lnet/htmlparser/jericho/StartTag;)Z#net/htmlparser/jericho/HTMLElementsgetElementNames()Ljava/util/List;java/util/Listcontains(Ljava/lang/Object;)Z getAttributes%()Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/Attributesiterator()Ljava/util/Iterator;java/util/Iterator()Ljava/lang/Object;includeAttributeF(Lnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/Attribute;)Zappend(C)Ljava/lang/StringBuilder;getValueSegment3(Ljava/lang/CharSequence;)Ljava/lang/StringBuilder;getNamegetInlineLevelElementNames()Ljava/util/Set; java/util/Set)net/htmlparser/jericho/CharacterReferencedecodeCollapseWhiteSpace-(Ljava/lang/CharSequence;Z)Ljava/lang/String;0./012345657589:;<4(*+**,*,***=><L@ Y* L Y* M,,N--:(: ,:0&**",b*H !:"4#$:*%+ &'( &W)*+), + &W +-(W+*-M,t 
.\sjericho-html-3.1/classes/net/htmlparser/jericho/WriterLogger.class0000644000175000017500000000350411214132416025367 0ustar twernertwerner1f >? @ A B C D E F G HI J KL MN OP QR ST SUVW XYZ[writerLjava/io/Writer;nameLjava/lang/String; errorEnabledZ warnEnabled infoEnabled debugEnabled(Ljava/io/Writer;)VCode%(Ljava/io/Writer;Ljava/lang/String;)V getWriter()Ljava/io/Writer;getName()Ljava/lang/String;error(Ljava/lang/String;)VwarninfodebugisErrorEnabled()ZsetErrorEnabled(Z)V isWarnEnabledsetWarnEnabled isInfoEnabledsetInfoEnabledisDebugEnabledsetDebugEnabledlog'(Ljava/lang/String;Ljava/lang/String;)V\ ] %( %^ ! "! #! $!   23ERROR <= 63WARN 83INFO :3DEBUG_ `ab c. d^java/io/IOExceptionjava/lang/RuntimeException %e#net/htmlparser/jericho/WriterLoggerjava/lang/Objectnet/htmlparser/jericho/Loggernet/htmlparser/jericho/Source PACKAGE_NAME()V(net/htmlparser/jericho/BasicLogFormatterformatJ(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;java/io/Writerwriteflush(Ljava/lang/Throwable;)V! !"!#!$!%&' *+%('/#******+*, )*'*+,'* -.'* * + /.'* *+ 0.'* *+ 1.'* *+ 23'*45'*63'*75'*83'*95'*:3'*;5'*<='9%*+,* * NY-jericho-html-3.1/classes/net/htmlparser/jericho/SourceFormatter.class0000644000175000017500000000460111214132414026074 0ustar twernertwerner1w GH I J K L M N O P QRS T U V W X Y Z P [\ ]^_` a [b cdefg Processor InnerClassessegment Lnet/htmlparser/jericho/Segment; indentStringLjava/lang/String;tidyTagsZcollapseWhiteSpaceremoveLineBreaksindentAllElementsnewLine#(Lnet/htmlparser/jericho/Segment;)VCodewriteTo(Ljava/io/Writer;)V ExceptionshappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String;setIndentString<(Ljava/lang/String;)Lnet/htmlparser/jericho/SourceFormatter;getIndentString setTidyTags+(Z)Lnet/htmlparser/jericho/SourceFormatter; getTidyTags()ZsetCollapseWhiteSpacegetCollapseWhiteSpacesetRemoveLineBreaksgetRemoveLineBreakssetIndentAllElementsgetIndentAllElements setNewLine getNewLine +i #$ %& '& (& )& *$ !" 
23j ki0net/htmlparser/jericho/SourceFormatter$Processor :7 => @> B> D> F7 +lm nop 6q"java/lang/IllegalArgumentException&indentString property must not be null +r stu v7&net/htmlparser/jericho/SourceFormatterjava/lang/Object'net/htmlparser/jericho/CharStreamSourcejava/io/IOException()Vjava/io/WriterflushL(Lnet/htmlparser/jericho/Segment;Ljava/lang/String;ZZZZZLjava/lang/String;)Vnet/htmlparser/jericho/Segmentlength()I+net/htmlparser/jericho/CharStreamSourceUtil=(Lnet/htmlparser/jericho/CharStreamSource;)Ljava/lang/String;(Ljava/lang/String;)VsourceLnet/htmlparser/jericho/Source;net/htmlparser/jericho/SourcegetBestGuessNewLine1!"#$%&'&(&)&*$+,-5)********+ ./- *+ + 0123-8 , Y* * ******+0145- * h67-*89-!+ Y*+*:7-*;<-**=>-*?<-**@>-*A<-**B>-*C<-**D>-*E9-*+*F7-&*** *  jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderSLF4J.class0000644000175000017500000000121111214132416026441 0ustar twernertwerner1      SLF4JLogger InnerClassesINSTANCE'Lnet/htmlparser/jericho/LoggerProvider;()VCode getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger; 6net/htmlparser/jericho/LoggerProviderSLF4J$SLF4JLogger  *net/htmlparser/jericho/LoggerProviderSLF4J java/lang/Object%net/htmlparser/jericho/LoggerProviderorg/slf4j/LoggerFactory&(Ljava/lang/String;)Lorg/slf4j/Logger;(Lorg/slf4j/Logger;)V0  * Y+ Y  jericho-html-3.1/classes/net/htmlparser/jericho/FormControlOutputStyle.class0000644000175000017500000000220711214132414027456 0ustar twernertwerner17 & '() * + , - . 
/ 012ConfigDisplayValue InnerClassesNORMAL/Lnet/htmlparser/jericho/FormControlOutputStyle;REMOVE DISPLAY_VALUE$VALUES0[Lnet/htmlparser/jericho/FormControlOutputStyle;values2()[Lnet/htmlparser/jericho/FormControlOutputStyle;CodevalueOfC(Ljava/lang/String;)Lnet/htmlparser/jericho/FormControlOutputStyle;(Ljava/lang/String;I)V Signature()V getDebugInfo()Ljava/lang/String;ALjava/lang/Enum;  34-net/htmlparser/jericho/FormControlOutputStyle 5  6#    java/lang/Enum@net/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValueclone()Ljava/lang/Object;5(Ljava/lang/Class;Ljava/lang/String;)Ljava/lang/Enum;toString@1@@@   **+ !"#*$!MAY Y  Y  Y SY SYS % jericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$HR_ElementHandler.class0000644000175000017500000000177711214132414031742 0ustar twernertwerner1+       INSTANCE" Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions#  $% &' ();net/htmlparser/jericho/Renderer$Processor$HR_ElementHandlerHR_ElementHandler java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler*)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$400/(Lnet/htmlparser/jericho/Renderer$Processor;I)V access$1300.(Lnet/htmlparser/jericho/Renderer$Processor;)V access$300Y(Lnet/htmlparser/jericho/Renderer$Processor;C)Lnet/htmlparser/jericho/Renderer$Processor;net/htmlparser/jericho/Renderer   *0$++>H+-W+ Y  !     
jericho-html-3.1/classes/net/htmlparser/jericho/LoggerProviderDisabled.class0000644000175000017500000000064711214132416027342 0ustar twernertwerner1   INSTANCE'Lnet/htmlparser/jericho/LoggerProvider;()VCode getLogger3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger; -net/htmlparser/jericho/LoggerProviderDisabled java/lang/Object%net/htmlparser/jericho/LoggerProvider0  *     Yjericho-html-3.1/classes/net/htmlparser/jericho/CharacterReference$EncodingFilterWriter.class0000644000175000017500000000214711214132414032544 0ustar twernertwerner1B   ! " # $ % & ' () (* + ,'-0sbLjava/lang/StringBuilder;(Ljava/io/Writer;)VCodewrite(C)V Exceptions1(I)V([CII)V(Ljava/lang/String;II)V java/lang/StringBuilder2 34   5 67 89 :; <=>  ?@ A>net/htmlparser/jericho/CharacterReference$EncodingFilterWriterEncodingFilterWriter InnerClassesjava/io/FilterWriterjava/io/IOException)net/htmlparser/jericho/CharacterReferenceMAX_ENTITY_REFERENCE_LENGTHI setLength access$000/(Ljava/lang/Appendable;C)Ljava/lang/Appendable;length()IoutLjava/io/Writer;charAt(I)Cjava/io/Writerappend*(Ljava/lang/CharSequence;)Ljava/io/Writer;java/lang/String0 *+*YG;**W** * * * W* *`66*+4 , `66*+ / .jericho-html-3.1/classes/net/htmlparser/jericho/FormFields$Column.class0000644000175000017500000000072211214132414026224 0ustar twernertwerner1     formField"Lnet/htmlparser/jericho/FormField; isBooleanZpredefinedValueLjava/lang/String;8(Lnet/htmlparser/jericho/FormField;ZLjava/lang/String;)VCode   (net/htmlparser/jericho/FormFields$ColumnColumn InnerClassesjava/lang/Object()V!net/htmlparser/jericho/FormFields     **+**-  jericho-html-3.1/classes/net/htmlparser/jericho/FormControl$RadioCheckboxFormControl.class0000644000175000017500000000651411214132414032100 0ustar twernertwerner1 *; )< =>? 
@A BC DE @F BGH I JKL M NO )PQ R S DTU =V )W XY )Z )[ *\ =] ^_ )` ab )c de af gh gi dj )k )lmpK(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;)VCodesetValue(Ljava/lang/String;)ZaddValue addValuesTo(Ljava/util/Collection;)V Signature-(Ljava/util/Collection;)V isChecked()ZaddToFormFields&(Lnet/htmlparser/jericho/FormFields;)VreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)V +s tvw xyonz {|} ~ 6 java/lang/StringBuilder + ": compulsory "value" attribute of  control " y"" is missing, assuming the value " checked  56 /    y y :;net/htmlparser/jericho/FormControl$RadioCheckboxFormControlRadioCheckboxFormControl InnerClasses"net/htmlparser/jericho/FormControlr(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;ZLnet/htmlparser/jericho/FormControl$1;)VelementContainerElementContainer5Lnet/htmlparser/jericho/FormControl$ElementContainer;3net/htmlparser/jericho/FormControl$ElementContainerpredefinedValueLjava/lang/String;net/htmlparser/jericho/ElementsourceLnet/htmlparser/jericho/Source;net/htmlparser/jericho/SourceloggerLnet/htmlparser/jericho/Logger;net/htmlparser/jericho/Logger isInfoEnabledbeginIgetRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;(I)V&net/htmlparser/jericho/RowColumnVectorappendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;-(Ljava/lang/Object;)Ljava/lang/StringBuilder;name(C)Ljava/lang/StringBuilder;toString()Ljava/lang/String;info(Ljava/lang/String;)V setSelected((Ljava/lang/String;Ljava/lang/String;Z)ZformControlType(Lnet/htmlparser/jericho/FormControlType;&net/htmlparser/jericho/FormControlTypeCHECKBOXgetPredefinedValue access$200+(Ljava/util/Collection;Ljava/lang/String;)VgetBooleanAttribute!net/htmlparser/jericho/FormFieldsadd'(Lnet/htmlparser/jericho/FormControl;)V outputStyle/Lnet/htmlparser/jericho/FormControlOutputStyle;-net/htmlparser/jericho/FormControlOutputStyleREMOVE 
getElement"()Lnet/htmlparser/jericho/Element;%net/htmlparser/jericho/OutputDocumentremove#(Lnet/htmlparser/jericho/Segment;)V DISPLAY_VALUE@net/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValueConfigDisplayValue CheckedHTML UncheckedHTMLreplace;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)V setDisabled(Z)V+replaceAttributesInOutputDocumentIfModified$net/htmlparser/jericho/FormControl$10)*+,-v*+,*f*+N+++ Yȷ  ,*"./- *+0/-&*+*12-* +*3456- *78-+*9:-UI* +*!"6*#'* $%M, +*!,&*'*+(o")*nq*=*ugajericho-html-3.1/classes/net/htmlparser/jericho/Source.class0000644000175000017500000004032011214132414024206 0ustar twernertwerner1 st uv w x y z { | } ~                . .   .  .      +  .   .  x  6 x    6     x x   s       U       x  t   c   6  6 6      p   cu       x x x x   x       6    6 6  6 6 6             x 6 6   x x  ! " #$% & ' ( )* +  , c-   s. . /0 1 2 3 / 4 5 67 .89 :;<= >?@ AB CD EF sourceTextLjava/lang/CharSequence;documentSpecifiedEncodingLjava/lang/String;encodingencodingSpecificationInfopreliminaryEncodingInfonewLine parseText"Lnet/htmlparser/jericho/ParseText;parseTextOutputDocument'Lnet/htmlparser/jericho/OutputDocument;loggerLnet/htmlparser/jericho/Logger;rowColumnVectorCacheArray)[Lnet/htmlparser/jericho/RowColumnVector;cacheLnet/htmlparser/jericho/Cache;useAllTypesCacheZuseSpecialTypesCachefullSequentialParseData[I allTagsArray[Lnet/htmlparser/jericho/Tag;allTagsLjava/util/List; Signature.Ljava/util/List; allStartTags3Ljava/util/List; allElements2Ljava/util/List; childElements lastNewLine UNINITIALISED ConstantValueCRLFCRLF PACKAGE_NAMELegacyIteratorCompatabilityMode DeprecatedRuntimeVisibleAnnotationsLjava/lang/Deprecated;(Ljava/lang/CharSequence;)VCode,(Lnet/htmlparser/jericho/EncodingDetector;)V 
Exceptions%(Ljava/io/Reader;Ljava/lang/String;)V{(Ljava/lang/CharSequence;Lnet/htmlparser/jericho/StreamedParseText;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V(Ljava/lang/CharSequence;Z)V(Ljava/io/Reader;)V(Ljava/io/InputStream;)V(Ljava/net/URL;)V(Ljava/net/URLConnection;)V setEncoding8(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;getDocumentSpecifiedEncoding()Ljava/lang/String; getEncodinggetEncodingSpecificationInfogetPreliminaryEncodingInfoisXML()Z getNewLinegetBestGuessNewLinegetRow(I)I getColumngetRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;toStringfullSequentialParse()[Lnet/htmlparser/jericho/Tag;iterator()Ljava/util/Iterator;8()Ljava/util/Iterator;getChildElements()Ljava/util/List;4()Ljava/util/List;getSourceFormatter*()Lnet/htmlparser/jericho/SourceFormatter; getAllTags0()Ljava/util/List;getAllStartTags5()Ljava/util/List;getAllElementsgetElementById4(Ljava/lang/String;)Lnet/htmlparser/jericho/Element;getTagAt(I)Lnet/htmlparser/jericho/Tag;getPreviousTag?(ILnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag; 
getNextTaggetNextNonServerTaggetPreviousNonServerTaggetEnclosingTaggetNextElement#(I)Lnet/htmlparser/jericho/Element;5(ILjava/lang/String;)Lnet/htmlparser/jericho/Element;H(ILjava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/Element;N(ILjava/lang/String;Ljava/util/regex/Pattern;)Lnet/htmlparser/jericho/Element;getNextElementByClassgetPreviousStartTag$(I)Lnet/htmlparser/jericho/StartTag;I(ILnet/htmlparser/jericho/StartTagType;)Lnet/htmlparser/jericho/StartTag;6(ILjava/lang/String;)Lnet/htmlparser/jericho/StartTag;[(ILjava/lang/String;Lnet/htmlparser/jericho/StartTagType;)Lnet/htmlparser/jericho/StartTag;getNextStartTagI(ILjava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/StartTag;O(ILjava/lang/String;Ljava/util/regex/Pattern;)Lnet/htmlparser/jericho/StartTag;getNextStartTagByClassgetPreviousEndTag"(I)Lnet/htmlparser/jericho/EndTag;E(ILnet/htmlparser/jericho/EndTagType;)Lnet/htmlparser/jericho/EndTag;4(ILjava/lang/String;)Lnet/htmlparser/jericho/EndTag; getNextEndTagW(ILjava/lang/String;Lnet/htmlparser/jericho/EndTagType;)Lnet/htmlparser/jericho/EndTag;getEnclosingElementgetPreviousCharacterReference.(I)Lnet/htmlparser/jericho/CharacterReference;getNextCharacterReference getNameEndparseAttributes'(II)Lnet/htmlparser/jericho/Attributes;((III)Lnet/htmlparser/jericho/Attributes;ignoreWhenParsing(II)V(Ljava/util/Collection;)V<(Ljava/util/Collection<+Lnet/htmlparser/jericho/Segment;>;)V setLogger"(Lnet/htmlparser/jericho/Logger;)V getLogger!()Lnet/htmlparser/jericho/Logger; clearCache()VgetCacheDebugInfo getParsedTags getParseText$()Lnet/htmlparser/jericho/ParseText; subSequence(II)Ljava/lang/CharSequence; substring(II)Ljava/lang/String;getNamecharAt(I)Clength()IwasFullSequentialParseCalled&getCharsetParameterFromHttpHeaderValue&(Ljava/lang/String;)Ljava/lang/String; newLogger getString=(Lnet/htmlparser/jericho/EncodingDetector;)Ljava/lang/String; isStreamedVLnet/htmlparser/jericho/Segment;Ljava/lang/Iterable;G hi H I  mZ 
WXnet/htmlparser/jericho/Cache J no    java/lang/StringBuilder \ K LM: NO nP7InputStreamReader.getEncoding() of constructor argumentQ Ri ST UVjava/io/InputStreamReader 'net/htmlparser/jericho/EncodingDetector  W XY  ./ Z[\ ]^net/htmlparser/jericho/StartTag _l    http-equiv content-type `acontent kl!No encoding specified in document  b^ 21 _`xhtml cd edf gh fg   i j k i i#java/lang/IndexOutOfBoundsException lm no pi Full sequential parse clearing all tags from cache. Consider calling Source.fullSequentialParse() manually immediately after construction of Source.q rs t\ uv w x y# java/util/ArrayList z{ A=| } ~ "   &net/htmlparser/jericho/SourceFormatter  '# i    net/htmlparser/jericho/Tag )# id  . 0 0 2 2/ 0/ 2 51 01 A? AB AC D?  ^ <@    A@   class  net/htmlparser/jericho/EndTag"java/lang/IllegalArgumentExceptionname argument must not be null s    IJ  K8       i PR  jjava/lang/IllegalStateExceptionRignoreWhenParsing can not be used after a full sequential parse has been performed%net/htmlparser/jericho/OutputDocument  Tnet/htmlparser/jericho/Segment S\  ,net/htmlparser/jericho/CharSequenceParseText ab cdcharset= g g c   Y java/io/IOException 3IOException constructing encoded source. Encoding:  - . 
PreliminaryEncoding: sjava/lang/Exceptionnet/htmlparser/jericho/Source  e java/lang/Iterablejava/lang/CharSequence(I)Vjava/lang/Object"(Lnet/htmlparser/jericho/Source;)VgetPreliminaryEncodingappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;'getPreliminaryEncodingSpecificationInfonet/htmlparser/jericho/Util$(Ljava/io/Reader;)Ljava/lang/String;(net/htmlparser/jericho/StreamedParseTextgetEndSTREAMED_SOURCE_MARKER%net/htmlparser/jericho/LoggerDisabledINSTANCE'Lnet/htmlparser/jericho/LoggerDisabled; java/net/URLopenConnection()Ljava/net/URLConnection; getTagType"()Lnet/htmlparser/jericho/TagType;#net/htmlparser/jericho/StartTagTypeXML_DECLARATION%Lnet/htmlparser/jericho/StartTagType;getAttributeValuegetFirstStartTagH(Ljava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/StartTag;DOCTYPE_DECLARATIONbeginIend net/htmlparser/jericho/ParseTextindexOf(Ljava/lang/String;II)Inet/htmlparser/jericho/ConfigNewLine&net/htmlparser/jericho/RowColumnVector getCacheArrayJ(Lnet/htmlparser/jericho/Source;)[Lnet/htmlparser/jericho/RowColumnVector;getT([Lnet/htmlparser/jericho/RowColumnVector;I)Lnet/htmlparser/jericho/RowColumnVector; getTagCountnet/htmlparser/jericho/Loggerwarn(Ljava/lang/String;)VclearparseAll?(Lnet/htmlparser/jericho/Source;Z)[Lnet/htmlparser/jericho/Tag;getNodeIteratorjava/util/Collections emptyListsourceLnet/htmlparser/jericho/Source;net/htmlparser/jericho/TagType isServerTag getElement"()Lnet/htmlparser/jericho/Element;net/htmlparser/jericho/Element(I)Ljava/util/List; parentElement Lnet/htmlparser/jericho/Element; 
NOT_CACHEDjava/util/Listadd(Ljava/lang/Object;)Z#(Lnet/htmlparser/jericho/Segment;)Vsizejava/util/IteratorhasNextnext()Ljava/lang/Object;isEmptygetFirstElementG(Ljava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/Element;?(Lnet/htmlparser/jericho/Source;IZ)Lnet/htmlparser/jericho/Tag;>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;^(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag; getPreviousC(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/StartTag;NORMALjava/lang/String toLowerCasez(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/StartTagType;)Lnet/htmlparser/jericho/StartTag;getNexth(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Ljava/lang/String;Z)Lnet/htmlparser/jericho/StartTag;n(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Ljava/util/regex/Pattern;)Lnet/htmlparser/jericho/StartTag;getClassPattern-(Ljava/lang/String;)Ljava/util/regex/Pattern;A(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/EndTag;!net/htmlparser/jericho/EndTagType#Lnet/htmlparser/jericho/EndTagType;v(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/EndTagType;)Lnet/htmlparser/jericho/EndTag; isXMLName(Ljava/lang/CharSequence;)Z{(Lnet/htmlparser/jericho/Source;ILjava/lang/String;Lnet/htmlparser/jericho/StartTagType;Z)Lnet/htmlparser/jericho/StartTag;)net/htmlparser/jericho/CharacterReferenceM(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/CharacterReference;isXMLNameStartChar(C)Z isXMLNameChar!net/htmlparser/jericho/AttributesgetDefaultMaxErrorCount constructG(Lnet/htmlparser/jericho/Source;III)Lnet/htmlparser/jericho/Attributes;%(Lnet/htmlparser/jericho/ParseText;)VreplaceWithSpacesjava/util/CollectiongetTagIterator(Ljava/lang/String;)I(II)I(I)Ljava/lang/String;trim$net/htmlparser/jericho/LoggerFactory3(Ljava/lang/String;)Lnet/htmlparser/jericho/Logger; openReader()Ljava/io/Reader; isInfoEnabledinfojava/lang/Class 
getPackage()Ljava/lang/Package;java/lang/Package1 OLN Wt*+****** * * * * ******+**Y*F:*+*+*+*Y +!"#"+$"%$*+&,*,*'*,(****** * * * * ******)* * * -*-***+*,*t*+****** * * * * ******+** * **$*+++ ++,-  *.Y+/0 *.Y+120  *.Y+20  !* *+*,+***3L+.+45$*+678***+9:*;<=M,*,>8N-*-?***,@:*A:**BW***BW**J>*3L++45*CDM,*EF,G,HIs**<*JW*K= *LYMZ .**J*K NOYMZ*Z%*PL++MMQ *RS *RT4(*J UYV* **W * X*tX***Y*Z[\*]* =* * *^N* * -:* * -=IIKI *_!"#*|*` *ak**bW*cYd<*efM,B,gh ,i<,jN-kW-lm-l*-nW-o<*$%& pY*q'#**bW*()#`T*K*rL*cY+st+uM,v%,wxN-6*-6nW**+#fZ*Q*yL+za*cY+st+uM,v,w6N*-jnW*$,- *{+|./*}0/*~01*,2/*3/, *M,,4h,,H<4/0$*dM,,4h,,Gd<21*,5/*51#*,N- -H-67*fM,,j68 *,N--j69&*,-:j6:$*,-:j;8 *,N--j<=*<> *,6<? *,<@,,M*,-A=*A> *,D6A? *,A@,,M*,-AB *,-AC*,-D? *,EF*EG *,EH', Y*,IF*IG *,DIH *,IJ%, Y*,-K7*K8OC>,,M,6*,:j:od>LM*NM*OE1*K*K M+.UPQ *PR*STB6* Y* *Y*E ** SU."+M,v,wN-VWX*++*ZYZ*Z* *Z[\(*]****]*^#5)cYdL*M,v+,wW+(_`H<*3* *Y* * *Y**ab *cd *ed *fg *Khi *j *klE9*<`=*;> * *:ðmZĸŰ nox\*Ƹ&LM,C,Y ɶ"*"ʶ"*"˶"*!"ʶ"*$"%M+ VYp*)q\!Mζ϶гѱrjericho-html-3.1/classes/net/htmlparser/jericho/TextExtractor$AttributeIncludeChecker.class0000644000175000017500000000046711214132414032317 0ustar twernertwerner1  includeAttributeF(Lnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/Attribute;)Z (Ljava/io/Reader;[C)VCode(Ljava/io/Reader;)V([CI)V([C)V(Ljava/nio/CharBuffer;)V(Ljava/lang/CharSequence;)V setBuffer)([C)Lnet/htmlparser/jericho/StreamedText;hasExpandableBuffer()ZcharAt(I)CsetMinRequiredBufferBegin(I)VgetMinRequiredBufferBegin()IlengthgetEndprepareBufferRange(II)VwriteTo(Ljava/io/Writer;II)V Exceptions substring(II)Ljava/lang/String; subSequence(II)Ljava/lang/CharSequence; getCharBuffer(II)Ljava/nio/CharBuffer;toString()Ljava/lang/String; getDebugInfo getBuffer()[CgetBufferBegincheckPosgetBufferOverflowPositionreadToPosition expandBufferdiscardUsedText()V shiftBuffer([C[C)VgetCurrentBufferContent toCharArray(Ljava/lang/CharSequence;)[C Nx HI JI KI LI BC VW NO DE FG NR q `_ |} NS MI u] 
s]"java/lang/IllegalArgumentExceptionjava/lang/StringBuilderGCannot set minimum required buffer begin to already discarded position mn Njava/lang/IllegalStateExceptionOLength of streamed text cannot be determined until end of file has been reached#java/lang/IndexOutOfBoundsException bc java/lang/String N kl 'java/lang/UnsupportedOperationException.Streamed text can not be converted to a stringBuffer size: "", bufferBegin=, minRequiredBufferBegin= , readerPos=StreamedText position  has been discarded java/nio/BufferOverflowException v] wx java/io/IOExceptionjava/lang/RuntimeException N yz  gh |q Z[#net/htmlparser/jericho/StreamedTextjava/lang/Objectjava/lang/CharSequencejava/nio/CharBufferarrayappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;(Ljava/lang/String;)Vjava/io/Writerwrite([CII)Vwrap([CII)Ljava/nio/CharBuffer;java/io/Readerread([CII)I(Ljava/lang/Throwable;)Vskip(J)Jjava/lang/Mathmin(II)I0?@ABCDEFGHIJIKILI MINOP1%******+*,WNQP*+ NRP@4*******+ * **NSP*++ NTP *+ + NUP *+VWP-!+*+ * * * *XYP* Z[P***** *d4\]P5)*YY*^_P*`_P$* Y*a_P*bcP3'd>**** Y deP$*!+* *dd"f5ghP'*!#Y* *dd$ijP*%klP#*!* *dd&mnP 'Y()onPH<Y** +*,*-*pqP* r_P*s]PE9*#YY./* Y t_P ** `u]P** `0** `* 0Y1**d`2*3*D** **d** `*d4=***Y` M6Y,75v]P."* h==N** -8*- f5wxP%**** * 8f5yzPx**d>**d66,d+4U****8***d97 ***Yañf5{nP ****:; |}P>2*# *#<*=L=++*>U+~xP jericho-html-3.1/classes/net/htmlparser/jericho/EndTagTypeUnregistered.class0000644000175000017500000000324511214132416027342 0ustar twernertwerner1c#$% & '( ) *+ , -. '/ 0 '1 23 '45 67 8 9:; < 2=> ? 
@AINSTANCE/Lnet/htmlparser/jericho/EndTagTypeUnregistered;()VCodeconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag; /unregistered BC DE FGH IJ KGL MN OP QR STU VW XYjava/lang/StringBuilder ZEncountered possible EndTag at [\] ^_5 whose content does not match a registered EndTagType `G ab-net/htmlparser/jericho/EndTagTypeUnregistered  !net/htmlparser/jericho/EndTagType:(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)Vnet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText;getStartDelimiter()Ljava/lang/String;java/lang/Stringlength()IgetClosingDelimiter net/htmlparser/jericho/ParseTextindexOf(Ljava/lang/String;I)IgetName(II)Ljava/lang/String;constructEndTagT(Lnet/htmlparser/jericho/Source;IILjava/lang/String;)Lnet/htmlparser/jericho/EndTag;loggerLnet/htmlparser/jericho/Logger;net/htmlparser/jericho/Logger isInfoEnabled()ZgetRowColumnVector+(I)Lnet/htmlparser/jericho/RowColumnVector;(I)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;&net/htmlparser/jericho/RowColumnVectorappendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder;toStringinfo(Ljava/lang/String;)V0 * !s+N*`6-* 6+ :*+*` :+ ++ +Yȷ" Yjericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeServerCommonEscaped.class0000644000175000017500000000077311214132416031200 0ustar twernertwerner1   INSTANCE8Lnet/htmlparser/jericho/StartTagTypeServerCommonEscaped;()VCodeescaped common server tag<\%%> 6net/htmlparser/jericho/StartTagTypeServerCommonEscaped 8net/htmlparser/jericho/StartTagTypeGenericImplementation](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V0    *   Yjericho-html-3.1/classes/net/htmlparser/jericho/StringOutputSegment.class0000644000175000017500000000273411214132416026771 0ustar twernertwerner1U 2 3 45 6 73 74 8 9 :; <= >? 
2@ A BCD E > FGHIbeginIendtextLjava/lang/CharSequence;(IILjava/lang/CharSequence;)VCode;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)VgetBegin()IgetEndwriteTo(Ljava/io/Writer;)V ExceptionsJappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String; getDebugInfooutput K   L  *+M NOP Q$ ./java/lang/StringBuilder Replace: (p NR NS-p) NT &'*net/htmlparser/jericho/StringOutputSegmentjava/lang/Object$net/htmlparser/jericho/OutputSegmentjava/io/IOException()Vnet/htmlparser/jericho/Segmentjava/lang/Appendableappend0(Ljava/lang/CharSequence;)Ljava/lang/Appendable;java/lang/CharSequencelength-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;-(Ljava/lang/Object;)Ljava/lang/StringBuilder;0  !)****--"!*++,#$!*%$!*&'!*+ ()*+! +* W(),-! * ./!* 0/!;/ Y***1'!*+()jericho-html-3.1/classes/net/htmlparser/jericho/FormControl.class0000644000175000017500000002201111214132414025207 0ustar twernertwerner1 v v v i  i  i i      x    " $ ' * [  i w  v 4 v v   v v 4 = 4 4 4 4 v   I v v  !" #$% v& i' () *+ *, -. /0 /12 43 45 *6 789: a;< 4= v> v? 
@ wAB iC IDy EF [)GHI JDK tLMNO InnerClassesElementContainerPositionComparatorSelectFormControlImageSubmitFormControlSubmitFormControlRadioCheckboxFormControlTextAreaFormControlInputFormControlformControlType(Lnet/htmlparser/jericho/FormControlType;nameLjava/lang/String;elementContainer5Lnet/htmlparser/jericho/FormControl$ElementContainer; outputStyle/Lnet/htmlparser/jericho/FormControlOutputStyle;CHECKBOX_NULL_DEFAULT_VALUE ConstantValueP COMPARATORLjava/util/Comparator; Signature; constructF(Lnet/htmlparser/jericho/Element;)Lnet/htmlparser/jericho/FormControl;CodeL(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;Z)VgetFormControlType*()Lnet/htmlparser/jericho/FormControlType;getName()Ljava/lang/String; getElement"()Lnet/htmlparser/jericho/Element;getOptionElementIterator()Ljava/util/Iterator;8()Ljava/util/Iterator;getOutputStyle1()Lnet/htmlparser/jericho/FormControlOutputStyle;setOutputStyle2(Lnet/htmlparser/jericho/FormControlOutputStyle;)VgetAttributesMap()Ljava/util/Map;7()Ljava/util/Map; isDisabled()Z setDisabled(Z)V isCheckedgetPredefinedValuegetPredefinedValues()Ljava/util/Collection;,()Ljava/util/Collection; getValues()Ljava/util/List;&()Ljava/util/List; clearValues()VsetValue(Ljava/lang/String;)ZaddValue addValuesTo(Ljava/util/Collection;)V-(Ljava/util/Collection;)VaddToFormFields&(Lnet/htmlparser/jericho/FormFields;)VreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)V getDebugInfogetDisplayValueHTML-(Ljava/lang/CharSequence;Z)Ljava/lang/String;+replaceAttributesInOutputDocumentIfModifiedgetAll2(Lnet/htmlparser/jericho/Segment;)Ljava/util/List;X(Lnet/htmlparser/jericho/Segment;)Ljava/util/List;J(Lnet/htmlparser/jericho/Segment;Ljava/util/ArrayList;Ljava/lang/String;)Vp(Lnet/htmlparser/jericho/Segment;Ljava/util/ArrayList;Ljava/lang/String;)V getString(CI)Ljava/lang/String; verifyName 
addValueTo+(Ljava/util/Collection;Ljava/lang/String;)V?(Ljava/util/Collection;Ljava/lang/String;)Vr(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;ZLnet/htmlparser/jericho/FormControl$1;)V access$200 access$300 QRS input TUtypeV WX3net/htmlparser/jericho/FormControl$InputFormControlY Z [ \] ^ _`a bcd e fg hijava/lang/StringBuilder jk lm(: INPUT control with unrecognised type " no" assumed to be type "text" p qr st uv;net/htmlparser/jericho/FormControl$RadioCheckboxFormControl4net/htmlparser/jericho/FormControl$SubmitFormControl9net/htmlparser/jericho/FormControl$ImageSubmitFormControl wjava/lang/AssertionError xselect4net/htmlparser/jericho/FormControl$SelectFormControltextarea6net/htmlparser/jericho/FormControl$TextAreaFormControlbuttonsubmit y z {g |} ~ 3net/htmlparser/jericho/FormControl$ElementContainer  X 'java/lang/UnsupportedOperationException,Only SELECT controls contain OPTION elements r disabled  >This property is only relevant for CHECKBOX and RADIO controls    java/util/ArrayList n name=" n PredefinedValue=" -  v     java/lang/String X   java/io/IOExceptionjava/lang/RuntimeException ?*3*+3*6@ *6AB *6AC =YD?*6E*FGW*FHIYJL*+K+*LW*+LdXYML+*7NO*:"PW*6E+Q*6E"PW+R*STW+Y+ +Uh2`N-<PVWWX:Y.Z[:*6\:-]W->PW+ +U-^W -+_W:aYb-cV>PW-(` *6+d9-IYJL*+e*+)e*+&e*+,e+fg+ B6*,hXN-Y$-Zi:j: +kWٱ 4(lYM>,PW,z*7m*: nL*:opL*SM,B,*S*SYȷq*7Nr+*++lsW*+,*+ tYufzR xv4v{tv|'v}"v~ vv*v v*././@LongLink0000000000000000000000000000015200000000000011563 Lustar rootrootjericho-html-3.1/classes/net/htmlparser/jericho/FormControl$SelectFormControl$OptionElementIterator.classjericho-html-3.1/classes/net/htmlparser/jericho/FormControl$SelectFormControl$OptionElementIterator.0000644000175000017500000000270511214132414034043 0ustar twernertwerner1? 
& ' ( ) * +, ( -./ ( 0134iIthis$06SelectFormControl InnerClasses6Lnet/htmlparser/jericho/FormControl$SelectFormControl;9(Lnet/htmlparser/jericho/FormControl$SelectFormControl;)VCodehasNext()Znext"()Lnet/htmlparser/jericho/Element;remove()V()Ljava/lang/Object;7_(Lnet/htmlparser/jericho/FormControl$SelectFormControl;Lnet/htmlparser/jericho/FormControl$1;)V SignatureHLjava/lang/Object;Ljava/util/Iterator;     8:  java/util/NoSuchElementException; <='java/lang/UnsupportedOperationException Jnet/htmlparser/jericho/FormControl$SelectFormControl$OptionElementIteratorOptionElementIteratorjava/lang/Objectjava/util/Iterator>4net/htmlparser/jericho/FormControl$SelectFormControl$net/htmlparser/jericho/FormControl$1optionElementContainersElementContainer6[Lnet/htmlparser/jericho/FormControl$ElementContainer;3net/htmlparser/jericho/FormControl$ElementContainerelement Lnet/htmlparser/jericho/Element;"net/htmlparser/jericho/FormControl0 *+**!**2&* Y**YZ`2  Y A * #*+$%"5!5 2-59jericho-html-3.1/classes/net/htmlparser/jericho/FormControl$1.class0000644000175000017500000000155011214132414025341 0ustar twernertwerner16          ! 
"#&1$SwitchMap$net$htmlparser$jericho$FormControlType[I()VCodeEnclosingMethod'( )*  +, -.java/lang/NoSuchFieldError /, 0, 1, 2, 3, 4, 5,$net/htmlparser/jericho/FormControl$1 InnerClassesjava/lang/Object"net/htmlparser/jericho/FormControl&net/htmlparser/jericho/FormControlTypevalues+()[Lnet/htmlparser/jericho/FormControlType;TEXT(Lnet/htmlparser/jericho/FormControlType;ordinal()ICHECKBOXRADIOSUBMITIMAGEHIDDENPASSWORDFILE  OKOKOKOK OK OK OK OK #&'256ADEPST`cdpst%  jericho-html-3.1/classes/net/htmlparser/jericho/nodoc/0000755000175000017500000000000011214132414023022 5ustar twernertwernerjericho-html-3.1/classes/net/htmlparser/jericho/nodoc/SequentialListSegment$SubList.class0000644000175000017500000000436311214132414031722 0ustar twernertwerner1k B C D E FGH FI J K L M NOPQRST M NU VW Y[ \]^_listLjava/util/List; SignatureLjava/util/List;offsetIsize(Ljava/util/List;II)VCode(Ljava/util/List;II)Vget(I)Ljava/lang/Object;(I)TE;()Iiterator()Ljava/util/Iterator;()Ljava/util/Iterator; listIterator(I)Ljava/util/ListIterator; (I)Ljava/util/ListIterator;subList(II)Ljava/util/List;(II)Ljava/util/List;getSuperListIndex(I)I access$000SubList InnerClasses@(Lnet/htmlparser/jericho/nodoc/SequentialListSegment$SubList;I)I access$100N(Lnet/htmlparser/jericho/nodoc/SequentialListSegment$SubList;)Ljava/util/List; access$200?(Lnet/htmlparser/jericho/nodoc/SequentialListSegment$SubList;)I access$3003Ljava/util/AbstractList; "# $#  67 %`#java/lang/IndexOutOfBoundsExceptionjava/lang/StringBuilder fromIndex= ab ac de %fg $,toIndex="java/lang/IllegalArgumentException fromIndex( ) > toIndex() )* 0h?'*@?'* A:Z9 jericho-html-3.1/classes/net/htmlparser/jericho/nodoc/SequentialListSegment.class0000644000175000017500000000661411214132414030351 0ustar twernertwerner1 %] $^ _`abc de f g h i $j $kl $m no n` p qr stuv d wm $x _o _y z _{ _| _} $~ "SubList InnerClasses$(Lnet/htmlparser/jericho/Source;II)VCodegetCount()I listIterator(I)Ljava/util/ListIterator; Signature 
(I)Ljava/util/ListIterator;get(I)Ljava/lang/Object;(I)TE;sizeisEmpty()Zcontains(Ljava/lang/Object;)ZtoArray()[Ljava/lang/Object;(([Ljava/lang/Object;)[Ljava/lang/Object; ([TT;)[TT;remove containsAll(Ljava/util/Collection;)Z(Ljava/util/Collection<*>;)ZaddAll(Ljava/util/Collection<+TE;>;)Z removeAll retainAlladd(TE;)Zset'(ILjava/lang/Object;)Ljava/lang/Object; (ITE;)TE;(ILjava/lang/Object;)V(ITE;)VindexOf(Ljava/lang/Object;)I lastIndexOfclear()V(ILjava/util/Collection;)Z (ILjava/util/Collection<+TE;>;)Ziterator()Ljava/util/Iterator;()Ljava/util/Iterator;()Ljava/util/ListIterator;()Ljava/util/ListIterator;subList(II)Ljava/util/List;(II)Ljava/util/List;KLnet/htmlparser/jericho/Segment;Ljava/util/List; )* ./  java/util/NoSuchElementException#java/lang/IndexOutOfBoundsExceptionjava/lang/StringBuilder )Qindex= ) ,- MNjava/lang/Object TU 7   [Ljava/lang/Object;'java/lang/UnsupportedOperationException 89 - 9 7 - .W:net/htmlparser/jericho/nodoc/SequentialListSegment$SubList )2net/htmlparser/jericho/nodoc/SequentialListSegmentnet/htmlparser/jericho/Segmentjava/util/Listjava/util/ListIteratornext()Ljava/lang/Object;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;toString()Ljava/lang/String;(Ljava/lang/String;)Vjava/util/IteratorhasNextgetClass()Ljava/lang/Class;java/lang/ClassgetComponentTypejava/lang/reflect/Array newInstance&(Ljava/lang/Class;I)Ljava/lang/Object;java/util/Collection previousIndexequals hasPreviousprevious nextIndex(Ljava/util/List;II)V!$%&)*+*+,-./0123+=)*M,NYY   045-+* 67+ * 89+*+:;+9-* L=*N--:+S+:<+[O* =++L>*::+S++S+0=>9+Y?@+2&+M,,N*-0AB@+Y0CD@+Y0AE@+Y0AF9+Y0GHI+Y0JFK+Y0L>3+Y04MN+NB*M+,0,,,+,,ON+QE** M+,0,, ,+,, PQ+YBR+Y0STU+*!0V.W+*0XYZ+ "Y*#0[0\( "$' jericho-html-3.1/classes/net/htmlparser/jericho/nodoc/SequentialListSegment$SubList$1.class0000644000175000017500000000357011214132414032046 0ustar twernertwerner1\ : ; < = > ?9 @ A B CD < E F G H A I FJ <KMNiLjava/util/ListIterator; SignatureLjava/util/ListIterator; 
val$indexIthis$0PSubList InnerClasses@(Lnet/htmlparser/jericho/nodoc/SequentialListSegment$SubList;I)VCodehasNext()Znext()Ljava/lang/Object;()TE; hasPreviousprevious nextIndex()I previousIndexremove()Vset(Ljava/lang/Object;)V(TE;)Vadd/Ljava/lang/Object;Ljava/util/ListIterator;EnclosingMethod QR #  $2 ST UVW  ./ XY '( java/util/NoSuchElementException )* 0/ ,( -* ZY'java/lang/UnsupportedOperationException()VCodeatEndOfAttributes$(Lnet/htmlparser/jericho/Source;IZ)Znormal<>& '( )* +,- .//> 01)net/htmlparser/jericho/StartTagTypeNormal  8net/htmlparser/jericho/StartTagTypeGenericImplementation!net/htmlparser/jericho/EndTagTypeNORMAL#Lnet/htmlparser/jericho/EndTagType;_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)Vnet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseTextcharAt(I)C containsAt(Ljava/lang/String;I)Z0 *6*+:>  Y jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeXMLProcessingInstruction.class0000644000175000017500000000101111214132416032215 0ustar twernertwerner1   INSTANCE=Lnet/htmlparser/jericho/StartTagTypeXMLProcessingInstruction;()VCodeXML processing instruction ;net/htmlparser/jericho/StartTagTypeXMLProcessingInstruction 8net/htmlparser/jericho/StartTagTypeGenericImplementation_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)V0   *   Yjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$LI_ElementHandler.class0000644000175000017500000000222111214132414031716 0ustar twernertwerner1;     ! 
" # $ %&  ()*INSTANCE, Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions-  ./ 0/ 12 34 56 76 82 9;net/htmlparser/jericho/Renderer$Processor$LI_ElementHandlerLI_ElementHandler java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler:)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$1400.(Lnet/htmlparser/jericho/Renderer$Processor;)I access$1408 access$1602/(Lnet/htmlparser/jericho/Renderer$Processor;Z)Z access$400/(Lnet/htmlparser/jericho/Renderer$Processor;I)V access$1300.(Lnet/htmlparser/jericho/Renderer$Processor;)V access$1700 access$1802 access$000net/htmlparser/jericho/Renderer *C7++W+W++++W+, +W+ Y + ' jericho-html-3.1/classes/net/htmlparser/jericho/FormControl$InputFormControl.class0000644000175000017500000000455111214132414030471 0ustar twernertwerner1s % &' () (* + ,- . /0 1 23 /4 5 67 68 9: ;< = > 2? 
@ADK(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;)VCodesetValue(Ljava/lang/String;)Z addValuesTo(Ljava/util/Collection;)V Signature-(Ljava/util/Collection;)VaddToFormFields&(Lnet/htmlparser/jericho/FormFields;)VreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)V G HJvalueK LM NO PQR ST UVW XV YZ[ \] ^V _`a b` c`d fgh ij kl mn op q$3net/htmlparser/jericho/FormControl$InputFormControlInputFormControl InnerClasses"net/htmlparser/jericho/FormControlrr(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;ZLnet/htmlparser/jericho/FormControl$1;)VelementContainerElementContainer5Lnet/htmlparser/jericho/FormControl$ElementContainer;3net/htmlparser/jericho/FormControl$ElementContainersetAttributeValue'(Ljava/lang/String;Ljava/lang/String;)VgetAttributeValue&(Ljava/lang/String;)Ljava/lang/String; access$200+(Ljava/util/Collection;Ljava/lang/String;)V!net/htmlparser/jericho/FormFieldsadd'(Lnet/htmlparser/jericho/FormControl;)V outputStyle/Lnet/htmlparser/jericho/FormControlOutputStyle;-net/htmlparser/jericho/FormControlOutputStyleREMOVE getElement"()Lnet/htmlparser/jericho/Element;%net/htmlparser/jericho/OutputDocumentremove#(Lnet/htmlparser/jericho/Segment;)V DISPLAY_VALUEformControlType(Lnet/htmlparser/jericho/FormControlType;&net/htmlparser/jericho/FormControlTypeHIDDENPASSWORD@net/htmlparser/jericho/FormControlOutputStyle$ConfigDisplayValueConfigDisplayValue PasswordCharCjava/lang/Stringlength()I access$300(CI)Ljava/lang/String;getDisplayValueHTML-(Ljava/lang/CharSequence;Z)Ljava/lang/String;replace;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)V+replaceAttributesInOutputDocumentIfModified$net/htmlparser/jericho/FormControl$10 *+, *++* !"+*#$sg* +* T* EM* -*N* --N*-M+* ,*+C"BE(I9/ejericho-html-3.1/classes/net/htmlparser/jericho/FormControl$ElementContainer.class0000644000175000017500000000427011214132414030437 0ustar twernertwerner1h 2 3 4 567 89 : 8; < = > ?@A ?B C D ?E 8F G ?H IJLOelement 
Lnet/htmlparser/jericho/Element; attributesMapLjava/util/Map; Signature5Ljava/util/Map;predefinedValueLjava/lang/String;$(Lnet/htmlparser/jericho/Element;Z)VCodegetAttributesMap()Ljava/util/Map;7()Ljava/util/Map; setSelected((Ljava/lang/String;Ljava/lang/String;Z)ZgetAttributeValue&(Ljava/lang/String;)Ljava/lang/String;setAttributeValue'(Ljava/lang/String;Ljava/lang/String;)VgetBooleanAttribute(Ljava/lang/String;)ZsetBooleanAttribute(Ljava/lang/String;Z)V+replaceAttributesInOutputDocumentIfModified*(Lnet/htmlparser/jericho/OutputDocument;)V P  Q RSvalueT U)  VW XY Z[ ./\ ]^java/lang/String _` () #$ a[ ]b ,- c^d efg3net/htmlparser/jericho/FormControl$ElementContainerElementContainer InnerClassesjava/lang/Object()Vnet/htmlparser/jericho/Element getAttributes%()Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/AttributesgetValuegetMap(Z)Ljava/util/Map;toString()Ljava/lang/String;equals(Ljava/lang/Object;)Z java/util/Mapget&(Ljava/lang/Object;)Ljava/lang/Object;put8(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object; containsKey6(Ljava/lang/String;)Lnet/htmlparser/jericho/Attribute;remove%net/htmlparser/jericho/OutputDocumentreplace5(Lnet/htmlparser/jericho/Attributes;Ljava/util/Map;)V"net/htmlparser/jericho/FormControl0 !"0$***+*+#$"'****%&'"2&+*+ *,  *, ()"-!**+  *+*+"K?, *+ **+,W*+N- -, *+,W,-"2&**+*+./"7+*+>*++W*+W01"#*+**N KMjericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypePHPStandard.class0000644000175000017500000000074411214132416027402 0ustar twernertwerner1   INSTANCE0Lnet/htmlparser/jericho/StartTagTypePHPStandard;()VCodePHP standard tag .net/htmlparser/jericho/StartTagTypePHPStandard 8net/htmlparser/jericho/StartTagTypeGenericImplementation](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V0    *   Yjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$PRE_ElementHandler.class0000644000175000017500000000175411214132414032052 0ustar twernertwerner1.        
!"INSTANCE$ Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions%  &' () *+ ,+W+,+W+ Y #     jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypePHPScript.class0000644000175000017500000000201211214132416027074 0ustar twernertwerner18    ! "# $% &'( ) *+INSTANCE.Lnet/htmlparser/jericho/StartTagTypePHPScript;()VCodeconstructTagAt>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag; PHP script, -. / net/htmlparser/jericho/StartTagphp 01language2 345 67,net/htmlparser/jericho/StartTagTypePHPScript  8net/htmlparser/jericho/StartTagTypeGenericImplementation!net/htmlparser/jericho/EndTagTypeNORMAL#Lnet/htmlparser/jericho/EndTagType;_(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;ZZZ)V getAttributes%()Lnet/htmlparser/jericho/Attributes;!net/htmlparser/jericho/AttributesgetValue&(Ljava/lang/String;)Ljava/lang/String;java/lang/StringequalsIgnoreCase(Ljava/lang/String;)Z0 *1%*+N--  - Yjericho-html-3.1/classes/net/htmlparser/jericho/OutputSegmentComparator.class0000644000175000017500000000122311214132416027622 0ustar twernertwerner1    ()VCodecompareO(Lnet/htmlparser/jericho/OutputSegment;Lnet/htmlparser/jericho/OutputSegment;)I'(Ljava/lang/Object;Ljava/lang/Object;)I SignaturePLjava/lang/Object;Ljava/util/Comparator;  $net/htmlparser/jericho/OutputSegment .net/htmlparser/jericho/OutputSegmentComparatorjava/lang/Objectjava/util/ComparatorgetBegin()IgetEnd0  *  RF+,+,+,+,A   *+,jericho-html-3.1/classes/net/htmlparser/jericho/RowColumnVector.class0000644000175000017500000000346611214132414026070 0ustar twernertwerner1d 9 : ; < => ? 
@ AB C DEF GH IJ K ILM 9 N O IPQ R S& ?TrowIcolumnposFIRST(Lnet/htmlparser/jericho/RowColumnVector;STREAMED)[Lnet/htmlparser/jericho/RowColumnVector;(III)VCode(I)VgetRow()I getColumngetPostoString()Ljava/lang/String;appendTo4(Ljava/lang/StringBuilder;)Ljava/lang/StringBuilder; getCacheArrayJ(Lnet/htmlparser/jericho/Source;)[Lnet/htmlparser/jericho/RowColumnVector;getT([Lnet/htmlparser/jericho/RowColumnVector;I)Lnet/htmlparser/jericho/RowColumnVector;()V '8  ! " '(java/lang/StringBuilder '* 12 /0(r UV UW,c,p UX(pY Z[ %& \ java/util/ArrayList #$ ]^ _`&net/htmlparser/jericho/RowColumnVector a, bcjava/lang/Objectappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;net/htmlparser/jericho/Source isStreamed()Zendadd(Ljava/lang/Object;)ZcharAt(I)CsizetoArray(([Ljava/lang/Object;)[Ljava/lang/Object;1 ! " #$%& '() ****'*)*+,)*-,)*.,)*/0)*Y 12)RF*.+ *  *  * )+ * )34)|**d<YM>,W6D*6  )*` ,Y`W,,56)ui* Y=*d>`z6*2:(Yd``= d>78)!Yjericho-html-3.1/classes/net/htmlparser/jericho/Logger.class0000644000175000017500000000041711214132414024170 0ustar twernertwerner1 error(Ljava/lang/String;)VwarninfodebugisErrorEnabled()Z isWarnEnabled isInfoEnabledisDebugEnablednet/htmlparser/jericho/Loggerjava/lang/Object    jericho-html-3.1/classes/net/htmlparser/jericho/SubCache$TagIterator.class0000644000175000017500000000225611214132414026643 0ustar twernertwerner1; " # $ % &' # () (* +, -.12iInextTagLnet/htmlparser/jericho/Tag;this$0!Lnet/htmlparser/jericho/SubCache;$(Lnet/htmlparser/jericho/SubCache;)VCodehasNext()Znext()Lnet/htmlparser/jericho/Tag;remove()V loadNextTag()Ljava/lang/Object; SignatureDLjava/lang/Object;Ljava/util/Iterator;     'java/lang/UnsupportedOperationException3 45 689 : +net/htmlparser/jericho/SubCache$TagIterator TagIterator InnerClassesjava/lang/Objectjava/util/Iteratornet/htmlparser/jericho/SubCache access$000$(Lnet/htmlparser/jericho/SubCache;)I access$100 
CacheEntryP(Lnet/htmlparser/jericho/SubCache;)[Lnet/htmlparser/jericho/SubCache$CacheEntry;*net/htmlparser/jericho/SubCache$CacheEntrytag0 *+*** * *L*+Y<0*Y`Z*** *2 ZԱA*  !0 (/+(7jericho-html-3.1/classes/net/htmlparser/jericho/FormControl$ImageSubmitFormControl.class0000644000175000017500000000201311214132414031567 0ustar twernertwerner15        !$#(Lnet/htmlparser/jericho/Element;)VCodeaddToFormFields&(Lnet/htmlparser/jericho/FormFields;)V& '( ) java/lang/StringBuilder * +, -..x /01 23.y49net/htmlparser/jericho/FormControl$ImageSubmitFormControlImageSubmitFormControl InnerClasses4net/htmlparser/jericho/FormControl$SubmitFormControlSubmitFormControl&net/htmlparser/jericho/FormControlTypeIMAGE(Lnet/htmlparser/jericho/FormControlType;K(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;)V()VnameLjava/lang/String;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;toString()Ljava/lang/String;!net/htmlparser/jericho/FormFieldsaddName9(Lnet/htmlparser/jericho/FormControl;Ljava/lang/String;)V"net/htmlparser/jericho/FormControl0  *+H<*++*Y* +*Y*  # " %jericho-html-3.1/classes/net/htmlparser/jericho/NumericCharacterReference.class0000644000175000017500000000557411214132416030022 0ustar twernertwerner1 $: ;< => ? =@ A BCD E F G H I J K L MN O@ 5P 5Q MR MS T> UVWX Y Z[ \ ] $^_hexZ&(Lnet/htmlparser/jericho/Source;IIIZ)VCode isDecimal()Z isHexadecimalencode,(Ljava/lang/CharSequence;)Ljava/lang/String; encodeDecimalencodeHexadecimalgetCharacterReferenceString()Ljava/lang/String;(I)Ljava/lang/String; constructa&UnterminatedCharacterReferenceSettings InnerClasses(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings;)Lnet/htmlparser/jericho/CharacterReference; getDebugInfo 'b %&java/lang/StringBuilderc de 'f gh ij kljava/io/IOExceptionjava/lang/RuntimeException 'm no p2 -. 
ql rs t3 u3v wxy zs {s |s }~ java/lang/NumberFormatException0net/htmlparser/jericho/NumericCharacterReference '( '" n l 92)net/htmlparser/jericho/CharacterReferenceDnet/htmlparser/jericho/Config$UnterminatedCharacterReferenceSettings%(Lnet/htmlparser/jericho/Source;III)Vjava/lang/CharSequencelength()I(I)VcharAt(I)CrequiresEncoding(C)Z%appendDecimalCharacterReferenceString/(Ljava/lang/Appendable;I)Ljava/lang/Appendable;(Ljava/lang/Throwable;)Vappend(C)Ljava/lang/StringBuilder;toString)appendHexadecimalCharacterReferenceString codePointI&getHexadecimalCharacterReferenceString"getDecimalCharacterReferenceStringnet/htmlparser/jericho/Source getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseText)hexadecimalCharacterReferenceMaxCodePoint%decimalCharacterReferenceMaxCodePointend substring(II)Ljava/lang/String;java/lang/Stringjava/lang/IntegerparseInt(Ljava/lang/String;I)I()V-(Ljava/lang/String;)Ljava/lang/StringBuilder;appendUnicodeTextnet/htmlparser/jericho/Config!$%& '()*+**+) *,+)* -.)kW*Y*hL=*3*>+W: Y + W+ 17: /.)* 0.)kW*Y*hL=*3*>+W: Y + W+ 17: 12)%* * * 13)48)@$*N`6-xY6 ,,6*d66 6 - 6  ; `6 * :i 0  9$/ a  f A F 6 6   6 * : y6  6    6 :  Y*    92)o[YL+" W*+*W +*W+ !W+*"W M Y, + *#!W+ ;> 7 5`6jericho-html-3.1/classes/net/htmlparser/jericho/BasicLogFormatter.class0000644000175000017500000000222211214132416026316 0ustar twernertwerner1M " #$ %& #' #( )* +, - . /0 1 23 45 67 " 89 OutputLevelZ OutputNameINSTANCELjava/util/logging/Formatter;()VCodeformat1(Ljava/util/logging/LogRecord;)Ljava/lang/String;J(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String; : ;<= >? @? A?  
java/lang/StringBuilderB CD E  FG:  FH] I JK L?(net/htmlparser/jericho/BasicLogFormatter java/util/logging/Formatterjava/util/logging/LogRecordgetLevel()Ljava/util/logging/Level;java/util/logging/LevelgetName()Ljava/lang/String; getMessage getLoggerNamejava/lang/Stringlength()I(I)Vappend-(Ljava/lang/String;)Ljava/lang/StringBuilder;(C)Ljava/lang/StringBuilder;net/htmlparser/jericho/ConfigNewLineLjava/lang/String;toString!  *+++  YMY+(` N -*  W ,-[,  W-+ W- W-!  Yjericho-html-3.1/classes/net/htmlparser/jericho/StreamEncodingDetector.class0000644000175000017500000001106011214132416027343 0ustar twernertwerner1 Dn Co Cp Cq Crs tu tv wx Cyz n{ | } ~  C    ~ ~ C ~T inputStreamLjava/io/InputStream;encodingLjava/lang/String;encodingSpecificationInfo definitiveZ!documentSpecifiedEncodingPossibleUTF_16 ConstantValueUTF_16BEUTF_16LEUTF_8 ISO_8859_1EBCDICSCSUUTF_7 UTF_EBCDICBOCU_1UTF_32UTF_32BEUTF_32LE(Ljava/net/URLConnection;)VCode Exceptions(Ljava/io/InputStream;)VgetInputStream()Ljava/io/InputStream; getEncoding()Ljava/lang/String;getEncodingSpecificationInfo isDifinitive()Z#isDocumentSpecifiedEncodingPossible openReader()Ljava/io/Reader; setEncoding'(Ljava/lang/String;Ljava/lang/String;)Zinit [ GH IH JK LKjava/net/HttpURLConnection ab d EFjava/lang/StringBuilderHTTP header Content-Type: d gjava/io/BufferedInputStream [` mgjava/io/InputStreamReader ISO-8859-1 [ $java/io/UnsupportedEncodingException - [ empty input stream kl UTF-8 UTF-8 Byte Order Mark (EF BB BF)UTF-16)UTF-16 big-endian Byte Order Mark (FE FF)UTF-322UTF-32 little-endian Byte Order Mark (FF EE 00 00),UTF-16 little-endian Byte Order Mark (FF EE)/UTF-32 big-endian Byte Order Mark (00 00 FE FF)SCSU Byte Order Mark (0E FE FF)UTF-7 UTF-7 Byte Order Mark (2B 2F 76) UTF-EBCDIC(UTF-EBCDIC Byte Order Mark (DD 73 66 73)BOCU-1!BOCU-1 Byte Order Mark (FB EE 28)=default 8-bit ASCII-compatible encoding (stream 3 bytes long)UTF-16BELdefault 16-bit BE encoding (byte stream starts with 00, stream 2 bytes 
long)UTF-16LEKdefault 16-bit LE encoding (byte stream pattern XX 00, stream 2 bytes long)Rdefault 8-bit ASCII-compatible encoding (no 00 bytes present, stream 2 bytes long)UTF-32BE:default 32-bit BE encoding (byte stream starts with 00 00)7default 16-bit BE encoding (byte stream starts with 00)UTF-32LEHdefault 32-bit LE encoding (byte stream starts with pattern XX ?? 00 00)Gdefault 16-bit LE encoding (byte stream stars with pattern XX ?? XX 00)Hdefault 16-bit LE encoding (byte stream starts with pattern XX 00 ?? XX)Hdefault 16-bit BE encoding (byte stream starts with pattern XX XX 00 XX)Cp037,default EBCDIC encoding ( detected)0default EBCDIC encoding ( detected):default EBCDIC-compatible encoding (HTML element detected)[default 8-bit ASCII-compatible encoding (no 00 bytes present in first four bytes of stream)-net/htmlparser/jericho/StreamEncodingDetectorjava/lang/Objectjava/io/IOException()Vjava/net/URLConnectiongetContentTypenet/htmlparser/jericho/Source&getCharsetParameterFromHttpHeaderValue&(Ljava/lang/String;)Ljava/lang/String;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;toStringjava/io/InputStream markSupported*(Ljava/io/InputStream;Ljava/lang/String;)Vjava/nio/charset/Charset isSupported(Ljava/lang/String;)Z(Ljava/lang/String;)Vmark(I)Vread()Ireset0CDEFGHIHJKLKMHN"OHN1PHN3QHN RHNSHN>THN(UHN*VHN,WHN.XHN$YHN6ZHN9 [\]*****+ +M+N+:1* *!*- * Y  *-- Y- *W^_[`]A5******++ Y+ *W^_ab]* cd]*ed]*fg]*hg]*ij]`T*Y* *(Y Y **Y* *^kl] *+*,mg]5)* * < ** =* >* 6* * !*"#% *$%*"&*$'ng*()+/RvL**+ݠs6f0s)*,-( *./*>* *0 *12 *34*5 *67*18 *9:*3; *3< *1=LUo *>?ZĠ֠ *>@~~~ *>A*B^_jericho-html-3.1/classes/net/htmlparser/jericho/StartTagTypeServerCommon.class0000644000175000017500000000074411214132416027711 0ustar twernertwerner1   INSTANCE1Lnet/htmlparser/jericho/StartTagTypeServerCommon;()VCodecommon server tag<%%> /net/htmlparser/jericho/StartTagTypeServerCommon 
8net/htmlparser/jericho/StartTagTypeGenericImplementation](Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Lnet/htmlparser/jericho/EndTagType;Z)V0    *   Yjericho-html-3.1/classes/net/htmlparser/jericho/SourceFormatter$Processor.class0000644000175000017500000001746011214132414030047 0ustar twernertwerner1J \ [  [ [ [ [ [ [ [ [ [   [ [ [ [    [ [ [  [  Q [ [ [ [  [    [ [  [ [ [ [ [ [ [ [  Q [  segment Lnet/htmlparser/jericho/Segment; sourceTextLjava/lang/CharSequence; indentStringLjava/lang/String;tidyTagsZcollapseWhiteSpaceremoveLineBreaksindentAllElementsindentScriptElementsnewLine appendableLjava/lang/Appendable;nextTagLnet/htmlparser/jericho/Tag;indexI$assertionsDisabledL(Lnet/htmlparser/jericho/Segment;Ljava/lang/String;ZZZZZLjava/lang/String;)VCodeappendTo(Ljava/lang/Appendable;)V Exceptions appendContent(ILjava/util/List;I)V Signature7(ILjava/util/List;I)V inlinable#(Lnet/htmlparser/jericho/Element;)Z appendText(II)V appendElement'(Lnet/htmlparser/jericho/Element;IIZZ)V updateNextTag()VappendIndentedScriptContentappendTextPreserveIndentation(II)Z(III)VappendTextRemoveIndentation(I)ZgetStartOfLinePos(IZ)IappendSpecifiedTextInline(Ljava/lang/CharSequence;I)VappendSpecifiedLine(Ljava/lang/CharSequence;I)IappendTextInline(IIZ)ZappendLineKeepWhiteSpaceappendTextCollapseWhiteSpaceappendContentPreformatted appendTag!(Lnet/htmlparser/jericho/Tag;II)V appendIndent(I)VappendFormattingNewLineappendEssentialNewLine$containsOnlyInlineLevelChildElements q ]^   _` ab cd ed fd gd hd ib jknet/htmlparser/jericho/Source  o   lm no  o   xy pdjava/lang/AssertionError   net/htmlparser/jericho/Element ~ |} pretextareascript }       !" 
#$%& '( )*  +, - ./ q 01  23  4  51 6 78 9: ; <= > ?net/htmlparser/jericho/StartTag @A 7B Cb DEF G&net/htmlparser/jericho/SourceFormatterH I0net/htmlparser/jericho/SourceFormatter$Processor Processor InnerClassesjava/lang/Objectjava/io/IOExceptionnet/htmlparser/jericho/SegmentsourceLnet/htmlparser/jericho/Source;toString()Ljava/lang/String;fullSequentialParse()[Lnet/htmlparser/jericho/Tag;begin getNextTag(I)Lnet/htmlparser/jericho/Tag;endgetChildElements()Ljava/util/List;java/util/Listiterator()Ljava/util/Iterator;java/util/IteratorhasNext()Znext()Ljava/lang/Object;getName getStartTag#()Lnet/htmlparser/jericho/StartTag;getStartTagType'()Lnet/htmlparser/jericho/StartTagType;#net/htmlparser/jericho/StartTagTypeDOCTYPE_DECLARATION%Lnet/htmlparser/jericho/StartTagType;NORMAL#net/htmlparser/jericho/HTMLElementsgetElementNamescontains(Ljava/lang/Object;)ZgetInlineLevelElementNames()Ljava/util/Set; java/util/Setjava/lang/CharSequencecharAt(I)C isWhiteSpace(C)Z getEndTag!()Lnet/htmlparser/jericho/EndTag;java/lang/Mathmin(II)I getContentEnd()Inet/htmlparser/jericho/EndTagnet/htmlparser/jericho/Tag()Lnet/htmlparser/jericho/Tag;lengthjava/lang/Appendableappend(C)Ljava/lang/Appendable; getTagType"()Lnet/htmlparser/jericho/TagType;COMMENT CDATA_SECTIONnet/htmlparser/jericho/TagType isServerTagtidy getAttributes%()Lnet/htmlparser/jericho/Attributes;0(Ljava/lang/CharSequence;)Ljava/lang/Appendable;name getElement"()Lnet/htmlparser/jericho/Element;java/util/CollectionisEmptyjava/lang/ClassdesiredAssertionStatus0[\ ]^_`abcdedfdgdhdibjklmnopdqrsV J**+*+*,*** * * * tusZN*+ **W********vwxys* Y,::6* ** j*!*":# $* 5%* !** *& X** Yvwz{|}spd+'(M,),*+"N-%* * +-,--.* *+&~sz* Y***/0*Y`Z*1* *2 *3W*4* Yvwsg [*+ Y* Y+':+5:*1*6*)*4*+78Y*9*4+:66*[ *;G*`<;)* *2%*3*46*+=`?>6*1*> Y*6*4 *4*+78Y*9vws1%* *?***@߱s* Y* *AW* Y*B>*+*1*C*D**B>***dE*D* Yvwszn* Y* *A*C**B>**D*`*dE* Yvws* Y*1*C*T69**/6  *Y`Z*D*1*C* Yvws* Y*C**A**/=  *Y`Z*D*C* Yvwsh\ *>**/6  *`>  *Y`ZsXL+F>*+G69`6+/0 *D*1*+G6ұvwscW+F>+/6 `6+/ ` 
`* HWvws* Y*C* `6**/0*Y`Z*D*1*C*Ʋ* Yvwsڲ* Y*I*$**?**6*ܱ**/> B*`64*/ $*Y`* Y $*Y`* Y* HW*Y`ZCvwsɲ* Y>*I**6**?(*  HW>**6*ʱ**YZ`/60>!*  HW>* HW}*  HW* Yvws* Y*I*$**?**6*ܱ* **/HW*Y`Z* Yvws Ȳ*+? Y* Y*+@+J +J6* Y+KL+KM +KN*OWI*9+P:+Q+QR* SW *T* *3W+J +Q* Y+U% * +KN+V:5:* Y>>66* *O6>* Y *D*1*> Y*6* Yvws/#* =* *SWvws"* * * SWvws* * SWvw}sdX+=M,W,XN-9-:":%-.*&sYZ [Yjericho-html-3.1/classes/net/htmlparser/jericho/FormControl$TextAreaFormControl.class0000644000175000017500000000431411214132414031104 0ustar twernertwerner1k +, - . / 0 1 23 4 56 7 89 5: ; 8< = >? @A @BC DEHvalueLjava/lang/String; UNCHANGED#(Lnet/htmlparser/jericho/Element;)VCodesetValue(Ljava/lang/String;)Z addValuesTo(Ljava/util/Collection;)V Signature-(Ljava/util/Collection;)VaddToFormFields&(Lnet/htmlparser/jericho/FormFields;)VreplaceInOutputDocument*(Lnet/htmlparser/jericho/OutputDocument;)VgetValue()Ljava/lang/String;()VI JK N   '( OPQ RS TUV WU XYZ [\ ]U ^_ `a b&c def gh ihjava/lang/String *6net/htmlparser/jericho/FormControl$TextAreaFormControlTextAreaFormControl InnerClasses"net/htmlparser/jericho/FormControl&net/htmlparser/jericho/FormControlTypeTEXTAREA(Lnet/htmlparser/jericho/FormControlType;jr(Lnet/htmlparser/jericho/Element;Lnet/htmlparser/jericho/FormControlType;ZLnet/htmlparser/jericho/FormControl$1;)V access$200+(Ljava/util/Collection;Ljava/lang/String;)V!net/htmlparser/jericho/FormFieldsadd'(Lnet/htmlparser/jericho/FormControl;)V outputStyle/Lnet/htmlparser/jericho/FormControlOutputStyle;-net/htmlparser/jericho/FormControlOutputStyleREMOVE getElement"()Lnet/htmlparser/jericho/Element;%net/htmlparser/jericho/OutputDocumentremove#(Lnet/htmlparser/jericho/Segment;)V DISPLAY_VALUEgetDisplayValueHTML-(Ljava/lang/CharSequence;Z)Ljava/lang/String;replace;(Lnet/htmlparser/jericho/Segment;Ljava/lang/CharSequence;)V+replaceAttributesInOutputDocumentIfModifiednet/htmlparser/jericho/Element 
getContent"()Lnet/htmlparser/jericho/Segment;)net/htmlparser/jericho/CharacterReferenceencode,(Ljava/lang/CharSequence;)Ljava/lang/String;decode$net/htmlparser/jericho/FormControl$10*+**+  +*!"#$+*%&aU* +* B* +* ** $*+*+* *'((** *)* YGFLjericho-html-3.1/classes/net/htmlparser/jericho/Tag.class0000644000175000017500000001474311214132414023473 0ustar twernertwerner1#  D D D D D D  D D  D D D D D D D D D D D D D D . D D D  > D > D  nameLjava/lang/String;userDataLjava/lang/Object;element Lnet/htmlparser/jericho/Element; previousTagLnet/htmlparser/jericho/Tag;nextTag NOT_CACHEDINCLUDE_UNREGISTERED_IN_SEARCHZ ConstantValue6(Lnet/htmlparser/jericho/Source;IILjava/lang/String;)VCode()V getElement"()Lnet/htmlparser/jericho/Element;getName()Ljava/lang/String;getNameSegment"()Lnet/htmlparser/jericho/Segment; getTagType"()Lnet/htmlparser/jericho/TagType; getUserData()Ljava/lang/Object; setUserData(Ljava/lang/Object;)V getNextTag()Lnet/htmlparser/jericho/Tag;getPreviousTagisUnregistered()Ztidy isXMLName(Ljava/lang/CharSequence;)ZisXMLNameStartChar(C)Z isXMLNameChargetNextStartTag#()Lnet/htmlparser/jericho/StartTag;getPreviousStartTag>(Lnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag;includeInSearch>(Lnet/htmlparser/jericho/Source;I)Lnet/htmlparser/jericho/Tag;getPreviousTagUncached?(Lnet/htmlparser/jericho/Source;II)Lnet/htmlparser/jericho/Tag;getNextTagUncached^(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag;_(Lnet/htmlparser/jericho/Source;ILnet/htmlparser/jericho/TagType;I)Lnet/htmlparser/jericho/Tag;getTagAt?(Lnet/htmlparser/jericho/Source;IZ)Lnet/htmlparser/jericho/Tag;getTagAtUncachedparseAll?(Lnet/htmlparser/jericho/Source;Z)[Lnet/htmlparser/jericho/Tag;parseAllgetNextTaga(Lnet/htmlparser/jericho/Source;Lnet/htmlparser/jericho/ParseText;IZ)Lnet/htmlparser/jericho/Tag;orphan isOrphaned ^ PQ RS YU TU YW VW XW e ^a hi Q net/htmlparser/jericho/Segment n~ r p~ vw xw w w nonet/htmlparser/jericho/StartTag po  n p qr [  p 
 n   }r#java/lang/IndexOutOfBoundsExceptionjava/lang/AssertionErrorUnexpected internal exception ^m     [ p  n e     java/util/ArrayList  net/htmlparser/jericho/Tag        ! "script$(Lnet/htmlparser/jericho/Source;II)Vnet/htmlparser/jericho/Elementjava/lang/String toLowerCase#net/htmlparser/jericho/HTMLElementsgetConstantElementName&(Ljava/lang/String;)Ljava/lang/String;beginInet/htmlparser/jericho/TagTypestartDelimiterPrefixlength()IsourceLnet/htmlparser/jericho/Source;net/htmlparser/jericho/SourcewasFullSequentialParseCalledjava/lang/CharSequencecharAt(I)Cjava/lang/CharacterisLetterisLetterOrDigit#net/htmlparser/jericho/StartTagType UNREGISTERED%Lnet/htmlparser/jericho/StartTagType;!net/htmlparser/jericho/EndTagType#Lnet/htmlparser/jericho/EndTagType;useAllTypesCachecacheLnet/htmlparser/jericho/Cache;net/htmlparser/jericho/Cache(I)Lnet/htmlparser/jericho/Tag; getParseText$()Lnet/htmlparser/jericho/ParseText; net/htmlparser/jericho/ParseText lastIndexOf(CII)IindexOfenduseSpecialTypesCache?(ILnet/htmlparser/jericho/TagType;)Lnet/htmlparser/jericho/Tag;getStartDelimiter(Ljava/lang/String;II)I (IZ)Lnet/htmlparser/jericho/Tag;@(Lnet/htmlparser/jericho/Source;IZZ)Lnet/htmlparser/jericho/Tag;fullSequentialParseData[Iadd(Ljava/lang/Object;)Z loadAllTagsR(Ljava/util/List;[Lnet/htmlparser/jericho/Tag;[Lnet/htmlparser/jericho/StartTag;)V allTagsArray[Lnet/htmlparser/jericho/Tag;java/util/ArraysasList%([Ljava/lang/Object;)Ljava/util/List;allTagsLjava/util/List; allStartTags(CI)IDOCTYPE_DECLARATIONNORMAL!DPQRSTUVWXWYWZ[\]#^_`?3*+****** ^a`0$* *****bcde`*fg`2&* * `<Y**`hijk`*lm`*+no`:.*"** `L*+*+*po`, **** d*qrsetu`K?***<**vw`%_ :xw`1%.-_ :yz`%*L+L+++{z`%*L+L+++n|`bV+*+ +** `+ *M,*, `+ ,M,, +,p|`bV+*+ +** d+!*M,*, d+!,M,, +,}r` *"p~`%*#*$% *&n~`%*#*$' *(`rN*)N6-<*6*+:,ͧN.Y/0A-5A-6>A-`lH*)N6-<16*+:,*2ɧNE-5E-6BE-p`%*3 *$,4*,5n`%*3 *$,6*,7`\, *&,8:*):696*+: ,˧:/U-0IU-JRU-`], *(,8:*):6:6*+: ,*2ǧ:/Y-0IY-JVY-`&*#*$; *<`*=` =>>Y?:* @*2m*):*A:XBW"" C  `6*2*A:D::*$E*F*GH*GId66<2:   d2  
`2`6+<J6*=:YR :C*@.=K5-%*@LMNCO*2~:-{-|-a`*r`&**a` YO././@LongLink0000000000000000000000000000014600000000000011566 Lustar rootrootjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$StandardInlineElementHandler.classjericho-html-3.1/classes/net/htmlparser/jericho/Renderer$Processor$StandardInlineElementHandler.clas0000644000175000017500000000141111214132414033767 0ustar twernertwerner1"    INSTANCE Processor InnerClassesElementHandler:Lnet/htmlparser/jericho/Renderer$Processor$ElementHandler;()VCodeprocessN(Lnet/htmlparser/jericho/Renderer$Processor;Lnet/htmlparser/jericho/Element;)V Exceptions  Fnet/htmlparser/jericho/Renderer$Processor$StandardInlineElementHandlerStandardInlineElementHandler  java/lang/Object8net/htmlparser/jericho/Renderer$Processor$ElementHandler!)net/htmlparser/jericho/Renderer$Processorjava/io/IOException access$000net/htmlparser/jericho/Renderer  *+, Y       jericho-html-3.1/classes/net/htmlparser/jericho/TextExtractor.class0000644000175000017500000000612611214132414025574 0ustar twernertwerner1 (X YZ '[ '\ '] '^ '_ `a 'b cd efg 'h 'i 'j k b 'l mn opq rs Xt 'u ovwxyz '{| '}~ #X %X Processor InnerClassesAttributeIncludeCheckersegment Lnet/htmlparser/jericho/Segment;convertNonBreakingSpacesZincludeAttributesexcludeNonHTMLElementsmapLjava/util/Map; SignatureaLjava/util/Map;ALWAYS_INCLUDE>Lnet/htmlparser/jericho/TextExtractor$AttributeIncludeChecker;!INCLUDE_IF_NAME_ATTRIBUTE_PRESENT#(Lnet/htmlparser/jericho/Segment;)VCodewriteTo(Ljava/io/Writer;)V 
ExceptionsappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String;setConvertNonBreakingSpaces)(Z)Lnet/htmlparser/jericho/TextExtractor;getConvertNonBreakingSpaces()ZsetIncludeAttributesgetIncludeAttributesincludeAttributeF(Lnet/htmlparser/jericho/StartTag;Lnet/htmlparser/jericho/Attribute;)ZsetExcludeNonHTMLElementsgetExcludeNonHTMLElementsexcludeElement$(Lnet/htmlparser/jericho/StartTag;)Z%initDefaultAttributeIncludeCheckerMap()Ljava/util/Map;c()Ljava/util/Map;()V :W 0 /0 10 20 -. AB W EF  .net/htmlparser/jericho/TextExtractor$Processor IJ LJ PJ : 34 F < *++?@AB< +* W?@CD< * EF<( Y*** **GH<**IJ<*KH<**LJ<*MN<+,N--+,OH<**PJ<*QR< ST<^RYK*W*W*W*W* W*!W*5UVW<'"#Y$%Y& +" '*', #%jericho-html-3.1/classes/net/htmlparser/jericho/AttributesOutputSegment.class0000644000175000017500000000357711214132416027657 0ustar twernertwerner1a 9: ; <=> ? 9@ A 9B C D 9E 9FG <H I JKL MNO P QRSTbeginIendmapLjava/util/Map; Signature5Ljava/util/Map;'(Lnet/htmlparser/jericho/Attributes;Z)VCode5(Lnet/htmlparser/jericho/Attributes;Ljava/util/Map;)V[(Lnet/htmlparser/jericho/Attributes;Ljava/util/Map;)VgetBegin()IgetEndgetMap()Ljava/util/Map;7()Ljava/util/Map;writeTo(Ljava/io/Writer;)V ExceptionsappendTo(Ljava/lang/Appendable;)VgetEstimatedMaximumOutputLength()JtoString()Ljava/lang/String; getDebugInfoU ,V $' $W"java/lang/IllegalArgumentExceptionboth arguments must be non-null $X )*  +*  ! 
YZ [\java/lang/StringBuilder(p ]^ ]_-p): 23java/io/IOExceptionjava/lang/RuntimeException $` 67.net/htmlparser/jericho/AttributesOutputSegmentjava/lang/Object$net/htmlparser/jericho/OutputSegment!net/htmlparser/jericho/Attributes(Z)Ljava/util/Map;()V(Ljava/lang/String;)V appendHTML((Ljava/lang/Appendable;Ljava/util/Map;)V generateHTML#(Ljava/util/Map;)Ljava/lang/String;append-(Ljava/lang/String;)Ljava/lang/StringBuilder;(I)Ljava/lang/StringBuilder;(Ljava/lang/Throwable;)V  !"# $%& *++$'&8,*,+ Y*+*+ *, "()*&*+*&* ,-&* "./0& +* 123& +* 145& * *dh67&* 87&R>YL+** W*+ MY,+',/jericho-html-3.1/build.bat0000644000175000017500000000526511214016216015471 0ustar twernertwerner@if defined JERICHO_JDK_HOME echo JERICHO_JDK_HOME is set to %JERICHO_JDK_HOME% set version=3.1 @set package_name=jericho-html-%version% @set dependencies=compile-time-dependencies\slf4j-api-1.5.6.jar;compile-time-dependencies\commons-logging-api-1.1.1.jar;compile-time-dependencies\log4j-api-1.2.15.jar @if defined JERICHO_JDK_HOME ( set JERICHO_JAVAC_EXE=%JERICHO_JDK_HOME%\bin\javac set JERICHO_JAVADOC_EXE=%JERICHO_JDK_HOME%\bin\javadoc set JERICHO_JAR_EXE=%JERICHO_JDK_HOME%\bin\jar ) else ( set JERICHO_JAVAC_EXE=javac set JERICHO_JAVADOC_EXE=javadoc set JERICHO_JAR_EXE=jar ) @if exist test\classes rd /s/q test\classes @if exist classes rd /s/q classes @md classes @if exist dist rd /s/q dist @md dist rem ----- Compile source: "%JERICHO_JAVAC_EXE%" -Xlint:unchecked -g:none -classpath %dependencies% -d classes src\java\net\htmlparser\jericho\*.java src\java\net\htmlparser\jericho\nodoc\*.java @rem *** "%JERICHO_JAVAC_EXE%" -Xlint:unchecked -g -classpath %dependencies% -d classes src\java\net\htmlparser\jericho\*.java src\java\net\htmlparser\jericho\nodoc\*.java @if errorlevel 1 goto end rem ----- Create jar library: "%JERICHO_JAR_EXE%" -cf dist\%package_name%.jar -C classes . 
@if "%1"=="-nojavadoc" goto buildsamples rem ----- Create docs: @if exist docs\javadoc rd /s/q docs\javadoc "%JERICHO_JAVADOC_EXE%" -quiet -windowtitle "Jericho HTML Parser %version%" -classpath src\java;classes;%dependencies% -use -d docs\javadoc -subpackages net.htmlparser.jericho -exclude net.htmlparser.jericho.nodoc -noqualifier net.htmlparser.jericho -group "Core Package" net.htmlparser.jericho type docs\src\append\stylesheet.css >> docs\javadoc\stylesheet.css copy docs\src\replace\*.* docs\javadoc rem ----- Build the command line samples: :buildsamples @if exist samples\console\classes rd /s/q samples\console\classes @md samples\console\classes "%JERICHO_JAVAC_EXE%" -Xlint:unchecked -g -deprecation -classpath dist/%package_name%.jar -d samples\console\classes samples\console\src\*.java @if errorlevel 1 goto end @if not exist samples\console\bat_lib md samples\console\bat_lib @echo @set package_name=%package_name%> samples\console\bat_lib\set_package_name.bat @if "%1"=="-nojavadoc" goto end rem ----- Create the sample web application WAR file: @if exist samples\webapps\JerichoHTML\WEB-INF\lib rd /s/q samples\webapps\JerichoHTML\WEB-INF\lib @md samples\webapps\JerichoHTML\WEB-INF\lib copy /y dist\%package_name%.jar samples\webapps\JerichoHTML\WEB-INF\lib "%JERICHO_JAR_EXE%" -cf samples\webapps\JerichoHTML.war -C samples\webapps\JerichoHTML . rem ----- Zip up the whole package: "%JERICHO_JAR_EXE%" -cMf ..\%package_name%.zip -C .. %package_name% :end
  • tag isn't found inside the attribute value for (int x=0; x<5; x++) segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.CDATA_SECTION,startTag.getTagType()); assertEquals(" example of markup that is not to write with < and such.\r\n]]>",segment.toString()); assertEquals("\r\n example of markup that is not to write with < and such.\r\n",startTag.getTagContent().toString()); segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); segment=i.next(); assertEquals("",segment.toString()); segment=i.next(); segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); segment=i.next(); assertEquals("<% server tag %>",segment.toString()); // server tag is recognised inside comment segment=i.next(); assertEquals("\r\n",segment.toString()); // processing instruction isn't recognised inside comment segment=i.next(); assertEquals("
    ",segment.toString()); segment=i.next(); assertEquals("\r\n<*abc def=\"ghi\">\r\n This is an example of an element from a hypothetical server language \r\n whose tag formats have not been registered with the TagTypeRegister class \r\n\r\n",segment.toString()); segment=i.next(); assertEquals("

    ",segment.toString()); segment=i.next(); assertEquals(20071,segment.length()); segment=i.next(); assertEquals("

    ",segment.toString()); segment=i.next(); segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); assertEquals(20077,segment.length()); segment=i.next(); assertTrue(i.hasNext()); segment=i.next(); assertEquals("",segment.toString()); assertTrue(i.hasNext()); segment=i.next(); assertEquals("\r\n",segment.toString()); assertFalse(i.hasNext()); try { segment=i.next(); fail("Should throw NoSuchElementException"); } catch (NoSuchElementException ex) {} } @Test public void testLegacyIteratorCompatabilityMode() throws Exception { Source source=new Source(new URL(sourceUrlString)); Segment segment; Source.LegacyIteratorCompatabilityMode=true; Iterator i=source.iterator(); Source.LegacyIteratorCompatabilityMode=false; for (int x=0; x<30; x++) segment=i.next(); segment=i.next(); assertEquals("This paragraph contains character references: € and ©.",segment.toString()); assertEquals("This paragraph contains character references: \u20AC and \u00A9.",CharacterReference.decode(segment.toString())); } @Test public void testCharacterReferences() throws Exception { String sourceText="&
    &ww&&xxyy&zz&&"; Source source=new Source(sourceText); Segment segment; Iterator i=source.iterator(); assertTrue(i.next() instanceof CharacterReference); assertTrue(i.next() instanceof Tag); assertTrue(i.next() instanceof CharacterReference); assertTrue(i.next() instanceof Tag); assertEquals("ww",i.next().toString()); assertTrue(i.next() instanceof CharacterReference); assertTrue(i.next() instanceof Tag); assertTrue(i.next() instanceof CharacterReference); assertEquals("xx",i.next().toString()); assertTrue(i.next() instanceof Tag); assertEquals("yy",i.next().toString()); assertTrue(i.next() instanceof CharacterReference); assertEquals("zz",i.next().toString()); assertTrue(i.next() instanceof CharacterReference); assertTrue(i.next() instanceof Tag); assertTrue(i.next() instanceof CharacterReference); } /* @Test public void benchmark() throws Exception { for (int i=0; i<5000; i++) { for (Segment segment : new Source(new URL(sourceUrlString))) {} } } */ } jericho-html-3.1/test/src/net/htmlparser/jericho/StreamedParseTextTest.java0000644000175000017500000001113311174112362027144 0ustar twernertwernerpackage net.htmlparser.jericho; import org.junit.Test; import static org.junit.Assert.*; import java.io.*; import java.nio.*; public class StreamedParseTextTest { private static final String text="ABCDEFGHIJKLMNOPQRSTUVWXYZ<0123456789A"; @Test public void testExpandableBuffer() { Reader reader=new StringReader(text); int originalInitialExpandableBufferSize=StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE; StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=2; StreamedText streamedText=new StreamedText(reader); StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=originalInitialExpandableBufferSize; StreamedParseText streamedParseText=new StreamedParseText(streamedText); assertEquals('a',streamedParseText.charAt(0)); assertEquals(2,streamedText.getBuffer().length); assertEquals("ab",streamedParseText.substring(0,2)); assertEquals('c',streamedParseText.charAt(2)); 
assertEquals(4,streamedText.getBuffer().length); int tagStartPos=streamedParseText.indexOf('<',0); streamedText.setMinRequiredBufferBegin(tagStartPos); assertEquals(26,tagStartPos); assertEquals(32,streamedText.getBuffer().length); assertEquals(7,streamedParseText.indexOf('h',5)); assertEquals('z',streamedParseText.charAt(25)); assertEquals(36,streamedParseText.indexOf('9',0)); // position 0 is still available in the buffer at start of search, then as search passes position 32 it is shifted to discard up to position 26 (setMinRequiredBufferBegin(26) called implicitly by writePlainTextSegment call above), allowing the rest of the text to fit in the buffer without expanding it. assertEquals(32,streamedText.getBuffer().length); assertEquals("<0123456789A",streamedText.getCurrentBufferContent()); try { streamedParseText.charAt(0); // now position 0 is no longer in the buffer fail("Should throw IllegalStateException"); } catch (IllegalStateException ex) { assertEquals("StreamedText position 0 has been discarded",ex.getMessage()); } assertEquals(-1,streamedParseText.indexOf('A',30)); // search strings must be in lower case. 
} @Test public void testFixedBuffer() { Reader reader=new StringReader(text); char[] buffer=new char[10]; StreamedText streamedText=new StreamedText(reader,buffer); StreamedParseText streamedParseText=new StreamedParseText(streamedText); assertEquals('a',streamedParseText.charAt(0)); assertEquals("abc",streamedParseText.substring(0,3)); assertEquals(7,streamedParseText.indexOf('h',5)); assertEquals(10,streamedText.getBuffer().length); try { streamedParseText.indexOf('<',0); fail("Should throw BufferOverflowException"); } catch (BufferOverflowException ex) {} assertEquals(10,streamedText.getBuffer().length); try { streamedParseText.indexOf('z',5); fail("Should throw BufferOverflowException"); } catch (BufferOverflowException ex) {} try { streamedParseText.indexOf('H',5); fail("Should throw BufferOverflowException"); } catch (BufferOverflowException ex) {} streamedText.setMinRequiredBufferBegin(20); assertEquals('z',streamedParseText.charAt(25)); assertEquals('<',streamedParseText.charAt(26)); assertEquals("xyz<01",streamedParseText.substring(23,29)); streamedText.setMinRequiredBufferBegin(30); assertEquals(35,streamedParseText.indexOf('8',28)); assertEquals(36,streamedParseText.indexOf('9',30)); assertEquals(-1,streamedParseText.indexOf('A',30)); // search strings must be in lower case. 
assertEquals(37,streamedParseText.indexOf('a',30)); } @Test public void testCharBuffer() { char[] charArray=text.toCharArray(); CharBuffer charBuffer=CharBuffer.wrap(charArray,0,20); // no tag in document StreamedText streamedText=new StreamedText(charBuffer); StreamedParseText streamedParseText=new StreamedParseText(streamedText); int tagStartPos=streamedParseText.indexOf('<',15); assertEquals(-1,tagStartPos); assertEquals('a',streamedParseText.charAt(0)); assertEquals(-1,streamedParseText.indexOf('a',18)); streamedText.setMinRequiredBufferBegin(20); try { streamedParseText.charAt(25); fail("Should throw IndexOutOfBoundsException"); } catch (IndexOutOfBoundsException ex) {} streamedText.setMinRequiredBufferBegin(30); assertEquals(1,streamedParseText.indexOf('b',0)); charBuffer=CharBuffer.wrap(charArray,0,30); // tag in document streamedText=new StreamedText(charBuffer); streamedParseText=new StreamedParseText(streamedText); tagStartPos=streamedParseText.indexOf('<',15); assertEquals(26,tagStartPos); streamedText.setMinRequiredBufferBegin(tagStartPos); assertEquals('a',streamedParseText.charAt(0)); } } jericho-html-3.1/test/src/net/htmlparser/jericho/StreamedSourceHugeFileTest.java0000644000175000017500000000422711174142622030106 0ustar twernertwernerpackage net.htmlparser.jericho; import org.junit.Test; import static org.junit.Assert.*; import java.io.*; import java.net.*; import java.util.*; import java.nio.CharBuffer; import java.nio.BufferOverflowException; public class StreamedSourceHugeFileTest { private static final String sourceUrlString="file:D:/Data/StreamedSourceHugeFileTest.txt"; @Test public void testDefault() throws Exception { if (true) return; // disable test StreamedSource streamedSource=null; int segmentCount=0; try { int originalInitialExpandableBufferSize=StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE; StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=120; streamedSource=new StreamedSource(new URL(sourceUrlString)); 
StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=originalInitialExpandableBufferSize; assertEquals(120,streamedSource.getBufferSize()); for (Segment segment : streamedSource) { segmentCount++; } assertEquals(30720,streamedSource.getBufferSize()); assertEquals(680158,segmentCount); } finally { if (streamedSource!=null) streamedSource.close(); } } @Test public void exampleFetchElementContent() throws Exception { if (true) return; // disable test int paragraphCount=0; StreamedSource streamedSource=null; try { streamedSource=new StreamedSource(new URL(sourceUrlString)); StringBuilder sb=new StringBuilder(); boolean insideParagraphElement=false; for (Segment segment : streamedSource) { if (segment instanceof Tag) { Tag tag=(Tag)segment; if (tag.getName().equals("p")) { if (tag instanceof StartTag) { insideParagraphElement=true; sb.setLength(0); } else { insideParagraphElement=false; // don't actually do anything with paragraph text, just count them paragraphCount++; } } } else if (insideParagraphElement) { if (segment instanceof CharacterReference) { ((CharacterReference)segment).appendCharTo(sb); } else { sb.append(segment); } } } } finally { if (streamedSource!=null) streamedSource.close(); } assertEquals(20000,paragraphCount); } } jericho-html-3.1/test/src/net/htmlparser/jericho/StreamedSourceTest.java0000644000175000017500000004226611174141330026475 0ustar twernertwernerpackage net.htmlparser.jericho; import org.junit.Test; import static org.junit.Assert.*; import java.io.*; import java.net.*; import java.util.*; import java.nio.CharBuffer; import java.nio.BufferOverflowException; public class StreamedSourceTest { private static final String sourceUrlString="file:test/data/StreamedSourceTest.html"; @Test public void testDefault() throws Exception { StreamedSource streamedSource=null; Segment segment; StartTag startTag; try { int originalInitialExpandableBufferSize=StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE; StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=120; 
streamedSource=new StreamedSource(new URL(sourceUrlString)); StreamedText.INITIAL_EXPANDABLE_BUFFER_SIZE=originalInitialExpandableBufferSize; assertNull(streamedSource.getCurrentSegment()); // doesn't have a value until iterator().next() is called assertEquals("UTF-8",streamedSource.getEncoding()); try { streamedSource.isXML(); fail("Should throw IllegalStateException"); } catch (IllegalStateException ex) { assertEquals("isXML() method only available after iterator() has been called",ex.getMessage()); } Iterator i=streamedSource.iterator(); assertTrue(streamedSource.isXML()); assertTrue(i.hasNext()); assertNull(streamedSource.getCurrentSegment()); // doesn't have a value until i.next() is called segment=i.next(); assertSame(segment,streamedSource.getCurrentSegment()); assertEquals(StartTagType.XML_DECLARATION,((Tag)segment).getTagType()); assertEquals("",segment.toString()); assertEquals(" (XML declaration) ((p0)-(p39))",segment.getDebugInfo()); // note row and column information is not included try { segment.getSource(); fail("Should throw UnsupportedOperationException"); } catch (UnsupportedOperationException ex) { assertEquals("Source object is not available when using StreamedSource",ex.getMessage()); } assertTrue(i.hasNext()); segment=i.next(); assertEquals("\r\n",segment.toString()); assertTrue(i.hasNext()); segment=i.next(); assertEquals(StartTagType.SERVER_COMMON,((Tag)segment).getTagType()); assertEquals("<%@ page language=\"java\" %>",segment.toString()); assertEquals(120,streamedSource.getBufferSize()); CharBuffer charBuffer=streamedSource.getCurrentSegmentCharBuffer(); char[] charBufferArray=charBuffer.array(); assertEquals(120,charBufferArray.length); assertEquals(segment.toString(),new String(charBufferArray,charBuffer.position(),charBuffer.length())); segment=i.next(); assertEquals(StartTagType.SERVER_COMMON,((Tag)segment).getTagType()); assertEquals("<%@ taglib uri=\"/WEB-INF/struts-i18n.tld\" prefix=\"i18n\" %>",segment.toString()); 
segment=i.next(); assertEquals("\r\n",segment.toString()); segment=i.next(); assertEquals(StartTagType.XML_PROCESSING_INSTRUCTION,((Tag)segment).getTagType()); assertEquals("",segment.toString()); assertEquals(120,streamedSource.getBufferSize()); segment=i.next(); assertEquals(240,streamedSource.getBufferSize()); // last next() call fetches the next text segment as well as the following tag, which totals > 120 characters assertEquals("\r\n",segment.toString()); segment=i.next(); assertEquals(StartTagType.DOCTYPE_DECLARATION,((Tag)segment).getTagType()); assertEquals("\r\n \">\r\n]>",segment.toString()); segment=i.next(); assertEquals(StartTagType.MARKUP_DECLARATION,((Tag)segment).getTagType()); assertEquals("",segment.toString()); segment=i.next(); assertEquals(StartTagType.MARKUP_DECLARATION,((Tag)segment).getTagType()); assertEquals("\">",segment.toString()); for (int x=0; x<7; x++) segment=i.next(); assertEquals("Jericho HTML Parser Test Document",segment.toString()); for (int x=0; x<5; x++) segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.NORMAL,startTag.getTagType()); assertEquals("",segment.toString()); assertEquals("HTML parser,test document,R&D",startTag.getAttributeValue("content")); // note that character reference inside attribute value is not handled as a separate segment try { startTag.getElement(); fail("Should throw UnsupportedOperationException"); } catch (UnsupportedOperationException ex) { assertEquals("Elements are not supported when using StreamedSource",ex.getMessage()); } for (int x=0; x<8; x++) segment=i.next(); StringWriter plainTextWriter=new StringWriter(); segment=i.next(); assertEquals("This paragraph contains character references: ",segment.toString()); plainTextWriter.append(segment); segment=i.next(); assertEquals("€",segment.toString()); CharacterEntityReference characterEntityReference=(CharacterEntityReference)segment; characterEntityReference.appendCharTo(plainTextWriter); segment=i.next(); assertEquals(" 
and ",segment.toString()); plainTextWriter.append(segment); segment=i.next(); assertEquals("©",segment.toString()); NumericCharacterReference numericCharacterReference=(NumericCharacterReference)segment; numericCharacterReference.appendCharTo(plainTextWriter); segment=i.next(); assertEquals(".",segment.toString()); plainTextWriter.append(segment); assertEquals("This paragraph contains character references: \u20AC and \u00A9.",plainTextWriter.toString()); for (int x=0; x<3; x++) segment=i.next(); startTag=(StartTag)segment; assertEquals(StartTagType.COMMENT,startTag.getTagType()); assertEquals("",segment.toString()); assertEquals("

    This paragraph is commented out

    ",startTag.getTagContent().toString()); segment=i.next(); assertEquals("\r\n",segment.toString()); // note that

    tag isn't found inside comment segment=i.next(); assertEquals("This element is defined inside an onclick attribute