jtidy/0000755000175000017500000000000011617345035012144 5ustar twernertwernerjtidy/src/0000755000175000017500000000000011617345035012733 5ustar twernertwernerjtidy/src/main/0000755000175000017500000000000011617345035013657 5ustar twernertwernerjtidy/src/main/resources/0000755000175000017500000000000011617345035015671 5ustar twernertwernerjtidy/src/main/resources/jtidy.properties0000644000175000017500000000010311305157010021110 0ustar twernertwerner# to be updated with every release version = r938 date = 2009-12-01jtidy/src/main/resources/org/0000755000175000017500000000000011617345035016460 5ustar twernertwernerjtidy/src/main/resources/org/w3c/0000755000175000017500000000000011617345035017154 5ustar twernertwernerjtidy/src/main/resources/org/w3c/tidy/0000755000175000017500000000000011617345035020125 5ustar twernertwernerjtidy/src/main/resources/org/w3c/tidy/config.txt0000644000175000017500000000051310460747603022134 0ustar twernertwerner# sample config file for Java HTML tidy indent=auto indent-spaces=2 wrap=72 markup=yes clean=yes output-xml=no input-xml=no show-warnings=yes numeric-entities=yes quote-marks=yes quote-nbsp=yes quote-ampersand=no break-before-br=no uppercase-tags=yes uppercase-attributes=yes smart-indent=no output-xhtml=yes char-encoding=latin1 jtidy/src/main/resources/org/w3c/tidy/TidyMessages.properties0000644000175000017500000003331411304775000024640 0ustar twernertwerneranchor_not_unique={0} Anchor "{1}" already defined apos_undefined=Named Entity ' only defined in XML/XHTML attr_value_not_lcase={0} attribute value "{1}" for "{2}" must be lower case for XHTML # to be translated backslash_in_uri={0} URI reference contains backslash. Typo? 
bad_argument=Warning - missing or malformed argument "{1}" for option "{0}" bad_attribute_value={0} attribute "{1}" has invalid value "{2}" bad_cdata_content='<' + '/' + letter not allowed here bad_comment_chars=expecting -- or > bad_tree=Panic - tree has lost its integrity bad_xml_comment=XML comments can't contain -- badaccess_frames=Pages designed using frames presents problems for\u000apeople who are either blind or using a browser that\u000adoesn't support frames. A frames-based page should always\u000ainclude an alternative layout inside a NOFRAMES element. badaccess_missing_image_alt=The alt attribute should be used to give a short description\u000aof an image; longer descriptions should be given with the\u000alongdesc attribute which takes a URL linked to the description.\u000aThese measures are needed for people using non-graphical browsers. badaccess_missing_image_map=Use client-side image maps in preference to server-side image\u000amaps as the latter are inaccessible to people using non-\u000agraphical browsers. In addition, client-side maps are easier\u000ato set up and provide immediate feedback to users. badaccess_missing_link_alt=For hypertext links defined using a client-side image map, you\u000aneed to use the alt attribute to provide a textual description\u000aof the link for people using non-graphical browsers. badaccess_missing_summary=The table summary attribute should be used to describe\u000athe table structure. It is very helpful for people using\u000anon-visual browsers. The scope and headers attributes for\u000atable cells are useful for specifying which headers apply\u000ato each table cell, enabling non-visual browsers to provide\u000aa meaningful context for each cell. badaccess_summary=For further advice on how to make your pages accessible\u000asee "{0}". You may also want to try\u000a"http://www.cast.org/bobby/" which is a free Web-based\u000aservice for checking URLs for accessibility. 
badchars_summary=Characters codes for the Microsoft Windows fonts in the range\u000a128 - 159 may not be recognized on other platforms. You are\u000ainstead recommended to use named entities, e.g. ™ rather\u000athan Windows character code 153 (0x2122 in Unicode). Note that\u000aas of February 1998 few browsers support the new entities." badform_summary=You may need to move one or both of the
and
\u000atags. HTML elements should be properly nested and form elements\u000aare no exception. For instance you should not place the
\u000ain one table cell and the
in another. If the
is\u000aplaced before a table, the
cannot be placed inside the\u000atable! Note that one form can't be nested inside another! badlayout_using_body=You are recommended to use CSS to specify page and link colors badlayout_using_font=You are recommended to use CSS to specify the font and\u000aproperties such as its size and color. This will reduce\u000athe size of HTML files and make them easier to maintain\u000acompared with using elements. badlayout_using_layer=The Cascading Style Sheets (CSS) Positioning mechanism\u000ais recommended in preference to the proprietary \u000aelement due to limited vendor support for LAYER. badlayout_using_nobr=You are recommended to use CSS to control line wrapping.\u000aUse "white-space: nowrap" to inhibit wrapping in place\u000aof inserting ... into the markup. badlayout_using_spacer=You are recommended to use CSS for controlling white\u000aspace (e.g. for indentation, margins and line spacing).\u000aThe proprietary element has limited vendor support. cant_be_nested={0} can''t be nested coerce_to_endtag=<{0}> is probably intended as content_after_body=content occurs after end of body discarding_unexpected=discarding unexpected {0} doctype_after_tags= isn't allowed after elements doctype_given={0}: Doctype given is "{1}" dtype_not_upper_case=SYSTEM, PUBLIC, W3C, DTD, EN must be upper case duplicate_frameset=repeated FRAMESET element element_not_empty={0} element not empty or not closed emacs_format={0}:{1,number}:{2,number}: encoding_mismatch=specified input encoding ({0}) does not match actual input encoding ({1}) entity_in_id=no entities allowed in id attribute, discarding "&" error=Error: escaped_illegal_uri={0} escaping malformed URI reference expected_equalsign={0} unexpected '=', expected attribute name fixed_backslash={0} converting backslash in URI to slash forced_end_anchor=Warning: is probably intended as general_info=To learn more about JTidy see http://jtidy.sourceforge.net\u000aPlease report bugs at 
http://sourceforge.net/tracker/?group_id=13153&atid=113153\u000aHTML & CSS specifications are available from http://www.w3.org/\u000aLobby your company to join W3C, see http://www.w3.org/Consortium hello_message=Tidy (vers {0, date}) Parsing "{1}" help_text={0} [option...] [file...]\u000aUtility to clean up and pretty print HTML/XHTML/XML\u000asee http://jtidy.sourceforge.net/\u000a\u000aOptions for JTidy released on {1}\u000aProcessing directives\u000a---------------------\u000a -indent or -i to indent element content\u000a -omit or -o to omit optional end tags\u000a -wrap to wrap text at the specified (default is 68)\u000a -upper or -u to force tags to upper case (default is lower case)\u000a -clean or -c to replace FONT, NOBR and CENTER tags by CSS\u000a -bare or -b to strip out smart quotes and em dashes, etc.\u000a -numeric or -n to output numeric rather than named entities\u000a -errors or -e to only show errors\u000a -quiet or -q to suppress nonessential output\u000a -xml to specify the input is well formed XML\u000a -asxml to convert HTML to well formed XHTML\u000a -asxhtml to convert HTML to well formed XHTML\u000a -ashtml to force XHTML to well formed HTML\u000a -slides to burst into slides on H2 elements\u000a\u000aCharacter encodings\u000a-------------------\u000a -raw to output values above 127 without conversion to entities\u000a -ascii to use US-ASCII for output, ISO-8859-1 for input\u000a -latin1 to use ISO-8859-1 for both input and output\u000a -iso2022 to use ISO-2022 for both input and output\u000a -utf8 to use UTF-8 for both input and output\u000a -mac to use MacRoman for input, US-ASCII for output\u000a -utf16le to use UTF-16LE for both input and output\u000a -utf16be to use UTF-16BE for both input and output\u000a -utf16 to use UTF-16 for both input and output\u000a -win1252 to use Windows-1252 for input, US-ASCII for output\u000a -big5 to use Big5 for both input and output\u000a -shiftjis to use Shift_JIS for both input and output\u000a 
-language to set the two-letter language code (for future use)\u000a\u000aFile manipulation\u000a-----------------\u000a -config to set configuration options from the specified \u000a -f to write errors to the specified \u000a -modify or -m to modify the original input files\u000a\u000aMiscellaneous\u000a-------------\u000a -version or -v to show the version of Tidy\u000a -help, -h or -? to list the command line options\u000a -help-config to list all configuration options\u000a -show-config to list the current configuration settings\u000a\u000aYou can also use --blah for any configuration option blah\u000a\u000aInput/Output default to stdin/stdout respectively\u000aSingle letter options apart from -f may be combined\u000aas in: tidy -f errs.txt -imu foo.html\u000aFor further info on HTML see http://www.w3.org/MarkUp id_name_mismatch={0} id and name attribute value mismatch illegal_char=Warning: replacing illegal character code {0,number} illegal_nesting={0} shouldn''t be nested illegal_uri_reference={0} improperly escaped URI reference inconsistent_namespace=html namespace doesn't match content inconsistent_version=html doctype doesn't match content inserting_tag=inserting implicit <{0}> invalid_char={0,choice,0#replacing|1#discarding} invalid character code {1} invalid_ncr={0,choice,0#replacing|1#discarding} invalid numeric character reference {1} invalid_sgml_chars_summary=Character codes 128 to 159 (U+0080 to U+009F) are not allowed in HTML;\u000aeven if they were, they would likely be unprintable control characters.\u000aTidy assumed you wanted to refer to a character with the same byte value in the \u000a{0,choice,0#specified|1#Windows-1252|2#MacRoman} encoding and replaced that reference with the Unicode equivalent. invalid_utf16={0,choice,0#replacing|1#discarding} invalid UTF-16 surrogate pair (char. 
code {1}) invalid_utf16_summary=Character codes for UTF-16 must be in the range: U+0000 to U+10FFFF.\u000aThe definition of UTF-16 in Annex C of ISO/IEC 10646-1:2000 does not allow the\u000amapping of unpaired surrogates. For more information please refer to\u000ahttp://www.unicode.org/unicode and http://www.cl.cam.ac.uk/~mgk25/unicode.html invalid_utf8={0,choice,0#replacing|1#discarding} invalid UTF-8 bytes (char. code {1}) invalid_utf8_summary=Character codes for UTF-8 must be in the range: U+0000 to U+10FFFF.\u000aThe definition of UTF-8 in Annex D of ISO/IEC 10646-1:2000 also\u000aallows for the use of five- and six-byte sequences to encode\u000acharacters that are outside the range of the Unicode character set;\u000athose five- and six-byte sequences are illegal for the use of\u000aUTF-8 as a transformation of Unicode characters. ISO/IEC 10646\u000adoes not allow mapping of unpaired surrogates, nor U+FFFE and U+FFFF\u000a(but it does allow other noncharacters). For more information please refer to\u000ahttp://www.unicode.org/unicode and http://www.cl.cam.ac.uk/~mgk25/unicode.html invaliduri_summary=URIs must be properly escaped, they must not contain unescaped\u000acharacters below U+0021 including the space character and not\u000aabove U+007E. Tidy escapes the URI for you as recommended by\u000aHTML 4.01 section B.2.1 and XML 1.0 section 4.2.2. Some user agents\u000ause another algorithm to escape such URIs and some server-sided\u000ascripts depend on that. If you want to depend on that, you must\u000aescape the URI by your own. 
For more information please refer to\u000ahttp://www.w3.org/International/O-URL-and-ident.html joining_attribute={0} joining values of repeated attribute "{1}" line_column=line {0,number} column {1,number} - malformed_comment=adjacent hyphens within comment malformed_doctype=expected "html PUBLIC" or "html SYSTEM" missing_attr_value={0} attribute "{1}" lacks value missing_attribute={0} lacks "{1}" attribute missing_body=Can't create slides - document is missing a body element. missing_doctype=missing declaration missing_endtag_before=missing before {1} missing_endtag_for=missing missing_imagemap={0} should use client-side image map missing_quotemark={0} attribute with missing trailing quote mark missing_semicolon=Warning: entity "{0}" doesn''t end in '';'' missing_semicolon_ncr=numeric character reference "{0}" doesn't end in ";" missing_starttag=missing <{0}> missing_title_element=inserting missing 'title' element needs_author_intervention=This document has errors that must be fixed before\u000ausing HTML Tidy to generate a tidied up version. nested_emphasis=nested emphasis {0} nested_quotation=nested q elements, possible typo newline_in_uri={0} discarding newline in URI reference no_warnings=no warnings or errors were found noframes_content={0} not inside ''noframes'' element non_matching_endtag=replacing unexpected {0} by num_warnings={0,choice,0#no warnings|1#1 warning|1<{0,number,integer} warnings}, {1,choice,0#no errors|1#1 error|2#{1,number,integer} errors} were found! 
obsolete_element=replacing obsolete element {0} by {1} proprietary_attr_value={0} proprietary attribute value "{1}" proprietary_attribute={0} proprietary attribute "{1}" proprietary_element={0} is not approved by W3C repeated_attribute={0} dropping value "{1}" for repeated attribute "{2}" replacing_element=replacing element {0} by {1} report_version={0}: Document content looks like {1} slides_found={0,number} Slides found suspected_missing_quote=missing quotemark for attribute value tag_not_allowed_in={0} isn''t allowed in <{1}> elements too_many_elements=too many {0} elements too_many_elements_in=too many {0} elements in <{1}> trim_empty_element=trimming empty {0} unescaped_ampersand=Warning: unescaped & which should be written as & unescaped_element=unescaped {0} in pre content unexpected_end_of_file=end of file while parsing attributes {0} unexpected_endtag=unexpected unexpected_endtag_in=unexpected in <{1}> unexpected_gt={0} missing ''>'' for end of tag unexpected_quotemark={0} unexpected or duplicate quote mark unknown_attribute=unknown attribute "{0}" unknown_element={0} is not recognized! unknown_entity=Warning: unescaped & or unknown entity "{0}" unknown_file={0}: can''t open file "{1}" unknown_option=Warning - unknown option: {0} unrecognized_option=unrecognized option -{0} use -help to list options using_br_inplace_of=using
in place of {0} vendor_specific_chars_summary=It is unlikely that vendor-specific, system-dependent encodings\u000awork widely enough on the World Wide Web; you should avoid using the \u000a{0,choice,0#specified|1#Windows-1252|2#MacRoman} character encoding, instead you are recommended to\u000ause named entities, e.g. ™. version_summary=JTidy released on {0} warning=Warning: xml_attribute_value={0} has XML attribute "{1}" xml_id_sintax=ID "{0}" uses XML ID syntax jtidy/src/main/resources/org/w3c/tidy/TidyMessages_es.properties0000644000175000017500000002357510460747603025351 0ustar twernertwerneranchor_not_unique= apos_undefined= attr_value_not_lcase= backslash_in_uri= bad_argument=Aviso: erroneo argumento "{1}" para opci\u00f3n: "{0}" bad_attribute_value={0} atributo "{1}" tiene valor desconocido "{2}" bad_cdata_content=letra '<' + '/' + no permitida aqu\u00ed bad_comment_chars=se espera -- o > bad_tree=Panic: - arbol ha perdido su integridad bad_xml_comment=comentarios XML no pueden contener -- badaccess_frames=P\u00e1ginas dise\u00f1adas usando marcos ( frames ) presentan problemas para usuarios ciegos o con navegadors que no los soporten. Una p\u00e1gina basada en marcos deberia incluir una composici\u00f3n alternativa dentro de un elemento NOFRAMES. badaccess_missing_image_alt=El atributo 'alt' se deberia usar para dar una corta descripci\u00f3n de una imagen; descripciones m\u00e1s largas deberian darse con el atributo 'longdesc' que utiliza una URL asociada con la descripci\u00f3n. Estas medidas son necesarias para gente usando navegadores no graficos. badaccess_missing_image_map=Utilice mapas de imagen de cliente en preferencia a mapas de servidor ya que estos son inaccesibles para gente usando navegadores no graficos. Ademas, mapas de cliente son m\u00e1s faciles de configurar y proporcionan realimentaci\u00f3n inmediata a los usuarios. 
badaccess_missing_link_alt=En enlaces de hipertexto que usan un mapa de imagen de cliente\u000ase debe usar el atributo 'alt' para proporcionar una descripci\u00f3n textual\u000adel enlace. badaccess_missing_summary=Se deber\u00eda usar el atributo 'summary' de una tabla para describir su estructura. Es de gran ayuda para gente usando navegadores no visuales. Los atributos 'scope' y 'headers' de las celdas de las tablas son utiles para especificar que cabeceras aplican a cada celda, permitiendo a navegadores no visuales el proporcionar un contecto significativo a cada celda. badaccess_summary=Para m\u00e1s recomendaciones sobre como hacer las p\u00e1ginas accesibles visite "{0}". Tambien le puede interesar visitar "http://www.cast.org/bobby/", un servicio gratuito para comprobar la accesibilidad de sitios web. badchars_summary=C\u00f3digos de caracteres para las fuentes de Microsoft Windows en el rango\u000a128 - 159 podr\u00edan no ser reconocidos en otras plataformas. Se recomienda\u000ausar entidades con nombre, por ejemplo ™ mejor que el c\u00f3digo de\u000acaracter 153 (0x2122 en Unicode) de Windows.\u000a badform_summary=Se deberia mover una o ambas de las etiquetas
y
.\u000aLos elementos HTML deberian estar adecuadamente anidados y los\u000aelementos 'form' no son excepci\u00f3n. Por ejemplo no se deberia colocar\u000aun
en una celda de tabla y el
en otra. Si
esta\u000aantes de una tabla,
deberia estar despues \u00a1Recuerde que los\u000aformularios no se pueden anidar!\u000a badlayout_using_body=Se recomienda utilizar CSS para especificar colores de p\u00e1gina y enlaces badlayout_using_font=Se recomienda utilizar CSS para especificar fuentes y propiedades\u000acomo tama\u00f1o y color. Esto reducira el tama\u00f1o de los archivos HTML\u000ay hara m\u00e1s facil el mantenimiento que con elementos FONT.\u000a badlayout_using_layer=Se recomienda el mecanismo de posicionado con hojas de estilo (CSS) en preferencia al elemento propietario LAYER, debido a su limitado soporte por vendedores. badlayout_using_nobr=Se recomienda utilizar CSS para controlar la division de lineas. Use "white-space: nowrap" para inhibir division en lugar de insertar ... en el etiquetado. badlayout_using_spacer=Se recomienda utilizar CSS para controlar espacios en blanco (por ejemplo para indentation, margenes y separacion de lineas ). El elemento propietario SPACER tiene limitado soporte. cant_be_nested={0} no se puede anidar coerce_to_endtag=<{0}> deberia ser probablemente content_after_body=contenido despues del final del 'body' discarding_unexpected=descartando inesperado {0} doctype_after_tags= no esta permitido despu\u00e9s de elementos doctype_given={0}: Doctype dado es "{1}" dtype_not_upper_case=SYSTEM, PUBLIC, W3C, DTD, EN deben ir en may\u00fasculas duplicate_frameset=elemento FRAMESET repetido element_not_empty= emacs_format={0}:{1,number}:{2,number}: encoding_mismatch= entity_in_id= error=Error: escaped_illegal_uri= expected_equalsign= fixed_backslash= forced_end_anchor=Aviso: deberia ser probablemente general_info=Para aprender m\u00e1s sobre JTidy visite http://jtidy.sourceforge.net\u000aPor favor, notifique errores en http://sourceforge.net/tracker/?group_id=13153&atid=113153\u000aEspecificaciones de HTML y CSS estan disponibles en http://www.w3.org/ hello_message=Tidy (versi\u00f3n {0}) Analizando "{1}" help_text={0}: file1 file2 ...\u000aAplicaci\u00f3n para 
limpiar y formatear archivos html\u000aconsulte http://www.w3.org/People/Raggett/tidy/\u000aopciones para tidy disponibles en {1}\u000a\u000aDirectivas de procesado\u000a--------------------\u000a -indent o -i indentar contenido de elemento\u000a -omit o -o omitir etiquetas finales opcionales\u000a -wrap 72 dividir texto en la columna 72 (68 por defecto)\u000a -upper o -u forzar etiquetas en may\u00fasculas (min\u00fasculas por defecto)\u000a -clean o -c remplazar etiquetas FONT, NOBR y CENTER tags por CSS\u000a -numeric o -n generar entidades num\u00e9ricas m\u00e1s que nombradas\u000a -errors o -e s\u00f3lo mostrar errores\u000a -quiet o -q suprimir salida no esencial\u000a -xml usar cuando la entrada es xml bien formado\u000a -asxml para convertir html a xml bien formado\u000a -slides para generar presentaciones a partir de elementos H2\u000a\u000aC\u00f3digos de caracteresn------------------\u000a -raw dejar caracteres > 128 igual que a la entrada\u000a -ascii usar ASCII para salida, Latin-1 para entrada\u000a -latin1 usar Latin-1 para entrada y salida\u000a -iso2022 usar ISO2022 para entrada y salida\u000a -utf8 usar UTF-8 para entrada y salida\u000a -mac usar el c\u00f3digo de caracteres MacRoman de Apple\u000a\u000aFile manipulation\u000a---------------\u000a -config configurar opciones a partir del archivo \u000a -f escribir errores al archivo \u000a -modify o -m para midificar archivos originales\u000a\u000aVarios\u000a------------\u000a -version o -v mostrar versi\u00f3n\u000a -help o -h listar opciones de la l\u00ednea de comando\u000aTambi\u00e9n puede usar --blah para cualquier opci\u00f3n de configuraci\u00f3n blah\u000a\u000aInput/Output son stdin/stdout por defecto\u000aOpciones de una s\u00f3la letra, excepto -f, se pueden combinar\u000acomo en: tidy -f errs.txt -imu foo.html\u000aPara m\u00e1s informaci\u00f3n sobre HTML consultar http://www.w3.org/MarkUp id_name_mismatch={0} valor de atributos 'id' y 'name' no coinciden illegal_char=Aviso: 
remplazar c\u00f3digo de caracter ilegal {0,number} illegal_nesting={0} no deber\u00eda estar anidado illegal_uri_reference= inconsistent_namespace=espacio de nombres no corresponde al contenido inconsistent_version=html doctype no corresponde al contenido inserting_tag=insertando <{0}> implicito invalid_char= invalid_ncr= invalid_sgml_chars_summary= invalid_utf16= invalid_utf16_summary= invalid_utf8= invalid_utf8_summary= invaliduri_summary= joining_attribute= line_column=linea {0,number} columna {1,number} - malformed_comment=guiones adjacentes dentro de comentario malformed_doctype=se espera "html PUBLIC" o "html SYSTEM" missing_attr_value={0} atributo "{1}" sin valor missing_attribute={0} falta atributo "{1}" missing_body=No se puede crear diapositivas, falta elemento BODY. missing_doctype=falta declaraci\u00f3n missing_endtag_before=falta antes {1} missing_endtag_for=falta missing_imagemap={0} debe usar mapa de imagen del lado del cliente missing_quotemark= missing_semicolon=Aviso: entidad "{0}" no termina en '';'' missing_semicolon_ncr= missing_starttag=falta <{0}> missing_title_element=insertado elemento 'title' needs_author_intervention=Este documento tiene errores que se deben solucionar para que Tidy pueda generar una version corregida nested_emphasis=enfasis anidado {0} nested_quotation= newline_in_uri= no_warnings=no hay avisos ni errores noframes_content={0} no dentro de elemento ''noframes'' non_matching_endtag=remplazar {0} inesperado por num_warnings=hallados {0,choice,0#no avisos|1#1 aviso|1<{0,number,integer} avisos}, {1,choice,0#no errors|1#1 error|2#{1,number,integer} errors}! 
obsolete_element=elemento obsoleto {0} por {1} proprietary_attr_value={0} valor de atributo "{1}" proprietario proprietary_attribute={0} atributo "{1}" proprietario proprietary_element={0} no aprobado por W3C repeated_attribute= replacing_element=remplazar elemento {0} por {1} report_version={0}: Contenido de documento parece {1} slides_found=Se han hallado {0,number} diapositivas suspected_missing_quote=faltan comillas para valor de atributo tag_not_allowed_in={0} no permitido en elementos <{1}> too_many_elements=demasiados elementos {0} too_many_elements_in=demasiados elementos {0} en <{1}> trim_empty_element=elemento vacio eliminado {0} unescaped_ampersand=Aviso: '&' no escapado que deber\u00eda ser escrito como & unescaped_element= unexpected_end_of_file=final de archivo mientras se analizan atributos {0} unexpected_endtag= inesperado unexpected_endtag_in= inesperado en <{1}> unexpected_gt={0} falta ''>'' al final de la etiqueta unexpected_quotemark={0} comillas inesperadas o duplicadas unknown_attribute=atributo "{0}" desconocido unknown_element={0} no se reconoce! unknown_entity=Aviso: '&' no escapado o entidad desconocida "{0}" unknown_file={0}: no se puede abrir archivo "{1}" unknown_option=Aviso: option desconocida: {0} unrecognized_option=opcion -{0} desconocida. Use -help para listar opciones using_br_inplace_of=usando
en lugar de {0} vendor_specific_chars_summary= warning=Aviso: xml_attribute_value={0} tiene atributo XML "{1}" xml_id_sintax= jtidy/src/main/resources/org/w3c/tidy/TidyMessages_de.properties0000644000175000017500000002627610460747603025333 0ustar twernertwerneranchor_not_unique= apos_undefined= attr_value_not_lcase= backslash_in_uri= bad_argument=Warnung: Fehlendes oder missgebildetes Argument "{1}" f\u00fcr Option "{0}" bad_attribute_value={0} Attribut "{1}" unbekannter Attribut-Wert "{2}" bad_cdata_content='<' + '/' + Buchstabe ist hier nicht erlaubt bad_comment_chars=erwarte -- oder > bad_tree=Interner Fehler: der Parse-Baum hat seine Integrit\u00e4t verloren. bad_xml_comment=XML-Kommentare d\u00fcrfen kein -- enthalten badaccess_frames=Seiten, welche Frames nutzen, sind problematisch f\u00fcr Leute,\u000adie entweder blind sind oder einen Browser nutzen, der keine\u000aFrames unterst\u00fctzt. Eine Frames-basierte Seite sollte immer\u000aauch ein alternatives Layout innerhalb eines NOFRAMES-Elementes\u000aenthalten. badaccess_missing_image_alt=Das alt-Attribut sollte verwendet werden, um ein kurze Beschreibung\u000aeines Bildes zu geben; l\u00e4ngere Beschreibungen sollten mit dem\u000alongdesc-Attribut angegeben werden, welches eine URL mit der\u000aBeschreibung annimmt. Diese Ma\u00dfnahmen sind notwendig f\u00fcr Leute\u000amit nicht-grafischen Browsern. badaccess_missing_image_map=Benutzen Sie client-seitige Image-Maps (usemap) anstatt serverseitigen\u000aImage-Maps (ismap), weil letztere unbenutzbar sind f\u00fcr Leute badaccess_missing_link_alt=F\u00fcr Hypertext-Links, welche durch eine clientseitige Image-Map\u000adefiniert sind, sollten Sie das alt-Attribut nutzen, um eine\u000atextuelle Beschreibung f\u00fcr Leute zu geben, die nichtgrafische\u000aBrowser nutzen. badaccess_missing_summary=Das summary-Attribut von sollte benutzt werden, um die\u000aTabellenstruktur zu beschreiben. Das ist sehr hilfreich f\u00fcr\u000aLeute mit nicht-visuellen Browsern. 
Die scope- und\u000aheader-Attribute f\u00fcr Tabellenzellen sind n\u00fctzlich, um anzugeben,\u000awelche header f\u00fcr jede Tabellenzelle gelten, wodurch nicht-\u000avisuelle Browser bef\u00e4higt werden, sinnvollen Kontext f\u00fcr jede\u000aZelle zur Verf\u00fcgung zu stellen. badaccess_summary=F\u00fcr weitere Ratschl\u00e4ge, wie Sie Ihre Seite zug\u00e4nglich machen\u000ak\u00f6nnen, siehe "{0}". Vielleicht wollen\u000aSie sich auch die (englische) Seite "http://www.cast.org/bobby/"\u000aansehen, welche einen kostenlosen, web-basierten Dienst zur\u000a\u00dcberpr\u00fcfung von URLs auf Zug\u00e4nglichkeit enth\u00e4lt. badchars_summary=Zeichen-Codes f\u00fcr die Microsoft-Windows-Schriftarten im Bereich\u000a128 - 159 werden auf anderen Plattformen nicht unbedingt erkannt.\u000aEs wird empfohlen, statt dessen die benannten Entit\u00e4ten zu benutzen,\u000az.B. ™ anstatt dem Windows-Zeichen-Code 153 (0x2122 in Unicode). badform_summary=Sie sollten eines oder beide - bzw. -Tags verschieben.\u000aHTML-Elemente sollten sauber verschachtelt sein, und -Elemente\u000asind da keine Ausnahme. Zum Beispiel sollte nicht das -Element\u000avor der Tabelle und das -Element in der Tabelle plaziert sein.\u000aAuch kann nicht ein in einem anderem verschachtelt sein. badlayout_using_body=Es wird empfohlen, CSS zu benutzen, um Seiten- und\u000aLink-Farben einzustellen. badlayout_using_font=Benutzen Sie CSS, um die Schriftart und Eigenschaften\u000awie Gr\u00f6\u00dfe und Farbe einzustellen. Das wird die Gr\u00f6\u00dfe der\u000aHTML-Dateien reduzieren und sie leichter wartbar machen\u000aim Vergleich mit -Elementen. badlayout_using_layer=Es wird empfohlen, den CSS-Positionierungs-Mechanismus\u000aanstelle des propriet\u00e4ren -Elementes zu verwenden,\u000ada letzteres nicht standardisiert ist und deswegen nur\u000abegrenzt von den Browserherstellern unterst\u00fctzt wird. 
badlayout_using_nobr=Benutzen Sie CSS, um die Zeilenumbr\u00fcche zu kontrollieren.\u000aBenutzen Sie "white-space: nowrap", um Umbr\u00fcche zu\u000averhindern, anstatt ... in das Dokument\u000aeinzuf\u00fcgen badlayout_using_spacer=Benutzen Sie CSS um Leerr\u00e4ume zu kontrollieren,\u000a(z.B. f\u00fcr Einr\u00fcckungen, R\u00e4nder und Zeilenabst\u00e4nde).\u000aDas propriet\u00e4re -Element hat nur begrenzten\u000aHersteller-Unterst\u00fctzung. cant_be_nested={0} kann nicht verschachtelt werden coerce_to_endtag=<{0}> ist wahrscheinlich als gemeint content_after_body=Inhalt erscheint nach dem Ende von . discarding_unexpected=entsorge unerwartetes {0} doctype_after_tags= darf nur am Anfang des Dokumentes stehen, nicht nach Elementen doctype_given= dtype_not_upper_case=SYSTEM, PUBLIC, W3C, DTD, EN m\u00fcssen komplett in Gro\u00dfbuchstaben geschrieben werden. duplicate_frameset=wiederholtes -Element element_not_empty= emacs_format={0}:{1,number}:{2,number}: encoding_mismatch= entity_in_id= error=Fehler: escaped_illegal_uri= expected_equalsign= fixed_backslash= forced_end_anchor=Warnung: ist wahrscheinlich gemeint als general_info=Um mehr \u00fcber Tidy zu erfahren, sehen\u000aSie sich die Seite http://www.w3.org/People/Raggett/tidy/\u000aan. JTidy gibt es hier: http://jtidy.sourceforge.net\u000a\u00dcberzeugen Sie Ihr Unternehmen davon, dem W3C beizutreten,\u000asiehe http://www.w3.org/Consortium\u000aHTML- und CSS-Spezifikationen sind verf\u00fcgbar bei http://www.w3.org. 
hello_message=Tidy (Version {0, date}) parst "{1}" help_text= {0}: datei1 datei2 ...\u000a Werkzeug, um HTML-Dateien zu s\u00e4ubern und auszudrucken.\u000a Siehe http://www.w3.org/People/Raggett/tidy/\u000a \u000a Optionen f\u00fcr tidy (Version {1}):\u000a \u000a Verarbeitungs-Anweisungen\u000a -------------------------\u000a -i | -indent Element-Inhalt einr\u00fccken\u000a -o | -omit optionale Ende-Tags weglassen\u000a -wrap nn Text bei Zeile nn umbrechen (Default: 68)\u000a -u | -upper Tags in GROSSBUCHSTABEN (Default: kleinbuchstaben)\u000a -c | -clean , ,
-Tags durch CSS ersetzen\u000a -n | -numeric numerische anstelle benannter Entit\u00e4ten verwenden\u000a -e | -errors nur Fehler anzeigen (nicht den korrigierten Text)\u000a -q | -quiet nicht notwendige Ausgaben unterdr\u00fccken\u000a -xml die Eingabe ist bereits wohlgeformtes XML\u000a -asxml die Ausgabe soll wohlgeformtes XML sein\u000a -slides das Dokument an den

-Elementen in "slides"\u000a (Einzelseiten) zerst\u00fcckeln: slide1.html, slide2.html, ...\u000a \u000a Zeichen-S\u00e4tze / -Kodierung\u000a -raw alle Bytes > 128 in der Ausgabe unver\u00e4ndert lassen\u000a -ascii ASCII f\u00fcr die Ausgabe, Latin-1 f\u00fcr die Eingabe verwenden\u000a -latin1 Latin-1 f\u00fcr Ein- und Ausgabe verwenden\u000a -iso2022 ISO2022 (v.a. Japanisch) f\u00fcr Ein- und Ausgabe verwenden\u000a -utf8 UTF8 (Unicode) f\u00fcr Ein- und Ausgabe verwenden\u000a -mac Den MacRoman-Zeichensatz verwenden\u000a \u000a Datei-Manipulation\u000a ------------------\u000a -config lese Optionen aus der angegebenen Konfigurations-Datei\u000a -f Fehler zur angegebenen Datei schreiben\u000a -m | -modify die Original-Datei \u00e4ndern\u000a \u000a Vermischtes\u000a -----------\u000a -v | -version die Programmversion anzeigen\u000a -h | -help diese Kommandozeilenhilfe anzeigen\u000a Au\u00dferdem kann f\u00fcr jede Konfigurations-Datei-Option "bla" auch\u000a "--blah" in der Kommandozeile genutzt werden.\u000a Ein- und Ausgabe ist (wenn nicht anders angegeben) die Standard-Ein-\u000a oder Ausgabe.\u000a Einzel-Buchstaben-Optionen au\u00dfer -f k\u00f6nnen miteinander kombiniert\u000a werden, wie z.B. tidy -f errs.txt -imu foo.html\u000a F\u00fcr weitere Informationen \u00fcber HTML siehe http://www.w3.org/MarkUp id_name_mismatch={0} das id- und das name-Attribut haben nicht den gleichen Wert illegal_char=Warnung: Ung\u00fcltiges Zeichen mit Code {0,number} wird ersetzt. illegal_nesting={0} sollte nicht verschachtelt werden. illegal_uri_reference= inconsistent_namespace=HTML Namensraum (namespace) passt nicht zum Inhalt inconsistent_version=HTML-DocTyp passt nicht zum Inhalt. 
inserting_tag=f\u00fcge implizites <{0}> ein invalid_char= invalid_ncr= invalid_sgml_chars_summary= invalid_utf16= invalid_utf16_summary= invalid_utf8= invalid_utf8_summary= invaliduri_summary= joining_attribute= line_column=Zeile {0,number} Spalte {1,number} - malformed_comment=zusammenh\u00e4ngende Bindestriche (--) innerhalb eines Kommentars. malformed_doctype=erwarte "html PUBLIC" oder "html SYSTEM" missing_attr_value={0} dem Attribut "{1}" fehlt ein Wert missing_attribute={0} hat kein "{1}"-Attribut missing_body=Ich kann keine Slides erstellen, weil das Dokument kein -Element enth\u00e4lt. missing_doctype= missing_endtag_before=fehlendes vor {1} missing_endtag_for=fehlendes missing_imagemap={0} sollte eine Client-seitige Image-Map benutzen (usemap=... anstatt ismap) missing_quotemark= missing_semicolon=Warnung: Die Entit\u00e4t "{0}" endet nicht mit ";" missing_semicolon_ncr= missing_starttag=fehlendes <{0}> missing_title_element=f\u00fcge fehlendes -Element ein. needs_author_intervention=Dieses Dokument hat Fehler, die korrigiert werden m\u00fcssen,\u000abevor HTML Tidy das Dokument aufr\u00e4umen kann. nested_emphasis=verschachtelte Hervorhebung {0} nested_quotation= newline_in_uri= no_warnings=Es wurden keine Warnungen oder Fehler gefunden. noframes_content={0} nicht innerhalb eines <noframe>-Elementes non_matching_endtag=ersetze unerwartetes unexpected {0} durch </{1}> num_warnings=Es wurden {0,choice,0#keine Warnungen|1#1 Warnungen|1<{0,number,integer} Warnungen}, {1,choice,0#keine Fehler|1#1 Fehler|2#{1,number,integer} Fehler} gefunden! obsolete_element=ersetze veraltetes Element {0} durch {1} proprietary_attr_value={0} propriet\u00e4rer (nicht-standardisierter) Attribut-Wert "{1}" proprietary_attribute={0} propriet\u00e4rer (nicht-standardisierter) Attribut "{1}" proprietary_element={0} ist nicht vom W3C abgesegnet repeated_attribute= replacing_element=ersetze Element {0} durch {1} report_version={0}: Der Inhalt des Dokumentes sieht aus wie {1}. 
slides_found=Es wurden {0,number} Slides gefunden. suspected_missing_quote=Fehlendes Anf\u00fchrungszeichen (") f\u00fcr Attribut-Wert tag_not_allowed_in={0} ist nicht erlaubt in <{1}>-Elementen. too_many_elements=zu viele {0}-Elemente too_many_elements_in=zu viele {0}-Elemente in <{1}> trim_empty_element=entferne leeres {0} unescaped_ampersand=Warnung: einzelnes "&" (sollte "&" sein). unescaped_element= unexpected_end_of_file=Ende der Datei, w\u00e4hrend Attribute geparst werden {0} unexpected_endtag=unerwartetes </{0}> unexpected_endtag_in=unerwartetes </{0}> in <{1}> unexpected_gt={0} fehlendes ''>'' am Ende des Tags unexpected_quotemark={0} unerwartetes oder dupliziertes Anf\u00fchrungszeichen (' oder ") unknown_attribute=unbekanntes Attribut "{0}" unknown_element={0} wird nicht erkannt! unknown_entity=Warnung: einzelnes "&" (sollte "&" sein) oder unbekannte Entit\u00e4t "{0}" unknown_file={0}: Ich kann die Datei "{1}" nicht \u00f6ffnen. unknown_option= unrecognized_option=Unbekannte Option -{0}. Benutzen Sie -help, um die Optionen aufzulisten. 
using_br_inplace_of=es wird <br> genutzt statt {0} vendor_specific_chars_summary= warning=Warnung: xml_attribute_value={0} hat das XML-Attribut "{1}" xml_id_sintax= ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/resources/tidy.gif�������������������������������������������������������������������0000644�0001750�0001750�00000000364�10460712233�017325� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������GIF89a � ������������������������!�����,���� � �I(fNٝ$fj-$z~d|F/"ıVzh)=L{x9ޚpI"p'G}L3?G�wzEX*gEW2;f3Gd9at]hXĺ�;����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/��������������������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345034�014577� 5����������������������������������������������������������������������������������������������������ustar 
�twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/����������������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345034�015366� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/������������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345034�016062� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/�������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345035�017034� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMCDATASectionImpl.java�������������������������������������������0000644�0001750�0001750�00000006305�10102754223�023215� 
0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. 
The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Tidy implementation of org.w3c.dom.CDATASection. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 400 $ ($Author: fgiust $) */ public class DOMCDATASectionImpl extends DOMTextImpl implements org.w3c.dom.CDATASection { /** * Instantiates a new DOMCDATASectionImpl which wraps the given Node. * @param adaptee wrapped node. 
*/ protected DOMCDATASectionImpl(Node adaptee) { super(adaptee); } /** * @see org.w3c.dom.Node#getNodeName */ public String getNodeName() { return "#cdata-section"; } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { return org.w3c.dom.Node.CDATA_SECTION_NODE; } }���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMNodeListImpl.java�����������������������������������������������0000644�0001750�0001750�00000007524�10144212711�022576� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * DOMNodeListImpl. The items in the <code>NodeList</code> are accessible via an integral index, starting from 0. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 738 $ ($Author: fgiust $) */ public class DOMNodeListImpl implements org.w3c.dom.NodeList { /** * Parent Node. 
*/ private Node parent; /** * Instantiates a new DOM node list. * @param parent parent Node */ protected DOMNodeListImpl(Node parent) { this.parent = parent; } /** * @see org.w3c.dom.NodeList#item(int) */ public org.w3c.dom.Node item(int index) { if (parent == null) { return null; } int i = 0; Node node = this.parent.content; while (node != null) { if (i >= index) { break; } i++; node = node.next; } if (node != null) { return node.getAdapter(); } return null; } /** * @see org.w3c.dom.NodeList#getLength */ public int getLength() { if (parent == null) { return 0; } int len = 0; Node node = this.parent.content; while (node != null) { len++; node = node.next; } return len; } }����������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Parser.java��������������������������������������������������������0000644�0001750�0001750�00000005602�10116675277�021145� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * HTML Parser. 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 587 $ ($Author: fgiust $) */ public interface Parser { /** * Parse the given node. * @param lexer Lexer * @param node node created by the lexer upon seeing the start tag, or by the parser when the start tag is inferred * @param mode content mode */ void parse(Lexer lexer, Node node, short mode); }������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/TidyMessage.java���������������������������������������������������0000644�0001750�0001750�00000016663�10123322004�022107� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Message sent to listeners for validation errors/warnings and info. * @see Tidy#setMessageListener(TidyMessageListener) * @author Fabrizio Giustina * @version $Revision: 630 $ ($Author: fgiust $) */ public final class TidyMessage { /** * Line in the source file (can be 0 if the message is not related to a particular line, such as a summary message). 
*/ private int line; /** * Column in the source file (can be 0 if the message is not related to a particular column, such as a summary * message). */ private int column; /** * Level for this message. Can be TidyMessage.Level.SUMMARY | TidyMessage.Level.INFO | TidyMessage.Level.WARNING | * TidyMessage.Level.ERROR. */ private Level level; /** * Formatted text for this message. */ private String message; /** * Tidy internal error code. */ private int errorCode; /** * Instantiates a new message. * @param errorCode Tidy internal error code. * @param line Line number in the source file * @param column Column number in the source file * @param level severity * @param message message text */ public TidyMessage(int errorCode, int line, int column, Level level, String message) { this.errorCode = errorCode; this.line = line; this.column = column; this.level = level; this.message = message; } /** * Getter for <code>errorCode</code>. * @return Returns the errorCode. */ public int getErrorCode() { return this.errorCode; } /** * Getter for <code>column</code>. * @return Returns the column. */ public int getColumn() { return this.column; } /** * Getter for <code>level</code>. * @return Returns the level. */ public Level getLevel() { return this.level; } /** * Getter for <code>line</code>. * @return Returns the line. */ public int getLine() { return this.line; } /** * Getter for <code>message</code>. * @return Returns the message. */ public String getMessage() { return this.message; } /** * Message severity enumeration. * @author fgiust * @version $Revision: 630 $ ($Author: fgiust $) */ public static final class Level implements Comparable { /** * level = summary (0). */ public static final Level SUMMARY = new Level(0); /** * level = info (1). */ public static final Level INFO = new Level(1); /** * level = warning (2). */ public static final Level WARNING = new Level(2); /** * level = error (3). */ public static final Level ERROR = new Level(3); /** * short value for this level. 
*/ private short code; /** * Instantiates a new message with the given code. * @param code int value for this level */ private Level(int code) { this.code = (short) code; } /** * Returns the int value for this level. * @return int value for this level */ public short getCode() { return this.code; } /** * Returns the Level instance corresponding to the given int value. * @param code int value for the level * @return Level instance */ public static Level fromCode(int code) { switch (code) { case 0 : return SUMMARY; case 1 : return INFO; case 2 : return WARNING; case 3 : return ERROR; default : return null; } } /** * @see java.lang.Comparable#compareTo(Object) */ public int compareTo(Object object) { return this.code - ((Level) object).code; } /** * @see java.lang.Object#equals(Object) */ public boolean equals(Object object) { if (!(object instanceof Level)) { return false; } return this.code == ((Level) object).code; } /** * @see java.lang.Object#toString() */ public String toString() { switch (code) { case 0 : return "SUMMARY"; case 1 : return "INFO"; case 2 : return "WARNING"; case 3 : return "ERROR"; default : // should not happen return "?"; } } /** * @see java.lang.Object#hashCode() */ public int hashCode() { // new instances should not be created return super.hashCode(); } } }�����������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMNodeListByTagNameImpl.java��������������������������������������0000644�0001750�0001750�00000011063�10123335515�024324� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web 
Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. 
* * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * DOMNodeListByTagNameImpl. The items in the <code>NodeList</code> are accessible via an integral index, starting * from 0. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 633 $ ($Author: fgiust $) */ public class DOMNodeListByTagNameImpl implements org.w3c.dom.NodeList { /** * First node. */ private Node first; /** * Tag name. */ private String tagName; /** * Current index. */ private int currIndex; /** * Max index (number of nodes). */ private int maxIndex; /** * Current node. */ private Node currNode; /** * Instantiates a new DOMNodeListByTagName. * @param first first node. * @param tagName tag name */ protected DOMNodeListByTagNameImpl(Node first, String tagName) { this.first = first; this.tagName = tagName; } /** * @see org.w3c.dom.NodeList#item */ public org.w3c.dom.Node item(int index) { currIndex = 0; maxIndex = index; preTraverse(first); if (currIndex > maxIndex && currNode != null) { return currNode.getAdapter(); } return null; } /** * @see org.w3c.dom.NodeList#getLength */ public int getLength() { currIndex = 0; maxIndex = Integer.MAX_VALUE; preTraverse(first); return currIndex; } /** * Traverse the node list. 
* @param node Node */ protected void preTraverse(Node node) { if (node == null) { return; } if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { if (currIndex <= maxIndex && (tagName.equals("*") || tagName.equals(node.element))) { currIndex += 1; currNode = node; } } if (currIndex > maxIndex) { return; } node = node.content; while (node != null) { preTraverse(node); node = node.next; } } }�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/TidyUtils.java�����������������������������������������������������0000644�0001750�0001750�00000064473�11424644276�021654� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import java.io.UnsupportedEncodingException; /** * Utility class with handy methods, mainly for String handling or for reproducing c behaviours. * @author Fabrizio Giustina * @version $Revision $ ($Author $) */ public final class TidyUtils { /** * char type: digit. */ private static final short DIGIT = 1; /** * char type: letter. */ private static final short LETTER = 2; /** * char type: namechar. */ private static final short NAMECHAR = 4; /** * char type: whitespace. */ private static final short WHITE = 8; /** * char type: newline. */ private static final short NEWLINE = 16; /** * char type: lowercase. */ private static final short LOWERCASE = 32; /** * char type: uppercase. */ private static final short UPPERCASE = 64; /** * used to classify chars for lexical purposes. */ private static short[] lexmap = new short[128]; static { mapStr("\r\n\f", (short) (NEWLINE | WHITE)); mapStr(" \t", WHITE); mapStr("-.:_", NAMECHAR); mapStr("0123456789", (short) (DIGIT | NAMECHAR)); mapStr("abcdefghijklmnopqrstuvwxyz", (short) (LOWERCASE | LETTER | NAMECHAR)); mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short) (UPPERCASE | LETTER | NAMECHAR)); } /** * utility class, don't instantiate. */ private TidyUtils() { // unused } /** * Converts a int to a boolean. * @param value int value * @return <code>true</code> if value is != 0 */ static boolean toBoolean(int value) { return value != 0; } /** * convert an int to unsigned (& 0xFF). * @param c signed int * @return unsigned int */ static int toUnsigned(int c) { return c & 0xFF; } /** * check if the first String contains the second one. * @param s1 full String * @param len1 maximum position in String * @param s2 String to search for * @return true if s1 contains s2 in the range 0-len1 */ static boolean wsubstrn(String s1, int len1, String s2) { int searchIndex = s1.indexOf(s2); return searchIndex > -1 && searchIndex <= len1; } /** * check if the first String contains the second one (ignore case). 
* @param s1 full String * @param len1 maximum position in String * @param s2 String to search for * @return true if s1 contains s2 in the range 0-len1 */ static boolean wsubstrncase(String s1, int len1, String s2) { return wsubstrn(s1.toLowerCase(), len1, s2.toLowerCase()); } /** * return offset of cc from beginning of s1, -1 if not found. * @param s1 String * @param len1 maximum offset (values > than lenl are ignored and returned as -1) * @param cc character to search for * @return index of cc in s1 */ static int wstrnchr(String s1, int len1, char cc) { int indexOf = s1.indexOf(cc); if (indexOf < len1) { return indexOf; } return -1; } /** * Same as wsubstrn, but without a specified length. * @param s1 full String * @param s2 String to search for * @return <code>true</code> if s2 is found in s2 (case insensitive search) */ static boolean wsubstr(String s1, String s2) { int i; int len1 = s1.length(); int len2 = s2.length(); for (i = 0; i <= len1 - len2; ++i) { if (s2.equalsIgnoreCase(s1.substring(i))) { return true; } } return false; } /** * Is the character a hex digit? * @param c char * @return <code>true</code> if he given character is a hex digit */ static boolean isxdigit(char c) { return Character.isDigit(c) || (Character.toLowerCase(c) >= 'a' && Character.toLowerCase(c) <= 'f'); } /** * Check if the string valueToCheck is contained in validValues array (case insesitie comparison). * @param validValues array of valid values * @param valueToCheck value to search for * @return <code>true</code> if valueToCheck is found in validValues */ static boolean isInValuesIgnoreCase(String[] validValues, String valueToCheck) { int len = validValues.length; for (int j = 0; j < len; j++) { if (validValues[j].equalsIgnoreCase(valueToCheck)) { return true; } } return false; } /** * Return true if substring s is in p and isn't all in upper case. This is used to check the case of SYSTEM, PUBLIC, * DTD and EN. 
* @param s substring * @param p full string * @param len how many chars to check in p * @return true if substring s is in p and isn't all in upper case */ public static boolean findBadSubString(String s, String p, int len) { int n = s.length(); int i = 0; String ps; while (n < len) { ps = p.substring(i, i + n); if (s.equalsIgnoreCase(ps)) { return (!s.equals(ps)); } ++i; --len; } return false; } /** * Is the given char a valid xml letter? * @param c char * @return <code>true</code> if the char is a valid xml letter */ static boolean isXMLLetter(char c) { return ((c >= 0x41 && c <= 0x5a) || (c >= 0x61 && c <= 0x7a) || (c >= 0xc0 && c <= 0xd6) || (c >= 0xd8 && c <= 0xf6) || (c >= 0xf8 && c <= 0xff) || (c >= 0x100 && c <= 0x131) || (c >= 0x134 && c <= 0x13e) || (c >= 0x141 && c <= 0x148) || (c >= 0x14a && c <= 0x17e) || (c >= 0x180 && c <= 0x1c3) || (c >= 0x1cd && c <= 0x1f0) || (c >= 0x1f4 && c <= 0x1f5) || (c >= 0x1fa && c <= 0x217) || (c >= 0x250 && c <= 0x2a8) || (c >= 0x2bb && c <= 0x2c1) || c == 0x386 || (c >= 0x388 && c <= 0x38a) || c == 0x38c || (c >= 0x38e && c <= 0x3a1) || (c >= 0x3a3 && c <= 0x3ce) || (c >= 0x3d0 && c <= 0x3d6) || c == 0x3da || c == 0x3dc || c == 0x3de || c == 0x3e0 || (c >= 0x3e2 && c <= 0x3f3) || (c >= 0x401 && c <= 0x40c) || (c >= 0x40e && c <= 0x44f) || (c >= 0x451 && c <= 0x45c) || (c >= 0x45e && c <= 0x481) || (c >= 0x490 && c <= 0x4c4) || (c >= 0x4c7 && c <= 0x4c8) || (c >= 0x4cb && c <= 0x4cc) || (c >= 0x4d0 && c <= 0x4eb) || (c >= 0x4ee && c <= 0x4f5) || (c >= 0x4f8 && c <= 0x4f9) || (c >= 0x531 && c <= 0x556) || c == 0x559 || (c >= 0x561 && c <= 0x586) || (c >= 0x5d0 && c <= 0x5ea) || (c >= 0x5f0 && c <= 0x5f2) || (c >= 0x621 && c <= 0x63a) || (c >= 0x641 && c <= 0x64a) || (c >= 0x671 && c <= 0x6b7) || (c >= 0x6ba && c <= 0x6be) || (c >= 0x6c0 && c <= 0x6ce) || (c >= 0x6d0 && c <= 0x6d3) || c == 0x6d5 || (c >= 0x6e5 && c <= 0x6e6) || (c >= 0x905 && c <= 0x939) || c == 0x93d || (c >= 0x958 && c <= 0x961) || (c >= 0x985 && c <= 
0x98c) || (c >= 0x98f && c <= 0x990) || (c >= 0x993 && c <= 0x9a8) || (c >= 0x9aa && c <= 0x9b0) || c == 0x9b2 || (c >= 0x9b6 && c <= 0x9b9) || (c >= 0x9dc && c <= 0x9dd) || (c >= 0x9df && c <= 0x9e1) || (c >= 0x9f0 && c <= 0x9f1) || (c >= 0xa05 && c <= 0xa0a) || (c >= 0xa0f && c <= 0xa10) || (c >= 0xa13 && c <= 0xa28) || (c >= 0xa2a && c <= 0xa30) || (c >= 0xa32 && c <= 0xa33) || (c >= 0xa35 && c <= 0xa36) || (c >= 0xa38 && c <= 0xa39) || (c >= 0xa59 && c <= 0xa5c) || c == 0xa5e || (c >= 0xa72 && c <= 0xa74) || (c >= 0xa85 && c <= 0xa8b) || c == 0xa8d || (c >= 0xa8f && c <= 0xa91) || (c >= 0xa93 && c <= 0xaa8) || (c >= 0xaaa && c <= 0xab0) || (c >= 0xab2 && c <= 0xab3) || (c >= 0xab5 && c <= 0xab9) || c == 0xabd || c == 0xae0 || (c >= 0xb05 && c <= 0xb0c) || (c >= 0xb0f && c <= 0xb10) || (c >= 0xb13 && c <= 0xb28) || (c >= 0xb2a && c <= 0xb30) || (c >= 0xb32 && c <= 0xb33) || (c >= 0xb36 && c <= 0xb39) || c == 0xb3d || (c >= 0xb5c && c <= 0xb5d) || (c >= 0xb5f && c <= 0xb61) || (c >= 0xb85 && c <= 0xb8a) || (c >= 0xb8e && c <= 0xb90) || (c >= 0xb92 && c <= 0xb95) || (c >= 0xb99 && c <= 0xb9a) || c == 0xb9c || (c >= 0xb9e && c <= 0xb9f) || (c >= 0xba3 && c <= 0xba4) || (c >= 0xba8 && c <= 0xbaa) || (c >= 0xbae && c <= 0xbb5) || (c >= 0xbb7 && c <= 0xbb9) || (c >= 0xc05 && c <= 0xc0c) || (c >= 0xc0e && c <= 0xc10) || (c >= 0xc12 && c <= 0xc28) || (c >= 0xc2a && c <= 0xc33) || (c >= 0xc35 && c <= 0xc39) || (c >= 0xc60 && c <= 0xc61) || (c >= 0xc85 && c <= 0xc8c) || (c >= 0xc8e && c <= 0xc90) || (c >= 0xc92 && c <= 0xca8) || (c >= 0xcaa && c <= 0xcb3) || (c >= 0xcb5 && c <= 0xcb9) || c == 0xcde || (c >= 0xce0 && c <= 0xce1) || (c >= 0xd05 && c <= 0xd0c) || (c >= 0xd0e && c <= 0xd10) || (c >= 0xd12 && c <= 0xd28) || (c >= 0xd2a && c <= 0xd39) || (c >= 0xd60 && c <= 0xd61) || (c >= 0xe01 && c <= 0xe2e) || c == 0xe30 || (c >= 0xe32 && c <= 0xe33) || (c >= 0xe40 && c <= 0xe45) || (c >= 0xe81 && c <= 0xe82) || c == 0xe84 || (c >= 0xe87 && c <= 0xe88) || c == 0xe8a || c == 
0xe8d || (c >= 0xe94 && c <= 0xe97) || (c >= 0xe99 && c <= 0xe9f) || (c >= 0xea1 && c <= 0xea3) || c == 0xea5 || c == 0xea7 || (c >= 0xeaa && c <= 0xeab) || (c >= 0xead && c <= 0xeae) || c == 0xeb0 || (c >= 0xeb2 && c <= 0xeb3) || c == 0xebd || (c >= 0xec0 && c <= 0xec4) || (c >= 0xf40 && c <= 0xf47) || (c >= 0xf49 && c <= 0xf69) || (c >= 0x10a0 && c <= 0x10c5) || (c >= 0x10d0 && c <= 0x10f6) || c == 0x1100 || (c >= 0x1102 && c <= 0x1103) || (c >= 0x1105 && c <= 0x1107) || c == 0x1109 || (c >= 0x110b && c <= 0x110c) || (c >= 0x110e && c <= 0x1112) || c == 0x113c || c == 0x113e || c == 0x1140 || c == 0x114c || c == 0x114e || c == 0x1150 || (c >= 0x1154 && c <= 0x1155) || c == 0x1159 || (c >= 0x115f && c <= 0x1161) || c == 0x1163 || c == 0x1165 || c == 0x1167 || c == 0x1169 || (c >= 0x116d && c <= 0x116e) || (c >= 0x1172 && c <= 0x1173) || c == 0x1175 || c == 0x119e || c == 0x11a8 || c == 0x11ab || (c >= 0x11ae && c <= 0x11af) || (c >= 0x11b7 && c <= 0x11b8) || c == 0x11ba || (c >= 0x11bc && c <= 0x11c2) || c == 0x11eb || c == 0x11f0 || c == 0x11f9 || (c >= 0x1e00 && c <= 0x1e9b) || (c >= 0x1ea0 && c <= 0x1ef9) || (c >= 0x1f00 && c <= 0x1f15) || (c >= 0x1f18 && c <= 0x1f1d) || (c >= 0x1f20 && c <= 0x1f45) || (c >= 0x1f48 && c <= 0x1f4d) || (c >= 0x1f50 && c <= 0x1f57) || c == 0x1f59 || c == 0x1f5b || c == 0x1f5d || (c >= 0x1f5f && c <= 0x1f7d) || (c >= 0x1f80 && c <= 0x1fb4) || (c >= 0x1fb6 && c <= 0x1fbc) || c == 0x1fbe || (c >= 0x1fc2 && c <= 0x1fc4) || (c >= 0x1fc6 && c <= 0x1fcc) || (c >= 0x1fd0 && c <= 0x1fd3) || (c >= 0x1fd6 && c <= 0x1fdb) || (c >= 0x1fe0 && c <= 0x1fec) || (c >= 0x1ff2 && c <= 0x1ff4) || (c >= 0x1ff6 && c <= 0x1ffc) || c == 0x2126 || (c >= 0x212a && c <= 0x212b) || c == 0x212e || (c >= 0x2180 && c <= 0x2182) || (c >= 0x3041 && c <= 0x3094) || (c >= 0x30a1 && c <= 0x30fa) || (c >= 0x3105 && c <= 0x312c) || (c >= 0xac00 && c <= 0xd7a3) || (c >= 0x4e00 && c <= 0x9fa5) || c == 0x3007 || (c >= 0x3021 && c <= 0x3029) || (c >= 0x4e00 && c <= 0x9fa5) 
|| c == 0x3007 || (c >= 0x3021 && c <= 0x3029)); } /** * Is the given char valid in xml name? * @param c char * @return <code>true</code> if the char is a valid xml name char */ static boolean isXMLNamechar(char c) { return (isXMLLetter(c) || c == '.' || c == '_' || c == ':' || c == '-' || (c >= 0x300 && c <= 0x345) || (c >= 0x360 && c <= 0x361) || (c >= 0x483 && c <= 0x486) || (c >= 0x591 && c <= 0x5a1) || (c >= 0x5a3 && c <= 0x5b9) || (c >= 0x5bb && c <= 0x5bd) || c == 0x5bf || (c >= 0x5c1 && c <= 0x5c2) || c == 0x5c4 || (c >= 0x64b && c <= 0x652) || c == 0x670 || (c >= 0x6d6 && c <= 0x6dc) || (c >= 0x6dd && c <= 0x6df) || (c >= 0x6e0 && c <= 0x6e4) || (c >= 0x6e7 && c <= 0x6e8) || (c >= 0x6ea && c <= 0x6ed) || (c >= 0x901 && c <= 0x903) || c == 0x93c || (c >= 0x93e && c <= 0x94c) || c == 0x94d || (c >= 0x951 && c <= 0x954) || (c >= 0x962 && c <= 0x963) || (c >= 0x981 && c <= 0x983) || c == 0x9bc || c == 0x9be || c == 0x9bf || (c >= 0x9c0 && c <= 0x9c4) || (c >= 0x9c7 && c <= 0x9c8) || (c >= 0x9cb && c <= 0x9cd) || c == 0x9d7 || (c >= 0x9e2 && c <= 0x9e3) || c == 0xa02 || c == 0xa3c || c == 0xa3e || c == 0xa3f || (c >= 0xa40 && c <= 0xa42) || (c >= 0xa47 && c <= 0xa48) || (c >= 0xa4b && c <= 0xa4d) || (c >= 0xa70 && c <= 0xa71) || (c >= 0xa81 && c <= 0xa83) || c == 0xabc || (c >= 0xabe && c <= 0xac5) || (c >= 0xac7 && c <= 0xac9) || (c >= 0xacb && c <= 0xacd) || (c >= 0xb01 && c <= 0xb03) || c == 0xb3c || (c >= 0xb3e && c <= 0xb43) || (c >= 0xb47 && c <= 0xb48) || (c >= 0xb4b && c <= 0xb4d) || (c >= 0xb56 && c <= 0xb57) || (c >= 0xb82 && c <= 0xb83) || (c >= 0xbbe && c <= 0xbc2) || (c >= 0xbc6 && c <= 0xbc8) || (c >= 0xbca && c <= 0xbcd) || c == 0xbd7 || (c >= 0xc01 && c <= 0xc03) || (c >= 0xc3e && c <= 0xc44) || (c >= 0xc46 && c <= 0xc48) || (c >= 0xc4a && c <= 0xc4d) || (c >= 0xc55 && c <= 0xc56) || (c >= 0xc82 && c <= 0xc83) || (c >= 0xcbe && c <= 0xcc4) || (c >= 0xcc6 && c <= 0xcc8) || (c >= 0xcca && c <= 0xccd) || (c >= 0xcd5 && c <= 0xcd6) || (c >= 0xd02 && 
c <= 0xd03) || (c >= 0xd3e && c <= 0xd43) || (c >= 0xd46 && c <= 0xd48) || (c >= 0xd4a && c <= 0xd4d) || c == 0xd57 || c == 0xe31 || (c >= 0xe34 && c <= 0xe3a) || (c >= 0xe47 && c <= 0xe4e) || c == 0xeb1 || (c >= 0xeb4 && c <= 0xeb9) || (c >= 0xebb && c <= 0xebc) || (c >= 0xec8 && c <= 0xecd) || (c >= 0xf18 && c <= 0xf19) || c == 0xf35 || c == 0xf37 || c == 0xf39 || c == 0xf3e || c == 0xf3f || (c >= 0xf71 && c <= 0xf84) || (c >= 0xf86 && c <= 0xf8b) || (c >= 0xf90 && c <= 0xf95) || c == 0xf97 || (c >= 0xf99 && c <= 0xfad) || (c >= 0xfb1 && c <= 0xfb7) || c == 0xfb9 || (c >= 0x20d0 && c <= 0x20dc) || c == 0x20e1 || (c >= 0x302a && c <= 0x302f) || c == 0x3099 || c == 0x309a || (c >= 0x30 && c <= 0x39) || (c >= 0x660 && c <= 0x669) || (c >= 0x6f0 && c <= 0x6f9) || (c >= 0x966 && c <= 0x96f) || (c >= 0x9e6 && c <= 0x9ef) || (c >= 0xa66 && c <= 0xa6f) || (c >= 0xae6 && c <= 0xaef) || (c >= 0xb66 && c <= 0xb6f) || (c >= 0xbe7 && c <= 0xbef) || (c >= 0xc66 && c <= 0xc6f) || (c >= 0xce6 && c <= 0xcef) || (c >= 0xd66 && c <= 0xd6f) || (c >= 0xe50 && c <= 0xe59) || (c >= 0xed0 && c <= 0xed9) || (c >= 0xf20 && c <= 0xf29) || c == 0xb7 || c == 0x2d0 || c == 0x2d1 || c == 0x387 || c == 0x640 || c == 0xe46 || c == 0xec6 || c == 0x3005 || (c >= 0x3031 && c <= 0x3035) || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe)); } /** * Is the given character a single or double quote? * @param c char * @return <code>true</code> if c is " or ' */ static boolean isQuote(int c) { return (c == '\'' || c == '\"'); } /** * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding * throws declarations in lots of methods. 
* @param str String * @return utf8 bytes * @see String#getBytes() */ public static byte[] getBytes(String str) { try { return str.getBytes("UTF8"); } catch (UnsupportedEncodingException e) { throw new Error("String to UTF-8 conversion failed: " + e.getMessage()); } } /** * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding * throws declarations in lots of methods. * @param bytes byte array * @param offset starting offset in byte array * @param length length in byte array starting from offset * @return same as <code>new String(bytes, offset, length, "UTF8")</code> */ public static String getString(final byte[] bytes, final int offset, final int length) { try { return new String(bytes, offset, Math.min(length, bytes.length - offset), "UTF8"); } catch (UnsupportedEncodingException e) { throw new Error("UTF-8 to string conversion failed: " + e.getMessage()); } } /** * Return the last char in string. This is useful when trailing quotemark is missing on an attribute * @param str String * @return last char in String */ public static int lastChar(String str) { if (str != null && str.length() > 0) { return str.charAt(str.length() - 1); } return 0; } /** * Determines if the specified character is whitespace. * @param c char * @return <code>true</code> if char is whitespace. */ public static boolean isWhite(char c) { short m = map(c); return TidyUtils.toBoolean(m & WHITE); } /** * Is the given char a digit? * @param c char * @return <code>true</code> if the given char is a digit */ public static boolean isDigit(char c) { short m; m = map(c); return TidyUtils.toBoolean(m & DIGIT); } /** * Is the given char a letter? * @param c char * @return <code>true</code> if the given char is a letter */ public static boolean isLetter(char c) { short m; m = map(c); return TidyUtils.toBoolean(m & LETTER); } /** * Is the given char valid in name? 
(letter, digit or "-", ".", ":", "_") * @param c char * @return <code>true</code> if char is a name char. */ public static boolean isNamechar(char c) { short map = map(c); return TidyUtils.toBoolean(map & NAMECHAR); } /** * Determines if the specified character is a lowercase character. * @param c char * @return <code>true</code> if char is lower case. */ public static boolean isLower(char c) { short map = map(c); return TidyUtils.toBoolean(map & LOWERCASE); } /** * Determines if the specified character is a uppercase character. * @param c char * @return <code>true</code> if char is upper case. */ public static boolean isUpper(char c) { short map = map(c); return TidyUtils.toBoolean(map & UPPERCASE); } /** * Maps the given character to its lowercase equivalent. * @param c char * @return lowercase char. */ public static char toLower(char c) { short m = map(c); if (TidyUtils.toBoolean(m & UPPERCASE)) { c = (char) (c + 'a' - 'A'); } return c; } /** * Maps the given character to its uppercase equivalent. * @param c char * @return uppercase char. */ public static char toUpper(char c) { short m = map(c); if (TidyUtils.toBoolean(m & LOWERCASE)) { c = (char) (c + 'A' - 'a'); } return c; } /** * Fold case of a char. * @param c char * @param tocaps convert to caps * @param xmlTags use xml tags? If true no change will be performed * @return folded char * @todo check the use of xmlTags parameter */ public static char foldCase(char c, boolean tocaps, boolean xmlTags) { if (!xmlTags) { if (tocaps) { if (isLower(c)) { c = toUpper(c); } } else { // force to lower case if (isUpper(c)) { c = toLower(c); } } } return c; } /** * Classify chars in String and put them in lexmap. * @param str String * @param code code associated to chars in the String */ private static void mapStr(String str, short code) { int c; for (int i = 0; i < str.length(); i++) { c = str.charAt(i); lexmap[c] |= code; } } /** * Returns the constant which defines the classification of char in lexmap. 
* @param c char * @return char type */ private static short map(char c) { return (c < 128 ? lexmap[c] : 0); } /** * Is the given character encoding supported? * @param name character encoding name * @return <code>true</code> if encoding is supported, false otherwhise. */ public static boolean isCharEncodingSupported(String name) { name = EncodingNameMapper.toJava(name); if (name == null) { return false; } try { "".getBytes(name); } catch (UnsupportedEncodingException e) { return false; } return true; } }�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMProcessingInstructionImpl.java����������������������������������0000644�0001750�0001750�00000007272�10144212711�025433� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import org.w3c.dom.DOMException; /** * DOMProcessingInstructionImpl. 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 738 $ ($Author: fgiust $) */ public class DOMProcessingInstructionImpl extends DOMNodeImpl implements org.w3c.dom.ProcessingInstruction { /** * Instantiates a new DOM processing instruction. * @param adaptee wrapped Tidy node */ protected DOMProcessingInstructionImpl(Node adaptee) { super(adaptee); } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { return org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; } /** * @todo DOM level 2 getTarget() Not implemented. Returns null. * @see org.w3c.dom.ProcessingInstruction#getTarget */ public String getTarget() { return null; } /** * @see org.w3c.dom.ProcessingInstruction#getData */ public String getData() { return getNodeValue(); } /** * @see org.w3c.dom.ProcessingInstruction#setData(java.lang.String) */ public void setData(String data) throws DOMException { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Node is read only"); } }��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Configuration.java�������������������������������������������������0000644�0001750�0001750�00000101604�11131231632�022474� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML 
Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. 
* * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.io.FileInputStream; import java.io.IOException; import java.io.Serializable; import java.io.Writer; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Properties; /** * Read configuration file and manage configuration properties. Configuration files associate a property name with a * value. The format is that of a Java .properties file. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 817 $ ($Author: steffenyount $) */ public class Configuration implements Serializable { /** * character encoding = RAW. * @deprecated use <code>Tidy.setRawOut(true)</code> for raw output */ public static final int RAW = 0; /** * character encoding = ASCII. * @deprecated */ public static final int ASCII = 1; /** * character encoding = LATIN1. * @deprecated */ public static final int LATIN1 = 2; /** * character encoding = UTF8. * @deprecated */ public static final int UTF8 = 3; /** * character encoding = ISO2022. * @deprecated */ public static final int ISO2022 = 4; /** * character encoding = MACROMAN. * @deprecated */ public static final int MACROMAN = 5; /** * character encoding = UTF16LE. * @deprecated */ public static final int UTF16LE = 6; /** * character encoding = UTF16BE. * @deprecated */ public static final int UTF16BE = 7; /** * character encoding = UTF16. 
* @deprecated */ public static final int UTF16 = 8; /** * character encoding = WIN1252. * @deprecated */ public static final int WIN1252 = 9; /** * character encoding = BIG5. * @deprecated */ public static final int BIG5 = 10; /** * character encoding = SHIFTJIS. * @deprecated */ public static final int SHIFTJIS = 11; /** * Convert from deprecated tidy encoding constant to standard java encoding name. */ private final String[] ENCODING_NAMES = new String[]{"raw", // rawOut, it will not be mapped to a java encoding "ASCII", "ISO8859_1", "UTF8", "JIS", "MacRoman", "UnicodeLittle", "UnicodeBig", "Unicode", "Cp1252", "Big5", "SJIS"}; /** * treatment of doctype: omit. * @todo should be an enumeration DocTypeMode */ public static final int DOCTYPE_OMIT = 0; /** * treatment of doctype: auto. */ public static final int DOCTYPE_AUTO = 1; /** * treatment of doctype: strict. */ public static final int DOCTYPE_STRICT = 2; /** * treatment of doctype: loose. */ public static final int DOCTYPE_LOOSE = 3; /** * treatment of doctype: user. */ public static final int DOCTYPE_USER = 4; /** * Keep last duplicate attribute. * @todo should be an enumeration DupAttrMode */ public static final int KEEP_LAST = 0; /** * Keep first duplicate attribute. */ public static final int KEEP_FIRST = 1; /** * Map containg all the valid configuration options and the related parser. Tag entry contains String(option * name)-Flag instance. */ private static final Map OPTIONS = new HashMap(); /** * serial version UID for this class. 
*/
    private static final long serialVersionUID = -4955155037138560842L;

    // Registers every option name understood by this class. Each Flag ties an option name to the
    // name of the Configuration field that receives the parsed value and to the parser used for the
    // option's textual form. When the field name is null, parseProps() does not assign any field for
    // that option; presumably the parser applies the value itself - confirm in ParsePropertyImpl.
    static
    {
        // integer options
        addConfigOption(new Flag("indent-spaces", "spaces", ParsePropertyImpl.INT));
        addConfigOption(new Flag("wrap", "wraplen", ParsePropertyImpl.INT));
        addConfigOption(new Flag("show-errors", "showErrors", ParsePropertyImpl.INT));
        addConfigOption(new Flag("tab-size", "tabsize", ParsePropertyImpl.INT));

        // boolean options
        addConfigOption(new Flag("wrap-attributes", "wrapAttVals", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("wrap-script-literals", "wrapScriptlets", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("wrap-sections", "wrapSection", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("wrap-asp", "wrapAsp", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("wrap-jste", "wrapJste", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("wrap-php", "wrapPhp", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("literal-attributes", "literalAttribs", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("show-body-only", "bodyOnly", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("fix-uri", "fixUri", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("lower-literals", "lowerLiterals", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("hide-comments", "hideComments", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("indent-cdata", "indentCdata", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("force-output", "forceOutput", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("ascii-chars", "asciiChars", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("join-classes", "joinClasses", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("join-styles", "joinStyles", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("escape-cdata", "escapeCdata", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("replace-color", "replaceColor", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("quiet", "quiet", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("tidy-mark", "tidyMark", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("indent-attributes", "indentAttributes", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("hide-endtags", "hideEndTags", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("input-xml", "xmlTags", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("output-xml", "xmlOut", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("output-html", "htmlOut", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("output-xhtml", "xHTML", ParsePropertyImpl.BOOL));
        // "add-xml-pi" and "add-xml-decl" both write the same field (xmlPi)
        addConfigOption(new Flag("add-xml-pi", "xmlPi", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("add-xml-decl", "xmlPi", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("assume-xml-procins", "xmlPIs", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("uppercase-tags", "upperCaseTags", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("uppercase-attributes", "upperCaseAttrs", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("bare", "makeBare", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("clean", "makeClean", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("logical-emphasis", "logicalEmphasis", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("word-2000", "word2000", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("drop-empty-paras", "dropEmptyParas", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("drop-font-tags", "dropFontTags", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("drop-proprietary-attributes", "dropProprietaryAttributes", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("enclose-text", "encloseBodyText", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("enclose-block-text", "encloseBlockText", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("add-xml-space", "xmlSpace", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("fix-bad-comments", "fixComments", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("split", "burstSlides", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("break-before-br", "breakBeforeBR", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("numeric-entities", "numEntities", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("quote-marks", "quoteMarks", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("quote-nbsp", "quoteNbsp", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("quote-ampersand", "quoteAmpersand", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("write-back", "writeback", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("keep-time", "keepFileTimes", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("show-warnings", "showWarnings", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("ncr", "ncr", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("fix-backslash", "fixBackslash", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("gnu-emacs", "emacs", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("only-errors", "onlyErrors", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("output-raw", "rawOut", ParsePropertyImpl.BOOL));
        addConfigOption(new Flag("trim-empty-elements", "trimEmpty", ParsePropertyImpl.BOOL));

        // "markup" shares the onlyErrors field with "only-errors" but uses the INVBOOL parser
        // (presumably an inverted boolean - confirm in ParsePropertyImpl)
        addConfigOption(new Flag("markup", "onlyErrors", ParsePropertyImpl.INVBOOL));

        // encoding options: no backing field is named here
        addConfigOption(new Flag("char-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
        addConfigOption(new Flag("input-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
        addConfigOption(new Flag("output-encoding", null, ParsePropertyImpl.CHAR_ENCODING));

        // name options
        addConfigOption(new Flag("error-file", "errfile", ParsePropertyImpl.NAME));
        addConfigOption(new Flag("slide-style", "slidestyle", ParsePropertyImpl.NAME));
        addConfigOption(new Flag("language", "language", ParsePropertyImpl.NAME));

        // user-defined tag lists: no backing field is named here
        addConfigOption(new Flag("new-inline-tags", null, ParsePropertyImpl.TAGNAMES));
        addConfigOption(new Flag("new-blocklevel-tags", null, ParsePropertyImpl.TAGNAMES));
        addConfigOption(new Flag("new-empty-tags", null, ParsePropertyImpl.TAGNAMES));
        addConfigOption(new Flag("new-pre-tags", null, ParsePropertyImpl.TAGNAMES));

        // options with dedicated parsers
        addConfigOption(new Flag("doctype", "docTypeStr", ParsePropertyImpl.DOCTYPE));
        addConfigOption(new Flag("repeated-attributes", "duplicateAttrs", ParsePropertyImpl.REPEATED_ATTRIBUTES));
        addConfigOption(new Flag("alt-text", "altText", ParsePropertyImpl.STRING));
        addConfigOption(new Flag("indent", "indentContent", ParsePropertyImpl.INDENT));
        addConfigOption(new Flag("css-prefix", "cssPrefix", ParsePropertyImpl.CSS1SELECTOR));
        addConfigOption(new Flag("newline", null, ParsePropertyImpl.NEWLINE));
    }

    /** default indentation. */
    protected int spaces = 2;

    /** default wrap margin (68). */
    protected int wraplen = 68;

    /** default tab size (8). */
    protected int tabsize = 8;

    /** see doctype property. */
    protected int docTypeMode = DOCTYPE_AUTO;

    /** Keep first or last duplicate attribute. */
    protected int duplicateAttrs = KEEP_LAST;

    /** default text for alt attribute. */
    protected String altText;

    /**
     * style sheet for slides.
     * @deprecated does nothing
     */
    protected String slidestyle;

    /** RJ language property. */
    protected String language; // #431953

    /** user specified doctype. */
    protected String docTypeStr;

    /** file name to write errors to. */
    protected String errfile;

    /** if true then output tidied markup. */
    protected boolean writeback;

    /** if true normal output is suppressed. */
    protected boolean onlyErrors;

    /** however errors are always shown. */
    protected boolean showWarnings = true;

    /** no 'Parsing X', guessed DTD or summary. */
    protected boolean quiet;

    /** indent content of appropriate tags. */
    protected boolean indentContent;

    /** does text/block level content affect indentation. */
    protected boolean smartIndent;

    /** suppress optional end tags. */
    protected boolean hideEndTags;

    /** treat input as XML. */
    protected boolean xmlTags;

    /** create output as XML. */
    protected boolean xmlOut;

    /** output extensible HTML. */
    protected boolean xHTML;

    /** output plain-old HTML, even for XHTML input. Yes means set explicitly. */
    protected boolean htmlOut;

    /**
     * add <code><?xml?></code> for XML docs.
*/
    protected boolean xmlPi;

    /** output tags in upper not lower case. */
    protected boolean upperCaseTags;

    /** output attributes in upper not lower case. */
    protected boolean upperCaseAttrs;

    /** remove presentational clutter. */
    protected boolean makeClean;

    /** Make bare HTML: remove Microsoft cruft. */
    protected boolean makeBare;

    /** replace i by em and b by strong. */
    protected boolean logicalEmphasis;

    /** discard presentation tags. */
    protected boolean dropFontTags;

    /** discard proprietary attributes. */
    protected boolean dropProprietaryAttributes;

    /** discard empty p elements. */
    protected boolean dropEmptyParas = true;

    /** fix comments with adjacent hyphens. */
    protected boolean fixComments = true;

    /** trim empty elements. */
    protected boolean trimEmpty = true;

    /** output a newline before br or not? */
    protected boolean breakBeforeBR;

    /** create slides on each h2 element. */
    protected boolean burstSlides;

    /** use numeric entities. */
    protected boolean numEntities;

    /** output " marks as the &amp;quot; entity. */
    protected boolean quoteMarks;

    /** output non-breaking space as entity. */
    protected boolean quoteNbsp = true;

    /** output naked ampersand as the &amp;amp; entity. */
    protected boolean quoteAmpersand = true;

    /** wrap within attribute values. */
    protected boolean wrapAttVals;

    /** wrap within JavaScript string literals. */
    protected boolean wrapScriptlets;

    /** wrap within CDATA section tags. */
    protected boolean wrapSection = true;

    /** wrap within ASP pseudo elements. */
    protected boolean wrapAsp = true;

    /** wrap within JSTE pseudo elements. */
    protected boolean wrapJste = true;

    /** wrap within PHP pseudo elements. */
    protected boolean wrapPhp = true;

    /** fix URLs by replacing \ with /. */
    protected boolean fixBackslash = true;

    /** newline+indent before each attribute. */
    protected boolean indentAttributes;

    /** If set to yes PIs must end with <code>?></code>. */
    protected boolean xmlPIs;

    /** if set to yes adds xml:space attr as needed. */
    protected boolean xmlSpace;

    /** if yes text at body is wrapped in p's. */
    protected boolean encloseBodyText;

    /** if yes text in blocks is wrapped in p's. */
    protected boolean encloseBlockText;

    /** if yes last modified time is preserved. */
    protected boolean keepFileTimes = true;

    /** draconian cleaning for Word2000. */
    protected boolean word2000;

    /** add meta element indicating tidied doc. */
    protected boolean tidyMark = true;

    /** if true format error output for GNU Emacs. */
    protected boolean emacs;

    /** if true attributes may use newlines. */
    protected boolean literalAttribs;

    /** output BODY content only. */
    protected boolean bodyOnly;

    /** properly escape URLs. */
    protected boolean fixUri = true;

    /** folds known attribute values to lower case. */
    protected boolean lowerLiterals = true;

    /** replace hex color attribute values with names. */
    protected boolean replaceColor;

    /** hides all (real) comments in output. */
    protected boolean hideComments;

    /** indent CDATA sections. */
    protected boolean indentCdata;

    /** output document even if errors were found. */
    protected boolean forceOutput;

    /** number of errors to put out. */
    protected int showErrors = 6;

    /** convert quotes and dashes to nearest ASCII char. */
    protected boolean asciiChars = true;

    /** join multiple class attributes. */
    protected boolean joinClasses;

    /** join multiple style attributes. */
    protected boolean joinStyles = true;

    /** replace CDATA sections with escaped text. */
    protected boolean escapeCdata = true;

    /** allow numeric character references. */
    protected boolean ncr = true; // #431953

    /** CSS class naming for -clean option. */
    protected String cssPrefix;

    /** char encoding used when replacing illegal SGML chars, regardless of specified encoding. */
    protected String replacementCharEncoding = "WIN1252"; // by default

    /** TagTable associated with this Configuration. */
    protected TagTable tt;

    /**
     * Report instance. Used for messages.
*/ protected Report report; /** * track what types of tags user has defined to eliminate unnecessary searches. */ protected int definedTags; /** * bytes for the newline marker. */ protected char[] newline = (System.getProperty("line.separator")).toCharArray(); /** * Input character encoding (defaults to "ISO8859_1"). */ private String inCharEncoding = "ISO8859_1"; /** * Output character encoding (defaults to "ASCII"). */ private String outCharEncoding = "ASCII"; /** * Avoid mapping values > 127 to entities. */ protected boolean rawOut; /** * configuration properties. */ private transient Properties properties = new Properties(); /** * Instantiates a new Configuration. This method should be called by Tidy only. * @param report Report instance */ protected Configuration(Report report) { this.report = report; } /** * adds a config option to the map. * @param flag configuration options added */ private static void addConfigOption(Flag flag) { OPTIONS.put(flag.getName(), flag); } /** * adds configuration Properties. * @param p Properties */ public void addProps(Properties p) { Enumeration propEnum = p.propertyNames(); while (propEnum.hasMoreElements()) { String key = (String) propEnum.nextElement(); String value = p.getProperty(key); properties.put(key, value); } parseProps(); } /** * Parses a property file. * @param filename file name */ public void parseFile(String filename) { try { properties.load(new FileInputStream(filename)); } catch (IOException e) { System.err.println(filename + " " + e.toString()); return; } parseProps(); } /** * Is the given String a valid configuration flag? * @param name configuration parameter name * @return <code>true</code> if the given String is a valid config option */ public static boolean isKnownOption(String name) { return name != null && OPTIONS.containsKey(name); } /** * Parses the configuration properties file. 
*/ private void parseProps() { Iterator iterator = properties.keySet().iterator(); while (iterator.hasNext()) { String key = (String) iterator.next(); Flag flag = (Flag) OPTIONS.get(key); if (flag == null) { report.unknownOption(key); continue; } String stringValue = properties.getProperty(key); Object value = flag.getParser().parse(stringValue, key, this); if (flag.getLocation() != null) { try { flag.getLocation().set(this, value); } catch (IllegalArgumentException e) { throw new RuntimeException("IllegalArgumentException during config initialization for field " + key + "with value [" + value + "]: " + e.getMessage()); } catch (IllegalAccessException e) { throw new RuntimeException("IllegalArgumentException during config initialization for field " + key + "with value [" + value + "]: " + e.getMessage()); } } } } /** * Ensure that config is self consistent. */ public void adjust() { if (encloseBlockText) { encloseBodyText = true; } // avoid the need to set IndentContent when SmartIndent is set if (smartIndent) { indentContent = true; } // disable wrapping if (wraplen == 0) { wraplen = 0x7FFFFFFF; } // Word 2000 needs o:p to be declared as inline if (word2000) { definedTags |= Dict.TAGTYPE_INLINE; tt.defineTag(Dict.TAGTYPE_INLINE, "o:p"); } // #480701 disable XHTML output flag if both output-xhtml and xml are set if (xmlTags) { xHTML = false; } // XHTML is written in lower case if (xHTML) { xmlOut = true; upperCaseTags = false; upperCaseAttrs = false; } // if XML in, then XML out if (xmlTags) { xmlOut = true; xmlPIs = true; } // #427837 - fix by Dave Raggett 02 Jun 01 // generate <?xml version="1.0" encoding="iso-8859-1"?> if the output character encoding is Latin-1 etc. if (!"UTF8".equals(getOutCharEncodingName()) && !"ASCII".equals(getOutCharEncodingName()) && xmlOut) { xmlPi = true; } // XML requires end tags if (xmlOut) { quoteAmpersand = true; hideEndTags = false; } } /** * prints available configuration options. 
* @param errout where to write * @param showActualConfiguration print actual configuration values */ public void printConfigOptions(Writer errout, boolean showActualConfiguration) { String pad = " "; try { errout.write("\nConfiguration File Settings:\n\n"); if (showActualConfiguration) { errout.write("Name Type Current Value\n"); } else { errout.write("Name Type Allowable values\n"); } errout.write("=========================== ========= ========================================\n"); Flag configItem; // sort configuration options List values = new ArrayList(OPTIONS.values()); Collections.sort(values); Iterator iterator = values.iterator(); while (iterator.hasNext()) { configItem = (Flag) iterator.next(); errout.write(configItem.getName()); errout.write(pad, 0, 28 - configItem.getName().length()); errout.write(configItem.getParser().getType()); errout.write(pad, 0, 11 - configItem.getParser().getType().length()); if (showActualConfiguration) { Field field = configItem.getLocation(); Object actualValue = null; if (field != null) { try { actualValue = field.get(this); } catch (IllegalArgumentException e1) { // should never happen throw new RuntimeException("IllegalArgument when reading field " + field.getName()); } catch (IllegalAccessException e1) { // should never happen throw new RuntimeException("IllegalAccess when reading field " + field.getName()); } } errout.write(configItem.getParser().getFriendlyName(configItem.getName(), actualValue, this)); } else { errout.write(configItem.getParser().getOptionValues()); } errout.write("\n"); } errout.flush(); } catch (IOException e) { throw new RuntimeException(e.getMessage()); } } /** * A configuration option. */ static class Flag implements Comparable { /** * option name. */ private String name; /** * field name. */ private String fieldName; /** * Field where the evaluated value is saved. */ private Field location; /** * Parser for the configuration property. */ private ParseProperty parser; /** * Instantiates a new Flag. 
* @param name option name * @param fieldName field name (can be null) * @param parser parser for property */ Flag(String name, String fieldName, ParseProperty parser) { this.fieldName = fieldName; this.name = name; this.parser = parser; } /** * Getter for <code>location</code>. * @return Returns the location. */ public Field getLocation() { // lazy initialization to speed up loading if (fieldName != null && this.location == null) { try { this.location = Configuration.class.getDeclaredField(fieldName); } catch (NoSuchFieldException e) { throw new RuntimeException("NoSuchField exception during config initialization for field " + fieldName); } catch (SecurityException e) { throw new RuntimeException("Security exception during config initialization for field " + fieldName + ": " + e.getMessage()); } } return this.location; } /** * Getter for <code>name</code>. * @return Returns the name. */ public String getName() { return this.name; } /** * Getter for <code>parser</code>. * @return Returns the parser. */ public ParseProperty getParser() { return this.parser; } /** * @see java.lang.Object#equals(java.lang.Object) */ public boolean equals(Object obj) { return this.name.equals(((Flag) obj).name); } /** * @see java.lang.Object#hashCode() */ public int hashCode() { // returning the hashCode of String, to be consistent with equals and compareTo return this.name.hashCode(); } /** * @see java.lang.Comparable#compareTo(java.lang.Object) */ public int compareTo(Object o) { return this.name.compareTo(((Flag) o).name); } } /** * Getter for <code>inCharEncodingName</code>. * @return Returns the inCharEncodingName. */ protected String getInCharEncodingName() { return this.inCharEncoding; } /** * Setter for <code>inCharEncodingName</code>. * @param encoding The inCharEncodingName to set. 
*/ protected void setInCharEncodingName(String encoding) { String javaEncoding = EncodingNameMapper.toJava(encoding); if (javaEncoding != null) { this.inCharEncoding = javaEncoding; } } /** * Getter for <code>outCharEncodingName</code>. * @return Returns the outCharEncodingName. */ protected String getOutCharEncodingName() { return this.outCharEncoding; } /** * Setter for <code>outCharEncodingName</code>. * @param encoding The outCharEncodingName to set. */ protected void setOutCharEncodingName(String encoding) { String javaEncoding = EncodingNameMapper.toJava(encoding); if (javaEncoding != null) { this.outCharEncoding = javaEncoding; } } /** * Setter for <code>inOutCharEncodingName</code>. * @param encoding The CharEncodingName to set. */ protected void setInOutEncodingName(String encoding) { setInCharEncodingName(encoding); setOutCharEncodingName(encoding); } /** * Setter for <code>outCharEncoding</code>. * @param encoding The outCharEncoding to set. * @deprecated use setOutCharEncodingName(String) */ protected void setOutCharEncoding(int encoding) { setOutCharEncodingName(convertCharEncoding(encoding)); } /** * Setter for <code>inCharEncoding</code>. * @param encoding The inCharEncoding to set. * @deprecated use setInCharEncodingName(String) */ protected void setInCharEncoding(int encoding) { setInCharEncodingName(convertCharEncoding(encoding)); } /** * Convert a char encoding from the deprecated tidy constant to a standard java encoding name. 
* @param code encoding code * @return encoding name */ protected String convertCharEncoding(int code) { if (code != 0 && code < ENCODING_NAMES.length) { return ENCODING_NAMES[code]; } return null; } }����������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/ParserImpl.java����������������������������������������������������0000644�0001750�0001750�00000372720�11355372020�021761� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * HTML Parser implementation. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 1100 $ ($Author: aditsu $) */ public final class ParserImpl { /** * parser for html. */ public static final Parser HTML = new ParseHTML(); /** * parser for head. */ public static final Parser HEAD = new ParseHead(); /** * parser for title. */ public static final Parser TITLE = new ParseTitle(); /** * parser for script. */ public static final Parser SCRIPT = new ParseScript(); /** * parser for body. */ public static final Parser BODY = new ParseBody(); /** * parser for frameset. */ public static final Parser FRAMESET = new ParseFrameSet(); /** * parser for inline. 
*/ public static final Parser INLINE = new ParseInline(); /** * parser for list. */ public static final Parser LIST = new ParseList(); /** * parser for definition lists. */ public static final Parser DEFLIST = new ParseDefList(); /** * parser for pre. */ public static final Parser PRE = new ParsePre(); /** * parser for block elements. */ public static final Parser BLOCK = new ParseBlock(); /** * parser for table. */ public static final Parser TABLETAG = new ParseTableTag(); /** * parser for colgroup. */ public static final Parser COLGROUP = new ParseColGroup(); /** * parser for rowgroup. */ public static final Parser ROWGROUP = new ParseRowGroup(); /** * parser for row. */ public static final Parser ROW = new ParseRow(); /** * parser for noframes. */ public static final Parser NOFRAMES = new ParseNoFrames(); /** * parser for select. */ public static final Parser SELECT = new ParseSelect(); /** * parser for text. */ public static final Parser TEXT = new ParseText(); /** * parser for empty elements. */ public static final Parser EMPTY = new ParseEmpty(); /** * parser for optgroup. */ public static final Parser OPTGROUP = new ParseOptGroup(); /** * ParserImpl should not be instantiated. */ private ParserImpl() { // unused } /** * @param lexer * @param node * @param mode */ protected static void parseTag(Lexer lexer, Node node, short mode) { // Fix by GLP 2000-12-21. Need to reset insertspace if this // is both a non-inline and empty tag (base, link, meta, isindex, hr, area). if ((node.tag.model & Dict.CM_EMPTY) != 0) { lexer.waswhite = false; } else if (!((node.tag.model & Dict.CM_INLINE) != 0)) { lexer.insertspace = false; } if (node.tag.getParser() == null) { return; } if (node.type == Node.START_END_TAG) { Node.trimEmptyElement(lexer, node); return; } node.tag.getParser().parse(lexer, node, mode); } /** * Move node to the head, where element is used as starting point in hunt for head. Normally called during parsing. 
* @param lexer lexer providing the configuration, the report and the tokens
     * @param element node from which the enclosing html element is searched, walking up through the parents
     * @param node node to move; start and start-end tags are moved, anything else is only reported as discarded
     */
    protected static void moveToHead(Lexer lexer, Node element, Node node)
    {
        Node head;
        node.removeNode(); // make sure that node is isolated

        TagTable tt = lexer.configuration.tt;

        if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
        {
            lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

            // climb to the html element
            // NOTE(review): assumes element has an html ancestor; otherwise element becomes null here - confirm
            while (element.tag != tt.tagHtml)
            {
                element = element.parent;
            }

            // append the node to the first head child of html, if there is one
            for (head = element.content; head != null; head = head.next)
            {
                if (head.tag == tt.tagHead)
                {
                    head.insertNodeAtEnd(node);
                    break;
                }
            }

            if (node.tag.getParser() != null)
            {
                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
            }
        }
        else
        {
            lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
        }
    }

    /**
     * moves given node to end of body element.
     * @param lexer Lexer
     * @param node Node to insert
     */
    static void moveNodeToBody(Lexer lexer, Node node)
    {
        node.removeNode();
        Node body = lexer.root.findBody(lexer.configuration.tt);
        body.insertNodeAtEnd(node);
    }

    /**
     * Parser for HTML. Parses the head first (inferring one when it is missing), then dispatches what follows to
     * body, frameset or noframes handling.
     */
    public static class ParseHTML implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node html, short mode)
        {
            Node node, head;
            Node frameset = null; // first frameset seen, if any
            Node noframes = null; // noframes element that collects body content of a frameset document

            lexer.configuration.xmlTags = false;
            lexer.seenEndBody = false;
            TagTable tt = lexer.configuration.tt;

            // first loop: find the head element, or infer one
            while (true)
            {
                node = lexer.getToken(Lexer.IGNORE_WHITESPACE);

                if (node == null)
                {
                    node = lexer.inferredTag("head");
                    break;
                }

                if (node.tag == tt.tagHead)
                {
                    break;
                }

                if (node.tag == html.tag && node.type == Node.END_TAG)
                {
                    lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(html, node))
                {
                    continue;
                }

                // anything else starts the content: push it back and infer the head
                lexer.ungetToken();
                node = lexer.inferredTag("head");
                break;
            }

            head = node;
            html.insertNodeAtEnd(head);
            HEAD.parse(lexer, head, mode);

            // second loop: decide what follows the head
            while (true)
            {
                node = lexer.getToken(Lexer.IGNORE_WHITESPACE);

                if (node == null)
                {
                    if (frameset == null)
                    {
                        // implied body
                        node = lexer.inferredTag("body");
                        html.insertNodeAtEnd(node);
                        BODY.parse(lexer, node, mode);
                    }

                    return;
                }

                // robustly handle html tags
                if (node.tag == html.tag)
                {
                    if (node.type != Node.START_TAG && frameset == null)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    }
                    else if (node.type == Node.END_TAG)
                    {
                        lexer.seenEndHtml = true;
                    }

                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(html, node))
                {
                    continue;
                }

                // if frameset document coerce <body> to <noframes>
                if (node.tag == tt.tagBody)
                {
                    if (node.type != Node.START_TAG)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset != null)
                    {
                        lexer.ungetToken();

                        if (noframes == null)
                        {
                            noframes = lexer.inferredTag("noframes");
                            frameset.insertNodeAtEnd(noframes);
                            lexer.report.warning(lexer, html, noframes, Report.INSERTING_TAG);
                        }

                        parseTag(lexer, noframes, mode);
                        continue;
                    }

                    lexer.constrainVersion(~Dict.VERS_FRAMESET);
                    break; // to parse body
                }

                // flag an error if we see more than one frameset
                if (node.tag == tt.tagFrameset)
                {
                    if (node.type != Node.START_TAG)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset != null)
                    {
                        lexer.report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
                    }
                    else
                    {
                        frameset = node;
                    }

                    html.insertNodeAtEnd(node);
                    parseTag(lexer, node, mode);

                    // see if it includes a noframes element so that we can merge subsequent noframes elements
                    for (node = frameset.content; node != null; node = node.next)
                    {
                        if (node.tag == tt.tagNoframes)
                        {
                            noframes = node;
                        }
                    }
                    continue;
                }

                // if not a frameset document coerce <noframes> to <body>
                if (node.tag == tt.tagNoframes)
                {
                    if (node.type != Node.START_TAG)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset == null)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        node = lexer.inferredTag("body");
                        break;
                    }

                    if (noframes == null)
                    {
                        noframes = node;
                        frameset.insertNodeAtEnd(noframes);
                    }

                    parseTag(lexer, noframes, mode);
                    continue;
                }

                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                {
                    // head-only elements found after the head are moved back into it
                    if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
                    {
                        moveToHead(lexer, html, node);
                        continue;
                    }

                    // #427675 - discard illegal frame element following a frameset - fix by Randy Waki 11 Oct 00
                    if (frameset != null && node.tag == tt.tagFrame)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }
                }

                lexer.ungetToken();

                // insert other content into noframes element
                if (frameset != null)
                {
                    if (noframes == null)
                    {
                        noframes = lexer.inferredTag("noframes");
                        frameset.insertNodeAtEnd(noframes);
                    }
                    else
                    {
                        lexer.report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
                    }

                    lexer.constrainVersion(Dict.VERS_FRAMESET);
                    parseTag(lexer, noframes, mode);
                    continue;
                }

                node = lexer.inferredTag("body");
                lexer.constrainVersion(~Dict.VERS_FRAMESET);
                break;
            }

            // node must be body
            html.insertNodeAtEnd(node);
            parseTag(lexer, node, mode);
            lexer.seenEndHtml = true;
        }

    }

    /**
     * Parser for HEAD. Collects head-level elements until the closing head tag, a text node or an element that is
     * not allowed in head is met; the latter two are pushed back for the caller.
     */
    public static class ParseHead implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node head, short mode)
        {
            Node node;
            int hasTitle = 0; // number of title elements seen so far
            int hasBase = 0; // number of base elements seen so far
            TagTable tt = lexer.configuration.tt;

            while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                if (node.tag == head.tag && node.type == Node.END_TAG)
                {
                    head.closed = true;
                    break;
                }

                // text ends the head: warn and push it back
                if (node.type == Node.TEXT_NODE)
                {
                    lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
                    lexer.ungetToken();
                    break;
                }

                // deal with comments etc.
                if (Node.insertMisc(head, node))
                {
                    continue;
                }

                if (node.type == Node.DOCTYPE_TAG)
                {
                    Node.insertDocType(lexer, head, node);
                    continue;
                }

                // discard unknown tags
                if (node.tag == null)
                {
                    lexer.report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // an element without the CM_HEAD model implicitly closes the head
                if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD))
                {
                    // #545067 Implicit closing of head broken - warn only for XHTML input
                    if (lexer.isvoyager)
                    {
                        lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
                    }
                    lexer.ungetToken();
                    break;
                }

                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                {
                    if (node.tag == tt.tagTitle)
                    {
                        ++hasTitle;

                        if (hasTitle > 1)
                        {
                            lexer.report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
                        }
                    }
                    else if (node.tag == tt.tagBase)
                    {
                        ++hasBase;

                        if (hasBase > 1)
                        {
                            lexer.report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
                        }
                    }
                    else if (node.tag == tt.tagNoscript)
                    {
                        lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
                    }

                    head.insertNodeAtEnd(node);
                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                    continue;
                }

                // discard unexpected text nodes and end tags
                lexer.report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
            }
        }
    }

    /**
     * Parser for TITLE.
*/
    public static class ParseTitle implements Parser
    {

        /**
         * Collects the text content of a title element until its end tag. A second title start tag is treated as
         * the (mistyped) end tag; any other element ends the title and is pushed back for the caller.
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node title, short mode)
        {
            Node node;

            while ((node = lexer.getToken(Lexer.MIXED_CONTENT)) != null)
            {
                // [438658] : Missing / in title endtag makes 2 titles
                if (node.tag == title.tag && node.type == Node.START_TAG)
                {
                    lexer.report.warning(lexer, title, node, Report.COERCE_TO_ENDTAG);
                    node.type = Node.END_TAG;
                    // push the coerced token back so that the end-tag branch below closes the title on the
                    // next iteration; without this the coerced token was simply dropped
                    lexer.ungetToken();
                    continue;
                }
                else if (node.tag == title.tag && node.type == Node.END_TAG)
                {
                    title.closed = true;
                    Node.trimSpaces(lexer, title);
                    return;
                }

                if (node.type == Node.TEXT_NODE)
                {
                    // only called for 1st child
                    if (title.content == null)
                    {
                        Node.trimInitialSpace(lexer, title, node);
                    }

                    // skip text nodes that are empty (after trimming)
                    if (node.start >= node.end)
                    {
                        continue;
                    }

                    title.insertNodeAtEnd(node);
                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(title, node))
                {
                    continue;
                }

                // discard unknown tags
                if (node.tag == null)
                {
                    lexer.report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // pushback unexpected tokens
                lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
                lexer.ungetToken();
                Node.trimSpaces(lexer, title);
                return;
            }

            // input ended before the title was closed (node is null here)
            lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
        }

    }

    /**
     * Parser for SCRIPT. Reads the element content as a single CDATA node and expects the matching end tag
     * right after it.
     */
    public static class ParseScript implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node script, short mode)
        {
            Node node = lexer.getCDATA(script);

            if (node != null)
            {
                script.insertNodeAtEnd(node);
            }
            else
            {
                /* handle e.g. a document like "<script>" */
                lexer.report.warning(lexer, script, null, Report.MISSING_ENDTAG_FOR);
                return;
            }

            node = lexer.getToken(Lexer.IGNORE_WHITESPACE);

            // anything other than the matching end tag (compared case-insensitively by name) is reported and,
            // when there is a token at all, pushed back
            if (!(node != null && node.type == Node.END_TAG && node.tag != null && node.tag.name
                .equalsIgnoreCase(script.tag.name)))
            {
                lexer.report.warning(lexer, script, node, Report.MISSING_ENDTAG_FOR);

                if (node != null)
                {
                    lexer.ungetToken();
                }
            }
        }

    }

    /**
     * Parser for BODY.
     */
    public static class ParseBody implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node body, short mode)
        {
            Node node;
            boolean checkstack, iswhitenode;

            mode = Lexer.IGNORE_WHITESPACE;
            checkstack = true;
            TagTable tt = lexer.configuration.tt;

            Clean.bumpObject(lexer, body.parent);

            while ((node = lexer.getToken(mode)) != null)
            {

                // #538536 Extra endtags not detected
                if (node.tag == tt.tagHtml)
                {
                    if (node.type == Node.START_TAG || node.type == Node.START_END_TAG || lexer.seenEndHtml)
                    {
                        lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                    }
                    else
                    {
                        lexer.seenEndHtml = true;
                    }

                    continue;
                }

                if (lexer.seenEndBody
                    && (node.type == Node.START_TAG || node.type == Node.END_TAG || node.type == Node.START_END_TAG))
                {
                    lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
                }

                if (node.tag == body.tag && node.type == Node.END_TAG)
                {
                    body.closed = true;
                    Node.trimSpaces(lexer, body);
                    lexer.seenEndBody = true;
                    mode = Lexer.IGNORE_WHITESPACE;

                    if (body.parent.tag == tt.tagNoframes)
                    {
                        break;
                    }

                    continue;
                }

                if (node.tag == tt.tagNoframes)
                {
                    if (node.type == Node.START_TAG)
                    {
                        body.insertNodeAtEnd(node);
                        BLOCK.parse(lexer, node, mode);
                        continue;
                    }

                    if (node.type == Node.END_TAG && body.parent.tag == tt.tagNoframes)
                    {
                        Node.trimSpaces(lexer, body);
                        lexer.ungetToken();
                        break;
                    }
                }

                if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset) && body.parent.tag == tt.tagNoframes)
                {
                    Node.trimSpaces(lexer, body);
                    lexer.ungetToken();
                    break;
                }

                iswhitenode = false;

                if (node.type
== Node.TEXT_NODE && node.end <= node.start + 1 && node.textarray[node.start] == (byte) ' ') { iswhitenode = true; } // deal with comments etc. if (Node.insertMisc(body, node)) { continue; } // #538536 Extra endtags not detected // if (lexer.seenEndBody && !iswhitenode) // { // lexer.seenEndBody = true; // lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY); // } // mixed content model permits text if (node.type == Node.TEXT_NODE) { if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE) { continue; } if (lexer.configuration.encloseBodyText && !iswhitenode) { Node para; lexer.ungetToken(); para = lexer.inferredTag("p"); body.insertNodeAtEnd(para); parseTag(lexer, para, mode); mode = Lexer.MIXED_CONTENT; continue; } // HTML2 and HTML4 strict doesn't allow text here lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20)); if (checkstack) { checkstack = false; if (lexer.inlineDup(node) > 0) { continue; } } body.insertNodeAtEnd(node); mode = Lexer.MIXED_CONTENT; continue; } if (node.type == Node.DOCTYPE_TAG) { Node.insertDocType(lexer, body, node); continue; } // discard unknown and PARAM tags if (node.tag == null || node.tag == tt.tagParam) { lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); continue; } // Netscape allows LI and DD directly in BODY We infer UL or DL respectively and use this boolean to // exclude block-level elements so as to match Netscape's observed behaviour. 
lexer.excludeBlocks = false; if ((!((node.tag.model & Dict.CM_BLOCK) != 0) && !((node.tag.model & Dict.CM_INLINE) != 0)) || node.tag == tt.tagInput) { // avoid this error message being issued twice if (!((node.tag.model & Dict.CM_HEAD) != 0)) { lexer.report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN); } if ((node.tag.model & Dict.CM_HTML) != 0) { // copy body attributes if current body was inferred if (node.tag == tt.tagBody && body.implicit && body.attributes == null) { body.attributes = node.attributes; node.attributes = null; } continue; } if ((node.tag.model & Dict.CM_HEAD) != 0) { moveToHead(lexer, body, node); continue; } if ((node.tag.model & Dict.CM_LIST) != 0) { lexer.ungetToken(); node = lexer.inferredTag("ul"); node.addClass("noindent"); lexer.excludeBlocks = true; } else if ((node.tag.model & Dict.CM_DEFLIST) != 0) { lexer.ungetToken(); node = lexer.inferredTag("dl"); lexer.excludeBlocks = true; } else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0) { // Issue 2855511 if (node.type != Node.END_TAG) { lexer.ungetToken(); node = lexer.inferredTag("table"); } lexer.excludeBlocks = true; } else if (node.tag == tt.tagInput) { lexer.ungetToken(); node = lexer.inferredTag("form"); lexer.excludeBlocks = true; } else { if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0)) { lexer.ungetToken(); return; } // ignore </td></th> <option> etc. 
continue; } } if (node.type == Node.END_TAG) { if (node.tag == tt.tagBr) { node.type = Node.START_TAG; } else if (node.tag == tt.tagP) { Node.coerceNode(lexer, node, tt.tagBr); body.insertNodeAtEnd(node); node = lexer.inferredTag("br"); } else if ((node.tag.model & Dict.CM_INLINE) != 0) { lexer.popInline(node); } } if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0)) { // HTML4 strict doesn't allow inline content here // but HTML2 does allow img elements as children of body if (node.tag == tt.tagImg) { lexer.constrainVersion(~Dict.VERS_HTML40_STRICT); } else { lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20)); } if (checkstack && !node.implicit) { checkstack = false; if (lexer.inlineDup(node) > 0) { continue; } } mode = Lexer.MIXED_CONTENT; } else { checkstack = true; mode = Lexer.IGNORE_WHITESPACE; } if (node.implicit) { lexer.report.warning(lexer, body, node, Report.INSERTING_TAG); } body.insertNodeAtEnd(node); parseTag(lexer, node, mode); continue; } // discard unexpected tags lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED); } } } /** * Parser for FRAMESET. */ public static class ParseFrameSet implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node frameset, short mode) { Node node; TagTable tt = lexer.configuration.tt; lexer.badAccess |= Report.USING_FRAMES; while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == frameset.tag && node.type == Node.END_TAG) { frameset.closed = true; Node.trimSpaces(lexer, frameset); return; } // deal with comments etc. 
if (Node.insertMisc(frameset, node)) { continue; } if (node.tag == null) { lexer.report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); continue; } if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) { moveToHead(lexer, frameset, node); continue; } } if (node.tag == tt.tagBody) { lexer.ungetToken(); node = lexer.inferredTag("noframes"); lexer.report.warning(lexer, frameset, node, Report.INSERTING_TAG); } if (node.type == Node.START_TAG && (node.tag.model & Dict.CM_FRAMES) != 0) { frameset.insertNodeAtEnd(node); lexer.excludeBlocks = false; parseTag(lexer, node, Lexer.MIXED_CONTENT); continue; } else if (node.type == Node.START_END_TAG && (node.tag.model & Dict.CM_FRAMES) != 0) { frameset.insertNodeAtEnd(node); continue; } // discard unexpected tags lexer.report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED); } lexer.report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR); } } /** * Parser for INLINE. */ public static class ParseInline implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node element, short mode) { Node node, parent; TagTable tt = lexer.configuration.tt; if (TidyUtils.toBoolean(element.tag.model & Dict.CM_EMPTY)) { return; } // ParseInline is used for some block level elements like H1 to H6 For such elements we need to insert // inline emphasis tags currently on the inline stack. For Inline elements, we normally push them onto the // inline stack provided they aren't implicit or OBJECT/APPLET. 
This test is carried out in PushInline and // PopInline, see istack.c We don't push SPAN to replicate current browser behavior if (TidyUtils.toBoolean(element.tag.model & Dict.CM_BLOCK) || (element.tag == tt.tagDt)) { lexer.inlineDup(null); } else if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE) // EUNYEE: Add back this condition // because this causes the infinite loop problem when the span does not have the ending tag. && element.tag != tt.tagA && element.tag != tt.tagSpan) { // && element.tag != tt.tagSpan #540571 Inconsistent behaviour with span inline element lexer.pushInline(element); } if (element.tag == tt.tagNobr) { lexer.badLayout |= Report.USING_NOBR; } else if (element.tag == tt.tagFont) { lexer.badLayout |= Report.USING_FONT; } // Inline elements may or may not be within a preformatted element if (mode != Lexer.PREFORMATTED) { mode = Lexer.MIXED_CONTENT; } while ((node = lexer.getToken(mode)) != null) { // end tag for current element if (node.tag == element.tag && node.type == Node.END_TAG) { if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)) { lexer.popInline(node); } if (!TidyUtils.toBoolean(mode & Lexer.PREFORMATTED)) { Node.trimSpaces(lexer, element); } // if a font element wraps an anchor and nothing else then move the font element inside the anchor // since otherwise it won't alter the anchor text color if (element.tag == tt.tagFont && element.content != null && element.content == element.last) { Node child = element.content; if (child.tag == tt.tagA) { child.parent = element.parent; child.next = element.next; child.prev = element.prev; if (child.prev != null) { child.prev.next = child; } else { child.parent.content = child; } if (child.next != null) { child.next.prev = child; } else { child.parent.last = child; } element.next = null; element.prev = null; element.parent = child; element.content = child.content; element.last = child.last; child.content = element; child.last = element; for (child = element.content; child != 
null; child = child.next) { child.parent = element; } } } element.closed = true; Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } // <u> ... <u> map 2nd <u> to </u> if 1st is explicit // otherwise emphasis nesting is probably unintentional // big and small have cumulative effect to leave them alone if (node.type == Node.START_TAG && node.tag == element.tag && lexer.isPushed(node) && !node.implicit && !element.implicit && node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0) && node.tag != tt.tagA && node.tag != tt.tagFont && node.tag != tt.tagBig && node.tag != tt.tagSmall && node.tag != tt.tagQ) { if (element.content != null && node.attributes == null) { lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); node.type = Node.END_TAG; lexer.ungetToken(); continue; } lexer.report.warning(lexer, element, node, Report.NESTED_EMPHASIS); } else if (lexer.isPushed(node) && node.type == Node.START_TAG && node.tag == tt.tagQ) { lexer.report.warning(lexer, element, node, Report.NESTED_QUOTATION); } if (node.type == Node.TEXT_NODE) { // only called for 1st child if (element.content == null && !TidyUtils.toBoolean(mode & Lexer.PREFORMATTED)) { Node.trimSpaces(lexer, element); } if (node.start >= node.end) { continue; } element.insertNodeAtEnd(node); continue; } // mixed content model so allow text if (Node.insertMisc(element, node)) { continue; } // deal with HTML tags if (node.tag == tt.tagHtml) { if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } // otherwise infer end of inline element lexer.ungetToken(); if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } Node.trimEmptyElement(lexer, element); return; } // within <dt> or <pre> map <p> to <br> if (node.tag == tt.tagP && node.type == Node.START_TAG && ((mode & Lexer.PREFORMATTED) != 0 || element.tag == tt.tagDt || 
element.isDescendantOf(tt.tagDt))) {
                    node.tag = tt.tagBr;
                    node.element = "br";
                    Node.trimSpaces(lexer, element);
                    element.insertNodeAtEnd(node);
                    continue;
                }

                // ignore unknown and PARAM tags
                if (node.tag == null || node.tag == tt.tagParam) {
                    lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                if (node.tag == tt.tagBr && node.type == Node.END_TAG) {
                    node.type = Node.START_TAG;
                }

                if (node.type == Node.END_TAG) {
                    // coerce </br> to <br>
                    if (node.tag == tt.tagBr) {
                        node.type = Node.START_TAG;
                    } else if (node.tag == tt.tagP) {
                        // coerce unmatched </p> to <br><br>
                        if (!element.isDescendantOf(tt.tagP)) {
                            // first <br>: the </p> token itself, re-tagged
                            Node.coerceNode(lexer, node, tt.tagBr);
                            Node.trimSpaces(lexer, element);
                            element.insertNodeAtEnd(node);

                            // second <br>: an inferred node. It has to be attached explicitly because the
                            // following continue fetches a fresh token and would otherwise drop it.
                            node = lexer.inferredTag("br");
                            element.insertNodeAtEnd(node);
                            continue;
                        }
                    } else if ((node.tag.model & Dict.CM_INLINE) != 0
                        && node.tag != tt.tagA
                        && !((node.tag.model & Dict.CM_OBJECT) != 0)
                        && (element.tag.model & Dict.CM_INLINE) != 0) {
                        // allow any inline end tag to end current element
                        lexer.popInline(element);

                        if (element.tag != tt.tagA) {
                            if (node.tag == tt.tagA && node.tag != element.tag) {
                                lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                lexer.ungetToken();
                            } else {
                                lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
                            }

                            if (!((mode & Lexer.PREFORMATTED) != 0)) {
                                Node.trimSpaces(lexer, element);
                            }
                            Node.trimEmptyElement(lexer, element);
                            return;
                        }

                        // if parent is <a> then discard unexpected inline end tag
                        lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }
                    // special case </tr> etc.
for stuff moved in front of table else if (lexer.exiled && node.tag.model != 0 && (node.tag.model & Dict.CM_TABLE) != 0) { lexer.ungetToken(); Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } } // allow any header tag to end current header if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0) { if (node.tag == element.tag) { lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG); } else { lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); lexer.ungetToken(); } if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } Node.trimEmptyElement(lexer, element); return; } // an <A> tag to ends any open <A> element but <A href=...> is mapped to </A><A href=...> // #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 // if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node)) if (node.tag == tt.tagA && !node.implicit && (element.tag == tt.tagA || element.isDescendantOf(tt.tagA))) { // coerce <a> to </a> unless it has some attributes // #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00 // other fixes by Dave Raggett // if (node.attributes == null) if (node.type != Node.END_TAG && node.attributes == null) { node.type = Node.END_TAG; lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG); // lexer.popInline(node); lexer.ungetToken(); continue; } lexer.ungetToken(); lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); // lexer.popInline(element); if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } Node.trimEmptyElement(lexer, element); return; } if ((element.tag.model & Dict.CM_HEADING) != 0) { if (node.tag == tt.tagCenter || node.tag == tt.tagDiv) { if (node.type != Node.START_TAG && node.type != Node.START_END_TAG) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } lexer.report.warning(lexer, element, node, 
Report.TAG_NOT_ALLOWED_IN); // insert center as parent if heading is empty if (element.content == null) { Node.insertNodeAsParent(element, node); continue; } // split heading and make center parent of 2nd part element.insertNodeAfterElement(node); if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } element = lexer.cloneNode(element); element.start = lexer.lexsize; element.end = lexer.lexsize; node.insertNodeAtEnd(element); continue; } if (node.tag == tt.tagHr) { if (node.type != Node.START_TAG && node.type != Node.START_END_TAG) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); // insert hr before heading if heading is empty if (element.content == null) { Node.insertNodeBeforeElement(element, node); continue; } // split heading and insert hr before 2nd part element.insertNodeAfterElement(node); if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } element = lexer.cloneNode(element); element.start = lexer.lexsize; element.end = lexer.lexsize; node.insertNodeAfterElement(element); continue; } } if (element.tag == tt.tagDt) { if (node.tag == tt.tagHr) { Node dd; if (node.type != Node.START_TAG && node.type != Node.START_END_TAG) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN); dd = lexer.inferredTag("dd"); // insert hr within dd before dt if dt is empty if (element.content == null) { Node.insertNodeBeforeElement(element, dd); dd.insertNodeAtEnd(node); continue; } // split dt and insert hr within dd before 2nd part element.insertNodeAfterElement(dd); dd.insertNodeAtEnd(node); if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } element = lexer.cloneNode(element); element.start = lexer.lexsize; element.end = lexer.lexsize; dd.insertNodeAfterElement(element); continue; } } // if 
this is the end tag for an ancestor element then infer end tag for this element if (node.type == Node.END_TAG) { for (parent = element.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit) { lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); } if (element.tag == tt.tagA) { lexer.popInline(element); } lexer.ungetToken(); if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } Node.trimEmptyElement(lexer, element); return; } } } // block level tags end this element if (!((node.tag.model & Dict.CM_INLINE) != 0)) { if (node.type != Node.START_TAG) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } if (!((element.tag.model & Dict.CM_OPT) != 0)) { lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); } if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0)) { moveToHead(lexer, element, node); continue; } // prevent anchors from propagating into block tags except for headings h1 to h6 if (element.tag == tt.tagA) { if (node.tag != null && !((node.tag.model & Dict.CM_HEADING) != 0)) { lexer.popInline(element); } else if (!(element.content != null)) { Node.discardElement(element); lexer.ungetToken(); return; } } lexer.ungetToken(); if (!((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, element); } Node.trimEmptyElement(lexer, element); return; } // parse inline element if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { if (node.implicit) { lexer.report.warning(lexer, element, node, Report.INSERTING_TAG); } // trim white space before <br> if (node.tag == tt.tagBr) { Node.trimSpaces(lexer, element); } element.insertNodeAtEnd(node); parseTag(lexer, node, mode); continue; } // discard unexpected tags lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } if (!((element.tag.model & Dict.CM_OPT) != 
0)) { lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); } Node.trimEmptyElement(lexer, element); } } /** * Parser for LIST. */ public static class ParseList implements Parser { public void parse(Lexer lexer, Node list, short mode) { Node node; Node parent; TagTable tt = lexer.configuration.tt; if ((list.tag.model & Dict.CM_EMPTY) != 0) { return; } lexer.insert = -1; // defer implicit inline start tags while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == list.tag && node.type == Node.END_TAG) { if ((list.tag.model & Dict.CM_OBSOLETE) != 0) { Node.coerceNode(lexer, list, tt.tagUl); } list.closed = true; Node.trimEmptyElement(lexer, list); return; } // deal with comments etc. if (Node.insertMisc(list, node)) { continue; } if (node.type != Node.TEXT_NODE && node.tag == null) { lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); continue; } // if this is the end tag for an ancestor element then infer end tag for this element if (node.type == Node.END_TAG) { if (node.tag == tt.tagForm) { badForm(lexer); lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); continue; } if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0) { lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); lexer.popInline(node); continue; } for (parent = list.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); lexer.ungetToken(); if ((list.tag.model & Dict.CM_OBSOLETE) != 0) { Node.coerceNode(lexer, list, tt.tagUl); } Node.trimEmptyElement(lexer, list); return; } } lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); continue; } if (node.tag != tt.tagLi) { lexer.ungetToken(); if (node.tag != null && (node.tag.model & Dict.CM_BLOCK) != 0 && lexer.excludeBlocks) { lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); Node.trimEmptyElement(lexer, list); return; 
} node = lexer.inferredTag("li"); node.addAttribute("style", "list-style: none"); lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG); } // node should be <LI> list.insertNodeAtEnd(node); parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); } if ((list.tag.model & Dict.CM_OBSOLETE) != 0) { Node.coerceNode(lexer, list, tt.tagUl); } lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR); Node.trimEmptyElement(lexer, list); } } /** * Parser for empty elements. */ public static class ParseEmpty implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node element, short mode) { if (lexer.isvoyager) { Node node = lexer.getToken(mode); if (node != null && !(node.type == Node.END_TAG && node.tag == element.tag)) { lexer.report.warning(lexer, element, node, Report.ELEMENT_NOT_EMPTY); lexer.ungetToken(); } } } } /** * Parser for DEFLIST. */ public static class ParseDefList implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node list, short mode) { Node node, parent; TagTable tt = lexer.configuration.tt; if ((list.tag.model & Dict.CM_EMPTY) != 0) { return; } lexer.insert = -1; // defer implicit inline start tags while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == list.tag && node.type == Node.END_TAG) { list.closed = true; Node.trimEmptyElement(lexer, list); return; } // deal with comments etc. 
if (Node.insertMisc(list, node)) { continue; } if (node.type == Node.TEXT_NODE) { lexer.ungetToken(); node = lexer.inferredTag("dt"); lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG); } if (node.tag == null) { lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); continue; } // if this is the end tag for an ancestor element then infer end tag for this element if (node.type == Node.END_TAG) { if (node.tag == tt.tagForm) { badForm(lexer); lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); continue; } for (parent = list.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE); lexer.ungetToken(); Node.trimEmptyElement(lexer, list); return; } } } // center in a dt or a dl breaks the dl list in two if (node.tag == tt.tagCenter) { if (list.content != null) { list.insertNodeAfterElement(node); } else { // trim empty dl list Node.insertNodeBeforeElement(list, node); // #540296 tidy dumps with empty definition list Node.discardElement(list); } // and parse contents of center parseTag(lexer, node, mode); // now create a new dl element list = lexer.inferredTag("dl"); node.insertNodeAfterElement(list); continue; } if (!(node.tag == tt.tagDt || node.tag == tt.tagDd)) { lexer.ungetToken(); if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) { lexer.report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN); Node.trimEmptyElement(lexer, list); return; } // if DD appeared directly in BODY then exclude blocks if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks) { Node.trimEmptyElement(lexer, list); return; } node = lexer.inferredTag("dd"); lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG); } if (node.type == Node.END_TAG) { lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED); continue; } // node should be <DT> or <DD> list.insertNodeAtEnd(node); parseTag(lexer, node, 
Lexer.IGNORE_WHITESPACE);
            }

            lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
            Node.trimEmptyElement(lexer, list);
        }
    }

    /**
     * Parser for PRE.
     */
    public static class ParsePre implements Parser {

        /**
         * Reads tokens in PREFORMATTED mode until the matching end tag, appending text and permitted tags to
         * <code>pre</code>, escaping tags the lexer rejects as pre content and mapping &lt;p&gt; to &lt;br&gt;.
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node pre, short mode) {
            Node node;
            TagTable tt = lexer.configuration.tt;

            if ((pre.tag.model & Dict.CM_EMPTY) != 0) {
                return;
            }

            // obsolete elements handled by this parser are rewritten as PRE
            if ((pre.tag.model & Dict.CM_OBSOLETE) != 0) {
                Node.coerceNode(lexer, pre, tt.tagPre);
            }

            lexer.inlineDup(null); // tell lexer to insert inlines if needed

            while ((node = lexer.getToken(Lexer.PREFORMATTED)) != null) {
                if (node.tag == pre.tag && node.type == Node.END_TAG) {
                    Node.trimSpaces(lexer, pre);
                    pre.closed = true;
                    Node.trimEmptyElement(lexer, pre);
                    return;
                }

                // html tokens are never inserted; only start tags are worth a warning
                if (node.tag == tt.tagHtml) {
                    if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) {
                        lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                    }
                    continue;
                }

                if (node.type == Node.TEXT_NODE) {
                    // if first check for initial newline
                    if (pre.content == null) {
                        // only look at the first byte when the node actually owns one; an empty node would
                        // otherwise read a byte outside its [start, end) span
                        if (node.start < node.end && node.textarray[node.start] == (byte) '\n') {
                            ++node.start;
                        }

                        // nothing left after stripping the newline: drop the node
                        if (node.start >= node.end) {
                            continue;
                        }
                    }

                    pre.insertNodeAtEnd(node);
                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(pre, node)) {
                    continue;
                }

                // strip unexpected tags
                if (!lexer.preContent(node)) {
                    Node newnode;

                    lexer.report.warning(lexer, pre, node, Report.UNESCAPED_ELEMENT);
                    newnode = Node.escapeTag(lexer, node);
                    pre.insertNodeAtEnd(newnode);
                    continue;
                }

                if (node.tag == tt.tagP) {
                    if (node.type == Node.START_TAG) {
                        lexer.report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);

                        // trim white space before <p> in <pre>
                        Node.trimSpaces(lexer, pre);

                        // coerce both <p> and </p> to <br>
                        Node.coerceNode(lexer, node, tt.tagBr);
                        pre.insertNodeAtEnd(node);
                    } else {
                        lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                    }
                    continue;
                }

                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) {
                    // trim white space before <br>
                    if (node.tag == tt.tagBr) {
                        Node.trimSpaces(lexer, pre);
                    }

                    pre.insertNodeAtEnd(node);
                    parseTag(lexer, node, Lexer.PREFORMATTED);
                    continue;
                }

                // discard unexpected tags
                lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
            }

            // input ended before </pre> was seen (node is null at this point)
            lexer.report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
            Node.trimEmptyElement(lexer, pre);
        }
    }

    /**
     * Parser for block elements.
     */
    public static class ParseBlock implements Parser {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node element, short mode) {
            // element is node created by the lexer upon seeing the start tag, or by the parser when the start tag is
            // inferred.
            Node node, parent;
            boolean checkstack;
            int istackbase = 0;
            TagTable tt = lexer.configuration.tt;

            checkstack = true;

            if ((element.tag.model & Dict.CM_EMPTY) != 0) {
                return;
            }

            if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm)) {
                lexer.report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
            }

            // InlineDup() asks the lexer to insert inline emphasis tags currently pushed on the istack, but take care
            // to avoid propagating inline emphasis inside OBJECT or APPLET.
For these elements a fresh inline stack // context is created and disposed of upon reaching the end of the element. They thus behave like table // cells in this respect. if ((element.tag.model & Dict.CM_OBJECT) != 0) { istackbase = lexer.istackbase; lexer.istackbase = lexer.istack.size(); } if (!((element.tag.model & Dict.CM_MIXED) != 0)) { lexer.inlineDup(null); } mode = Lexer.IGNORE_WHITESPACE; while ((node = lexer.getToken(mode)) != null) { // end tag for this element if (node.type == Node.END_TAG && node.tag != null && (node.tag == element.tag || element.was == node.tag)) { if ((element.tag.model & Dict.CM_OBJECT) != 0) { // pop inline stack while (lexer.istack.size() > lexer.istackbase) { lexer.popInline(null); } lexer.istackbase = istackbase; } element.closed = true; Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } if (node.tag == tt.tagHtml || node.tag == tt.tagHead || node.tag == tt.tagBody) { if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); } continue; } if (node.type == Node.END_TAG) { if (node.tag == null) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } else if (node.tag == tt.tagBr) { node.type = Node.START_TAG; } else if (node.tag == tt.tagP) { Node.coerceNode(lexer, node, tt.tagBr); element.insertNodeAtEnd(node); node = lexer.inferredTag("br"); } else { // if this is the end tag for an ancestor element then infer end tag for this element for (parent = element.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { if (!((element.tag.model & Dict.CM_OPT) != 0)) { lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); } lexer.ungetToken(); if ((element.tag.model & Dict.CM_OBJECT) != 0) { // pop inline stack while (lexer.istack.size() > lexer.istackbase) { lexer.popInline(null); } lexer.istackbase = istackbase; } Node.trimSpaces(lexer, element); 
Node.trimEmptyElement(lexer, element);
                                return;
                            }
                        }

                        // special case </tr> etc. for stuff moved in front of table
                        if (lexer.exiled && node.tag.model != 0 && (node.tag.model & Dict.CM_TABLE) != 0) {
                            lexer.ungetToken();
                            Node.trimSpaces(lexer, element);
                            Node.trimEmptyElement(lexer, element);
                            return;
                        }
                    }
                }

                // mixed content model permits text
                if (node.type == Node.TEXT_NODE) {
                    // A node of at most one byte whose first byte is a space counts as white space. The byte is
                    // read from the node's own buffer, the one node.start/node.end index into, rather than from
                    // lexer.lexbuf.
                    boolean iswhitenode = node.end <= node.start + 1 && node.textarray[node.start] == (byte) ' ';

                    if (lexer.configuration.encloseBlockText && !iswhitenode) {
                        lexer.ungetToken();
                        node = lexer.inferredTag("p");
                        element.insertNodeAtEnd(node);
                        parseTag(lexer, node, Lexer.MIXED_CONTENT);
                        continue;
                    }

                    if (checkstack) {
                        checkstack = false;

                        if (!((element.tag.model & Dict.CM_MIXED) != 0)) {
                            if (lexer.inlineDup(node) > 0) {
                                continue;
                            }
                        }
                    }

                    element.insertNodeAtEnd(node);
                    mode = Lexer.MIXED_CONTENT;

                    // HTML4 strict doesn't allow mixed content for elements with %block; as their content model
                    // But only body, map, blockquote, form and noscript have content model %block;
                    if (element.tag == tt.tagBody
                        || element.tag == tt.tagMap
                        || element.tag == tt.tagBlockquote
                        || element.tag == tt.tagForm
                        || element.tag == tt.tagNoscript) {
                        lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
                    }
                    continue;
                }

                if (Node.insertMisc(element, node)) {
                    continue;
                }

                // allow PARAM elements?
                if (node.tag == tt.tagParam) {
                    if (((element.tag.model & Dict.CM_PARAM) != 0)
                        && (node.type == Node.START_TAG || node.type == Node.START_END_TAG)) {
                        element.insertNodeAtEnd(node);
                        continue;
                    }

                    // otherwise discard it
                    lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // allow AREA elements?
if (node.tag == tt.tagArea) { if ((element.tag == tt.tagMap) && (node.type == Node.START_TAG || node.type == Node.START_END_TAG)) { element.insertNodeAtEnd(node); continue; } // otherwise discard it lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } // ignore unknown start/end tags if (node.tag == null) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } // Allow Dict.CM_INLINE elements here. Allow Dict.CM_BLOCK elements here unless lexer.excludeBlocks is // yes. LI and DD are special cased. Otherwise infer end tag for this element. if (!((node.tag.model & Dict.CM_INLINE) != 0)) { if (node.type != Node.START_TAG && node.type != Node.START_END_TAG) { if (node.tag == tt.tagForm) { badForm(lexer); } lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } // #427671 - Fix by Randy Waki - 10 Aug 00 // If an LI contains an illegal FRAME, FRAMESET, OPTGROUP, or OPTION start tag, discard the start // tag and let the subsequent content get parsed as content of the enclosing LI. This seems to // mimic IE and Netscape, and avoids an infinite loop: without this check, ParseBlock (which is // parsing the LI's content) and ParseList (which is parsing the LI's parent's content) repeatedly // defer to each other to parse the illegal start tag, each time inferring a missing </li> or <li> // respectively. NOTE: This check is a bit fragile. It specifically checks for the four tags that // happen to weave their way through the current series of tests performed by ParseBlock and // ParseList to trigger the infinite loop. 
if (element.tag == tt.tagLi) { if (node.tag == tt.tagFrame || node.tag == tt.tagFrameset || node.tag == tt.tagOptgroup || node.tag == tt.tagOption) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } } if (element.tag == tt.tagTd || element.tag == tt.tagTh) { // if parent is a table cell, avoid inferring the end of the cell if ((node.tag.model & Dict.CM_HEAD) != 0) { moveToHead(lexer, element, node); continue; } if ((node.tag.model & Dict.CM_LIST) != 0) { lexer.ungetToken(); node = lexer.inferredTag("ul"); node.addClass("noindent"); lexer.excludeBlocks = true; } else if ((node.tag.model & Dict.CM_DEFLIST) != 0) { lexer.ungetToken(); node = lexer.inferredTag("dl"); lexer.excludeBlocks = true; } // infer end of current table cell if (!((node.tag.model & Dict.CM_BLOCK) != 0)) { lexer.ungetToken(); Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } } else if ((node.tag.model & Dict.CM_BLOCK) != 0) { if (lexer.excludeBlocks) { if (!((element.tag.model & Dict.CM_OPT) != 0)) { lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); } lexer.ungetToken(); if ((element.tag.model & Dict.CM_OBJECT) != 0) { lexer.istackbase = istackbase; } Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } } else { // things like list items if ((node.tag.model & Dict.CM_HEAD) != 0) { moveToHead(lexer, element, node); continue; } // special case where a form start tag occurs in a tr and is followed by td or th if (element.tag == tt.tagForm && element.parent.tag == tt.tagTd && element.parent.implicit) { if (node.tag == tt.tagTd) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } if (node.tag == tt.tagTh) { lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); node = element.parent; node.element = "th"; node.tag = tt.tagTh; continue; } } if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit) { 
lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE); } lexer.ungetToken(); if ((node.tag.model & Dict.CM_LIST) != 0) { if (element.parent != null && element.parent.tag != null && element.parent.tag.getParser() == LIST) { Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } node = lexer.inferredTag("ul"); node.addClass("noindent"); } else if ((node.tag.model & Dict.CM_DEFLIST) != 0) { if (element.parent.tag == tt.tagDl) { Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } node = lexer.inferredTag("dl"); } else if ((node.tag.model & Dict.CM_TABLE) != 0 || (node.tag.model & Dict.CM_ROW) != 0) { node = lexer.inferredTag("table"); } else if ((element.tag.model & Dict.CM_OBJECT) != 0) { // pop inline stack while (lexer.istack.size() > lexer.istackbase) { lexer.popInline(null); } lexer.istackbase = istackbase; Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } else { Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); return; } } } // parse known element if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE)) { // DSR - 27Apr02 ensure we wrap anchors and other inline content // fgiust: commented out due to [1403105]: java.lang.StackOverflowError in Tidy.parseDOM() // if (lexer.configuration.encloseBlockText) // { // lexer.ungetToken(); // node = lexer.inferredTag("p"); // element.insertNodeAtEnd(node); // parseTag(lexer, node, Lexer.MIXED_CONTENT); // continue; // } if (checkstack && !node.implicit) { checkstack = false; // #431731 - fix by Randy Waki 25 Dec 00 if (!TidyUtils.toBoolean(element.tag.model & Dict.CM_MIXED)) { if (lexer.inlineDup(node) > 0) { continue; } } } mode = Lexer.MIXED_CONTENT; } else { checkstack = true; mode = Lexer.IGNORE_WHITESPACE; } // trim white space before <br> if (node.tag == tt.tagBr) { Node.trimSpaces(lexer, element); } 
element.insertNodeAtEnd(node); if (node.implicit) { lexer.report.warning(lexer, element, node, Report.INSERTING_TAG); } parseTag(lexer, node, Lexer.IGNORE_WHITESPACE // Lexer.MixedContent ); continue; } // discard unexpected tags if (node.type == Node.END_TAG) { lexer.popInline(node); // if inline end tag } lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED); continue; } if (!((element.tag.model & Dict.CM_OPT) != 0)) { lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR); } if ((element.tag.model & Dict.CM_OBJECT) != 0) { // pop inline stack while (lexer.istack.size() > lexer.istackbase) { lexer.popInline(null); } lexer.istackbase = istackbase; } Node.trimSpaces(lexer, element); Node.trimEmptyElement(lexer, element); } } /** * Parser for TABLE. */ public static class ParseTableTag implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node table, short mode) { Node node, parent; int istackbase; TagTable tt = lexer.configuration.tt; lexer.deferDup(); istackbase = lexer.istackbase; lexer.istackbase = lexer.istack.size(); while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == table.tag && node.type == Node.END_TAG) { lexer.istackbase = istackbase; table.closed = true; Node.trimEmptyElement(lexer, table); return; } // deal with comments etc. 
if (Node.insertMisc(table, node)) { continue; } // discard unknown tags if (node.tag == null && node.type != Node.TEXT_NODE) { lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); continue; } // if TD or TH or text or inline or block then infer <TR> if (node.type != Node.END_TAG) { if (node.tag == tt.tagTd || node.tag == tt.tagTh || node.tag == tt.tagTable) { lexer.ungetToken(); node = lexer.inferredTag("tr"); lexer.report.warning(lexer, table, node, Report.MISSING_STARTTAG); } else if (node.type == Node.TEXT_NODE || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) { Node.insertNodeBeforeElement(table, node); lexer.report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); lexer.exiled = true; if (!(node.type == Node.TEXT_NODE)) // #427662 - was (!node.type == TextNode) - fix by Young { parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); } lexer.exiled = false; continue; } else if ((node.tag.model & Dict.CM_HEAD) != 0) { moveToHead(lexer, table, node); continue; } } // if this is the end tag for an ancestor element then infer end tag for this element if (node.type == Node.END_TAG) { if (node.tag == tt.tagForm) { badForm(lexer); lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); continue; } if ((node.tag != null && (node.tag.model & (Dict.CM_TABLE | Dict.CM_ROW)) != 0) || (node.tag != null && (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) { lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); continue; } for (parent = table.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { lexer.report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE); lexer.ungetToken(); lexer.istackbase = istackbase; Node.trimEmptyElement(lexer, table); return; } } } if (!((node.tag.model & Dict.CM_TABLE) != 0)) { lexer.ungetToken(); lexer.report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN); lexer.istackbase = istackbase; Node.trimEmptyElement(lexer, table); return; } if 
(node.type == Node.START_TAG || node.type == Node.START_END_TAG) { table.insertNodeAtEnd(node); parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); continue; } // discard unexpected text nodes and end tags lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED); } lexer.report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR); Node.trimEmptyElement(lexer, table); lexer.istackbase = istackbase; } } /** * Parser for COLGROUP. */ public static class ParseColGroup implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node colgroup, short mode) { Node node, parent; TagTable tt = lexer.configuration.tt; if ((colgroup.tag.model & Dict.CM_EMPTY) != 0) { return; } while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == colgroup.tag && node.type == Node.END_TAG) { colgroup.closed = true; return; } // if this is the end tag for an ancestor element then infer end tag for this element if (node.type == Node.END_TAG) { if (node.tag == tt.tagForm) { badForm(lexer); lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); continue; } for (parent = colgroup.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { lexer.ungetToken(); return; } } } if (node.type == Node.TEXT_NODE) { lexer.ungetToken(); return; } // deal with comments etc. if (Node.insertMisc(colgroup, node)) { continue; } // discard unknown tags if (node.tag == null) { lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); continue; } if (node.tag != tt.tagCol) { lexer.ungetToken(); return; } if (node.type == Node.END_TAG) { lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED); continue; } // node should be <COL> colgroup.insertNodeAtEnd(node); parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); } } } /** * Parser for ROWGROUP. 
*/ public static class ParseRowGroup implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node rowgroup, short mode) { Node node, parent; TagTable tt = lexer.configuration.tt; if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0) { return; } while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == rowgroup.tag) { if (node.type == Node.END_TAG) { rowgroup.closed = true; Node.trimEmptyElement(lexer, rowgroup); return; } lexer.ungetToken(); return; } // if </table> infer end tag if (node.tag == tt.tagTable && node.type == Node.END_TAG) { lexer.ungetToken(); Node.trimEmptyElement(lexer, rowgroup); return; } // deal with comments etc. if (Node.insertMisc(rowgroup, node)) { continue; } // discard unknown tags if (node.tag == null && node.type != Node.TEXT_NODE) { lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); continue; } // if TD or TH then infer <TR> if text or inline or block move before table if head content move to // head if (node.type != Node.END_TAG) { if (node.tag == tt.tagTd || node.tag == tt.tagTh) { lexer.ungetToken(); node = lexer.inferredTag("tr"); lexer.report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); } else if (node.type == Node.TEXT_NODE || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) { Node.moveBeforeTable(rowgroup, node, tt); lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); lexer.exiled = true; // #427662 was (!node.type == TextNode) fix by Young 04 Aug 00 if (node.type != Node.TEXT_NODE) { parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); } lexer.exiled = false; continue; } else if ((node.tag.model & Dict.CM_HEAD) != 0) { lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN); moveToHead(lexer, rowgroup, node); continue; } } // if this is the end tag for ancestor element then infer end tag for this element if (node.type == Node.END_TAG) { if (node.tag == 
tt.tagForm || (node.tag != null && (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) { if (node.tag == tt.tagForm) { badForm(lexer); } lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); continue; } if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh) { lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); continue; } for (parent = rowgroup.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { lexer.ungetToken(); Node.trimEmptyElement(lexer, rowgroup); return; } } } // if THEAD, TFOOT or TBODY then implied end tag if ((node.tag.model & Dict.CM_ROWGRP) != 0) { if (node.type != Node.END_TAG) { lexer.ungetToken(); } Node.trimEmptyElement(lexer, rowgroup); return; } if (node.type == Node.END_TAG) { lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED); continue; } if (!(node.tag == tt.tagTr)) { node = lexer.inferredTag("tr"); lexer.report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG); lexer.ungetToken(); } // node should be <TR> rowgroup.insertNodeAtEnd(node); parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); } Node.trimEmptyElement(lexer, rowgroup); } } /** * Parser for ROW. 
*/ public static class ParseRow implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node row, short mode) { Node node, parent; boolean excludeState; TagTable tt = lexer.configuration.tt; if ((row.tag.model & Dict.CM_EMPTY) != 0) { return; } while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == row.tag) { if (node.type == Node.END_TAG) { row.closed = true; Node.fixEmptyRow(lexer, row); return; } lexer.ungetToken(); Node.fixEmptyRow(lexer, row); return; } // if this is the end tag for an ancestor element then infer end tag for this element if (node.type == Node.END_TAG) { if (((node.tag != null && (node.tag.model & (Dict.CM_HTML | Dict.CM_TABLE)) != 0) || node.tag == tt.tagTable) && row.isDescendantOf(node.tag)) { lexer.ungetToken(); return; } if (node.tag == tt.tagForm || (node.tag != null && (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)) { if (node.tag == tt.tagForm) { badForm(lexer); } lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); continue; } if (node.tag == tt.tagTd || node.tag == tt.tagTh) { lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); continue; } for (parent = row.parent; parent != null; parent = parent.parent) { if (node.tag == parent.tag) { lexer.ungetToken(); Node.trimEmptyElement(lexer, row); return; } } } // deal with comments etc. 
if (Node.insertMisc(row, node)) { continue; } // discard unknown tags if (node.tag == null && node.type != Node.TEXT_NODE) { lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); continue; } // discard unexpected <table> element if (node.tag == tt.tagTable) { lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); continue; } // THEAD, TFOOT or TBODY if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0) { lexer.ungetToken(); Node.trimEmptyElement(lexer, row); return; } if (node.type == Node.END_TAG) { lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED); continue; } // if text or inline or block move before table if head content move to head if (node.type != Node.END_TAG) { if (node.tag == tt.tagForm) { lexer.ungetToken(); node = lexer.inferredTag("td"); lexer.report.warning(lexer, row, node, Report.MISSING_STARTTAG); } else if (node.type == Node.TEXT_NODE || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0) { Node.moveBeforeTable(row, node, tt); lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); lexer.exiled = true; if (node.type != Node.TEXT_NODE) { parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); } lexer.exiled = false; continue; } else if ((node.tag.model & Dict.CM_HEAD) != 0) { lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); moveToHead(lexer, row, node); continue; } } if (!(node.tag == tt.tagTd || node.tag == tt.tagTh)) { lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN); continue; } // node should be <TD> or <TH> row.insertNodeAtEnd(node); excludeState = lexer.excludeBlocks; lexer.excludeBlocks = false; parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); lexer.excludeBlocks = excludeState; // pop inline stack while (lexer.istack.size() > lexer.istackbase) { lexer.popInline(null); } } Node.trimEmptyElement(lexer, row); } } /** * Parser for NOFRAMES. 
*/ public static class ParseNoFrames implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node noframes, short mode) { Node node; TagTable tt = lexer.configuration.tt; lexer.badAccess |= Report.USING_NOFRAMES; mode = Lexer.IGNORE_WHITESPACE; while ((node = lexer.getToken(mode)) != null) { if (node.tag == noframes.tag && node.type == Node.END_TAG) { noframes.closed = true; Node.trimSpaces(lexer, noframes); return; } if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset)) { Node.trimSpaces(lexer, noframes); // fix for [539369] if (node.type == Node.END_TAG) { lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); // Throw it away } else { lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE); lexer.ungetToken(); } return; } if (node.tag == tt.tagHtml) { if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); } continue; } // deal with comments etc. 
if (Node.insertMisc(noframes, node)) { continue; } if (node.tag == tt.tagBody && node.type == Node.START_TAG) { boolean seenbody = lexer.seenEndBody; noframes.insertNodeAtEnd(node); parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); // MixedContent if (seenbody) { Node.coerceNode(lexer, node, tt.tagDiv); moveNodeToBody(lexer, node); } continue; } // implicit body element inferred if (node.type == Node.TEXT_NODE || (node.tag != null && node.type != Node.END_TAG)) { if (lexer.seenEndBody) { Node body = lexer.root.findBody(tt); if (node.type == Node.TEXT_NODE) { lexer.ungetToken(); node = lexer.inferredTag("p"); lexer.report.warning(lexer, noframes, node, Report.CONTENT_AFTER_BODY); } body.insertNodeAtEnd(node); } else { lexer.ungetToken(); node = lexer.inferredTag("body"); if (lexer.configuration.xmlOut) { lexer.report.warning(lexer, noframes, node, Report.INSERTING_TAG); } noframes.insertNodeAtEnd(node); } parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); // MixedContent continue; } // discard unexpected end tags lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED); } lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR); } } /** * Parser for SELECT. */ public static class ParseSelect implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node field, short mode) { Node node; TagTable tt = lexer.configuration.tt; lexer.insert = -1; // defer implicit inline start tags while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == field.tag && node.type == Node.END_TAG) { field.closed = true; Node.trimSpaces(lexer, field); return; } // deal with comments etc. 
if (Node.insertMisc(field, node)) { continue; } if (node.type == Node.START_TAG && (node.tag == tt.tagOption || node.tag == tt.tagOptgroup || node.tag == tt.tagScript)) { field.insertNodeAtEnd(node); parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); continue; } // discard unexpected tags lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); } lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); } } /** * Parser for text nodes. */ public static class ParseText implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node field, short mode) { Node node; TagTable tt = lexer.configuration.tt; lexer.insert = -1; // defer implicit inline start tags if (field.tag == tt.tagTextarea) { mode = Lexer.PREFORMATTED; } else { mode = Lexer.MIXED_CONTENT; // kludge for font tags } while ((node = lexer.getToken(mode)) != null) { if (node.tag == field.tag && node.type == Node.END_TAG) { field.closed = true; Node.trimSpaces(lexer, field); return; } // deal with comments etc. if (Node.insertMisc(field, node)) { continue; } if (node.type == Node.TEXT_NODE) { // only called for 1st child if (field.content == null && !((mode & Lexer.PREFORMATTED) != 0)) { Node.trimSpaces(lexer, field); } if (node.start >= node.end) { continue; } field.insertNodeAtEnd(node); continue; } // for textarea should all cases of < and & be escaped? // discard inline tags e.g. 
font if (node.tag != null && ((node.tag.model & Dict.CM_INLINE) != 0) && (node.tag.model & Dict.CM_FIELD) == 0) // #487283 - fix by Lee Passey 25 Jan 02 { lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); continue; } // terminate element on other tags if (!((field.tag.model & Dict.CM_OPT) != 0)) { lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE); } lexer.ungetToken(); Node.trimSpaces(lexer, field); return; } if (!((field.tag.model & Dict.CM_OPT) != 0)) { lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR); } } } /** * Parser for OPTGROUP. */ public static class ParseOptGroup implements Parser { /** * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short) */ public void parse(Lexer lexer, Node field, short mode) { Node node; TagTable tt = lexer.configuration.tt; lexer.insert = -1; // defer implicit inline start tags while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { if (node.tag == field.tag && node.type == Node.END_TAG) { field.closed = true; Node.trimSpaces(lexer, field); return; } // deal with comments etc. if (Node.insertMisc(field, node)) { continue; } if (node.type == Node.START_TAG && (node.tag == tt.tagOption || node.tag == tt.tagOptgroup)) { if (node.tag == tt.tagOptgroup) { lexer.report.warning(lexer, field, node, Report.CANT_BE_NESTED); } field.insertNodeAtEnd(node); parseTag(lexer, node, Lexer.MIXED_CONTENT); continue; } // discard unexpected tags lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED); } } } /** * HTML is the top level element. */ public static Node parseDocument(Lexer lexer) { Node node, document, html; Node doctype = null; TagTable tt = lexer.configuration.tt; document = lexer.newNode(); document.type = Node.ROOT_NODE; lexer.root = document; while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { // deal with comments etc. 
if (Node.insertMisc(document, node)) { continue; } if (node.type == Node.DOCTYPE_TAG) { if (doctype == null) { document.insertNodeAtEnd(node); doctype = node; } else { lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); } continue; } if (node.type == Node.END_TAG) { lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO? continue; } if (node.type != Node.START_TAG || node.tag != tt.tagHtml) { lexer.ungetToken(); html = lexer.inferredTag("html"); } else { html = node; } if (document.findDocType() == null && !lexer.configuration.bodyOnly) { lexer.report.warning(lexer, null, null, Report.MISSING_DOCTYPE); } document.insertNodeAtEnd(html); HTML.parse(lexer, html, (short) 0); // TODO? break; } if (lexer.root.findHTML(lexer.configuration.tt) == null) { /* a later check should complain if <body> is empty */ html = lexer.inferredTag("html"); lexer.root.insertNodeAtEnd(html); HTML.parse(lexer, html, Lexer.IGNORE_WHITESPACE); } if (lexer.root.findTITLE(lexer.configuration.tt) == null) { Node head = lexer.root.findHEAD(lexer.configuration.tt); lexer.report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT); head.insertNodeAtEnd(lexer.inferredTag("title")); } return document; } /** * Indicates whether or not whitespace should be preserved for this element. If an <code>xml:space</code> * attribute is found, then if the attribute value is <code>preserve</code>, returns <code>true</code>. For * any other value, returns <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em> * found, then the following element names result in a return value of <code>true: * pre, script, style,</code> and * <code>xsl:text</code>. Finally, if a <code>TagTable</code> was passed in and the element appears as the * "pre" element in the <code>TagTable</code>, then <code>true</code> will be returned. Otherwise, * <code>false</code> is returned. 
* @param element The <code>Node</code> to test to see if whitespace should be preserved. * @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code> function. This may be * <code>null</code>, in which case this test is bypassed. * @return <code>true</code> or <code>false</code>, as explained above. */ public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt) { AttVal attribute; // search attributes for xml:space for (attribute = element.attributes; attribute != null; attribute = attribute.next) { if (attribute.attribute.equals("xml:space")) { if (attribute.value.equals("preserve")) { return true; } return false; } } if (element.element == null) // Debian Bug #137124. Fix based on suggestion by Cesar Eduardo Barros 06 Mar 02 { return false; } // kludge for html docs without explicit xml:space attribute if ("pre".equalsIgnoreCase(element.element) || "script".equalsIgnoreCase(element.element) || "style".equalsIgnoreCase(element.element)) { return true; } if ((tt != null) && (tt.findParser(element) == PRE)) { return true; } // kludge for XSL docs if ("xsl:text".equalsIgnoreCase(element.element)) { return true; } return false; } /** * XML documents. */ public static void parseXMLElement(Lexer lexer, Node element, short mode) { Node node; // if node is pre or has xml:space="preserve" then do so if (XMLPreserveWhiteSpace(element, lexer.configuration.tt)) { mode = Lexer.PREFORMATTED; } while ((node = lexer.getToken(mode)) != null) { if (node.type == Node.END_TAG && node.element.equals(element.element)) { element.closed = true; break; } // discard unexpected end tags if (node.type == Node.END_TAG) { lexer.report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG); continue; } // parse content on seeing start tag if (node.type == Node.START_TAG) { parseXMLElement(lexer, node, mode); } element.insertNodeAtEnd(node); } // if first child is text then trim initial space and delete text node if it is empty. 
node = element.content; if (node != null && node.type == Node.TEXT_NODE && mode != Lexer.PREFORMATTED) { if (node.textarray[node.start] == (byte) ' ') { node.start++; if (node.start >= node.end) { Node.discardElement(node); } } } // if last child is text then trim final space and delete the text node if it is empty node = element.last; if (node != null && node.type == Node.TEXT_NODE && mode != Lexer.PREFORMATTED) { if (node.textarray[node.end - 1] == (byte) ' ') { node.end--; if (node.start >= node.end) { Node.discardElement(node); } } } } public static Node parseXMLDocument(Lexer lexer) { Node node, document, doctype; document = lexer.newNode(); document.type = Node.ROOT_NODE; doctype = null; lexer.configuration.xmlTags = true; while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null) { // discard unexpected end tags if (node.type == Node.END_TAG) { lexer.report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG); continue; } // deal with comments etc. if (Node.insertMisc(document, node)) { continue; } if (node.type == Node.DOCTYPE_TAG) { if (doctype == null) { document.insertNodeAtEnd(node); doctype = node; } else { lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO } continue; } if (node.type == Node.START_END_TAG) { document.insertNodeAtEnd(node); continue; } // if start tag then parse element's content if (node.type == Node.START_TAG) { document.insertNodeAtEnd(node); parseXMLElement(lexer, node, Lexer.IGNORE_WHITESPACE); } } if (doctype != null && !lexer.checkDocTypeKeyWords(doctype)) { lexer.report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE); } // ensure presence of initial <?XML version="1.0"?> if (lexer.configuration.xmlPi) { lexer.fixXmlDecl(document); } return document; } /** * errors in positioning of form start or end tags generally require human intervention to fix. 
*/ static void badForm(Lexer lexer) { lexer.badForm = 1; lexer.errors++; } }������������������������������������������������jtidy/src/main/java/org/w3c/tidy/StreamInJavaImpl.java����������������������������������������������0000644�0001750�0001750�00000016575�11330623355�023057� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.UnsupportedEncodingException; /** * StreamIn Implementation using java writers. * @author Fabrizio Giustina * @version $Revision: 1095 $ ($Author: aditsu $) */ public class StreamInJavaImpl implements StreamIn { /** * number of characters kept in buffer. */ private static final int CHARBUF_SIZE = 16; /** * character buffer. */ private int[] charbuf = new int[CHARBUF_SIZE]; /** * actual position in buffer. */ private int bufpos; /** * Java input stream reader. */ private Reader reader; /** * has end of stream been reached? */ private boolean endOfStream; /** * Is char pushed? */ private boolean pushed; /** * current column number. */ private int curcol; /** * last column. */ private int lastcol; /** * current line number. 
*/ private int curline; /** * tab size in chars. */ private int tabsize; private int tabs; /** * Instantiates a new StreamInJavaImpl. * @param stream * @param encoding * @param tabsize * @throws UnsupportedEncodingException */ protected StreamInJavaImpl(InputStream stream, String encoding, int tabsize) throws UnsupportedEncodingException { reader = new InputStreamReader(stream, encoding); this.pushed = false; this.tabsize = tabsize; this.curline = 1; this.curcol = 1; this.endOfStream = false; } /** * Instantiates a new StreamInJavaImpl. * @param stream * @param encoding * @param tabsize */ protected StreamInJavaImpl(Reader reader, int tabsize) { this.reader = reader; this.pushed = false; this.tabsize = tabsize; this.curline = 1; this.curcol = 1; this.endOfStream = false; } /** * @see org.w3c.tidy.StreamIn#readCharFromStream() */ public int readCharFromStream() { int c; try { c = reader.read(); if (c < 0) { endOfStream = true; } } catch (IOException e) { // @todo how to handle? endOfStream = true; return END_OF_STREAM; } return c; } /** * @see org.w3c.tidy.StreamIn#readChar() */ public int readChar() { int c; if (this.pushed) { c = this.charbuf[--(this.bufpos)]; if ((this.bufpos) == 0) { this.pushed = false; } if (c == '\n') { this.curcol = 1; this.curline++; return c; } this.curcol++; return c; } this.lastcol = this.curcol; if (this.tabs > 0) { this.curcol++; this.tabs--; return ' '; } c = readCharFromStream(); if (c < 0) { endOfStream = true; return END_OF_STREAM; } if (c == '\n') { this.curcol = 1; this.curline++; return c; } else if (c == '\r') // \r\n { c = readCharFromStream(); if (c != '\n') { if (c != END_OF_STREAM) { ungetChar(c); } c = '\n'; } this.curcol = 1; this.curline++; return c; } if (c == '\t') { this.tabs = tabsize > 0 ? 
this.tabsize - ((this.curcol - 1) % this.tabsize) - 1 : 0; this.curcol++; c = ' '; return c; } this.curcol++; return c; } /** * @see org.w3c.tidy.StreamIn#ungetChar(int) */ public void ungetChar(int c) { this.pushed = true; if (this.bufpos >= CHARBUF_SIZE) { // pop last element System.arraycopy(this.charbuf, 0, this.charbuf, 1, CHARBUF_SIZE - 1); this.bufpos--; } this.charbuf[(this.bufpos)++] = c; if (c == '\n') { --this.curline; } this.curcol = this.lastcol; } /** * @see org.w3c.tidy.StreamIn#isEndOfStream() */ public boolean isEndOfStream() { return endOfStream; } /** * Getter for <code>curcol</code>. * @return Returns the curcol. */ public int getCurcol() { return this.curcol; } /** * Getter for <code>curline</code>. * @return Returns the curline. */ public int getCurline() { return this.curline; } /** * @see org.w3c.tidy.StreamIn#setLexer(org.w3c.tidy.Lexer) */ public void setLexer(Lexer lexer) { // unused in the java implementation } }�����������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMDocumentTypeImpl.java�������������������������������������������0000644�0001750�0001750�00000011141�10144212711�023463� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; /** * DOMDocumentTypeImpl. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 738 $ ($Author: fgiust $) */ public class DOMDocumentTypeImpl extends DOMNodeImpl implements org.w3c.dom.DocumentType { /** * Instantiates a new DOM document type. * @param adaptee Tidy Node */ protected DOMDocumentTypeImpl(Node adaptee) { super(adaptee); } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { return org.w3c.dom.Node.DOCUMENT_TYPE_NODE; } /** * @see org.w3c.dom.Node#getNodeName */ public String getNodeName() { return getName(); } /** * @see org.w3c.dom.DocumentType#getName */ public String getName() { String value = null; if (adaptee.type == Node.DOCTYPE_TAG) { if (adaptee.textarray != null && adaptee.start < adaptee.end) { value = TidyUtils.getString(adaptee.textarray, adaptee.start, adaptee.end - adaptee.start); } } return value; } /** * @todo DOM level 2 getEntities() Not implemented. Returns null. * @see org.w3c.dom.DocumentType#getEntities() */ public org.w3c.dom.NamedNodeMap getEntities() { return null; } /** * @todo DOM level 2 getNotations() Not implemented. Returns null. * @see org.w3c.dom.DocumentType#getNotations() */ public org.w3c.dom.NamedNodeMap getNotations() { return null; } /** * @todo DOM level 2 getPublicId() Not implemented. Returns null. * @see org.w3c.dom.DocumentType#getPublicId() */ public String getPublicId() { return null; } /** * @todo DOM level 2 getSystemId() Not implemented. Returns null. * @see org.w3c.dom.DocumentType#getSystemId() */ public String getSystemId() { return null; } /** * @todo DOM level 2 getInternalSubset() Not implemented. Returns null. 
* @see org.w3c.dom.DocumentType#getInternalSubset() */ public String getInternalSubset() { return null; } }�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/StyleProp.java�����������������������������������������������������0000644�0001750�0001750�00000006244�10116675277�021655� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Linked list of style properties. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 587 $ ($Author: fgiust $) */ public class StyleProp { /** * Style name. */ protected String name; /** * Style value. */ protected String value; /** * Next linked style property. 
*/ protected StyleProp next; /** * Instantiates a new style property. * @param name Style name * @param value Style value * @param next Next linked style property. Can be null. */ public StyleProp(String name, String value, StyleProp next) { this.name = name; this.value = value; this.next = next; } }������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMAttrImpl.java���������������������������������������������������0000644�0001750�0001750�00000021761�10144212711�021766� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import org.w3c.dom.DOMException; import org.w3c.dom.TypeInfo; /** * Tidy implementation of org.w3c.dom.DOMAttrImpl. 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 738 $ ($Author: fgiust $) */ public class DOMAttrImpl extends DOMNodeImpl implements org.w3c.dom.Attr, Cloneable { /** * wrapped org.w3c.tidy.AttVal. */ protected AttVal avAdaptee; /** * instantiates a new DOMAttrImpl which wraps the given AttVal. * @param adaptee wrapped AttVal */ protected DOMAttrImpl(AttVal adaptee) { super(null); // must override all methods of DOMNodeImpl this.avAdaptee = adaptee; } /** * @see org.w3c.dom.Node#getNodeValue() */ public String getNodeValue() throws DOMException { return getValue(); } /** * @see org.w3c.dom.Node#setNodeValue(java.lang.String) */ public void setNodeValue(String nodeValue) throws DOMException { setValue(nodeValue); } /** * @see org.w3c.dom.Node#getNodeName() */ public String getNodeName() { return getName(); } /** * @see org.w3c.dom.Node#getNodeType() */ public short getNodeType() { return org.w3c.dom.Node.ATTRIBUTE_NODE; } /** * @see org.w3c.dom.Attr#getName */ public String getName() { return avAdaptee.attribute; } /** * @see org.w3c.dom.Attr#getSpecified */ public boolean getSpecified() { return avAdaptee.value != null; } /** * @see org.w3c.dom.Attr#getValue */ public String getValue() { // Thanks to Brett Knights brett@knightsofthenet.com for this fix. return (avAdaptee.value == null) ? avAdaptee.attribute : avAdaptee.value; } /** * @see org.w3c.dom.Attr#setValue(java.lang.String) */ public void setValue(String value) { avAdaptee.value = value; } /** * @see org.w3c.dom.Node#getParentNode() */ public org.w3c.dom.Node getParentNode() { // Attr.getParentNode() should always return null // http://www.w3.org/TR/DOM-Level-2-Core/core.html#ID-637646024 return null; } /** * @todo DOM level 2 getChildNodes() Not implemented. Returns an empty NodeList. 
* @see org.w3c.dom.Node#getChildNodes() */ public org.w3c.dom.NodeList getChildNodes() { // Calling getChildNodes on a DOM Attr node does return the children of the Attr, which are the text and // EntityReference nodes that make up the Attr's content. return new DOMNodeListImpl(null); } /** * @todo DOM level 2 getFirstChild() Not implemented. Returns null. * @see org.w3c.dom.Node#getFirstChild() */ public org.w3c.dom.Node getFirstChild() { return null; } /** * @todo DOM level 2 getLastChild() Not implemented. Returns null. * @see org.w3c.dom.Node#getLastChild() */ public org.w3c.dom.Node getLastChild() { return null; } /** * @see org.w3c.dom.Node#getPreviousSibling() */ public org.w3c.dom.Node getPreviousSibling() { // Attr.getPreviousSibling() should always return null return null; } /** * @see org.w3c.dom.Node#getNextSibling() */ public org.w3c.dom.Node getNextSibling() { // Attr.getNextSibling() should always return null return null; } /** * @see org.w3c.dom.Node#getAttributes() */ public org.w3c.dom.NamedNodeMap getAttributes() { return null; } /** * @todo DOM level 2 getOwnerDocument() Not implemented. Returns null. * @see org.w3c.dom.Node#getOwnerDocument() */ public org.w3c.dom.Document getOwnerDocument() { return null; } /** * Not supported. * @see org.w3c.dom.Node#insertBefore(org.w3c.dom.Node, org.w3c.dom.Node) */ public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, org.w3c.dom.Node refChild) throws DOMException { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not supported"); } /** * Not supported. * @see org.w3c.dom.Node#replaceChild(org.w3c.dom.Node, org.w3c.dom.Node) */ public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, org.w3c.dom.Node oldChild) throws DOMException { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not supported"); } /** * Not supported. 
* @see org.w3c.dom.Node#removeChild(org.w3c.dom.Node) */ public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) throws DOMException { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not supported"); } /** * Not supported. * @see org.w3c.dom.Node#appendChild(org.w3c.dom.Node) */ public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) throws DOMException { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not supported"); } /** * @see org.w3c.dom.Node#hasChildNodes() */ public boolean hasChildNodes() { return false; } /** * @see org.w3c.dom.Node#cloneNode(boolean) */ public org.w3c.dom.Node cloneNode(boolean deep) { // http://java.sun.com/j2se/1.5.0/docs/api/index.html?org/w3c/dom/Attr.html // Cloning an Attr always clones its children, since they represent its value, no matter whether this is a deep // clone or not. return (org.w3c.dom.Node) clone(); } /** * @todo DOM level 2 getOwnerElement() Not implemented. Returns null. * @see org.w3c.dom.Attr#getOwnerElement() */ public org.w3c.dom.Element getOwnerElement() { return null; } /** * @todo DOM level 3 getSchemaTypeInfo() Not implemented. Returns null. 
* @see org.w3c.dom.Attr#getSchemaTypeInfo() */ public TypeInfo getSchemaTypeInfo() { return null; } /** * @see org.w3c.dom.Attr#isId() */ public boolean isId() { return "id".equals(this.avAdaptee.getAttribute()); } /** * @see java.lang.Object#clone() */ protected Object clone() { DOMAttrImpl clone; try { clone = (DOMAttrImpl) super.clone(); } catch (CloneNotSupportedException e) { // should never happen throw new RuntimeException("Clone not supported"); } clone.avAdaptee = (AttVal) this.avAdaptee.clone(); return clone; } }���������������jtidy/src/main/java/org/w3c/tidy/DOMCharacterDataImpl.java������������������������������������������0000644�0001750�0001750�00000013553�11461711132�023546� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import org.w3c.dom.DOMException; /** * Tidy implementation of org.w3c.dom.CharacterData. 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 1131 $ ($Author: aditsu $) */ public class DOMCharacterDataImpl extends DOMNodeImpl implements org.w3c.dom.CharacterData { /** * Instantiates a new DOMCharacterDataImpl which wraps the given Node. * @param adaptee wrapped node. */ protected DOMCharacterDataImpl(Node adaptee) { super(adaptee); } /** * @see org.w3c.dom.CharacterData#getData */ public String getData() throws DOMException { return getNodeValue(); } /** * @see org.w3c.dom.CharacterData#getLength */ public int getLength() { int len = 0; if (adaptee.textarray != null && adaptee.start < adaptee.end) { len = adaptee.end - adaptee.start; } return len; } /** * @see org.w3c.dom.CharacterData#substringData */ public String substringData(int offset, int count) throws DOMException { int len; String value = null; if (count < 0) { throw new DOMException(DOMException.INDEX_SIZE_ERR, "Invalid length"); } if (adaptee.textarray != null && adaptee.start < adaptee.end) { if (adaptee.start + offset >= adaptee.end) { throw new DOMException(DOMException.INDEX_SIZE_ERR, "Invalid offset"); } len = count; if (adaptee.start + offset + len - 1 >= adaptee.end) { len = adaptee.end - adaptee.start - offset; } value = TidyUtils.getString(adaptee.textarray, adaptee.start + offset, len); } return value; } /** * @see org.w3c.dom.CharacterData#setData */ public void setData(String data) throws DOMException { setNodeValue(data); } /** * @see org.w3c.dom.CharacterData#appendData */ public void appendData(String arg) throws DOMException { setNodeValue(getNodeValue() + arg); } /** * @see org.w3c.dom.CharacterData#insertData */ public void insertData(int offset, String arg) throws DOMException { if (offset < adaptee.start || adaptee.start + offset >= adaptee.end) { throw new DOMException(DOMException.INDEX_SIZE_ERR, 
"Invalid offset"); } StringBuilder sb = new StringBuilder(getNodeValue()); sb.insert(offset, arg); setData(sb.toString()); } /** * @see org.w3c.dom.CharacterData#deleteData */ public void deleteData(int offset, int count) throws DOMException { if (count < 0) { throw new DOMException(DOMException.INDEX_SIZE_ERR, "Invalid length"); } if (offset < adaptee.start || adaptee.start + offset >= adaptee.end) { throw new DOMException(DOMException.INDEX_SIZE_ERR, "Invalid offset"); } StringBuilder sb = new StringBuilder(getNodeValue()); int end = offset + count; if (end > adaptee.end) { end = adaptee.end; } setData(sb.delete(offset, end).toString()); } /** * @see org.w3c.dom.CharacterData#replaceData */ public void replaceData(int offset, int count, String arg) throws DOMException { deleteData(offset, count); insertData(offset, arg); } } �����������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/AttVal.java��������������������������������������������������������0000644�0001750�0001750�00000024622�11250160555�021073� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import org.w3c.dom.Attr; /** * Attribute/Value linked list node. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 899 $ ($Author: aditsu $) */ public class AttVal extends Object implements Cloneable { /** * next AttVal. */ protected AttVal next; /** * Attribute definition. */ protected Attribute dict; /** * Asp node. */ protected Node asp; /** * Php node. */ protected Node php; /** * Delimiter (" or '). */ protected int delim; /** * Attribute name. */ protected String attribute; /** * Attribute value. */ protected String value; /** * DOM adapter. */ protected Attr adapter; /** * Instantiates a new empty AttVal. */ public AttVal() { super(); } /** * Instantiates a new AttVal. * @param next next linked AttVal * @param dict Attribute from dictionary * @param delim delimitator for attribute value * @param attribute attribute name * @param value attribute value */ public AttVal(AttVal next, Attribute dict, int delim, String attribute, String value) { this.next = next; this.dict = dict; this.delim = delim; this.attribute = attribute; this.value = value; } /** * Instantiates a new AttVal. 
* @param next next linked AttVal * @param dict Attribute from dictionary * @param asp contained asp node * @param php contained php node * @param delim delimitator for attribute value * @param attribute attribute name * @param value attribute value */ public AttVal(AttVal next, Attribute dict, Node asp, Node php, int delim, String attribute, String value) { this.next = next; this.dict = dict; this.asp = asp; this.php = php; this.delim = delim; this.attribute = attribute; this.value = value; } /** * @see java.lang.Object#clone() */ protected Object clone() { AttVal av = null; try { av = (AttVal) super.clone(); } catch (CloneNotSupportedException e) { // should never happen } if (this.next != null) { av.next = (AttVal) this.next.clone(); } if (this.asp != null) { av.asp = this.asp.cloneNode(false); } if (this.php != null) { av.php = this.php.cloneNode(false); } return av; } /** * Is this a boolean attribute. * @return <code>true</code> if this is a boolean attribute */ public boolean isBoolAttribute() { Attribute attr = this.dict; if (attr != null) { if (attr.getAttrchk() == AttrCheckImpl.BOOL) { return true; } } return false; } /** * Check the attribute value for uppercase letters (only if the value should be lowercase, required for literal * values in xhtml). * @param lexer Lexer * @param node Node which contains this attribute */ void checkLowerCaseAttrValue(Lexer lexer, Node node) { if (this.value == null) { return; } String lowercase = this.value.toLowerCase(); if (!this.value.equals(lowercase)) { if (lexer.isvoyager) { lexer.report.attrError(lexer, node, this, Report.ATTR_VALUE_NOT_LCASE); } if (lexer.isvoyager || lexer.configuration.lowerLiterals) { this.value = lowercase; } } } /** * Check attribute name/value and report errors. 
* @param lexer Lexer * @param node node which contains this attribute * @return Attribute */ public Attribute checkAttribute(Lexer lexer, Node node) { TagTable tt = lexer.configuration.tt; Attribute attr = this.dict; // ignore unknown attributes for proprietary elements if (attr != null) { // if attribute looks like <foo/> check XML is ok if (TidyUtils.toBoolean(attr.getVersions() & Dict.VERS_XML)) { if (!(lexer.configuration.xmlTags || lexer.configuration.xmlOut)) { lexer.report.attrError(lexer, node, this, Report.XML_ATTRIBUTE_VALUE); } } // title first appeared in HTML 4.0 except for a/link else if (attr != AttributeTable.attrTitle || !(node.tag == tt.tagA || node.tag == tt.tagLink)) { lexer.constrainVersion(attr.getVersions()); } if (attr.getAttrchk() != null) { attr.getAttrchk().check(lexer, node, this); } else if (TidyUtils.toBoolean(this.dict.getVersions() & Dict.VERS_PROPRIETARY)) { lexer.report.attrError(lexer, node, this, Report.PROPRIETARY_ATTRIBUTE); } } else if (!lexer.configuration.xmlTags && !(node.tag == null) && this.asp == null && !(node.tag != null && (TidyUtils.toBoolean(node.tag.versions & Dict.VERS_PROPRIETARY)))) { lexer.report.attrError(lexer, node, this, Report.UNKNOWN_ATTRIBUTE); } return attr; } /** * Return the org.w3c.dom.Attr adapter. * @return org.w3c.dom.Attr adapter */ protected org.w3c.dom.Attr getAdapter() { if (this.adapter == null) { this.adapter = new DOMAttrImpl(this); } return this.adapter; } /** * Getter for <code>asp</code>. * @return Returns the asp. */ public Node getAsp() { return this.asp; } /** * Setter for <code>asp</code>. * @param asp The asp to set. */ public void setAsp(Node asp) { this.asp = asp; } /** * Getter for <code>attribute</code>. * @return Returns the attribute. */ public String getAttribute() { return this.attribute; } /** * Setter for <code>attribute</code>. * @param attribute The attribute to set. 
*/ public void setAttribute(String attribute) { this.attribute = attribute; } /** * Getter for <code>delim</code>. * @return Returns the delim. */ public int getDelim() { return this.delim; } /** * Setter for <code>delim</code>. * @param delim The delim to set. */ public void setDelim(int delim) { this.delim = delim; } /** * Getter for <code>dict</code>. * @return Returns the dict. */ public Attribute getDict() { return this.dict; } /** * Setter for <code>dict</code>. * @param dict The dict to set. */ public void setDict(Attribute dict) { this.dict = dict; } /** * Getter for <code>next</code>. * @return Returns the next. */ public AttVal getNext() { return this.next; } /** * Setter for <code>next</code>. * @param next The next to set. */ public void setNext(AttVal next) { this.next = next; } /** * Getter for <code>php</code>. * @return Returns the php. */ public Node getPhp() { return this.php; } /** * Setter for <code>php</code>. * @param php The php to set. */ public void setPhp(Node php) { this.php = php; } /** * Getter for <code>value</code>. * @return Returns the value. */ public String getValue() { return this.value; } /** * Setter for <code>value</code>. * @param value The value to set. 
*/ public void setValue(String value) { this.value = value; } }��������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/AttrCheck.java�����������������������������������������������������0000644�0001750�0001750�00000005513�10102754223�021542� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Check attribute values. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 400 $ ($Author: fgiust $) */ public interface AttrCheck { /** * Check the value of an attribute. 
* @param lexer Lexer * @param node current Node * @param attval attribute value */ void check(Lexer lexer, Node node, AttVal attval); }�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/EncodingNameMapper.java��������������������������������������������0000644�0001750�0001750�00000044337�10463460061�023401� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.util.HashMap; import java.util.Map; /** * Maps between Java and IANA character encoding names. Also handles encoding alias used in tidy c. * @author Fabrizio Giustina * @version $Revision: 804 $ ($Author: fgiust $) * @see http://www.iana.org/assignments/character-sets */ public abstract class EncodingNameMapper { /** * Map containing uppercase alias - {standard iana, standard java}. 
*/ private static Map encodingNameMap = new HashMap(); static { encodingNameMap.put("ISO-8859-1", new String[]{"ISO-8859-1", "ISO8859_1"}); encodingNameMap.put("ISO8859_1", new String[]{"ISO-8859-1", "ISO8859_1"}); encodingNameMap.put("ISO-IR-100", new String[]{"ISO-8859-1", "ISO8859_1"}); encodingNameMap.put("LATIN1", new String[]{"ISO-8859-1", "ISO8859_1"}); encodingNameMap.put("CSISOLATIN1", new String[]{"ISO-8859-1", "ISO8859_1"}); encodingNameMap.put("L1", new String[]{"ISO-8859-1", "ISO8859_1"}); encodingNameMap.put("819", new String[]{"ISO-8859-1", "ISO8859_1"}); encodingNameMap.put("US-ASCII", new String[]{"US-ASCII", "ASCII"}); encodingNameMap.put("ASCII", new String[]{"US-ASCII", "ASCII"}); encodingNameMap.put("ISO-IR-6", new String[]{"US-ASCII", "ASCII"}); encodingNameMap.put("CSASCII", new String[]{"US-ASCII", "ASCII"}); encodingNameMap.put("ISO646-US", new String[]{"US-ASCII", "ASCII"}); encodingNameMap.put("US", new String[]{"US-ASCII", "ASCII"}); encodingNameMap.put("367", new String[]{"US-ASCII", "ASCII"}); encodingNameMap.put("UTF-8", new String[]{"UTF-8", "UTF8"}); encodingNameMap.put("UTF8", new String[]{"UTF-8", "UTF8"}); encodingNameMap.put("UTF-16", new String[]{"UTF-16", "Unicode"}); encodingNameMap.put("UNICODE", new String[]{"UTF-16", "Unicode"}); encodingNameMap.put("UTF16", new String[]{"UTF-16", "Unicode"}); encodingNameMap.put("UTF16", new String[]{"UTF-16", "Unicode"}); // tidy encodingNameMap.put("UTF-16BE", new String[]{"UTF-16BE", "UnicodeBig"}); encodingNameMap.put("UNICODEBIG", new String[]{"UTF-16BE", "UnicodeBig"}); encodingNameMap.put("UTF16-BE", new String[]{"UTF-16BE", "UnicodeBig"}); encodingNameMap.put("UTF-16LE", new String[]{"UTF-16LE", "UnicodeLittle"}); encodingNameMap.put("UNICODELITTLE", new String[]{"UTF-16LE", "UnicodeLittle"}); encodingNameMap.put("UTF16-LE", new String[]{"UTF-16LE", "UnicodeLittle"}); encodingNameMap.put("UTF16BE", new String[]{"UTF-16BE", "UnicodeBig"}); // tidy encodingNameMap.put("UTF16LE", new 
String[]{"UTF-16LE", "UnicodeLittle"}); // tidy encodingNameMap.put("BIG5", new String[]{"BIG5", "Big5"}); encodingNameMap.put("CSBIG5", new String[]{"BIG5", "Big5"}); encodingNameMap.put("SJIS", new String[]{"SHIFT_JIS", "SJIS"}); encodingNameMap.put("SHIFT_JIS", new String[]{"SHIFT_JIS", "SJIS"}); encodingNameMap.put("CSSHIFTJIS", new String[]{"CSSHIFTJIS", "SJIS"}); encodingNameMap.put("MS_KANJI", new String[]{"MS_KANJI", "SJIS"}); encodingNameMap.put("SHIFTJIS", new String[]{"SHIFT_JIS", "SJIS"}); // tidy encodingNameMap.put("JIS", new String[]{"ISO-2022-JP", "JIS"}); encodingNameMap.put("ISO-2022-JP", new String[]{"ISO-2022-JP", "JIS"}); encodingNameMap.put("CSISO2022JP", new String[]{"CSISO2022JP", "JIS"}); encodingNameMap.put("ISO2022", new String[]{"ISO-2022-JP", "JIS"}); // tidy encodingNameMap.put("ISO2022KR", new String[]{"ISO-2022-KR", "ISO2022KR"}); encodingNameMap.put("ISO-2022-KR", new String[]{"ISO-2022-KR", "ISO2022KR"}); encodingNameMap.put("CSISO2022KR", new String[]{"CSISO2022KR", "ISO2022KR"}); encodingNameMap.put("ISO-2022-CN", new String[]{"ISO-2022-CN", "ISO2022CN"}); encodingNameMap.put("ISO2022CN", new String[]{"ISO-2022-CN", "ISO2022CN"}); encodingNameMap.put("MACROMAN", new String[]{"macintosh", "MacRoman"}); // tidy encodingNameMap.put("MACINTOSH", new String[]{"macintosh", "MacRoman"}); encodingNameMap.put("MACINTOSH ROMAN", new String[]{"macintosh", "MacRoman"}); encodingNameMap.put("37", new String[]{"IBM037", "CP037"}); encodingNameMap.put("273", new String[]{"IBM273", "CP273"}); encodingNameMap.put("277", new String[]{"IBM277", "CP277"}); encodingNameMap.put("278", new String[]{"IBM278", "CP278"}); encodingNameMap.put("280", new String[]{"IBM280", "CP280"}); encodingNameMap.put("284", new String[]{"IBM284", "CP284"}); encodingNameMap.put("285", new String[]{"IBM285", "CP285"}); encodingNameMap.put("290", new String[]{"IBM290", "CP290"}); encodingNameMap.put("297", new String[]{"IBM297", "CP297"}); encodingNameMap.put("420", new 
String[]{"IBM420", "CP420"}); encodingNameMap.put("424", new String[]{"IBM424", "CP424"}); encodingNameMap.put("437", new String[]{"IBM437", "CP437"}); encodingNameMap.put("500", new String[]{"IBM500", "CP500"}); encodingNameMap.put("775", new String[]{"IBM775", "CP775"}); encodingNameMap.put("850", new String[]{"IBM850", "CP850"}); encodingNameMap.put("852", new String[]{"IBM852", "CP852"}); encodingNameMap.put("CSPCP852", new String[]{"IBM852", "CP852"}); encodingNameMap.put("855", new String[]{"IBM855", "CP855"}); encodingNameMap.put("857", new String[]{"IBM857", "CP857"}); encodingNameMap.put("858", new String[]{"IBM00858", "Cp858"}); encodingNameMap.put("0858", new String[]{"IBM00858", "Cp858"}); encodingNameMap.put("860", new String[]{"IBM860", "CP860"}); encodingNameMap.put("861", new String[]{"IBM861", "CP861"}); encodingNameMap.put("IS", new String[]{"IBM861", "CP861"}); encodingNameMap.put("862", new String[]{"IBM862", "CP862"}); encodingNameMap.put("863", new String[]{"IBM863", "CP863"}); encodingNameMap.put("864", new String[]{"IBM864", "CP864"}); encodingNameMap.put("865", new String[]{"IBM865", "CP865"}); encodingNameMap.put("866", new String[]{"IBM866", "CP866"}); encodingNameMap.put("868", new String[]{"IBM868", "CP868"}); encodingNameMap.put("AR", new String[]{"IBM868", "CP868"}); encodingNameMap.put("869", new String[]{"IBM869", "CP869"}); encodingNameMap.put("GR", new String[]{"IBM869", "CP869"}); encodingNameMap.put("870", new String[]{"IBM870", "CP870"}); encodingNameMap.put("871", new String[]{"IBM871", "CP871"}); encodingNameMap.put("EBCDIC-CP-IS", new String[]{"IBM871", "CP871"}); encodingNameMap.put("918", new String[]{"CP918", "CP918"}); encodingNameMap.put("924", new String[]{"IBM00924", "CP924"}); encodingNameMap.put("0924", new String[]{"IBM00924", "CP924"}); encodingNameMap.put("1026", new String[]{"IBM1026", "CP1026"}); encodingNameMap.put("1047", new String[]{"IBM1047", "Cp1047"}); encodingNameMap.put("1140", new String[]{"IBM01140", 
"Cp1140"}); encodingNameMap.put("1141", new String[]{"IBM01141", "Cp1141"}); encodingNameMap.put("1142", new String[]{"IBM01142", "Cp1142"}); encodingNameMap.put("1143", new String[]{"IBM01143", "Cp1143"}); encodingNameMap.put("1144", new String[]{"IBM01144", "Cp1144"}); encodingNameMap.put("1145", new String[]{"IBM01145", "Cp1145"}); encodingNameMap.put("1146", new String[]{"IBM01146", "Cp1146"}); encodingNameMap.put("1147", new String[]{"IBM01147", "Cp1147"}); encodingNameMap.put("1148", new String[]{"IBM01148", "Cp1148"}); encodingNameMap.put("1149", new String[]{"IBM01149", "Cp1149"}); encodingNameMap.put("1250", new String[]{"WINDOWS-1250", "Cp1250"}); encodingNameMap.put("1251", new String[]{"WINDOWS-1251", "Cp1251"}); encodingNameMap.put("1252", new String[]{"WINDOWS-1252", "Cp1252"}); encodingNameMap.put("WIN1252", new String[]{"WINDOWS-1252", "Cp1252"}); // tidy encodingNameMap.put("1253", new String[]{"WINDOWS-1253", "Cp1253"}); encodingNameMap.put("1254", new String[]{"WINDOWS-1254", "Cp1254"}); encodingNameMap.put("1255", new String[]{"WINDOWS-1255", "Cp1255"}); encodingNameMap.put("1256", new String[]{"WINDOWS-1256", "Cp1256"}); encodingNameMap.put("1257", new String[]{"WINDOWS-1257", "Cp1257"}); encodingNameMap.put("1258", new String[]{"WINDOWS-1258", "Cp1258"}); encodingNameMap.put("EUC-JP", new String[]{"EUC-JP", "EUCJIS"}); encodingNameMap.put("EUCJIS", new String[]{"EUC-JP", "EUCJIS"}); encodingNameMap.put("EUC-KR", new String[]{"EUC-KR", "KSC5601"}); encodingNameMap.put("KSC5601", new String[]{"EUC-KR", "KSC5601"}); encodingNameMap.put("GB2312", new String[]{"GB2312", "GB2312"}); encodingNameMap.put("CSGB2312", new String[]{"GB2312", "GB2312"}); encodingNameMap.put("X0201", new String[]{"X0201", "JIS0201"}); encodingNameMap.put("JIS0201", new String[]{"X0201", "JIS0201"}); encodingNameMap.put("X0208", new String[]{"X0208", "JIS0208"}); encodingNameMap.put("JIS0208", new String[]{"X0208", "JIS0208"}); encodingNameMap.put("ISO-IR-87", new 
String[]{"ISO-IR-87", "JIS0208"}); encodingNameMap.put("JIS0208", new String[]{"ISO-IR-87", "JIS0208"}); encodingNameMap.put("X0212", new String[]{"X0212", "JIS0212"}); encodingNameMap.put("JIS0212", new String[]{"X0212", "JIS0212"}); encodingNameMap.put("ISO-IR-159", new String[]{"X0212", "JIS0212"}); encodingNameMap.put("GB18030", new String[]{"GB18030", "GB18030"}); encodingNameMap.put("936", new String[]{"GBK", "GBK"}); encodingNameMap.put("MS936", new String[]{"GBK", "GBK"}); encodingNameMap.put("MS932", new String[]{"WINDOWS-31J", "MS932"}); encodingNameMap.put("31J", new String[]{"WINDOWS-31J", "MS932"}); encodingNameMap.put("CSWINDOWS31J", new String[]{"WINDOWS-31J", "MS932"}); encodingNameMap.put("TIS-620", new String[]{"TIS-620", "TIS620"}); encodingNameMap.put("TIS620", new String[]{"TIS-620", "TIS620"}); encodingNameMap.put("ISO-8859-2", new String[]{"ISO-8859-2", "ISO8859_2"}); encodingNameMap.put("ISO8859_2", new String[]{"ISO-8859-2", "ISO8859_2"}); encodingNameMap.put("ISO-IR-101", new String[]{"ISO-8859-2", "ISO8859_2"}); encodingNameMap.put("LATIN2", new String[]{"ISO-8859-2", "ISO8859_2"}); encodingNameMap.put("L2", new String[]{"ISO-8859-2", "ISO8859_2"}); encodingNameMap.put("ISO-8859-3", new String[]{"ISO-8859-3", "ISO8859_3"}); encodingNameMap.put("ISO8859_3", new String[]{"ISO-8859-3", "ISO8859_3"}); encodingNameMap.put("ISO-IR-109", new String[]{"ISO-8859-3", "ISO8859_3"}); encodingNameMap.put("LATIN3", new String[]{"ISO-8859-3", "ISO8859_3"}); encodingNameMap.put("L3", new String[]{"ISO-8859-3", "ISO8859_3"}); encodingNameMap.put("ISO-8859-4", new String[]{"ISO-8859-4", "ISO8859_4"}); encodingNameMap.put("ISO8859_4", new String[]{"ISO-8859-4", "ISO8859_4"}); encodingNameMap.put("ISO-IR-110", new String[]{"ISO-8859-4", "ISO8859_4"}); encodingNameMap.put("ISO-IR-110", new String[]{"ISO-8859-4", "ISO8859_4"}); encodingNameMap.put("L4", new String[]{"ISO-8859-4", "ISO8859_4"}); encodingNameMap.put("ISO-8859-5", new String[]{"ISO-8859-5", 
"ISO8859_5"}); encodingNameMap.put("ISO8859_5", new String[]{"ISO-8859-5", "ISO8859_5"}); encodingNameMap.put("ISO-IR-144", new String[]{"ISO-8859-5", "ISO8859_5"}); encodingNameMap.put("CYRILLIC", new String[]{"ISO-8859-5", "ISO8859_5"}); encodingNameMap.put("ISO-8859-6", new String[]{"ISO-8859-6", "ISO8859_6"}); encodingNameMap.put("ISO8859_6", new String[]{"ISO-8859-6", "ISO8859_6"}); encodingNameMap.put("ISO-IR-127", new String[]{"ISO-8859-6", "ISO8859_6"}); encodingNameMap.put("ARABIC", new String[]{"ISO-8859-6", "ISO8859_6"}); encodingNameMap.put("ISO-8859-7", new String[]{"ISO-8859-7", "ISO8859_7"}); encodingNameMap.put("ISO8859_7", new String[]{"ISO-8859-7", "ISO8859_7"}); encodingNameMap.put("ISO-IR-126", new String[]{"ISO-8859-7", "ISO8859_7"}); encodingNameMap.put("GREEK", new String[]{"ISO-8859-7", "ISO8859_7"}); encodingNameMap.put("ISO-8859-8", new String[]{"ISO-8859-8", "ISO8859_8"}); encodingNameMap.put("ISO8859_8", new String[]{"ISO-8859-8", "ISO8859_8"}); encodingNameMap.put("ISO-8859-8-I", new String[]{"ISO-8859-8", "ISO8859_8"}); encodingNameMap.put("ISO-IR-138", new String[]{"ISO-8859-8", "ISO8859_8"}); encodingNameMap.put("HEBREW", new String[]{"ISO-8859-8", "ISO8859_8"}); encodingNameMap.put("ISO-8859-9", new String[]{"ISO-8859-9", "ISO8859_8"}); encodingNameMap.put("ISO8859_8", new String[]{"ISO-8859-9", "ISO8859_8"}); encodingNameMap.put("CSISOLATINHEBREW", new String[]{"ISO-8859-9", "ISO8859_9"}); encodingNameMap.put("ISO-IR-148", new String[]{"ISO-8859-9", "ISO8859_9"}); encodingNameMap.put("LATIN5", new String[]{"ISO-8859-9", "ISO8859_9"}); encodingNameMap.put("CSISOLATIN5", new String[]{"ISO-8859-9", "ISO8859_9"}); encodingNameMap.put("L5", new String[]{"ISO-8859-9", "ISO8859_9"}); encodingNameMap.put("ISO-8859-15", new String[]{"ISO-8859-15", "ISO8859_15"}); encodingNameMap.put("ISO8859_15", new String[]{"ISO-8859-15", "ISO8859_15"}); encodingNameMap.put("KOI8-R", new String[]{"KOI8-R", "KOI8_R"}); encodingNameMap.put("KOI8_R", new 
String[]{"CSKOI8R", "KOI8_R"}); encodingNameMap.put("CSKOI8R", new String[]{"CSKOI8R", "KOI8_R"}); } /** * Convert a Java character encoding name to its IANA equivalent. * @param encoding java encoding name or alias * @return iana equivalent or null if no match is found. */ public static String toIana(String encoding) { if (encoding == null) { return null; } String[] values = (String[]) encodingNameMap.get(handlecommonAlias(encoding)); if (values != null) { return values[0]; } return null; } /** * "Fix" the name for common alias to reduce the number of entries needed in the hashmap. It actually removes CSIBM, * CCSID, IBM-, IBM0, CP-0, IBM, CP0, CP-, CP, WINDOWS- prefixes from given name. * @param encoding encoding name * @return "fixed" encoding. */ private static String handlecommonAlias(String encoding) { String key = encoding.toUpperCase(); // handle common alias if (key.startsWith("CSIBM") || key.startsWith("CCSID")) { key = key.substring(5); } else if (key.startsWith("IBM-") || key.startsWith("IBM0") || key.startsWith("CP-0")) { key = key.substring(4); } else if (key.startsWith("IBM") || key.startsWith("CP0") || key.startsWith("CP-")) { key = key.substring(3); } else if (key.startsWith("CP")) { key = key.substring(2); } else if (key.startsWith("WINDOWS-")) { key = key.substring(8); } else if (key.startsWith("ISO_")) { key = "ISO-" + key.substring(4); } return key; } /** * Converts an encoding name to the standard java name. Handles IANA names, legacy names used in tidy and different * java encoding alias. See http://www.iana.org/assignments/character-sets. * @param encoding IANA encoding name or alias * @return java equivalent or null if no match is found. 
*/ public static String toJava(String encoding) { if (encoding == null) { return null; } String[] values = (String[]) encodingNameMap.get(handlecommonAlias(encoding)); if (values != null) { return values[1]; } return null; } } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/StreamIn.java������������������������������������������������������0000644�0001750�0001750�00000006712�10116675277�021436� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Input Stream. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 587 $ ($Author: fgiust $) */ public interface StreamIn { /** * end of stream char. */ int END_OF_STREAM = -1; /** * Getter for <code>curcol</code>. * @return Returns the curcol. 
*/ int getCurcol(); /** * Getter for <code>curline</code>. * @return Returns the curline. */ int getCurline(); /** * reads a char from the stream. * @return char */ int readCharFromStream(); /** * Read a char. * @return char */ int readChar(); /** * Unget a char. * @param c char */ void ungetChar(int c); /** * Has end of stream been reached? * @return <code>true</code> if end of stream has been reached */ boolean isEndOfStream(); /** * Setter for lexer instance (needed for error reporting). * @param lexer Lexer */ void setLexer(Lexer lexer); }������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/TagCheck.java������������������������������������������������������0000644�0001750�0001750�00000005455�10103520052�021340� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Check HTML attributes. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 408 $ ($Author: fgiust $) */ public interface TagCheck { /** * Checks attributes in given Node. * @param lexer Lexer * @param node Node to check for valid attributes. 
*/ void check(Lexer lexer, Node node); }�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/PPrint.java��������������������������������������������������������0000644�0001750�0001750�00000234171�11432627613�021123� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.text.NumberFormat; /** * Pretty print parse tree. Block-level and unknown elements are printed on new lines and their contents indented 2 * spaces Inline elements are printed inline. Inline content is wrapped on spaces (except in attribute values or * preformatted text, after start tags and before end tags. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 1122 $ ($Author: aditsu $) */ public class PPrint { /** * position: normal. */ private static final short NORMAL = 0; /** * position: preformatted text. */ private static final short PREFORMATTED = 1; /** * position: comment. 
*/ private static final short COMMENT = 2; /** * position: attribute value. */ private static final short ATTRIBVALUE = 4; /** * position: nowrap. */ private static final short NOWRAP = 8; /** * position: cdata. */ private static final short CDATA = 16; /** * Start cdata token. */ private static final String CDATA_START = "<![CDATA["; /** * End cdata token. */ private static final String CDATA_END = "]]>"; /** * Javascript comment start. */ private static final String JS_COMMENT_START = "//"; /** * Javascript comment end. */ private static final String JS_COMMENT_END = ""; /** * VB comment start. */ private static final String VB_COMMENT_START = "\'"; /** * VB comment end. */ private static final String VB_COMMENT_END = ""; /** * CSS comment start. */ private static final String CSS_COMMENT_START = "/*"; /** * CSS comment end. */ private static final String CSS_COMMENT_END = "*/"; /** * Default comment start. */ private static final String DEFAULT_COMMENT_START = ""; /** * Default comment end. */ private static final String DEFAULT_COMMENT_END = ""; private int[] linebuf; private int lbufsize; private int linelen; private int wraphere; private boolean inAttVal; private boolean inString; /** * Current slide number. */ private int slide; /** * Total slides count. */ private int count; private Node slidecontent; /** * current configuration. */ private Configuration configuration; /** * Instantiates a new PPrint. * @param configuration configuration */ public PPrint(Configuration configuration) { this.configuration = configuration; } /** * @param ind * @return */ int cWrapLen(int ind) { /* #431953 - start RJ Wraplen adjusted for smooth international ride */ if ("zh".equals(this.configuration.language)) { // Chinese characters take two positions on a fixed-width screen // It would be more accurate to keep a parallel linelen and wraphere incremented by 2 for Chinese characters // and 1 otherwise, but this is way simpler. 
return (ind + ((this.configuration.wraplen - ind) / 2)); } if ("ja".equals(this.configuration.language)) { /* average Japanese text is 30% kanji */ return (ind + (((this.configuration.wraplen - ind) * 7) / 10)); } return (this.configuration.wraplen); /* #431953 - end RJ */ } /** * return one less than the number of bytes used by the UTF-8 byte sequence. The Unicode char is returned in ch. * @param str points to the UTF-8 byte sequence * @param start starting offset in str * @param ch initialized to 1st byte, passed as an array to allow modification * @return one less that the number of bytes used by UTF-8 char */ public static int getUTF8(byte[] str, int start, int[] ch) { int[] n = new int[1]; int[] bytes = new int[]{0}; // first byte "str[0]" is passed in separately from the // rest of the UTF-8 byte sequence starting at "str[1]" byte[] successorBytes = str; boolean err = EncodingUtils.decodeUTF8BytesToChar( n, TidyUtils.toUnsigned(str[start]), successorBytes, null, bytes, start + 1); if (err) { n[0] = 0xFFFD; // replacement char } ch[0] = n[0]; return bytes[0] - 1; } /** * store char c as UTF-8 encoded byte stream. * @param buf * @param start * @param c * @return */ public static int putUTF8(byte[] buf, int start, int c) { int[] count = new int[]{0}; boolean err = EncodingUtils.encodeCharToUTF8Bytes(c, buf, null, count); if (err) { // replacement char 0xFFFD encoded as UTF-8 buf[0] = (byte) 0xEF; buf[1] = (byte) 0xBF; buf[2] = (byte) 0xBD; count[0] = 3; } start += count[0]; return start; } private void addC(int c, int index) { if (index + 1 >= lbufsize) { while (index + 1 >= lbufsize) { if (lbufsize == 0) { lbufsize = 256; } else { lbufsize = lbufsize * 2; } } int[] temp = new int[lbufsize]; if (linebuf != null) { System.arraycopy(linebuf, 0, temp, 0, index); } linebuf = temp; } linebuf[index] = c; } /** * Adds an ascii String. 
* @param str String to be added * @param index actual line lenght * @return final line length */ private int addAsciiString(String str, int index) { int len = str.length(); if (index + len >= lbufsize) { while (index + len >= lbufsize) { if (lbufsize == 0) { lbufsize = 256; } else { lbufsize = lbufsize * 2; } } int[] temp = new int[lbufsize]; if (linebuf != null) { System.arraycopy(linebuf, 0, temp, 0, index); } linebuf = temp; } for (int ix = 0; ix < len; ++ix) { linebuf[index + ix] = str.charAt(ix); } return index + len; } /** * @param fout * @param indent */ private void wrapLine(Out fout, int indent) { int i, p, q; if (wraphere == 0) { return; } for (i = 0; i < indent; ++i) { fout.outc(' '); } for (i = 0; i < wraphere; ++i) { fout.outc(linebuf[i]); } if (inString) { fout.outc(' '); fout.outc('\\'); } fout.newline(); if (linelen > wraphere) { p = 0; if (linebuf[wraphere] == ' ') { ++wraphere; } q = wraphere; addC('\0', linelen); while (true) { linebuf[p] = linebuf[q]; if (linebuf[q] == 0) { break; } p++; q++; } linelen -= wraphere; } else { linelen = 0; } wraphere = 0; } /** * @param fout * @param indent * @param inString */ private void wrapAttrVal(Out fout, int indent, boolean inString) { int i, p, q; for (i = 0; i < indent; ++i) { fout.outc(' '); } for (i = 0; i < wraphere; ++i) { fout.outc(linebuf[i]); } fout.outc(' '); if (inString) { fout.outc('\\'); } fout.newline(); if (linelen > wraphere) { p = 0; if (linebuf[wraphere] == ' ') { ++wraphere; } q = wraphere; addC('\0', linelen); while (true) { linebuf[p] = linebuf[q]; if (linebuf[q] == 0) { break; } p++; q++; } linelen -= wraphere; } else { linelen = 0; } wraphere = 0; } /** * @param fout * @param indent */ public void flushLine(Out fout, int indent) { int i; if (linelen > 0) { if (indent + linelen >= this.configuration.wraplen) { wrapLine(fout, indent); } if (!inAttVal || this.configuration.indentAttributes) { for (i = 0; i < indent; ++i) { fout.outc(' '); } } for (i = 0; i < linelen; ++i) { 
fout.outc(linebuf[i]); } } fout.newline(); linelen = 0; wraphere = 0; inAttVal = false; } /** * @param fout * @param indent */ public void condFlushLine(Out fout, int indent) { int i; if (linelen > 0) { if (indent + linelen >= this.configuration.wraplen) { wrapLine(fout, indent); } if (!inAttVal || this.configuration.indentAttributes) { for (i = 0; i < indent; ++i) { fout.outc(' '); } } for (i = 0; i < linelen; ++i) { fout.outc(linebuf[i]); } fout.newline(); linelen = 0; wraphere = 0; inAttVal = false; } } /** * @param c * @param mode */ private void printChar(int c, short mode) { String entity; boolean breakable = false; // #431953 - RJ if (c == ' ' && !TidyUtils.toBoolean(mode & (PREFORMATTED | COMMENT | ATTRIBVALUE | CDATA))) { // coerce a space character to a non-breaking space if (TidyUtils.toBoolean(mode & NOWRAP)) { // by default XML doesn't define   if (this.configuration.numEntities || this.configuration.xmlTags) { addC('&', linelen++); addC('#', linelen++); addC('1', linelen++); addC('6', linelen++); addC('0', linelen++); addC(';', linelen++); } else { // otherwise use named entity addC('&', linelen++); addC('n', linelen++); addC('b', linelen++); addC('s', linelen++); addC('p', linelen++); addC(';', linelen++); } return; } wraphere = linelen; } // comment characters are passed raw if (TidyUtils.toBoolean(mode & (COMMENT | CDATA))) { addC(c, linelen++); return; } // except in CDATA map < to < etc. if (!TidyUtils.toBoolean(mode & CDATA)) { if (c == '<') { addC('&', linelen++); addC('l', linelen++); addC('t', linelen++); addC(';', linelen++); return; } if (c == '>') { addC('&', linelen++); addC('g', linelen++); addC('t', linelen++); addC(';', linelen++); return; } // naked '&' chars can be left alone or quoted as & // The latter is required for XML where naked '&' are illegal. 
if (c == '&' && this.configuration.quoteAmpersand) { addC('&', linelen++); addC('a', linelen++); addC('m', linelen++); addC('p', linelen++); addC(';', linelen++); return; } if (c == '"' && this.configuration.quoteMarks) { addC('&', linelen++); addC('q', linelen++); addC('u', linelen++); addC('o', linelen++); addC('t', linelen++); addC(';', linelen++); return; } if (c == '\'' && this.configuration.quoteMarks) { addC('&', linelen++); addC('#', linelen++); addC('3', linelen++); addC('9', linelen++); addC(';', linelen++); return; } if (c == 160 && !this.configuration.rawOut) { if (this.configuration.makeBare) { addC(' ', linelen++); } else if (this.configuration.quoteNbsp) { addC('&', linelen++); if (this.configuration.numEntities || this.configuration.xmlTags) { addC('#', linelen++); addC('1', linelen++); addC('6', linelen++); addC('0', linelen++); } else { addC('n', linelen++); addC('b', linelen++); addC('s', linelen++); addC('p', linelen++); } addC(';', linelen++); } else { addC(c, linelen++); } return; } } // #431953 - start RJ // Handle encoding-specific issues if ("UTF8".equals(this.configuration.getOutCharEncodingName())) { // Chinese doesn't have spaces, so it needs other kinds of breaks // This will also help documents using nice Unicode punctuation // But we leave the ASCII range punctuation untouched // Break after any punctuation or spaces characters if ((c >= 0x2000) && !TidyUtils.toBoolean(mode & PREFORMATTED)) { if (((c >= 0x2000) && (c <= 0x2006)) || ((c >= 0x2008) && (c <= 0x2010)) || ((c >= 0x2011) && (c <= 0x2046)) || ((c >= 0x207D) && (c <= 0x207E)) || ((c >= 0x208D) && (c <= 0x208E)) || ((c >= 0x2329) && (c <= 0x232A)) || ((c >= 0x3001) && (c <= 0x3003)) || ((c >= 0x3008) && (c <= 0x3011)) || ((c >= 0x3014) && (c <= 0x301F)) || ((c >= 0xFD3E) && (c <= 0xFD3F)) || ((c >= 0xFE30) && (c <= 0xFE44)) || ((c >= 0xFE49) && (c <= 0xFE52)) || ((c >= 0xFE54) && (c <= 0xFE61)) || ((c >= 0xFE6A) && (c <= 0xFE6B)) || ((c >= 0xFF01) && (c <= 0xFF03)) || ((c >= 
0xFF05) && (c <= 0xFF0A)) || ((c >= 0xFF0C) && (c <= 0xFF0F)) || ((c >= 0xFF1A) && (c <= 0xFF1B)) || ((c >= 0xFF1F) && (c <= 0xFF20)) || ((c >= 0xFF3B) && (c <= 0xFF3D)) || ((c >= 0xFF61) && (c <= 0xFF65))) { wraphere = linelen + 1; breakable = true; } else { switch (c) { case 0xFE63 : case 0xFE68 : case 0x3030 : case 0x30FB : case 0xFF3F : case 0xFF5B : case 0xFF5D : wraphere = linelen + 1; breakable = true; } } // but break before a left punctuation if (breakable) { if (((c >= 0x201A) && (c <= 0x201C)) || ((c >= 0x201E) && (c <= 0x201F))) { wraphere--; } else { switch (c) { case 0x2018 : case 0x2039 : case 0x2045 : case 0x207D : case 0x208D : case 0x2329 : case 0x3008 : case 0x300A : case 0x300C : case 0x300E : case 0x3010 : case 0x3014 : case 0x3016 : case 0x3018 : case 0x301A : case 0x301D : case 0xFD3E : case 0xFE35 : case 0xFE37 : case 0xFE39 : case 0xFE3B : case 0xFE3D : case 0xFE3F : case 0xFE41 : case 0xFE43 : case 0xFE59 : case 0xFE5B : case 0xFE5D : case 0xFF08 : case 0xFF3B : case 0xFF5B : case 0xFF62 : wraphere--; } } } } else if ("BIG5".equals(this.configuration.getOutCharEncodingName())) { // Allow linebreak at Chinese punctuation characters // There are not many spaces in Chinese addC(c, linelen++); if (((c & 0xFF00) == 0xA100) && !TidyUtils.toBoolean(mode & PREFORMATTED)) { wraphere = linelen; // opening brackets have odd codes: break before them if ((c > 0x5C) && (c < 0xAD) && ((c & 1) == 1)) { wraphere--; } } return; } else if ("SHIFTJIS".equals(this.configuration.getOutCharEncodingName()) || "ISO2022".equals(this.configuration.getOutCharEncodingName())) { // ISO 2022 characters are passed raw addC(c, linelen++); return; } else { if (this.configuration.rawOut) { addC(c, linelen++); return; } } // #431953 - end RJ } // if preformatted text, map   to space if (c == 160 && TidyUtils.toBoolean(mode & PREFORMATTED)) { addC(' ', linelen++); return; } // Filters from Word and PowerPoint often use smart quotes resulting in character codes between 128 and 
159. // Unfortunately, the corresponding HTML 4.0 entities for these are not widely supported. // The following converts dashes and quotation marks to the nearest ASCII equivalent. // My thanks to Andrzej Novosiolov for his help with this code. if (this.configuration.makeClean && this.configuration.asciiChars || this.configuration.makeBare) { if (c >= 0x2013 && c <= 0x201E) { switch (c) { case 0x2013 : // en dash case 0x2014 : // em dash c = '-'; break; case 0x2018 : // left single quotation mark case 0x2019 : // right single quotation mark case 0x201A : // single low-9 quotation mark c = '\''; break; case 0x201C : // left double quotation mark case 0x201D : // right double quotation mark case 0x201E : // double low-9 quotation mark c = '"'; break; } } } // don't map latin-1 chars to entities if ("ISO8859_1".equals(this.configuration.getOutCharEncodingName())) { if (c > 255) /* multi byte chars */ { if (!this.configuration.numEntities) { entity = EntityTable.getDefaultEntityTable().entityName((short) c); if (entity != null) { entity = "&" + entity + ";"; } else { entity = "&#" + c + ";"; } } else { entity = "&#" + c + ";"; } for (int i = 0; i < entity.length(); i++) { addC(entity.charAt(i), linelen++); } return; } if (c > 126 && c < 160) { entity = "&#" + c + ";"; for (int i = 0; i < entity.length(); i++) { addC(entity.charAt(i), linelen++); } return; } addC(c, linelen++); return; } // don't map utf8 or utf16 chars to entities if (this.configuration.getOutCharEncodingName().startsWith("UTF")) { addC(c, linelen++); return; } // use numeric entities only for XML if (this.configuration.xmlTags) { // if ASCII use numeric entities for chars > 127 if (c > 127 && "ASCII".equals(this.configuration.getOutCharEncodingName())) { entity = "&#" + c + ";"; for (int i = 0; i < entity.length(); i++) { addC(entity.charAt(i), linelen++); } return; } // otherwise output char raw addC(c, linelen++); return; } // default treatment for ASCII if 
("ASCII".equals(this.configuration.getOutCharEncodingName()) && (c > 126 || (c < ' ' && c != '\t'))) { if (!this.configuration.numEntities) { entity = EntityTable.getDefaultEntityTable().entityName((short) c); if (entity != null) { entity = "&" + entity + ";"; } else { entity = "&#" + c + ";"; } } else { entity = "&#" + c + ";"; } for (int i = 0; i < entity.length(); i++) { addC(entity.charAt(i), linelen++); } return; } addC(c, linelen++); } /** * The line buffer is uint not char so we can hold Unicode values unencoded. The translation to UTF-8 is deferred to * the outc routine called to flush the line buffer. * @param fout * @param mode * @param indent * @param textarray * @param start * @param end */ private void printText(Out fout, short mode, int indent, byte[] textarray, int start, int end) { int i, c; int[] ci = new int[1]; for (i = start; i < end; ++i) { if (indent + linelen >= this.configuration.wraplen) { wrapLine(fout, indent); } c = (textarray[i]) & 0xFF; // Convert to unsigned. // look for UTF-8 multibyte character if (c > 0x7F) { i += getUTF8(textarray, i, ci); c = ci[0]; } if (c == '\n') { flushLine(fout, indent); continue; } printChar(c, mode); } } /** * @param str */ private void printString(String str) { for (int i = 0; i < str.length(); i++) { addC(str.charAt(i), linelen++); } } /** * @param fout * @param indent * @param value * @param delim * @param wrappable */ private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable) { int c; int[] ci = new int[1]; boolean wasinstring = false; byte[] valueChars = null; int i; short mode = (wrappable ? 
(short) (NORMAL | ATTRIBVALUE) : (short) (PREFORMATTED | ATTRIBVALUE)); if (value != null) { valueChars = TidyUtils.getBytes(value); } // look for ASP, Tango or PHP instructions for computed attribute value if (valueChars != null && valueChars.length >= 5 && valueChars[0] == '<') { if (valueChars[1] == '%' || valueChars[1] == '@' || (new String(valueChars, 0, 5)).equals("<?php")) { mode |= CDATA; } } if (delim == 0) { delim = '"'; } addC('=', linelen++); // don't wrap after "=" for xml documents if (!this.configuration.xmlOut) { if (indent + linelen < this.configuration.wraplen) { wraphere = linelen; } if (indent + linelen >= this.configuration.wraplen) { wrapLine(fout, indent); } if (indent + linelen < this.configuration.wraplen) { wraphere = linelen; } else { condFlushLine(fout, indent); } } addC(delim, linelen++); if (value != null) { inString = false; i = 0; while (i < valueChars.length) { c = (valueChars[i]) & 0xFF; // Convert to unsigned. if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen) { wraphere = linelen; wasinstring = inString; } if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen) { wrapAttrVal(fout, indent, wasinstring); } if (c == delim) { String entity; entity = (c == '"' ? 
""" : "'"); for (int j = 0; j < entity.length(); j++) { addC(entity.charAt(j), linelen++); } ++i; continue; } else if (c == '"') { if (this.configuration.quoteMarks) { addC('&', linelen++); addC('q', linelen++); addC('u', linelen++); addC('o', linelen++); addC('t', linelen++); addC(';', linelen++); } else { addC('"', linelen++); } if (delim == '\'') { inString = !inString; } ++i; continue; } else if (c == '\'') { if (this.configuration.quoteMarks) { addC('&', linelen++); addC('#', linelen++); addC('3', linelen++); addC('9', linelen++); addC(';', linelen++); } else { addC('\'', linelen++); } if (delim == '"') { inString = !inString; } ++i; continue; } // look for UTF-8 multibyte character if (c > 0x7F) { i += getUTF8(valueChars, i, ci); c = ci[0]; } ++i; if (c == '\n') { flushLine(fout, indent); continue; } printChar(c, mode); } } inString = false; addC(delim, linelen++); } /** * @param fout * @param indent * @param node * @param attr */ private void printAttribute(Out fout, int indent, Node node, AttVal attr) { String name; boolean wrappable = false; if (this.configuration.indentAttributes) { flushLine(fout, indent); indent += this.configuration.spaces; } name = attr.attribute; if (indent + linelen >= this.configuration.wraplen) { wrapLine(fout, indent); } if (!this.configuration.xmlTags && !this.configuration.xmlOut && attr.dict != null) { if (AttributeTable.getDefaultAttributeTable().isScript(name)) { wrappable = this.configuration.wrapScriptlets; } else if (!attr.dict.isNowrap() && this.configuration.wrapAttVals) { wrappable = true; } } if (indent + linelen < this.configuration.wraplen) { wraphere = linelen; addC(' ', linelen++); } else { condFlushLine(fout, indent); addC(' ', linelen++); } for (int i = 0; i < name.length(); i++) { addC( TidyUtils.foldCase(name.charAt(i), this.configuration.upperCaseAttrs, this.configuration.xmlTags), linelen++); } if (indent + linelen >= this.configuration.wraplen) { wrapLine(fout, indent); } if (attr.value == null) { if 
(this.configuration.xmlTags || this.configuration.xmlOut) { printAttrValue(fout, indent, (attr.isBoolAttribute() ? attr.attribute : ""), attr.delim, true); } else if (!attr.isBoolAttribute() && node != null && !node.isNewNode()) { printAttrValue(fout, indent, "", attr.delim, true); } else if (indent + linelen < this.configuration.wraplen) { wraphere = linelen; } } else { printAttrValue(fout, indent, attr.value, attr.delim, wrappable); } } /** * @param fout * @param indent * @param node * @param attr */ private void printAttrs(Out fout, int indent, Node node, AttVal attr) { // add xml:space attribute to pre and other elements if (configuration.xmlOut && configuration.xmlSpace && ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt) && node.getAttrByName("xml:space") == null) { node.addAttribute("xml:space", "preserve"); if (attr != null) { attr = node.attributes; } } if (attr != null) { if (attr.next != null) { printAttrs(fout, indent, node, attr.next); } if (attr.attribute != null) { Attribute attribute = attr.dict; if (!this.configuration.dropProprietaryAttributes || !(attribute == null || TidyUtils.toBoolean(attribute.getVersions() & Dict.VERS_PROPRIETARY))) { printAttribute(fout, indent, node, attr); } } else if (attr.asp != null) { addC(' ', linelen++); printAsp(fout, indent, attr.asp); } else if (attr.php != null) { addC(' ', linelen++); printPhp(fout, indent, attr.php); } } } /** * Line can be wrapped immediately after inline start tag provided if follows a text node ending in a space, or it * parent is an inline element that that rule applies to. 
This behaviour was reverse engineered from Netscape 3.0
     * @param node current Node
     * @return <code>true</code> if the current char follows a space
     */
    private static boolean afterSpace(Node node)
    {
        Node prev;
        int c;

        if (node == null || node.tag == null || !TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
        {
            return true;
        }

        prev = node.prev;

        if (prev != null)
        {
            if (prev.type == Node.TEXT_NODE && prev.end > prev.start)
            {
                // only the last byte of the previous text node is inspected
                c = (prev.textarray[prev.end - 1]) & 0xFF; // Convert to unsigned.

                if (c == 160 || c == ' ' || c == '\n')
                {
                    return true;
                }
            }

            return false;
        }

        // no previous sibling: apply the same rule to the parent
        return afterSpace(node.parent);
    }

    /**
     * Prints a start tag (or an end tag when the node type is END_TAG) with its attributes, and records a wrap point
     * after it where allowed.
     * @param lexer Lexer (not used here)
     * @param fout output sink
     * @param mode position flags
     * @param indent current indentation
     * @param node node whose tag is printed
     */
    private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node)
    {
        String p;
        TagTable tt = this.configuration.tt;

        addC('<', linelen++);

        if (node.type == Node.END_TAG)
        {
            addC('/', linelen++);
        }

        p = node.element;
        for (int i = 0; i < p.length(); i++)
        {
            addC(
                TidyUtils.foldCase(p.charAt(i), this.configuration.upperCaseTags, this.configuration.xmlTags),
                linelen++);
        }

        printAttrs(fout, indent, node, node.attributes);

        if ((this.configuration.xmlOut || this.configuration.xHTML)
            && (node.type == Node.START_END_TAG || TidyUtils.toBoolean(node.tag.model & Dict.CM_EMPTY)))
        {
            addC(' ', linelen++); // Space is NS compatibility hack <br />
            addC('/', linelen++); // Required end tag marker
        }

        addC('>', linelen++);

        if ((node.type != Node.START_END_TAG || configuration.xHTML) && !TidyUtils.toBoolean(mode & PREFORMATTED))
        {
            if (indent + linelen >= this.configuration.wraplen)
            {
                wrapLine(fout, indent);
            }

            if (indent + linelen < this.configuration.wraplen)
            {
                // wrap after start tag if is <br/> or if it's not inline
                // fix for [514348]
                if (!TidyUtils.toBoolean(mode & NOWRAP)
                    && (!TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE) || (node.tag == tt.tagBr))
                    && afterSpace(node))
                {
                    wraphere = linelen;
                }
            }
        }
        else
        {
            condFlushLine(fout, indent);
        }
    }

    /**
     * Prints the end tag of the node without recording a wrap point before it.
     * @param mode position flags (not used here)
     * @param indent current indentation (not used here)
     * @param node node whose end tag is printed
     */
    private void printEndTag(short mode, int indent, Node node)
    {
        String p;

        // Netscape ignores SGML standard by not ignoring a line break before </A> or </U> etc.
        // To avoid rendering this as an underlined space, I disable line wrapping before inline end tags

        // if (indent + linelen < this.configuration.wraplen && !TidyUtils.toBoolean(mode & NOWRAP))
        // {
        // wraphere = linelen;
        // }

        addC('<', linelen++);
        addC('/', linelen++);

        p = node.element;
        for (int i = 0; i < p.length(); i++)
        {
            addC(
                TidyUtils.foldCase(p.charAt(i), this.configuration.upperCaseTags, this.configuration.xmlTags),
                linelen++);
        }

        addC('>', linelen++);
    }

    /**
     * Prints a comment node, unless comments are hidden by the configuration.
     * @param fout output sink
     * @param indent current indentation
     * @param node comment node
     */
    private void printComment(Out fout, int indent, Node node)
    {
        if (this.configuration.hideComments)
        {
            return;
        }

        if (indent + linelen < this.configuration.wraplen)
        {
            wraphere = linelen;
        }

        printString("<!--");

        printText(fout, COMMENT, indent, node.textarray, node.start, node.end);

        // See Lexer.java: AQ 8Jul2000
        printString("-->");

        if (node.linebreak)
        {
            flushLine(fout, indent);
        }
    }

    /**
     * Prints a DOCTYPE declaration on its own line. Quote escaping is switched off while the declaration is printed;
     * the previous setting is restored even if printing fails.
     * @param fout output sink
     * @param indent current indentation
     * @param lexer Lexer (not used here)
     * @param node doctype node
     */
    private void printDocType(Out fout, int indent, Lexer lexer, Node node)
    {
        int i, c = 0;
        short mode = 0;
        int[] ci = new int[1];
        boolean q = this.configuration.quoteMarks;

        this.configuration.quoteMarks = false;
        try
        {
            if (indent + linelen < this.configuration.wraplen)
            {
                wraphere = linelen;
            }

            condFlushLine(fout, indent);

            printString("<!DOCTYPE ");

            if (indent + linelen < this.configuration.wraplen)
            {
                wraphere = linelen;
            }

            for (i = node.start; i < node.end; ++i)
            {
                if (indent + linelen >= this.configuration.wraplen)
                {
                    wrapLine(fout, indent);
                }

                c = node.textarray[i] & 0xFF; // Convert to unsigned.

                // inDTDSubset?
                if (TidyUtils.toBoolean(mode & CDATA))
                {
                    if (c == ']')
                    {
                        mode &= ~CDATA;
                    }
                }
                else if (c == '[')
                {
                    mode |= CDATA;
                }

                // look for UTF-8 multibyte character
                if (c > 0x7F)
                {
                    i += getUTF8(node.textarray, i, ci);
                    c = ci[0];
                }

                if (c == '\n')
                {
                    flushLine(fout, indent);
                    continue;
                }

                printChar(c, mode);
            }

            if (linelen < this.configuration.wraplen)
            {
                wraphere = linelen;
            }

            addC('>', linelen++);
        }
        finally
        {
            this.configuration.quoteMarks = q;
        }
        condFlushLine(fout, indent);
    }

    /**
     * Prints a processing instruction, adding the closing '?' when the text does not already end with one.
     * @param fout output sink
     * @param indent current indentation
     * @param node processing instruction node
     */
    private void printPI(Out fout, int indent, Node node)
    {
        if (indent + linelen < this.configuration.wraplen)
        {
            wraphere = linelen;
        }

        printString("<?");

        // set CDATA to pass < and > unescaped
        printText(fout, CDATA, indent, node.textarray, node.start, node.end);

        if (node.end <= 0 || node.textarray[node.end - 1] != '?') // #542029 - fix by Terry Teague 10 Apr 02
        {
            addC('?', linelen++);
        }

        addC('>', linelen++);
        condFlushLine(fout, indent);
    }

    /**
     * Pretty print the xml declaration.
     * @param fout output sink
     * @param indent current indentation
     * @param node xml declaration node
     */
    private void printXmlDecl(Out fout, int indent, Node node)
    {
        if (indent + linelen < this.configuration.wraplen)
        {
            wraphere = linelen;
        }

        printString("<?xml");

        printAttrs(fout, indent, node, node.attributes);

        if (node.end <= 0 || node.textarray[node.end - 1] != '?') // #542029 - fix by Terry Teague 10 Apr 02
        {
            addC('?', linelen++);
        }

        addC('>', linelen++);
        condFlushLine(fout, indent);
    }

    /**
     * note ASP and JSTE share <% ... %> syntax.
* @param fout * @param indent * @param node */ private void printAsp(Out fout, int indent, Node node) { int savewraplen = this.configuration.wraplen; // disable wrapping if so requested if (!this.configuration.wrapAsp || !this.configuration.wrapJste) { this.configuration.wraplen = 0xFFFFFF; // a very large number } addC('<', linelen++); addC('%', linelen++); printText(fout, (this.configuration.wrapAsp ? CDATA : COMMENT), indent, node.textarray, node.start, node.end); addC('%', linelen++); addC('>', linelen++); /* condFlushLine(fout, indent); */ this.configuration.wraplen = savewraplen; } /** * JSTE also supports <# ... #> syntax * @param fout * @param indent * @param node */ private void printJste(Out fout, int indent, Node node) { int savewraplen = this.configuration.wraplen; // disable wrapping if so requested if (!this.configuration.wrapJste) { this.configuration.wraplen = 0xFFFFFF; // a very large number } addC('<', linelen++); addC('#', linelen++); printText(fout, (this.configuration.wrapJste ? CDATA : COMMENT), indent, node.textarray, node.start, node.end); addC('#', linelen++); addC('>', linelen++); // condFlushLine(fout, indent); this.configuration.wraplen = savewraplen; } /** * PHP is based on XML processing instructions. * @param fout * @param indent * @param node */ private void printPhp(Out fout, int indent, Node node) { int savewraplen = this.configuration.wraplen; // disable wrapping if so requested if (!this.configuration.wrapPhp) { this.configuration.wraplen = 0xFFFFFF; // a very large number } addC('<', linelen++); addC('?', linelen++); printText(fout, (this.configuration.wrapPhp ? 
CDATA : COMMENT), indent, node.textarray, node.start, node.end); addC('?', linelen++); addC('>', linelen++); // PCondFlushLine(fout, indent); this.configuration.wraplen = savewraplen; } /** * @param fout * @param indent * @param node */ private void printCDATA(Out fout, int indent, Node node) { int savewraplen = this.configuration.wraplen; if (!this.configuration.indentCdata) { indent = 0; } condFlushLine(fout, indent); // disable wrapping this.configuration.wraplen = 0xFFFFFF; // a very large number addC('<', linelen++); addC('!', linelen++); addC('[', linelen++); addC('C', linelen++); addC('D', linelen++); addC('A', linelen++); addC('T', linelen++); addC('A', linelen++); addC('[', linelen++); printText(fout, COMMENT, indent, node.textarray, node.start, node.end); addC(']', linelen++); addC(']', linelen++); addC('>', linelen++); condFlushLine(fout, indent); this.configuration.wraplen = savewraplen; } /** * @param fout * @param indent * @param node */ private void printSection(Out fout, int indent, Node node) { int savewraplen = this.configuration.wraplen; // disable wrapping if so requested if (!this.configuration.wrapSection) { this.configuration.wraplen = 0xFFFFFF; // a very large number } addC('<', linelen++); addC('!', linelen++); addC('[', linelen++); printText( fout, (this.configuration.wrapSection ? CDATA : COMMENT), indent, node.textarray, node.start, node.end); addC(']', linelen++); addC('>', linelen++); // PCondFlushLine(fout, indent); this.configuration.wraplen = savewraplen; } /** * Is the current node inside HEAD? * @param node Node * @return <code>true</code> if node is inside an HEAD tag */ private boolean insideHead(Node node) { if (node.tag == this.configuration.tt.tagHead) { return true; } if (node.parent != null) { return insideHead(node.parent); } return false; } /** * Is text node and already ends w/ a newline? Used to pretty print CDATA/PRE text content. 
If it already ends on a * newline, it is not necessary to print another before printing end tag. * @param lexer Lexer * @param node text node * @return text indent */ private int textEndsWithNewline(Lexer lexer, Node node) { if (node.type == Node.TEXT_NODE && node.end > node.start) { int ch, ix = node.end - 1; // Skip non-newline whitespace while (ix >= node.start && TidyUtils.toBoolean(ch = (node.textarray[ix] & 0xff)) && (ch == ' ' || ch == '\t' || ch == '\r')) { --ix; } if (ix >= 0 && node.textarray[ix] == '\n') { return node.end - ix - 1; // #543262 tidy eats all memory } } return -1; } /** * Does the current node contain a CDATA section? * @param lexer Lexer * @param node Node * @return <code>true</code> if node contains a CDATA section */ static boolean hasCDATA(Lexer lexer, Node node) { // Scan forward through the textarray. Since the characters we're // looking for are < 0x7f, we don't have to do any UTF-8 decoding. if (node.type != Node.TEXT_NODE) { return false; } int len = node.end - node.start + 1; String start = TidyUtils.getString(node.textarray, node.start, len); int indexOfCData = start.indexOf(CDATA_START); return indexOfCData > -1 && indexOfCData <= len; } /** * Print script and style elements. 
For XHTML, wrap the content as follows: * * <pre> * JavaScript: * //<![CDATA[ * content * //]]> * VBScript: * '<![CDATA[ * content * ']]> * CSS: * /*<![CDATA[* / * content * /*]]>* / * other: * <![CDATA[ * content * ]]> * </pre> * * @param fout * @param mode * @param indent * @param lexer * @param node */ private void printScriptStyle(Out fout, short mode, int indent, Lexer lexer, Node node) { Node content; String commentStart = DEFAULT_COMMENT_START; String commentEnd = DEFAULT_COMMENT_END; boolean hasCData = false; int contentIndent = -1; if (insideHead(node)) { // flushLine(fout, indent); } indent = 0; // start script printTag(lexer, fout, mode, indent, node); // flushLine(fout, indent); // extra newline if (lexer.configuration.xHTML && node.content != null) { AttVal type = node.getAttrByName("type"); if (type != null) { if ("text/javascript".equalsIgnoreCase(type.value)) { commentStart = JS_COMMENT_START; commentEnd = JS_COMMENT_END; } else if ("text/css".equalsIgnoreCase(type.value)) { commentStart = CSS_COMMENT_START; commentEnd = CSS_COMMENT_END; } else if ("text/vbscript".equalsIgnoreCase(type.value)) { commentStart = VB_COMMENT_START; commentEnd = VB_COMMENT_END; } } hasCData = hasCDATA(lexer, node.content); if (!hasCData) { // disable wrapping int savewraplen = lexer.configuration.wraplen; lexer.configuration.wraplen = 0xFFFFFF; // a very large number linelen = addAsciiString(commentStart, linelen); linelen = addAsciiString(CDATA_START, linelen); linelen = addAsciiString(commentEnd, linelen); condFlushLine(fout, indent); // restore wrapping lexer.configuration.wraplen = savewraplen; } } for (content = node.content; content != null; content = content.next) { printTree(fout, (short) (mode | PREFORMATTED | NOWRAP | CDATA), 0, lexer, content); if (content.next == null) { contentIndent = textEndsWithNewline(lexer, content); } } if (contentIndent < 0) { condFlushLine(fout, indent); contentIndent = 0; } if (lexer.configuration.xHTML && node.content != null) { if 
(!hasCData) { // disable wrapping int ix, savewraplen = lexer.configuration.wraplen; lexer.configuration.wraplen = 0xFFFFFF; // a very large number // Add spaces to last text node to align w/ indent if (contentIndent > 0 && linelen < contentIndent) { linelen = contentIndent; } for (ix = 0; contentIndent < indent && ix < indent - contentIndent; ++ix) { addC(' ', linelen++); } linelen = addAsciiString(commentStart, linelen); linelen = addAsciiString(CDATA_END, linelen); linelen = addAsciiString(commentEnd, linelen); // restore wrapping lexer.configuration.wraplen = savewraplen; condFlushLine(fout, 0); } } printEndTag(mode, indent, node); if (!lexer.configuration.indentContent && node.next != null && !((node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE)) || node.type != Node.TEXT_NODE )) { flushLine(fout, indent); } flushLine(fout, indent); } /** * Should tidy indent the give tag? * @param node actual node * @return <code>true</code> if line should be indented */ private boolean shouldIndent(Node node) { TagTable tt = this.configuration.tt; if (!this.configuration.indentContent) { return false; } if (this.configuration.smartIndent) { if (node.content != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_NO_INDENT)) { for (node = node.content; node != null; node = node.next) { if (node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_BLOCK)) { return true; } } return false; } if (TidyUtils.toBoolean(node.tag.model & Dict.CM_HEADING)) { return false; } if (node.tag == tt.tagP) { return false; } if (node.tag == tt.tagTitle) { return false; } } if (TidyUtils.toBoolean(node.tag.model & (Dict.CM_FIELD | Dict.CM_OBJECT))) { return true; } if (node.tag == tt.tagMap) { return true; } return !TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE); } /** * Print just the content of the body element. Useful when you want to reuse material from other documents. 
 * @param fout output destination handed on to printTree
     * @param lexer lexer whose configuration supplies the tag table used to locate the body element
     * @param root root of the parsed tree; nothing is printed when it is <code>null</code> or has no body
     * @param xml not referenced by this method
     */
    void printBody(Out fout, Lexer lexer, Node root, boolean xml)
    {
        if (root == null)
        {
            return;
        }

        // Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01
        // Sebastiano Vigna <vigna@dsi.unimi.it>
        Node body = root.findBody(lexer.configuration.tt);

        if (body != null)
        {
            // print every child of body with mode 0 and no indentation
            Node content;
            for (content = body.content; content != null; content = content.next)
            {
                printTree(fout, (short) 0, 0, lexer, content);
            }
        }
    }

    /**
     * Pretty prints the given node and, recursively, its content as HTML. The node type selects the printer: text,
     * comments, doctype, processing instructions, XML declarations, CDATA, sections and ASP/JSTE/PHP blocks are passed
     * to their dedicated print methods; empty elements are printed as a single tag; every other element is treated as
     * a container (pre, style/script, inline or "other") whose content is printed between its start and end tag.
     * @param fout output destination passed to the flush and print methods
     * @param mode bit set of print modes; PREFORMATTED, NOWRAP and CDATA are OR-ed in for pre, nobr and style/script
     * content
     * @param indent current indentation
     * @param lexer lexer, used for its configuration and for the <code>seenEndHtml</code> flag
     * @param node node to print; <code>null</code> is ignored
     */
    public void printTree(Out fout, short mode, int indent, Lexer lexer, Node node)
    {
        Node content, last;
        TagTable tt = this.configuration.tt;

        if (node == null)
        {
            return;
        }

        // CDATA sections are printed as plain text when escapeCdata is set
        if (node.type == Node.TEXT_NODE || (node.type == Node.CDATA_TAG && lexer.configuration.escapeCdata))
        {
            printText(fout, mode, indent, node.textarray, node.start, node.end);
        }
        else if (node.type == Node.COMMENT_TAG)
        {
            printComment(fout, indent, node);
        }
        else if (node.type == Node.ROOT_NODE)
        {
            for (content = node.content; content != null; content = content.next)
            {
                printTree(fout, mode, indent, lexer, content);
            }
        }
        else if (node.type == Node.DOCTYPE_TAG)
        {
            printDocType(fout, indent, lexer, node);
        }
        else if (node.type == Node.PROC_INS_TAG)
        {
            printPI(fout, indent, node);
        }
        else if (node.type == Node.XML_DECL)
        {
            printXmlDecl(fout, indent, node);
        }
        else if (node.type == Node.CDATA_TAG)
        {
            printCDATA(fout, indent, node);
        }
        else if (node.type == Node.SECTION_TAG)
        {
            printSection(fout, indent, node);
        }
        else if (node.type == Node.ASP_TAG)
        {
            printAsp(fout, indent, node);
        }
        else if (node.type == Node.JSTE_TAG)
        {
            printJste(fout, indent, node);
        }
        else if (node.type == Node.PHP_TAG)
        {
            printPhp(fout, indent, node);
        }
        else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_EMPTY)
            || (node.type == Node.START_END_TAG && !configuration.xHTML))
        {
            // empty element: only a single tag is printed, there is no content and no end tag
            if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
            {
                condFlushLine(fout, indent);
            }

            // optional line break before a br that does not directly follow another br
            if (node.tag == tt.tagBr
                && node.prev != null
                && node.prev.tag != tt.tagBr
                && this.configuration.breakBeforeBR)
            {
                flushLine(fout, indent);
            }

            // with makeClean a wbr element is replaced by the literal string below
            if (this.configuration.makeClean && node.tag == tt.tagWbr)
            {
                printString(" ");
            }
            else
            {
                printTag(lexer, fout, mode, indent, node);
            }

            if (node.tag == tt.tagParam || node.tag == tt.tagArea)
            {
                condFlushLine(fout, indent);
            }
            else if (node.tag == tt.tagBr || node.tag == tt.tagHr)
            {
                flushLine(fout, indent);
            }
        }
        else
        {
            // a start-end tag reaching this point is printed as a start tag with a separate end tag
            if (node.type == Node.START_END_TAG)
            {
                node.type = Node.START_TAG;
            }

            // some kind of container element
            if (node.tag != null && node.tag.getParser() == ParserImpl.PRE)
            {
                condFlushLine(fout, indent);

                // preformatted content is printed without indentation
                indent = 0;
                condFlushLine(fout, indent);
                printTag(lexer, fout, mode, indent, node);
                flushLine(fout, indent);

                for (content = node.content; content != null; content = content.next)
                {
                    printTree(fout, (short) (mode | PREFORMATTED | NOWRAP), indent, lexer, content);
                }

                condFlushLine(fout, indent);
                printEndTag(mode, indent, node);
                flushLine(fout, indent);

                if (!this.configuration.indentContent && node.next != null)
                {
                    flushLine(fout, indent);
                }
            }
            else if (node.tag == tt.tagStyle || node.tag == tt.tagScript)
            {
                printScriptStyle(fout, (short) (mode | PREFORMATTED | NOWRAP | CDATA), indent, lexer, node);
            }
            else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
            {
                if (this.configuration.makeClean)
                {
                    // discards <font> and </font> tags
                    if (node.tag == tt.tagFont)
                    {
                        for (content = node.content; content != null; content = content.next)
                        {
                            printTree(fout, mode, indent, lexer, content);
                        }
                        return;
                    }

                    // replace <nobr> ... </nobr> by &nbsp; or &#160; etc.
                    if (node.tag == tt.tagNobr)
                    {
                        for (content = node.content; content != null; content = content.next)
                        {
                            printTree(fout, (short) (mode | NOWRAP), indent, lexer, content);
                        }
                        return;
                    }
                }

                // otherwise a normal inline element
                printTag(lexer, fout, mode, indent, node);

                // indent content for SELECT, TEXTAREA, MAP, OBJECT and APPLET
                if (shouldIndent(node))
                {
                    condFlushLine(fout, indent);
                    indent += this.configuration.spaces;

                    for (content = node.content; content != null; content = content.next)
                    {
                        printTree(fout, mode, indent, lexer, content);
                    }

                    condFlushLine(fout, indent);
                    // restore the caller's indentation before printing the end tag
                    indent -= this.configuration.spaces;
                    condFlushLine(fout, indent);
                }
                else
                {
                    for (content = node.content; content != null; content = content.next)
                    {
                        printTree(fout, mode, indent, lexer, content);
                    }
                }

                printEndTag(mode, indent, node);
            }
            else
            {
                // other tags
                condFlushLine(fout, indent);

                if (this.configuration.smartIndent && node.prev != null)
                {
                    flushLine(fout, indent);
                }

                // do not omit elements with attributes
                if (!this.configuration.hideEndTags
                    || !(node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_OMITST))
                    || node.attributes != null)
                {
                    printTag(lexer, fout, mode, indent, node);

                    if (shouldIndent(node))
                    {
                        condFlushLine(fout, indent);
                    }
                    else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_HTML)
                        || node.tag == tt.tagNoframes
                        || (TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD) && !(node.tag == tt.tagTitle)))
                    {
                        flushLine(fout, indent);
                    }
                }

                // when bursting slides, the body content is printed by printSlide instead of the loop below
                if (node.tag == tt.tagBody && this.configuration.burstSlides)
                {
                    printSlide(fout, mode, (this.configuration.indentContent
                        ? indent + this.configuration.spaces
                        : indent), lexer);
                }
                else
                {
                    last = null;

                    for (content = node.content; content != null; content = content.next)
                    {
                        // kludge for naked text before block level tag
                        if (last != null
                            && !this.configuration.indentContent
                            && last.type == Node.TEXT_NODE
                            && content.tag != null
                            && !TidyUtils.toBoolean(content.tag.model & Dict.CM_INLINE))
                        {
                            flushLine(fout, indent);
                        }

                        printTree(
                            fout,
                            mode,
                            (shouldIndent(node) ? indent + this.configuration.spaces : indent),
                            lexer,
                            content);

                        last = content;
                    }
                }

                // don't flush line for td and th
                if (shouldIndent(node)
                    || ((TidyUtils.toBoolean(node.tag.model & Dict.CM_HTML) || node.tag == tt.tagNoframes || //
                    (TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD) && !(node.tag == tt.tagTitle))) && //
                    !this.configuration.hideEndTags))
                {
                    condFlushLine(
                        fout,
                        (this.configuration.indentContent ? indent + this.configuration.spaces : indent));

                    // the end tag is skipped only when hideEndTags is set and the tag is flagged CM_OPT
                    if (!this.configuration.hideEndTags || !TidyUtils.toBoolean(node.tag.model & Dict.CM_OPT))
                    {
                        printEndTag(mode, indent, node);

                        // #603128 tidy adds newlines after </html> tag
                        // Fix by Fabrizio Giustina 12-02-2004
                        // fix is different from the one in original tidy
                        if (!lexer.seenEndHtml)
                        {
                            flushLine(fout, indent);
                        }
                    }
                }
                else
                {
                    if (!this.configuration.hideEndTags || !TidyUtils.toBoolean(node.tag.model & Dict.CM_OPT))
                    {
                        printEndTag(mode, indent, node);
                    }

                    flushLine(fout, indent);
                }

                // FG commented out: double newlines
                // if (!this.configuration.indentContent
                // && node.next != null
                // && !this.configuration.hideEndTags
                // && (node.tag.model
                // & TidyUtils.toBoolean(Dict.CM_BLOCK | Dict.CM_TABLE | Dict.CM_LIST | Dict.CM_DEFLIST)))
                // {
                // flushLine(fout, indent);
                // }
            }
        }
    }

    /**
     * Pretty prints the given node and, recursively, its content as XML. Non-element nodes are dispatched by type as
     * in printTree. A container element that has a text node among its direct children ("mixed" content) has its
     * content printed at the element's own indentation with no line break after the start tag; otherwise the content
     * is printed on separate lines, indented by <code>configuration.spaces</code>.
     * @param fout output destination passed to the flush and print methods
     * @param mode print mode, passed through unchanged
     * @param indent current indentation
     * @param lexer lexer, used for its configuration
     * @param node node to print; <code>null</code> is ignored
     */
    public void printXMLTree(Out fout, short mode, int indent, Lexer lexer, Node node)
    {
        TagTable tt = this.configuration.tt;

        if (node == null)
        {
            return;
        }

        // CDATA sections are printed as plain text when escapeCdata is set
        if (node.type == Node.TEXT_NODE || (node.type == Node.CDATA_TAG && lexer.configuration.escapeCdata))
        {
            printText(fout, mode, indent, node.textarray, node.start, node.end);
        }
        else if (node.type == Node.COMMENT_TAG)
        {
            // comments go on a line of their own and are printed with indent 0
            condFlushLine(fout, indent);
            printComment(fout, 0, node);
            condFlushLine(fout, 0);
        }
        else if (node.type == Node.ROOT_NODE)
        {
            Node content;

            for (content = node.content; content != null; content = content.next)
            {
                printXMLTree(fout, mode, indent, lexer, content);
            }
        }
        else if (node.type == Node.DOCTYPE_TAG)
        {
            printDocType(fout, indent, lexer, node);
        }
        else if (node.type == Node.PROC_INS_TAG)
        {
            printPI(fout, indent, node);
        }
        else if (node.type == Node.XML_DECL)
        {
            printXmlDecl(fout, indent, node);
        }
        else if (node.type == Node.CDATA_TAG)
        {
            printCDATA(fout, indent, node);
        }
        else if (node.type == Node.SECTION_TAG)
        {
            printSection(fout, indent, node);
        }
        else if (node.type == Node.ASP_TAG)
        {
            printAsp(fout, indent, node);
        }
        else if (node.type == Node.JSTE_TAG)
        {
            printJste(fout, indent, node);
        }
        else if (node.type == Node.PHP_TAG)
        {
            printPhp(fout, indent, node);
        }
        else if (TidyUtils.toBoolean(node.tag.model & Dict.CM_EMPTY)
            || node.type == Node.START_END_TAG
            && !configuration.xHTML)
        {
            // empty element: a single tag on its own line
            condFlushLine(fout, indent);
            printTag(lexer, fout, mode, indent, node);
            // fgiust: Remove empty lines between tags in XML.
            // flushLine(fout, indent);

            // CPR: folks don't want so much vertical spacing in XML
            // if (node.next != null) { flushLine(fout, indent); }
        }
        else
        {
            // some kind of container element
            Node content;
            boolean mixed = false;
            int cindent;

            // content is "mixed" as soon as one direct child is a text node
            for (content = node.content; content != null; content = content.next)
            {
                if (content.type == Node.TEXT_NODE)
                {
                    mixed = true;
                    break;
                }
            }

            condFlushLine(fout, indent);

            // cindent is the indentation used for the children
            if (ParserImpl.XMLPreserveWhiteSpace(node, tt))
            {
                indent = 0;
                cindent = 0;
                mixed = false;
            }
            else if (mixed)
            {
                cindent = indent;
            }
            else
            {
                cindent = indent + this.configuration.spaces;
            }

            printTag(lexer, fout, mode, indent, node);

            if (!mixed && node.content != null)
            {
                flushLine(fout, indent);
            }

            for (content = node.content; content != null; content = content.next)
            {
                printXMLTree(fout, mode, cindent, lexer, content);
            }

            if (!mixed && node.content != null)
            {
                condFlushLine(fout, cindent);
            }

            printEndTag(mode, indent, node);
            // condFlushLine(fout, indent);

            // CPR: folks don't want so much vertical spacing in XML
            // if (node.next != null) { flushLine(fout, indent); }
        }
    }

    /**
     * Split parse tree by h2 elements and output to separate files. Counts number of h2 children (if any) belonging to
     * node.
* @param node root node * @return number of slides (number of h2 elements) */ public int countSlides(Node node) { // assume minimum of 1 slide int n = 1; TagTable tt = this.configuration.tt; // fix for [431716] avoid empty slides if (node != null && node.content != null && node.content.tag == tt.tagH2) { // "first" slide is empty, so ignore it n--; } if (node != null) { for (node = node.content; node != null; node = node.next) { if (node.tag == tt.tagH2) { ++n; } } } return n; } /** * @param fout * @param indent */ private void printNavBar(Out fout, int indent) { String buf; condFlushLine(fout, indent); printString("<center><small>"); NumberFormat numberFormat = NumberFormat.getInstance(); numberFormat.setMinimumIntegerDigits(3); if (slide > 1) { buf = "<a href=\"slide" + numberFormat.format(slide - 1) + ".html\">previous</a> | "; // #427666 - fix by Eric Rossen 02 Aug 00 printString(buf); condFlushLine(fout, indent); if (slide < count) { printString("<a href=\"slide001.html\">start</a> | "); // #427666 - fix by Eric Rossen 02 Aug 00 } else { printString("<a href=\"slide001.html\">start</a>"); // #427666 - fix by Eric Rossen 02 Aug 00 } condFlushLine(fout, indent); } if (slide < count) { buf = "<a href=\"slide" + numberFormat.format(slide + 1) + ".html\">next</a>"; // #427666 - fix by Eric Rossen 02 Aug 00 printString(buf); } printString("</small></center>"); condFlushLine(fout, indent); } /** * Called from printTree to print the content of a slide from the node slidecontent. On return slidecontent points * to the node starting the next slide or null. The variables slide and count are used to customise the navigation * bar. 
* @param fout * @param mode * @param indent * @param lexer */ public void printSlide(Out fout, short mode, int indent, Lexer lexer) { Node content, last; TagTable tt = this.configuration.tt; NumberFormat numberFormat = NumberFormat.getInstance(); numberFormat.setMinimumIntegerDigits(3); /* insert div for onclick handler */ String s; s = "<div onclick=\"document.location='slide" + numberFormat.format(slide < count ? slide + 1 : 1) + ".html'\">"; // #427666 - fix by Eric Rossen 02 Aug 00 printString(s); condFlushLine(fout, indent); /* first print the h2 element and navbar */ if (slidecontent != null && slidecontent.tag == tt.tagH2) { printNavBar(fout, indent); /* now print an hr after h2 */ addC('<', linelen++); addC(TidyUtils.foldCase('h', this.configuration.upperCaseTags, this.configuration.xmlTags), linelen++); addC(TidyUtils.foldCase('r', this.configuration.upperCaseTags, this.configuration.xmlTags), linelen++); if (this.configuration.xmlOut) { printString(" />"); } else { addC('>', linelen++); } if (this.configuration.indentContent) { condFlushLine(fout, indent); } // PrintVertSpacer(fout, indent); // condFlushLine(fout, indent); // print the h2 element printTree( fout, mode, (this.configuration.indentContent ? indent + this.configuration.spaces : indent), lexer, slidecontent); slidecontent = slidecontent.next; } // now continue until we reach the next h2 last = null; content = slidecontent; for (; content != null; content = content.next) { if (content.tag == tt.tagH2) { break; } // kludge for naked text before block level tag if (last != null && !this.configuration.indentContent && last.type == Node.TEXT_NODE && content.tag != null && TidyUtils.toBoolean(content.tag.model & Dict.CM_BLOCK)) { flushLine(fout, indent); flushLine(fout, indent); } printTree( fout, mode, (this.configuration.indentContent ? 
indent + this.configuration.spaces : indent), lexer, content); last = content; } slidecontent = content; // now print epilog condFlushLine(fout, indent); printString("<br clear=\"all\">"); condFlushLine(fout, indent); addC('<', linelen++); addC(TidyUtils.foldCase('h', this.configuration.upperCaseTags, this.configuration.xmlTags), linelen++); addC(TidyUtils.foldCase('r', this.configuration.upperCaseTags, this.configuration.xmlTags), linelen++); if (this.configuration.xmlOut) { printString(" />"); } else { addC('>', linelen++); } if (this.configuration.indentContent) { condFlushLine(fout, indent); } printNavBar(fout, indent); // end tag for div printString("</div>"); condFlushLine(fout, indent); } /** * Add meta element for page transition effect, this works on IE but not NS. * @param lexer * @param root * @param duration */ public void addTransitionEffect(Lexer lexer, Node root, double duration) { Node head = root.findHEAD(lexer.configuration.tt); String transition; transition = "blendTrans(Duration=" + (new Double(duration)).toString() + ")"; if (head != null) { Node meta = lexer.inferredTag("meta"); meta.addAttribute("http-equiv", "Page-Enter"); meta.addAttribute("content", transition); head.insertNodeAtStart(meta); } } /** * Creates slides from h2. 
* @param lexer Lexer * @param root root node */ public void createSlides(Lexer lexer, Node root) { Node body; String buf; NumberFormat numberFormat = NumberFormat.getInstance(); numberFormat.setMinimumIntegerDigits(3); body = root.findBody(lexer.configuration.tt); count = countSlides(body); slidecontent = body.content; addTransitionEffect(lexer, root, 3.0); for (slide = 1; slide <= count; ++slide) { buf = "slide" + numberFormat.format(slide) + ".html"; try { FileOutputStream fis = new FileOutputStream(buf); Out out = OutFactory.getOut(configuration, fis); printTree(out, (short) 0, 0, lexer, root); flushLine(out, 0); fis.close(); } catch (IOException e) { System.err.println(buf + e.toString()); } } // delete superfluous slides by deleting slideN.html for N = count+1, count+2, etc. // until no such file is found. // #427666 - fix by Eric Rossen 02 Aug 00 while ((new File("slide" + numberFormat.format(slide) + ".html")).delete()) { ++slide; } } }�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Attribute.java�����������������������������������������������������0000644�0001750�0001750�00000011312�10102754223�021627� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * 
Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. 
If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * HTML attribute. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 400 $ ($Author: fgiust $) */ public class Attribute { /** * attribute name. */ private String name; /** * don't wrap attribute. */ private boolean nowrap; /** * unmodifiable attribute? */ private boolean literal; /** * html versions for this attribute. */ private short versions; /** * checker for the attribute. */ private AttrCheck attrchk; /** * Instantiates a new Attribute. * @param attributeName attribute name * @param htmlVersions versions in which this attribute is supported * @param check AttrCheck instance */ public Attribute(String attributeName, short htmlVersions, AttrCheck check) { this.name = attributeName; this.versions = htmlVersions; this.attrchk = check; } /** * Is this a literal (unmodifiable) attribute? * @param isLiteral boolean <code>true</code> for a literal attribute */ public void setLiteral(boolean isLiteral) { this.literal = isLiteral; } /** * Don't wrap this attribute? * @param isNowrap boolean <code>true</code>= don't wrap */ public void setNowrap(boolean isNowrap) { this.nowrap = isNowrap; } /** * Returns the checker for this attribute. * @return instance of AttrCheck. */ public AttrCheck getAttrchk() { return this.attrchk; } /** * Is this a literal (unmodifiable) attribute? * @return <code>true</code> for a literal attribute */ public boolean isLiteral() { return this.literal; } /** * Returns the attribute name. * @return attribute name. */ public String getName() { return this.name; } /** * Don't wrap this attribute? 
* @return <code>true</code>= don't wrap */ public boolean isNowrap() { return this.nowrap; } /** * Returns the html versions in which this attribute is supported. * @return html versions for this attribute. * @see Dict */ public short getVersions() { return this.versions; } }����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Style.java���������������������������������������������������������0000644�0001750�0001750�00000006477�10116675277�021024� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Linked list of class names and styles. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 587 $ ($Author: fgiust $) */ public class Style { /** * Tag name. */ protected String tag; /** * Tag class. */ protected String tagClass; /** * Style properties. 
*/ protected String properties; /** * Next linked style element. */ protected Style next; /** * Instantiates a new style. * @param tag Tag name * @param tagClass Tag class * @param properties Style properties * @param next Next linked style element. Can be null. */ public Style(String tag, String tagClass, String properties, Style next) { this.tag = tag; this.tagClass = tagClass; this.properties = properties; this.next = next; } }�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMCommentImpl.java������������������������������������������������0000644�0001750�0001750�00000006252�10102754223�022457� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Tidy implementation of org.w3c.dom.Comment. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 400 $ ($Author: fgiust $) */ public class DOMCommentImpl extends DOMCharacterDataImpl implements org.w3c.dom.Comment { /** * Instantiates a new DOMCommentImpl which wraps the given Node. 
* @param adaptee wrapped node. */ protected DOMCommentImpl(Node adaptee) { super(adaptee); } /** * @see org.w3c.dom.Node#getNodeName */ public String getNodeName() { return "#comment"; } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { return org.w3c.dom.Node.COMMENT_NODE; } }������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Tidy.java����������������������������������������������������������0000644�0001750�0001750�00000210413�11470215135�020603� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PrintWriter; import java.io.Reader; import java.io.Serializable; import java.io.Writer; import java.util.HashMap; import java.util.Map; import java.util.Properties; /** * HTML parser and pretty printer. 
 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision: 1191 $ ($Author: aditsu $)
 */
public class Tidy implements Serializable
{

    /**
     * Serial Version UID to avoid problems during serialization.
     */
    static final long serialVersionUID = -2794371560623987718L;

    /**
     * Alias for configuration options accepted in command line. Keys are the command line names, values the
     * configuration option names they stand for.
     */
    private static final Map CMDLINE_ALIAS = new HashMap();

    static
    {
        CMDLINE_ALIAS.put("xml", "input-xml");
        CMDLINE_ALIAS.put("asxml", "output-xhtml");
        CMDLINE_ALIAS.put("asxhtml", "output-xhtml");
        CMDLINE_ALIAS.put("ashtml", "output-html");
        CMDLINE_ALIAS.put("omit", "hide-endtags");
        CMDLINE_ALIAS.put("upper", "uppercase-tags");
        CMDLINE_ALIAS.put("raw", "output-raw");
        CMDLINE_ALIAS.put("numeric", "numeric-entities");
        CMDLINE_ALIAS.put("change", "write-back");
        CMDLINE_ALIAS.put("update", "write-back");
        CMDLINE_ALIAS.put("modify", "write-back");
        CMDLINE_ALIAS.put("errors", "only-errors");
        CMDLINE_ALIAS.put("slides", "split");
        CMDLINE_ALIAS.put("lang", "language");
        CMDLINE_ALIAS.put("w", "wrap");
        CMDLINE_ALIAS.put("file", "error-file");
        CMDLINE_ALIAS.put("f", "error-file");
    }

    /**
     * Error output stream.
     */
    private PrintWriter errout;

    /**
     * Writer on System.err created by the constructor; the initial value of errout.
     */
    private PrintWriter stderr;

    /**
     * Tidy configuration.
     */
    private Configuration configuration;

    /**
     * Name of the input stream (printed in the header information).
     */
    private String inputStreamName = "InputStream";

    /**
     * Number of errors that occurred in the most recent parse operation.
     */
    private int parseErrors;

    /**
     * Number of warnings that occurred in the most recent parse operation.
     */
    private int parseWarnings;

    /**
     * Report instance created by the constructor and handed to the configuration.
     */
    private Report report;

    /**
     * Instantiates a new Tidy instance. It's recommended that a new instance is used at each parsing.
 */
    public Tidy()
    {
        this.report = new Report();
        configuration = new Configuration(this.report);

        // the tag table and the configuration reference each other
        TagTable tt = new TagTable();
        tt.setConfiguration(configuration);
        configuration.tt = tt;

        configuration.errfile = null;
        // auto-flushing writer on System.err, used as error output until setErrout is called
        stderr = new PrintWriter(System.err, true);
        errout = stderr;
    }

    /**
     * Returns the actual configuration
     * @return tidy configuration
     */
    public Configuration getConfiguration()
    {
        return configuration;
    }

    /**
     * Returns the writer on System.err created by the constructor.
     * @return writer on System.err
     */
    public PrintWriter getStderr()
    {
        return stderr;
    }

    /**
     * ParseErrors - the number of errors that occurred in the most recent parse operation.
     * @return number of errors that occurred in the most recent parse operation.
     */
    public int getParseErrors()
    {
        return parseErrors;
    }

    /**
     * ParseWarnings - the number of warnings that occurred in the most recent parse operation.
     * @return number of warnings that occurred in the most recent parse operation.
     */
    public int getParseWarnings()
    {
        return parseWarnings;
    }

    /**
     * InputStreamName - the name of the input stream (printed in the header information).
     * @param name input stream name; a <code>null</code> value is ignored and the current name is kept
     */
    public void setInputStreamName(String name)
    {
        if (name != null)
        {
            inputStreamName = name;
        }
    }

    /**
     * InputStreamName - the name of the input stream (printed in the header information).
     * @return input stream name, "InputStream" unless another name has been set
     */
    public String getInputStreamName()
    {
        return inputStreamName;
    }

    /**
     * Errout - the error output stream.
     * @return error output stream.
     */
    public PrintWriter getErrout()
    {
        return errout;
    }

    /**
     * Errout - the error output stream.
     * @param out new error output stream
     */
    public void setErrout(PrintWriter out)
    {
        this.errout = out;
    }

    /**
     * Sets the configuration from a configuration file.
     * @param filename configuration file name/path.
     */
    public void setConfigurationFromFile(String filename)
    {
        configuration.parseFile(filename);
    }

    /**
     * Sets the configuration from a properties object.
     * @param props Properties object
     */
    public void setConfigurationFromProps(Properties props)
    {
        configuration.addProps(props);
    }

    /**
     * Creates an empty DOM Document.
* @return a new org.w3c.dom.Document */ public static org.w3c.dom.Document createEmptyDocument() { Node document = new Node(Node.ROOT_NODE, new byte[0], 0, 0); Node node = new Node(Node.START_TAG, new byte[0], 0, 0, "html", new TagTable()); if (document != null && node != null) { document.insertNodeAtStart(node); return (org.w3c.dom.Document) document.getAdapter(); } return null; } /** * Reads from the given input and returns the root Node. If out is non-null, pretty prints to out. Warning: caller * is responsible for calling close() on input and output after calling this method. * @param in input * @param out optional destination for pretty-printed document * @return parsed org.w3c.tidy.Node */ public Node parse(InputStream in, OutputStream out) { StreamIn streamIn = StreamInFactory.getStreamIn(configuration, in); Out o = null; if (out != null) { o = OutFactory.getOut(this.configuration, out); // normal output stream } return parse(streamIn, o); } /** * Reads from the given input and returns the root Node. If out is non-null, pretty prints to out. Warning: caller * is responsible for calling close() on input and output after calling this method. * @param in input * @param out optional destination for pretty-printed document * @return parsed org.w3c.tidy.Node */ public Node parse(Reader in, OutputStream out) { StreamIn streamIn = StreamInFactory.getStreamIn(configuration, in); Out o = null; if (out != null) { o = OutFactory.getOut(this.configuration, out); // normal output stream } return parse(streamIn, o); } /** * Reads from the given input and returns the root Node. If out is non-null, pretty prints to out. Warning: caller * is responsible for calling close() on input and output after calling this method. 
* @param in input * @param out optional destination for pretty-printed document * @return parsed org.w3c.tidy.Node */ public Node parse(Reader in, Writer out) { StreamIn streamIn = StreamInFactory.getStreamIn(configuration, in); Out o = null; if (out != null) { o = OutFactory.getOut(this.configuration, out); // normal output stream } return parse(streamIn, o); } /** * Reads from the given input and returns the root Node. If out is non-null, pretty prints to out. Warning: caller * is responsible for calling close() on input and output after calling this method. * @param in input * @param out optional destination for pretty-printed document * @return parsed org.w3c.tidy.Node */ public Node parse(InputStream in, Writer out) { StreamIn streamIn = StreamInFactory.getStreamIn(configuration, in); Out o = null; if (out != null) { o = OutFactory.getOut(this.configuration, out); // normal output stream } return parse(streamIn, o); } /** * Parses InputStream in and returns a DOM Document node. If out is non-null, pretty prints to OutputStream out. * @param in input stream * @param out optional output stream * @return parsed org.w3c.dom.Document */ public org.w3c.dom.Document parseDOM(InputStream in, OutputStream out) { Node document = parse(in, out); if (document != null) { return (org.w3c.dom.Document) document.getAdapter(); } return null; } public org.w3c.dom.Document parseDOM(Reader in, Writer out) { Node document = parse(in, out); if (document != null) { return (org.w3c.dom.Document) document.getAdapter(); } return null; } /** * Pretty-prints a DOM Document. Must be an instance of org.w3c.tidy.DOMDocumentImpl. Caller is responsible for * closing the outputStream after calling this method. * @param doc org.w3c.dom.Document * @param out output stream */ public void pprint(org.w3c.dom.Document doc, OutputStream out) { if (!(doc instanceof DOMDocumentImpl)) { // @todo should we inform users that tidy can't print a generic Document or change the method signature? 
return; } pprint(((DOMDocumentImpl) doc).adaptee, out); } /** * Pretty-prints a DOM Node. Caller is responsible for closing the outputStream after calling this method. * @param node org.w3c.dom.Node. Must be an instance of org.w3c.tidy.DOMNodeImpl. * @param out output stream */ public void pprint(org.w3c.dom.Node node, OutputStream out) { if (!(node instanceof DOMNodeImpl)) { // @todo should we inform users than tidy can't print a generic Node or change the method signature? return; } pprint(((DOMNodeImpl) node).adaptee, out); } /** * Internal routine that actually does the parsing. * @param streamIn tidy StreamIn * @param o tidy Out * @return parsed org.w3c.tidy.Node */ private Node parse(StreamIn streamIn, Out o) { Lexer lexer; Node document = null; Node doctype; PPrint pprint; if (errout == null) { return null; } // ensure config is self-consistent configuration.adjust(); parseErrors = 0; parseWarnings = 0; lexer = new Lexer(streamIn, configuration, this.report); lexer.errout = errout; // store pointer to lexer in input stream to allow character encoding errors to be reported streamIn.setLexer(lexer); this.report.setFilename(inputStreamName); // #431895 - fix by Dave Bryan 04 Jan 01 // Tidy doesn't alter the doctype for generic XML docs if (configuration.xmlTags) { document = ParserImpl.parseXMLDocument(lexer); if (!document.checkNodeIntegrity()) { if (!configuration.quiet) { report.badTree(errout); } return null; } } else { lexer.warnings = 0; document = ParserImpl.parseDocument(lexer); if (!document.checkNodeIntegrity()) { if (!configuration.quiet) { this.report.badTree(errout); } return null; } Clean cleaner = new Clean(configuration.tt); // simplifies <b><b> ... </b> ... </b> etc. cleaner.nestedEmphasis(document); // cleans up <dir> indented text </dir> etc. 
cleaner.list2BQ(document); cleaner.bQ2Div(document); // replaces i by em and b by strong if (configuration.logicalEmphasis) { cleaner.emFromI(document); } if (configuration.word2000 && cleaner.isWord2000(document)) { // prune Word2000's <![if ...]> ... <![endif]> cleaner.dropSections(lexer, document); // drop style & class attributes and empty p, span elements cleaner.cleanWord2000(lexer, document); } // replaces presentational markup by style rules if (configuration.makeClean || configuration.dropFontTags) { cleaner.cleanTree(lexer, document); } if (!document.checkNodeIntegrity()) { this.report.badTree(errout); return null; } doctype = document.findDocType(); // remember given doctype if (doctype != null) { doctype = doctype.cloneNode(false); } if (document.content != null) { if (configuration.xHTML) { lexer.setXHTMLDocType(document); } else { lexer.fixDocType(document); } if (configuration.tidyMark) { lexer.addGenerator(document); } } // ensure presence of initial <?XML version="1.0"?> if (configuration.xmlOut && configuration.xmlPi) { lexer.fixXmlDecl(document); } if (!configuration.quiet && document.content != null) { this.report.reportVersion(errout, lexer, inputStreamName, doctype); } } if (!configuration.quiet) { parseWarnings = lexer.warnings; parseErrors = lexer.errors; this.report.reportNumWarnings(errout, lexer); } if (!configuration.quiet && lexer.errors > 0 && !configuration.forceOutput) { this.report.needsAuthorIntervention(errout); } if (!configuration.onlyErrors && (lexer.errors == 0 || configuration.forceOutput)) { if (configuration.burstSlides) { Node body; body = null; // remove doctype to avoid potential clash with markup introduced when bursting into slides // discard the document type doctype = document.findDocType(); if (doctype != null) { Node.discardElement(doctype); } /* slides use transitional features */ lexer.versions |= Dict.VERS_HTML40_LOOSE; // and patch up doctype to match if (configuration.xHTML) { lexer.setXHTMLDocType(document); 
} else { lexer.fixDocType(document); } // find the body element which may be implicit body = document.findBody(configuration.tt); if (body != null) { pprint = new PPrint(configuration); if (!configuration.quiet) { this.report.reportNumberOfSlides(errout, pprint.countSlides(body)); } pprint.createSlides(lexer, document); } else if (!configuration.quiet) { this.report.missingBody(errout); } } else if (o != null) { pprint = new PPrint(configuration); if (document.findDocType() == null) { // only use numeric character references if no doctype could be determined (e.g., because // the document contains proprietary features) to ensure well-formedness. configuration.numEntities = true; } if (configuration.bodyOnly) { // Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 pprint.printBody(o, lexer, document, configuration.xmlOut); } else if (configuration.xmlOut && !configuration.xHTML) { pprint.printXMLTree(o, (short) 0, 0, lexer, document); } else { pprint.printTree(o, (short) 0, 0, lexer, document); } pprint.flushLine(o, 0); o.flush(); } } if (!configuration.quiet) { this.report.errorSummary(lexer); } return document; } /** * Internal routine that actually does the parsing. The caller can pass either an InputStream or file name. If both * are passed, the file name is preferred. 
* @param in input stream (used only if <code>file</code> is null) * @param file file name * @param out output stream * @return parsed org.w3c.tidy.Node * @throws FileNotFoundException if <code>file</code> is not null but it can't be found * @throws IOException for errors in reading input stream or file */ private Node parse(InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException { StreamIn streamIn; Out o = null; boolean inputStreamOpen = false; boolean outputStreamOpen = false; if (file != null) { in = new FileInputStream(file); inputStreamOpen = true; inputStreamName = file; } else if (in == null) { in = System.in; inputStreamName = "stdin"; } streamIn = StreamInFactory.getStreamIn(configuration, in); if (configuration.writeback && (file != null)) { out = new FileOutputStream(file); outputStreamOpen = true; } if (out != null) { o = OutFactory.getOut(this.configuration, out); // normal output stream } Node node = parse(streamIn, o); // Try to close the InputStream but only if if we created it. if (inputStreamOpen) { try { in.close(); } catch (IOException e) { // ignore } } // Try to close the OutputStream but only if if we created it. if (outputStreamOpen) { try { out.close(); } catch (IOException e) { // ignore } } return node; } /** * Pretty-prints a tidy Node. * @param node org.w3c.tidy.Node * @param out output stream */ private void pprint(Node node, OutputStream out) { PPrint pprint; if (out != null) { Out o = OutFactory.getOut(this.configuration, out); Lexer lexer = new Lexer(null, this.configuration, this.report); pprint = new PPrint(configuration); if (configuration.xmlTags) { pprint.printXMLTree(o, (short) 0, 0, lexer, node); } else { pprint.printTree(o, (short) 0, 0, lexer, node); } pprint.flushLine(o, 0); o.flush(); } } /** * Command line interface to parser and pretty printer. 
* @param argv command line parameters */ public static void main(String[] argv) { Tidy tidy = new Tidy(); int returnCode = tidy.mainExec(argv); System.exit(returnCode); } /** * Main method, but returns the return code as an int instead of calling System.exit(code). Needed for testing main * method without shutting down tests. * @param argv command line parameters * @return return code */ protected int mainExec(String[] argv) { String file; int argCount = argv.length; int argIndex = 0; // read command line Properties properties = new Properties(); while (argCount > 0) { if (argv[argIndex].startsWith("-")) { // support -foo and --foo String argName = argv[argIndex].toLowerCase(); while (argName.length() > 0 && argName.charAt(0) == '-') { argName = argName.substring(1); } // "exclusive" options if (argName.equals("help") || argName.equals("h") || argName.equals("?")) { this.report.helpText(new PrintWriter(System.out, true)); return 0; } else if (argName.equals("help-config")) { configuration.printConfigOptions(new PrintWriter(System.out, true), false); return 0; } else if (argName.equals("show-config")) { configuration.adjust(); // ensure config is self-consistent configuration.printConfigOptions(errout, true); return 0; } else if (argName.equals("version") || argName.equals("v")) { this.report.showVersion(errout); return 0; } // optional value for non boolean options String argValue = null; if (argCount > 2 && !argv[argIndex + 1].startsWith("-")) { argValue = argv[argIndex + 1]; --argCount; ++argIndex; } // handle "special" aliases String alias = (String) CMDLINE_ALIAS.get(argName); if (alias != null) { argName = alias; } if (Configuration.isKnownOption(argName)) // handle any standard config option { properties.setProperty(argName, (argValue == null ? 
"" : argValue)); } else if (argName.equals("config")) // parse a property file { if (argValue != null) { configuration.parseFile(argValue); } } else if (TidyUtils.isCharEncodingSupported(argName)) // handle any encoding name { properties.setProperty("char-encoding", argName); } else { for (int i = 0; i < argName.length(); i++) { switch (argName.charAt(i)) { case 'i' : configuration.indentContent = true; configuration.smartIndent = true; break; case 'o' : configuration.hideEndTags = true; break; case 'u' : configuration.upperCaseTags = true; break; case 'c' : configuration.makeClean = true; break; case 'b' : configuration.makeBare = true; break; case 'n' : configuration.numEntities = true; break; case 'm' : configuration.writeback = true; break; case 'e' : configuration.onlyErrors = true; break; case 'q' : configuration.quiet = true; break; default : this.report.unknownOption(this.errout, argName.charAt(i)); break; } } } --argCount; ++argIndex; continue; } configuration.addProps(properties); // ensure config is self-consistent configuration.adjust(); // user specified error file if (configuration.errfile != null) { String errorfile = "stderr"; // is it same as the currently opened file? 
if (!configuration.errfile.equals(errorfile)) { // no so close previous error file if (this.errout != this.stderr) { this.errout.close(); } // and try to open the new error file try { this.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true)); errorfile = configuration.errfile; } catch (IOException e) { // can't be opened so fall back to stderr errorfile = "stderr"; this.setErrout(stderr); } } } if (argCount > 0) { file = argv[argIndex]; } else { file = "stdin"; } try { parse(null, file, System.out); } catch (FileNotFoundException fnfe) { this.report.unknownFile(this.errout, file); } catch (IOException ioe) { this.report.unknownFile(this.errout, file); } --argCount; ++argIndex; if (argCount <= 0) { break; } } if (this.parseErrors + this.parseWarnings > 0 && !configuration.quiet) { this.report.generalInfo(this.errout); } if (this.errout != this.stderr) { this.errout.close(); } // return status can be used by scripts if (this.parseErrors > 0) { return 2; } if (this.parseWarnings > 0) { return 1; } // 0 means all is ok return 0; } /** * Attach a TidyMessageListener which will be notified for messages and errors. * @param listener TidyMessageListener implementation */ public void setMessageListener(TidyMessageListener listener) { this.report.addMessageListener(listener); } /** * <code>indent-spaces</code>- default indentation. * @param spaces number of spaces used for indentation * @see Configuration#spaces */ public void setSpaces(int spaces) { configuration.spaces = spaces; } /** * <code>indent-spaces</code>- default indentation. * @return number of spaces used for indentation * @see Configuration#spaces */ public int getSpaces() { return configuration.spaces; } /** * <code>wrap</code>- default wrap margin. * @param wraplen default wrap margin * @see Configuration#wraplen */ public void setWraplen(int wraplen) { configuration.wraplen = wraplen; } /** * <code>wrap</code>- default wrap margin. 
* @return default wrap margin * @see Configuration#wraplen */ public int getWraplen() { return configuration.wraplen; } /** * <code>tab-size</code>- tab size in chars. * @param tabsize tab size in chars * @see Configuration#tabsize */ public void setTabsize(int tabsize) { configuration.tabsize = tabsize; } /** * <code>tab-size</code>- tab size in chars. * @return tab size in chars * @see Configuration#tabsize */ public int getTabsize() { return configuration.tabsize; } /** * Errfile - file name to write errors to. * @param errfile file name to write errors to * @see Configuration#errfile */ public void setErrfile(String errfile) { configuration.errfile = errfile; } /** * Errfile - file name to write errors to. * @return error file name * @see Configuration#errfile */ public String getErrfile() { return configuration.errfile; } /** * writeback - if true then output tidied markup. NOTE: this property is ignored when parsing from an InputStream. * @param writeback <code>true</code>= output tidied markup * @see Configuration#writeback */ public void setWriteback(boolean writeback) { configuration.writeback = writeback; } /** * writeback - if true then output tidied markup. NOTE: this property is ignored when parsing from an InputStream. * @return <code>true</code> if tidy will output tidied markup in input file * @see Configuration#writeback */ public boolean getWriteback() { return configuration.writeback; } /** * only-errors - if true normal output is suppressed. * @param onlyErrors if <code>true</code> normal output is suppressed. * @see Configuration#onlyErrors */ public void setOnlyErrors(boolean onlyErrors) { configuration.onlyErrors = onlyErrors; } /** * only-errors - if true normal output is suppressed. * @return <code>true</code> if normal output is suppressed. * @see Configuration#onlyErrors */ public boolean getOnlyErrors() { return configuration.onlyErrors; } /** * show-warnings - show warnings? (errors are always shown). 
* @param showWarnings if <code>false</code> warnings are not shown * @see Configuration#showWarnings */ public void setShowWarnings(boolean showWarnings) { configuration.showWarnings = showWarnings; } /** * show-warnings - show warnings? (errors are always shown). * @return <code>false</code> if warnings are not shown * @see Configuration#showWarnings */ public boolean getShowWarnings() { return configuration.showWarnings; } /** * quiet - no 'Parsing X', guessed DTD or summary. * @param quiet <code>true</code>= don't output summary, warnings or errors * @see Configuration#quiet */ public void setQuiet(boolean quiet) { configuration.quiet = quiet; } /** * quiet - no 'Parsing X', guessed DTD or summary. * @return <code>true</code> if tidy will not output summary, warnings or errors * @see Configuration#quiet */ public boolean getQuiet() { return configuration.quiet; } /** * indent - indent content of appropriate tags. * @param indentContent indent content of appropriate tags * @see Configuration#indentContent */ public void setIndentContent(boolean indentContent) { configuration.indentContent = indentContent; } /** * indent - indent content of appropriate tags. * @return <code>true</code> if tidy will indent content of appropriate tags * @see Configuration#indentContent */ public boolean getIndentContent() { return configuration.indentContent; } /** * SmartIndent - does text/block level content effect indentation. * @param smartIndent <code>true</code> if text/block level content should effect indentation * @see Configuration#smartIndent */ public void setSmartIndent(boolean smartIndent) { configuration.smartIndent = smartIndent; } /** * SmartIndent - does text/block level content effect indentation. * @return <code>true</code> if text/block level content should effect indentation * @see Configuration#smartIndent */ public boolean getSmartIndent() { return configuration.smartIndent; } /** * hide-endtags - suppress optional end tags. 
* @param hideEndTags <code>true</code>= suppress optional end tags * @see Configuration#hideEndTags */ public void setHideEndTags(boolean hideEndTags) { configuration.hideEndTags = hideEndTags; } /** * hide-endtags - suppress optional end tags. * @return <code>true</code> if tidy will suppress optional end tags * @see Configuration#hideEndTags */ public boolean getHideEndTags() { return configuration.hideEndTags; } /** * input-xml - treat input as XML. * @param xmlTags <code>true</code> if tidy should treat input as XML * @see Configuration#xmlTags */ public void setXmlTags(boolean xmlTags) { configuration.xmlTags = xmlTags; } /** * input-xml - treat input as XML. * @return <code>true</code> if tidy will treat input as XML * @see Configuration#xmlTags */ public boolean getXmlTags() { return configuration.xmlTags; } /** * output-xml - create output as XML. * @param xmlOut <code>true</code> if tidy should create output as xml * @see Configuration#xmlOut */ public void setXmlOut(boolean xmlOut) { configuration.xmlOut = xmlOut; } /** * output-xml - create output as XML. * @return <code>true</code> if tidy will create output as xml * @see Configuration#xmlOut */ public boolean getXmlOut() { return configuration.xmlOut; } /** * output-xhtml - output extensible HTML. * @param xhtml <code>true</code> if tidy should output XHTML * @see Configuration#xHTML */ public void setXHTML(boolean xhtml) { configuration.xHTML = xhtml; } /** * output-xhtml - output extensible HTML. * @return <code>true</code> if tidy will output XHTML * @see Configuration#xHTML */ public boolean getXHTML() { return configuration.xHTML; } /** * uppercase-tags - output tags in upper case. * @param upperCaseTags <code>true</code> if tidy should output tags in upper case (default is lowercase) * @see Configuration#upperCaseTags */ public void setUpperCaseTags(boolean upperCaseTags) { configuration.upperCaseTags = upperCaseTags; } /** * uppercase-tags - output tags in upper case. 
* @return <code>true</code> if tidy should will tags in upper case * @see Configuration#upperCaseTags */ public boolean getUpperCaseTags() { return configuration.upperCaseTags; } /** * uppercase-attributes - output attributes in upper case. * @param upperCaseAttrs <code>true</code> if tidy should output attributes in upper case (default is lowercase) * @see Configuration#upperCaseAttrs */ public void setUpperCaseAttrs(boolean upperCaseAttrs) { configuration.upperCaseAttrs = upperCaseAttrs; } /** * uppercase-attributes - output attributes in upper case. * @return <code>true</code> if tidy should will attributes in upper case * @see Configuration#upperCaseAttrs */ public boolean getUpperCaseAttrs() { return configuration.upperCaseAttrs; } /** * make-clean - remove presentational clutter. * @param makeClean true to remove presentational clutter * @see Configuration#makeClean */ public void setMakeClean(boolean makeClean) { configuration.makeClean = makeClean; } /** * make-clean - remove presentational clutter. * @return true if tidy will remove presentational clutter * @see Configuration#makeClean */ public boolean getMakeClean() { return configuration.makeClean; } /** * make-bare - remove Microsoft cruft. * @param makeBare true to remove Microsoft cruft * @see Configuration#makeBare */ public void setMakeBare(boolean makeBare) { configuration.makeBare = makeBare; } /** * make-clean - remove Microsoft cruft. * @return true if tidy will remove Microsoft cruft * @see Configuration#makeBare */ public boolean getMakeBare() { return configuration.makeBare; } /** * break-before-br - output newline before <br>. * @param breakBeforeBR <code>true</code> if tidy should output a newline before <br> * @see Configuration#breakBeforeBR */ public void setBreakBeforeBR(boolean breakBeforeBR) { configuration.breakBeforeBR = breakBeforeBR; } /** * break-before-br - output newline before <br>. 
* @return <code>true</code> if tidy will output a newline before <br> * @see Configuration#breakBeforeBR */ public boolean getBreakBeforeBR() { return configuration.breakBeforeBR; } /** * <code>split</code>- create slides on each h2 element. * @param burstSlides <code>true</code> if tidy should create slides on each h2 element * @see Configuration#burstSlides */ public void setBurstSlides(boolean burstSlides) { configuration.burstSlides = burstSlides; } /** * <code>split</code>- create slides on each h2 element. * @return <code>true</code> if tidy will create slides on each h2 element * @see Configuration#burstSlides */ public boolean getBurstSlides() { return configuration.burstSlides; } /** * <code>numeric-entities</code>- output entities other than the built-in HTML entities in the numeric rather * than the named entity form. * @param numEntities <code>true</code> if tidy should output entities in the numeric form. * @see Configuration#numEntities */ public void setNumEntities(boolean numEntities) { configuration.numEntities = numEntities; } /** * <code>numeric-entities</code>- output entities other than the built-in HTML entities in the numeric rather * than the named entity form. * @return <code>true</code> if tidy will output entities in the numeric form. * @see Configuration#numEntities */ public boolean getNumEntities() { return configuration.numEntities; } /** * <code>quote-marks</code>- output " marks as &quot;. * @param quoteMarks <code>true</code> if tidy should output " marks as &quot; * @see Configuration#quoteMarks */ public void setQuoteMarks(boolean quoteMarks) { configuration.quoteMarks = quoteMarks; } /** * <code>quote-marks</code>- output " marks as &quot;. * @return <code>true</code> if tidy will output " marks as &quot; * @see Configuration#quoteMarks */ public boolean getQuoteMarks() { return configuration.quoteMarks; } /** * <code>quote-nbsp</code>- output non-breaking space as entity. 
* @param quoteNbsp <code>true</code> if tidy should output non-breaking space as entity * @see Configuration#quoteNbsp */ public void setQuoteNbsp(boolean quoteNbsp) { configuration.quoteNbsp = quoteNbsp; } /** * <code>quote-nbsp</code>- output non-breaking space as entity. * @return <code>true</code> if tidy will output non-breaking space as entity * @see Configuration#quoteNbsp */ public boolean getQuoteNbsp() { return configuration.quoteNbsp; } /** * <code>quote-ampersand</code>- output naked ampersand as &. * @param quoteAmpersand <code>true</code> if tidy should output naked ampersand as & * @see Configuration#quoteAmpersand */ public void setQuoteAmpersand(boolean quoteAmpersand) { configuration.quoteAmpersand = quoteAmpersand; } /** * <code>quote-ampersand</code>- output naked ampersand as &. * @return <code>true</code> if tidy will output naked ampersand as & * @see Configuration#quoteAmpersand */ public boolean getQuoteAmpersand() { return configuration.quoteAmpersand; } /** * <code>wrap-attributes</code>- wrap within attribute values. * @param wrapAttVals <code>true</code> if tidy should wrap within attribute values * @see Configuration#wrapAttVals */ public void setWrapAttVals(boolean wrapAttVals) { configuration.wrapAttVals = wrapAttVals; } /** * <code>wrap-attributes</code>- wrap within attribute values. * @return <code>true</code> if tidy will wrap within attribute values * @see Configuration#wrapAttVals */ public boolean getWrapAttVals() { return configuration.wrapAttVals; } /** * <code>wrap-script-literals</code>- wrap within JavaScript string literals. * @param wrapScriptlets <code>true</code> if tidy should wrap within JavaScript string literals * @see Configuration#wrapScriptlets */ public void setWrapScriptlets(boolean wrapScriptlets) { configuration.wrapScriptlets = wrapScriptlets; } /** * <code>wrap-script-literals</code>- wrap within JavaScript string literals. 
* @return <code>true</code> if tidy will wrap within JavaScript string literals * @see Configuration#wrapScriptlets */ public boolean getWrapScriptlets() { return configuration.wrapScriptlets; } /** * <code>wrap-sections</code>- wrap within <![ ... ]> section tags * @param wrapSection <code>true</code> if tidy should wrap within <![ ... ]> section tags * @see Configuration#wrapSection */ public void setWrapSection(boolean wrapSection) { configuration.wrapSection = wrapSection; } /** * <code>wrap-sections</code>- wrap within <![ ... ]> section tags * @return <code>true</code> if tidy will wrap within <![ ... ]> section tags * @see Configuration#wrapSection */ public boolean getWrapSection() { return configuration.wrapSection; } /** * <code>alt-text</code>- default text for alt attribute. * @param altText default text for alt attribute * @see Configuration#altText */ public void setAltText(String altText) { configuration.altText = altText; } /** * <code>alt-text</code>- default text for alt attribute. * @return default text for alt attribute * @see Configuration#altText */ public String getAltText() { return configuration.altText; } /** * <code>add-xml-pi</code>- add <?xml?> for XML docs. * @param xmlPi <code>true</code> if tidy should add <?xml?> for XML docs * @see Configuration#xmlPi */ public void setXmlPi(boolean xmlPi) { configuration.xmlPi = xmlPi; } /** * <code>add-xml-pi</code>- add <?xml?> for XML docs. * @return <code>true</code> if tidy will add <?xml?> for XML docs * @see Configuration#xmlPi */ public boolean getXmlPi() { return configuration.xmlPi; } /** * <code>drop-font-tags</code>- discard presentation tags. * @param dropFontTags <code>true</code> if tidy should discard presentation tags * @see Configuration#dropFontTags */ public void setDropFontTags(boolean dropFontTags) { configuration.dropFontTags = dropFontTags; } /** * <code>drop-font-tags</code>- discard presentation tags. 
* @return <code>true</code> if tidy will discard presentation tags * @see Configuration#dropFontTags */ public boolean getDropFontTags() { return configuration.dropFontTags; } /** * <code>drop-proprietary-attributes</code>- discard proprietary attributes. * @param dropProprietaryAttributes <code>true</code> if tidy should discard proprietary attributes * @see Configuration#dropProprietaryAttributes */ public void setDropProprietaryAttributes(boolean dropProprietaryAttributes) { configuration.dropProprietaryAttributes = dropProprietaryAttributes; } /** * <code>drop-proprietary-attributes</code>- discard proprietary attributes. * @return <code>true</code> if tidy will discard proprietary attributes * @see Configuration#dropProprietaryAttributes */ public boolean getDropProprietaryAttributes() { return configuration.dropProprietaryAttributes; } /** * <code>drop-empty-paras</code>- discard empty p elements. * @param dropEmptyParas <code>true</code> if tidy should discard empty p elements * @see Configuration#dropEmptyParas */ public void setDropEmptyParas(boolean dropEmptyParas) { configuration.dropEmptyParas = dropEmptyParas; } /** * <code>drop-empty-paras</code>- discard empty p elements. * @return <code>true</code> if tidy will discard empty p elements * @see Configuration#dropEmptyParas */ public boolean getDropEmptyParas() { return configuration.dropEmptyParas; } /** * <code>fix-bad-comments</code>- fix comments with adjacent hyphens. * @param fixComments <code>true</code> if tidy should fix comments with adjacent hyphens * @see Configuration#fixComments */ public void setFixComments(boolean fixComments) { configuration.fixComments = fixComments; } /** * <code>fix-bad-comments</code>- fix comments with adjacent hyphens. * @return <code>true</code> if tidy will fix comments with adjacent hyphens * @see Configuration#fixComments */ public boolean getFixComments() { return configuration.fixComments; } /** * <code>wrap-asp</code>- wrap within ASP pseudo elements. 
* @param wrapAsp <code>true</code> if tidy should wrap within ASP pseudo elements * @see Configuration#wrapAsp */ public void setWrapAsp(boolean wrapAsp) { configuration.wrapAsp = wrapAsp; } /** * <code>wrap-asp</code>- wrap within ASP pseudo elements. * @return <code>true</code> if tidy will wrap within ASP pseudo elements * @see Configuration#wrapAsp */ public boolean getWrapAsp() { return configuration.wrapAsp; } /** * <code>wrap-jste</code>- wrap within JSTE pseudo elements. * @param wrapJste <code>true</code> if tidy should wrap within JSTE pseudo elements * @see Configuration#wrapJste */ public void setWrapJste(boolean wrapJste) { configuration.wrapJste = wrapJste; } /** * <code>wrap-jste</code>- wrap within JSTE pseudo elements. * @return <code>true</code> if tidy will wrap within JSTE pseudo elements * @see Configuration#wrapJste */ public boolean getWrapJste() { return configuration.wrapJste; } /** * <code>wrap-php</code>- wrap within PHP pseudo elements. * @param wrapPhp <code>true</code> if tidy should wrap within PHP pseudo elements * @see Configuration#wrapPhp */ public void setWrapPhp(boolean wrapPhp) { configuration.wrapPhp = wrapPhp; } /** * <code>wrap-php</code>- wrap within PHP pseudo elements. * @return <code>true</code> if tidy will wrap within PHP pseudo elements * @see Configuration#wrapPhp */ public boolean getWrapPhp() { return configuration.wrapPhp; } /** * <code>fix-backslash</code>- fix URLs by replacing \ with /. * @param fixBackslash <code>true</code> if tidy should fix URLs by replacing \ with / * @see Configuration#fixBackslash */ public void setFixBackslash(boolean fixBackslash) { configuration.fixBackslash = fixBackslash; } /** * <code>fix-backslash</code>- fix URLs by replacing \ with /. 
* @return <code>true</code> if tidy will fix URLs by replacing \ with / * @see Configuration#fixBackslash */ public boolean getFixBackslash() { return configuration.fixBackslash; } /** * <code>indent-attributes</code>- newline+indent before each attribute. * @param indentAttributes <code>true</code> if tidy should output a newline+indent before each attribute * @see Configuration#indentAttributes */ public void setIndentAttributes(boolean indentAttributes) { configuration.indentAttributes = indentAttributes; } /** * <code>indent-attributes</code>- newline+indent before each attribute. * @return <code>true</code> if tidy will output a newline+indent before each attribute * @see Configuration#indentAttributes */ public boolean getIndentAttributes() { return configuration.indentAttributes; } /** * <code>doctype</code>- user specified doctype. * @param doctype <code>omit | auto | strict | loose | <em>fpi</em></code> where the <em>fpi </em> is a string * similar to "-//ACME//DTD HTML 3.14159//EN" Note: for <em>fpi </em> include the double-quotes in the * string. * @see Configuration#docTypeStr * @see Configuration#docTypeMode */ public void setDocType(String doctype) { if (doctype != null) { configuration.docTypeStr = (String) ParsePropertyImpl.DOCTYPE.parse(doctype, "doctype", configuration); } } /** * <code>doctype</code>- user specified doctype. * @return <code>omit | auto | strict | loose | <em>fpi</em></code> where the <em>fpi </em> is a string similar * to "-//ACME//DTD HTML 3.14159//EN" Note: for <em>fpi </em> include the double-quotes in the string. 
* @see Configuration#docTypeStr * @see Configuration#docTypeMode */ public String getDocType() { String result = null; switch (configuration.docTypeMode) { case Configuration.DOCTYPE_OMIT : result = "omit"; break; case Configuration.DOCTYPE_AUTO : result = "auto"; break; case Configuration.DOCTYPE_STRICT : result = "strict"; break; case Configuration.DOCTYPE_LOOSE : result = "loose"; break; case Configuration.DOCTYPE_USER : result = configuration.docTypeStr; break; } return result; } /** * <code>logical-emphasis</code>- replace i by em and b by strong. * @param logicalEmphasis <code>true</code> if tidy should replace i by em and b by strong * @see Configuration#logicalEmphasis */ public void setLogicalEmphasis(boolean logicalEmphasis) { configuration.logicalEmphasis = logicalEmphasis; } /** * <code>logical-emphasis</code>- replace i by em and b by strong. * @return <code>true</code> if tidy will replace i by em and b by strong * @see Configuration#logicalEmphasis */ public boolean getLogicalEmphasis() { return configuration.logicalEmphasis; } /** * <code>assume-xml-procins</code> This option specifies if Tidy should change the parsing of processing * instructions to require ?> as the terminator rather than >. This option is automatically set if the input is in * XML. * @param xmlPIs <code>true</code> if tidy should expect a ?> at the end of processing instructions * @see Configuration#xmlPIs */ public void setXmlPIs(boolean xmlPIs) { configuration.xmlPIs = xmlPIs; } /** * <code>assume-xml-procins</code> This option specifies if Tidy should change the parsing of processing * instructions to require ?> as the terminator rather than >. This option is automatically set if the input is in * XML. * @return <code>true</code> if tidy will expect a ?> at the end of processing instructions * @see Configuration#xmlPIs */ public boolean getXmlPIs() { return configuration.xmlPIs; } /** * <code>enclose-text</code>- if true text at body is wrapped in <p>'s. 
* @param encloseText <code>true</code> if tidy should wrap text at body in <p>'s. * @see Configuration#encloseBodyText */ public void setEncloseText(boolean encloseText) { configuration.encloseBodyText = encloseText; } /** * <code>enclose-text</code>- if true text at body is wrapped in <p>'s. * @return <code>true</code> if tidy will wrap text at body in <p>'s. * @see Configuration#encloseBodyText */ public boolean getEncloseText() { return configuration.encloseBodyText; } /** * <code>enclose-block-text</code>- if true text in blocks is wrapped in <p>'s. * @param encloseBlockText <code>true</code> if tidy should wrap text text in blocks in <p>'s. * @see Configuration#encloseBlockText */ public void setEncloseBlockText(boolean encloseBlockText) { configuration.encloseBlockText = encloseBlockText; } /** * <code>enclose-block-text</code>- if true text in blocks is wrapped in <p>'s. return <code>true</code> * if tidy should will text text in blocks in <p>'s. * @see Configuration#encloseBlockText */ public boolean getEncloseBlockText() { return configuration.encloseBlockText; } /** * <code>word-2000</code>- draconian cleaning for Word2000. * @param word2000 <code>true</code> if tidy should clean word2000 documents * @see Configuration#word2000 */ public void setWord2000(boolean word2000) { configuration.word2000 = word2000; } /** * <code>word-2000</code>- draconian cleaning for Word2000. * @return <code>true</code> if tidy will clean word2000 documents * @see Configuration#word2000 */ public boolean getWord2000() { return configuration.word2000; } /** * <code>tidy-mark</code>- add meta element indicating tidied doc. * @param tidyMark <code>true</code> if tidy should add meta element indicating tidied doc * @see Configuration#tidyMark */ public void setTidyMark(boolean tidyMark) { configuration.tidyMark = tidyMark; } /** * <code>tidy-mark</code>- add meta element indicating tidied doc. 
* @return <code>true</code> if tidy will add meta element indicating tidied doc * @see Configuration#tidyMark */ public boolean getTidyMark() { return configuration.tidyMark; } /** * <code>add-xml-space</code>- if set to yes adds xml:space attr as needed. * @param xmlSpace <code>true</code> if tidy should add xml:space attr as needed * @see Configuration#xmlSpace */ public void setXmlSpace(boolean xmlSpace) { configuration.xmlSpace = xmlSpace; } /** * <code>add-xml-space</code>- if set to yes adds xml:space attr as needed. * @return <code>true</code> if tidy will add xml:space attr as needed * @see Configuration#xmlSpace */ public boolean getXmlSpace() { return configuration.xmlSpace; } /** * <code>gnu-emacs</code>- if true format error output for GNU Emacs. * @param emacs <code>true</code> if tidy should format error output for GNU Emacs * @see Configuration#emacs */ public void setEmacs(boolean emacs) { configuration.emacs = emacs; } /** * <code>gnu-emacs</code>- if true format error output for GNU Emacs. * @return <code>true</code> if tidy will format error output for GNU Emacs * @see Configuration#emacs */ public boolean getEmacs() { return configuration.emacs; } /** * <code>literal-attributes</code>- if true attributes may use newlines. * @param literalAttribs <code>true</code> if attributes may use newlines * @see Configuration#literalAttribs */ public void setLiteralAttribs(boolean literalAttribs) { configuration.literalAttribs = literalAttribs; } /** * <code>literal-attributes</code>- if true attributes may use newlines. * @return <code>true</code> if attributes may use newlines * @see Configuration#literalAttribs */ public boolean getLiteralAttribs() { return configuration.literalAttribs; } /** * <code>print-body-only</code>- output BODY content only. 
* @param bodyOnly true = print only the document body * @see Configuration#bodyOnly */ public void setPrintBodyOnly(boolean bodyOnly) { configuration.bodyOnly = bodyOnly; } /** * <code>print-body-only</code>- output BODY content only. * @return true if tidy will print only the document body */ public boolean getPrintBodyOnly() { return configuration.bodyOnly; } /** * <code>fix-uri</code>- fix uri references applying URI encoding if necessary. * @param fixUri true = fix uri references * @see Configuration#fixUri */ public void setFixUri(boolean fixUri) { configuration.fixUri = fixUri; } /** * <code>fix-uri</code>- output BODY content only. * @return true if tidy will fix uri references */ public boolean getFixUri() { return configuration.fixUri; } /** * <code>lower-literals</code>- folds known attribute values to lower case. * @param lowerLiterals true = folds known attribute values to lower case * @see Configuration#lowerLiterals */ public void setLowerLiterals(boolean lowerLiterals) { configuration.lowerLiterals = lowerLiterals; } /** * <code>lower-literals</code>- folds known attribute values to lower case. * @return true if tidy will folds known attribute values to lower case */ public boolean getLowerLiterals() { return configuration.lowerLiterals; } /** * <code>hide-comments</code>- hides all (real) comments in output. * @param hideComments true = hides all comments in output * @see Configuration#hideComments */ public void setHideComments(boolean hideComments) { configuration.hideComments = hideComments; } /** * <code>hide-comments</code>- hides all (real) comments in output. * @return true if tidy will hide all comments in output */ public boolean getHideComments() { return configuration.hideComments; } /** * <code>indent-cdata</code>- indent CDATA sections. 
* @param indentCdata true = indent CDATA sections * @see Configuration#indentCdata */ public void setIndentCdata(boolean indentCdata) { configuration.indentCdata = indentCdata; } /** * <code>indent-cdata</code>- indent CDATA sections. * @return true if tidy will indent CDATA sections */ public boolean getIndentCdata() { return configuration.indentCdata; } /** * <code>force-output</code>- output document even if errors were found. * @param forceOutput true = output document even if errors were found * @see Configuration#forceOutput */ public void setForceOutput(boolean forceOutput) { configuration.forceOutput = forceOutput; } /** * <code>force-output</code>- output document even if errors were found. * @return true if tidy will output document even if errors were found */ public boolean getForceOutput() { return configuration.forceOutput; } /** * <code>show-errors</code>- set the number of errors to put out. * @param showErrors number of errors to put out * @see Configuration#showErrors */ public void setShowErrors(int showErrors) { configuration.showErrors = showErrors; } /** * <code>show-errors</code>- number of errors to put out. * @return the number of errors tidy will put out */ public int getShowErrors() { return configuration.showErrors; } /** * <code>ascii-chars</code>- convert quotes and dashes to nearest ASCII char. * @param asciiChars true = convert quotes and dashes to nearest ASCII char * @see Configuration#asciiChars */ public void setAsciiChars(boolean asciiChars) { configuration.asciiChars = asciiChars; } /** * <code>ascii-chars</code>- convert quotes and dashes to nearest ASCII char. * @return true if tidy will convert quotes and dashes to nearest ASCII char */ public boolean getAsciiChars() { return configuration.asciiChars; } /** * <code>join-classes</code>- join multiple class attributes. 
* @param joinClasses true = join multiple class attributes * @see Configuration#joinClasses */ public void setJoinClasses(boolean joinClasses) { configuration.joinClasses = joinClasses; } /** * <code>join-classes</code>- join multiple class attributes. * @return true if tidy will join multiple class attributes */ public boolean getJoinClasses() { return configuration.joinClasses; } /** * <code>join-styles</code>- join multiple style attributes. * @param joinStyles true = join multiple style attributes * @see Configuration#joinStyles */ public void setJoinStyles(boolean joinStyles) { configuration.joinStyles = joinStyles; } /** * <code>join-styles</code>- join multiple style attributes. * @return true if tidy will join multiple style attributes */ public boolean getJoinStyles() { return configuration.joinStyles; } /** * <code>trim-empty-elements</code>- trim empty elements. * @param trim-empty-elements true = trim empty elements * @see Configuration#trimEmpty */ public void setTrimEmptyElements(boolean trimEmpty) { configuration.trimEmpty = trimEmpty; } /** * <code>trim-empty-elements</code>- trim empty elements. * @return true if tidy will trim empty elements */ public boolean getTrimEmptyElements() { return configuration.trimEmpty; } /** * <code>replace-color</code>- replace hex color attribute values with names. * @param replaceColor true = replace hex color attribute values with names * @see Configuration#replaceColor */ public void setReplaceColor(boolean replaceColor) { configuration.replaceColor = replaceColor; } /** * <code>replace-color</code>- replace hex color attribute values with names. * @return true if tidy will replace hex color attribute values with names */ public boolean getReplaceColor() { return configuration.replaceColor; } /** * <code>escape-cdata</code>- replace CDATA sections with escaped text. 
* @param escapeCdata true = replace CDATA sections with escaped text * @see Configuration#escapeCdata */ public void setEscapeCdata(boolean escapeCdata) { configuration.escapeCdata = escapeCdata; } /** * <code>escape-cdata</code> -replace CDATA sections with escaped text. * @return true if tidy will replace CDATA sections with escaped text */ public boolean getEscapeCdata() { return configuration.escapeCdata; } /** * <code>repeated-attributes</code>- keep first or last duplicate attribute. * @param repeatedAttributes <code>Configuration.KEEP_FIRST | Configuration.KEEP_LAST</code> * @see Configuration#duplicateAttrs */ public void setRepeatedAttributes(int repeatedAttributes) { configuration.duplicateAttrs = repeatedAttributes; } /** * <code>repeated-attributes</code>- keep first or last duplicate attribute. * @return <code>Configuration.KEEP_FIRST | Configuration.KEEP_LAST</code> */ public int getRepeatedAttributes() { return configuration.duplicateAttrs; } /** * <code>keep-time</code>- if true last modified time is preserved. * @param keepFileTimes <code>true</code> if tidy should preserved last modified time in input file. * @todo <strong>this is NOT supported at this time. </strong> * @see Configuration#keepFileTimes */ public void setKeepFileTimes(boolean keepFileTimes) { configuration.keepFileTimes = keepFileTimes; } /** * <code>keep-time</code>- if true last modified time is preserved. * @return <code>true</code> if tidy will preserved last modified time in input file. * @todo <strong>this is NOT supported at this time. </strong> * @see Configuration#keepFileTimes */ public boolean getKeepFileTimes() { return configuration.keepFileTimes; } /** * <code>output-raw</code>- avoid mapping values > 127 to entities. This has the same effect of specifying a * "raw" encoding in the original version of tidy. 
* @param rawOut avoid mapping values > 127 to entities * @see Configuration#rawOut */ public void setRawOut(boolean rawOut) { configuration.rawOut = rawOut; } /** * <code>output-raw</code>- avoid mapping values > 127 to entities. * @return <code>true</code> if tidy will not map values > 127 to entities * @see Configuration#rawOut */ public boolean getRawOut() { return configuration.rawOut; } /** * <code>input-encoding</code> the character encoding used for input. * @param encoding a valid java encoding name */ public void setInputEncoding(String encoding) { configuration.setInCharEncodingName(encoding); } /** * <code>input-encoding</code> the character encoding used for input. * @return the java name of the encoding currently used for input */ public String getInputEncoding() { return configuration.getInCharEncodingName(); } /** * <code>output-encoding</code> the character encoding used for output. * @param encoding a valid java encoding name */ public void setOutputEncoding(String encoding) { configuration.setOutCharEncodingName(encoding); } /** * <code>output-encoding</code> the character encoding used for output. 
* @return the java name of the encoding currently used for output */ public String getOutputEncoding() { return configuration.getOutCharEncodingName(); } }�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Entity.java��������������������������������������������������������0000644�0001750�0001750�00000006425�10111347002�021142� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * HTML ISO entity. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 487 $ ($Author: fgiust $) */ public class Entity { /** * entity name. */ private String name; /** * entity code. */ private short code; /** * instantiates a new entity. 
* @param name entity name * @param code entity code (will be casted to short) */ public Entity(String name, int code) { this.name = name; this.code = (short) code; } /** * Getter for <code>code</code>. * @return Returns the code. */ public short getCode() { return this.code; } /** * Getter for <code>name</code>. * @return Returns the name. */ public String getName() { return this.name; } }�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Dict.java����������������������������������������������������������0000644�0001750�0001750�00000023757�10112175570�020572� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Tag dictionary node. If the document uses just HTML 2.0 tags and attributes described it as HTML 2.0 Similarly for * HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary tags and attributes then describe it as HTML * Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0 then describe it * as one of the flavors of Voyager (strict, loose or frameset). 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 502 $ ($Author: fgiust $) */ public class Dict { /** * Content model: unknown. */ public static final int CM_UNKNOWN = 0; /** * Content model: empty. */ public static final int CM_EMPTY = (1 << 0); /** * Content model: html. */ public static final int CM_HTML = (1 << 1); /** * Content model: head. */ public static final int CM_HEAD = (1 << 2); /** * Content model: block. */ public static final int CM_BLOCK = (1 << 3); /** * Content model: inline. */ public static final int CM_INLINE = (1 << 4); /** * Content model: list. */ public static final int CM_LIST = (1 << 5); /** * Content model: definition list. */ public static final int CM_DEFLIST = (1 << 6); /** * Content model: table. */ public static final int CM_TABLE = (1 << 7); /** * Content model: rowgroup. */ public static final int CM_ROWGRP = (1 << 8); /** * Content model: row. */ public static final int CM_ROW = (1 << 9); /** * Content model: field. */ public static final int CM_FIELD = (1 << 10); /** * Content model: object. */ public static final int CM_OBJECT = (1 << 11); /** * Content model: param. */ public static final int CM_PARAM = (1 << 12); /** * Content model: frames. */ public static final int CM_FRAMES = (1 << 13); /** * Content model: heading. */ public static final int CM_HEADING = (1 << 14); /** * Content model: opt. */ public static final int CM_OPT = (1 << 15); /** * Content model: img. */ public static final int CM_IMG = (1 << 16); /** * Content model: mixed. */ public static final int CM_MIXED = (1 << 17); /** * Content model: no indent. */ public static final int CM_NO_INDENT = (1 << 18); /** * Content model: obsolete. */ public static final int CM_OBSOLETE = (1 << 19); /** * Content model: new. 
*/ public static final int CM_NEW = (1 << 20); /** * Content model: omitst. */ public static final int CM_OMITST = (1 << 21); /** * Version: unknown. */ public static final short VERS_UNKNOWN = 0; /** * Version: html 2.0. */ public static final short VERS_HTML20 = 1; /** * Version: html 3.2. */ public static final short VERS_HTML32 = 2; /** * Version: html 4.0 strict. */ public static final short VERS_HTML40_STRICT = 4; /** * Version: html 4.0 transitional. */ public static final short VERS_HTML40_LOOSE = 8; /** * Version: html 4.0 frameset. */ public static final short VERS_FRAMESET = 16; /** * Version: xml. */ public static final short VERS_XML = 32; /** * Version: netscape. */ public static final short VERS_NETSCAPE = 64; /** * Version: microsoft. */ public static final short VERS_MICROSOFT = 128; /** * Version: sun. */ public static final short VERS_SUN = 256; /** * Version: malformed. */ public static final short VERS_MALFORMED = 512; /** * Version: xhtml 1.1. */ public static final short VERS_XHTML11 = 1024; /** * Version: xhtml basic. */ public static final short VERS_BASIC = 2048; /** * all tags and attributes are ok in proprietary version of HTML. */ public static final short VERS_PROPRIETARY = (VERS_NETSCAPE | VERS_MICROSOFT | VERS_SUN); /** * tags/attrs in HTML4 but not in earlier version. */ public static final short VERS_HTML40 = (VERS_HTML40_STRICT | VERS_HTML40_LOOSE | VERS_FRAMESET); /** * tags/attrs which are in all versions of HTML except strict. */ public static final short VERS_LOOSE = (VERS_HTML32 | VERS_HTML40_LOOSE | VERS_FRAMESET); /** * tags/attrs in HTML 4 loose and frameset. */ public static final short VERS_IFRAME = (VERS_HTML40_LOOSE | VERS_FRAMESET); /** * tags/attrs in all versions from HTML 3.2 onwards. */ public static final short VERS_FROM32 = (VERS_HTML40_STRICT | VERS_LOOSE); /** * versions with on... attributes. */ public static final short VERS_EVENTS = (VERS_HTML40 | VERS_XHTML11); /** * tags/attrs in any version. 
*/ public static final short VERS_ALL = (VERS_HTML20 | VERS_HTML32 | VERS_HTML40 | VERS_XHTML11 | VERS_BASIC); /** * types of tags that the user can define: empty tag. */ public static final short TAGTYPE_EMPTY = 1; /** * types of tags that the user can define: inline tag. */ public static final short TAGTYPE_INLINE = 2; /** * types of tags that the user can define: block tag. */ public static final short TAGTYPE_BLOCK = 4; /** * types of tags that the user can define: pre tag. */ public static final short TAGTYPE_PRE = 8; /** * Tag name. */ protected String name; /** * Version in which this tag is defined. */ protected short versions; /** * model (CM_* constants). */ protected int model; /** * Parser for this tag. */ private Parser parser; /** * Validator for this tag. */ private TagCheck chkattrs; /** * Instantiates a new Tag definition. * @param name tag name * @param versions version in which this tag is defined * @param model model (CM_* constants) * @param parser parser for this tag * @param chkattrs validator for this tag (can be null) */ public Dict(String name, short versions, int model, Parser parser, TagCheck chkattrs) { this.name = name; this.versions = versions; this.model = model; this.parser = parser; this.chkattrs = chkattrs; } /** * Getter for <code>chkattrs</code>. * @return Returns the chkattrs. */ public TagCheck getChkattrs() { return this.chkattrs; } /** * Getter for <code>model</code>. * @return Returns the model. */ public int getModel() { return this.model; } /** * Getter for <code>name</code>. * @return Returns the name. */ public String getName() { return this.name; } /** * Getter for <code>parser</code>. * @return Returns the parser. */ public Parser getParser() { return this.parser; } /** * Setter for <code>chkattrs</code>. * @param chkattrs The chkattrs to set. */ public void setChkattrs(TagCheck chkattrs) { this.chkattrs = chkattrs; } /** * Getter for <code>versions</code>. * @return Returns the versions. 
*/ public short getVersions() { return this.versions; } /** * Setter for <code>parser</code>. * @param parser The parser to set. */ public void setParser(Parser parser) { this.parser = parser; } }�����������������jtidy/src/main/java/org/w3c/tidy/DOMElementImpl.java������������������������������������������������0000644�0001750�0001750�00000031613�10144212711�022442� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import org.w3c.dom.Attr; import org.w3c.dom.DOMException; import org.w3c.dom.TypeInfo; /** * DOMElementImpl. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 738 $ ($Author: fgiust $) */ public class DOMElementImpl extends DOMNodeImpl implements org.w3c.dom.Element { /** * Instantiates a new DOM element. * @param adaptee Tidy Node. 
*/ protected DOMElementImpl(Node adaptee) { super(adaptee); } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { return org.w3c.dom.Node.ELEMENT_NODE; } /** * @see org.w3c.dom.Element#getTagName */ public String getTagName() { return super.getNodeName(); } /** * @see org.w3c.dom.Element#getAttribute(java.lang.String) */ public String getAttribute(String name) { if (this.adaptee == null) { return null; } AttVal att = this.adaptee.attributes; while (att != null) { if (att.attribute.equals(name)) { break; } att = att.next; } if (att != null) { return att.value; } return ""; } /** * @see org.w3c.dom.Element#setAttribute(java.lang.String, java.lang.String) */ public void setAttribute(String name, String value) throws DOMException { if (this.adaptee == null) { return; } AttVal att = this.adaptee.attributes; while (att != null) { if (att.attribute.equals(name)) { break; } att = att.next; } if (att != null) { att.value = value; } else { att = new AttVal(null, null, '"', name, value); att.dict = AttributeTable.getDefaultAttributeTable().findAttribute(att); if (this.adaptee.attributes == null) { this.adaptee.attributes = att; } else { att.next = this.adaptee.attributes; this.adaptee.attributes = att; } } } /** * @see org.w3c.dom.Element#removeAttribute(java.lang.String) */ public void removeAttribute(String name) throws DOMException { if (this.adaptee == null) { return; } AttVal att = this.adaptee.attributes; AttVal pre = null; while (att != null) { if (att.attribute.equals(name)) { break; } pre = att; att = att.next; } if (att != null) { if (pre == null) { this.adaptee.attributes = att.next; } else { pre.next = att.next; } } } /** * @see org.w3c.dom.Element#getAttributeNode(java.lang.String) */ public org.w3c.dom.Attr getAttributeNode(String name) { if (this.adaptee == null) { return null; } AttVal att = this.adaptee.attributes; while (att != null) { if (att.attribute.equals(name)) { break; } att = att.next; } if (att != null) { return att.getAdapter(); 
} return null; } /** * @see org.w3c.dom.Element#setAttributeNode(org.w3c.dom.Attr) */ public org.w3c.dom.Attr setAttributeNode(org.w3c.dom.Attr newAttr) throws DOMException { if (newAttr == null) { return null; } if (!(newAttr instanceof DOMAttrImpl)) { throw new DOMException(DOMException.WRONG_DOCUMENT_ERR, "newAttr not instanceof DOMAttrImpl"); } DOMAttrImpl newatt = (DOMAttrImpl) newAttr; String name = newatt.avAdaptee.attribute; org.w3c.dom.Attr result = null; AttVal att = this.adaptee.attributes; while (att != null) { if (att.attribute.equals(name)) { break; } att = att.next; } if (att != null) { result = att.getAdapter(); att.adapter = newAttr; } else { if (this.adaptee.attributes == null) { this.adaptee.attributes = newatt.avAdaptee; } else { newatt.avAdaptee.next = this.adaptee.attributes; this.adaptee.attributes = newatt.avAdaptee; } } return result; } /** * @see org.w3c.dom.Element#removeAttributeNode(org.w3c.dom.Attr) */ public org.w3c.dom.Attr removeAttributeNode(org.w3c.dom.Attr oldAttr) throws DOMException { if (oldAttr == null) { return null; } org.w3c.dom.Attr result = null; AttVal att = this.adaptee.attributes; AttVal pre = null; while (att != null) { if (att.getAdapter() == oldAttr) { break; } pre = att; att = att.next; } if (att != null) { if (pre == null) { this.adaptee.attributes = att.next; } else { pre.next = att.next; } result = oldAttr; } else { throw new DOMException(DOMException.NOT_FOUND_ERR, "oldAttr not found"); } return result; } /** * @see org.w3c.dom.Element#getElementsByTagName(java.lang.String) */ public org.w3c.dom.NodeList getElementsByTagName(String name) { return new DOMNodeListByTagNameImpl(this.adaptee, name); } /** * @todo DOM level 2 getOwnerDocument() Not supported. Do nothing. * @see org.w3c.dom.Element#normalize */ public void normalize() { // do nothing } /** * @todo DOM level 2 getAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR. 
* @see org.w3c.dom.Element#getAttributeNS(java.lang.String, java.lang.String) */ public String getAttributeNS(String namespaceURI, String localName) { // DOMException - NOT_SUPPORTED_ERR: May be raised if the implementation does not support the feature "XML" and // the language exposed through the Document does not support XML Namespaces (such as HTML 4.01). throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 2 setAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Element#setAttributeNS(java.lang.String, java.lang.String, java.lang.String) */ public void setAttributeNS(String namespaceURI, String qualifiedName, String value) throws org.w3c.dom.DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 2 removeAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Element#removeAttributeNS(java.lang.String, java.lang.String) */ public void removeAttributeNS(String namespaceURI, String localName) throws org.w3c.dom.DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 2 getAttributeNodeNS() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Element#getAttributeNodeNS(java.lang.String, java.lang.String) */ public org.w3c.dom.Attr getAttributeNodeNS(String namespaceURI, String localName) { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 2 setAttributeNodeNS() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Element#setAttributeNodeNS(org.w3c.dom.Attr) */ public org.w3c.dom.Attr setAttributeNodeNS(org.w3c.dom.Attr newAttr) throws org.w3c.dom.DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 2 getElementsByTagNameNS() Not supported. Throws NOT_SUPPORTED_ERR. 
* @see org.w3c.dom.Element#getElementsByTagNameNS(java.lang.String, java.lang.String) */ public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, String localName) { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 2 hasAttribute() Not supported. Returns false. * @see org.w3c.dom.Element#hasAttribute(java.lang.String) */ public boolean hasAttribute(String name) { return false; } /** * @todo DOM level 2 hasAttribute() Not supported. Returns false. * @see org.w3c.dom.Element#hasAttributeNS(java.lang.String, java.lang.String) */ public boolean hasAttributeNS(String namespaceURI, String localName) { return false; } /** * @todo DOM level 3 getSchemaTypeInfo() Not supported. Returns null. * @see org.w3c.dom.Element#getSchemaTypeInfo() */ public TypeInfo getSchemaTypeInfo() { return null; } /** * @todo DOM level 3 setIdAttribute() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Element#setIdAttribute(java.lang.String, boolean) */ public void setIdAttribute(String name, boolean isId) throws DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 3 setIdAttributeNode() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Element#setIdAttributeNode(org.w3c.dom.Attr, boolean) */ public void setIdAttributeNode(Attr idAttr, boolean isId) throws DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 3 setIdAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR. 
* @see org.w3c.dom.Element#setIdAttributeNS(java.lang.String, java.lang.String, boolean) */ public void setIdAttributeNS(String namespaceURI, String localName, boolean isId) throws DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } }���������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Lexer.java���������������������������������������������������������0000644�0001750�0001750�00000363425�11465175246�021001� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.io.PrintWriter; import java.util.List; import java.util.Stack; import java.util.Vector; /** * Lexer for html parser. * <p> * Given a file stream fp it returns a sequence of tokens. GetToken(fp) gets the next token UngetToken(fp) provides one * level undo The tags include an attribute list: - linked list of attribute/value nodes - each node has 2 * null-terminated strings. 
- entities are replaced in attribute values white space is compacted if not in preformatted * mode If not in preformatted mode then leading white space is discarded and subsequent white space sequences compacted * to single space chars. If XmlTags is no then Tag names are folded to upper case and attribute names to lower case. * Not yet done: - Doctype subset and marked sections * </p> * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 1168 $ ($Author: aditsu $) */ public class Lexer { /** * state: ignore whitespace. */ public static final short IGNORE_WHITESPACE = 0; /** * state: mixed content. */ public static final short MIXED_CONTENT = 1; /** * state: preformatted. */ public static final short PREFORMATTED = 2; /** * state: ignore markup. */ public static final short IGNORE_MARKUP = 3; /** * URI for XHTML 1.0 transitional DTD. */ private static final String VOYAGER_LOOSE = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"; /** * URI for XHTML 1.0 strict DTD. */ private static final String VOYAGER_STRICT = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; /** * URI for XHTML 1.0 frameset DTD. */ private static final String VOYAGER_FRAMESET = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"; /** * URI for XHTML 1.1. */ private static final String VOYAGER_11 = "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"; /** * URI for XHTML Basic 1.0. */ // private static final String VOYAGER_BASIC = "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd"; /** * xhtml namespace. */ private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; /** * lists all the known versions. 
*/ private static final Lexer.W3CVersionInfo[] W3CVERSION = { new W3CVersionInfo("HTML 4.01", "XHTML 1.0 Strict", VOYAGER_STRICT, Dict.VERS_HTML40_STRICT), new W3CVersionInfo("HTML 4.01 Transitional", "XHTML 1.0 Transitional", VOYAGER_LOOSE, Dict.VERS_HTML40_LOOSE), new W3CVersionInfo("HTML 4.01 Frameset", "XHTML 1.0 Frameset", VOYAGER_FRAMESET, Dict.VERS_FRAMESET), new W3CVersionInfo("HTML 4.0", "XHTML 1.0 Strict", VOYAGER_STRICT, Dict.VERS_HTML40_STRICT), new W3CVersionInfo("HTML 4.0 Transitional", "XHTML 1.0 Transitional", VOYAGER_LOOSE, Dict.VERS_HTML40_LOOSE), new W3CVersionInfo("HTML 4.0 Frameset", "XHTML 1.0 Frameset", VOYAGER_FRAMESET, Dict.VERS_FRAMESET), new W3CVersionInfo("HTML 3.2", "XHTML 1.0 Transitional", VOYAGER_LOOSE, Dict.VERS_HTML32), new W3CVersionInfo("HTML 3.2 Final", "XHTML 1.0 Transitional", VOYAGER_LOOSE, Dict.VERS_HTML32), new W3CVersionInfo("HTML 3.2 Draft", "XHTML 1.0 Transitional", VOYAGER_LOOSE, Dict.VERS_HTML32), new W3CVersionInfo("HTML 2.0", "XHTML 1.0 Strict", VOYAGER_STRICT, Dict.VERS_HTML20), new W3CVersionInfo("HTML 4.01", "XHTML 1.1", VOYAGER_STRICT, Dict.VERS_XHTML11)}; /** * getToken state: content. */ private static final short LEX_CONTENT = 0; /** * getToken state: gt. */ private static final short LEX_GT = 1; /** * getToken state: endtag. */ private static final short LEX_ENDTAG = 2; /** * getToken state: start tag. */ private static final short LEX_STARTTAG = 3; /** * getToken state: comment. */ private static final short LEX_COMMENT = 4; /** * getToken state: doctype. */ private static final short LEX_DOCTYPE = 5; /** * getToken state: procinstr. */ private static final short LEX_PROCINSTR = 6; /** * getToken state: cdata. */ private static final short LEX_CDATA = 8; /** * getToken state: section. */ private static final short LEX_SECTION = 9; /** * getToken state: asp. */ private static final short LEX_ASP = 10; /** * getToken state: jste. */ private static final short LEX_JSTE = 11; /** * getToken state: php. 
*/ private static final short LEX_PHP = 12; /** * getToken state: xml declaration. */ private static final short LEX_XMLDECL = 13; /** * file stream. */ protected StreamIn in; /** * error output stream. */ protected PrintWriter errout; /** * for accessibility errors. */ protected short badAccess; /** * for bad style errors. */ protected short badLayout; /** * for bad char encodings. */ protected short badChars; /** * for mismatched/mispositioned form tags. */ protected short badForm; /** * count of warnings in this document. */ protected short warnings; /** * count of errors. */ protected short errors; /** * lines seen. */ protected int lines; /** * at start of current token. */ protected int columns; /** * used to collapse contiguous white space. */ protected boolean waswhite; /** * true after token has been pushed back. */ protected boolean pushed; /** * when space is moved after end tag. */ protected boolean insertspace; /** * Netscape compatibility. */ protected boolean excludeBlocks; /** * true if moved out of table. */ protected boolean exiled; /** * true if xmlns attribute on html element. */ protected boolean isvoyager; /** * bit vector of HTML versions. */ protected short versions; /** * version as given by doctype (if any). */ protected int doctype; /** * set if html or PUBLIC is missing. */ protected boolean badDoctype; /** * start of current node. */ protected int txtstart; /** * end of current node. */ protected int txtend; /** * state of lexer's finite state machine. */ protected short state; /** * current node. */ protected Node token; /** * Lexer character buffer parse tree nodes span onto this buffer which contains the concatenated text contents of * all of the elements. Lexsize must be reset for each file. Byte buffer of UTF-8 chars. */ protected byte[] lexbuf; /** * allocated. */ protected int lexlength; /** * used. */ protected int lexsize; /** * Inline stack for compatibility with Mosaic. For deferring text node. 
*/ protected Node inode; /** * for inferring inline tags. */ protected int insert; /** * stack. */ protected Stack istack; /** * start of frame. */ protected int istackbase; /** * used for cleaning up presentation markup. */ protected Style styles; /** * configuration. */ protected Configuration configuration; /** * already seen end body tag? */ protected boolean seenEndBody; /** * already seen end html tag? */ protected boolean seenEndHtml; /** * report. */ protected Report report; /** * Root node is saved here. */ protected Node root; /** * node list. */ private List nodeList; /** * Instantiates a new Lexer. * @param in StreamIn * @param configuration configuation instance * @param report report instance, for reporting errors */ public Lexer(StreamIn in, Configuration configuration, Report report) { this.report = report; this.in = in; this.lines = 1; this.columns = 1; this.state = LEX_CONTENT; this.versions = (Dict.VERS_ALL | Dict.VERS_PROPRIETARY); this.doctype = Dict.VERS_UNKNOWN; this.insert = -1; this.istack = new Stack(); this.configuration = configuration; this.nodeList = new Vector(); } /** * Creates a new node and add it to nodelist. * @return Node */ public Node newNode() { Node node = new Node(); this.nodeList.add(node); return node; } /** * Creates a new node and add it to nodelist. * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE | * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node. ASP_TAG | * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL * @param textarray array of bytes contained in the Node * @param start start position * @param end end position * @return Node */ public Node newNode(short type, byte[] textarray, int start, int end) { Node node = new Node(type, textarray, start, end); this.nodeList.add(node); return node; } /** * Creates a new node and add it to nodelist. 
* @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE | * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node. ASP_TAG | * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL * @param textarray array of bytes contained in the Node * @param start start position * @param end end position * @param element tag name * @return Node */ public Node newNode(short type, byte[] textarray, int start, int end, String element) { Node node = new Node(type, textarray, start, end, element, this.configuration.tt); this.nodeList.add(node); return node; } /** * Clones a node and add it to node list. * @param node Node * @return cloned Node */ public Node cloneNode(Node node) { Node cnode = node.cloneNode(false); this.nodeList.add(cnode); for (AttVal att = cnode.attributes; att != null; att = att.next) { if (att.asp != null) { this.nodeList.add(att.asp); } if (att.php != null) { this.nodeList.add(att.php); } } return cnode; } /** * Clones an attribute value and add eventual asp or php node to node list. * @param attrs original AttVal * @return cloned AttVal */ public AttVal cloneAttributes(AttVal attrs) { AttVal cattrs = (AttVal) attrs.clone(); for (AttVal att = cattrs; att != null; att = att.next) { if (att.asp != null) { this.nodeList.add(att.asp); } if (att.php != null) { this.nodeList.add(att.php); } } return cattrs; } /** * Update <code>oldtextarray</code> in the current nodes. * @param oldtextarray previous text array * @param newtextarray new text array */ protected void updateNodeTextArrays(byte[] oldtextarray, byte[] newtextarray) { Node node; for (int i = 0; i < this.nodeList.size(); i++) { node = (Node) (this.nodeList.get(i)); if (node.textarray == oldtextarray) { node.textarray = newtextarray; } } } /** * Adds a new line node. Used for creating preformatted text from Word2000. 
* @return new line node */ public Node newLineNode() { Node node = newNode(); node.textarray = this.lexbuf; node.start = this.lexsize; addCharToLexer('\n'); node.end = this.lexsize; return node; } /** * Has end of input stream been reached? * @return <code>true</code> if end of input stream been reached */ public boolean endOfInput() { return this.in.isEndOfStream(); } /** * Adds a byte to lexer buffer. * @param c byte to add */ public void addByte(int c) { if (this.lexsize + 1 >= this.lexlength) { while (this.lexsize + 1 >= this.lexlength) { if (this.lexlength == 0) { this.lexlength = 8192; } else { this.lexlength = this.lexlength * 2; } } byte[] temp = this.lexbuf; this.lexbuf = new byte[this.lexlength]; if (temp != null) { System.arraycopy(temp, 0, this.lexbuf, 0, temp.length); updateNodeTextArrays(temp, this.lexbuf); } } this.lexbuf[this.lexsize++] = (byte) c; this.lexbuf[this.lexsize] = (byte) '\0'; // debug } /** * Substitute the last char in buffer. * @param c new char */ public void changeChar(byte c) { if (this.lexsize > 0) { this.lexbuf[this.lexsize - 1] = c; } } /** * Store char c as UTF-8 encoded byte stream. * @param c char to store */ public void addCharToLexer(int c) { // Allow only valid XML characters. See: http://www.w3.org/TR/2004/REC-xml-20040204/#NT-Char // Fix by Pablo Mayrgundter 17-08-2004 if ((this.configuration.xmlOut || this.configuration.xHTML) // only for xml output && !((c >= 0x20 && c <= 0xD7FF) // Check the common-case first. || c == 0x9 || c == 0xA || c == 0xD // Then white-space. || (c >= 0xE000 && c <= 0xFFFD) // Then high-range unicode. 
|| (c >= 0x10000 && c <= 0x10FFFF))) { return; } int i = 0; int[] count = new int[]{0}; byte[] buf = new byte[10]; // unsigned char boolean err = EncodingUtils.encodeCharToUTF8Bytes(c, buf, null, count); if (err) { // replacement char 0xFFFD encoded as UTF-8 buf[0] = (byte) 0xEF; buf[1] = (byte) 0xBF; buf[2] = (byte) 0xBD; count[0] = 3; } for (i = 0; i < count[0]; i++) { addByte(buf[i]); // uint } } /** * Adds a string to lexer buffer. * @param str String to add */ public void addStringToLexer(String str) { for (int i = 0; i < str.length(); i++) { addCharToLexer(str.charAt(i)); } } /** * Parse an html entity. * @param mode mode */ public void parseEntity(short mode) { // No longer attempts to insert missing ';' for unknown // entities unless one was present already, since this // gives unexpected results. // // For example: <a href="something.htm?foo&bar&fred"> // was tidied to: <a href="something.htm?foo&bar;&fred;"> // rather than: <a href="something.htm?foo&bar&fred"> // // My thanks for Maurice Buxton for spotting this. // // Also Randy Waki pointed out the following case for the // 04 Aug 00 version (bug #433012): // // For example: <a href="something.htm?id=1&lang=en"> // was tidied to: <a href="something.htm?id=1⟨=en"> // rather than: <a href="something.htm?id=1&lang=en"> // // where "lang" is a known entity (#9001), but browsers would // misinterpret "⟨" because it had a value > 256. // // So the case of an apparently known entity with a value > 256 and // missing a semicolon is handled specially. // // "ParseEntity" is also a bit of a misnomer - it handles entities and // numeric character references. Invalid NCR's are now reported. 
int start; boolean first = true; boolean semicolon = false; int c, ch, startcol; String str; start = this.lexsize - 1; // to start at "&" startcol = this.in.getCurcol() - 1; while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM) { if (c == ';') { semicolon = true; break; } if (first && c == '#') { // #431953 - start RJ if (!this.configuration.ncr || "BIG5".equals(this.configuration.getInCharEncodingName()) || "SHIFTJIS".equals(this.configuration.getInCharEncodingName())) { this.in.ungetChar(c); return; } // #431953 - end RJ addCharToLexer(c); first = false; continue; } first = false; if (TidyUtils.isNamechar((char) c)) { addCharToLexer(c); continue; } // otherwise put it back this.in.ungetChar(c); break; } str = TidyUtils.getString(this.lexbuf, start, this.lexsize - start); if ("&apos".equals(str) && !configuration.xmlOut && !this.isvoyager && !configuration.xHTML) { report.entityError(this, Report.APOS_UNDEFINED, str, 39); } ch = EntityTable.getDefaultEntityTable().entityCode(str); // drops invalid numeric entities from XML mode. Fix by Pablo Mayrgundter 17-08-2004 // if ((this.configuration.xmlOut || this.configuration.xHTML) // only for xml output // && !((ch >= 0x20 && ch <= 0xD7FF) // Check the common-case first. // || ch == 0x9 || ch == 0xA || ch == 0xD // Then white-space. 
// || (ch >= 0xE000 && ch <= 0xFFFD))) // { // this.lexsize = start; // return; // } // deal with unrecognized or invalid entities // #433012 - fix by Randy Waki 17 Feb 01 // report invalid NCR's - Terry Teague 01 Sep 01 if (ch <= 0 || (ch >= 256 && c != ';')) { // set error position just before offending character this.lines = this.in.getCurline(); this.columns = startcol; if (this.lexsize > start + 1) { if (ch >= 128 && ch <= 159) { // invalid numeric character reference int c1 = 0; if ("WIN1252".equals(configuration.replacementCharEncoding)) { c1 = EncodingUtils.decodeWin1252(ch); } else if ("MACROMAN".equals(configuration.replacementCharEncoding)) { c1 = EncodingUtils.decodeMacRoman(ch); } // "or" DISCARDED_CHAR with the other errors if discarding char; otherwise default is replacing int replaceMode = c1 != 0 ? Report.REPLACED_CHAR : Report.DISCARDED_CHAR; if (c != ';') /* issue warning if not terminated by ';' */ { report.entityError(this, Report.MISSING_SEMICOLON_NCR, str, c); } report.encodingError(this, (short) (Report.INVALID_NCR | replaceMode), ch); if (c1 != 0) { // make the replacement this.lexsize = start; addCharToLexer(c1); semicolon = false; } else { /* discard */ this.lexsize = start; semicolon = false; } } else { report.entityError(this, Report.UNKNOWN_ENTITY, str, ch); } if (semicolon) { addCharToLexer(';'); } } else { // naked & report.entityError(this, Report.UNESCAPED_AMPERSAND, str, ch); } } else { // issue warning if not terminated by ';' if (c != ';') { // set error position just before offending character this.lines = this.in.getCurline(); this.columns = startcol; report.entityError(this, Report.MISSING_SEMICOLON, str, c); } this.lexsize = start; if (ch == 160 && TidyUtils.toBoolean(mode & PREFORMATTED)) { ch = ' '; } addCharToLexer(ch); if (ch == '&' && !this.configuration.quoteAmpersand) { addCharToLexer('a'); addCharToLexer('m'); addCharToLexer('p'); addCharToLexer(';'); } } } /** * Parses a tag name. 
* @return first char after the tag name */ public char parseTagName() { int c; // fold case of first char in buffer c = this.lexbuf[this.txtstart]; if (!this.configuration.xmlTags && TidyUtils.isUpper((char) c)) { c = TidyUtils.toLower((char) c); this.lexbuf[this.txtstart] = (byte) c; } while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM) { if (!TidyUtils.isNamechar((char) c)) { break; } // fold case of subsequent chars if (!this.configuration.xmlTags && TidyUtils.isUpper((char) c)) { c = TidyUtils.toLower((char) c); } addCharToLexer(c); } this.txtend = this.lexsize; return (char) c; } /** * calls addCharToLexer for any char in the string. * @param str input String */ public void addStringLiteral(String str) { int len = str.length(); for (int i = 0; i < len; i++) { addCharToLexer(str.charAt(i)); } } /** * calls addCharToLexer for any char in the string till len is reached. * @param str input String * @param len length of the substring to be added */ void addStringLiteralLen(String str, int len) { int strlen = str.length(); if (strlen < len) { len = strlen; } for (int i = 0; i < len; i++) { addCharToLexer(str.charAt(i)); } } /** * Choose what version to use for new doctype. * @return html version constant */ public short htmlVersion() { if (TidyUtils.toBoolean(versions & Dict.VERS_HTML20)) { return Dict.VERS_HTML20; } if (!(this.configuration.xmlOut | this.configuration.xmlTags | this.isvoyager) && TidyUtils.toBoolean(versions & Dict.VERS_HTML32)) { return Dict.VERS_HTML32; } if (TidyUtils.toBoolean(versions & Dict.VERS_XHTML11)) { return Dict.VERS_XHTML11; } if (TidyUtils.toBoolean(versions & Dict.VERS_HTML40_STRICT)) { return Dict.VERS_HTML40_STRICT; } if (TidyUtils.toBoolean(versions & Dict.VERS_HTML40_LOOSE)) { return Dict.VERS_HTML40_LOOSE; } if (TidyUtils.toBoolean(versions & Dict.VERS_FRAMESET)) { return Dict.VERS_FRAMESET; } return Dict.VERS_UNKNOWN; } /** * Choose what version to use for new doctype. 
* @return html version name */ public String htmlVersionName() { short guessed; int j; guessed = apparentVersion(); for (j = 0; j < W3CVERSION.length; ++j) { if (guessed == W3CVERSION[j].code) { if (this.isvoyager) { return W3CVERSION[j].voyagerName; } return W3CVERSION[j].name; } } return null; } /** * Add meta element for Tidy. If the meta tag is already present, update release date. * @param root root node * @return <code>true</code> if the tag has been added */ public boolean addGenerator(Node root) { AttVal attval; Node node; Node head = root.findHEAD(this.configuration.tt); if (head != null) { String meta = "HTML Tidy for Java (vers. " + Report.RELEASE_DATE_STRING + "), see jtidy.sourceforge.net"; for (node = head.content; node != null; node = node.next) { if (node.tag == this.configuration.tt.tagMeta) { attval = node.getAttrByName("name"); if (attval != null && attval.value != null && "generator".equalsIgnoreCase(attval.value)) { attval = node.getAttrByName("content"); if (attval != null && attval.value != null && attval.value.length() >= 9 && "HTML Tidy".equalsIgnoreCase(attval.value.substring(0, 9))) { attval.value = meta; return false; } } } } node = this.inferredTag("meta"); node.addAttribute("content", meta); node.addAttribute("name", "generator"); head.insertNodeAtStart(node); return true; } return false; } /** * Check system keywords (keywords should be uppercase). * @param doctype doctype node * @return true if doctype keywords are all uppercase */ public boolean checkDocTypeKeyWords(Node doctype) { int len = doctype.end - doctype.start; String s = TidyUtils.getString(this.lexbuf, doctype.start, len); return !(TidyUtils.findBadSubString("SYSTEM", s, s.length()) || TidyUtils.findBadSubString("PUBLIC", s, s.length()) || TidyUtils.findBadSubString("//DTD", s, s.length()) || TidyUtils.findBadSubString("//W3C", s, s.length()) || TidyUtils.findBadSubString("//EN", s, s.length())); } /** * Examine DOCTYPE to identify version. 
* @param doctype doctype node * @return version code */ public short findGivenVersion(Node doctype) { String p, s; int i, j; int len; String str1; String str2; // if root tag for doctype isn't html give up now str1 = TidyUtils.getString(this.lexbuf, doctype.start, 5); if (!"html ".equalsIgnoreCase(str1)) { return 0; } if (!checkDocTypeKeyWords(doctype)) { report.warning(this, doctype, null, Report.DTYPE_NOT_UPPER_CASE); } // give up if all we are given is the system id for the doctype str1 = TidyUtils.getString(this.lexbuf, doctype.start + 5, 7); if ("SYSTEM ".equalsIgnoreCase(str1)) { // but at least ensure the case is correct if (!str1.substring(0, 6).equals("SYSTEM")) { System.arraycopy(TidyUtils.getBytes("SYSTEM"), 0, this.lexbuf, doctype.start + 5, 6); } return 0; // unrecognized } if ("PUBLIC ".equalsIgnoreCase(str1)) { if (!str1.substring(0, 6).equals("PUBLIC")) { System.arraycopy(TidyUtils.getBytes("PUBLIC "), 0, this.lexbuf, doctype.start + 5, 6); } } else { this.badDoctype = true; } for (i = doctype.start; i < doctype.end; ++i) { if (this.lexbuf[i] == (byte) '"') { str1 = TidyUtils.getString(this.lexbuf, i + 1, 12); str2 = TidyUtils.getString(this.lexbuf, i + 1, 13); if (str1.equals("-//W3C//DTD ")) { // compute length of identifier e.g. "HTML 4.0 Transitional" for (j = i + 13; j < doctype.end && this.lexbuf[j] != (byte) '/'; ++j) { // } len = j - i - 13; p = TidyUtils.getString(this.lexbuf, i + 13, len); for (j = 1; j < W3CVERSION.length; ++j) { s = W3CVERSION[j].name; if (len == s.length() && s.equals(p)) { return W3CVERSION[j].code; } } // else unrecognized version } else if (str2.equals("-//IETF//DTD ")) { // compute length of identifier e.g. 
"HTML 2.0" for (j = i + 14; j < doctype.end && this.lexbuf[j] != (byte) '/'; ++j) { // } len = j - i - 14; p = TidyUtils.getString(this.lexbuf, i + 14, len); s = W3CVERSION[0].name; if (len == s.length() && s.equals(p)) { return W3CVERSION[0].code; } // else unrecognized version } break; } } return 0; } /** * Fix xhtml namespace. * @param root root Node * @param profile current profile */ public void fixHTMLNameSpace(Node root, String profile) { Node node; AttVal attr; node = root.content; while (node != null && node.tag != this.configuration.tt.tagHtml) { node = node.next; } if (node != null) { for (attr = node.attributes; attr != null; attr = attr.next) { if (attr.attribute.equals("xmlns")) { break; } } if (attr != null) { if (!attr.value.equals(profile)) { report.warning(this, node, null, Report.INCONSISTENT_NAMESPACE); attr.value = profile; } } else { attr = new AttVal(node.attributes, null, '"', "xmlns", profile); attr.dict = AttributeTable.getDefaultAttributeTable().findAttribute(attr); node.attributes = attr; } } } /** * Put DOCTYPE declaration between the <:?xml version "1.0" ... ?> declaration, if any, and the * <code>html</code> tag. Should also work for any comments, etc. that may precede the <code>html</code> tag. * @param root root node * @return new doctype node */ Node newXhtmlDocTypeNode(Node root) { Node html = root.findHTML(this.configuration.tt); if (html == null) { return null; } Node newdoctype = newNode(); newdoctype.setType(Node.DOCTYPE_TAG); newdoctype.next = html; newdoctype.parent = root; newdoctype.prev = null; if (html == root.content) { // No <?xml ... ?> declaration. root.content.prev = newdoctype; root.content = newdoctype; newdoctype.prev = null; } else { // we have an <?xml ... ?> declaration. newdoctype.prev = html.prev; newdoctype.prev.next = newdoctype; } html.prev = newdoctype; return newdoctype; } /** * Adds a new xhtml doctype to the document. 
* @param root root node * @return <code>true</code> if a doctype has been added */ public boolean setXHTMLDocType(Node root) { String fpi = " "; String sysid = ""; String namespace = XHTML_NAMESPACE; String dtdsub = null; Node doctype; int dtdlen = 0; doctype = root.findDocType(); fixHTMLNameSpace(root, namespace); // #427839 - fix by Evan Lenz 05 Sep 00 if (this.configuration.docTypeMode == Configuration.DOCTYPE_OMIT) { if (doctype != null) { Node.discardElement(doctype); } return true; } if (this.configuration.docTypeMode == Configuration.DOCTYPE_AUTO) { // see what flavor of XHTML this document matches if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML40_STRICT)) { // use XHTML strict fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; sysid = VOYAGER_STRICT; } else if (TidyUtils.toBoolean(this.versions & Dict.VERS_FRAMESET)) { // use XHTML frames fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN"; sysid = VOYAGER_FRAMESET; } else if (TidyUtils.toBoolean(this.versions & Dict.VERS_LOOSE)) { fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; sysid = VOYAGER_LOOSE; } else if (TidyUtils.toBoolean(this.versions & Dict.VERS_XHTML11)) { // use XHTML 1.1 fpi = "-//W3C//DTD XHTML 1.1//EN"; sysid = VOYAGER_11; } else { // proprietary fpi = null; sysid = ""; if (doctype != null)// #473490 - fix by Bjšrn Hšhrmann 10 Oct 01 { Node.discardElement(doctype); } } } else if (this.configuration.docTypeMode == Configuration.DOCTYPE_STRICT) { fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; sysid = VOYAGER_STRICT; } else if (this.configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) { fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; sysid = VOYAGER_LOOSE; } if (this.configuration.docTypeMode == Configuration.DOCTYPE_USER && this.configuration.docTypeStr != null) { fpi = this.configuration.docTypeStr; sysid = ""; } if (fpi == null) { return false; } if (doctype != null) { // Look for internal DTD subset if (configuration.xHTML || configuration.xmlOut) { int len = doctype.end - doctype.start + 1; String 
start = TidyUtils.getString(this.lexbuf, doctype.start, len); int dtdbeg = start.indexOf('['); if (dtdbeg >= 0) { int dtdend = start.substring(dtdbeg).indexOf(']'); if (dtdend >= 0) { dtdlen = dtdend + 1; dtdsub = start.substring(dtdbeg); } } } } else { if ((doctype = newXhtmlDocTypeNode(root)) == null) { return false; } } this.txtstart = this.lexsize; this.txtend = this.lexsize; // add public identifier addStringLiteral("html PUBLIC "); // check if the fpi is quoted or not if (fpi.charAt(0) == '"') { addStringLiteral(fpi); } else { addStringLiteral("\""); addStringLiteral(fpi); addStringLiteral("\""); } if (this.configuration.wraplen != 0 && sysid.length() + 6 >= this.configuration.wraplen) { addStringLiteral("\n\""); } else { // FG: don't wrap addStringLiteral(" \""); } // add system identifier addStringLiteral(sysid); addStringLiteral("\""); if (dtdlen > 0 && dtdsub != null) { addCharToLexer(' '); addStringLiteralLen(dtdsub, dtdlen); } this.txtend = this.lexsize; int length = this.txtend - this.txtstart; doctype.textarray = new byte[length]; System.arraycopy(this.lexbuf, this.txtstart, doctype.textarray, 0, length); doctype.start = 0; doctype.end = length; return false; } /** * Return the html version used in document. 
* @return version code */ public short apparentVersion() { switch (this.doctype) { case Dict.VERS_UNKNOWN : return htmlVersion(); case Dict.VERS_HTML20 : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML20)) { return Dict.VERS_HTML20; } break; case Dict.VERS_HTML32 : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML32)) { return Dict.VERS_HTML32; } break; // to replace old version by new case Dict.VERS_HTML40_STRICT : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML40_STRICT)) { return Dict.VERS_HTML40_STRICT; } break; case Dict.VERS_HTML40_LOOSE : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML40_LOOSE)) { return Dict.VERS_HTML40_LOOSE; } break; // to replace old version by new case Dict.VERS_FRAMESET : if (TidyUtils.toBoolean(this.versions & Dict.VERS_FRAMESET)) { return Dict.VERS_FRAMESET; } break; case Dict.VERS_XHTML11 : if (TidyUtils.toBoolean(this.versions & Dict.VERS_XHTML11)) { return Dict.VERS_XHTML11; } break; default : // should never reach here break; } // kludge to avoid error appearing at end of file // it would be better to note the actual position // when first encountering the doctype declaration this.lines = 1; this.columns = 1; report.warning(this, null, null, Report.INCONSISTENT_VERSION); return this.htmlVersion(); } /** * Fixup doctype if missing. 
* @param root root node * @return <code>false</code> if current version has not been identified */ public boolean fixDocType(Node root) { Node doctype; int guessed = Dict.VERS_HTML40_STRICT, i; if (this.badDoctype) { report.warning(this, null, null, Report.MALFORMED_DOCTYPE); } doctype = root.findDocType(); if (this.configuration.docTypeMode == Configuration.DOCTYPE_OMIT) { if (doctype != null) { Node.discardElement(doctype); } return true; } if (this.configuration.xmlOut) { return true; } if (this.configuration.docTypeMode == Configuration.DOCTYPE_STRICT) { Node.discardElement(doctype); doctype = null; guessed = Dict.VERS_HTML40_STRICT; } else if (this.configuration.docTypeMode == Configuration.DOCTYPE_LOOSE) { Node.discardElement(doctype); doctype = null; guessed = Dict.VERS_HTML40_LOOSE; } else if (this.configuration.docTypeMode == Configuration.DOCTYPE_AUTO) { if (doctype != null) { if (this.doctype == Dict.VERS_UNKNOWN) { return false; } switch (this.doctype) { case Dict.VERS_UNKNOWN : return false; case Dict.VERS_HTML20 : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML20)) { return true; } break; // to replace old version by new case Dict.VERS_HTML32 : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML32)) { return true; } break; // to replace old version by new case Dict.VERS_HTML40_STRICT : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML40_STRICT)) { return true; } break; // to replace old version by new case Dict.VERS_HTML40_LOOSE : if (TidyUtils.toBoolean(this.versions & Dict.VERS_HTML40_LOOSE)) { return true; } break; // to replace old version by new case Dict.VERS_FRAMESET : if (TidyUtils.toBoolean(this.versions & Dict.VERS_FRAMESET)) { return true; } break; // to replace old version by new case Dict.VERS_XHTML11 : if (TidyUtils.toBoolean(this.versions & Dict.VERS_XHTML11)) { return true; } break; // to replace old version by new default : // should never reach here break; } // INCONSISTENT_VERSION warning is now issued by 
ApparentVersion() } // choose new doctype guessed = htmlVersion(); } if (guessed == Dict.VERS_UNKNOWN) { return false; } // for XML use the Voyager system identifier if (this.configuration.xmlOut || this.configuration.xmlTags || this.isvoyager) { if (doctype != null) { Node.discardElement(doctype); } fixHTMLNameSpace(root, XHTML_NAMESPACE); // Namespace is the same for all XHTML variants // Also, don't return yet. Still need to add DOCTYPE declaration. // // for (i = 0; i < W3CVersion.length; ++i) // { // if (guessed == W3CVersion[i].code) // { // fixHTMLNameSpace(root, W3CVersion[i].profile); // break; // } // } // return true; } if (doctype == null) { if ((doctype = newXhtmlDocTypeNode(root)) == null) { return false; } } this.txtstart = this.lexsize; this.txtend = this.lexsize; // use the appropriate public identifier addStringLiteral("html PUBLIC "); if (this.configuration.docTypeMode == Configuration.DOCTYPE_USER && this.configuration.docTypeStr != null && this.configuration.docTypeStr.length() > 0) { // check if the fpi is quoted or not if (this.configuration.docTypeStr.charAt(0) == '"') { addStringLiteral(this.configuration.docTypeStr); } else { addStringLiteral("\""); // #431889 - fix by Dave Bryan 04 Jan 2001 addStringLiteral(this.configuration.docTypeStr); addStringLiteral("\""); // #431889 - fix by Dave Bryan 04 Jan 2001 } } else if (guessed == Dict.VERS_HTML20) { addStringLiteral("\"-//IETF//DTD HTML 2.0//EN\""); } else { addStringLiteral("\"-//W3C//DTD "); for (i = 0; i < W3CVERSION.length; ++i) { if (guessed == W3CVERSION[i].code) { addStringLiteral(W3CVERSION[i].name); break; } } addStringLiteral("//EN\""); } this.txtend = this.lexsize; int length = this.txtend - this.txtstart; doctype.textarray = new byte[length]; System.arraycopy(this.lexbuf, this.txtstart, doctype.textarray, 0, length); doctype.start = 0; doctype.end = length; return true; } /** * Ensure XML document starts with <code><?XML version="1.0"?></code>. 
Add encoding attribute if not using * ASCII or UTF-8 output. * @param root root node * @return always true */ public boolean fixXmlDecl(Node root) { Node xml; AttVal version; AttVal encoding; if (root.content != null && root.content.type == Node.XML_DECL) { xml = root.content; } else { xml = newNode(Node.XML_DECL, this.lexbuf, 0, 0); root.insertNodeAtStart(xml); } version = xml.getAttrByName("version"); encoding = xml.getAttrByName("encoding"); // We need to insert a check if declared encoding and output encoding mismatch // and fix the Xml declaration accordingly!!! if (encoding == null && !"UTF8".equals(this.configuration.getOutCharEncodingName())) { if ("ISO8859_1".equals(this.configuration.getOutCharEncodingName())) { xml.addAttribute("encoding", "iso-8859-1"); } if ("ISO2022".equals(this.configuration.getOutCharEncodingName())) { xml.addAttribute("encoding", "iso-2022"); } } if (version == null) { xml.addAttribute("version", "1.0"); } return true; } /** * Generates and inserts a new node. * @param name tag name * @return generated node */ public Node inferredTag(String name) { Node node; node = newNode(Node.START_TAG, this.lexbuf, this.txtstart, this.txtend, name); node.implicit = true; return node; } private static final int CDATA_INTERMEDIATE = 0; private static final int CDATA_STARTTAG = 1; private static final int CDATA_ENDTAG = 2; /** * Create a text node for the contents of a CDATA element like style or script which ends with </foo> for some * foo. 
* @param container container node
     * @return cdata node, or <code>null</code> when a <code>script</code> element with a <code>src</code> attribute
     * and no content so far is directly followed by a start tag
     */
    public Node getCDATA(Node container) {
        int start = 0; // buffer index where the tag name currently being examined begins
        int nested = 0; // number of start tags with the container's name seen inside the content
        int state = CDATA_INTERMEDIATE;
        int c;
        boolean isEmpty = true; // true until a non-whitespace content character has been seen
        boolean matches = false;
        boolean hasSrc = container.getAttrByName("src") != null;

        this.lines = this.in.getCurline();
        this.columns = this.in.getCurcol();
        this.waswhite = false;
        this.txtstart = this.lexsize;
        this.txtend = this.lexsize;

        /* seen start tag, look for matching end tag */
        while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM) {
            addCharToLexer(c);
            txtend = lexsize;

            if (state == CDATA_INTERMEDIATE) {
                if (c != '<') {
                    if (isEmpty && !TidyUtils.isWhite((char) c)) {
                        isEmpty = false;
                    }
                    continue;
                }

                // look at the character after '<' to decide what kind of construct this is
                c = in.readChar();

                if (TidyUtils.isLetter((char) c)) {
                    /* <head><script src=foo><meta name=foo content=bar>*/
                    if (hasSrc && isEmpty && container.tag == configuration.tt.tagScript) {
                        /* ReportError(doc, container, NULL, MISSING_ENDTAG_FOR); */
                        // drop what was collected and push "<" + letter back for the caller to parse as markup
                        lexsize = txtstart;
                        in.ungetChar(c);
                        in.ungetChar('<');
                        return null;
                    }
                    addCharToLexer(c);
                    start = lexsize - 1;
                    state = CDATA_STARTTAG;
                } else if (c == '/') {
                    addCharToLexer(c);

                    c = in.readChar();

                    if (!TidyUtils.isLetter((char) c)) {
                        in.ungetChar(c);
                        continue;
                    }
                    // the letter is pushed back so that it is re-read (and buffered) by the main loop
                    in.ungetChar(c);

                    start = lexsize;
                    state = CDATA_ENDTAG;
                } else if (c == '\\') {
                    /* recognize document.write("<script><\/script>") */
                    addCharToLexer(c);

                    c = in.readChar();

                    if (c != '/') {
                        in.ungetChar(c);
                        continue;
                    }

                    addCharToLexer(c);
                    c = in.readChar();

                    if (!TidyUtils.isLetter((char) c)) {
                        in.ungetChar(c);
                        continue;
                    }
                    in.ungetChar(c);

                    start = lexsize;
                    state = CDATA_ENDTAG;
                } else {
                    in.ungetChar(c);
                }
            } else if (state == CDATA_STARTTAG) {
                /* '<' + Letter found */
                if (TidyUtils.isLetter((char) c)) {
                    continue;
                }

                // name complete: a start tag named like the container opens another nesting level
                matches = container.element.equalsIgnoreCase(TidyUtils.getString(lexbuf, start, container.element.length()));
                if (matches) {
                    nested++;
                }

                state = CDATA_INTERMEDIATE;
            } else if (state == CDATA_ENDTAG) {
                /* '<' + '/' + Letter found */
                if (TidyUtils.isLetter((char) c)) {
                    continue;
                }

                matches = container.element.equalsIgnoreCase(TidyUtils.getString(lexbuf, start, container.element.length()));

                if (isEmpty && !matches) {
                    /* ReportError(doc, container, NULL, MISSING_ENDTAG_FOR); */

                    // push the whole end tag back onto the input, last character first
                    for (int i = lexsize - 1; i >= start; --i) {
                        in.ungetChar(lexbuf[i]);
                    }
                    in.ungetChar('/');
                    in.ungetChar('<');
                    break;
                }

                if (matches && nested-- <= 0) {
                    // the container's own end tag: push it back and remove it from the collected text
                    for (int i = lexsize - 1; i >= start; --i) {
                        in.ungetChar(lexbuf[i]);
                    }
                    in.ungetChar('/');
                    in.ungetChar('<');
                    lexsize -= (lexsize - start) + 2;
                    break;
                } else if (lexbuf[start - 2] != '\\') {
                    /* if the end tag is not already escaped using backslash */
                    lines = in.getCurline();
                    columns = in.getCurcol();
                    columns -= 3;
                    report.warning(this, null, null, Report.BAD_CDATA_CONTENT);

                    /* if javascript insert backslash before / */
                    if (container.isJavaScript()) {
                        // shift the buffer tail one byte to the right to make room for the backslash
                        // NOTE(review): writes lexbuf[lexsize] directly; presumably the buffer always has
                        // spare capacity here - confirm against addByte
                        for (int i = lexsize; i > start-1; --i) {
                            lexbuf[i] = lexbuf[i-1];
                        }
                        lexbuf[start-1] = '\\';
                        lexsize++;
                    }
                }
                state = CDATA_INTERMEDIATE;
            }
        }

        if (isEmpty) {
            // nothing but whitespace was seen: lexsize and txtstart are both set to txtend,
            // so the returned node covers no text
            lexsize = txtstart = txtend;
        } else {
            txtend = lexsize;
        }

        if (c == StreamIn.END_OF_STREAM) {
            report.warning(this, container, null, Report.MISSING_ENDTAG_FOR);
        }

        /* this was disabled for some reason... */
        // #if 0
        // if (lexer->txtend > lexer->txtstart)
        // return TextToken(lexer);
        // else
        // return NULL;
        // #else
        return newNode(Node.TEXT_NODE, lexbuf, txtstart, txtend);
        // #endif
    }

    /**
     * Pushes the current token back by setting the <code>pushed</code> flag; <code>getToken</code> checks this
     * flag first and may hand out the stored token again instead of reading new input.
     */
    public void ungetToken() {
        this.pushed = true;
    }

    /**
     * Gets a token.
* @param mode one of the following: * <ul> * <li><code>MixedContent</code>-- for elements which don't accept PCDATA</li> * <li><code>Preformatted</code>-- white spacepreserved as is</li> * <li><code>IgnoreMarkup</code>-- for CDATA elements such as script, style</li> * </ul> * @return next Node */ public Node getToken(short mode) { int c = 0; int badcomment = 0; // pass by reference boolean[] isempty = new boolean[1]; boolean inDTDSubset = false; AttVal attributes = null; if (this.pushed) { // duplicate inlines in preference to pushed text nodes when appropriate if (this.token.type != Node.TEXT_NODE || (this.insert == -1 && this.inode == null)) { this.pushed = false; return this.token; } } // at start of block elements, unclosed inline if (this.insert != -1 || this.inode != null) { return insertedToken(); } this.lines = this.in.getCurline(); this.columns = this.in.getCurcol(); this.waswhite = false; this.txtstart = this.lexsize; this.txtend = this.lexsize; while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM) { // FG fix for [427846] different from tidy // if (this.insertspace && (!TidyUtils.toBoolean(mode & IGNORE_WHITESPACE))) if (this.insertspace && mode != IGNORE_WHITESPACE) { addCharToLexer(' '); } if (this.insertspace && (!TidyUtils.toBoolean(mode & IGNORE_WHITESPACE))) { this.waswhite = true; this.insertspace = false; } // treat \r\n as \n and \r as \n if (c == '\r') { c = this.in.readChar(); if (c != '\n') { this.in.ungetChar(c); } c = '\n'; } addCharToLexer(c); switch (this.state) { case LEX_CONTENT : // element content // Discard white space if appropriate. // Its cheaper to do this here rather than in parser methods for elements that // don't have mixed content. 
if (TidyUtils.isWhite((char) c) && (mode == IGNORE_WHITESPACE) && this.lexsize == this.txtstart + 1) { --this.lexsize; this.waswhite = false; this.lines = this.in.getCurline(); this.columns = this.in.getCurcol(); continue; } if (c == '<') { this.state = LEX_GT; continue; } if (TidyUtils.isWhite((char) c)) { // was previous char white? if (this.waswhite) { if (mode != PREFORMATTED && mode != IGNORE_MARKUP) { --this.lexsize; this.lines = this.in.getCurline(); this.columns = this.in.getCurcol(); } } else { // prev char wasn't white this.waswhite = true; if (mode != PREFORMATTED && mode != IGNORE_MARKUP && c != ' ') { changeChar((byte) ' '); } } continue; } else if (c == '&' && mode != IGNORE_MARKUP) { parseEntity(mode); } // this is needed to avoid trimming trailing whitespace if (mode == IGNORE_WHITESPACE) { mode = MIXED_CONTENT; } this.waswhite = false; continue; case LEX_GT : // < // check for endtag if (c == '/') { c = this.in.readChar(); if (c == StreamIn.END_OF_STREAM) { this.in.ungetChar(c); continue; } addCharToLexer(c); if (TidyUtils.isLetter((char) c)) { this.lexsize -= 3; this.txtend = this.lexsize; this.in.ungetChar(c); this.state = LEX_ENDTAG; this.lexbuf[this.lexsize] = (byte) '\0'; // debug // changed from // this.in.curcol -= 2; this.columns -= 2; // if some text before the </ return it now if (this.txtend > this.txtstart) { // trim space char before end tag if (mode == IGNORE_WHITESPACE && this.lexbuf[this.lexsize - 1] == (byte) ' ') { this.lexsize -= 1; this.txtend = this.lexsize; } this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } continue; // no text so keep going } // otherwise treat as CDATA this.waswhite = false; this.state = LEX_CONTENT; continue; } if (mode == IGNORE_MARKUP) { // otherwise treat as CDATA this.waswhite = false; this.state = LEX_CONTENT; continue; } // look out for comments, doctype or marked sections this isn't quite right, but its getting there if (c == '!') { c = 
this.in.readChar(); if (c == '-') { c = this.in.readChar(); if (c == '-') { this.state = LEX_COMMENT; // comment this.lexsize -= 2; this.txtend = this.lexsize; // if some text before < return it now if (this.txtend > this.txtstart) { this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } this.txtstart = this.lexsize; continue; } report.warning(this, null, null, Report.MALFORMED_COMMENT); } else if (c == 'd' || c == 'D') { this.state = LEX_DOCTYPE; // doctype this.lexsize -= 2; this.txtend = this.lexsize; mode = IGNORE_WHITESPACE; // skip until white space or '>' for (;;) { c = this.in.readChar(); if (c == StreamIn.END_OF_STREAM || c == '>') { this.in.ungetChar(c); break; } if (!TidyUtils.isWhite((char) c)) { continue; } // and skip to end of whitespace for (;;) { c = this.in.readChar(); if (c == StreamIn.END_OF_STREAM || c == '>') { this.in.ungetChar(c); break; } if (TidyUtils.isWhite((char) c)) { continue; } this.in.ungetChar(c); break; } break; } // if some text before < return it now if (this.txtend > this.txtstart) { this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } this.txtstart = this.lexsize; continue; } else if (c == '[') { // Word 2000 embeds <![if ...]> ... 
<![endif]> sequences this.lexsize -= 2; this.state = LEX_SECTION; this.txtend = this.lexsize; // if some text before < return it now if (this.txtend > this.txtstart) { this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } this.txtstart = this.lexsize; continue; } // otherwise swallow chars up to and including next '>' while (true) { c = this.in.readChar(); if (c == '>') { break; } if (c == -1) { this.in.ungetChar(c); break; } } this.lexsize -= 2; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; continue; } // processing instructions if (c == '?') { this.lexsize -= 2; this.state = LEX_PROCINSTR; this.txtend = this.lexsize; // if some text before < return it now if (this.txtend > this.txtstart) { this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } this.txtstart = this.lexsize; continue; } // Microsoft ASP's e.g. <% ... server-code ... %> if (c == '%') { this.lexsize -= 2; this.state = LEX_ASP; this.txtend = this.lexsize; // if some text before < return it now if (this.txtend > this.txtstart) { this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } this.txtstart = this.lexsize; continue; } // Netscapes JSTE e.g. <# ... server-code ... 
#> if (c == '#') { this.lexsize -= 2; this.state = LEX_JSTE; this.txtend = this.lexsize; // if some text before < return it now if (this.txtend > this.txtstart) { this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } this.txtstart = this.lexsize; continue; } // check for start tag if (TidyUtils.isLetter((char) c)) { this.in.ungetChar(c); // push back letter this.lexsize -= 2; // discard " <" + letter this.txtend = this.lexsize; this.state = LEX_STARTTAG; // ready to read tag name // if some text before < return it now if (this.txtend > this.txtstart) { this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } continue; // no text so keep going } // otherwise treat as CDATA this.state = LEX_CONTENT; this.waswhite = false; continue; case LEX_ENDTAG : // </letter this.txtstart = this.lexsize - 1; // changed from // this.in.curcol -= 2; this.columns -= 2; c = parseTagName(); this.token = newNode(Node.END_TAG, // create endtag token this.lexbuf, this.txtstart, this.txtend, TidyUtils.getString(this.lexbuf, this.txtstart, this.txtend - this.txtstart)); this.lexsize = this.txtstart; this.txtend = this.txtstart; // skip to '>' while (TidyUtils.isWhite((char) c)) { c = this.in.readChar(); } if (c == StreamIn.END_OF_STREAM) { this.in.ungetChar(c); report.attrError(this, this.token, null, Report.UNEXPECTED_GT); continue; } // should be at the '>' if we're not, assume one if (c != '>') { this.in.ungetChar(c); c = '>'; report.attrError(this, this.token, null, Report.UNEXPECTED_GT); } this.state = LEX_CONTENT; this.waswhite = false; return this.token; // the endtag token case LEX_STARTTAG : // first letter of tagname this.txtstart = this.lexsize - 1; // set txtstart to first letter c = parseTagName(); isempty[0] = false; attributes = null; this.token = newNode( (isempty[0] ? 
Node.START_END_TAG : Node.START_TAG), this.lexbuf, this.txtstart, this.txtend, TidyUtils.getString(this.lexbuf, this.txtstart, this.txtend - this.txtstart)); // parse attributes, consuming closing ">" if (c != '>') { if (c == '/') { this.in.ungetChar(c); } attributes = parseAttrs(isempty); } if (isempty[0]) { this.token.type = Node.START_END_TAG; } this.token.attributes = attributes; this.lexsize = this.txtstart; this.txtend = this.txtstart; // swallow newline following start tag // special check needed for CRLF sequence // this doesn't apply to empty elements // nor to preformatted content that needs escaping if ( (mode != PREFORMATTED || preContent(this.token)) && (this.token.expectsContent() || this.token.tag == this.configuration.tt.tagBr)) { c = this.in.readChar(); if (c == '\r') { c = this.in.readChar(); if (c != '\n') { this.in.ungetChar(c); } } else if (c != '\n' && c != '\f') { this.in.ungetChar(c); } this.waswhite = true; // to swallow leading whitespace } else { this.waswhite = false; } this.state = LEX_CONTENT; if (this.token.tag == null) { report.error(this, null, this.token, Report.UNKNOWN_ELEMENT); } else if (!this.configuration.xmlTags) { constrainVersion(this.token.tag.versions); if (TidyUtils.toBoolean(this.token.tag.versions & Dict.VERS_PROPRIETARY)) { // #427810 - fix by Gary Deschaines 24 May 00 if (this.configuration.makeClean && (this.token.tag != this.configuration.tt.tagNobr && // this.token.tag != this.configuration.tt.tagWbr)) { report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT); } // #427810 - fix by Terry Teague 2 Jul 01 else if (!this.configuration.makeClean) { report.warning(this, null, this.token, Report.PROPRIETARY_ELEMENT); } } if (this.token.tag.getChkattrs() != null) { this.token.tag.getChkattrs().check(this, this.token); } else { this.token.checkAttributes(this); } // should this be called before attribute checks? 
this.token.repairDuplicateAttributes(this); } return this.token; // return start tag case LEX_COMMENT : // seen <!-- so look for --> if (c != '-') { continue; } c = this.in.readChar(); addCharToLexer(c); if (c != '-') { continue; } end_comment : while (true) { c = this.in.readChar(); if (c == '>') { if (badcomment != 0) { report.warning(this, null, null, Report.MALFORMED_COMMENT); } this.txtend = this.lexsize - 2; // AQ 8Jul2000 this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.COMMENT_TAG, this.lexbuf, this.txtstart, this.txtend); // now look for a line break c = this.in.readChar(); if (c == '\r') { c = this.in.readChar(); if (c != '\n') { this.token.linebreak = true; } } if (c == '\n') { this.token.linebreak = true; } else { this.in.ungetChar(c); } return this.token; } // note position of first such error in the comment if (badcomment == 0) { this.lines = this.in.getCurline(); this.columns = this.in.getCurcol() - 3; } badcomment++; if (this.configuration.fixComments) { this.lexbuf[this.lexsize - 2] = (byte) '='; } addCharToLexer(c); // if '-' then look for '>' to end the comment if (c != '-') { break end_comment; } } // otherwise continue to look for --> this.lexbuf[this.lexsize - 2] = (byte) '='; continue; case LEX_DOCTYPE : // seen <!d so look for '> ' munging whitespace if (TidyUtils.isWhite((char) c)) { if (this.waswhite) { this.lexsize -= 1; } this.waswhite = true; } else { this.waswhite = false; } if (inDTDSubset) { if (c == ']') { inDTDSubset = false; } } else if (c == '[') { inDTDSubset = true; } if (inDTDSubset || c != '>') { continue; } this.lexsize -= 1; this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.DOCTYPE_TAG, this.lexbuf, this.txtstart, this.txtend); // make a note of the version named by the doctype this.doctype = findGivenVersion(this.token); return this.token; case LEX_PROCINSTR : // 
seen <? so look for '> ' // check for PHP preprocessor instructions <?php ... ?> if (this.lexsize - this.txtstart == 3) { if ((TidyUtils.getString(this.lexbuf, this.txtstart, 3)).equals("php")) { this.state = LEX_PHP; continue; } } if (this.lexsize - this.txtstart == 4) { if ((TidyUtils.getString(this.lexbuf, this.txtstart, 3)).equals("xml") && TidyUtils.isWhite((char) this.lexbuf[this.txtstart + 3])) { this.state = LEX_XMLDECL; attributes = null; continue; } } if (this.configuration.xmlPIs) // insist on ?> as terminator { if (c != '?') { continue; } // now look for '>' c = this.in.readChar(); if (c == StreamIn.END_OF_STREAM) { report.warning(this, null, null, Report.UNEXPECTED_END_OF_FILE); this.in.ungetChar(c); continue; } addCharToLexer(c); } if (c != '>') { continue; } this.lexsize -= 1; this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.PROC_INS_TAG, this.lexbuf, this.txtstart, this.txtend); return this.token; case LEX_ASP : // seen <% so look for "%> " if (c != '%') { continue; } // now look for '>' c = this.in.readChar(); if (c != '>') { this.in.ungetChar(c); continue; } this.lexsize -= 1; this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.ASP_TAG, this.lexbuf, this.txtstart, this.txtend); return this.token; case LEX_JSTE : // seen <# so look for "#> " if (c != '#') { continue; } // now look for '>' c = this.in.readChar(); if (c != '>') { this.in.ungetChar(c); continue; } this.lexsize -= 1; this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.JSTE_TAG, this.lexbuf, this.txtstart, this.txtend); return this.token; case LEX_PHP : // seen " <?php" so look for "?> " if (c != '?') { continue; } // now look for '>' c = this.in.readChar(); if (c != '>') { this.in.ungetChar(c); continue; } 
this.lexsize -= 1; this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.PHP_TAG, this.lexbuf, this.txtstart, this.txtend); return this.token; case LEX_XMLDECL : // seen "<?xml" so look for "?>" if (TidyUtils.isWhite((char) c) && c != '?') { continue; } // get pseudo-attribute if (c != '?') { String name; Node[] asp = new Node[1]; Node[] php = new Node[1]; AttVal av = new AttVal(); int[] pdelim = new int[1]; isempty[0] = false; this.in.ungetChar(c); name = this.parseAttribute(isempty, asp, php); av.attribute = name; av.value = this.parseValue(name, true, isempty, pdelim); av.delim = pdelim[0]; av.next = attributes; attributes = av; // continue; } // now look for '>' c = this.in.readChar(); if (c != '>') { this.in.ungetChar(c); continue; } this.lexsize -= 1; this.txtend = this.txtstart; this.lexbuf[this.txtend] = '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.XML_DECL, this.lexbuf, this.txtstart, this.txtend); this.token.attributes = attributes; return this.token; case LEX_SECTION : // seen " <![" so look for "]> " if (c == '[') { if (this.lexsize == (this.txtstart + 6) && (TidyUtils.getString(this.lexbuf, this.txtstart, 6)).equals("CDATA[")) { this.state = LEX_CDATA; this.lexsize -= 6; continue; } } if (c != ']') { continue; } // now look for '>' c = this.in.readChar(); if (c != '>') { this.in.ungetChar(c); continue; } this.lexsize -= 1; this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.SECTION_TAG, this.lexbuf, this.txtstart, this.txtend); return this.token; case LEX_CDATA : // seen " <![CDATA[" so look for "]]> " if (c != ']') { continue; } // now look for ']' c = this.in.readChar(); if (c != ']') { this.in.ungetChar(c); continue; } // now look for '>' c = this.in.readChar(); if (c != '>') { this.in.ungetChar(c); continue; } this.lexsize -= 1; 
this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.CDATA_TAG, this.lexbuf, this.txtstart, this.txtend); return this.token; default : // should never reach here break; } } if (this.state == LEX_CONTENT) // text string { this.txtend = this.lexsize; if (this.txtend > this.txtstart) { this.in.ungetChar(c); if (this.lexbuf[this.lexsize - 1] == (byte) ' ') { this.lexsize -= 1; this.txtend = this.lexsize; } this.token = newNode(Node.TEXT_NODE, this.lexbuf, this.txtstart, this.txtend); return this.token; } } else if (this.state == LEX_COMMENT) // comment { if (c == StreamIn.END_OF_STREAM) { report.warning(this, null, null, Report.MALFORMED_COMMENT); } this.txtend = this.lexsize; this.lexbuf[this.lexsize] = (byte) '\0'; this.state = LEX_CONTENT; this.waswhite = false; this.token = newNode(Node.COMMENT_TAG, this.lexbuf, this.txtstart, this.txtend); return this.token; } return null; } /** * parser for ASP within start tags Some people use ASP for to customize attributes Tidy isn't really well suited to * dealing with ASP This is a workaround for attributes, but won't deal with the case where the ASP is used to * tailor the attribute value. Here is an example of a work around for using ASP in attribute values: * <code>href='<%=rsSchool.Fields("ID").Value%>'</code> where the ASP that generates the attribute value is * masked from Tidy by the quotemarks. 
* <p>
     * Characters are copied into the lexer buffer until the closing <code>%&gt;</code> has been read or the input
     * ends. The closing <code>%&gt;</code> is removed from the buffer again and is not part of the node text. If the
     * input ends before a terminator is found, everything that was buffered is kept as the node text.
     * @return ASP node covering the buffered text, or <code>null</code> if no text was buffered
     */
    public Node parseAsp()
    {
        Node asp = null;

        this.txtstart = this.lexsize;

        // Only strip the two terminator bytes when they were really read. Subtracting unconditionally would drop
        // content (or move lexsize below txtstart) when the document is truncated inside the block.
        if (readServerBlock('%'))
        {
            this.lexsize -= 2;
        }
        this.txtend = this.lexsize;

        if (this.txtend > this.txtstart)
        {
            asp = newNode(Node.ASP_TAG, this.lexbuf, this.txtstart, this.txtend);
        }

        // the next token starts after the ASP text
        this.txtstart = this.txtend;
        return asp;
    }

    /**
     * PHP is like ASP but is based upon XML processing instructions, e.g. <code>&lt;?php ... ?&gt;</code>.
     * <p>
     * Characters are copied into the lexer buffer until the closing <code>?&gt;</code> has been read or the input
     * ends. The closing <code>?&gt;</code> is removed from the buffer again and is not part of the node text. If the
     * input ends before a terminator is found, everything that was buffered is kept as the node text.
     * @return PHP node covering the buffered text, or <code>null</code> if no text was buffered
     */
    public Node parsePhp()
    {
        Node php = null;

        this.txtstart = this.lexsize;

        // see parseAsp(): strip "?>" only if it was actually read
        if (readServerBlock('?'))
        {
            this.lexsize -= 2;
        }
        this.txtend = this.lexsize;

        if (this.txtend > this.txtstart)
        {
            php = newNode(Node.PHP_TAG, this.lexbuf, this.txtstart, this.txtend);
        }

        // the next token starts after the PHP text
        this.txtstart = this.txtend;
        return php;
    }

    /**
     * Copies input into the lexer buffer up to and including the two character terminator made of
     * <code>marker</code> directly followed by '&gt;'. Every character read is buffered, including the terminator.
     * The previous character is tracked instead of consuming a lookahead character, so a doubled marker such as
     * <code>%%&gt;</code> still ends the block.
     * @param marker character that must directly precede '&gt;' ('%' for ASP, '?' for PHP)
     * @return <code>true</code> if the terminator was read and buffered, <code>false</code> if the input ended first
     */
    private boolean readServerBlock(int marker)
    {
        // END_OF_STREAM can never equal a marker, so a '>' at the very start is not taken as a terminator
        int prev = StreamIn.END_OF_STREAM;
        int c;

        while ((c = this.in.readChar()) != StreamIn.END_OF_STREAM)
        {
            addCharToLexer(c);
            if (c == '>' && prev == marker)
            {
                return true;
            }
            prev = c;
        }
        return false;
    }

    /**
     * consumes the '>' terminating start tags.
* <p>
     * Reads the next attribute name of the tag that is currently being lexed. White space in front of the name is
     * skipped; a stray '=' or quote mark found there is reported and skipped as well. No name is returned
     * (<code>null</code>) when the tag ends with '&gt;' or "/&gt;", when an ASP or PHP block is found in place of a
     * name, when a '&lt;' is met, or when the input ends. The name is collected in the lexer buffer, copied out as a
     * String, and the buffer is rewound to where the name started.
     * @param isempty flag is passed as array so it can be modified; element 0 is set to <code>true</code> when
     * "/&gt;" is read
     * @param asp asp Node, passed as array so it can be modified; element 0 is cleared on entry and receives the
     * result of parseAsp() when "&lt;%" is read
     * @param php php Node, passed as array so it can be modified; element 0 is cleared on entry and receives the
     * result of parsePhp() when "&lt;?" is read
     * @return parsed attribute name, or <code>null</code> if no name was read
     */
    public String parseAttribute(boolean[] isempty, Node[] asp, Node[] php)
    {
        int start = 0;
        String attr;
        int c = 0;
        int lastc = 0;

        asp[0] = null; // clear asp pointer
        php[0] = null; // clear php pointer

        // skip white space before the attribute
        for (;;)
        {
            c = this.in.readChar();

            if (c == '/')
            {
                // "/>" closes an empty element; any other '/' becomes the first character of the name
                c = this.in.readChar();

                if (c == '>')
                {
                    isempty[0] = true;
                    return null;
                }

                this.in.ungetChar(c);
                c = '/';
                break;
            }

            // end of the start tag: the '>' is consumed here
            if (c == '>')
            {
                return null;
            }

            if (c == '<')
            {
                c = this.in.readChar();

                if (c == '%')
                {
                    asp[0] = parseAsp();
                    return null;
                }
                else if (c == '?')
                {
                    php[0] = parsePhp();
                    return null;
                }

                // neither ASP nor PHP: push the lookahead back and, unless an XML declaration is being lexed,
                // the '<' as well, then report the problem
                this.in.ungetChar(c);
                if (this.state != LEX_XMLDECL) // FG fix for 532535
                {
                    this.in.ungetChar('<'); // fix for 433360
                }
                report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
                return null;
            }

            // a '=' with no name in front of it is reported and skipped
            if (c == '=')
            {
                report.attrError(this, this.token, null, Report.UNEXPECTED_EQUALSIGN);
                continue;
            }

            // a quote mark with no name in front of it is reported and skipped
            if (c == '"' || c == '\'')
            {
                report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK);
                continue;
            }

            if (c == StreamIn.END_OF_STREAM)
            {
                report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
                this.in.ungetChar(c);
                return null;
            }

            // first character of the name found
            if (!TidyUtils.isWhite((char) c))
            {
                break;
            }
        }

        // the name is accumulated in the lexer buffer starting at this offset
        start = this.lexsize;
        lastc = c;

        for (;;)
        {
            // '=' and '>' end the name
            // but push back '=' for parseValue()
            if (c == '=' || c == '>')
            {
                this.in.ungetChar(c);
                break;
            }

            // '<' and end of input end the name too and are pushed back
            if (c == '<' || c == StreamIn.END_OF_STREAM)
            {
                this.in.ungetChar(c);
                break;
            }

            // a quote mark directly after '-' ends the name; the '-' is removed from the buffer and the quote is
            // pushed back. NOTE(review): the input this is meant to repair is not visible here - confirm
            if (lastc == '-' && (c == '"' || c == '\''))
            {
                this.lexsize--;
                this.in.ungetChar(c);
                break;
            }

            // white space ends the name and is consumed, not pushed back
            if (TidyUtils.isWhite((char) c))
            {
                break;
            }

            // what should be done about non-namechar characters?
            // currently these are incorporated into the attr name

            // upper case letters are folded to lower case unless xmlTags is configured
            if (!this.configuration.xmlTags && TidyUtils.isUpper((char) c))
            {
                c = TidyUtils.toLower((char) c);
            }

            // ++len; #427672 - handle attribute names with multibyte chars - fix by Randy Waki - 10 Aug 00
            addCharToLexer(c);

            lastc = c;
            c = this.in.readChar();
        }

        // #427672 - handle attribute names with multibyte chars - fix by Randy Waki - 10 Aug 00
        // the length is taken from the buffer offsets rather than from a count of characters read
        int len = this.lexsize - start;
        attr = (len > 0 ? TidyUtils.getString(this.lexbuf, start, len) : null);

        // rewind the buffer: the name has been copied out
        this.lexsize = start;

        return attr;
    }

    /**
     * Invoked when &lt; is seen in place of an attribute value. The character following the '&lt;' decides how the
     * instruction is read: if it is '%', '?' or '@' (ASP, PHP or Tango) the instruction runs up to and including the
     * next '&gt;' outside a quoted string; otherwise it also terminates on white space, and a terminating '&gt;' is
     * pushed back instead of being buffered. This routine recognizes ' and " quoted strings and copies them whole
     * into the lexer buffer. Meeting '&gt;' or the end of input inside a quoted string is reported as an error.
     * @return delimiter: '"' by default, '\'' once a double quoted string has been read, or 0 if an error was
     * reported inside a quoted string
     */
    public int parseServerInstruction()
    {
        int c, delim = '"';
        boolean isrule = false;

        // the first character after '<' is buffered unconditionally
        c = this.in.readChar();
        addCharToLexer(c);

        // check for ASP, PHP or Tango
        if (c == '%' || c == '?' || c == '@')
        {
            isrule = true;
        }

        for (;;)
        {
            c = this.in.readChar();

            if (c == StreamIn.END_OF_STREAM)
            {
                break;
            }

            if (c == '>')
            {
                // for ASP/PHP/Tango the '>' belongs to the instruction, otherwise it is left for the caller
                if (isrule)
                {
                    addCharToLexer(c);
                }
                else
                {
                    this.in.ungetChar(c);
                }

                break;
            }

            // if not recognized as ASP, PHP or Tango
            // then also finish value on whitespace
            if (!isrule)
            {
                if (TidyUtils.isWhite((char) c))
                {
                    break;
                }
            }

            addCharToLexer(c);

            if (c == '"')
            {
                // copy a double quoted string, closing quote included
                do
                {
                    c = this.in.readChar();
                    // NOTE(review): endOfInput() is defined outside this view; presumably true once the stream is
                    // exhausted - confirm
                    if (endOfInput()) // #427840 - fix by Terry Teague 30 Jun 01
                    {
                        report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
                        this.in.ungetChar(c);
                        return 0;
                    }
                    if (c == '>') // #427840 - fix by Terry Teague 30 Jun 01
                    {
                        this.in.ungetChar(c);
                        report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
                        return 0;
                    }
                    addCharToLexer(c);
                }
                while (c != '"');
                // the value contains double quotes, so report a single quote as its delimiter
                delim = '\'';
                continue;
            }

            if (c == '\'')
            {
                // copy a single quoted string, closing quote included
                do
                {
                    c = this.in.readChar();
                    if (endOfInput()) // #427840 - fix by Terry Teague 30 Jun 01
                    {
                        report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE);
                        this.in.ungetChar(c);
                        return 0;
                    }
                    if (c == '>') // #427840 - fix by Terry Teague 30 Jun 01
                    {
                        this.in.ungetChar(c);
                        report.attrError(this, this.token, null, Report.UNEXPECTED_GT);
                        return 0;
                    }
                    addCharToLexer(c);
                }
                while (c != '\'');
            }
        }

        return delim;
    }

    /**
     * Parse an attribute value.
     * @param name attribute name
     * @param foldCase fold case?
     * @param isempty is attribute empty? Passed as an array reference to allow modification
     * @param pdelim delimiter, passed as an array reference to allow modification
     * @return parsed value
     */
    public String parseValue(String name, boolean foldCase, boolean[] isempty, int[] pdelim)
    {
        // values start with "=" or " = " etc.
        // doesn't consume the ">" at end of start tag

        int len = 0;
        int start;
        boolean seenGt = false;
        boolean munge = true;
        int c = 0;
        int lastc, delim, quotewarning;
        String value;

        delim = 0;
        pdelim[0] = '"';

        // Henry Zrepa reports that some folk are using the embed element with script attributes where newlines are
        // significant and must be preserved
        if (this.configuration.literalAttribs)
        {
            munge = false;
        }

        // skip white space before the '='
        while (true)
        {
            c = this.in.readChar();

            if (c == StreamIn.END_OF_STREAM)
            {
                this.in.ungetChar(c);
                break;
            }

            if (!TidyUtils.isWhite((char) c))
            {
                break;
            }
        }

        // c should be '=' if there is a value
        // other legal possibilities are white space, '/' and '>'
        if (c != '=' && c != '"' && c != '\'')
        {
            this.in.ungetChar(c);
            return null;
        }

        // skip white space after '='
        while (true)
        {
            c = this.in.readChar();

            if (c == StreamIn.END_OF_STREAM)
            {
                this.in.ungetChar(c);
                break;
            }

            if (!TidyUtils.isWhite((char) c))
            {
                break;
            }
        }

        // check for quote marks
        if (c == '"' || c == '\'')
        {
            delim = c;
        }
        else if (c == '<')
        {
            start = this.lexsize;
            addCharToLexer(c);
            pdelim[0] = parseServerInstruction();
            len = this.lexsize - start;
            this.lexsize = start;
            return (len > 0 ?
TidyUtils.getString(this.lexbuf, start, len) : null); } else { this.in.ungetChar(c); } // and read the value string check for quote mark if needed quotewarning = 0; start = this.lexsize; c = '\0'; while (true) { lastc = c; // track last character c = this.in.readChar(); if (c == StreamIn.END_OF_STREAM) { report.attrError(this, this.token, null, Report.UNEXPECTED_END_OF_FILE); this.in.ungetChar(c); break; } if (delim == (char) 0) { if (c == '>') { this.in.ungetChar(c); break; } if (c == '"' || c == '\'') { int q = c; report.attrError(this, this.token, null, Report.UNEXPECTED_QUOTEMARK); /* handle <input onclick=s("btn1")> and <a title=foo""">...</a> */ /* this doesn't handle <a title=foo"/> which browsers treat as */ /* 'foo"/' nor <a title=foo" /> which browser treat as 'foo"' */ c = in.readChar(); if (c == '>') { addCharToLexer(q); in.ungetChar(c); break; } else { in.ungetChar(c); c = q; } } if (c == '<') { this.in.ungetChar(c); // fix for 433360 c = '>'; this.in.ungetChar(c); report.attrError(this, this.token, null, Report.UNEXPECTED_GT); break; } // For cases like <br clear=all/> need to avoid treating /> as part of the attribute value, however // care is needed to avoid so treating <a href=http://www.acme.com /> in this way, which would map the // <a> tag to <a href="http://www.acme.com"/> if (c == '/') { // peek ahead in case of /> c = this.in.readChar(); if (c == '>' && !AttributeTable.getDefaultAttributeTable().isUrl(name)) { isempty[0] = true; this.in.ungetChar(c); break; } // unget peeked char this.in.ungetChar(c); c = '/'; } } else { // delim is '\'' or '"' if (c == delim) { break; } // treat CRLF, CR and LF as single line break if (c == '\r') { c = this.in.readChar(); if (c != '\n') { this.in.ungetChar(c); } c = '\n'; } if (c == '\n' || c == '<' || c == '>') { ++quotewarning; } if (c == '>') { seenGt = true; } } if (c == '&') { // no entities in ID attributes if ("id".equalsIgnoreCase(name)) { report.attrError(this, null, null, Report.ENTITY_IN_ID); 
continue; } addCharToLexer(c); parseEntity((short) 0); continue; } // kludge for JavaScript attribute values with line continuations in string literals if (c == '\\') { c = this.in.readChar(); if (c != '\n') { this.in.ungetChar(c); c = '\\'; } } if (TidyUtils.isWhite((char) c)) { if (delim == (char) 0) { break; } if (munge) { // discard line breaks in quoted URLs // #438650 - fix by Randy Waki if (c == '\n' && AttributeTable.getDefaultAttributeTable().isUrl(name)) { // warn that we discard this newline report.attrError(this, this.token, null, Report.NEWLINE_IN_URI); continue; } c = ' '; if (lastc == ' ') { continue; } } } else if (foldCase && TidyUtils.isUpper((char) c)) { c = TidyUtils.toLower((char) c); } addCharToLexer(c); } if (quotewarning > 10 && seenGt && munge) { // there is almost certainly a missing trailing quote mark as we have see too many newlines, < or > // characters. an exception is made for Javascript attributes and the javascript URL scheme which may // legitimately include < and >, and for attributes starting with "<xml " as generated by Microsoft Office. 
if (!AttributeTable.getDefaultAttributeTable().isScript(name) && !(AttributeTable.getDefaultAttributeTable().isUrl(name) && "javascript:".equals(TidyUtils.getString( this.lexbuf, start, 11))) && !"<xml ".equals(TidyUtils.getString(this.lexbuf, start, 5))) // #500236 - fix by Klaus Johannes Rusch // 06 Jan 02 { report.error(this, null, null, Report.SUSPECTED_MISSING_QUOTE); } } len = this.lexsize - start; this.lexsize = start; if (len > 0 || delim != 0) { // ignore leading and trailing white space for all but title, alt, value and prompts attributes unless // --literal-attributes is set to yes // #994841 - Whitespace is removed from value attributes if (munge && !TidyUtils.isInValuesIgnoreCase(new String[]{"alt", "title", "value", "prompt"}, name)) { while (TidyUtils.isWhite((char) this.lexbuf[start + len - 1])) { --len; } while (TidyUtils.isWhite((char) this.lexbuf[start]) && start < len) { ++start; --len; } } value = TidyUtils.getString(this.lexbuf, start, len); } else { value = null; } // note delimiter if given if (delim != 0) { pdelim[0] = delim; } else { pdelim[0] = '"'; } return value; } /** * Check if attr is a valid name. * @param attr String to check, must be non-null * @return <code>true</code> if attr is a valid name. */ public static boolean isValidAttrName(String attr) { char c; int i; // first character should be a letter c = attr.charAt(0); if (!TidyUtils.isLetter(c)) { return false; } // remaining characters should be namechars for (i = 1; i < attr.length(); i++) { c = attr.charAt(i); if (TidyUtils.isNamechar(c)) { continue; } return false; } return true; } /** * In CSS1, selectors can contain only the characters A-Z, 0-9, and Unicode characters 161-255, plus dash (-); they * cannot start with a dash or a digit; they can also contain escaped characters and any Unicode character as a * numeric code (see next item). The backslash followed by at most four hexadecimal digits (0..9A..F) stands for the * Unicode character with that number. 
Any character except a hexadecimal digit can be escaped to remove its special * meaning, by putting a backslash in front. * @param buf css selector name * @return <code>true</code> if the given string is a valid css1 selector name */ public static boolean isCSS1Selector(String buf) { if (buf == null) { return false; } // #508936 - CSS class naming for -clean option boolean valid = true; int esclen = 0; char c; int pos; for (pos = 0; valid && pos < buf.length(); ++pos) { c = buf.charAt(pos); if (c == '\\') { esclen = 1; // ab\555\444 is 4 chars {'a', 'b', \555, \444} } else if (Character.isDigit(c)) { // Digit not 1st, unless escaped (Max length "\112F") if (esclen > 0) { valid = (++esclen < 6); } if (valid) { valid = (pos > 0 || esclen > 0); } } else { valid = (esclen > 0 // Escaped? Anything goes. || (pos > 0 && c == '-') // Dash cannot be 1st char || Character.isLetter(c) // a-z, A-Z anywhere || (c >= 161 && c <= 255)); // Unicode 161-255 anywhere esclen = 0; } } return valid; } /** * Parse tag attributes. * @param isempty is tag empty? 
* @return parsed attribute/value list */ public AttVal parseAttrs(boolean[] isempty) { AttVal av, list; String attribute, value; int[] delim = new int[1]; Node[] asp = new Node[1]; Node[] php = new Node[1]; list = null; while (!endOfInput()) { attribute = parseAttribute(isempty, asp, php); if (attribute == null) { // check if attributes are created by ASP markup if (asp[0] != null) { av = new AttVal(list, null, asp[0], null, '\0', null, null); list = av; continue; } // check if attributes are created by PHP markup if (php[0] != null) { av = new AttVal(list, null, null, php[0], '\0', null, null); list = av; continue; } break; } value = parseValue(attribute, false, isempty, delim); if (attribute != null && isValidAttrName(attribute)) { av = new AttVal(list, null, null, null, delim[0], attribute, value); av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); list = av; } else { av = new AttVal(null, null, null, null, 0, attribute, value); // #427664 - fix by Gary Peskin 04 Aug 00; other fixes by Dave Raggett if (value != null) { report.attrError(this, this.token, av, Report.BAD_ATTRIBUTE_VALUE); } else if (TidyUtils.lastChar(attribute) == '"') { report.attrError(this, this.token, av, Report.MISSING_QUOTEMARK); } else { report.attrError(this, this.token, av, Report.UNKNOWN_ATTRIBUTE); } } } return list; } /** * Push a copy of an inline node onto stack but don't push if implicit or OBJECT or APPLET (implicit tags are ones * generated from the istack) One issue arises with pushing inlines when the tag is already pushed. 
For instance: * <code><p><em> text <p><em> more text</code> Shouldn't be mapped to * <code><p><em> text </em></p><p><em><em> more text </em></em></code> * @param node Node to be pushed */ public void pushInline(Node node) { IStack is; if (node.implicit) { return; } if (node.tag == null) { return; } if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE)) { return; } if (TidyUtils.toBoolean(node.tag.model & Dict.CM_OBJECT)) { return; } if (node.tag != this.configuration.tt.tagFont && isPushed(node)) { return; } // make sure there is enough space for the stack is = new IStack(); is.tag = node.tag; is.element = node.element; if (node.attributes != null) { is.attributes = cloneAttributes(node.attributes); } this.istack.push(is); } /** * Pop a copy of an inline node from the stack. * @param node Node to be popped */ public void popInline(Node node) { IStack is; if (node != null) { if (node.tag == null) { return; } if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE)) { return; } if (TidyUtils.toBoolean(node.tag.model & Dict.CM_OBJECT)) { return; } // if node is </a> then pop until we find an <a> if (node.tag == this.configuration.tt.tagA) { while (this.istack.size() > 0) { is = (IStack) this.istack.pop(); if (is.tag == this.configuration.tt.tagA) { break; } } if (this.insert >= this.istack.size()) { this.insert = -1; } return; } } if (this.istack.size() > 0) { is = (IStack) this.istack.pop(); if (this.insert >= this.istack.size()) { this.insert = -1; } } } /** * Is the node in the stack? * @param node Node * @return <code>true</code> is the node is found in the stack */ public boolean isPushed(Node node) { int i; IStack is; for (i = this.istack.size() - 1; i >= 0; --i) { is = (IStack) this.istack.elementAt(i); if (is.tag == node.tag) { return true; } } return false; } /** * This has the effect of inserting "missing" inline elements around the contents of blocklevel elements such as P, * TD, TH, DIV, PRE etc. This procedure is called at the start of ParseBlock. 
When the inline stack is not empty, as * will be the case in: <code><i><h1>italic heading</h1></i></code> which is then treated as * equivalent to <code><h1><i>italic heading</i></h1></code> This is implemented by setting the lexer * into a mode where it gets tokens from the inline stack rather than from the input stream. * @param node original node * @return stack size */ public int inlineDup(Node node) { int n; n = this.istack.size() - this.istackbase; if (n > 0) { this.insert = this.istackbase; this.inode = node; } return n; } /** * @return */ public Node insertedToken() { Node node; IStack is; int n; // this will only be null if inode != null if (this.insert == -1) { node = this.inode; this.inode = null; return node; } // is this is the "latest" node then update the position, otherwise use current values if (this.inode == null) { this.lines = this.in.getCurline(); this.columns = this.in.getCurcol(); } node = newNode(Node.START_TAG, this.lexbuf, this.txtstart, this.txtend); // GLP: Bugfix 126261. Remove when this change is fixed in istack.c in the original Tidy node.implicit = true; is = (IStack) this.istack.elementAt(this.insert); node.element = is.element; node.tag = is.tag; if (is.attributes != null) { node.attributes = cloneAttributes(is.attributes); } // advance lexer to next item on the stack n = this.insert; // and recover state if we have reached the end if (++n < this.istack.size()) { this.insert = n; } else { this.insert = -1; } return node; } /** * Can the given element be removed? 
* @param element node * @return <code>true</code> if he element can be removed */ public boolean canPrune(Node element) { if (element.type == Node.TEXT_NODE) { return true; } if (element.content != null) { return false; } if (element.tag == this.configuration.tt.tagA && element.attributes != null) { return false; } if (element.tag == this.configuration.tt.tagP && !this.configuration.dropEmptyParas) { return false; } if (element.tag == null) { return false; } if (TidyUtils.toBoolean(element.tag.model & Dict.CM_ROW)) { return false; } if (TidyUtils.toBoolean(element.tag.model & Dict.CM_EMPTY)) { return false; } if (element.tag == this.configuration.tt.tagApplet) { return false; } if (element.tag == this.configuration.tt.tagObject) { return false; } if (element.tag == this.configuration.tt.tagScript && element.getAttrByName("src") != null) { return false; } // #540555 Empty title tag is trimmed if (element.tag == this.configuration.tt.tagTitle) { return false; } // #433359 - fix by Randy Waki 12 Mar 01 - Empty iframe is trimmed if (element.tag == this.configuration.tt.tagIframe) { return false; } if (element.getAttrByName("id") != null || element.getAttrByName("name") != null) { return false; } return true; } /** * duplicate name attribute as an id and check if id and name match. * @param node Node to check for name/it attributes */ public void fixId(Node node) { AttVal name = node.getAttrByName("name"); AttVal id = node.getAttrByName("id"); if (name != null) { if (id != null) { if (id.value != null && !id.value.equals(name.value)) { report.attrError(this, node, name, Report.ID_NAME_MISMATCH); } } else if (this.configuration.xmlOut) { node.addAttribute("id", name.value); } } } /** * Defer duplicates when entering a table or other element where the inlines shouldn't be duplicated. */ public void deferDup() { this.insert = -1; this.inode = null; } /** * Constraint the html version in the document to the given one. 
Everything is allowed in proprietary version of * HTML this is handled here rather than in the tag/attr dicts. * @param vers html version code */ void constrainVersion(int vers) { this.versions &= (vers | Dict.VERS_PROPRIETARY); } /** * Is content acceptable for pre elements? * @param node content * @return <code>true</code> if node is acceptable in pre elements */ protected boolean preContent(Node node) { // p is coerced to br's if (node.tag == this.configuration.tt.tagP) { return true; } if (node.tag == null || node.tag == this.configuration.tt.tagP || !TidyUtils.toBoolean(node.tag.model & (Dict.CM_INLINE | Dict.CM_NEW))) { return false; } return true; } /** * document type. */ private static class W3CVersionInfo { /** * name. */ String name; /** * voyager name. */ String voyagerName; /** * profile. */ String profile; /** * code. */ short code; /** * Instantiates a new W3CVersionInfo. * @param name version name * @param voyagerName voyager (xhtml) name * @param profile VOYAGER_STRICT | VOYAGER_LOOSE | VOYAGER_FRAMESET * @param code unique code for this version info */ public W3CVersionInfo(String name, String voyagerName, String profile, short code) { this.name = name; this.voyagerName = voyagerName; this.profile = profile; this.code = code; } } } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Node.java����������������������������������������������������������0000644�0001750�0001750�00000120353�11463540605�020567� 0����������������������������������������������������������������������������������������������������ustar 
�twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. 
Altered versions must be plainly marked as such and must
 *     not be misrepresented as being the original source.
 *  3. This Copyright notice may not be removed or altered from any
 *     source or altered source distribution.
 *
 *  The copyright holders and contributing author(s) specifically
 *  permit, without fee, and encourage the use of this source code
 *  as a component for supporting the Hypertext Markup Language in
 *  commercial products. If you use this source code in a product,
 *  acknowledgment is not required but would be appreciated.
 *
 */
package org.w3c.tidy;

/**
 * Used for elements and text nodes. The element name is null for text nodes. Start and end are offsets into lexbuf,
 * which contains the textual content of all elements in the parse tree. Parent and content allow traversal of the
 * parse tree in any direction. Attributes are represented as a linked list of AttVal nodes which hold the strings for
 * attribute/value pairs.
 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision: 1162 $ ($Author: fgiust $)
 */
public class Node
{

    /**
     * node type: root.
     */
    public static final short ROOT_NODE = 0;

    /**
     * node type: doctype.
     */
    public static final short DOCTYPE_TAG = 1;

    /**
     * node type: comment.
     */
    public static final short COMMENT_TAG = 2;

    /**
     * node type: processing instruction.
     */
    public static final short PROC_INS_TAG = 3;

    /**
     * node type: text.
     */
    public static final short TEXT_NODE = 4;

    /**
     * node type: start tag.
     */
    public static final short START_TAG = 5;

    /**
     * node type: end tag.
     */
    public static final short END_TAG = 6;

    /**
     * node type: combined start and end tag (written with a '/' before the closing '&gt;').
     */
    public static final short START_END_TAG = 7;

    /**
     * node type: CDATA.
     */
    public static final short CDATA_TAG = 8;

    /**
     * node type: section tag.
     */
    public static final short SECTION_TAG = 9;

    /**
     * node type: asp tag.
     */
    public static final short ASP_TAG = 10;

    /**
     * node type: jste tag.
*/ public static final short JSTE_TAG = 11; /** * node type: php tag. */ public static final short PHP_TAG = 12; /** * node type: doctype. */ public static final short XML_DECL = 13; /** * Description for all the node types. Used in toString. */ private static final String[] NODETYPE_STRING = { "RootNode", "DocTypeTag", "CommentTag", "ProcInsTag", "TextNode", "StartTag", "EndTag", "StartEndTag", "SectionTag", "AspTag", "PhpTag", "XmlDecl"}; /** * parent node. */ protected Node parent; /** * pevious node. */ protected Node prev; /** * next node. */ protected Node next; /** * last node. */ protected Node last; /** * start of span onto text array. */ protected int start; /** * end of span onto text array. */ protected int end; /** * the text array. */ protected byte[] textarray; /** * TextNode, StartTag, EndTag etc. */ protected short type; /** * true if closed by explicit end tag. */ protected boolean closed; /** * true if inferred. */ protected boolean implicit; /** * true if followed by a line break. */ protected boolean linebreak; /** * old tag when it was changed. */ protected Dict was; /** * tag's dictionary definition. */ protected Dict tag; /** * Tag name. */ protected String element; /** * Attribute/Value linked list. */ protected AttVal attributes; /** * Contained node. */ protected Node content; /** * DOM adapter. */ protected org.w3c.dom.Node adapter; /** * Instantiates a new text node. */ public Node() { this(TEXT_NODE, null, 0, 0); } /** * Instantiates a new node. * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE | * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node. 
ASP_TAG | * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL * @param textarray array of bytes contained in the Node * @param start start position * @param end end position */ public Node(short type, byte[] textarray, int start, int end) { this.parent = null; this.prev = null; this.next = null; this.last = null; this.start = start; this.end = end; this.textarray = textarray; this.type = type; this.closed = false; this.implicit = false; this.linebreak = false; this.was = null; this.tag = null; this.element = null; this.attributes = null; this.content = null; } /** * Instantiates a new node. * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE | * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node. ASP_TAG | * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL * @param textarray array of bytes contained in the Node * @param start start position * @param end end position * @param element tag name * @param tt tag table instance */ public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt) { this.parent = null; this.prev = null; this.next = null; this.last = null; this.start = start; this.end = end; this.textarray = textarray; this.type = type; this.closed = false; this.implicit = false; this.linebreak = false; this.was = null; this.tag = null; this.element = element; this.attributes = null; this.content = null; if (type == START_TAG || type == START_END_TAG || type == END_TAG) { tt.findTag(this); } } /** * Returns an attribute with the given name in the current node. * @param name attribute name. 
* @return AttVal instance or null if no attribute with the iven name is found */ public AttVal getAttrByName(String name) { AttVal attr; for (attr = this.attributes; attr != null; attr = attr.next) { if (name != null && attr.attribute != null && attr.attribute.equals(name)) { break; } } return attr; } /** * Default method for checking an element's attributes. * @param lexer Lexer */ public void checkAttributes(Lexer lexer) { AttVal attval; for (attval = this.attributes; attval != null; attval = attval.next) { attval.checkAttribute(lexer, this); } } /** * The same attribute name can't be used more than once in each element. Discard or join attributes according to * configuration. * @param lexer Lexer */ public void repairDuplicateAttributes(Lexer lexer) { AttVal attval; for (attval = this.attributes; attval != null;) { if (attval.asp == null && attval.php == null) { AttVal current; for (current = attval.next; current != null;) { if (current.asp == null && current.php == null && attval.attribute != null && attval.attribute.equalsIgnoreCase(current.attribute)) { AttVal temp; if ("class".equalsIgnoreCase(current.attribute) && lexer.configuration.joinClasses) { // concatenate classes current.value = current.value + " " + attval.value; temp = attval.next; if (temp.next == null) { current = null; } else { current = current.next; } lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE); removeAttribute(attval); attval = temp; } else if ("style".equalsIgnoreCase(current.attribute) && lexer.configuration.joinStyles) { // concatenate styles // this doesn't handle CSS comments and leading/trailing white-space very well see // http://www.w3.org/TR/css-style-attr int end = current.value.length() - 1; if (current.value.charAt(end) == ';') { // attribute ends with declaration seperator current.value = current.value + " " + attval.value; } else if (current.value.charAt(end) == '}') { // attribute ends with rule set current.value = current.value + " { " + attval.value 
+ " }"; } else { // attribute ends with property value current.value = current.value + "; " + attval.value; } temp = attval.next; if (temp.next == null) { current = null; } else { current = current.next; } lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE); removeAttribute(attval); attval = temp; } else if (lexer.configuration.duplicateAttrs == Configuration.KEEP_LAST) { temp = current.next; lexer.report.attrError(lexer, this, current, Report.REPEATED_ATTRIBUTE); removeAttribute(current); current = temp; } else { temp = attval.next; if (attval.next == null) { current = null; } else { current = current.next; } lexer.report.attrError(lexer, this, attval, Report.REPEATED_ATTRIBUTE); removeAttribute(attval); attval = temp; } } else { current = current.next; } } attval = attval.next; } else { attval = attval.next; } } } /** * Adds an attribute to the node. * @param name attribute name * @param value attribute value */ public void addAttribute(String name, String value) { AttVal av = new AttVal(null, null, null, null, '"', name, value); av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); if (this.attributes == null) { this.attributes = av; } else { // append to end of attributes AttVal here = this.attributes; while (here.next != null) { here = here.next; } here.next = av; } } /** * Remove an attribute from node and then free it. * @param attr attribute to remove */ public void removeAttribute(AttVal attr) { AttVal av; AttVal prev = null; AttVal next; for (av = this.attributes; av != null; av = next) { next = av.next; if (av == attr) { if (prev != null) { prev.next = next; } else { this.attributes = next; } } else { prev = av; } } } /** * Find the doctype element. * @return doctype node or null if not found */ public Node findDocType() { Node node = this.content; while (node != null && node.type != DOCTYPE_TAG) { node = node.next; } return node; } /** * Discard the doctype node. 
*/ public void discardDocType() { Node node; node = findDocType(); if (node != null) { if (node.prev != null) { node.prev.next = node.next; } else { node.parent.content = node.next; } if (node.next != null) { node.next.prev = node.prev; } node.next = null; } } /** * Remove node from markup tree and discard it. * @param element discarded node * @return next node */ public static Node discardElement(Node element) { Node next = null; if (element != null) { next = element.next; element.removeNode(); } return next; } /** * Insert a node into markup tree. * @param node to insert */ public void insertNodeAtStart(Node node) { node.parent = this; if (this.content == null) { this.last = node; } else { this.content.prev = node; // AQ added 13 Apr 2000 } node.next = this.content; node.prev = null; this.content = node; } /** * Insert node into markup tree. * @param node Node to insert */ public void insertNodeAtEnd(Node node) { node.parent = this; node.prev = this.last; if (this.last != null) { this.last.next = node; } else { this.content = node; } this.last = node; } /** * Insert node into markup tree in pace of element which is moved to become the child of the node. * @param element child node. Will be inserted as a child of element * @param node parent node */ public static void insertNodeAsParent(Node element, Node node) { node.content = element; node.last = element; node.parent = element.parent; element.parent = node; if (node.parent.content == element) { node.parent.content = node; } if (node.parent.last == element) { node.parent.last = node; } node.prev = element.prev; element.prev = null; if (node.prev != null) { node.prev.next = node; } node.next = element.next; element.next = null; if (node.next != null) { node.next.prev = node; } } /** * Insert node into markup tree before element. * @param element child node. 
Will be insertedbefore element * @param node following node */ public static void insertNodeBeforeElement(Node element, Node node) { Node parent; parent = element.parent; node.parent = parent; node.next = element; node.prev = element.prev; element.prev = node; if (node.prev != null) { node.prev.next = node; } if (parent != null && parent.content == element) { parent.content = node; } } /** * Insert node into markup tree after element. * @param node new node to insert */ public void insertNodeAfterElement(Node node) { Node parent; parent = this.parent; node.parent = parent; // AQ - 13Jan2000 fix for parent == null if (parent != null && parent.last == this) { parent.last = node; } else { node.next = this.next; // AQ - 13Jan2000 fix for node.next == null if (node.next != null) { node.next.prev = node; } } this.next = node; node.prev = this; } /** * Trim an empty element. * @param lexer Lexer * @param element empty node to be removed */ public static void trimEmptyElement(Lexer lexer, Node element) { // don't trim if user explicitely set trim-empty-elements to false // empty element can be needed in css sites if (lexer.configuration.trimEmpty) { TagTable tt = lexer.configuration.tt; if (lexer.canPrune(element)) { if (element.type != TEXT_NODE) { lexer.report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT); } discardElement(element); } else if (element.tag == tt.tagP && element.content == null) { // replace <p></p> by <br><br> to preserve formatting Node node = lexer.inferredTag("br"); Node.coerceNode(lexer, element, tt.tagBr); element.insertNodeAfterElement(node); } } } /** * This maps <em> hello </em> <strong>world </strong> to <em> hello </em> <strong>world </strong>. If last child of * element is a text node then trim trailing white space character moving it to after element's end tag. 
* @param lexer Lexer * @param element node * @param last last child of element */ public static void trimTrailingSpace(Lexer lexer, Node element, Node last) { byte c; TagTable tt = lexer.configuration.tt; if (last != null && last.type == Node.TEXT_NODE) { if (last.end > last.start) { c = lexer.lexbuf[last.end - 1]; if (c == 160 || c == (byte) ' ') { // take care with <td>   </td> // fix for [435920] if (c == 160 && (element.tag == tt.tagTd || element.tag == tt.tagTh)) { if (last.end > last.start + 1) { last.end -= 1; } } else { last.end -= 1; if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE) && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD)) { lexer.insertspace = true; } } } } // if empty string then delete from parse tree if (last.start == last.end) // COMMENT_NBSP_FIX: && tag != tag_td && tag != tag_th { trimEmptyElement(lexer, last); } } } /** * Escapes the given tag. * @param lexer Lexer * @param element node to be escaped * @return escaped node */ protected static Node escapeTag(Lexer lexer, Node element) { Node node = lexer.newNode(); node.start = lexer.lexsize; node.textarray = element.textarray; // @todo check it lexer.addByte('<'); if (element.type == END_TAG) { lexer.addByte('/'); } if (element.element != null) { lexer.addStringLiteral(element.element); } else if (element.type == DOCTYPE_TAG) { int i; lexer.addByte('!'); lexer.addByte('D'); lexer.addByte('O'); lexer.addByte('C'); lexer.addByte('T'); lexer.addByte('Y'); lexer.addByte('P'); lexer.addByte('E'); lexer.addByte(' '); for (i = element.start; i < element.end; ++i) { lexer.addByte(lexer.lexbuf[i]); } } if (element.type == START_END_TAG) { lexer.addByte('/'); } lexer.addByte('>'); node.end = lexer.lexsize; return node; } /** * Is the node content empty or blank? Assumes node is a text node. 
* @param lexer Lexer * @return <code>true</code> if the node content empty or blank */ public boolean isBlank(Lexer lexer) { if (this.type == TEXT_NODE) { if (this.end == this.start) { return true; } if (this.end == this.start + 1 && lexer.lexbuf[this.end - 1] == ' ') { return true; } } return false; } /** * This maps <code><p> hello <em> world </em></code> to <code><p> hello <em> world </em></code>. * Trims initial space, by moving it before the start tag, or if this element is the first in parent's content, then * by discarding the space. * @param lexer Lexer * @param element parent node * @param text text node */ public static void trimInitialSpace(Lexer lexer, Node element, Node text) { Node prev, node; if (text.type == TEXT_NODE && lexer.lexbuf[text.start] == (byte) ' ' && (text.start < text.end)) { if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE) && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD)) { prev = element.prev; if (prev != null && prev.type == TEXT_NODE) { if (prev.textarray[prev.end - 1] != (byte) ' ') { prev.textarray[prev.end++] = (byte) ' '; } ++element.start; } else { // create new node node = lexer.newNode(); // Local fix for bug 228486 (GLP). This handles the case // where we need to create a preceeding text node but there are // no "slots" in textarray that we can steal from the current // element. Therefore, we create a new textarray containing // just the blank. When Tidy is fixed, this should be removed. if (element.start >= element.end) { node.start = 0; node.end = 1; node.textarray = new byte[1]; } else { node.start = element.start++; node.end = element.start; node.textarray = element.textarray; } node.textarray[node.start] = (byte) ' '; Node.insertNodeBeforeElement(element, node); } } // discard the space in current node ++text.start; } } /** * Move initial and trailing space out. 
This routine maps: hello <em> world </em> to hello <em> world </em> and * <em> hello </em> <strong>world </strong> to <em> hello </em> <strong>world </strong>. * @param lexer Lexer * @param element Node */ public static void trimSpaces(Lexer lexer, Node element) { Node text = element.content; TagTable tt = lexer.configuration.tt; if (text != null && text.type == Node.TEXT_NODE && element.tag != tt.tagPre) { trimInitialSpace(lexer, element, text); } text = element.last; if (text != null && text.type == Node.TEXT_NODE) { trimTrailingSpace(lexer, element, text); } } /** * Is this node contained in a given tag? * @param tag descendant tag * @return <code>true</code> if node is contained in tag */ public boolean isDescendantOf(Dict tag) { Node parent; for (parent = this.parent; parent != null; parent = parent.parent) { if (parent.tag == tag) { return true; } } return false; } /** * The doctype has been found after other tags, and needs moving to before the html element. * @param lexer Lexer * @param element document * @param doctype doctype node to insert at the beginning of element */ public static void insertDocType(Lexer lexer, Node element, Node doctype) { TagTable tt = lexer.configuration.tt; lexer.report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS); while (element.tag != tt.tagHtml) { element = element.parent; } insertNodeBeforeElement(element, doctype); } /** * Find the body node. 
* @param tt tag table * @return body node */ public Node findBody(TagTable tt) { Node node; node = this.content; while (node != null && node.tag != tt.tagHtml) { node = node.next; } if (node == null) { return null; } node = node.content; while (node != null && node.tag != tt.tagBody && node.tag != tt.tagFrameset) { node = node.next; } if (node.tag == tt.tagFrameset) { node = node.content; while (node != null && node.tag != tt.tagNoframes) { node = node.next; } if (node != null) { node = node.content; while (node != null && node.tag != tt.tagBody) { node = node.next; } } } return node; } /** * Is the node an element? * @return <code>true</code> if type is START_TAG | START_END_TAG */ public boolean isElement() { return (this.type == START_TAG || this.type == START_END_TAG ? true : false); } /** * Unexpected content in table row is moved to just before the table in accordance with Netscape and IE. This code * assumes that node hasn't been inserted into the row. * @param row Row node * @param node Node which should be moved before the table * @param tt tag table */ public static void moveBeforeTable(Node row, Node node, TagTable tt) { Node table; /* first find the table element */ for (table = row.parent; table != null; table = table.parent) { if (table.tag == tt.tagTable) { if (table.parent.content == table) { table.parent.content = node; } node.prev = table.prev; node.next = table; table.prev = node; node.parent = table.parent; if (node.prev != null) { node.prev.next = node; } break; } } } /** * If a table row is empty then insert an empty cell.This practice is consistent with browser behavior and avoids * potential problems with row spanning cells. * @param lexer Lexer * @param row row node */ public static void fixEmptyRow(Lexer lexer, Node row) { Node cell; if (row.content == null) { cell = lexer.inferredTag("td"); row.insertNodeAtEnd(cell); lexer.report.warning(lexer, row, cell, Report.MISSING_STARTTAG); } } /** * Coerce a node. 
* @param lexer Lexer * @param node Node * @param tag tag dictionary reference */ public static void coerceNode(Lexer lexer, Node node, Dict tag) { Node tmp = lexer.inferredTag(tag.name); lexer.report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT); node.was = node.tag; node.tag = tag; node.type = START_TAG; node.implicit = true; node.element = tag.name; } /** * Extract this node and its children from a markup tree. */ public void removeNode() { if (this.prev != null) { this.prev.next = this.next; } if (this.next != null) { this.next.prev = this.prev; } if (this.parent != null) { if (this.parent.content == this) { this.parent.content = this.next; } if (this.parent.last == this) { this.parent.last = this.prev; } } this.parent = null; this.prev = null; this.next = null; } /** * Insert a node at the end. * @param element parent node * @param node will be inserted at the end of element * @return <code>true</code> if the node has been inserted */ public static boolean insertMisc(Node element, Node node) { if (node.type == COMMENT_TAG || node.type == PROC_INS_TAG || node.type == CDATA_TAG || node.type == SECTION_TAG || node.type == ASP_TAG || node.type == JSTE_TAG || node.type == PHP_TAG || node.type == XML_DECL) { element.insertNodeAtEnd(node); return true; } return false; } /** * Is this a new (user defined) node? Used to determine how attributes without values should be printed. This was * introduced to deal with user defined tags e.g. Cold Fusion. * @return <code>true</code> if this node represents a user-defined tag. */ public boolean isNewNode() { if (this.tag != null) { return TidyUtils.toBoolean(this.tag.model & Dict.CM_NEW); } return true; } /** * Does the node have one (and only one) child? * @return <code>true</code> if the node has one child */ public boolean hasOneChild() { return (this.content != null && this.content.next == null); } /** * Find the "html" element. 
* @param tt tag table * @return html node */ public Node findHTML(TagTable tt) { Node node; for (node = this.content; node != null && node.tag != tt.tagHtml; node = node.next) { // } return node; } /** * Find the head tag. * @param tt tag table * @return head node */ public Node findHEAD(TagTable tt) { Node node; node = this.findHTML(tt); if (node != null) { for (node = node.content; node != null && node.tag != tt.tagHead; node = node.next) { // } } return node; } public Node findTITLE(TagTable tt) { Node node = findHEAD(tt); if (node != null) { for (node = node.content; node != null && node.tag != tt.tagTitle; node = node.next) { // do nothing } } return node; } /** * Checks for node integrity. * @return false if node is not consistent */ public boolean checkNodeIntegrity() { Node child; if (this.prev != null) { if (this.prev.next != this) { return false; } } if (this.next != null) { if (next == this || this.next.prev != this) { return false; } } if (this.parent != null) { if (this.prev == null && this.parent.content != this) { return false; } if (this.next == null && this.parent.last != this) { return false; } } for (child = this.content; child != null; child = child.next) { if (child.parent != this || !child.checkNodeIntegrity()) { return false; } } return true; } /** * Add a css class to the node. If a class attribute already exists adds the value to the existing attribute. 
* @param classname css class name */ public void addClass(String classname) { AttVal classattr = this.getAttrByName("class"); // if there already is a class attribute then append class name after a space if (classattr != null) { classattr.value = classattr.value + " " + classname; } else { // create new class attribute this.addAttribute("class", classname); } } /** * @see java.lang.Object#toString() */ public String toString() { String s = ""; Node n = this; while (n != null) { s += "[Node type="; s += NODETYPE_STRING[n.type]; s += ",element="; if (n.element != null) { s += n.element; } else { s += "null"; } if (n.type == TEXT_NODE || n.type == COMMENT_TAG || n.type == PROC_INS_TAG) { s += ",text="; if (n.textarray != null && n.start <= n.end) { s += "\""; s += TidyUtils.getString(n.textarray, n.start, n.end - n.start); s += "\""; } else { s += "null"; } } s += ",content="; if (n.content != null) { s += n.content.toString(); } else { s += "null"; } s += "]"; if (n.next != null) { s += ","; } n = n.next; } return s; } /** * Returns a DOM Node which wrap the current tidy Node. * @return org.w3c.dom.Node instance */ protected org.w3c.dom.Node getAdapter() { if (adapter == null) { switch (this.type) { case ROOT_NODE : adapter = new DOMDocumentImpl(this); break; case START_TAG : case START_END_TAG : adapter = new DOMElementImpl(this); break; case DOCTYPE_TAG : adapter = new DOMDocumentTypeImpl(this); break; case COMMENT_TAG : adapter = new DOMCommentImpl(this); break; case TEXT_NODE : adapter = new DOMTextImpl(this); break; case CDATA_TAG : adapter = new DOMCDATASectionImpl(this); break; case PROC_INS_TAG : adapter = new DOMProcessingInstructionImpl(this); break; default : adapter = new DOMNodeImpl(this); } } return adapter; } /** * Clone this node. 
* @param deep if true deep clone the node (also clones all the contained nodes) * @return cloned node */ protected Node cloneNode(boolean deep) { Node node = new Node(type, textarray, start, end); node.parent = parent; node.closed = closed; node.implicit = implicit; node.tag = tag; node.element = element; if (attributes != null) { node.attributes = (AttVal) attributes.clone(); } if (deep) { Node child; Node newChild; for (child = this.content; child != null; child = child.next) { newChild = child.cloneNode(deep); node.insertNodeAtEnd(newChild); } } return node; } /** * Setter for node type. * @param newType a valid node type constant */ protected void setType(short newType) { this.type = newType; } /** * Used to check script node for script language. * @return <code>true</code> if the script node contains javascript */ public boolean isJavaScript() { boolean result = false; AttVal attr; if (this.attributes == null) { return true; } for (attr = this.attributes; attr != null; attr = attr.next) { if (("language".equalsIgnoreCase(attr.attribute) || "type".equalsIgnoreCase(attr.attribute)) && attr.value != null && attr.value.toLowerCase().contains("javascript")) { result = true; } } return result; } /** * Does the node expect contents? * @return <code>false</code> if this node should be empty */ public boolean expectsContent() { if (this.type != Node.START_TAG) { return false; } // unknown element? 
if (this.tag == null) { return true; } if (TidyUtils.toBoolean(this.tag.model & Dict.CM_EMPTY)) { return false; } return true; } } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/EncodingUtils.java�������������������������������������������������0000644�0001750�0001750�00000057462�10121404460�022447� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * @author Fabrizio Giustina * @version $Revision: 622 $ ($Author: fgiust $) */ public final class EncodingUtils { /** * the big-endian (default) UNICODE BOM. */ public static final int UNICODE_BOM_BE = 0xFEFF; /** * the default (big-endian) UNICODE BOM. */ public static final int UNICODE_BOM = UNICODE_BOM_BE; /** * the little-endian UNICODE BOM. */ public static final int UNICODE_BOM_LE = 0xFFFE; /** * the UTF-8 UNICODE BOM. 
*/
    public static final int UNICODE_BOM_UTF8 = 0xEFBBBF;

    /**
     * States for ISO 2022. A document in ISO-2022 based encoding uses some ESC sequences called "designator" to
     * switch character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646
     * variants; "ESC" + "$" + ? and "ESC" + "$" + "(" + ? for multibyte character sets. State ASCII.
     */
    public static final int FSM_ASCII = 0;

    /**
     * state ESC.
     */
    public static final int FSM_ESC = 1;

    /**
     * state ESCD.
     */
    public static final int FSM_ESCD = 2;

    /**
     * state ESCDP.
     */
    public static final int FSM_ESCDP = 3;

    /**
     * state ESCP.
     */
    public static final int FSM_ESCP = 4;

    /**
     * state NONASCII.
     */
    public static final int FSM_NONASCII = 5;

    /**
     * Max UTF-8 valid char value.
     */
    public static final int MAX_UTF8_FROM_UCS4 = 0x10FFFF;

    /**
     * Max UTF-16 value.
     */
    public static final int MAX_UTF16_FROM_UCS4 = 0x10FFFF;

    /**
     * UTF-16 low surrogate: first value of the surrogate area (same value as UTF16_LOW_SURROGATE_BEGIN).
     */
    public static final int LOW_UTF16_SURROGATE = 0xD800;

    /**
     * UTF-16 surrogates begin: first code point that needs a surrogate pair.
     */
    public static final int UTF16_SURROGATES_BEGIN = 0x10000;

    // Note: in the four constants below "low" and "high" refer to the numeric position of the range. The Unicode
    // standard uses the opposite naming (0xD800-0xDBFF are its "high"/leading surrogates, 0xDC00-0xDFFF its
    // "low"/trailing surrogates).

    /**
     * UTF-16 surrogate pair areas: low surrogates begin.
     */
    public static final int UTF16_LOW_SURROGATE_BEGIN = 0xD800;

    /**
     * UTF-16 surrogate pair areas: low surrogates end.
     */
    public static final int UTF16_LOW_SURROGATE_END = 0xDBFF;

    /**
     * UTF-16 surrogate pair areas: high surrogates begin.
     */
    public static final int UTF16_HIGH_SURROGATE_BEGIN = 0xDC00;

    /**
     * UTF-16 surrogate pair areas: high surrogates end.
     */
    public static final int UTF16_HIGH_SURROGATE_END = 0xDFFF;

    /**
     * UTF-16 high surrogate: last value of the surrogate area (same value as UTF16_HIGH_SURROGATE_END).
     */
    public static final int HIGH_UTF16_SURROGATE = 0xDFFF;

    /**
     * UTF-8 byte swap: invalid char (the byte-swapped form of the BOM 0xFEFF).
     */
    private static final int UTF8_BYTE_SWAP_NOT_A_CHAR = 0xFFFE;

    /**
     * UTF-8 invalid char.
     */
    private static final int UTF8_NOT_A_CHAR = 0xFFFF;

    /**
     * Mapping for Windows Western character set (128-159) to Unicode.
*/ private static final int[] WIN2UNICODE = { 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000, 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178}; /** * John Love-Jensen contributed this table for mapping MacRoman character set to Unicode. */ private static final int[] MAC2UNICODE = { // modified to only need chars 128-255/U+0080-U+00FF Terry T 19 Aug 01 // x7F = DEL 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, // =BD U+2126 OHM SIGN 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, // =DB U+00A4 CURRENCY SIGN 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, // xF0 = Apple Logo // =F0 U+2665 BLACK HEART SUIT 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7}; /** * table to map symbol font characters to Unicode; undefined characters are mapped to 0x0000 and characters without * any unicode equivalent are mapped to '?'. Is this appropriate? 
*/ private static final int[] SYMBOL2UNICODE = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, 0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x2245, 0x0391, 0x0392, 0x03A7, 0x0394, 0x0395, 0x03A6, 0x0393, 0x0397, 0x0399, 0x03D1, 0x039A, 0x039B, 0x039C, 0x039D, 0x039F, 0x03A0, 0x0398, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03C2, 0x03A9, 0x039E, 0x03A8, 0x0396, 0x005B, 0x2234, 0x005D, 0x22A5, 0x005F, 0x00AF, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3, 0x03B7, 0x03B9, 0x03D5, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BF, 0x03C0, 0x03B8, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03D6, 0x03C9, 0x03BE, 0x03C8, 0x03B6, 0x007B, 0x007C, 0x007D, 0x223C, 0x003F, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00A0, 0x03D2, 0x2032, 0x2264, 0x2044, 0x221E, 0x0192, 0x2663, 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193, 0x00B0, 0x00B1, 0x2033, 0x2265, 0x00D7, 0x221D, 0x2202, 0x00B7, 0x00F7, 0x2260, 0x2261, 0x2248, 0x2026, 0x003F, 0x003F, 0x21B5, 0x2135, 0x2111, 0x211C, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229, 0x222A, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, 0x2220, 0x2207, 0x00AE, 0x00A9, 0x2122, 0x220F, 0x221A, 0x22C5, 0x00AC, 0x2227, 0x2228, 0x21D4, 0x21D0, 0x21D1, 0x21D2, 0x21D3, 0x25CA, 0x2329, 0x00AE, 0x00A9, 0x2122, 0x2211, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x20AC, 0x232A, 0x222B, 
0x2320, 0x003F, 0x2321, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F}; /** * Array of valid UTF8 sequences. */ private static final ValidUTF8Sequence[] VALID_UTF8 = { new ValidUTF8Sequence(0x0000, 0x007F, 1, new char[]{0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}), new ValidUTF8Sequence(0x0080, 0x07FF, 2, new char[]{0xC2, 0xDF, 0x80, 0xBF, 0x00, 0x00, 0x00, 0x00}), new ValidUTF8Sequence(0x0800, 0x0FFF, 3, new char[]{0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF, 0x00, 0x00}), new ValidUTF8Sequence(0x1000, 0xFFFF, 3, new char[]{0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF, 0x00, 0x00}), new ValidUTF8Sequence(0x10000, 0x3FFFF, 4, new char[]{0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}), new ValidUTF8Sequence(0x40000, 0xFFFFF, 4, new char[]{0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}), new ValidUTF8Sequence(0x100000, 0x10FFFF, 4, new char[]{0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})}; /** * number of valid utf8 sequances. */ private static final int NUM_UTF8_SEQUENCES = VALID_UTF8.length; /** * Offset for utf8 sequences. */ private static final int[] OFFSET_UTF8_SEQUENCES = {0, // 1 byte 1, // 2 bytes 2, // 3 bytes 4, // 4 bytes NUM_UTF8_SEQUENCES}; // must be last /** * don't instantiate. */ private EncodingUtils() { // unused } /** * Function for conversion from Windows-1252 to Unicode. * @param c char to decode * @return decoded char */ protected static int decodeWin1252(int c) { return WIN2UNICODE[c - 128]; } /** * Function to convert from MacRoman to Unicode. * @param c char to decode * @return decoded char */ protected static int decodeMacRoman(int c) { if (127 < c) { c = MAC2UNICODE[c - 128]; } return c; } /** * Function to convert from Symbol Font chars to Unicode. * @param c char to decode * @return decoded char */ static int decodeSymbolFont(int c) { if (c > 255) { return c; } return SYMBOL2UNICODE[c]; } /** * Decodes an array of bytes to a char. 
* @param c will contain the decoded char * @param firstByte first input byte * @param successorBytes array containing successor bytes (can be null if a getter is provided). * @param getter callback used to get new bytes if successorBytes doesn't contain enough bytes * @param count will contain the number of bytes read * @param startInSuccessorBytesArray starting offset for bytes in successorBytes * @return <code>true</code> if error */ static boolean decodeUTF8BytesToChar(int[] c, int firstByte, byte[] successorBytes, GetBytes getter, int[] count, int startInSuccessorBytesArray) { byte[] buf = new byte[10]; int ch = 0; int n = 0; int i, bytes = 0; boolean hasError = false; if (successorBytes.length != 0) { buf = successorBytes; } // special check if we have been passed an EOF char if (firstByte == StreamIn.END_OF_STREAM) //uint { // at present c[0] = firstByte; count[0] = 1; return false; } ch = TidyUtils.toUnsigned(firstByte); // first byte is passed in separately if (ch <= 0x7F) // 0XXX XXXX one byte { n = ch; bytes = 1; } else if ((ch & 0xE0) == 0xC0) /* 110X XXXX two bytes */ { n = ch & 31; bytes = 2; } else if ((ch & 0xF0) == 0xE0) /* 1110 XXXX three bytes */ { n = ch & 15; bytes = 3; } else if ((ch & 0xF8) == 0xF0) /* 1111 0XXX four bytes */ { n = ch & 7; bytes = 4; } else if ((ch & 0xFC) == 0xF8) /* 1111 10XX five bytes */ { n = ch & 3; bytes = 5; hasError = true; } else if ((ch & 0xFE) == 0xFC) /* 1111 110X six bytes */ { n = ch & 1; bytes = 6; hasError = true; } else { // not a valid first byte of a UTF-8 sequence n = ch; bytes = 1; hasError = true; } for (i = 1; i < bytes; ++i) { int[] tempCount = new int[1]; // no. 
of additional bytes to get // successor bytes should have the form 10XX XXXX if (getter != null && (bytes - i > 0)) { tempCount[0] = 1; // to simplify things, get 1 byte at a time int[] buftocopy = new int[]{buf[startInSuccessorBytesArray + i - 1]}; getter.doGet(buftocopy, tempCount, false); //readRawBytesFromStream(buftocopy, tempCount, false); if (tempCount[0] <= 0) // EOF { hasError = true; bytes = i; break; } } if ((buf[startInSuccessorBytesArray + i - 1] & 0xC0) != 0x80) { // illegal successor byte value hasError = true; bytes = i; if (getter != null) { int[] buftocopy = new int[]{buf[startInSuccessorBytesArray + i - 1]}; tempCount[0] = 1; // to simplify things, unget 1 byte at a time getter.doGet(buftocopy, tempCount, true); } break; } n = (n << 6) | (buf[startInSuccessorBytesArray + i - 1] & 0x3F); } if (!hasError && ((n == UTF8_BYTE_SWAP_NOT_A_CHAR) || (n == UTF8_NOT_A_CHAR))) { hasError = true; } if (!hasError && (n > MAX_UTF8_FROM_UCS4)) { hasError = true; } if (!hasError && (n >= UTF16_LOW_SURROGATE_BEGIN) && (n <= UTF16_HIGH_SURROGATE_END)) { // unpaired surrogates not allowed hasError = true; } if (!hasError) { int lo = OFFSET_UTF8_SEQUENCES[bytes - 1]; int hi = OFFSET_UTF8_SEQUENCES[bytes] - 1; // check for overlong sequences if ((n < VALID_UTF8[lo].lowChar) || (n > VALID_UTF8[hi].highChar)) { hasError = true; } else { hasError = true; // assume error until proven otherwise for (i = lo; i <= hi; i++) { int tempCount; char theByte; //unsigned for (tempCount = 0; tempCount < bytes; tempCount++) { if (!TidyUtils.toBoolean(tempCount)) { theByte = (char) firstByte; } else { theByte = (char) buf[startInSuccessorBytesArray + tempCount - 1]; } if ((theByte >= VALID_UTF8[i].validBytes[(tempCount * 2)]) && (theByte <= VALID_UTF8[i].validBytes[(tempCount * 2) + 1])) { hasError = false; } if (hasError) { break; } } } } } count[0] = bytes; c[0] = n; // n = 0xFFFD; // replacement char - do this in the caller return hasError; } /** * Encode a char to an array of 
bytes. * @param c char to encode * @param encodebuf will contain the decoded bytes * @param putter if not null it will be called to write bytes to out * @param count number of bytes written * @return <code>false</code>= ok, <code>true</code>= error */ static boolean encodeCharToUTF8Bytes(int c, byte[] encodebuf, PutBytes putter, int[] count) { int bytes = 0; byte[] buf = new byte[10]; if (encodebuf != null) { buf = encodebuf; } boolean hasError = false; if (c <= 0x7F) // 0XXX XXXX one byte { buf[0] = (byte) c; bytes = 1; } else if (c <= 0x7FF) // 110X XXXX two bytes { buf[0] = (byte) (0xC0 | (c >> 6)); buf[1] = (byte) (0x80 | (c & 0x3F)); bytes = 2; } else if (c <= 0xFFFF) // 1110 XXXX three bytes { buf[0] = (byte) (0xE0 | (c >> 12)); buf[1] = (byte) (0x80 | ((c >> 6) & 0x3F)); buf[2] = (byte) (0x80 | (c & 0x3F)); bytes = 3; if ((c == UTF8_BYTE_SWAP_NOT_A_CHAR) || (c == UTF8_NOT_A_CHAR)) { hasError = true; } else if ((c >= UTF16_LOW_SURROGATE_BEGIN) && (c <= UTF16_HIGH_SURROGATE_END)) { // unpaired surrogates not allowed hasError = true; } } else if (c <= 0x1FFFFF) // 1111 0XXX four bytes { buf[0] = (byte) (0xF0 | (c >> 18)); buf[1] = (byte) (0x80 | ((c >> 12) & 0x3F)); buf[2] = (byte) (0x80 | ((c >> 6) & 0x3F)); buf[3] = (byte) (0x80 | (c & 0x3F)); bytes = 4; if (c > MAX_UTF8_FROM_UCS4) { hasError = true; } } else if (c <= 0x3FFFFFF) // 1111 10XX five bytes { buf[0] = (byte) (0xF8 | (c >> 24)); buf[1] = (byte) (0x80 | (c >> 18)); buf[2] = (byte) (0x80 | ((c >> 12) & 0x3F)); buf[3] = (byte) (0x80 | ((c >> 6) & 0x3F)); buf[4] = (byte) (0x80 | (c & 0x3F)); bytes = 5; hasError = true; } else if (c <= 0x7FFFFFFF) // 1111 110X six bytes { buf[0] = (byte) (0xFC | (c >> 30)); buf[1] = (byte) (0x80 | ((c >> 24) & 0x3F)); buf[2] = (byte) (0x80 | ((c >> 18) & 0x3F)); buf[3] = (byte) (0x80 | ((c >> 12) & 0x3F)); buf[4] = (byte) (0x80 | ((c >> 6) & 0x3F)); buf[5] = (byte) (0x80 | (c & 0x3F)); bytes = 6; hasError = true; } else { hasError = true; } if (!hasError && putter != 
null) // don't output invalid UTF-8 byte sequence to a stream { int[] tempCount = new int[]{bytes}; putter.doPut(buf, tempCount); if (tempCount[0] < bytes) { hasError = true; } } count[0] = bytes; return hasError; } /** * Getter callback: called to retrieve 1 or more additional UTF-8 bytes. The Getter callback can also unget if * necessary to re-synchronize the input stream. */ static interface GetBytes { /** * Get one or more byte. * @param buf will contain the bytes. * @param count number of bytes actually stored in "buf". <= 0 if error or EOF * @param unget unget bytes? */ void doGet(int[] buf, int[] count, boolean unget); } /** * Putter callbacks: called to store 1 or more additional UTF-8 bytes. */ static interface PutBytes { /** * Store one or more byte. * @param buf will contain the bytes. * @param count number of bytes actually stored in "buf". <= 0 if error or EOF */ void doPut(byte[] buf, int[] count); } } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/ParseProperty.java�������������������������������������������������0000644�0001750�0001750�00000006473�10111224300�022503� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
/**
 * Interface for configuration property parsers: each implementation converts the textual value of a configuration
 * option into an object and can describe the option's type and accepted values.
 * @author Fabrizio Giustina
 * @version $Revision $ ($Author $)
 */
public interface ParseProperty
{

    /**
     * Parse a configuration option.
     * @param value option value, as text
     * @param option option name
     * @param configuration actual configuration instance
     * @return parsed configuration value; the concrete type is defined by the implementation
     */
    Object parse(String value, String option, Configuration configuration);

    /**
     * Returns the option type.
     * @return option type, as a descriptive string
     */
    String getType();

    /**
     * Returns the valid values.
     * @return valid values (text)
     */
    String getOptionValues();

    /**
     * Returns the "friendly name" for the passed value. Needed to print actual configuration setting.
     * @param option option name
     * @param value actual value
     * @param configuration actual configuration
     * @return "friendly" actual value
     */
    String getFriendlyName(String option, Object value, Configuration configuration);

}
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.io.Writer; /** * Tidy Output factory. * @author Fabrizio Giustina * @version $Revision: 807 $ ($Author: fgiust $) */ public final class OutFactory { /** * Don't instantiate. */ private OutFactory() { // unused } /** * Returns the appropriate Out implementation. * @param config configuration instance * @param stream output stream * @return out instance */ public static Out getOut(Configuration config, OutputStream stream) { try { return new OutJavaImpl(config, config.getOutCharEncodingName(), stream); } catch (UnsupportedEncodingException e) { throw new RuntimeException("Unsupported encoding: " + e.getMessage()); } } /** * Returns the appropriate Out implementation. * @param config configuration instance * @param writer Writer * @return out instance */ public static Out getOut(Configuration config, Writer writer) { return new OutJavaImpl(config, writer); } } �������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/EntityTable.java���������������������������������������������������0000644�0001750�0001750�00000034231�10121404460�022111� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import java.util.Hashtable; import java.util.Iterator; import java.util.Map; /** * Entity hash table. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 622 $ ($Author: fgiust $) */ public final class EntityTable { /** * the default entity table. */ private static EntityTable defaultEntityTable; /** * Known entities. */ private static Entity[] entities = { new Entity("nbsp", 160), new Entity("iexcl", 161), new Entity("cent", 162), new Entity("pound", 163), new Entity("curren", 164), new Entity("yen", 165), new Entity("brvbar", 166), new Entity("sect", 167), new Entity("uml", 168), new Entity("copy", 169), new Entity("ordf", 170), new Entity("laquo", 171), new Entity("not", 172), new Entity("shy", 173), new Entity("reg", 174), new Entity("macr", 175), new Entity("deg", 176), new Entity("plusmn", 177), new Entity("sup2", 178), new Entity("sup3", 179), new Entity("acute", 180), new Entity("micro", 181), new Entity("para", 182), new Entity("middot", 183), new Entity("cedil", 184), new Entity("sup1", 185), new Entity("ordm", 186), new Entity("raquo", 187), new Entity("frac14", 188), new Entity("frac12", 189), new Entity("frac34", 190), new Entity("iquest", 191), new Entity("Agrave", 192), new Entity("Aacute", 193), new Entity("Acirc", 194), new Entity("Atilde", 195), new Entity("Auml", 196), new Entity("Aring", 197), new Entity("AElig", 198), new Entity("Ccedil", 199), new Entity("Egrave", 200), new Entity("Eacute", 201), new Entity("Ecirc", 202), new Entity("Euml", 203), new Entity("Igrave", 204), new Entity("Iacute", 205), new Entity("Icirc", 206), new Entity("Iuml", 207), new Entity("ETH", 208), new Entity("Ntilde", 209), new Entity("Ograve", 210), new Entity("Oacute", 211), new Entity("Ocirc", 212), new Entity("Otilde", 213), new Entity("Ouml", 214), new 
Entity("times", 215), new Entity("Oslash", 216), new Entity("Ugrave", 217), new Entity("Uacute", 218), new Entity("Ucirc", 219), new Entity("Uuml", 220), new Entity("Yacute", 221), new Entity("THORN", 222), new Entity("szlig", 223), new Entity("agrave", 224), new Entity("aacute", 225), new Entity("acirc", 226), new Entity("atilde", 227), new Entity("auml", 228), new Entity("aring", 229), new Entity("aelig", 230), new Entity("ccedil", 231), new Entity("egrave", 232), new Entity("eacute", 233), new Entity("ecirc", 234), new Entity("euml", 235), new Entity("igrave", 236), new Entity("iacute", 237), new Entity("icirc", 238), new Entity("iuml", 239), new Entity("eth", 240), new Entity("ntilde", 241), new Entity("ograve", 242), new Entity("oacute", 243), new Entity("ocirc", 244), new Entity("otilde", 245), new Entity("ouml", 246), new Entity("divide", 247), new Entity("oslash", 248), new Entity("ugrave", 249), new Entity("uacute", 250), new Entity("ucirc", 251), new Entity("uuml", 252), new Entity("yacute", 253), new Entity("thorn", 254), new Entity("yuml", 255), new Entity("fnof", 402), new Entity("Alpha", 913), new Entity("Beta", 914), new Entity("Gamma", 915), new Entity("Delta", 916), new Entity("Epsilon", 917), new Entity("Zeta", 918), new Entity("Eta", 919), new Entity("Theta", 920), new Entity("Iota", 921), new Entity("Kappa", 922), new Entity("Lambda", 923), new Entity("Mu", 924), new Entity("Nu", 925), new Entity("Xi", 926), new Entity("Omicron", 927), new Entity("Pi", 928), new Entity("Rho", 929), new Entity("Sigma", 931), new Entity("Tau", 932), new Entity("Upsilon", 933), new Entity("Phi", 934), new Entity("Chi", 935), new Entity("Psi", 936), new Entity("Omega", 937), new Entity("alpha", 945), new Entity("beta", 946), new Entity("gamma", 947), new Entity("delta", 948), new Entity("epsilon", 949), new Entity("zeta", 950), new Entity("eta", 951), new Entity("theta", 952), new Entity("iota", 953), new Entity("kappa", 954), new Entity("lambda", 955), new 
Entity("mu", 956), new Entity("nu", 957), new Entity("xi", 958), new Entity("omicron", 959), new Entity("pi", 960), new Entity("rho", 961), new Entity("sigmaf", 962), new Entity("sigma", 963), new Entity("tau", 964), new Entity("upsilon", 965), new Entity("phi", 966), new Entity("chi", 967), new Entity("psi", 968), new Entity("omega", 969), new Entity("thetasym", 977), new Entity("upsih", 978), new Entity("piv", 982), new Entity("bull", 8226), new Entity("hellip", 8230), new Entity("prime", 8242), new Entity("Prime", 8243), new Entity("oline", 8254), new Entity("frasl", 8260), new Entity("weierp", 8472), new Entity("image", 8465), new Entity("real", 8476), new Entity("trade", 8482), new Entity("alefsym", 8501), new Entity("larr", 8592), new Entity("uarr", 8593), new Entity("rarr", 8594), new Entity("darr", 8595), new Entity("harr", 8596), new Entity("crarr", 8629), new Entity("lArr", 8656), new Entity("uArr", 8657), new Entity("rArr", 8658), new Entity("dArr", 8659), new Entity("hArr", 8660), new Entity("forall", 8704), new Entity("part", 8706), new Entity("exist", 8707), new Entity("empty", 8709), new Entity("nabla", 8711), new Entity("isin", 8712), new Entity("notin", 8713), new Entity("ni", 8715), new Entity("prod", 8719), new Entity("sum", 8721), new Entity("minus", 8722), new Entity("lowast", 8727), new Entity("radic", 8730), new Entity("prop", 8733), new Entity("infin", 8734), new Entity("ang", 8736), new Entity("and", 8743), new Entity("or", 8744), new Entity("cap", 8745), new Entity("cup", 8746), new Entity("int", 8747), new Entity("there4", 8756), new Entity("sim", 8764), new Entity("cong", 8773), new Entity("asymp", 8776), new Entity("ne", 8800), new Entity("equiv", 8801), new Entity("le", 8804), new Entity("ge", 8805), new Entity("sub", 8834), new Entity("sup", 8835), new Entity("nsub", 8836), new Entity("sube", 8838), new Entity("supe", 8839), new Entity("oplus", 8853), new Entity("otimes", 8855), new Entity("perp", 8869), new Entity("sdot", 8901), new 
Entity("lceil", 8968), new Entity("rceil", 8969), new Entity("lfloor", 8970), new Entity("rfloor", 8971), new Entity("lang", 9001), new Entity("rang", 9002), new Entity("loz", 9674), new Entity("spades", 9824), new Entity("clubs", 9827), new Entity("hearts", 9829), new Entity("diams", 9830), new Entity("quot", 34), new Entity("amp", 38), new Entity("lt", 60), new Entity("gt", 62), new Entity("OElig", 338), new Entity("oelig", 339), new Entity("Scaron", 352), new Entity("scaron", 353), new Entity("Yuml", 376), new Entity("circ", 710), new Entity("tilde", 732), new Entity("ensp", 8194), new Entity("emsp", 8195), new Entity("thinsp", 8201), new Entity("zwnj", 8204), new Entity("zwj", 8205), new Entity("lrm", 8206), new Entity("rlm", 8207), new Entity("ndash", 8211), new Entity("mdash", 8212), new Entity("lsquo", 8216), new Entity("rsquo", 8217), new Entity("sbquo", 8218), new Entity("ldquo", 8220), new Entity("rdquo", 8221), new Entity("bdquo", 8222), new Entity("dagger", 8224), new Entity("Dagger", 8225), new Entity("permil", 8240), new Entity("lsaquo", 8249), new Entity("rsaquo", 8250), new Entity("euro", 8364)}; /** * Entity map. */ private Map entityHashtable = new Hashtable(); /** * use getDefaultEntityTable to get an entity table instance. */ private EntityTable() { super(); } /** * installs an entity. * @param ent entity * @return installed Entity */ private Entity install(Entity ent) { return (Entity) this.entityHashtable.put(ent.getName(), ent); } /** * Lookup an entity by its name. * @param name entity name * @return entity */ public Entity lookup(String name) { return (Entity) this.entityHashtable.get(name); } /** * Returns the entity code for the given entity name. * @param name entity name * @return entity code or 0 for unknown entity names */ public int entityCode(String name) { // entity starting with "&" returns zero on error. 
int c; if (name.length() <= 1) { return 0; } // numeric entitity: name = "&#" followed by number if (name.charAt(1) == '#') { c = 0; // zero on missing/bad number // 'x' prefix denotes hexadecimal number format try { if (name.length() >= 4 && name.charAt(2) == 'x') { c = Integer.parseInt(name.substring(3), 16); } else if (name.length() >= 3) { c = Integer.parseInt(name.substring(2)); } } catch (NumberFormatException e) { // ignore } return c; } // Named entity: name ="&" followed by a name Entity ent = lookup(name.substring(1)); if (ent != null) { return ent.getCode(); } return 0; // zero signifies unknown entity name } /** * Returns the entity name for the given entity code. * @param code entity code * @return entity name or null for unknown entity codes */ public String entityName(short code) { String name = null; Entity ent; Iterator en = this.entityHashtable.values().iterator(); while (en.hasNext()) { ent = (Entity) en.next(); if (ent.getCode() == code) { name = ent.getName(); break; } } return name; } /** * Returns the default entity table instance. 
* @return entity table instance */ public static EntityTable getDefaultEntityTable() { if (defaultEntityTable == null) { defaultEntityTable = new EntityTable(); for (int i = 0; i < entities.length; i++) { defaultEntityTable.install(entities[i]); } } return defaultEntityTable; } }�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/TagTable.java������������������������������������������������������0000644�0001750�0001750�00000100234�10123335515�021353� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.util.ArrayList; import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.Map; /** * Tag dictionary node hash table. 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision: 633 $ ($Author: fgiust $)
 */
public final class TagTable
{

    /**
     * dummy entry for all xml tags. It has no name, no parser and no attribute checker.
     */
    public static final Dict XML_TAGS = new Dict(null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null);

    /**
     * all the known tags. Each entry is built from a tag name, a version mask (Dict.VERS_*), a content model mask
     * (Dict.CM_*), a parser (ParserImpl.*) and a final argument that is either null or a TagCheckImpl constant
     * (presumably the attribute checker for the tag). "nolayer" and "ilayer" are listed twice with identical values.
     */
    private static final Dict[] TAGS = {
        // document structure and head elements
        new Dict("html", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST), ParserImpl.HTML, TagCheckImpl.HTML),
        new Dict("head", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST), ParserImpl.HEAD, null),
        new Dict("title", Dict.VERS_ALL, Dict.CM_HEAD, ParserImpl.TITLE, null),
        new Dict("base", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("link", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, TagCheckImpl.LINK),
        new Dict("meta", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, TagCheckImpl.META),
        new Dict(
            "style",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            Dict.CM_HEAD,
            ParserImpl.SCRIPT,
            TagCheckImpl.STYLE),
        new Dict(
            "script",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
            ParserImpl.SCRIPT,
            TagCheckImpl.SCRIPT),
        new Dict(
            "server",
            Dict.VERS_NETSCAPE,
            (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
            ParserImpl.SCRIPT,
            null),
        new Dict("body", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST), ParserImpl.BODY, null),
        new Dict("frameset", Dict.VERS_FRAMESET, (Dict.CM_HTML | Dict.CM_FRAMES), ParserImpl.FRAMESET, null),
        // block level elements
        new Dict("p", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OPT), ParserImpl.INLINE, null),
        new Dict("h1", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
        new Dict("h2", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
        new Dict("h3", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
        new Dict("h4", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
        new Dict("h5", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
        new Dict("h6", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
        new Dict("ul", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
        new Dict("ol", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
        new Dict("dl", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.DEFLIST, null),
        new Dict("dir", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.LIST, null),
        new Dict("menu", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.LIST, null),
        new Dict("pre", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.PRE, null),
        new Dict("listing", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
        new Dict("xmp", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
        new Dict("plaintext", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
        new Dict("address", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("blockquote", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("form", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, TagCheckImpl.FORM),
        new Dict("isindex", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("fieldset", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("table", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.TABLETAG, TagCheckImpl.TABLE),
        new Dict(
            "hr",
            (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
            (Dict.CM_BLOCK | Dict.CM_EMPTY),
            ParserImpl.EMPTY,
            TagCheckImpl.HR),
        new Dict("div", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("layer", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict(
            "nolayer",
            Dict.VERS_NETSCAPE,
            (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
            ParserImpl.BLOCK,
            null),
        new Dict("align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict("center", Dict.VERS_LOOSE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
        new Dict(
            "ins",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
            ParserImpl.INLINE,
            null),
        new Dict(
            "del",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
            ParserImpl.INLINE,
            null),
        // list items and table parts
        new Dict("li", Dict.VERS_ALL, (Dict.CM_LIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
        new Dict("dt", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.INLINE, null),
        new Dict("dd", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
        new Dict("caption", Dict.VERS_FROM32, Dict.CM_TABLE, ParserImpl.INLINE, TagCheckImpl.CAPTION),
        new Dict("colgroup", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.COLGROUP, null),
        new Dict("col", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict(
            "thead",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
            ParserImpl.ROWGROUP,
            null),
        new Dict(
            "tfoot",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
            ParserImpl.ROWGROUP,
            null),
        new Dict(
            "tbody",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
            ParserImpl.ROWGROUP,
            null),
        new Dict("tr", Dict.VERS_FROM32, (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.ROW, null),
        new Dict(
            "td",
            Dict.VERS_FROM32,
            (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
            ParserImpl.BLOCK,
            TagCheckImpl.TABLECELL),
        new Dict(
            "th",
            Dict.VERS_FROM32,
            (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
            ParserImpl.BLOCK,
            TagCheckImpl.TABLECELL),
        // inline and embedded-object elements
        new Dict("q", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("a", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, TagCheckImpl.ANCHOR),
        new Dict("br", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict(
            "img",
            Dict.VERS_ALL,
            (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY),
            ParserImpl.EMPTY,
            TagCheckImpl.IMG),
        new Dict(
            "object",
            Dict.VERS_HTML40,
            (Dict.CM_OBJECT | Dict.CM_HEAD | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
            ParserImpl.BLOCK,
            null),
        new Dict(
            "applet",
            Dict.VERS_LOOSE,
            (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
            ParserImpl.BLOCK,
            null),
        new Dict(
            "servlet",
            Dict.VERS_SUN,
            (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
            ParserImpl.BLOCK,
            null),
        new Dict("param", Dict.VERS_FROM32, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE, ParserImpl.BLOCK, null),
        new Dict("frame", Dict.VERS_FRAMESET, (Dict.CM_FRAMES | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("noframes", Dict.VERS_IFRAME, (Dict.CM_BLOCK | Dict.CM_FRAMES), ParserImpl.NOFRAMES, null),
        new Dict(
            "noscript",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
            ParserImpl.BLOCK,
            null),
        new Dict("b", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("i", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("u", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("tt", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("s", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("strike", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("big", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("small", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("sub", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("sup", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("em", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("strong", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("dfn", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("code", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("samp", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("kbd", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("var", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("cite", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("abbr", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("acronym", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("span", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("wbr", Dict.VERS_PROPRIETARY, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("marquee", Dict.VERS_MICROSOFT, (Dict.CM_INLINE | Dict.CM_OPT), ParserImpl.INLINE, null),
        new Dict("bgsound", Dict.VERS_MICROSOFT, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("spacer", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("keygen", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        // second listing of "nolayer" and "ilayer", same values as the entries further up
        new Dict(
            "nolayer",
            Dict.VERS_NETSCAPE,
            (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
            ParserImpl.BLOCK,
            null),
        new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict(
            "map",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            Dict.CM_INLINE,
            ParserImpl.BLOCK,
            TagCheckImpl.MAP),
        new Dict(
            "area",
            (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
            (Dict.CM_BLOCK | Dict.CM_EMPTY),
            ParserImpl.EMPTY,
            TagCheckImpl.AREA),
        // form controls
        new Dict("input", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("select", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD), ParserImpl.SELECT, null),
        new Dict("option", Dict.VERS_ALL, (Dict.CM_FIELD | Dict.CM_OPT), ParserImpl.TEXT, null),
        new Dict(
            "optgroup",
            (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
            (Dict.CM_FIELD | Dict.CM_OPT),
            ParserImpl.OPTGROUP,
            null),
        new Dict("textarea", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD), ParserImpl.TEXT, null),
        new Dict("label", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("legend", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("button", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("basefont", Dict.VERS_LOOSE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
        new Dict("font", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("bdo", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
        // elements for XHTML 1.1
        new Dict("ruby", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("rbc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("rtc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("rb", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
        new Dict("rt", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
new Dict("rp", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
        // the entry with an empty tag name that used to precede "rp" was dropped: it is not an element of any
        // HTML version and only registered a tag named ""
    };

    // Shortcuts to frequently used tag definitions, resolved once in the constructor.

    /** html tag. */
    protected Dict tagHtml;

    /** head tag. */
    protected Dict tagHead;

    /** body tag. */
    protected Dict tagBody;

    /** frameset tag. */
    protected Dict tagFrameset;

    /** frame tag. */
    protected Dict tagFrame;

    /** iframe tag. */
    protected Dict tagIframe;

    /** noframes tag. */
    protected Dict tagNoframes;

    /** meta tag. */
    protected Dict tagMeta;

    /** title tag. */
    protected Dict tagTitle;

    /** base tag. */
    protected Dict tagBase;

    /** hr tag. */
    protected Dict tagHr;

    /** pre tag. */
    protected Dict tagPre;

    /** listing tag. */
    protected Dict tagListing;

    /** h1 tag. */
    protected Dict tagH1;

    /** h2 tag. */
    protected Dict tagH2;

    /** p tag. */
    protected Dict tagP;

    /** ul tag. */
    protected Dict tagUl;

    /** ol tag. */
    protected Dict tagOl;

    /** dir tag. */
    protected Dict tagDir;

    /** li tag. */
    protected Dict tagLi;

    /** dt tag. */
    protected Dict tagDt;

    /** dd tag. */
    protected Dict tagDd;

    /** dl tag. */
    protected Dict tagDl;

    /** td tag. */
    protected Dict tagTd;

    /** th tag. */
    protected Dict tagTh;

    /** tr tag. */
    protected Dict tagTr;

    /** col tag. */
    protected Dict tagCol;

    /** colgroup tag. */
    protected Dict tagColgroup;

    /** br tag. */
    protected Dict tagBr;

    /** a tag. */
    protected Dict tagA;

    /** link tag. */
    protected Dict tagLink;

    /** b tag. */
    protected Dict tagB;

    /** i tag. */
    protected Dict tagI;

    /** strong tag. */
    protected Dict tagStrong;

    /** em tag. */
    protected Dict tagEm;

    /** big tag. */
    protected Dict tagBig;

    /** small tag. */
    protected Dict tagSmall;

    /** param tag. */
    protected Dict tagParam;

    /** option tag. */
    protected Dict tagOption;

    /** optgroup tag. */
    protected Dict tagOptgroup;

    /** img tag. */
    protected Dict tagImg;

    /** map tag. */
    protected Dict tagMap;

    /** area tag. */
    protected Dict tagArea;

    /**
     * nobr tag.
     */
    protected Dict tagNobr;

    /** wbr tag. */
    protected Dict tagWbr;

    /** font tag. */
    protected Dict tagFont;

    /** spacer tag. */
    protected Dict tagSpacer;

    /** layer tag. */
    protected Dict tagLayer;

    /** center tag. */
    protected Dict tagCenter;

    /** style tag. */
    protected Dict tagStyle;

    /** script tag. */
    protected Dict tagScript;

    /** noscript tag. */
    protected Dict tagNoscript;

    /** table tag. */
    protected Dict tagTable;

    /** caption tag. */
    protected Dict tagCaption;

    /** form tag. */
    protected Dict tagForm;

    /** textarea tag. */
    protected Dict tagTextarea;

    /** blockquote tag. */
    protected Dict tagBlockquote;

    /** applet tag. */
    protected Dict tagApplet;

    /** object tag. */
    protected Dict tagObject;

    /** div tag. */
    protected Dict tagDiv;

    /** span tag. */
    protected Dict tagSpan;

    /** input tag. */
    protected Dict tagInput;

    /** q tag. */
    protected Dict tagQ;

    /**
     * a proprietary tag added by Tidy, along with tag_nobr, tag_wbr.
     */
    protected Dict tagBlink;

    /**
     * anchor/node hash. Head of a singly linked list of anchors.
     */
    protected Anchor anchorList;

    /**
     * configuration.
     */
    private Configuration configuration;

    /**
     * hashTable containing tags, keyed by tag name.
     */
    private Map tagHashtable = new Hashtable();

    /**
     * Instantiates a new tag table with known tags.
*/ protected TagTable() { for (int i = 0; i < TAGS.length; i++) { install(TAGS[i]); } tagHtml = lookup("html"); tagHead = lookup("head"); tagBody = lookup("body"); tagFrameset = lookup("frameset"); tagFrame = lookup("frame"); tagIframe = lookup("iframe"); tagNoframes = lookup("noframes"); tagMeta = lookup("meta"); tagTitle = lookup("title"); tagBase = lookup("base"); tagHr = lookup("hr"); tagPre = lookup("pre"); tagListing = lookup("listing"); tagH1 = lookup("h1"); tagH2 = lookup("h2"); tagP = lookup("p"); tagUl = lookup("ul"); tagOl = lookup("ol"); tagDir = lookup("dir"); tagLi = lookup("li"); tagDt = lookup("dt"); tagDd = lookup("dd"); tagDl = lookup("dl"); tagTd = lookup("td"); tagTh = lookup("th"); tagTr = lookup("tr"); tagCol = lookup("col"); tagColgroup = lookup("colgroup"); tagBr = lookup("br"); tagA = lookup("a"); tagLink = lookup("link"); tagB = lookup("b"); tagI = lookup("i"); tagStrong = lookup("strong"); tagEm = lookup("em"); tagBig = lookup("big"); tagSmall = lookup("small"); tagParam = lookup("param"); tagOption = lookup("option"); tagOptgroup = lookup("optgroup"); tagImg = lookup("img"); tagMap = lookup("map"); tagArea = lookup("area"); tagNobr = lookup("nobr"); tagWbr = lookup("wbr"); tagFont = lookup("font"); tagSpacer = lookup("spacer"); tagLayer = lookup("layer"); tagCenter = lookup("center"); tagStyle = lookup("style"); tagScript = lookup("script"); tagNoscript = lookup("noscript"); tagTable = lookup("table"); tagCaption = lookup("caption"); tagForm = lookup("form"); tagTextarea = lookup("textarea"); tagBlockquote = lookup("blockquote"); tagApplet = lookup("applet"); tagObject = lookup("object"); tagDiv = lookup("div"); tagSpan = lookup("span"); tagInput = lookup("input"); tagQ = lookup("q"); tagBlink = lookup("blink"); } /** * Setter for the current configuration instance. 
* @param configuration configuration instance */ public void setConfiguration(Configuration configuration) { this.configuration = configuration; } /** * Lookup a tag definition by its name. * @param name tag name * @return tag definition (Dict) */ public Dict lookup(String name) { return (Dict) tagHashtable.get(name); } /** * Installs a new tag in the tag table, or modify an existing one. * @param dict tag definition * @return installed Dict instance */ public Dict install(Dict dict) { Dict d = (Dict) tagHashtable.get(dict.name); if (d != null) { d.versions = dict.versions; d.model |= dict.model; d.setParser(dict.getParser()); d.setChkattrs(dict.getChkattrs()); return d; } tagHashtable.put(dict.name, dict); return dict; } /** * Finds a tag by name. * @param node Node to find. If the element is found the tag property of node will be set. * @return true if the tag is found, false otherwise */ public boolean findTag(Node node) { Dict np; if (configuration != null && configuration.xmlTags) { node.tag = XML_TAGS; return true; } if (node.element != null) { np = lookup(node.element); if (np != null) { node.tag = np; return true; } } return false; } /** * Finds a parser fo the given node. * @param node Node * @return parser for the node */ public Parser findParser(Node node) { Dict np; if (node.element != null) { np = lookup(node.element); if (np != null) { return np.getParser(); } } return null; } /** * May id or name serve as anchor? * @param node Node * @return <code>true</code> if tag can serve as an anchor */ boolean isAnchorElement(Node node) { return node.tag == this.tagA || node.tag == this.tagApplet || node.tag == this.tagForm || node.tag == this.tagFrame || node.tag == this.tagIframe || node.tag == this.tagImg || node.tag == this.tagMap; } /** * Defines a new tag. * @param tagType tag type. 
Can be TAGTYPE_BLOCK | TAGTYPE_EMPTY | TAGTYPE_PRE | TAGTYPE_INLINE * @param name tag name */ public void defineTag(short tagType, String name) { Parser tagParser; short model; switch (tagType) { case Dict.TAGTYPE_BLOCK : model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW); tagParser = ParserImpl.BLOCK; break; case Dict.TAGTYPE_EMPTY : model = (short) (Dict.CM_EMPTY | Dict.CM_NO_INDENT | Dict.CM_NEW); tagParser = ParserImpl.BLOCK; break; case Dict.TAGTYPE_PRE : model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW); tagParser = ParserImpl.PRE; break; case Dict.TAGTYPE_INLINE : default : // default to inline tag model = (short) (Dict.CM_INLINE | Dict.CM_NO_INDENT | Dict.CM_NEW); tagParser = ParserImpl.INLINE; break; } install(new Dict(name, Dict.VERS_PROPRIETARY, model, tagParser, null)); } /** * return a List containing all the user-defined tag names. * @param tagType one of Dict.TAGTYPE_EMPTY | Dict.TAGTYPE_INLINE | Dict.TAGTYPE_BLOCK | Dict.TAGTYPE_PRE * @return List containing all the user-defined tag names */ List findAllDefinedTag(short tagType) { List tagNames = new ArrayList(); Iterator iterator = tagHashtable.values().iterator(); while (iterator.hasNext()) { Dict curDictEntry = (Dict) iterator.next(); if (curDictEntry != null) { switch (tagType) { // defined tags can be empty + inline case Dict.TAGTYPE_EMPTY : if ((curDictEntry.versions == Dict.VERS_PROPRIETARY) && ((curDictEntry.model & Dict.CM_EMPTY) == Dict.CM_EMPTY) && // (curDictEntry.parser == ParseBlock) && (curDictEntry != tagWbr)) { tagNames.add(curDictEntry.name); } break; // defined tags can be empty + inline case Dict.TAGTYPE_INLINE : if ((curDictEntry.versions == Dict.VERS_PROPRIETARY) && ((curDictEntry.model & Dict.CM_INLINE) == Dict.CM_INLINE) && // (curDictEntry.parser == ParseInline) && (curDictEntry != tagBlink) && (curDictEntry != tagNobr) && (curDictEntry != tagWbr)) { tagNames.add(curDictEntry.name); } break; // defined tags can be empty + block case 
Dict.TAGTYPE_BLOCK : if ((curDictEntry.versions == Dict.VERS_PROPRIETARY) && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK) && (curDictEntry.getParser() == ParserImpl.BLOCK)) { tagNames.add(curDictEntry.name); } break; case Dict.TAGTYPE_PRE : if ((curDictEntry.versions == Dict.VERS_PROPRIETARY) && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK) && (curDictEntry.getParser() == ParserImpl.PRE)) { tagNames.add(curDictEntry.name); } break; } } } return tagNames; } /** * Free node's attributes. * @param node Node */ public void freeAttrs(Node node) { while (node.attributes != null) { AttVal av = node.attributes; if ("id".equalsIgnoreCase(av.attribute) || "name".equalsIgnoreCase(av.attribute) && isAnchorElement(node)) { removeAnchorByNode(node); } node.attributes = av.next; } } /** * Removes anchor for specific node. * @param node Node */ void removeAnchorByNode(Node node) { Anchor delme = null; Anchor found = null; Anchor prev = null; Anchor next = null; for (found = anchorList; found != null; found = found.next) { next = found.next; if (found.node == node) { if (prev != null) { prev.next = next; } else { anchorList = next; } delme = found; } else { prev = found; } } if (delme != null) { delme = null; // freeAnchor } } /** * Initialize a new anchor. * @return a new anchor element */ Anchor newAnchor() { Anchor a = new Anchor(); return a; } /** * Adds a new anchor to namespace. * @param name anchor name * @param node destination for this anchor * @return Anchor */ Anchor addAnchor(String name, Node node) { Anchor a = newAnchor(); a.name = name; a.node = node; if (anchorList == null) { anchorList = a; } else { Anchor here = anchorList; while (here.next != null) { here = here.next; } here.next = a; } return anchorList; } /** * Return node associated with anchor. 
* @param name anchor name * @return node associated with anchor */ Node getNodeByAnchor(String name) { Anchor found; for (found = anchorList; found != null; found = found.next) { if (name.equalsIgnoreCase(found.name)) { break; } } if (found != null) { return found.node; } return null; } /** * free all anchors. */ void freeAnchors() { anchorList = null; } }��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/AttrCheckImpl.java�������������������������������������������������0000644�0001750�0001750�00000100514�11355334615�022373� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.util.HashMap; import java.util.Iterator; import java.util.Map; /** * Check attribute values implementations. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 1099 $ ($Author: aditsu $) */ public final class AttrCheckImpl { /** * checker for URLs. 
*/
    public static final AttrCheck URL = new CheckUrl();

    /**
     * checker for scripts.
     */
    public static final AttrCheck SCRIPT = new CheckScript();

    /**
     * checker for "name" attribute.
     */
    public static final AttrCheck NAME = new CheckName();

    /**
     * checker for ids.
     */
    public static final AttrCheck ID = new CheckId();

    /**
     * checker for "align" attribute.
     */
    public static final AttrCheck ALIGN = new CheckAlign();

    /**
     * checker for "valign" attribute.
     */
    public static final AttrCheck VALIGN = new CheckValign();

    /**
     * checker for boolean attributes.
     */
    public static final AttrCheck BOOL = new CheckBool();

    /**
     * checker for "length" attribute.
     */
    public static final AttrCheck LENGTH = new CheckLength();

    /**
     * checker for "target" attribute.
     */
    public static final AttrCheck TARGET = new CheckTarget();

    /**
     * checker for "submit" attribute.
     */
    public static final AttrCheck FSUBMIT = new CheckFsubmit();

    /**
     * checker for "clear" attribute.
     */
    public static final AttrCheck CLEAR = new CheckClear();

    /**
     * checker for "shape" attribute.
     */
    public static final AttrCheck SHAPE = new CheckShape();

    /**
     * checker for "number" attribute.
     */
    public static final AttrCheck NUMBER = new CheckNumber();

    /**
     * checker for "scope" attribute.
     */
    public static final AttrCheck SCOPE = new CheckScope();

    /**
     * checker for "color" attribute.
     */
    public static final AttrCheck COLOR = new CheckColor();

    /**
     * checker for "vtype" attribute.
     */
    public static final AttrCheck VTYPE = new CheckVType();

    /**
     * checker for "scroll" attribute.
     */
    public static final AttrCheck SCROLL = new CheckScroll();

    /**
     * checker for "dir" attribute.
     */
    public static final AttrCheck TEXTDIR = new CheckTextDir();

    /**
     * checker for "lang" and "xml:lang" attributes.
     */
    public static final AttrCheck LANG = new CheckLang();

    // The remaining constants are placeholders: each is null, so no validation is attached to them.

    /**
     * checker for text attributes. Actually null (no validation).
     */
    public static final AttrCheck TEXT = null;

    /**
     * checker for "charset" attribute. Actually null (no validation).
     */
    public static final AttrCheck CHARSET = null;

    /**
     * checker for "type" attribute. Actually null (no validation).
     */
    public static final AttrCheck TYPE = null;

    /**
     * checker for attributes that can contain a single character. Actually null (no validation).
     */
    public static final AttrCheck CHARACTER = null;

    /**
     * checker for attributes which contain a list of urls. Actually null (no validation).
     */
    public static final AttrCheck URLS = null;

    /**
     * checker for "cols" attribute. Actually null (no validation).
     */
    public static final AttrCheck COLS = null;

    /**
     * checker for "coords" attribute. Actually null (no validation).
     */
    public static final AttrCheck COORDS = null;

    /**
     * checker for attributes containing dates. Actually null (no validation).
     */
    public static final AttrCheck DATE = null;

    /**
     * checker for attributes referencing an id. Actually null (no validation).
     */
    public static final AttrCheck IDREF = null;

    /**
     * checker for table "frame" attribute. Actually null (no validation).
     */
    public static final AttrCheck TFRAME = null;

    /**
     * checker for "frameborder" attribute. Actually null (no validation).
     */
    public static final AttrCheck FBORDER = null;

    /**
     * checker for "media" attribute. Actually null (no validation).
     */
    public static final AttrCheck MEDIA = null;

    /**
     * checker for "rel" and "rev" attributes. Actually null (no validation).
     */
    public static final AttrCheck LINKTYPES = null;

    /**
     * checker for table "rules" attribute. Actually null (no validation).
     */
    public static final AttrCheck TRULES = null;

    /**
     * utility class, don't instantiate.
     */
    private AttrCheckImpl()
    {
        // empty private constructor
    }

    /**
     * AttrCheck implementation for checking URLs.
*/ public static class CheckUrl implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { char c; StringBuffer dest; boolean escapeFound = false; boolean backslashFound = false; int i = 0; if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } String p = attval.value; boolean isJavascript = attval.value.startsWith("javascript:"); for (i = 0; i < p.length(); ++i) { c = p.charAt(i); // find \ if (c == '\\') { backslashFound = true; } // find non-ascii chars else if ((c > 0x7e) || (c <= 0x20) || (c == '<') || (c == '>')) { escapeFound = true; } } // backslashes found, fix them if (lexer.configuration.fixBackslash && backslashFound && !isJavascript) { attval.value = attval.value.replace('\\', '/'); p = attval.value; } // non-ascii chars found, fix them if (lexer.configuration.fixUri && escapeFound) { dest = new StringBuffer(); for (i = 0; i < p.length(); ++i) { c = p.charAt(i); if ((c > 0x7e) || (c <= 0x20) || (c == '<') || (c == '>')) { dest.append('%'); dest.append(Integer.toHexString(c).toUpperCase()); } else { dest.append(c); } } attval.value = dest.toString(); } if (backslashFound) { if (lexer.configuration.fixBackslash) { lexer.report.attrError(lexer, node, attval, Report.FIXED_BACKSLASH); } else { lexer.report.attrError(lexer, node, attval, Report.BACKSLASH_IN_URI); } } if (escapeFound) { if (lexer.configuration.fixUri) { lexer.report.attrError(lexer, node, attval, Report.ESCAPED_ILLEGAL_URI); } else { lexer.report.attrError(lexer, node, attval, Report.ILLEGAL_URI_REFERENCE); } lexer.badChars |= Report.INVALID_URI; } } } /** * AttrCheck implementation for checking scripts. */ public static class CheckScript implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { // not implemented } } /** * AttrCheck implementation for checking the "align" attribute. 
*/ public static class CheckAlign implements AttrCheck { /** * valid values for this attribute. */ private static final String[] VALID_VALUES = new String[]{"left", "center", "right", "justify"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { // IMG, OBJECT, APPLET and EMBED use align for vertical position if (node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0)) { VALIGN.check(lexer, node, attval); return; } if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } attval.checkLowerCaseAttrValue(lexer, node); if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking the "valign" attribute. */ public static class CheckValign implements AttrCheck { /** * valid values for this attribute. */ private static final String[] VALID_VALUES = new String[]{"top", "middle", "bottom", "baseline"}; /** * valid values for this attribute (only for img tag). */ private static final String[] VALID_VALUES_IMG = new String[]{"left", "right"}; /** * proprietary values for this attribute. 
*/ private static final String[] VALID_VALUES_PROPRIETARY = new String[]{ "texttop", "absmiddle", "absbottom", "textbottom"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { String value; if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } attval.checkLowerCaseAttrValue(lexer, node); value = attval.value; if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES, value)) { // all is fine return; } if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES_IMG, value)) { if (!(node.tag != null && ((node.tag.model & Dict.CM_IMG) != 0))) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } else if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES_PROPRIETARY, value)) { lexer.constrainVersion(Dict.VERS_PROPRIETARY); lexer.report.attrError(lexer, node, attval, Report.PROPRIETARY_ATTR_VALUE); } else { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking boolean attributes. */ public static class CheckBool implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { return; } attval.checkLowerCaseAttrValue(lexer, node); } } /** * AttrCheck implementation for checking the "length" attribute. 
*/ public static class CheckLength implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } // don't check for <col width=...> and <colgroup width=...> if ("width".equalsIgnoreCase(attval.attribute) && (node.tag == lexer.configuration.tt.tagCol || node.tag == lexer.configuration.tt.tagColgroup)) { return; } String p = attval.value; if (p.length() == 0 || (!Character.isDigit(p.charAt(0)) && !('%' == p.charAt(0)))) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } else { TagTable tt = lexer.configuration.tt; for (int j = 1; j < p.length(); j++) { // elements th and td must not use percentages if ((!Character.isDigit(p.charAt(j)) && (node.tag == tt.tagTd || node.tag == tt.tagTh)) || (!Character.isDigit(p.charAt(j)) && p.charAt(j) != '%')) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); break; } } } } } /** * AttrCheck implementation for checking the "target" attribute. */ public static class CheckTarget implements AttrCheck { /** * valid values for this attribute. */ private static final String[] VALID_VALUES = new String[]{"_blank", "_self", "_parent", "_top"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { // No target attribute in strict HTML versions lexer.constrainVersion(~Dict.VERS_HTML40_STRICT); if (attval.value == null || attval.value.length() == 0) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } String value = attval.value; // target names must begin with A-Za-z ... 
if (Character.isLetter(value.charAt(0))) { return; } // or be one of _blank, _self, _parent and _top if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, value)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking the "submit" attribute. */ public static class CheckFsubmit implements AttrCheck { /** * valid values for this attribute. */ private static final String[] VALID_VALUES = new String[]{"get", "post"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } attval.checkLowerCaseAttrValue(lexer, node); if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking the "clear" attribute. */ public static class CheckClear implements AttrCheck { /** * valid values for this attribute. */ private static final String[] VALID_VALUES = new String[]{"none", "left", "right", "all"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); attval.value = VALID_VALUES[0]; return; } attval.checkLowerCaseAttrValue(lexer, node); if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking the "shape" attribute. */ public static class CheckShape implements AttrCheck { /** * valid values for this attribute. 
*/ private static final String[] VALID_VALUES = new String[]{"rect", "default", "circle", "poly"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } attval.checkLowerCaseAttrValue(lexer, node); if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking Scope. */ public static class CheckScope implements AttrCheck { /** * valid values for this attribute. */ private static final String[] VALID_VALUES = new String[]{"row", "rowgroup", "col", "colgroup"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } attval.checkLowerCaseAttrValue(lexer, node); if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking numbers. */ public static class CheckNumber implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } // don't check <frameset cols=... 
rows=...> if (("cols".equalsIgnoreCase(attval.attribute) || "rows".equalsIgnoreCase(attval.attribute)) && node.tag == lexer.configuration.tt.tagFrameset) { return; } String value = attval.value; int j = 0; // font size may be preceded by + or - if (node.tag == lexer.configuration.tt.tagFont && (value.startsWith("+") || value.startsWith("-"))) { ++j; } for (; j < value.length(); j++) { char p = value.charAt(j); if (!Character.isDigit(p)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); break; } } } } /** * AttrCheck implementation for checking ids. */ public static class CheckId implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { Node old; if (attval.value == null || attval.value.length() == 0) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } String p = attval.value; char s = p.charAt(0); if (p.length() == 0 || !Character.isLetter(p.charAt(0))) { if (lexer.isvoyager && (TidyUtils.isXMLLetter(s) || s == '_' || s == ':')) { lexer.report.attrError(lexer, node, attval, Report.XML_ID_SYNTAX); } else { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } else { for (int j = 1; j < p.length(); j++) { s = p.charAt(j); if (!TidyUtils.isNamechar(s)) { if (lexer.isvoyager && TidyUtils.isXMLNamechar(s)) { lexer.report.attrError(lexer, node, attval, Report.XML_ID_SYNTAX); } else { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } break; } } } if (((old = lexer.configuration.tt.getNodeByAnchor(attval.value)) != null) && old != node) { lexer.report.attrError(lexer, node, attval, Report.ANCHOR_NOT_UNIQUE); } else { lexer.configuration.tt.anchorList = lexer.configuration.tt.addAnchor(attval.value, node); } } } /** * AttrCheck implementation for checking the "name" attribute. 
*/ public static class CheckName implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { Node old; if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } else if (lexer.configuration.tt.isAnchorElement(node)) { lexer.constrainVersion(~Dict.VERS_XHTML11); if (((old = lexer.configuration.tt.getNodeByAnchor(attval.value)) != null) && old != node) { lexer.report.attrError(lexer, node, attval, Report.ANCHOR_NOT_UNIQUE); } else { lexer.configuration.tt.anchorList = lexer.configuration.tt.addAnchor(attval.value, node); } } } } /** * AttrCheck implementation for checking colors. */ public static class CheckColor implements AttrCheck { /** * valid html colors. */ private static final Map COLORS = new HashMap(); static { COLORS.put("black", "#000000"); COLORS.put("green", "#008000"); COLORS.put("silver", "#C0C0C0"); COLORS.put("lime", "#00FF00"); COLORS.put("gray", "#808080"); COLORS.put("olive", "#808000"); COLORS.put("white", "#FFFFFF"); COLORS.put("yellow", "#FFFF00"); COLORS.put("maroon", "#800000"); COLORS.put("navy", "#000080"); COLORS.put("red", "#FF0000"); COLORS.put("blue", "#0000FF"); COLORS.put("purple", "#800080"); COLORS.put("teal", "#008080"); COLORS.put("fuchsia", "#FF00FF"); COLORS.put("aqua", "#00FFFF"); } /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { boolean hexUppercase = true; boolean invalid = false; boolean found = false; if (attval.value == null || attval.value.length() == 0) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } String given = attval.value; Iterator colorIter = COLORS.entrySet().iterator(); while (colorIter.hasNext()) { Map.Entry color = (Map.Entry) colorIter.next(); if (given.charAt(0) == '#') { if (given.length() != 7) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); invalid = true; 
break; } else if (given.equalsIgnoreCase((String) color.getValue())) { if (lexer.configuration.replaceColor) { attval.value = (String) color.getKey(); } found = true; break; } } else if (TidyUtils.isLetter(given.charAt(0))) { if (given.equalsIgnoreCase((String) color.getKey())) { if (lexer.configuration.replaceColor) { attval.value = (String) color.getKey(); } found = true; break; } } else { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); invalid = true; break; } } if (!found && !invalid) { if (given.charAt(0) == '#') { // check if valid hex digits and letters for (int i = 1; i < 7; ++i) { if (!TidyUtils.isDigit(given.charAt(i)) && ("abcdef".indexOf(Character.toLowerCase(given.charAt(i))) == -1)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); invalid = true; break; } } // convert hex letters to uppercase if (!invalid && hexUppercase) { for (int i = 1; i < 7; ++i) { attval.value = given.toUpperCase(); } } } else { // we could search for more colors and mark the file as HTML Proprietary, but I don't thinks // it's worth the effort, so values not in HTML 4.01 are invalid lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); invalid = true; } } } } /** * AttrCheck implementation for checking valuetype. */ public static class CheckVType implements AttrCheck { /** * valid values for this attribute. */ private static final String[] VALID_VALUES = new String[]{"data", "object", "ref"}; /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } attval.checkLowerCaseAttrValue(lexer, node); if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value)) { lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE); } } } /** * AttrCheck implementation for checking scroll. 
*/
    public static class CheckScroll implements AttrCheck
    {

        /**
         * accepted scroll keywords.
         */
        private static final String[] VALID_VALUES = {"no", "yes", "auto"};

        /**
         * Reports a missing value, otherwise runs the lower-case check and verifies the keyword.
         * @see AttrCheck#check(Lexer, Node, AttVal)
         */
        public void check(Lexer lexer, Node node, AttVal attval)
        {
            if (attval.value == null)
            {
                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
            }
            else
            {
                attval.checkLowerCaseAttrValue(lexer, node);

                boolean recognised = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
                if (!recognised)
                {
                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
                }
            }
        }

    }

    /**
     * AttrCheck implementation for checking dir.
     */
    public static class CheckTextDir implements AttrCheck
    {

        /**
         * accepted text direction keywords.
         */
        private static final String[] VALID_VALUES = {"rtl", "ltr"};

        /**
         * Reports a missing value, otherwise runs the lower-case check and verifies the keyword.
         * @see AttrCheck#check(Lexer, Node, AttVal)
         */
        public void check(Lexer lexer, Node node, AttVal attval)
        {
            if (attval.value == null)
            {
                lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
            }
            else
            {
                attval.checkLowerCaseAttrValue(lexer, node);

                boolean recognised = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
                if (!recognised)
                {
                    lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
                }
            }
        }

    }

    /**
     * AttrCheck implementation for checking lang and xml:lang.
*/ public static class CheckLang implements AttrCheck { /** * @see AttrCheck#check(Lexer, Node, AttVal) */ public void check(Lexer lexer, Node node, AttVal attval) { if ("lang".equals(attval.attribute)) { lexer.constrainVersion(~Dict.VERS_XHTML11); } if (attval.value == null) { lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE); return; } } } }������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/IStack.java��������������������������������������������������������0000644�0001750�0001750�00000006566�10116675277�021101� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Inline stack node. * <p> * Mosaic handles inlines via a separate stack from other elements We duplicate this to recover from inline markup * errors such as: <i>italic text <p> more italic text</b> normal text which for compatibility with Mosaic is * mapped to: <i>italic text</i> <p> <i>more italic text</i> normal text Note that any inline end tag * pop's the effect of the current inline start tag, so that</b> pop's <i>in the above example. 
* </p> * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 587 $ ($Author: fgiust $) */ public class IStack { /** * Next element in the stack. */ protected IStack next; /** * tag's dictionary definition. */ protected Dict tag; /** * name (null for text nodes). */ protected String element; /** * Attributes. */ protected AttVal attributes; }������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/TidyBeanInfo.java��������������������������������������������������0000644�0001750�0001750�00000005533�10102754223�022207� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.awt.Image; import java.beans.SimpleBeanInfo; /** * BeanInfo for Tidy. 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 400 $ ($Author: fgiust $) */ public class TidyBeanInfo extends SimpleBeanInfo { /** * @see java.beans.BeanInfo#getIcon(int) */ public Image getIcon(int kind) { return loadImage("tidy.gif"); } }���������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/TagCheckImpl.java��������������������������������������������������0000644�0001750�0001750�00000045252�10152114041�022162� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Check HTML attributes implementation. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 747 $ ($Author: fgiust $) */ public final class TagCheckImpl { /** * CheckHTML instance. */ public static final TagCheck HTML = new CheckHTML(); /** * CheckSCRIPT instance. 
*/ public static final TagCheck SCRIPT = new CheckSCRIPT(); /** * CheckTABLE instance. */ public static final TagCheck TABLE = new CheckTABLE(); /** * CheckCaption instance. */ public static final TagCheck CAPTION = new CheckCaption(); /** * CheckIMG instance. */ public static final TagCheck IMG = new CheckIMG(); /** * CheckAREA instance. */ public static final TagCheck AREA = new CheckAREA(); /** * CheckAnchor instance. */ public static final TagCheck ANCHOR = new CheckAnchor(); /** * CheckMap instance. */ public static final TagCheck MAP = new CheckMap(); /** * CheckSTYLE instance. */ public static final TagCheck STYLE = new CheckSTYLE(); /** * CheckTableCell instance. */ public static final TagCheck TABLECELL = new CheckTableCell(); /** * CheckLINK instance. */ public static final TagCheck LINK = new CheckLINK(); /** * CheckHR instance. */ public static final TagCheck HR = new CheckHR(); /** * CheckForm instance. */ public static final TagCheck FORM = new CheckForm(); /** * CheckMeta instance. */ public static final TagCheck META = new CheckMeta(); /** * don't instantiate. */ private TagCheckImpl() { // unused } /** * Checker implementation for html tag. */ public static class CheckHTML implements TagCheck { /** * xhtml namepace String. */ private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"; /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ public void check(Lexer lexer, Node node) { AttVal attval; AttVal xmlns; xmlns = node.getAttrByName("xmlns"); if (xmlns != null && XHTML_NAMESPACE.equals(xmlns.value)) { lexer.isvoyager = true; if (!lexer.configuration.htmlOut) // Unless user has specified plain HTML output, { lexer.configuration.xHTML = true; // output format will be XHTML. 
} // adjust other config options, just as in Configuration lexer.configuration.xmlOut = true; lexer.configuration.upperCaseTags = false; lexer.configuration.upperCaseAttrs = false; } for (attval = node.attributes; attval != null; attval = attval.next) { attval.checkAttribute(lexer, node); } } } /** * Checker implementation for script tags. */ public static class CheckSCRIPT implements TagCheck { /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ public void check(Lexer lexer, Node node) { AttVal lang, type; node.checkAttributes(lexer); lang = node.getAttrByName("language"); type = node.getAttrByName("type"); if (type == null) { AttVal missingType = new AttVal(null, null, '"', "type", ""); lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE); // check for javascript if (lang != null) { String str = lang.value; if ("javascript".equalsIgnoreCase(str) || "jscript".equalsIgnoreCase(str)) { node.addAttribute("type", "text/javascript"); } else if ("vbscript".equalsIgnoreCase(str)) { // per Randy Waki 8/6/01 node.addAttribute("type", "text/vbscript"); } } else { node.addAttribute("type", "text/javascript"); } } } } /** * Checker implementation for table. 
*/ public static class CheckTABLE implements TagCheck { /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ public void check(Lexer lexer, Node node) { AttVal attval; Attribute attribute; boolean hasSummary = false; for (attval = node.attributes; attval != null; attval = attval.next) { attribute = attval.checkAttribute(lexer, node); if (attribute == AttributeTable.attrSummary) { hasSummary = true; } } /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */ if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32) { lexer.badAccess |= Report.MISSING_SUMMARY; // summary is not required, should be only an accessibility warning // AttVal missingSummary = new AttVal(null, null, '"', "summary", ""); // lexer.report.attrError(lexer, node, missingSummary, Report.MISSING_ATTRIBUTE); } /* convert <table border> to <table border="1"> */ if (lexer.configuration.xmlOut) { attval = node.getAttrByName("border"); if (attval != null) { if (attval.value == null) { attval.value = "1"; } } } /* <table height="..."> is proprietary */ if ((attval = node.getAttrByName("height")) != null) { lexer.report.attrError(lexer, node, attval, Report.PROPRIETARY_ATTRIBUTE); lexer.versions &= Dict.VERS_PROPRIETARY; } } } /** * Checker implementation for table caption. 
*/
    public static class CheckCaption implements TagCheck
    {

        /**
         * Validates the attributes of a caption and constrains the document version according to the value of the
         * first <code>align</code> attribute; any other non-null value is reported as a bad attribute value.
         * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
         */
        public void check(Lexer lexer, Node node)
        {
            node.checkAttributes(lexer);

            // locate the first attribute named "align" (case-insensitive)
            AttVal alignAttr = node.attributes;
            while (alignAttr != null && !"align".equalsIgnoreCase(alignAttr.attribute))
            {
                alignAttr = alignAttr.next;
            }

            // nothing to do when align is absent or has no value
            if (alignAttr == null || alignAttr.value == null)
            {
                return;
            }

            String alignment = alignAttr.value;
            boolean sideways = "left".equalsIgnoreCase(alignment) || "right".equalsIgnoreCase(alignment);
            boolean vertical = "top".equalsIgnoreCase(alignment) || "bottom".equalsIgnoreCase(alignment);

            if (sideways)
            {
                lexer.constrainVersion(Dict.VERS_HTML40_LOOSE);
            }
            else if (vertical)
            {
                // everything except HTML 2.0 and HTML 3.2
                lexer.constrainVersion(~(Dict.VERS_HTML20 | Dict.VERS_HTML32));
            }
            else
            {
                lexer.report.attrError(lexer, node, alignAttr, Report.BAD_ATTRIBUTE_VALUE);
            }
        }

    }

    /**
     * Checker implementation for hr.
     */
    public static class CheckHR implements TagCheck
    {

        /**
         * Validates the attributes of a horizontal rule and reports a <code>src</code> attribute, when one was
         * present before validation, with the <code>PROPRIETARY_ATTR_VALUE</code> code.
         * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
         */
        public void check(Lexer lexer, Node node)
        {
            // looked up before the generic attribute check runs, as in the original ordering
            AttVal srcAttr = node.getAttrByName("src");

            node.checkAttributes(lexer);

            if (srcAttr == null)
            {
                return;
            }
            lexer.report.attrError(lexer, node, srcAttr, Report.PROPRIETARY_ATTR_VALUE);
        }

    }

    /**
     * Checker implementation for image tags.
*/
    public static class CheckIMG implements TagCheck
    {

        /**
         * Validates the attributes of an image and reports a missing <code>alt</code>, a missing source
         * (<code>src</code> or <code>datafld</code>), and <code>ismap</code> used without <code>usemap</code>.
         * When alternate text is configured it is added as the <code>alt</code> attribute.
         * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
         */
        public void check(Lexer lexer, Node node)
        {
            boolean altSeen = false;
            boolean srcSeen = false;
            boolean useMapSeen = false;
            boolean isMapSeen = false;
            boolean dataFldSeen = false;

            AttVal current = node.attributes;
            while (current != null)
            {
                Attribute recognised = current.checkAttribute(lexer, node);

                if (recognised == AttributeTable.attrAlt)
                {
                    altSeen = true;
                }
                else if (recognised == AttributeTable.attrSrc)
                {
                    srcSeen = true;
                }
                else if (recognised == AttributeTable.attrUsemap)
                {
                    useMapSeen = true;
                }
                else if (recognised == AttributeTable.attrIsmap)
                {
                    isMapSeen = true;
                }
                else if (recognised == AttributeTable.attrDatafld)
                {
                    dataFldSeen = true;
                }
                else if (recognised == AttributeTable.attrWidth || recognised == AttributeTable.attrHeight)
                {
                    // width/height rule out HTML 2.0
                    lexer.constrainVersion(~Dict.VERS_HTML20);
                }

                current = current.next;
            }

            if (!altSeen)
            {
                lexer.badAccess |= Report.MISSING_IMAGE_ALT;
                AttVal altPlaceholder = new AttVal(null, null, '"', "alt", "");
                lexer.report.attrError(lexer, node, altPlaceholder, Report.MISSING_ATTRIBUTE);

                String configuredAlt = lexer.configuration.altText;
                if (configuredAlt != null)
                {
                    node.addAttribute("alt", configuredAlt);
                }
            }

            // an image needs either src or datafld
            if (!(srcSeen || dataFldSeen))
            {
                AttVal srcPlaceholder = new AttVal(null, null, '"', "src", "");
                lexer.report.attrError(lexer, node, srcPlaceholder, Report.MISSING_ATTRIBUTE);
            }

            // ismap without usemap
            if (isMapSeen && !useMapSeen)
            {
                AttVal isMapPlaceholder = new AttVal(null, null, '"', "ismap", "");
                lexer.report.attrError(lexer, node, isMapPlaceholder, Report.MISSING_IMAGEMAP);
            }
        }

    }

    /**
     * Checker implementation for area.
*/
    public static class CheckAREA implements TagCheck
    {

        /**
         * Validates the attributes of an area and reports a missing <code>alt</code> (also flagged as an
         * accessibility problem) and a missing <code>href</code>.
         * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
         */
        public void check(Lexer lexer, Node node)
        {
            boolean altSeen = false;
            boolean hrefSeen = false;

            AttVal current = node.attributes;
            while (current != null)
            {
                Attribute recognised = current.checkAttribute(lexer, node);
                if (recognised == AttributeTable.attrAlt)
                {
                    altSeen = true;
                }
                else if (recognised == AttributeTable.attrHref)
                {
                    hrefSeen = true;
                }
                current = current.next;
            }

            if (!altSeen)
            {
                lexer.badAccess |= Report.MISSING_LINK_ALT;
                AttVal altPlaceholder = new AttVal(null, null, '"', "alt", "");
                lexer.report.attrError(lexer, node, altPlaceholder, Report.MISSING_ATTRIBUTE);
            }

            if (!hrefSeen)
            {
                AttVal hrefPlaceholder = new AttVal(null, null, '"', "href", "");
                lexer.report.attrError(lexer, node, hrefPlaceholder, Report.MISSING_ATTRIBUTE);
            }
        }

    }

    /**
     * Checker implementation for anchors.
     */
    public static class CheckAnchor implements TagCheck
    {

        /**
         * Runs the generic attribute check, then lets the lexer fix up the id of the anchor.
         * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
         */
        public void check(Lexer lexer, Node node)
        {
            node.checkAttributes(lexer);

            lexer.fixId(node);
        }

    }

    /**
     * Checker implementation for image maps.
     */
    public static class CheckMap implements TagCheck
    {

        /**
         * Runs the generic attribute check, then lets the lexer fix up the id of the map.
         * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
         */
        public void check(Lexer lexer, Node node)
        {
            node.checkAttributes(lexer);

            lexer.fixId(node);
        }

    }

    /**
     * Checker implementation for style tags.
     */
    public static class CheckSTYLE implements TagCheck
    {

        /**
         * Validates the attributes of a style element; when no <code>type</code> attribute was present the
         * omission is reported and <code>type="text/css"</code> is added.
         * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
         */
        public void check(Lexer lexer, Node node)
        {
            // looked up before the generic attribute check runs, as in the original ordering
            AttVal typeAttr = node.getAttrByName("type");

            node.checkAttributes(lexer);

            if (typeAttr != null)
            {
                return;
            }

            AttVal typePlaceholder = new AttVal(null, null, '"', "type", "");
            lexer.report.attrError(lexer, node, typePlaceholder, Report.MISSING_ATTRIBUTE);

            node.addAttribute("type", "text/css");
        }

    }

    /**
     * Checker implementation for forms. Reports missing action attribute.
*/ public static class CheckForm implements TagCheck { /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ public void check(Lexer lexer, Node node) { AttVal action = node.getAttrByName("action"); node.checkAttributes(lexer); if (action == null) { AttVal missingAttribute = new AttVal(null, null, '"', "action", ""); lexer.report.attrError(lexer, node, missingAttribute, Report.MISSING_ATTRIBUTE); } } } /** * Checker implementation for meta tags. Reports missing content attribute. */ public static class CheckMeta implements TagCheck { /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ public void check(Lexer lexer, Node node) { AttVal content = node.getAttrByName("content"); node.checkAttributes(lexer); if (content == null) { AttVal missingAttribute = new AttVal(null, null, '"', "content", ""); lexer.report.attrError(lexer, node, missingAttribute, Report.MISSING_ATTRIBUTE); } // name or http-equiv attribute must also be set } } /** * Checker implementation for table cells. */ public static class CheckTableCell implements TagCheck { /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ public void check(Lexer lexer, Node node) { node.checkAttributes(lexer); // HTML4 strict doesn't allow mixed content for elements with %block; as their content model if (node.getAttrByName("width") != null || node.getAttrByName("height") != null) { lexer.constrainVersion(~Dict.VERS_HTML40_STRICT); } } } /** * add missing type attribute when appropriate. 
*/ public static class CheckLINK implements TagCheck { /** * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node) */ public void check(Lexer lexer, Node node) { AttVal rel = node.getAttrByName("rel"); node.checkAttributes(lexer); if (rel != null && rel.value != null && rel.value.equals("stylesheet")) { AttVal type = node.getAttrByName("type"); if (type == null) { AttVal missingType = new AttVal(null, null, '"', "type", ""); lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE); node.addAttribute("type", "text/css"); } } } } }������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/TidyMessageListener.java�������������������������������������������0000644�0001750�0001750�00000005307�10121145262�023615� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; /** * Listener interface for validation errors/warnings and info. * @see Tidy#setMessageListener(TidyMessageListener) * @author Fabrizio Giustina * @version $Revision: 607 $ ($Author: fgiust $) */ public interface TidyMessageListener { /** * Called by tidy when a warning or error occurs. * @param message Tidy message */ void messageReceived(TidyMessage message); } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Out.java�����������������������������������������������������������0000644�0001750�0001750�00000005647�10463645504�020464� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Tidy Output interface. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 807 $ ($Author: fgiust $) */ public interface Out { /** * writes an char. * @param c char to write */ void outc(int c); /** * writes a byte. * @param c byte to write */ void outc(byte c); /** * writes a newline. 
*/ void newline(); /** * Flush the stream. */ void flush(); }�����������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/ant/���������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345034�017615� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/ant/JTidyTask.java�������������������������������������������������0000644�0001750�0001750�00000032056�11432356302�022327� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy.ant; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Properties; import org.apache.tools.ant.BuildException; import org.apache.tools.ant.DirectoryScanner; import org.apache.tools.ant.Project; import org.apache.tools.ant.Task; import org.apache.tools.ant.types.FileSet; import org.apache.tools.ant.types.Parameter; import org.apache.tools.ant.util.FileNameMapper; import org.apache.tools.ant.util.FlatFileNameMapper; import org.apache.tools.ant.util.IdentityMapper; import org.w3c.tidy.Tidy; /** * JTidy ant task. * <h3>Parameters</h3> * <table cellspacing="0" border="1"> <thead> * <tr> * <th>Attribute</th> * <th>Description</th> * <th>Required</th> * </tr> * </thead> <tbody> * <tr> * <td>srcfile</td> * <td>source file</td> * <td>Yes, unless a nested <code><fileset></code> element is used.</td> * </tr> * <tr> * <td>destfile</td> * <td>destination file for output</td> * <td rowspan="2">With the <code>srcfile</code> attribute, either <code>destfile</code> or <code>destdir</code> * can be used. 
With nested <code><fileset></code> elements only <code>destdir</code> is allowed.</td> * </tr> * <tr> * <td>destdir</td> * <td>destination directory for output</td> * </tr> * <tr> * <td>properties</td> * <td>Path to a valid tidy properties file</td> * <td>No</td> * </tr> * <tr> * <td>flatten</td> * <td>Ignore the directory structure of the source files, and copy all files into the directory specified by the * <code>destdir</code> attribute.</td> * <td>No; defaults to false.</td> * </tr> * <tr> * <td>failonerror</td> * <td>boolean to control whether failure to execute should throw a BuildException or just print an error. If set to * <code>true</code> errors in input files which tidy is enable to fix will cause a failure.</td> * <td>No; defaults to false.</td> * </tr> * </tbody> </table> * <h3>Nested elements</h3> * <ul> * <li><strong>Fileset </strong>: if you need to run tidy on more than one file, you can specify nested filesets.</li> * <li><strong>Parameter </strong>: you can specify any tidy configuration option directly using a nested * <code>parameter</code> element.</li> * </ul> * <h3>Setup</h3> * <p> * Adds the following <code>typedef</code> to setup the JTidy task in your build.xml: * </p> * * <pre> * <taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask"/> * </pre> * * <p> * This will work if JTidy jar is copied to ant lib directory. If you need to reference the jar elsewhere on the * filesystem you can add a nested classpath element: * </p> * * <pre> * <taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask"> * <classpath> * <pathelement location="${lib.dir}/jtidy.jar"/> * </classpath> * </taskdef> * </pre> * * <h3>Examples</h3> * * <pre> * <tidy destdir="out" properties="/path/to/tidy.properties"> * <fileset dir="inputdir" /> * <parameter name="drop-font-tags" value="true" /> * </tidy> * </pre> * * @author Fabrizio Giustina * @version $Revision: 1120 $ ($Author: aditsu $) */ public class JTidyTask extends Task { /** * Filesets. 
*/ private List filesets = new ArrayList(); /** * Destination directory for output. */ private File destdir; /** * Destination file for output. */ private File destfile; /** * Source file. */ private File srcfile; /** * Control whether failure to execute should throw a BuildException. */ private boolean failonerror; /** * Don't output directories. */ private boolean flatten; /** * tidy instance. */ private Tidy tidy; /** * Configured properties. */ private Properties props; /** * Properties file. */ private File properties; /** * @param destdir The destdir to set. */ public void setDestdir(File destdir) { this.destdir = destdir; } /** * @param destfile The destfile to set. */ public void setDestfile(File destfile) { this.destfile = destfile; } /** * @param srcfile The srcfile to set. */ public void setSrcfile(File srcfile) { this.srcfile = srcfile; } /** * @param failonerror The failonerror to set. */ public void setFailonerror(boolean failonerror) { this.failonerror = failonerror; } /** * @param flatten The flatten to set. */ public void setFlatten(boolean flatten) { this.flatten = flatten; } /** * @param properties The properties to set. */ public void setProperties(File properties) { this.properties = properties; } /** * Adds a fileset to be processed Fileset * @param fileSet */ public void addFileset(FileSet fileSet) { filesets.add(fileSet); } /** * Setter method for any property using the ant type Parameter. * @param prop Ant type Parameter */ public void addConfiguredParameter(Parameter prop) { props.setProperty(prop.getName(), prop.getValue()); } /** * Initializes the task. */ public void init() { super.init(); // Setup a Tidy instance tidy = new Tidy(); props = new Properties(); } /** * Validates task parameters. 
* @throws BuildException if any invalid parameter is found */ protected void validateParameters() throws BuildException { if (srcfile == null && filesets.size() == 0) { throw new BuildException("Specify at least srcfile or a fileset."); } if (srcfile != null && filesets.size() > 0) { throw new BuildException("You can't specify both srcfile and nested filesets."); } if (destfile == null && destdir == null) { throw new BuildException("One of destfile or destdir must be set."); } if (srcfile == null && destfile != null) { throw new BuildException("You only can use destfile with srcfile."); } if (srcfile != null && srcfile.isDirectory()) { throw new BuildException("srcfile can't be a directory."); } if (properties != null && ( !properties.exists() || properties.isDirectory())) { throw new BuildException("Invalid properties file specified: " + properties.getPath()); } } /** * Run the task. * @exception BuildException The exception raised during task execution. */ public void execute() throws BuildException { // validate validateParameters(); // load configuration if (this.properties != null) { try { this.props.load(new FileInputStream(this.properties)); } catch (IOException e) { throw new BuildException("Unable to load properties file " + properties, e); } } // hide output unless set in properties tidy.setErrout(new PrintWriter(new ByteArrayOutputStream())); tidy.setConfigurationFromProps(props); if (this.srcfile != null) { // process a single file executeSingle(); } else { // process filesets executeSet(); } } /** * A single file has been specified. */ protected void executeSingle() { if (!srcfile.exists()) { throw new BuildException("Could not find source file " + srcfile.getAbsolutePath() + "."); } if (destfile == null) { // destdir can't be null, condition is checked in validateParameters() destfile = new File(destdir, srcfile.getName()); } processFile(srcfile, destfile); } /** * Run tidy on filesets. 
*/ protected void executeSet() { FileNameMapper mapper = null; if (flatten) { mapper = new FlatFileNameMapper(); } else { mapper = new IdentityMapper(); } mapper.setTo(this.destdir.getAbsolutePath()); Iterator iterator = filesets.iterator(); while (iterator.hasNext()) { FileSet fileSet = (FileSet) iterator.next(); DirectoryScanner directoryScanner = fileSet.getDirectoryScanner(getProject()); String[] sourceFiles = directoryScanner.getIncludedFiles(); File inputdir = directoryScanner.getBasedir(); mapper.setFrom(inputdir.getAbsolutePath()); for (int j = 0; j < sourceFiles.length; j++) { String[] mapped = mapper.mapFileName(sourceFiles[j]); processFile(new File(inputdir, sourceFiles[j]), new File(this.destdir, mapped[0])); } } } /** * Run tidy on a file. * @param inputFile input file * @param outputFile output file */ protected void processFile(File inputFile, File outputFile) { log("Processing " + inputFile.getAbsolutePath(), Project.MSG_DEBUG); InputStream is; OutputStream os; try { is = new BufferedInputStream(new FileInputStream(inputFile)); } catch (IOException e) { throw new BuildException("Unable to open file " + inputFile); } try { outputFile.getParentFile().mkdirs(); outputFile.createNewFile(); os = new BufferedOutputStream(new FileOutputStream(outputFile)); } catch (IOException e) { throw new BuildException("Unable to open destination file " + outputFile, e); } tidy.parse(is, os); try { is.close(); } catch (IOException e1) { // ignore } try { os.flush(); os.close(); } catch (IOException e1) { // ignore } // cleanup empty files if (tidy.getParseErrors() > 0 && !tidy.getForceOutput()) { outputFile.delete(); } if (failonerror && tidy.getParseErrors() > 0) { throw new BuildException("Tidy was unable to process file " + inputFile + ", " + tidy.getParseErrors() + " returned."); } } 
}����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/StreamInFactory.java�����������������������������������������������0000644�0001750�0001750�00000007020�10463645504�022752� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.io.InputStream; import java.io.Reader; import java.io.UnsupportedEncodingException; /** * Tidy Input factory. * @author Fabrizio Giustina * @version $Revision: 807 $ ($Author: fgiust $) */ public final class StreamInFactory { /** * Don't instantiate. */ private StreamInFactory() { // unused } /** * Returns the appropriate StreamIn implementation. 
* @param config configuration instance * @param stream input stream * @return StreamIn instance */ public static StreamIn getStreamIn(Configuration config, InputStream stream) { try { return new StreamInJavaImpl(stream, config.getInCharEncodingName(), config.tabsize); } catch (UnsupportedEncodingException e) { throw new RuntimeException("Unsupported encoding: " + e.getMessage()); } } /** * Returns the appropriate StreamIn implementation. * @param config configuration instance * @param stream input stream * @return StreamIn instance */ public static StreamIn getStreamIn(Configuration config, Reader reader) { return new StreamInJavaImpl(reader, config.tabsize); } } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Report.java��������������������������������������������������������0000644�0001750�0001750�00000151115�11272671607�021162� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import java.io.InputStream; import java.io.PrintWriter; import java.text.MessageFormat; import java.util.MissingResourceException; import java.util.Properties; import java.util.ResourceBundle; import org.w3c.tidy.TidyMessage.Level; /** * Error/informational message reporter. You should only need to edit the file TidyMessages.properties to localize HTML * tidy. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 921 $ ($Author: aditsu $) */ public final class Report { /** * used to point to Web Accessibility Guidelines. */ public static final String ACCESS_URL = "http://www.w3.org/WAI/GL"; /** * Release date String. */ public static final String RELEASE_DATE_STRING = readReleaseDate(); private static String readReleaseDate() { final Properties p = new Properties(); try { final InputStream s = Report.class.getResourceAsStream("/jtidy.properties"); p.load(s); s.close(); } catch (Exception e) { throw new RuntimeException("Failed to load jtidy.properties", e); } return p.getProperty("date"); } /** * invalid entity: missing semicolon. */ public static final short MISSING_SEMICOLON = 1; /** * invalid entity: missing semicolon. */ public static final short MISSING_SEMICOLON_NCR = 2; /** * invalid entity: unknown entity. */ public static final short UNKNOWN_ENTITY = 3; /** * invalid entity: unescaped ampersand. */ public static final short UNESCAPED_AMPERSAND = 4; /** * invalid entity: apos undefined in current definition. */ public static final short APOS_UNDEFINED = 5; /** * missing an end tag. */ public static final short MISSING_ENDTAG_FOR = 6; /** * missing end tag before. */ public static final short MISSING_ENDTAG_BEFORE = 7; /** * discarding unexpected element. */ public static final short DISCARDING_UNEXPECTED = 8; /** * nested emphasis. 
     */
    public static final short NESTED_EMPHASIS = 9;

    /**
     * non matching end tag.
     */
    public static final short NON_MATCHING_ENDTAG = 10;

    /**
     * tag not allowed in.
     */
    public static final short TAG_NOT_ALLOWED_IN = 11;

    /**
     * missing start tag.
     */
    public static final short MISSING_STARTTAG = 12;

    /**
     * unexpected end tag.
     */
    public static final short UNEXPECTED_ENDTAG = 13;

    /**
     * using br in place of.
     */
    public static final short USING_BR_INPLACE_OF = 14;

    /**
     * inserting tag.
     */
    public static final short INSERTING_TAG = 15;

    /**
     * suspected missing quote.
     */
    public static final short SUSPECTED_MISSING_QUOTE = 16;

    /**
     * missing title element.
     */
    public static final short MISSING_TITLE_ELEMENT = 17;

    /**
     * duplicate frameset.
     */
    public static final short DUPLICATE_FRAMESET = 18;

    /**
     * elements can't be nested.
     */
    public static final short CANT_BE_NESTED = 19;

    /**
     * obsolete element.
     */
    public static final short OBSOLETE_ELEMENT = 20;

    /**
     * proprietary element.
     */
    public static final short PROPRIETARY_ELEMENT = 21;

    /**
     * unknown element.
     */
    public static final short UNKNOWN_ELEMENT = 22;

    /**
     * trim empty element.
     */
    public static final short TRIM_EMPTY_ELEMENT = 23;

    /**
     * coerce to end tag.
     */
    public static final short COERCE_TO_ENDTAG = 24;

    /**
     * illegal nesting.
     */
    public static final short ILLEGAL_NESTING = 25;

    /**
     * noframes content.
     */
    public static final short NOFRAMES_CONTENT = 26;

    /**
     * content after body.
     */
    public static final short CONTENT_AFTER_BODY = 27;

    /**
     * inconsistent version.
     */
    public static final short INCONSISTENT_VERSION = 28;

    /**
     * malformed comment.
     */
    public static final short MALFORMED_COMMENT = 29;

    /**
     * bad comment chars.
     */
    public static final short BAD_COMMENT_CHARS = 30;

    /**
     * bad xml comment.
     */
    public static final short BAD_XML_COMMENT = 31;

    /**
     * bad cdata content.
     */
    public static final short BAD_CDATA_CONTENT = 32;

    /**
     * inconsistent namespace.
     */
    public static final short INCONSISTENT_NAMESPACE = 33;

    /**
     * doctype after tags.
     */
    public static final short DOCTYPE_AFTER_TAGS = 34;

    /**
     * malformed doctype.
     */
    public static final short MALFORMED_DOCTYPE = 35;

    /**
     * unexpected end of file.
     */
    public static final short UNEXPECTED_END_OF_FILE = 36;

    /**
     * doctype not upper case.
     */
    public static final short DTYPE_NOT_UPPER_CASE = 37;

    /**
     * too many elements.
     */
    public static final short TOO_MANY_ELEMENTS = 38;

    /**
     * unescaped element.
     */
    public static final short UNESCAPED_ELEMENT = 39;

    /**
     * nested quotation.
     */
    public static final short NESTED_QUOTATION = 40;

    /**
     * element not empty.
     */
    public static final short ELEMENT_NOT_EMPTY = 41;

    /**
     * encoding IO conflict.
     */
    public static final short ENCODING_IO_CONFLICT = 42;

    /**
     * mixed content in block.
     */
    public static final short MIXED_CONTENT_IN_BLOCK = 43;

    /**
     * missing doctype.
     */
    public static final short MISSING_DOCTYPE = 44;

    /**
     * space preceding xml declaration.
     */
    public static final short SPACE_PRECEDING_XMLDECL = 45;

    /**
     * too many elements in.
     */
    public static final short TOO_MANY_ELEMENTS_IN = 46;

    /**
     * unexpected end tag in.
     */
    public static final short UNEXPECTED_ENDTAG_IN = 47;

    // codes 83-85 are declared here, out of numeric order; 48-82 follow below

    /**
     * replacing element.
     */
    public static final short REPLACING_ELEMENT = 83;

    /**
     * replacing unescaped element.
     */
    public static final short REPLACING_UNEX_ELEMENT = 84;

    /**
     * coerce to end tag (warning variant).
     */
    public static final short COERCE_TO_ENDTAG_WARN = 85;

    /**
     * attribute: unknown attribute.
     */
    public static final short UNKNOWN_ATTRIBUTE = 48;

    /**
     * attribute: missing attribute.
     */
    public static final short MISSING_ATTRIBUTE = 49;

    /**
     * attribute: missing attribute value.
     */
    public static final short MISSING_ATTR_VALUE = 50;

    /**
     * attribute: bad attribute value.
     */
    public static final short BAD_ATTRIBUTE_VALUE = 51;

    /**
     * attribute: unexpected gt.
     */
    public static final short UNEXPECTED_GT = 52;

    /**
     * attribute: proprietary attribute.
     */
    public static final short PROPRIETARY_ATTRIBUTE = 53;

    /**
     * attribute: proprietary attribute value.
     */
    public static final short PROPRIETARY_ATTR_VALUE = 54;

    /**
     * attribute: repeated attribute.
     */
    public static final short REPEATED_ATTRIBUTE = 55;

    /**
     * attribute: missing image map.
     */
    public static final short MISSING_IMAGEMAP = 56;

    /**
     * attribute: xml attribute value.
     */
    public static final short XML_ATTRIBUTE_VALUE = 57;

    /**
     * attribute: missing quotemark.
     */
    public static final short MISSING_QUOTEMARK = 58;

    /**
     * attribute: unexpected quotemark.
     */
    public static final short UNEXPECTED_QUOTEMARK = 59;

    /**
     * attribute: id and name mismatch.
     */
    public static final short ID_NAME_MISMATCH = 60;

    /**
     * attribute: backslash in URI.
     */
    public static final short BACKSLASH_IN_URI = 61;

    /**
     * attribute: fixed backslash.
     */
    public static final short FIXED_BACKSLASH = 62;

    /**
     * attribute: illegal URI reference.
     */
    public static final short ILLEGAL_URI_REFERENCE = 63;

    /**
     * attribute: escaped illegal URI.
     */
    public static final short ESCAPED_ILLEGAL_URI = 64;

    /**
     * attribute: newline in URI.
     */
    public static final short NEWLINE_IN_URI = 65;

    /**
     * attribute: anchor not unique.
     */
    public static final short ANCHOR_NOT_UNIQUE = 66;

    /**
     * attribute: entity in id.
     */
    public static final short ENTITY_IN_ID = 67;

    /**
     * attribute: joining attribute.
     */
    public static final short JOINING_ATTRIBUTE = 68;

    /**
     * attribute: unexpected equal sign.
     */
    public static final short UNEXPECTED_EQUALSIGN = 69;

    /**
     * attribute: attribute value not lower case.
     */
    public static final short ATTR_VALUE_NOT_LCASE = 70;

    /**
     * attribute: xml id syntax.
     */
    public static final short XML_ID_SYNTAX = 71;

    /**
     * attribute: invalid attribute.
     */
    public static final short INVALID_ATTRIBUTE = 72;

    /**
     * attribute: bad attribute value replaced.
     */
    public static final short BAD_ATTRIBUTE_VALUE_REPLACED = 73;

    /**
     * attribute: invalid xml id.
     */
    public static final short INVALID_XML_ID = 74;

    /**
     * attribute: unexpected end of file.
     */
    public static final short UNEXPECTED_END_OF_FILE_ATTR = 75;

    // NOTE(review): the character encoding codes below are consecutive integers, yet this class also
    // ORs them into lexer.badChars and tests them with '&' as if they were independent bit flags.

    /**
     * character encoding: vendor specific chars.
     */
    public static final short VENDOR_SPECIFIC_CHARS = 76;

    /**
     * character encoding: invalid sgml chars.
     */
    public static final short INVALID_SGML_CHARS = 77;

    /**
     * character encoding: invalid utf8.
     */
    public static final short INVALID_UTF8 = 78;

    /**
     * character encoding: invalid utf16.
     */
    public static final short INVALID_UTF16 = 79;

    /**
     * character encoding: encoding mismatch.
     */
    public static final short ENCODING_MISMATCH = 80;

    /**
     * character encoding: invalid URI.
     */
    public static final short INVALID_URI = 81;

    /**
     * character encoding: invalid NCR.
     */
    public static final short INVALID_NCR = 82;

    /**
     * Constant used for reporting of given doctype.
     */
    public static final short DOCTYPE_GIVEN_SUMMARY = 110;

    /**
     * Constant used for reporting of version summary.
     */
    public static final short REPORT_VERSION_SUMMARY = 111;

    /**
     * Constant used for reporting of bad access summary.
     */
    public static final short BADACCESS_SUMMARY = 112;

    /**
     * Constant used for reporting of bad form summary.
     */
    public static final short BADFORM_SUMMARY = 113;

    // accessibility flaws: distinct bits, combined in lexer.badAccess

    /**
     * accessibility flaw: missing image alt.
     */
    public static final short MISSING_IMAGE_ALT = 1;

    /**
     * accessibility flaw: missing link alt.
     */
    public static final short MISSING_LINK_ALT = 2;

    /**
     * accessibility flaw: missing summary.
     */
    public static final short MISSING_SUMMARY = 4;

    /**
     * accessibility flaw: missing image map.
     */
    public static final short MISSING_IMAGE_MAP = 8;

    /**
     * accessibility flaw: using frames.
     */
    public static final short USING_FRAMES = 16;

    /**
     * accessibility flaw: using noframes.
     */
    public static final short USING_NOFRAMES = 32;

    // presentation flaws: distinct bits, combined in lexer.badLayout

    /**
     * presentation flaw: using spacer.
     */
    public static final short USING_SPACER = 1;

    /**
     * presentation flaw: using layer.
     */
    public static final short USING_LAYER = 2;

    /**
     * presentation flaw: using nobr.
*/ public static final short USING_NOBR = 4; /** * presentation flaw: using font. */ public static final short USING_FONT = 8; /** * presentation flaw: using body. */ public static final short USING_BODY = 16; /** * character encoding error: windows chars. */ public static final short WINDOWS_CHARS = 1; /** * character encoding error: non ascii. */ public static final short NON_ASCII = 2; /** * character encoding error: found utf16. */ public static final short FOUND_UTF16 = 4; /** * char has been replaced. */ public static final short REPLACED_CHAR = 0; /** * char has been discarder. */ public static final short DISCARDED_CHAR = 1; /** * Resource bundle with messages. */ private static ResourceBundle res; /** * Printed in GNU Emacs messages. */ private String currentFile; /** * message listener for error reporting. */ private TidyMessageListener listener; static { try { res = ResourceBundle.getBundle("org/w3c/tidy/TidyMessages"); } catch (MissingResourceException e) { throw new Error(e.toString()); } } /** * Instantiated only in Tidy() constructor. */ protected Report() { super(); } /** * Generates a complete message for the warning/error. The message is composed by: * <ul> * <li>position in file</li> * <li>prefix for the error level (warning: | error:)</li> * <li>message read from ResourceBundle</li> * <li>optional parameters added to message using MessageFormat</li> * </ul> * @param errorCode tidy error code * @param lexer Lexer * @param message key for the ResourceBundle * @param params optional parameters added with MessageFormat * @param level message level. One of <code>TidyMessage.LEVEL_ERROR</code>, * <code>TidyMessage.LEVEL_WARNING</code>,<code>TidyMessage.LEVEL_INFO</code> * @return formatted message * @throws MissingResourceException if <code>message</code> key is not available in jtidy resource bundle. 
* @see TidyMessage */ protected String getMessage(int errorCode, Lexer lexer, String message, Object[] params, Level level) throws MissingResourceException { String resource; resource = res.getString(message); String position; if (lexer != null && level != Level.SUMMARY) { position = getPosition(lexer); } else { position = ""; } String prefix; if (level == Level.ERROR) { prefix = res.getString("error"); } else if (level == Level.WARNING) { prefix = res.getString("warning"); } else { prefix = ""; } String messageString; if (params != null) { messageString = MessageFormat.format(resource, params); } else { messageString = resource; } if (listener != null) { TidyMessage msg = new TidyMessage(errorCode, (lexer != null) ? lexer.lines : 0, (lexer != null) ? lexer.columns : 0, level, messageString); listener.messageReceived(msg); } return position + prefix + messageString; } /** * Prints a message to lexer.errout after calling getMessage(). * @param errorCode tidy error code * @param lexer Lexer * @param message key for the ResourceBundle * @param params optional parameters added with MessageFormat * @param level message level. One of <code>TidyMessage.LEVEL_ERROR</code>, * <code>TidyMessage.LEVEL_WARNING</code>,<code>TidyMessage.LEVEL_INFO</code> * @see TidyMessage */ private void printMessage(int errorCode, Lexer lexer, String message, Object[] params, Level level) { String resource; try { resource = getMessage(errorCode, lexer, message, params, level); } catch (MissingResourceException e) { lexer.errout.println(e.toString()); return; } lexer.errout.println(resource); } /** * Prints a message to errout after calling getMessage(). Used when lexer is not yet defined. * @param errout PrintWriter * @param message key for the ResourceBundle * @param params optional parameters added with MessageFormat * @param level message level. 
One of <code>TidyMessage.LEVEL_ERROR</code>, * <code>TidyMessage.LEVEL_WARNING</code>,<code>TidyMessage.LEVEL_INFO</code> * @see TidyMessage */ private void printMessage(PrintWriter errout, String message, Object[] params, Level level) { String resource; try { resource = getMessage(-1, null, message, params, level); } catch (MissingResourceException e) { errout.println(e.toString()); return; } errout.println(resource); } /** * print version information. * @param p printWriter */ public void showVersion(PrintWriter p) { printMessage(p, "version_summary", new Object[]{RELEASE_DATE_STRING}, Level.SUMMARY); } /** * Returns a formatted tag name handling start and ent tags, nulls, doctypes, and text. * @param tag Node * @return formatted tag name */ private String getTagName(Node tag) { if (tag != null) { if (tag.type == Node.START_TAG) { return "<" + tag.element + ">"; } else if (tag.type == Node.END_TAG) { return "</" + tag.element + ">"; } else if (tag.type == Node.DOCTYPE_TAG) { return "<!DOCTYPE>"; } else if (tag.type == Node.TEXT_NODE) { return "plain text"; } else { return tag.element; } } return ""; } /** * Prints an "unknown option" error message. Lexer is not defined when this is called. * @param option unknown option name */ public void unknownOption(String option) { try { System.err.println(MessageFormat.format(res.getString("unknown_option"), new Object[]{option})); } catch (MissingResourceException e) { System.err.println(e.toString()); } } /** * Prints a "bad argument" error message. Lexer is not defined when this is called. * @param key argument name * @param value bad argument value */ public void badArgument(String key, String value) { try { System.err.println(MessageFormat.format(res.getString("bad_argument"), new Object[]{value, key})); } catch (MissingResourceException e) { System.err.println(e.toString()); } } /** * Returns a formatted String describing the current position in file. 
* @param lexer Lexer * @return String position ("line:column") */ private String getPosition(Lexer lexer) { try { // Change formatting to be parsable by GNU Emacs if (lexer.configuration.emacs) { return MessageFormat.format(res.getString("emacs_format"), new Object[]{ this.currentFile, new Integer(lexer.lines), new Integer(lexer.columns)}) + " "; } // traditional format return MessageFormat.format(res.getString("line_column"), new Object[]{ new Integer(lexer.lines), new Integer(lexer.columns)}); } catch (MissingResourceException e) { lexer.errout.println(e.toString()); } return ""; } /** * Prints encoding error messages. * @param lexer Lexer * @param code error code * @param c invalid char */ public void encodingError(Lexer lexer, int code, int c) { lexer.warnings++; if (lexer.errors > lexer.configuration.showErrors) // keep quiet after <showErrors> errors { return; } if (lexer.configuration.showWarnings) { String buf = Integer.toHexString(c); // An encoding mismatch is currently treated as a non-fatal error if ((code & ~DISCARDED_CHAR) == ENCODING_MISMATCH) { // actual encoding passed in "c" lexer.badChars |= ENCODING_MISMATCH; printMessage( code, lexer, "encoding_mismatch", new Object[]{ lexer.configuration.getInCharEncodingName(), ParsePropertyImpl.CHAR_ENCODING.getFriendlyName(null, new Integer(c), lexer.configuration)}, Level.WARNING); } else if ((code & ~DISCARDED_CHAR) == VENDOR_SPECIFIC_CHARS) { lexer.badChars |= VENDOR_SPECIFIC_CHARS; printMessage( code, lexer, "invalid_char", new Object[]{new Integer(code & DISCARDED_CHAR), buf}, Level.WARNING); } else if ((code & ~DISCARDED_CHAR) == INVALID_SGML_CHARS) { lexer.badChars |= INVALID_SGML_CHARS; printMessage( code, lexer, "invalid_char", new Object[]{new Integer(code & DISCARDED_CHAR), buf}, Level.WARNING); } else if ((code & ~DISCARDED_CHAR) == INVALID_UTF8) { lexer.badChars |= INVALID_UTF8; printMessage( code, lexer, "invalid_utf8", new Object[]{new Integer(code & DISCARDED_CHAR), buf}, Level.WARNING); } 
else if ((code & ~DISCARDED_CHAR) == INVALID_UTF16) { lexer.badChars |= INVALID_UTF16; printMessage( code, lexer, "invalid_utf16", new Object[]{new Integer(code & DISCARDED_CHAR), buf}, Level.WARNING); } else if ((code & ~DISCARDED_CHAR) == INVALID_NCR) { lexer.badChars |= INVALID_NCR; printMessage( code, lexer, "invalid_ncr", new Object[]{new Integer(code & DISCARDED_CHAR), buf}, Level.WARNING); } } } /** * Prints entity error messages. * @param lexer Lexer * @param code error code * @param entity invalid entity String * @param c invalid char */ public void entityError(Lexer lexer, short code, String entity, int c) { lexer.warnings++; if (lexer.errors > lexer.configuration.showErrors) // keep quiet after <showErrors> errors { return; } if (lexer.configuration.showWarnings) { switch (code) { case MISSING_SEMICOLON : printMessage(code, lexer, "missing_semicolon", new Object[]{entity}, Level.WARNING); break; case MISSING_SEMICOLON_NCR : printMessage(code, lexer, "missing_semicolon_ncr", new Object[]{entity}, Level.WARNING); break; case UNKNOWN_ENTITY : printMessage(code, lexer, "unknown_entity", new Object[]{entity}, Level.WARNING); break; case UNESCAPED_AMPERSAND : printMessage(code, lexer, "unescaped_ampersand", null, Level.WARNING); break; case APOS_UNDEFINED : printMessage(code, lexer, "apos_undefined", null, Level.WARNING); break; default : // should not reach here break; } } } /** * Prints error messages for attributes. 
     * UNEXPECTED_GT is counted and printed as an error, every other code is counted as a warning. Nothing is printed
     * once the error limit is exceeded; warnings are printed only when <code>showWarnings</code> is set.
     * @param lexer Lexer
     * @param node current tag
     * @param attribute attribute; dereferenced by most warning cases
     * @param code error code
     */
    public void attrError(Lexer lexer, Node node, AttVal attribute, short code)
    {
        // counters are updated first, so they stay accurate even when the message itself is suppressed
        if (code == UNEXPECTED_GT)
        {
            lexer.errors++;
        }
        else
        {
            lexer.warnings++;
        }

        if (lexer.errors > lexer.configuration.showErrors) // keep quiet after <showErrors> errors
        {
            return;
        }

        if (code == UNEXPECTED_GT) // error: printed regardless of the showWarnings setting
        {
            printMessage(code, lexer, "unexpected_gt", new Object[]{getTagName(node)}, Level.ERROR);
        }

        if (!lexer.configuration.showWarnings) // warnings
        {
            return;
        }

        // UNEXPECTED_GT has no case below and falls through to default
        switch (code)
        {
            case UNKNOWN_ATTRIBUTE :
                printMessage(code, lexer, "unknown_attribute", new Object[]{attribute.attribute}, Level.WARNING);
                break;

            case MISSING_ATTRIBUTE :
                printMessage(
                    code,
                    lexer,
                    "missing_attribute",
                    new Object[]{getTagName(node), attribute.attribute},
                    Level.WARNING);
                break;

            case MISSING_ATTR_VALUE :
                printMessage(
                    code,
                    lexer,
                    "missing_attr_value",
                    new Object[]{getTagName(node), attribute.attribute},
                    Level.WARNING);
                break;

            case MISSING_IMAGEMAP :
                printMessage(code, lexer, "missing_imagemap", new Object[]{getTagName(node)}, Level.WARNING);
                // recorded for the accessibility summary; only reached when the warning is actually shown
                lexer.badAccess |= MISSING_IMAGE_MAP;
                break;

            case BAD_ATTRIBUTE_VALUE :
                printMessage(code, lexer, "bad_attribute_value", new Object[]{
                    getTagName(node),
                    attribute.attribute,
                    attribute.value}, Level.WARNING);
                break;

            case XML_ID_SYNTAX :
                // the resource key is spelled "sintax"; it must match the key in the message bundle
                printMessage(
                    code,
                    lexer,
                    "xml_id_sintax",
                    new Object[]{getTagName(node), attribute.attribute},
                    Level.WARNING);
                break;

            case XML_ATTRIBUTE_VALUE :
                printMessage(
                    code,
                    lexer,
                    "xml_attribute_value",
                    new Object[]{getTagName(node), attribute.attribute},
                    Level.WARNING);
                break;

            case UNEXPECTED_QUOTEMARK :
                printMessage(code, lexer, "unexpected_quotemark", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case MISSING_QUOTEMARK :
                printMessage(code, lexer, "missing_quotemark", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case REPEATED_ATTRIBUTE :
                // parameter order is tag, value, attribute name
                printMessage(code, lexer, "repeated_attribute", new Object[]{
                    getTagName(node),
                    attribute.value,
                    attribute.attribute}, Level.WARNING);
                break;

            case PROPRIETARY_ATTR_VALUE :
                printMessage(
                    code,
                    lexer,
                    "proprietary_attr_value",
                    new Object[]{getTagName(node), attribute.value},
                    Level.WARNING);
                break;

            case PROPRIETARY_ATTRIBUTE :
                printMessage(
                    code,
                    lexer,
                    "proprietary_attribute",
                    new Object[]{getTagName(node), attribute.attribute},
                    Level.WARNING);
                break;

            // NOTE(review): this matches UNEXPECTED_END_OF_FILE (36), not UNEXPECTED_END_OF_FILE_ATTR (75);
            // confirm which code callers pass
            case UNEXPECTED_END_OF_FILE :
                // on end of file adjust reported position to end of input
                lexer.lines = lexer.in.getCurline();
                lexer.columns = lexer.in.getCurcol();
                printMessage(code, lexer, "unexpected_end_of_file", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case ID_NAME_MISMATCH :
                printMessage(code, lexer, "id_name_mismatch", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case BACKSLASH_IN_URI :
                printMessage(code, lexer, "backslash_in_uri", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case FIXED_BACKSLASH :
                printMessage(code, lexer, "fixed_backslash", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case ILLEGAL_URI_REFERENCE :
                printMessage(code, lexer, "illegal_uri_reference", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case ESCAPED_ILLEGAL_URI :
                printMessage(code, lexer, "escaped_illegal_uri", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case NEWLINE_IN_URI :
                printMessage(code, lexer, "newline_in_uri", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case ANCHOR_NOT_UNIQUE :
                printMessage(
                    code,
                    lexer,
                    "anchor_not_unique",
                    new Object[]{getTagName(node), attribute.value},
                    Level.WARNING);
                break;

            case ENTITY_IN_ID :
                printMessage(code, lexer, "entity_in_id", null, Level.WARNING);
                break;

            case JOINING_ATTRIBUTE :
                printMessage(
                    code,
                    lexer,
                    "joining_attribute",
                    new Object[]{getTagName(node), attribute.attribute},
                    Level.WARNING);
                break;

            case UNEXPECTED_EQUALSIGN :
                // note the resource key is "expected_equalsign"
                printMessage(code, lexer, "expected_equalsign", new Object[]{getTagName(node)}, Level.WARNING);
                break;

            case ATTR_VALUE_NOT_LCASE :
                // parameter order is tag, value, attribute name
                printMessage(code, lexer, "attr_value_not_lcase", new Object[]{
                    getTagName(node),
                    attribute.value,
                    attribute.attribute}, Level.WARNING);
                break;

            default :
                // codes without a case are counted above but produce no output
                break;
        }
    }

    /**
     * Prints warnings. The warning counter is incremented for every call except DISCARDING_UNEXPECTED while
     * <code>lexer.badForm</code> is set; in that situation the message is printed as an error instead, even when
     * warnings are disabled. Nothing is printed once the error limit is exceeded.
     * @param lexer Lexer
     * @param element parent/missing tag; dereferenced without a null check by several cases
     * @param node current tag; dereferenced without a null check by several cases
     * @param code error code
     */
    public void warning(Lexer lexer, Node element, Node node, short code)
    {

        // tag table, used to recognize layer/spacer/nobr for PROPRIETARY_ELEMENT
        TagTable tt = lexer.configuration.tt;

        if (!((code == DISCARDING_UNEXPECTED) && lexer.badForm != 0)) // lexer->errors++; already done in BadForm()
        {
            lexer.warnings++;
        }

        // keep quiet after <showErrors> errors
        if (lexer.errors > lexer.configuration.showErrors)
        {
            return;
        }

        if (lexer.configuration.showWarnings)
        {
            switch (code)
            {
                case MISSING_ENDTAG_FOR :
                    printMessage(code, lexer, "missing_endtag_for", new Object[]{element.element}, Level.WARNING);
                    break;

                case MISSING_ENDTAG_BEFORE :
                    printMessage(
                        code,
                        lexer,
                        "missing_endtag_before",
                        new Object[]{element.element, getTagName(node)},
                        Level.WARNING);
                    break;

                case DISCARDING_UNEXPECTED :
                    if (lexer.badForm == 0)
                    {
                        // the case for when this is an error not a warning, is handled later
                        printMessage(
                            code,
                            lexer,
                            "discarding_unexpected",
                            new Object[]{getTagName(node)},
                            Level.WARNING);
                    }
                    break;

                // the next two are reported at INFO level, so they get no "warning" prefix
                case NESTED_EMPHASIS :
                    printMessage(code, lexer, "nested_emphasis", new Object[]{getTagName(node)}, Level.INFO);
                    break;

                case COERCE_TO_ENDTAG :
                    printMessage(code, lexer, "coerce_to_endtag", new Object[]{element.element}, Level.INFO);
                    break;

                case NON_MATCHING_ENDTAG :
                    printMessage(
                        code,
                        lexer,
                        "non_matching_endtag",
                        new Object[]{getTagName(node), element.element},
                        Level.WARNING);
                    break;

                case TAG_NOT_ALLOWED_IN :
                    printMessage(
                        code,
                        lexer,
                        "tag_not_allowed_in",
                        new Object[]{getTagName(node), element.element},
                        Level.WARNING);
                    break;

                case DOCTYPE_AFTER_TAGS :
                    printMessage(code, lexer, "doctype_after_tags", null, Level.WARNING);
                    break;

                case MISSING_STARTTAG :
                    printMessage(code, lexer, "missing_starttag", new Object[]{node.element}, Level.WARNING);
                    break;

                case UNEXPECTED_ENDTAG :
                    // with a known parent the "..._in" variant of the message is used
                    if (element != null)
                    {
                        printMessage(
                            code,
                            lexer,
                            "unexpected_endtag_in",
                            new Object[]{node.element, element.element},
                            Level.WARNING);
                    }
                    else
                    {
                        printMessage(code, lexer, "unexpected_endtag", new Object[]{node.element}, Level.WARNING);
                    }
                    break;

                case TOO_MANY_ELEMENTS :
                    // with a known parent the "..._in" variant of the message is used
                    if (element != null)
                    {
                        printMessage(
                            code,
                            lexer,
                            "too_many_elements_in",
                            new Object[]{node.element, element.element},
                            Level.WARNING);
                    }
                    else
                    {
                        printMessage(code, lexer, "too_many_elements", new Object[]{node.element}, Level.WARNING);
                    }
                    break;

                case USING_BR_INPLACE_OF :
                    printMessage(code, lexer, "using_br_inplace_of", new Object[]{getTagName(node)}, Level.WARNING);
                    break;

                case INSERTING_TAG :
                    printMessage(code, lexer, "inserting_tag", new Object[]{node.element}, Level.WARNING);
                    break;

                case CANT_BE_NESTED :
                    printMessage(code, lexer, "cant_be_nested", new Object[]{getTagName(node)}, Level.WARNING);
                    break;

                case PROPRIETARY_ELEMENT :
                    printMessage(code, lexer, "proprietary_element", new Object[]{getTagName(node)}, Level.WARNING);

                    // remember presentational elements for the layout summary; only reached when the
                    // warning is actually shown
                    if (node.tag == tt.tagLayer)
                    {
                        lexer.badLayout |= USING_LAYER;
                    }
                    else if (node.tag == tt.tagSpacer)
                    {
                        lexer.badLayout |= USING_SPACER;
                    }
                    else if (node.tag == tt.tagNobr)
                    {
                        lexer.badLayout |= USING_NOBR;
                    }
                    break;

                case OBSOLETE_ELEMENT :
                    // "obsolete" wording only for elements flagged CM_OBSOLETE, otherwise "replacing"
                    if (element.tag != null && (element.tag.model & Dict.CM_OBSOLETE) != 0)
                    {
                        printMessage(code, lexer, "obsolete_element", new Object[]{
                            getTagName(element),
                            getTagName(node)}, Level.WARNING);
                    }
                    else
                    {
                        printMessage(code, lexer, "replacing_element", new Object[]{
                            getTagName(element),
                            getTagName(node)}, Level.WARNING);
                    }
                    break;

                case UNESCAPED_ELEMENT :
                    printMessage(code, lexer, "unescaped_element", new Object[]{getTagName(element)}, Level.WARNING);
                    break;

                case TRIM_EMPTY_ELEMENT :
                    printMessage(code, lexer, "trim_empty_element", new Object[]{getTagName(element)}, Level.WARNING);
                    break;

                case MISSING_TITLE_ELEMENT :
                    printMessage(code, lexer, "missing_title_element", null, Level.WARNING);
                    break;

                case ILLEGAL_NESTING :
                    printMessage(code, lexer, "illegal_nesting", new Object[]{getTagName(element)}, Level.WARNING);
                    break;

                case NOFRAMES_CONTENT :
                    printMessage(code, lexer, "noframes_content", new Object[]{getTagName(node)}, Level.WARNING);
                    break;

                case INCONSISTENT_VERSION :
                    printMessage(code, lexer, "inconsistent_version", null, Level.WARNING);
                    break;

                case MALFORMED_DOCTYPE :
                    printMessage(code, lexer, "malformed_doctype", null, Level.WARNING);
                    break;

                case CONTENT_AFTER_BODY :
                    printMessage(code, lexer, "content_after_body", null, Level.WARNING);
                    break;

                case MALFORMED_COMMENT :
                    printMessage(code, lexer, "malformed_comment", null, Level.WARNING);
                    break;

                case BAD_COMMENT_CHARS :
                    printMessage(code, lexer, "bad_comment_chars", null, Level.WARNING);
                    break;

                case BAD_XML_COMMENT :
                    printMessage(code, lexer, "bad_xml_comment", null, Level.WARNING);
                    break;

                case BAD_CDATA_CONTENT :
                    printMessage(code, lexer, "bad_cdata_content", null, Level.WARNING);
                    break;

                case INCONSISTENT_NAMESPACE :
                    printMessage(code, lexer, "inconsistent_namespace", null, Level.WARNING);
                    break;

                case DTYPE_NOT_UPPER_CASE :
                    printMessage(code, lexer, "dtype_not_upper_case", null, Level.WARNING);
                    break;

                case UNEXPECTED_END_OF_FILE :
                    // on end of file adjust reported position to end of input
                    lexer.lines = lexer.in.getCurline();
                    lexer.columns = lexer.in.getCurcol();
                    printMessage(
                        code,
                        lexer,
                        "unexpected_end_of_file",
                        new Object[]{getTagName(element)},
                        Level.WARNING);
                    break;

                case NESTED_QUOTATION :
                    printMessage(code, lexer, "nested_quotation", null, Level.WARNING);
                    break;

                case ELEMENT_NOT_EMPTY :
                    printMessage(code, lexer, "element_not_empty", new Object[]{getTagName(element)}, Level.WARNING);
                    break;

                case MISSING_DOCTYPE :
                    printMessage(code, lexer, "missing_doctype", null, Level.WARNING);
                    break;

                default :
                    // codes without a case are counted above but produce no output
                    break;
            }
        }

        // deliberately outside the showWarnings check: in a bad form this is an error and is always printed
        if ((code == DISCARDING_UNEXPECTED) && lexer.badForm != 0)
        {
            // the case for when this is a warning not an error, is handled earlier
            printMessage(code, lexer, "discarding_unexpected", new Object[]{getTagName(node)}, Level.ERROR);
        }
    }

    /**
     * Prints errors.
* @param lexer Lexer * @param element parent/missing tag * @param node current tag * @param code error code */ public void error(Lexer lexer, Node element, Node node, short code) { lexer.errors++; // keep quiet after <showErrors> errors if (lexer.errors > lexer.configuration.showErrors) { return; } if (code == SUSPECTED_MISSING_QUOTE) { printMessage(code, lexer, "suspected_missing_quote", null, Level.ERROR); } else if (code == DUPLICATE_FRAMESET) { printMessage(code, lexer, "duplicate_frameset", null, Level.ERROR); } else if (code == UNKNOWN_ELEMENT) { printMessage(code, lexer, "unknown_element", new Object[]{getTagName(node)}, Level.ERROR); } else if (code == UNEXPECTED_ENDTAG) { if (element != null) { printMessage( code, lexer, "unexpected_endtag_in", new Object[]{node.element, element.element}, Level.ERROR); } else { printMessage(code, lexer, "unexpected_endtag", new Object[]{node.element}, Level.ERROR); } } } /** * Prints error summary. * @param lexer Lexer */ public void errorSummary(Lexer lexer) { // adjust badAccess to that its null if frames are ok if ((lexer.badAccess & (USING_FRAMES | USING_NOFRAMES)) != 0) { if (!(((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0))) { lexer.badAccess &= ~(USING_FRAMES | USING_NOFRAMES); } } if (lexer.badChars != 0) { if ((lexer.badChars & VENDOR_SPECIFIC_CHARS) != 0) { int encodingChoiche = 0; if ("Cp1252".equals(lexer.configuration.getInCharEncodingName())) { encodingChoiche = 1; } else if ("MacRoman".equals(lexer.configuration.getInCharEncodingName())) { encodingChoiche = 2; } printMessage(VENDOR_SPECIFIC_CHARS, lexer, "vendor_specific_chars_summary", new Object[]{new Integer( encodingChoiche)}, Level.SUMMARY); } if ((lexer.badChars & INVALID_SGML_CHARS) != 0 || (lexer.badChars & INVALID_NCR) != 0) { int encodingChoiche = 0; if ("Cp1252".equals(lexer.configuration.getInCharEncodingName())) { encodingChoiche = 1; } else if ("MacRoman".equals(lexer.configuration.getInCharEncodingName())) 
{ encodingChoiche = 2; } printMessage(INVALID_SGML_CHARS, lexer, "invalid_sgml_chars_summary", new Object[]{new Integer( encodingChoiche)}, Level.SUMMARY); } if ((lexer.badChars & INVALID_UTF8) != 0) { printMessage(INVALID_UTF8, lexer, "invalid_utf8_summary", null, Level.SUMMARY); } if ((lexer.badChars & INVALID_UTF16) != 0) { printMessage(INVALID_UTF16, lexer, "invalid_utf16_summary", null, Level.SUMMARY); } if ((lexer.badChars & INVALID_URI) != 0) { printMessage(INVALID_URI, lexer, "invaliduri_summary", null, Level.SUMMARY); } } if (lexer.badForm != 0) { printMessage(BADFORM_SUMMARY, lexer, "badform_summary", null, Level.SUMMARY); } if (lexer.badAccess != 0) { if ((lexer.badAccess & MISSING_SUMMARY) != 0) { printMessage(MISSING_SUMMARY, lexer, "badaccess_missing_summary", null, Level.SUMMARY); } if ((lexer.badAccess & MISSING_IMAGE_ALT) != 0) { printMessage(MISSING_IMAGE_ALT, lexer, "badaccess_missing_image_alt", null, Level.SUMMARY); } if ((lexer.badAccess & MISSING_IMAGE_MAP) != 0) { printMessage(MISSING_IMAGE_MAP, lexer, "badaccess_missing_image_map", null, Level.SUMMARY); } if ((lexer.badAccess & MISSING_LINK_ALT) != 0) { printMessage(MISSING_LINK_ALT, lexer, "badaccess_missing_link_alt", null, Level.SUMMARY); } if (((lexer.badAccess & USING_FRAMES) != 0) && ((lexer.badAccess & USING_NOFRAMES) == 0)) { printMessage(USING_FRAMES, lexer, "badaccess_frames", null, Level.SUMMARY); } printMessage(BADACCESS_SUMMARY, lexer, "badaccess_summary", new Object[]{ACCESS_URL}, Level.SUMMARY); } if (lexer.badLayout != 0) { if ((lexer.badLayout & USING_LAYER) != 0) { printMessage(USING_LAYER, lexer, "badlayout_using_layer", null, Level.SUMMARY); } if ((lexer.badLayout & USING_SPACER) != 0) { printMessage(USING_SPACER, lexer, "badlayout_using_spacer", null, Level.SUMMARY); } if ((lexer.badLayout & USING_FONT) != 0) { printMessage(USING_FONT, lexer, "badlayout_using_font", null, Level.SUMMARY); } if ((lexer.badLayout & USING_NOBR) != 0) { printMessage(USING_NOBR, lexer, 
"badlayout_using_nobr", null, Level.SUMMARY); } if ((lexer.badLayout & USING_BODY) != 0) { printMessage(USING_BODY, lexer, "badlayout_using_body", null, Level.SUMMARY); } } } /** * Prints the "unknown option" message. * @param errout PrintWriter * @param c invalid option char */ public void unknownOption(PrintWriter errout, char c) { printMessage(errout, "unrecognized_option", new Object[]{new String(new char[]{c})}, Level.ERROR); } /** * Prints the "unknown file" message. * @param errout PrintWriter * @param file invalid file name */ public void unknownFile(PrintWriter errout, String file) { printMessage(errout, "unknown_file", new Object[]{"Tidy", file}, Level.ERROR); } /** * Prints the "needs author intervention" message. * @param errout PrintWriter */ public void needsAuthorIntervention(PrintWriter errout) { printMessage(errout, "needs_author_intervention", null, Level.SUMMARY); } /** * Prints the "missing body" message. * @param errout PrintWriter */ public void missingBody(PrintWriter errout) { printMessage(errout, "missing_body", null, Level.ERROR); } /** * Prints the number of generated slides. * @param errout PrintWriter * @param count slides count */ public void reportNumberOfSlides(PrintWriter errout, int count) { printMessage(errout, "slides_found", new Object[]{new Integer(count)}, Level.SUMMARY); } /** * Prints tidy general info. * @param errout PrintWriter */ public void generalInfo(PrintWriter errout) { printMessage(errout, "general_info", null, Level.SUMMARY); } /** * Sets the current file name. * @param filename current file. */ public void setFilename(String filename) { this.currentFile = filename; // for use with Gnu Emacs } /** * Prints information for html version in input file. 
* @param errout PrintWriter * @param lexer Lexer * @param filename file name * @param doctype doctype Node */ public void reportVersion(PrintWriter errout, Lexer lexer, String filename, Node doctype) { int i, c; int state = 0; String vers = lexer.htmlVersionName(); int[] cc = new int[1]; // adjust reported position to first line lexer.lines = 1; lexer.columns = 1; if (doctype != null) { StringBuffer doctypeBuffer = new StringBuffer(); for (i = doctype.start; i < doctype.end; ++i) { c = doctype.textarray[i]; // look for UTF-8 multibyte character if (c < 0) { i += PPrint.getUTF8(doctype.textarray, i, cc); c = cc[0]; } if (c == '"') { ++state; } else if (state == 1) { doctypeBuffer.append((char) c); } } printMessage( DOCTYPE_GIVEN_SUMMARY, lexer, "doctype_given", new Object[]{filename, doctypeBuffer}, Level.SUMMARY); } printMessage(REPORT_VERSION_SUMMARY, lexer, "report_version", new Object[]{ filename, (vers != null ? vers : "HTML proprietary")}, Level.SUMMARY); } /** * Prints the number of error/warnings found. * @param errout PrintWriter * @param lexer Lexer */ public void reportNumWarnings(PrintWriter errout, Lexer lexer) { if (lexer.warnings > 0 || lexer.errors > 0) { printMessage( errout, "num_warnings", new Object[]{new Integer(lexer.warnings), new Integer(lexer.errors)}, Level.SUMMARY); } else { printMessage(errout, "no_warnings", null, Level.SUMMARY); } } /** * Prints tidy help. * @param out PrintWriter */ public void helpText(PrintWriter out) { printMessage(out, "help_text", new Object[]{"Tidy", RELEASE_DATE_STRING}, Level.SUMMARY); } /** * Prints the "bad tree" message. * @param errout PrintWriter */ public void badTree(PrintWriter errout) { printMessage(errout, "bad_tree", null, Level.ERROR); } /** * Adds a message listener. 
* @param listener TidyMessageListener */ public void addMessageListener(TidyMessageListener listener) { this.listener = listener; } }���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMDocumentImpl.java�����������������������������������������������0000644�0001750�0001750�00000032365�11433336655�022654� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import org.w3c.dom.DOMConfiguration; import org.w3c.dom.DOMException; /** * DOMDocumentImpl. 
* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 1125 $ ($Author: aditsu $) */ public class DOMDocumentImpl extends DOMNodeImpl implements org.w3c.dom.Document { /** * A DOM Document has its own TagTable. */ private TagTable tt; /** * Instantiates a new Dom document with a default tag table. * @param adaptee tidy Node */ protected DOMDocumentImpl(Node adaptee) { super(adaptee); this.tt = new TagTable(); } /** * @see org.w3c.dom.Node#getNodeName */ public String getNodeName() { return "#document"; } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { return org.w3c.dom.Node.DOCUMENT_NODE; } /** * @see org.w3c.dom.Document#getDoctype */ public org.w3c.dom.DocumentType getDoctype() { Node node = this.adaptee.content; while (node != null) { if (node.type == Node.DOCTYPE_TAG) { break; } node = node.next; } if (node != null) { return (org.w3c.dom.DocumentType) node.getAdapter(); } return null; } /** * @todo DOM level 2 getImplementation() Not implemented. Throws NOT_SUPPORTED_ERR. 
* @see org.w3c.dom.Document#getImplementation */ public org.w3c.dom.DOMImplementation getImplementation() { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @see org.w3c.dom.Document#getDocumentElement */ public org.w3c.dom.Element getDocumentElement() { Node node = this.adaptee.content; while (node != null) { if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { break; } node = node.next; } if (node != null) { return (org.w3c.dom.Element) node.getAdapter(); } return null; } /** * @see org.w3c.dom.Document#createElement */ public org.w3c.dom.Element createElement(String tagName) throws DOMException { Node node = new Node(Node.START_END_TAG, null, 0, 0, tagName, this.tt); if (node.tag == null) // Fix Bug 121206 { node.tag = TagTable.XML_TAGS; } return (org.w3c.dom.Element) node.getAdapter(); } /** * @todo DOM level 2 createDocumentFragment() Not implemented. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#createDocumentFragment */ public org.w3c.dom.DocumentFragment createDocumentFragment() { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @see org.w3c.dom.Document#createTextNode */ public org.w3c.dom.Text createTextNode(String data) { byte[] textarray = TidyUtils.getBytes(data); Node node = new Node(Node.TEXT_NODE, textarray, 0, textarray.length); return (org.w3c.dom.Text) node.getAdapter(); } /** * @see org.w3c.dom.Document#createComment */ public org.w3c.dom.Comment createComment(String data) { byte[] textarray = TidyUtils.getBytes(data); Node node = new Node(Node.COMMENT_TAG, textarray, 0, textarray.length); return (org.w3c.dom.Comment) node.getAdapter(); } /** * @todo DOM level 2 createCDATASection() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#createCDATASection */ public org.w3c.dom.CDATASection createCDATASection(String data) throws DOMException { // NOT_SUPPORTED_ERR: Raised if this document is an HTML document. 
throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "HTML document"); } /** * @todo DOM level 2 createProcessingInstruction() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#createProcessingInstruction */ public org.w3c.dom.ProcessingInstruction createProcessingInstruction(String target, String data) throws DOMException { // NOT_SUPPORTED_ERR: Raised if this document is an HTML document. throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "HTML document"); } /** * @see org.w3c.dom.Document#createAttribute */ public org.w3c.dom.Attr createAttribute(String name) throws DOMException { AttVal av = new AttVal(null, null, '"', name, null); av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); return av.getAdapter(); } /** * @todo DOM level 2 createEntityReference() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#createEntityReference */ public org.w3c.dom.EntityReference createEntityReference(String name) throws DOMException { // NOT_SUPPORTED_ERR: Raised if this document is an HTML document throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "createEntityReference not supported"); } /** * @see org.w3c.dom.Document#getElementsByTagName */ public org.w3c.dom.NodeList getElementsByTagName(String tagname) { return new DOMNodeListByTagNameImpl(this.adaptee, tagname); } /** * @todo DOM level 2 importNode() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#importNode(org.w3c.dom.Node, boolean) */ public org.w3c.dom.Node importNode(org.w3c.dom.Node importedNode, boolean deep) throws org.w3c.dom.DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "importNode not supported"); } /** * @todo DOM level 2 createAttributeNS() Not supported. Throws NOT_SUPPORTED_ERR. 
* @see org.w3c.dom.Document#createAttributeNS(java.lang.String, java.lang.String) */ public org.w3c.dom.Attr createAttributeNS(String namespaceURI, String qualifiedName) throws org.w3c.dom.DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "createAttributeNS not supported"); } /** * @todo DOM level 2 createElementNS() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#createElementNS(java.lang.String, java.lang.String) */ public org.w3c.dom.Element createElementNS(String namespaceURI, String qualifiedName) throws org.w3c.dom.DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "createElementNS not supported"); } /** * @todo DOM level 2 getElementsByTagNameNS() Not supported. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#getElementsByTagNameNS(java.lang.String, java.lang.String) */ public org.w3c.dom.NodeList getElementsByTagNameNS(String namespaceURI, String localName) { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "getElementsByTagNameNS not supported"); } /** * @todo DOM level 2 getElementById() Not implemented. Returns null. * @see org.w3c.dom.Document#getElementById(java.lang.String) */ public org.w3c.dom.Element getElementById(String elementId) { return null; } /** * @todo DOM level 3 adoptNode() Not implemented. * @see org.w3c.dom.Document#adoptNode(org.w3c.dom.Node) */ public org.w3c.dom.Node adoptNode(org.w3c.dom.Node source) throws DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 3 getDocumentURI() Not implemented. Returns null. * @see org.w3c.dom.Document#getDocumentURI() */ public String getDocumentURI() { return null; } /** * @todo DOM level 3 getDomConfig() Not implemented. Returns null. * @see org.w3c.dom.Document#getDomConfig() */ public DOMConfiguration getDomConfig() { return null; } /** * @todo DOM level 3 getInputEncoding() Not implemented. Returns null. 
* @see org.w3c.dom.Document#getInputEncoding() */ public String getInputEncoding() { return null; } /** * @todo DOM level 3 getStrictErrorChecking() Not implemented. Returns true. * @see org.w3c.dom.Document#getStrictErrorChecking() */ public boolean getStrictErrorChecking() { return true; } /** * @todo DOM level 3 getXmlEncoding() Not implemented. Returns null. * @see org.w3c.dom.Document#getXmlEncoding() */ public String getXmlEncoding() { return null; } /** * @todo DOM level 3 getXmlStandalone() Not implemented. Returns false. * @see org.w3c.dom.Document#getXmlStandalone() */ public boolean getXmlStandalone() { return false; } /** * @todo DOM level 3 getXmlVersion() Not implemented. Always returns "1.0". * @see org.w3c.dom.Document#getXmlVersion() */ public String getXmlVersion() { // An attribute specifying, as part of the XML declaration, the version number of this document. If there is no // declaration and if this document supports the "XML" feature, the value is "1.0" return "1.0"; } /** * @todo DOM level 3 normalizeDocument() Not implemented. Do nothing. * @see org.w3c.dom.Document#normalizeDocument() */ public void normalizeDocument() { // do nothing } /** * @todo DOM level 3 renameNode() Not implemented. Throws NOT_SUPPORTED_ERR. * @see org.w3c.dom.Document#renameNode(org.w3c.dom.Node, java.lang.String, java.lang.String) */ public org.w3c.dom.Node renameNode(org.w3c.dom.Node n, String namespaceURI, String qualifiedName) throws DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 3 setDocumentURI() Not implemented. Do nothing. * @see org.w3c.dom.Document#setDocumentURI(java.lang.String) */ public void setDocumentURI(String documentURI) { // do nothing } /** * @todo DOM level 3 setStrictErrorChecking() Not implemented. Do nothing. 
* @see org.w3c.dom.Document#setStrictErrorChecking(boolean) */ public void setStrictErrorChecking(boolean strictErrorChecking) { // do nothing } /** * @todo DOM level 3 setXmlStandalone() Not implemented. Do nothing. * @see org.w3c.dom.Document#setXmlStandalone(boolean) */ public void setXmlStandalone(boolean xmlStandalone) throws DOMException { // do nothing } /** * @todo DOM level 3 setXmlVersion() Not implemented. Do nothing. * @see org.w3c.dom.Document#setXmlVersion(java.lang.String) */ public void setXmlVersion(String xmlVersion) throws DOMException { // do nothing } }���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMAttrMapImpl.java������������������������������������������������0000644�0001750�0001750�00000016026�10144212711�022422� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import org.w3c.dom.DOMException; /** * Tidy implementation of org.w3c.dom.NamedNodeMap. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 738 $ ($Author: fgiust $) */ public class DOMAttrMapImpl implements org.w3c.dom.NamedNodeMap { /** * wrapped org.w3c.tidy.AttVal. */ private AttVal first; /** * instantiates a new DOMAttrMapImpl for the given AttVal. * @param firstAttVal wrapped AttVal */ protected DOMAttrMapImpl(AttVal firstAttVal) { this.first = firstAttVal; } /** * @see org.w3c.dom.NamedNodeMap#getNamedItem(java.lang.String) */ public org.w3c.dom.Node getNamedItem(String name) { AttVal att = this.first; while (att != null) { if (att.attribute.equals(name)) { break; } att = att.next; } if (att != null) { return att.getAdapter(); } return null; } /** * @see org.w3c.dom.NamedNodeMap#item */ public org.w3c.dom.Node item(int index) { int i = 0; AttVal att = this.first; while (att != null) { if (i >= index) { break; } i++; att = att.next; } if (att != null) { return att.getAdapter(); } return null; } /** * @see org.w3c.dom.NamedNodeMap#getLength */ public int getLength() { int len = 0; AttVal att = this.first; while (att != null) { len++; att = att.next; } return len; } /** * @todo DOM level 2 setNamedItem() Not implemented. Throws NOT_SUPPORTED_ERR. 
* @see org.w3c.dom.NamedNodeMap#setNamedItem */ public org.w3c.dom.Node setNamedItem(org.w3c.dom.Node arg) throws DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @see org.w3c.dom.NamedNodeMap#removeNamedItem */ public org.w3c.dom.Node removeNamedItem(String name) throws DOMException { AttVal att = this.first; AttVal previous = null; while (att != null) { if (att.attribute.equals(name)) { if (previous == null) { this.first = att.getNext(); } else { previous.setNext(att.getNext()); } break; } previous = att; att = att.next; } if (att != null) { return att.getAdapter(); } throw new DOMException(DOMException.NOT_FOUND_ERR, "Named item " + name + "Not found"); } /** * Not supported, returns <code>DOMException.NOT_SUPPORTED_ERR</code>. * @see org.w3c.dom.NamedNodeMap#getNamedItemNS(java.lang.String, java.lang.String) */ public org.w3c.dom.Node getNamedItemNS(String namespaceURI, String localName) { // NOT_SUPPORTED_ERR: May be raised if the implementation does not support the feature "XML" and the language // exposed through the Document does not support XML Namespaces (such as HTML 4.01). throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * Not supported, returns <code>DOMException.NOT_SUPPORTED_ERR</code>. * @see org.w3c.dom.NamedNodeMap#setNamedItemNS(org.w3c.dom.Node) */ public org.w3c.dom.Node setNamedItemNS(org.w3c.dom.Node arg) throws org.w3c.dom.DOMException { // NOT_SUPPORTED_ERR: May be raised if the implementation does not support the feature "XML" and the language // exposed through the Document does not support XML Namespaces (such as HTML 4.01). throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * Not supported, returns <code>DOMException.NOT_SUPPORTED_ERR</code>. 
* @see org.w3c.dom.NamedNodeMap#removeNamedItemNS(java.lang.String, java.lang.String) */ public org.w3c.dom.Node removeNamedItemNS(String namespaceURI, String localName) throws org.w3c.dom.DOMException { // NOT_SUPPORTED_ERR: May be raised if the implementation does not support the feature "XML" and the language // exposed through the Document does not support XML Namespaces (such as HTML 4.01). throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } }����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/ValidUTF8Sequence.java���������������������������������������������0000644�0001750�0001750�00000006230�10116675277�023106� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; /** * @author Fabrizio Giustina (translation from c) * @version $Revision: 587 $ ($Author: fgiust $) */ public class ValidUTF8Sequence { /** * low char. */ int lowChar; /** * high char. */ int highChar; /** * number of bytes. */ int numBytes; /** * array of valid bytes. */ char[] validBytes = new char[8]; /** * Instantiates a new ValidUTF8Sequence. * @param lowChar low utf8 char * @param highChar high utf8 char * @param numBytes number of bytes in the sequence * @param validBytes valid bytes array */ public ValidUTF8Sequence(int lowChar, int highChar, int numBytes, char[] validBytes) { this.lowChar = lowChar; this.highChar = highChar; this.numBytes = numBytes; this.validBytes = validBytes; } } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Clean.java���������������������������������������������������������0000644�0001750�0001750�00000202502�11433336655�020726� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
*
 */
package org.w3c.tidy;

/**
 * Clean up misuse of presentation markup. Filters from other formats such as Microsoft Word often make excessive use of
 * presentation markup such as font tags, B, I, and the align attribute. By applying a set of production rules, it is
 * straight forward to transform this to use CSS. Some rules replace some of the children of an element by style
 * properties on the element, e.g.
 *
 * <pre>
 * &lt;p&gt;&lt;b&gt;...&lt;/b&gt;&lt;/p&gt;
 * </pre>
 *
 * becomes
 *
 * <pre>
 * &lt;p style="font-weight: bold"&gt;...&lt;/p&gt;
 * </pre>
 *
 * Such rules are applied to the element's content and then to the element itself until none of the rules apply any
 * more. Having applied all the rules to an element, it will have a style attribute with one or more properties. Other
 * rules strip the element they apply to, replacing it by style properties on the contents, e.g.
 *
 * <pre>
 * &lt;dir&gt;&lt;li&gt;&lt;p&gt;...&lt;/li&gt;&lt;/dir&gt;
 * </pre>
 *
 * becomes
 *
 * <pre>
 * &lt;p style="margin-left 1em"&gt;...
 * </pre>
 *
 * These rules are applied to an element before processing its content and replace the current element by the first
 * element in the exposed content. After applying both sets of rules, you can replace the style attribute by a class
 * value and style rule in the document head. To support this, an association of styles and class names is built. A
 * naive approach is to rely on string matching to test when two property lists are the same. A better approach would be
 * to first sort the properties before matching.
 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision: 1125 $ ($Author: aditsu $)
 */
public class Clean
{

    /**
     * sequential number for generated css classes.
     */
    private int classNum;

    /**
     * Tag table.
     */
    private TagTable tt;

    /**
     * Instantiates a new Clean.
     * @param tagTable tag table instance
     */
    public Clean(TagTable tagTable)
    {
        this.tt = tagTable;
    }

    /**
     * Insert a css style property.
* @param props StyleProp instance * @param name property name * @param value property value * @return StyleProp containin the given property */ private StyleProp insertProperty(StyleProp props, String name, String value) { StyleProp first, prev, prop; int cmp; prev = null; first = props; while (props != null) { cmp = props.name.compareTo(name); if (cmp == 0) { // this property is already defined, ignore new value return first; } if (cmp > 0) // props.name > name { // insert before this prop = new StyleProp(name, value, props); if (prev != null) { prev.next = prop; } else { first = prop; } return first; } prev = props; props = props.next; } prop = new StyleProp(name, value, null); if (prev != null) { prev.next = prop; } else { first = prop; } return first; } /** * Create sorted linked list of properties from style string. * @param prop StyleProp * @param style style string * @return StyleProp with given style */ private StyleProp createProps(StyleProp prop, String style) { int nameEnd; int valueEnd; int valueStart = 0; int nameStart = 0; boolean more; nameStart = 0; while (nameStart < style.length()) { while (nameStart < style.length() && style.charAt(nameStart) == ' ') { ++nameStart; } nameEnd = nameStart; while (nameEnd < style.length()) { if (style.charAt(nameEnd) == ':') { valueStart = nameEnd + 1; break; } ++nameEnd; } if (nameEnd >= style.length() || style.charAt(nameEnd) != ':') { break; } while (valueStart < style.length() && style.charAt(valueStart) == ' ') { ++valueStart; } valueEnd = valueStart; more = false; while (valueEnd < style.length()) { if (style.charAt(valueEnd) == ';') { more = true; break; } ++valueEnd; } prop = insertProperty(prop, style.substring(nameStart, nameEnd), style.substring(valueStart, valueEnd)); if (more) { nameStart = valueEnd + 1; continue; } break; } return prop; } /** * Create a css property. 
* @param props StyleProp * @return css property as String */ private String createPropString(StyleProp props) { String style = ""; int len; StyleProp prop; // compute length for (len = 0, prop = props; prop != null; prop = prop.next) { len += prop.name.length() + 2; len += prop.value.length() + 2; } for (prop = props; prop != null; prop = prop.next) { style = style.concat(prop.name); style = style.concat(": "); style = style.concat(prop.value); if (prop.next == null) { break; } style = style.concat("; "); } return style; } /** * Creates a string with merged properties. * @param style css style * @param property css properties * @return merged string */ private String addProperty(String style, String property) { StyleProp prop; prop = createProps(null, style); prop = createProps(prop, property); style = createPropString(prop); return style; } /** * Generates a new css class name. * @param lexer Lexer * @return generated css class */ private String gensymClass(final Lexer lexer) { String pfx = lexer.configuration.cssPrefix; if (pfx == null) { pfx = "c"; } return pfx + ++classNum; } /** * Finds a css style. * @param lexer Lexer * @param tag tag name * @param properties css properties * @return style string */ private String findStyle(Lexer lexer, String tag, String properties) { Style style; for (style = lexer.styles; style != null; style = style.next) { if (style.tag.equals(tag) && style.properties.equals(properties)) { return style.tagClass; } } style = new Style(tag, gensymClass(lexer), properties, lexer.styles); lexer.styles = style; return style.tagClass; } /** * Find style attribute in node, and replace it by corresponding class attribute. Search for class in style * dictionary otherwise gensym new class and add to dictionary. Assumes that node doesn't have a class attribute. 
* @param lexer Lexer * @param node node with a style attribute */ private void style2Rule(Lexer lexer, Node node) { AttVal styleattr, classattr; String classname; styleattr = node.getAttrByName("style"); if (styleattr != null) { classname = findStyle(lexer, node.element, styleattr.value); classattr = node.getAttrByName("class"); // if there already is a class attribute then append class name after a space if (classattr != null) { classattr.value = classattr.value + " " + classname; node.removeAttribute(styleattr); } else { // reuse style attribute for class attribute styleattr.attribute = "class"; styleattr.value = classname; } } } /** * Adds a css rule for color. * @param lexer Lexer * @param selector css selector * @param color color value */ private void addColorRule(Lexer lexer, String selector, String color) { if (color != null) { lexer.addStringLiteral(selector); lexer.addStringLiteral(" { color: "); lexer.addStringLiteral(color); lexer.addStringLiteral(" }\n"); } } /** * Move presentation attribs from body to style element. * * <pre> * background="foo" . body { background-image: url(foo) } * bgcolor="foo" . body { background-color: foo } * text="foo" . body { color: foo } * link="foo" . :link { color: foo } * vlink="foo" . :visited { color: foo } * alink="foo" . 
:active { color: foo } * </pre> * * @param lexer Lexer * @param body body node */ private void cleanBodyAttrs(Lexer lexer, Node body) { AttVal attr; String bgurl = null; String bgcolor = null; String color = null; attr = body.getAttrByName("background"); if (attr != null) { bgurl = attr.value; attr.value = null; body.removeAttribute(attr); } attr = body.getAttrByName("bgcolor"); if (attr != null) { bgcolor = attr.value; attr.value = null; body.removeAttribute(attr); } attr = body.getAttrByName("text"); if (attr != null) { color = attr.value; attr.value = null; body.removeAttribute(attr); } if (bgurl != null || bgcolor != null || color != null) { lexer.addStringLiteral(" body {\n"); if (bgurl != null) { lexer.addStringLiteral(" background-image: url("); lexer.addStringLiteral(bgurl); lexer.addStringLiteral(");\n"); } if (bgcolor != null) { lexer.addStringLiteral(" background-color: "); lexer.addStringLiteral(bgcolor); lexer.addStringLiteral(";\n"); } if (color != null) { lexer.addStringLiteral(" color: "); lexer.addStringLiteral(color); lexer.addStringLiteral(";\n"); } lexer.addStringLiteral(" }\n"); } attr = body.getAttrByName("link"); if (attr != null) { addColorRule(lexer, " :link", attr.value); body.removeAttribute(attr); } attr = body.getAttrByName("vlink"); if (attr != null) { addColorRule(lexer, " :visited", attr.value); body.removeAttribute(attr); } attr = body.getAttrByName("alink"); if (attr != null) { addColorRule(lexer, " :active", attr.value); body.removeAttribute(attr); } } /** * Check deprecated attributes in body tag. * @param lexer Lexer * @param doc document root node * @return <code>true</code> is the body doesn't contain deprecated attributes, false otherwise. 
*/ private boolean niceBody(Lexer lexer, Node doc) { Node body = doc.findBody(lexer.configuration.tt); if (body != null) { if (body.getAttrByName("background") != null || body.getAttrByName("bgcolor") != null || body.getAttrByName("text") != null || body.getAttrByName("link") != null || body.getAttrByName("vlink") != null || body.getAttrByName("alink") != null) { lexer.badLayout |= Report.USING_BODY; return false; } } return true; } /** * Create style element using rules from dictionary. * @param lexer Lexer * @param doc root node */ private void createStyleElement(Lexer lexer, Node doc) { Node node, head, body; Style style; AttVal av; if (lexer.styles == null && niceBody(lexer, doc)) { return; } node = lexer.newNode(Node.START_TAG, null, 0, 0, "style"); node.implicit = true; // insert type attribute av = new AttVal(null, null, '"', "type", "text/css"); av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); node.attributes = av; body = doc.findBody(lexer.configuration.tt); lexer.txtstart = lexer.lexsize; if (body != null) { cleanBodyAttrs(lexer, body); } for (style = lexer.styles; style != null; style = style.next) { lexer.addCharToLexer(' '); lexer.addStringLiteral(style.tag); lexer.addCharToLexer('.'); lexer.addStringLiteral(style.tagClass); lexer.addCharToLexer(' '); lexer.addCharToLexer('{'); lexer.addStringLiteral(style.properties); lexer.addCharToLexer('}'); lexer.addCharToLexer('\n'); } lexer.txtend = lexer.lexsize; node.insertNodeAtEnd(lexer.newNode(Node.TEXT_NODE, lexer.lexbuf, lexer.txtstart, lexer.txtend)); // now insert style element into document head doc is root node. search its children for html node the head // node should be first child of html node head = doc.findHEAD(lexer.configuration.tt); if (head != null) { head.insertNodeAtEnd(node); } } /** * Ensure bidirectional links are consistent. 
* The node's own prev/next/parent fields are taken as correct; its siblings, its parent's first/last child slots and
     * its children's parent field are updated to point back at it.
     * @param node node whose surrounding links must be repaired
     */
    private void fixNodeLinks(Node node)
    {
        Node child;

        // previous sibling, or the parent's first-child slot when there is none
        if (node.prev != null)
        {
            node.prev.next = node;
        }
        else
        {
            node.parent.content = node;
        }

        // next sibling, or the parent's last-child slot when there is none
        if (node.next != null)
        {
            node.next.prev = node;
        }
        else
        {
            node.parent.last = node;
        }

        for (child = node.content; child != null; child = child.next)
        {
            child.parent = node;
        }
    }

    /**
     * Used to strip child of node when the node has one and only one child. The grandchildren become the direct
     * children of node. The caller must make sure that node has a child.
     * @param node parent node
     */
    private void stripOnlyChild(Node node)
    {
        Node child;

        child = node.content;
        node.content = child.content;
        node.last = child.last;
        child.content = null;

        // re-parent the former grandchildren
        for (child = node.content; child != null; child = child.next)
        {
            child.parent = node;
        }
    }

    /**
     * Used to strip font start and end tags. The element is unlinked from its parent and its children (if any) take
     * its place among its siblings.
     * @param element original node
     * @param pnode passed in as array to allow modification. pnode[0] will contain the final node: the first child of
     * the discarded element, or its next sibling when it had no content
     * @todo remove the pnode parameter and make it a return value
     */
    private void discardContainer(Node element, Node[] pnode)
    {
        Node node;
        Node parent = element.parent;

        if (element.content != null)
        {
            // splice the children in place of the element
            element.last.next = element.next;
            if (element.next != null)
            {
                element.next.prev = element.last;
                element.last.next = element.next;
            }
            else
            {
                parent.last = element.last;
            }

            if (element.prev != null)
            {
                element.content.prev = element.prev;
                element.prev.next = element.content;
            }
            else
            {
                parent.content = element.content;
            }

            for (node = element.content; node != null; node = node.next)
            {
                node.parent = parent;
            }

            pnode[0] = element.content;
        }
        else
        {
            // empty element: simply unlink it
            if (element.next != null)
            {
                element.next.prev = element.prev;
            }
            else
            {
                parent.last = element.prev;
            }

            if (element.prev != null)
            {
                element.prev.next = element.next;
            }
            else
            {
                parent.content = element.next;
            }

            pnode[0] = element.next;
        }

        // detach the discarded element from the tree
        element.next = null;
        element.content = null;
    }

    /**
     * Add style property to element, creating style attribute as needed and adding ; delimiter.
* @param node node * @param property property added to node */ private void addStyleProperty(Node node, String property) { AttVal av; for (av = node.attributes; av != null; av = av.next) { if (av.attribute.equals("style")) { break; } } // if style attribute already exists then insert property if (av != null) { String s; s = addProperty(av.value, property); av.value = s; } else { // else create new style attribute av = new AttVal(node.attributes, null, '"', "style", property); av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); node.attributes = av; } } /** * Create new string that consists of the combined style properties in s1 and s2. To merge property lists, we build * a linked list of property/values and insert properties into the list in order, merging values for the same * property name. * @param s1 first property * @param s2 second property * @return merged properties */ private String mergeProperties(String s1, String s2) { String s; StyleProp prop; prop = createProps(null, s1); prop = createProps(prop, s2); s = createPropString(prop); return s; } /** * Merge class attributes from 2 nodes. * @param node Node * @param child Child node */ private void mergeClasses(Node node, Node child) { AttVal av; String s1, s2, names; for (s2 = null, av = child.attributes; av != null; av = av.next) { if ("class".equals(av.attribute)) { s2 = av.value; break; } } for (s1 = null, av = node.attributes; av != null; av = av.next) { if ("class".equals(av.attribute)) { s1 = av.value; break; } } if (s1 != null) { if (s2 != null) // merge class names from both { names = s1 + ' ' + s2; av.value = names; } } else if (s2 != null) // copy class names from child { av = new AttVal(node.attributes, null, '"', "class", s2); av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); node.attributes = av; } } /** * Merge style from 2 nodes. 
* @param node Node * @param child Child node */ private void mergeStyles(Node node, Node child) { AttVal av; String s1, s2, style; // the child may have a class attribute used for attaching styles, if so the class name needs to be copied to // node's class mergeClasses(node, child); for (s2 = null, av = child.attributes; av != null; av = av.next) { if (av.attribute.equals("style")) { s2 = av.value; break; } } for (s1 = null, av = node.attributes; av != null; av = av.next) { if (av.attribute.equals("style")) { s1 = av.value; break; } } if (s1 != null) { if (s2 != null) // merge styles from both { style = mergeProperties(s1, s2); av.value = style; } } else if (s2 != null) // copy style of child { av = new AttVal(node.attributes, null, '"', "style", s2); av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av); node.attributes = av; } } /** * Map a % font size to a named font size. * @param size size in % * @return font size name */ private String fontSize2Name(String size) { String[] sizes = {"60%", "70%", "80%", null, "120%", "150%", "200%"}; String buf; if (size.length() > 0 && '0' <= size.charAt(0) && size.charAt(0) <= '6') { int n = size.charAt(0) - '0'; return sizes[n]; } if (size.length() > 0 && size.charAt(0) == '-') { if (size.length() > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6') { int n = size.charAt(1) - '0'; double x; for (x = 1.0; n > 0; --n) { x *= 0.8; } x *= 100.0; buf = "" + (int) x + "%"; return buf; } return "smaller"; /* "70%"; */ } if (size.length() > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6') { int n = size.charAt(1) - '0'; double x; for (x = 1.0; n > 0; --n) { x *= 1.2; } x *= 100.0; buf = "" + (int) x + "%"; return buf; } return "larger"; /* "140%" */ } /** * Adds a font-family style. * @param node Node * @param face font face */ private void addFontFace(Node node, String face) { addStyleProperty(node, "font-family: " + face); } /** * Adds a font size style. 
* @param node Node * @param size font size */ private void addFontSize(Node node, String size) { if (size == null) { return; } if ("6".equals(size) && node.tag == this.tt.tagP) { node.element = "h1"; this.tt.findTag(node); return; } if ("5".equals(size) && node.tag == this.tt.tagP) { node.element = "h2"; this.tt.findTag(node); return; } if ("4".equals(size) && node.tag == this.tt.tagP) { node.element = "h3"; this.tt.findTag(node); return; } String value = fontSize2Name(size); if (value != null) { addStyleProperty(node, "font-size: " + value); } } /** * Adds a font color style. * @param node Node * @param color color value */ private void addFontColor(Node node, String color) { addStyleProperty(node, "color: " + color); } /** * Adds an align style. * @param node Node * @param align align value */ private void addAlign(Node node, String align) { // force alignment value to lower case addStyleProperty(node, "text-align: " + align.toLowerCase()); } /** * Add style properties to node corresponding to the font face, size and color attributes. * @param node font tag * @param av attribute list for node */ private void addFontStyles(Node node, AttVal av) { while (av != null) { if (av.attribute.equals("face")) { addFontFace(node, av.value); } else if (av.attribute.equals("size")) { addFontSize(node, av.value); } else if (av.attribute.equals("color")) { addFontColor(node, av.value); } av = av.next; } } /** * Symptom: <code><p align=center></code>. Action: <code><p style="text-align: center"></code>. * @param lexer Lexer * @param node node with center attribute. Will be modified to use css style. 
*/ private void textAlign(Lexer lexer, Node node) { AttVal av, prev; prev = null; for (av = node.attributes; av != null; av = av.next) { if (av.attribute.equals("align")) { if (prev != null) { prev.next = av.next; } else { node.attributes = av.next; } if (av.value != null) { addAlign(node, av.value); } break; } prev = av; } } /* Symptom: <table bgcolor="red"> Action: <table style="background-color: red"> */ private void tableBgColor(final Node node) { final AttVal attr = node.getAttrByName("bgcolor"); if (null != attr) { node.removeAttribute(attr); addStyleProperty(node, "background-color: " + attr.value); } } /** * Symptom: <code><dir><li></code> where <code><li></code> is only child. Action: coerce * <code><dir> <li></code> to <code><div></code> with indent. The clean up rules use the pnode argument * to return the next node when the original node has been deleted. * @param lexer Lexer * @param node dir tag * @return <code>true</code> if a dir tag has been coerced to a div */ private boolean dir2Div(Lexer lexer, Node node) { Node child; if (node.tag == this.tt.tagDir || node.tag == this.tt.tagUl || node.tag == this.tt.tagOl) { child = node.content; if (child == null) { return false; } // check child has no peers if (child.next != null) { return false; } if (child.tag != this.tt.tagLi) { return false; } if (!child.implicit) { return false; } // coerce dir to div node.tag = this.tt.tagDiv; node.element = "div"; addStyleProperty(node, "margin-left: 2em"); stripOnlyChild(node); return true; } return false; } /** * Symptom: * * <pre> * <center> * </pre>. 
* <p>
     * Action: replace <code>&lt;center&gt;</code> by <code>&lt;div style="text-align: center"&gt;</code>. When the
     * dropFontTags option is set the center element is discarded instead and a br element is inserted.
     * </p>
     * @param lexer Lexer
     * @param node center tag
     * @param pnode pnode[0] is the same as node, passed in as an array to allow modification
     * @return <code>true</code> if a center tag has been replaced by a div
     */
    private boolean center2Div(Lexer lexer, Node node, Node[] pnode)
    {
        if (node.tag == this.tt.tagCenter)
        {
            if (lexer.configuration.dropFontTags)
            {
                if (node.content != null)
                {
                    // remember the last child: the br goes right after the exposed content
                    Node last = node.last;
                    Node parent = node.parent;

                    discardContainer(node, pnode);

                    node = lexer.inferredTag("br");

                    if (last.next != null)
                    {
                        last.next.prev = node;
                    }

                    node.next = last.next;
                    last.next = node;
                    node.prev = last;

                    if (parent.last == last)
                    {
                        parent.last = node;
                    }

                    node.parent = parent;
                }
                else
                {
                    // empty center: the br takes the place the center element had among its siblings
                    Node prev = node.prev;
                    Node next = node.next;
                    Node parent = node.parent;
                    discardContainer(node, pnode);

                    node = lexer.inferredTag("br");
                    node.next = next;
                    node.prev = prev;
                    node.parent = parent;

                    if (next != null)
                    {
                        next.prev = node;
                    }
                    else
                    {
                        parent.last = node;
                    }

                    if (prev != null)
                    {
                        prev.next = node;
                    }
                    else
                    {
                        parent.content = node;
                    }
                }

                return true;
            }

            // default: coerce center to a div with centered text
            node.tag = this.tt.tagDiv;
            node.element = "div";
            addStyleProperty(node, "text-align: center");
            return true;
        }

        return false;
    }

    /**
     * Symptom: <code>&lt;div&gt;&lt;div&gt;...&lt;/div&gt;&lt;/div&gt;</code> Action: merge the two divs. This is
     * useful after nested &lt;dir&gt;s used by Word for indenting have been converted to &lt;div&gt;s.
     * @param lexer Lexer
     * @param node first div
     * @return true if the divs have been merged
     */
    private boolean mergeDivs(Lexer lexer, Node node)
    {
        Node child;

        if (node.tag != this.tt.tagDiv)
        {
            return false;
        }

        child = node.content;

        if (child == null)
        {
            return false;
        }

        if (child.tag != this.tt.tagDiv)
        {
            return false;
        }

        // the inner div must be the only child
        if (child.next != null)
        {
            return false;
        }

        mergeStyles(node, child);
        stripOnlyChild(node);
        return true;
    }

    /**
     * Symptom:
     *
     * <pre>
     * &lt;ul&gt;
     * &lt;li&gt;
     * &lt;ul&gt;
     * ...
     * &lt;/ul&gt;
     * &lt;/li&gt;
     * &lt;/ul&gt;
     * </pre>
     *
     * Action: discard outer list.
* @param lexer Lexer
     * @param node Node
     * @param pnode passed in as array to allow modifications. pnode[0] is set to the inner list when the rule applies
     * @return <code>true</code> if nested lists have been found and replaced
     */
    private boolean nestedList(Lexer lexer, Node node, Node[] pnode)
    {
        Node child, list;

        if (node.tag == this.tt.tagUl || node.tag == this.tt.tagOl)
        {
            child = node.content;

            if (child == null)
            {
                return false;
            }

            // check child has no peers
            if (child.next != null)
            {
                return false;
            }

            list = child.content;

            if (list == null)
            {
                return false;
            }

            // the inner list must be of the same kind (ul/ol) as the outer one
            if (list.tag != node.tag)
            {
                return false;
            }

            pnode[0] = list; // Set node to resume iteration

            // move inner list node into position of outer node
            list.prev = node.prev;
            list.next = node.next;
            list.parent = node.parent;
            fixNodeLinks(list);

            // get rid of outer ul and its li
            // XXX: Are we leaking the child node? -creitzel 7 Jun, 01
            child.content = null;
            node.content = null;
            node.next = null;
            node = null;

            // If prev node was a list the chances are this node should be appended to that list. Word has no way of
            // recognizing nested lists and just uses indents
            if (list.prev != null)
            {
                if (list.prev.tag == this.tt.tagUl || list.prev.tag == this.tt.tagOl)
                {
                    node = list;
                    list = node.prev;

                    // unlink node from its current position
                    list.next = node.next;

                    if (list.next != null)
                    {
                        list.next.prev = list;
                    }

                    // and append it to the last item of the previous list
                    child = list.last; /* <li> */

                    node.parent = child;
                    node.next = null;
                    node.prev = child.last;
                    fixNodeLinks(node);
                    cleanNode(lexer, node);
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Symptom: the only child of a block-level element is a presentation element such as B, I or FONT. Action: add
     * style "font-weight: bold" to the block and strip the &lt;b&gt; element, leaving its children. example:
     *
     * <pre>
     * &lt;p&gt;
     * &lt;b&gt;&lt;font face="Arial" size="6"&gt;Draft Recommended Practice&lt;/font&gt;&lt;/b&gt;
     * &lt;/p&gt;
     * </pre>
     *
     * becomes:
     *
     * <pre>
     * &lt;p style="font-weight: bold; font-family: Arial; font-size: 6"&gt;
     * Draft Recommended Practice
     * &lt;/p&gt;
     * </pre>
     *
     * <p>
     * This code also replaces the align attribute by a style attribute.
However, to avoid CSS problems with Navigator * 4, this isn't done for the elements: caption, tr and table * </p> * @param lexer Lexer * @param node parent node * @return <code>true</code> if the child node has been removed */ private boolean blockStyle(Lexer lexer, Node node) { /* check for bgcolor */ if (node.tag == tt.tagTable || node.tag == tt.tagTd || node.tag == tt.tagTh || node.tag == tt.tagTr) { tableBgColor(node); } Node child; if ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_LIST | Dict.CM_DEFLIST | Dict.CM_TABLE)) != 0) { if (node.tag != this.tt.tagTable && node.tag != this.tt.tagTr && node.tag != this.tt.tagLi) { // check for align attribute if (node.tag != this.tt.tagCaption) { textAlign(lexer, node); } child = node.content; if (child == null) { return false; } // check child has no peers if (child.next != null) { return false; } if (child.tag == this.tt.tagB) { mergeStyles(node, child); addStyleProperty(node, "font-weight: bold"); stripOnlyChild(node); return true; } if (child.tag == this.tt.tagI) { mergeStyles(node, child); addStyleProperty(node, "font-style: italic"); stripOnlyChild(node); return true; } if (child.tag == this.tt.tagFont) { mergeStyles(node, child); addFontStyles(node, child.attributes); stripOnlyChild(node); return true; } } } return false; } /** * If the node has only one b, i, or font child remove the child node and add the appropriate style attributes to * parent. * @param lexer Lexer * @param node parent node * @param pnode passed as an array to allow modifications * @return <code>true</code> if child node has been stripped, replaced by style attributes. 
*/ private boolean inlineStyle(Lexer lexer, Node node, Node[] pnode) { Node child; if (node.tag != this.tt.tagFont && (node.tag.model & (Dict.CM_INLINE | Dict.CM_ROW)) != 0) { child = node.content; if (child == null) { return false; } // check child has no peers if (child.next != null) { return false; } if (child.tag == this.tt.tagB && lexer.configuration.logicalEmphasis) { mergeStyles(node, child); addStyleProperty(node, "font-weight: bold"); stripOnlyChild(node); return true; } if (child.tag == this.tt.tagI && lexer.configuration.logicalEmphasis) { mergeStyles(node, child); addStyleProperty(node, "font-style: italic"); stripOnlyChild(node); return true; } if (child.tag == this.tt.tagFont) { mergeStyles(node, child); addFontStyles(node, child.attributes); stripOnlyChild(node); return true; } } return false; } /** * Replace font elements by span elements, deleting the font element's attributes and replacing them by a single * style attribute. * @param lexer Lexer * @param node font tag * @param pnode passed as an array to allow modifications * @return <code>true</code> if a font tag has been dropped and replaced by style attributes */ private boolean font2Span(Lexer lexer, Node node, Node[] pnode) { AttVal av, style, next; if (node.tag == this.tt.tagFont) { if (lexer.configuration.dropFontTags) { discardContainer(node, pnode); return false; } // if FONT is only child of parent element then leave alone if (node.parent.content == node && node.next == null) { return false; } addFontStyles(node, node.attributes); // extract style attribute and free the rest av = node.attributes; style = null; while (av != null) { next = av.next; if (av.attribute.equals("style")) { av.next = null; style = av; } av = next; } node.attributes = style; node.tag = this.tt.tagSpan; node.element = "span"; return true; } return false; } /** * Applies all matching rules to a node. 
* The rules are tried in a fixed order (dir2Div, nestedList, center2Div, mergeDivs, blockStyle, inlineStyle,
     * font2Span). Whenever a rule applies the loop starts over on the resulting node; it ends when no rule applies or
     * the current node is not an element.
     * @param lexer Lexer
     * @param node original node
     * @return cleaned up node
     */
    private Node cleanNode(Lexer lexer, Node node)
    {
        Node next = null;
        // one-element array used as an in/out parameter by the rules that may replace the node
        Node[] o = new Node[1];
        boolean b = false;

        for (next = node; node != null && node.isElement(); node = next)
        {
            o[0] = next;

            b = dir2Div(lexer, node);
            next = o[0];
            if (b)
            {
                continue;
            }

            // Special case: true result means that arg node and its parent no longer exist.
            // So we must jump back up the CreateStyleProperties() call stack until we have a valid node reference.
            b = nestedList(lexer, node, o);
            next = o[0];
            if (b)
            {
                return next;
            }

            b = center2Div(lexer, node, o);
            next = o[0];
            if (b)
            {
                continue;
            }

            b = mergeDivs(lexer, node);
            next = o[0];
            if (b)
            {
                continue;
            }

            b = blockStyle(lexer, node);
            next = o[0];
            if (b)
            {
                continue;
            }

            b = inlineStyle(lexer, node, o);
            next = o[0];
            if (b)
            {
                continue;
            }

            b = font2Span(lexer, node, o);
            next = o[0];
            if (b)
            {
                continue;
            }

            // no rule applied
            break;
        }

        return next;
    }

    /**
     * Cleans the content of a node and then the node itself. Special case: if the current node is destroyed by
     * CleanNode() lower in the tree, this node and its parent no longer exist. So we must jump back up the
     * CreateStyleProperties() call stack until we have a valid node reference.
     * @param lexer Lexer
     * @param node Node
     * @param prepl passed in as array to allow modifications (not written to by this method)
     * @return cleaned Node
     */
    private Node createStyleProperties(Lexer lexer, Node node, Node[] prepl)
    {
        Node child = node.content;

        if (child != null)
        {
            Node[] repl = new Node[1];
            repl[0] = node;
            while (child != null)
            {
                // children first: the call returns the node to continue from
                child = createStyleProperties(lexer, child, repl);
                if (repl[0] != node)
                {
                    return repl[0];
                }
                if (child != null)
                {
                    child = child.next;
                }
            }
        }

        return cleanNode(lexer, node);
    }

    /**
     * Find style attribute in node content, and replace it by corresponding class attribute.
* @param lexer Lexer * @param node parent node */ private void defineStyleRules(Lexer lexer, Node node) { Node child; if (node.content != null) { child = node.content; while (child != null) { defineStyleRules(lexer, child); child = child.next; } } style2Rule(lexer, node); } /** * Clean an html tree. * @param lexer Lexer * @param doc root node */ public void cleanTree(Lexer lexer, Node doc) { Node[] repl = new Node[1]; repl[0] = doc; doc = createStyleProperties(lexer, doc, repl); if (lexer.configuration.makeClean) { defineStyleRules(lexer, doc); createStyleElement(lexer, doc); } } /** * simplifies <b><b>... </b> ... </b> etc. * @param node root Node */ public void nestedEmphasis(Node node) { Node[] o = new Node[1]; Node next; while (node != null) { next = node.next; if ((node.tag == this.tt.tagB || node.tag == this.tt.tagI) && node.parent != null && node.parent.tag == node.tag) { // strip redundant inner element o[0] = next; discardContainer(node, o); next = o[0]; node = next; continue; } if (node.content != null) { nestedEmphasis(node.content); } node = next; } } /** * Replace i by em and b by strong. * @param node root Node */ public void emFromI(Node node) { while (node != null) { if (node.tag == this.tt.tagI) { node.element = this.tt.tagEm.name; node.tag = this.tt.tagEm; } else if (node.tag == this.tt.tagB) { node.element = this.tt.tagStrong.name; node.tag = this.tt.tagStrong; } if (node.content != null) { emFromI(node.content); } node = node.next; } } /** * Some people use dir or ul without an li to indent the content. The pattern to look for is a list with a single * implicit li. This is recursively replaced by an implicit blockquote. 
* @param node root Node */ public void list2BQ(Node node) { while (node != null) { if (node.content != null) { list2BQ(node.content); } if (node.tag != null && node.tag.getParser() == ParserImpl.LIST && node.hasOneChild() && node.content.implicit) { stripOnlyChild(node); node.element = this.tt.tagBlockquote.name; node.tag = this.tt.tagBlockquote; node.implicit = true; } node = node.next; } } /** * Replace implicit blockquote by div with an indent taking care to reduce nested blockquotes to a single div with * the indent set to match the nesting depth. * @param node root Node */ public void bQ2Div(Node node) { int indent; String indentBuf; AttVal attval; while (node != null) { if (node.tag == this.tt.tagBlockquote && node.implicit) { indent = 1; while (node.hasOneChild() && node.content.tag == this.tt.tagBlockquote && node.implicit) { ++indent; stripOnlyChild(node); } if (node.content != null) { bQ2Div(node.content); } indentBuf = "margin-left: " + (new Integer(2 * indent)).toString() + "em"; node.element = this.tt.tagDiv.name; node.tag = this.tt.tagDiv; attval = node.getAttrByName("style"); if (attval != null && attval.value != null) { attval.value = indentBuf + "; " + attval.value; } else { node.addAttribute("style", indentBuf); } } else if (node.content != null) { bQ2Div(node.content); } node = node.next; } } /** * Find the enclosing table cell for the given node. * @param node Node * @return enclosing cell node */ Node findEnclosingCell(Node node) { Node check; for (check = node; check != null; check = check.parent) { if (check.tag == tt.tagTd) { return check; } } return null; } /** * node is <code><![if ...]></code> prune up to <code><![endif]></code>. 
* @param lexer Lexer * @param node Node * @return cleaned up Node */ public Node pruneSection(Lexer lexer, Node node) { for (;;) { // FG: commented out - don't add   to empty cells // if ((Lexer.getString(node.textarray, node.start, 21)).equals("if !supportEmptyParas")) // { // Node cell = findEnclosingCell(node); // if (cell != null) // { // // Need to put   into cell so it doesn't look weird // char onesixty[] = {(char) 160, (char) 0}; // Node nbsp = lexer.newLiteralTextNode(lexer, onesixty); // Node.insertNodeBeforeElement(node, nbsp); // } // } // discard node and returns next node = Node.discardElement(node); if (node == null) { return null; } if (node.type == Node.SECTION_TAG) { if ((TidyUtils.getString(node.textarray, node.start, 2)).equals("if")) { node = pruneSection(lexer, node); continue; } if ((TidyUtils.getString(node.textarray, node.start, 5)).equals("endif")) { node = Node.discardElement(node); break; } } } return node; } /** * Drop if/endif sections inserted by word2000. * @param lexer Lexer * @param node Node root node */ public void dropSections(Lexer lexer, Node node) { while (node != null) { if (node.type == Node.SECTION_TAG) { // prune up to matching endif if ((TidyUtils.getString(node.textarray, node.start, 2)).equals("if") && (!(TidyUtils.getString(node.textarray, node.start, 7)).equals("if !vml"))) // #444394 - fix 13 // Sep 01 { node = pruneSection(lexer, node); continue; } // discard others as well node = Node.discardElement(node); continue; } if (node.content != null) { dropSections(lexer, node.content); } node = node.next; } } /** * Remove word2000 attributes from node. 
* @param node node to cleanup */ public void purgeWord2000Attributes(Node node) { AttVal attr = null; AttVal next = null; AttVal prev = null; for (attr = node.attributes; attr != null; attr = next) { next = attr.next; // special check for class="Code" denoting pre text // Pass thru user defined styles as HTML class names if (attr.attribute != null && attr.value != null && attr.attribute.equals("class")) { if (attr.value.equals("Code") || !attr.value.startsWith("Mso")) { prev = attr; continue; } } if (attr.attribute != null && (attr.attribute.equals("class") || attr.attribute.equals("style") || attr.attribute.equals("lang") || attr.attribute.startsWith("x:") || ((attr.attribute.equals("height") || attr.attribute .equals("width")) && // (node.tag == this.tt.tagTd || node.tag == this.tt.tagTr || node.tag == this.tt.tagTh)))) { if (prev != null) { prev.next = next; } else { node.attributes = next; } } else { prev = attr; } } } /** * Word2000 uses span excessively, so we strip span out. * @param lexer Lexer * @param span Node span * @return cleaned node */ public Node stripSpan(Lexer lexer, Node span) { Node node; Node prev = null; Node content; // deal with span elements that have content by splicing the content in place of the span after having // processed it cleanWord2000(lexer, span.content); content = span.content; if (span.prev != null) { prev = span.prev; } else if (content != null) { node = content; content = content.next; node.removeNode(); Node.insertNodeBeforeElement(span, node); prev = node; } while (content != null) { node = content; content = content.next; node.removeNode(); prev.insertNodeAfterElement(node); prev = node; } if (span.next == null) { span.parent.last = prev; } node = span.next; span.content = null; Node.discardElement(span); return node; } /** * Map non-breaking spaces to regular spaces. 
* @param lexer Lexer * @param node Node */ private void normalizeSpaces(Lexer lexer, Node node) { while (node != null) { if (node.content != null) { normalizeSpaces(lexer, node.content); } if (node.type == Node.TEXT_NODE) { int i; int[] c = new int[1]; int p = node.start; for (i = node.start; i < node.end; ++i) { c[0] = node.textarray[i]; // look for UTF-8 multibyte character if (c[0] > 0x7F) { i += PPrint.getUTF8(node.textarray, i, c); } if (c[0] == 160) { c[0] = ' '; } p = PPrint.putUTF8(node.textarray, p, c[0]); } } node = node.next; } } /** * Used to hunt for hidden preformatted sections. * @param node checked node * @return <code>true</code> if the node has a "margin-top: 0" or "margin-bottom: 0" style */ boolean noMargins(Node node) { AttVal attval = node.getAttrByName("style"); if (attval == null || attval.value == null) { return false; } // search for substring "margin-top: 0" if (attval.value.indexOf("margin-top: 0") == -1) { return false; } // search for substring "margin-top: 0" if (attval.value.indexOf("margin-bottom: 0") == -1) { return false; } return true; } /** * Does element have a single space as its content? * @param lexer Lexer * @param node checked node * @return <code>true</code> if the element has a single space as its content */ boolean singleSpace(Lexer lexer, Node node) { if (node.content != null) { node = node.content; if (node.next != null) { return false; } if (node.type != Node.TEXT_NODE) { return false; } if (((node.end - node.start) == 1) && lexer.lexbuf[node.start] == ' ') { return true; } if ((node.end - node.start) == 2) { int[] c = new int[1]; PPrint.getUTF8(lexer.lexbuf, node.start, c); if (c[0] == 160) { return true; } } } return false; } /** * This is a major clean up to strip out all the extra stuff you get when you save as web page from Word 2000. It * doesn't yet know what to do with VML tags, but these will appear as errors unless you declare them as new tags, * such as o:p which needs to be declared as inline. 
* @param lexer Lexer * @param node node to clean up */ public void cleanWord2000(Lexer lexer, Node node) { // used to a list from a sequence of bulletted p's Node list = null; while (node != null) { // get rid of Word's xmlns attributes if (node.tag == tt.tagHtml) { // check that it's a Word 2000 document if ((node.getAttrByName("xmlns:o") == null)) { return; } lexer.configuration.tt.freeAttrs(node); } // fix up preformatted sections by looking for a sequence of paragraphs with zero top/bottom margin if (node.tag == tt.tagP) { if (noMargins(node)) { Node pre; Node next; Node.coerceNode(lexer, node, tt.tagPre); purgeWord2000Attributes(node); if (node.content != null) { cleanWord2000(lexer, node.content); } pre = node; node = node.next; // continue to strip p's while (node != null && node.tag == tt.tagP && noMargins(node)) { next = node.next; node.removeNode(); pre.insertNodeAtEnd(lexer.newLineNode()); pre.insertNodeAtEnd(node); stripSpan(lexer, node); node = next; } if (node == null) { break; } } } if (node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_BLOCK) && singleSpace(lexer, node)) { node = stripSpan(lexer, node); continue; } // discard Word's style verbiage if (node.tag == this.tt.tagStyle || node.tag == this.tt.tagMeta || node.type == Node.COMMENT_TAG) { node = Node.discardElement(node); continue; } // strip out all span and font tags Word scatters so liberally! 
if (node.tag == this.tt.tagSpan || node.tag == this.tt.tagFont) { node = stripSpan(lexer, node); continue; } if (node.tag == this.tt.tagLink) { AttVal attr = node.getAttrByName("rel"); if (attr != null && attr.value != null && attr.value.equals("File-List")) { node = Node.discardElement(node); continue; } } // discard empty paragraphs if (node.content == null && node.tag == this.tt.tagP) { node = Node.discardElement(node); continue; } if (node.tag == this.tt.tagP) { AttVal attr = node.getAttrByName("class"); AttVal atrStyle = node.getAttrByName("style"); // (JES) Sometimes Word marks a list item with the following hokie syntax // <p class="MsoNormal" style="...;mso-list:l1 level1 lfo1; // translate these into <li> // map sequence of <p class="MsoListBullet"> to <ul> ... </ul> // map <p class="MsoListNumber"> to <ol>...</ol> if (attr != null && attr.value != null && ((attr.value.equals("MsoListBullet") || attr.value.equals("MsoListNumber")) // || (atrStyle != null && (atrStyle.value.indexOf("mso-list:") != -1)))) // 463066 - fix by Joel // Shafer 19 Sep 01 { Dict listType = tt.tagUl; if (attr.value.equals("MsoListNumber")) { listType = tt.tagOl; } Node.coerceNode(lexer, node, this.tt.tagLi); if (list == null || list.tag != listType) { list = lexer.inferredTag(listType.name); Node.insertNodeBeforeElement(node, list); } purgeWord2000Attributes(node); if (node.content != null) { cleanWord2000(lexer, node.content); } // remove node and append to contents of list node.removeNode(); list.insertNodeAtEnd(node); node = list; } // map sequence of <p class="Code"> to <pre> ... 
</pre> else if (attr != null && attr.value != null && attr.value.equals("Code")) { Node br = lexer.newLineNode(); normalizeSpaces(lexer, node); if (list == null || list.tag != this.tt.tagPre) { list = lexer.inferredTag("pre"); Node.insertNodeBeforeElement(node, list); } // remove node and append to contents of list node.removeNode(); list.insertNodeAtEnd(node); stripSpan(lexer, node); list.insertNodeAtEnd(br); node = list.next; } else { list = null; } } else { list = null; } // strip out style and class attributes if (node.type == Node.START_TAG || node.type == Node.START_END_TAG) { purgeWord2000Attributes(node); } if (node.content != null) { cleanWord2000(lexer, node.content); } node = node.next; } } /** * Check if the current document is a converted Word document. * @param root root Node * @return <code>true</code> if the document has been geenrated by Microsoft Word. */ public boolean isWord2000(Node root) { AttVal attval; Node node; Node head; Node html = root.findHTML(this.tt); if (html != null && html.getAttrByName("xmlns:o") != null) { return true; } // search for <meta name="GENERATOR" content="Microsoft ..."> head = root.findHEAD(tt); if (head != null) { for (node = head.content; node != null; node = node.next) { if (node.tag != tt.tagMeta) { continue; } attval = node.getAttrByName("name"); if (attval == null || attval.value == null) { continue; } if (!"generator".equals(attval.value)) { continue; } attval = node.getAttrByName("content"); if (attval == null || attval.value == null) { continue; } if (attval.value.indexOf("Microsoft") != -1) { return true; } } } return false; } /** * Where appropriate move object elements from head to body. 
* @param lexer Lexer * @param html html node */ static void bumpObject(Lexer lexer, Node html) { if (html == null) { return; } Node node, next, head = null, body = null; TagTable tt = lexer.configuration.tt; for (node = html.content; node != null; node = node.next) { if (node.tag == tt.tagHead) { head = node; } if (node.tag == tt.tagBody) { body = node; } } if (head != null && body != null) { for (node = head.content; node != null; node = next) { next = node.next; if (node.tag == tt.tagObject) { Node child; boolean bump = false; for (child = node.content; child != null; child = child.next) { // bump to body unless content is param if ((child.type == Node.TEXT_NODE && !node.isBlank(lexer)) || child.tag != tt.tagParam) { bump = true; break; } } if (bump) { node.removeNode(); body.insertNodeAtStart(node); } } } } } }����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/DOMTextImpl.java���������������������������������������������������0000644�0001750�0001750�00000010237�10144212711�021774� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import org.w3c.dom.DOMException; import org.w3c.dom.Text; /** * DOMTextImpl. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 738 $ ($Author: fgiust $) */ public class DOMTextImpl extends DOMCharacterDataImpl implements org.w3c.dom.Text { /** * Instantiates a new DOM text node. * @param adaptee wrapped Tidy node */ protected DOMTextImpl(Node adaptee) { super(adaptee); } /** * @see org.w3c.dom.Node#getNodeName */ public String getNodeName() { return "#text"; } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { return org.w3c.dom.Node.TEXT_NODE; } /** * @todo DOM level 2 splitText() Not supported. Throws NO_MODIFICATION_ALLOWED_ERR. * @see org.w3c.dom.Text#splitText(int) */ public org.w3c.dom.Text splitText(int offset) throws DOMException { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Not supported"); } /** * @todo DOM level 3 getWholeText() Not implemented. Returns null. * @see org.w3c.dom.Text#getWholeText() */ public String getWholeText() { return null; } /** * @todo DOM level 3 isElementContentWhitespace() Not implemented. Returns false. * @see org.w3c.dom.Text#isElementContentWhitespace() */ public boolean isElementContentWhitespace() { return false; } /** * @todo DOM level 3 replaceWholeText() Not implemented. Returns the same node. 
* @see org.w3c.dom.Text#isElementContentWhitespace() */ public Text replaceWholeText(String content) throws DOMException { return this; } }�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/Anchor.java��������������������������������������������������������0000644�0001750�0001750�00000005227�10103517371�021110� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Anchor/node Linked list. * @author hoehrmann * @author Fabrizio Giustina * @version $Revision: 407 $ ($Author: fgiust $) */ public class Anchor { /** * Anchor name. */ protected String name; /** * Next anchor. */ protected Anchor next; /** * linked node. 
*/ protected Node node; }�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/AttributeTable.java������������������������������������������������0000644�0001750�0001750�00000052766�10111346361�022621� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.util.Hashtable; import java.util.Map; /** * HTML attribute hash table. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 486 $ ($Author: fgiust $) */ public class AttributeTable { /** * href attribute. */ protected static Attribute attrHref; /** * src attribute. 
*/
    protected static Attribute attrSrc;

    /**
     * id attribute.
     */
    protected static Attribute attrId;

    /**
     * name attribute.
     */
    protected static Attribute attrName;

    /**
     * summary attribute.
     */
    protected static Attribute attrSummary;

    /**
     * alt attribute.
     */
    protected static Attribute attrAlt;

    /**
     * longdesc attribute.
     */
    protected static Attribute attrLongdesc;

    /**
     * usemap attribute.
     */
    protected static Attribute attrUsemap;

    /**
     * ismap attribute.
     */
    protected static Attribute attrIsmap;

    /**
     * language attribute.
     */
    protected static Attribute attrLanguage;

    /**
     * type attribute.
     */
    protected static Attribute attrType;

    /**
     * title attribute.
     */
    protected static Attribute attrTitle;

    /**
     * xmlns attribute.
     */
    protected static Attribute attrXmlns;

    /**
     * value attribute.
     */
    protected static Attribute attrValue;

    /**
     * content attribute.
     */
    protected static Attribute attrContent;

    /**
     * datafld attribute.
     */
    protected static Attribute attrDatafld;

    /**
     * width attribute.
     */
    protected static Attribute attrWidth;

    /**
     * height attribute.
     */
    protected static Attribute attrHeight;

    /**
     * Shared default attribute table instance.
     */
    private static AttributeTable defaultAttributeTable;

    /**
     * all the known attributes.
*/ private static final Attribute[] ATTRS = { new Attribute("abbr", Dict.VERS_HTML40, AttrCheckImpl.TEXT), new Attribute("accept-charset", Dict.VERS_HTML40, AttrCheckImpl.CHARSET), new Attribute("accept", Dict.VERS_ALL, AttrCheckImpl.TYPE), new Attribute("accesskey", Dict.VERS_HTML40, AttrCheckImpl.CHARACTER), new Attribute("action", Dict.VERS_ALL, AttrCheckImpl.URL), new Attribute("add_date", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT), // A new Attribute("align", Dict.VERS_ALL, AttrCheckImpl.ALIGN), // set varies with element new Attribute("alink", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), new Attribute("alt", Dict.VERS_ALL, AttrCheckImpl.TEXT), new Attribute("archive", Dict.VERS_HTML40, AttrCheckImpl.URLS), // space or comma separated list new Attribute("axis", Dict.VERS_HTML40, AttrCheckImpl.TEXT), new Attribute("background", Dict.VERS_LOOSE, AttrCheckImpl.URL), new Attribute("bgcolor", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), new Attribute("bgproperties", Dict.VERS_PROPRIETARY, AttrCheckImpl.TEXT), // BODY "fixed" fixes background new Attribute("border", Dict.VERS_ALL, AttrCheckImpl.BOOL), // like LENGTH + "border" new Attribute("bordercolor", Dict.VERS_MICROSOFT, AttrCheckImpl.COLOR), // used on TABLE new Attribute("bottommargin", Dict.VERS_MICROSOFT, AttrCheckImpl.NUMBER), // used on BODY new Attribute("cellpadding", Dict.VERS_FROM32, AttrCheckImpl.LENGTH), // % or pixel values new Attribute("cellspacing", Dict.VERS_FROM32, AttrCheckImpl.LENGTH), new Attribute("char", Dict.VERS_HTML40, AttrCheckImpl.CHARACTER), new Attribute("charoff", Dict.VERS_HTML40, AttrCheckImpl.LENGTH), new Attribute("charset", Dict.VERS_HTML40, AttrCheckImpl.CHARSET), new Attribute("checked", Dict.VERS_ALL, AttrCheckImpl.BOOL), // i.e. 
"checked" or absent new Attribute("cite", Dict.VERS_HTML40, AttrCheckImpl.URL), new Attribute("class", Dict.VERS_HTML40, AttrCheckImpl.TEXT), new Attribute("classid", Dict.VERS_HTML40, AttrCheckImpl.URL), new Attribute("clear", Dict.VERS_LOOSE, AttrCheckImpl.CLEAR), // BR: left, right, all new Attribute("code", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), // APPLET new Attribute("codebase", Dict.VERS_HTML40, AttrCheckImpl.URL), // OBJECT new Attribute("codetype", Dict.VERS_HTML40, AttrCheckImpl.TYPE), // OBJECT new Attribute("color", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), // BASEFONT, FONT new Attribute("cols", Dict.VERS_IFRAME, AttrCheckImpl.COLS), // TABLE & FRAMESET new Attribute("colspan", Dict.VERS_FROM32, AttrCheckImpl.NUMBER), new Attribute("compact", Dict.VERS_ALL, AttrCheckImpl.BOOL), // lists new Attribute("content", Dict.VERS_ALL, AttrCheckImpl.TEXT), // META new Attribute("coords", Dict.VERS_FROM32, AttrCheckImpl.COORDS), // AREA, A new Attribute("data", Dict.VERS_HTML40, AttrCheckImpl.URL), // OBJECT new Attribute("datafld", Dict.VERS_MICROSOFT, AttrCheckImpl.TEXT), // used on DIV, IMG new Attribute("dataformatas", Dict.VERS_MICROSOFT, AttrCheckImpl.TEXT), // used on DIV, IMG new Attribute("datapagesize", Dict.VERS_MICROSOFT, AttrCheckImpl.NUMBER), // used on DIV, IMG new Attribute("datasrc", Dict.VERS_MICROSOFT, AttrCheckImpl.URL), // used on TABLE new Attribute("datetime", Dict.VERS_HTML40, AttrCheckImpl.DATE), // INS, DEL new Attribute("declare", Dict.VERS_HTML40, AttrCheckImpl.BOOL), // OBJECT new Attribute("defer", Dict.VERS_HTML40, AttrCheckImpl.BOOL), // SCRIPT new Attribute("dir", Dict.VERS_HTML40, AttrCheckImpl.TEXTDIR), // ltr or rtl new Attribute("disabled", Dict.VERS_HTML40, AttrCheckImpl.BOOL), // form fields new Attribute("enctype", Dict.VERS_ALL, AttrCheckImpl.TYPE), // FORM new Attribute("face", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), // BASEFONT, FONT new Attribute("for", Dict.VERS_HTML40, AttrCheckImpl.IDREF), // LABEL new Attribute("frame", 
Dict.VERS_HTML40, AttrCheckImpl.TFRAME), // TABLE new Attribute("frameborder", (short) (Dict.VERS_FRAMESET | Dict.VERS_IFRAME), AttrCheckImpl.FBORDER), // 0 or 1 new Attribute("framespacing", Dict.VERS_PROPRIETARY, AttrCheckImpl.NUMBER), // pixel value new Attribute("gridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.NUMBER), // TABLE Adobe golive new Attribute("gridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.NUMBER), // TABLE Adobe golive new Attribute("headers", Dict.VERS_HTML40, AttrCheckImpl.IDREF), // table cells new Attribute("height", Dict.VERS_ALL, AttrCheckImpl.LENGTH), // pixels only for TH/TD new Attribute("href", Dict.VERS_ALL, AttrCheckImpl.URL), // A, AREA, LINK and BASE new Attribute("hreflang", Dict.VERS_HTML40, AttrCheckImpl.LANG), // A, LINK new Attribute("hspace", Dict.VERS_ALL, AttrCheckImpl.NUMBER), // APPLET, IMG, OBJECT new Attribute("http-equiv", Dict.VERS_ALL, AttrCheckImpl.TEXT), // META new Attribute("id", Dict.VERS_HTML40, AttrCheckImpl.ID), new Attribute("ismap", Dict.VERS_ALL, AttrCheckImpl.BOOL), // IMG new Attribute("label", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // OPT, OPTGROUP new Attribute("lang", Dict.VERS_HTML40, AttrCheckImpl.LANG), new Attribute("language", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), // SCRIPT new Attribute("last_modified", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT), // A new Attribute("last_visit", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT), // A new Attribute("leftmargin", Dict.VERS_MICROSOFT, AttrCheckImpl.NUMBER), // used on BODY new Attribute("link", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), // BODY new Attribute("longdesc", Dict.VERS_HTML40, AttrCheckImpl.URL), // IMG new Attribute("lowsrc", Dict.VERS_PROPRIETARY, AttrCheckImpl.URL), // IMG new Attribute("marginheight", Dict.VERS_IFRAME, AttrCheckImpl.NUMBER), // FRAME, IFRAME, BODY new Attribute("marginwidth", Dict.VERS_IFRAME, AttrCheckImpl.NUMBER), // ditto new Attribute("maxlength", Dict.VERS_ALL, AttrCheckImpl.NUMBER), // INPUT new Attribute("media", Dict.VERS_HTML40, 
AttrCheckImpl.MEDIA), // STYLE, LINK new Attribute("method", Dict.VERS_ALL, AttrCheckImpl.FSUBMIT), // FORM: get or post new Attribute("multiple", Dict.VERS_ALL, AttrCheckImpl.BOOL), // SELECT new Attribute("name", Dict.VERS_ALL, AttrCheckImpl.NAME), new Attribute("nohref", Dict.VERS_FROM32, AttrCheckImpl.BOOL), // AREA new Attribute("noresize", Dict.VERS_FRAMESET, AttrCheckImpl.BOOL), // FRAME new Attribute("noshade", Dict.VERS_LOOSE, AttrCheckImpl.BOOL), // HR new Attribute("nowrap", Dict.VERS_LOOSE, AttrCheckImpl.BOOL), // table cells new Attribute("object", Dict.VERS_HTML40_LOOSE, AttrCheckImpl.TEXT), // APPLET new Attribute("onblur", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onchange", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onclick", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("ondblclick", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onkeydown", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onkeypress", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onkeyup", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onload", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onmousedown", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onmousemove", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onmouseout", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onmouseover", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onmouseup", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onsubmit", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onreset", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onselect", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onunload", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event new Attribute("onfocus", Dict.VERS_EVENTS, AttrCheckImpl.SCRIPT), // event 
new Attribute("onafterupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // form fields new Attribute("onbeforeupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // form fields new Attribute("onerrorupdate", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // form fields new Attribute("onrowenter", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // form fields new Attribute("onrowexit", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // form fields new Attribute("onbeforeunload", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // form fields new Attribute("ondatasetchanged", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // object, applet new Attribute("ondataavailable", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // object, applet new Attribute("ondatasetcomplete", Dict.VERS_MICROSOFT, AttrCheckImpl.SCRIPT), // object, applet new Attribute("profile", Dict.VERS_HTML40, AttrCheckImpl.URL), // HEAD new Attribute("prompt", Dict.VERS_LOOSE, AttrCheckImpl.TEXT), // ISINDEX new Attribute("readonly", Dict.VERS_HTML40, AttrCheckImpl.BOOL), // form fields new Attribute("rel", Dict.VERS_ALL, AttrCheckImpl.LINKTYPES), // A, LINK new Attribute("rev", Dict.VERS_ALL, AttrCheckImpl.LINKTYPES), // A, LINK new Attribute("rightmargin", Dict.VERS_MICROSOFT, AttrCheckImpl.NUMBER), // used on BODY new Attribute("rows", Dict.VERS_ALL, AttrCheckImpl.NUMBER), // TEXTAREA new Attribute("rowspan", Dict.VERS_ALL, AttrCheckImpl.NUMBER), // table cells new Attribute("rules", Dict.VERS_HTML40, AttrCheckImpl.TRULES), // TABLE new Attribute("scheme", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // META new Attribute("scope", Dict.VERS_HTML40, AttrCheckImpl.SCOPE), // table cells new Attribute("scrolling", Dict.VERS_IFRAME, AttrCheckImpl.SCROLL), // yes, no or auto new Attribute("selected", Dict.VERS_ALL, AttrCheckImpl.BOOL), // OPTION new Attribute("shape", Dict.VERS_FROM32, AttrCheckImpl.SHAPE), // AREA, A new Attribute("showgrid", Dict.VERS_PROPRIETARY, AttrCheckImpl.BOOL), // TABLE Adobe golive new 
Attribute("showgridx", Dict.VERS_PROPRIETARY, AttrCheckImpl.BOOL), // TABLE Adobe golive new Attribute("showgridy", Dict.VERS_PROPRIETARY, AttrCheckImpl.BOOL), // TABLE Adobe golive new Attribute("size", Dict.VERS_LOOSE, AttrCheckImpl.NUMBER), // HR, FONT, BASEFONT, SELECT new Attribute("span", Dict.VERS_HTML40, AttrCheckImpl.NUMBER), // COL, COLGROUP new Attribute("src", Dict.VERS_ALL, AttrCheckImpl.URL), // IMG, FRAME, IFRAME new Attribute("standby", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // OBJECT new Attribute("start", Dict.VERS_ALL, AttrCheckImpl.NUMBER), // OL new Attribute("style", Dict.VERS_HTML40, AttrCheckImpl.TEXT), new Attribute("summary", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // TABLE new Attribute("tabindex", Dict.VERS_HTML40, AttrCheckImpl.NUMBER), // fields, OBJECT and A new Attribute("target", Dict.VERS_HTML40, AttrCheckImpl.TARGET), // names a frame/window new Attribute("text", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), // BODY new Attribute("title", Dict.VERS_HTML40, AttrCheckImpl.TEXT), // text tool tip new Attribute("topmargin", Dict.VERS_MICROSOFT, AttrCheckImpl.NUMBER), // used on BODY new Attribute("type", Dict.VERS_FROM32, AttrCheckImpl.TYPE), // also used by SPACER new Attribute("usemap", Dict.VERS_ALL, AttrCheckImpl.BOOL), // things with images new Attribute("valign", Dict.VERS_FROM32, AttrCheckImpl.VALIGN), new Attribute("value", Dict.VERS_ALL, AttrCheckImpl.TEXT), // OPTION, PARAM new Attribute("valuetype", Dict.VERS_HTML40, AttrCheckImpl.VTYPE), // PARAM: data, ref, object new Attribute("version", Dict.VERS_ALL, AttrCheckImpl.TEXT), // HTML new Attribute("vlink", Dict.VERS_LOOSE, AttrCheckImpl.COLOR), // BODY new Attribute("vspace", Dict.VERS_LOOSE, AttrCheckImpl.NUMBER), // IMG, OBJECT, APPLET new Attribute("width", Dict.VERS_ALL, AttrCheckImpl.LENGTH), // pixels only for TD/TH new Attribute("wrap", Dict.VERS_NETSCAPE, AttrCheckImpl.TEXT), // textarea new Attribute("xml:lang", Dict.VERS_XML, AttrCheckImpl.TEXT), // XML language new 
Attribute("xml:space", Dict.VERS_XML, AttrCheckImpl.TEXT), // XML language new Attribute("xmlns", Dict.VERS_ALL, AttrCheckImpl.TEXT), // name space new Attribute("rbspan", Dict.VERS_XHTML11, AttrCheckImpl.NUMBER), // ruby markup }; /** * Map containing all the installed attributes. */ private Map attributeHashtable = new Hashtable(); /** * lookup an installed Attribute. * @param name attribute name * @return Attribute or null if the attribute is not found */ public Attribute lookup(String name) { return (Attribute) this.attributeHashtable.get(name); } /** * installs a new Attribute. * @param attr Atribute * @return installed Attribute */ public Attribute install(Attribute attr) { return (Attribute) this.attributeHashtable.put(attr.getName(), attr); } /** * public method for finding attribute definition by name. * @param attval AttVal instance * @return Attribute with name = attval.name */ public Attribute findAttribute(AttVal attval) { Attribute np; if (attval.attribute != null) { np = lookup(attval.attribute); return np; } return null; } /** * Does the given attibute contains an url? * @param attrname attribute name * @return <code>true</code> if the given attribute is expected to contain an URL */ public boolean isUrl(String attrname) { Attribute np; np = lookup(attrname); return (np != null && np.getAttrchk() == AttrCheckImpl.URL); } /** * Does the given attibute contains a script? * @param attrname attribute name * @return <code>true</code> if the given attribute is expected to contain a script */ public boolean isScript(String attrname) { Attribute np; np = lookup(attrname); return (np != null && np.getAttrchk() == AttrCheckImpl.SCRIPT); } /** * Does the given attibute contains a literal attribute? 
* @param attrname attribute name * @return <code>true</code> if the given attribute is expected to contain a literal attribute */ public boolean isLiteralAttribute(String attrname) { Attribute np; np = lookup(attrname); return (np != null && np.isLiteral()); } /** * Declare a new literal attribute. * @param name atribute name */ public void declareLiteralAttrib(String name) { // Henry Zrepa reports that some folk are using embed with script attributes where newlines are signficant. // These // need to be declared and handled specially! Attribute attrib = lookup(name); if (attrib == null) { attrib = install(new Attribute(name, Dict.VERS_PROPRIETARY, null)); } attrib.setLiteral(true); } /** * Returns the default attribute table instance. * @return AttributeTable instance */ public static AttributeTable getDefaultAttributeTable() { if (defaultAttributeTable == null) { defaultAttributeTable = new AttributeTable(); for (int i = 0; i < ATTRS.length; i++) { defaultAttributeTable.install(ATTRS[i]); } attrHref = defaultAttributeTable.lookup("href"); attrSrc = defaultAttributeTable.lookup("src"); attrId = defaultAttributeTable.lookup("id"); attrName = defaultAttributeTable.lookup("name"); attrSummary = defaultAttributeTable.lookup("summary"); attrAlt = defaultAttributeTable.lookup("alt"); attrLongdesc = defaultAttributeTable.lookup("longdesc"); attrUsemap = defaultAttributeTable.lookup("usemap"); attrIsmap = defaultAttributeTable.lookup("ismap"); attrLanguage = defaultAttributeTable.lookup("language"); attrType = defaultAttributeTable.lookup("type"); attrTitle = defaultAttributeTable.lookup("title"); attrXmlns = defaultAttributeTable.lookup("xmlns"); attrValue = defaultAttributeTable.lookup("value"); attrContent = defaultAttributeTable.lookup("content"); attrDatafld = defaultAttributeTable.lookup("datafld"); attrWidth = defaultAttributeTable.lookup("width"); attrHeight = defaultAttributeTable.lookup("height"); attrAlt.setNowrap(true); attrValue.setNowrap(true); 
attrContent.setNowrap(true); } return defaultAttributeTable; } }����������jtidy/src/main/java/org/w3c/tidy/DOMNodeImpl.java���������������������������������������������������0000644�0001750�0001750�00000041126�11462075117�021751� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. 
* * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import org.w3c.dom.DOMException; import org.w3c.dom.UserDataHandler; /** * DOMNodeImpl. * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a> * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java) * @author Fabrizio Giustina * @version $Revision: 1132 $ ($Author: aditsu $) */ public class DOMNodeImpl implements org.w3c.dom.Node { /** * Wrapped tidy node. */ protected Node adaptee; /** * Intantiates a new DOM node. 
* @param adaptee wrapped Tidy node */ protected DOMNodeImpl(Node adaptee) { this.adaptee = adaptee; } /** * @see org.w3c.dom.Node#getNodeValue */ public String getNodeValue() { String value = ""; // BAK 10/10/2000 replaced null if (adaptee.type == Node.TEXT_NODE || adaptee.type == Node.CDATA_TAG || adaptee.type == Node.COMMENT_TAG || adaptee.type == Node.PROC_INS_TAG) { if (adaptee.textarray != null && adaptee.start < adaptee.end) { value = TidyUtils.getString(adaptee.textarray, adaptee.start, adaptee.end - adaptee.start); } } return value; } /** * @see org.w3c.dom.Node#setNodeValue */ public void setNodeValue(String nodeValue) { if (adaptee.type == Node.TEXT_NODE || adaptee.type == Node.CDATA_TAG || adaptee.type == Node.COMMENT_TAG || adaptee.type == Node.PROC_INS_TAG) { byte[] textarray = TidyUtils.getBytes(nodeValue); adaptee.textarray = textarray; adaptee.start = 0; adaptee.end = textarray.length; } } /** * @see org.w3c.dom.Node#getNodeName */ public String getNodeName() { return adaptee.element; } /** * @see org.w3c.dom.Node#getNodeType */ public short getNodeType() { short result = -1; switch (adaptee.type) { case Node.ROOT_NODE : result = org.w3c.dom.Node.DOCUMENT_NODE; break; case Node.DOCTYPE_TAG : result = org.w3c.dom.Node.DOCUMENT_TYPE_NODE; break; case Node.COMMENT_TAG : result = org.w3c.dom.Node.COMMENT_NODE; break; case Node.PROC_INS_TAG : result = org.w3c.dom.Node.PROCESSING_INSTRUCTION_NODE; break; case Node.TEXT_NODE : result = org.w3c.dom.Node.TEXT_NODE; break; case Node.CDATA_TAG : result = org.w3c.dom.Node.CDATA_SECTION_NODE; break; case Node.START_TAG : case Node.START_END_TAG : result = org.w3c.dom.Node.ELEMENT_NODE; break; } return result; } /** * @see org.w3c.dom.Node#getParentNode */ public org.w3c.dom.Node getParentNode() { // Attributes are not children in the DOM, and do not have parents if (adaptee.parent != null) { return adaptee.parent.getAdapter(); } return null; } /** * @see org.w3c.dom.Node#getChildNodes */ public 
org.w3c.dom.NodeList getChildNodes() { return new DOMNodeListImpl(adaptee); } /** * @see org.w3c.dom.Node#getFirstChild */ public org.w3c.dom.Node getFirstChild() { if (adaptee.content != null) { return adaptee.content.getAdapter(); } return null; } /** * @see org.w3c.dom.Node#getLastChild */ public org.w3c.dom.Node getLastChild() { if (adaptee.last != null) { return adaptee.last.getAdapter(); } return null; } /** * @see org.w3c.dom.Node#getPreviousSibling */ public org.w3c.dom.Node getPreviousSibling() { if (adaptee.prev != null) { return adaptee.prev.getAdapter(); } return null; } /** * @see org.w3c.dom.Node#getNextSibling */ public org.w3c.dom.Node getNextSibling() { if (adaptee.next != null) { return adaptee.next.getAdapter(); } return null; } /** * @see org.w3c.dom.Node#getAttributes */ public org.w3c.dom.NamedNodeMap getAttributes() { return new DOMAttrMapImpl(adaptee.attributes); } /** * @see org.w3c.dom.Node#getOwnerDocument */ public org.w3c.dom.Document getOwnerDocument() { Node node = this.adaptee; if (node != null && node.type == Node.ROOT_NODE) { return null; } while (node != null && node.type != Node.ROOT_NODE) { node = node.parent; } if (node != null) { return (org.w3c.dom.Document) node.getAdapter(); } return null; } /** * @see org.w3c.dom.Node#insertBefore */ public org.w3c.dom.Node insertBefore(org.w3c.dom.Node newChild, org.w3c.dom.Node refChild) { // TODO - handle newChild already in tree if (newChild == null) { return null; } if (!(newChild instanceof DOMNodeImpl)) { throw new DOMException(DOMException.WRONG_DOCUMENT_ERR, "newChild not instanceof DOMNodeImpl"); } DOMNodeImpl newCh = (DOMNodeImpl) newChild; if (this.adaptee.type == Node.ROOT_NODE) { if (newCh.adaptee.type != Node.DOCTYPE_TAG && newCh.adaptee.type != Node.PROC_INS_TAG) { throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild cannot be a child of this node"); } } else if (this.adaptee.type == Node.START_TAG) { if (newCh.adaptee.type != Node.START_TAG && 
newCh.adaptee.type != Node.START_END_TAG && newCh.adaptee.type != Node.COMMENT_TAG && newCh.adaptee.type != Node.TEXT_NODE && newCh.adaptee.type != Node.CDATA_TAG) { throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild cannot be a child of this node"); } } newCh.adaptee.removeNode(); if (refChild == null) { this.adaptee.insertNodeAtEnd(newCh.adaptee); if (this.adaptee.type == Node.START_END_TAG) { this.adaptee.setType(Node.START_TAG); } } else { final DOMNodeImpl refCh = (DOMNodeImpl) refChild; if (refCh.adaptee.parent != adaptee) { throw new DOMException(DOMException.NOT_FOUND_ERR, "refChild not found"); } Node.insertNodeBeforeElement(refCh.adaptee, newCh.adaptee); } return newChild; } /** * @see org.w3c.dom.Node#replaceChild */ public org.w3c.dom.Node replaceChild(org.w3c.dom.Node newChild, org.w3c.dom.Node oldChild) { insertBefore(newChild, oldChild); if (newChild != oldChild) { removeChild(oldChild); } return oldChild; } /** * @see org.w3c.dom.Node#removeChild */ public org.w3c.dom.Node removeChild(org.w3c.dom.Node oldChild) { if (oldChild == null) { return null; } Node ref = this.adaptee.content; while (ref != null) { if (ref.getAdapter() == oldChild) { break; } ref = ref.next; } if (ref == null) { throw new DOMException(DOMException.NOT_FOUND_ERR, "refChild not found"); } Node.discardElement(ref); if (this.adaptee.content == null && this.adaptee.type == Node.START_TAG) { this.adaptee.setType(Node.START_END_TAG); } return oldChild; } /** * @see org.w3c.dom.Node#appendChild */ public org.w3c.dom.Node appendChild(org.w3c.dom.Node newChild) { // TODO - handle newChild already in tree if (newChild == null) { return null; } if (!(newChild instanceof DOMNodeImpl)) { throw new DOMException(DOMException.WRONG_DOCUMENT_ERR, "newChild not instanceof DOMNodeImpl"); } DOMNodeImpl newCh = (DOMNodeImpl) newChild; if (newCh.adaptee == null) { throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild cannot be a child of this node"); } if 
(this.adaptee.type == Node.ROOT_NODE) { if (newCh.adaptee.type != Node.DOCTYPE_TAG && newCh.adaptee.type != Node.PROC_INS_TAG) { throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild cannot be a child of this node"); } } else if (this.adaptee.type == Node.START_TAG) { if (newCh.adaptee.type != Node.START_TAG && newCh.adaptee.type != Node.START_END_TAG && newCh.adaptee.type != Node.COMMENT_TAG && newCh.adaptee.type != Node.TEXT_NODE && newCh.adaptee.type != Node.CDATA_TAG) { throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR, "newChild cannot be a child of this node"); } } this.adaptee.insertNodeAtEnd(newCh.adaptee); if (this.adaptee.type == Node.START_END_TAG) { this.adaptee.setType(Node.START_TAG); } return newChild; } /** * @see org.w3c.dom.Node#hasChildNodes */ public boolean hasChildNodes() { return (adaptee.content != null); } /** * @see org.w3c.dom.Node#cloneNode(boolean) */ public org.w3c.dom.Node cloneNode(boolean deep) { Node node = adaptee.cloneNode(deep); node.parent = null; return node.getAdapter(); } /** * Do nothing: text nodes in html documents are important and jtidy already removes useless text during parsing. * @see org.w3c.dom.Node#normalize() */ public void normalize() { // do nothing } /** * DOM2 - not implemented. * @see #isSupported(java.lang.String, java.lang.String) */ public boolean supports(String feature, String version) { return isSupported(feature, version); } /** * @see org.w3c.dom.Node#getNamespaceURI() */ public String getNamespaceURI() { return null; } /** * @see org.w3c.dom.Node#getPrefix() */ public String getPrefix() { return null; } /** * @see org.w3c.dom.Node#setPrefix(java.lang.String) */ public void setPrefix(String prefix) throws DOMException { // The namespace prefix of this node, or null if it is unspecified. When it is defined to be null, setting it // has no effect, including if the node is read-only. 
// do nothing } /** * @see org.w3c.dom.Node#getLocalName() */ public String getLocalName() { return getNodeName(); } /** * @see org.w3c.dom.Node#isSupported(java.lang.String, java.lang.String) */ public boolean isSupported(String feature, String version) { return false; } /** * @see org.w3c.dom.Node#hasAttributes */ public boolean hasAttributes() { // contributed by dlp@users.sourceforge.net return this.adaptee.attributes != null; } /** * @todo DOM level 3 compareDocumentPosition() Not implemented. * @see org.w3c.dom.Node#compareDocumentPosition(org.w3c.dom.Node) */ public short compareDocumentPosition(org.w3c.dom.Node other) throws DOMException { throw new DOMException(DOMException.NOT_SUPPORTED_ERR, "DOM method not supported"); } /** * @todo DOM level 3 getBaseURI() Not implemented. Returns null. * @see org.w3c.dom.Node#getBaseURI() */ public String getBaseURI() { return null; } /** * @todo DOM level 3 getFeature() Not implemented. Returns null. * @see org.w3c.dom.Node#getFeature(java.lang.String, java.lang.String) */ public Object getFeature(String feature, String version) { return null; } /** * @todo DOM level 3 getTextContent() Not implemented. Returns null. * @see org.w3c.dom.Node#getTextContent() */ public String getTextContent() throws DOMException { return null; } /** * @todo DOM level 3 getUserData() Not implemented. Returns null. * @see org.w3c.dom.Node#getUserData(java.lang.String) */ public Object getUserData(String key) { return null; } /** * @see org.w3c.dom.Node#isDefaultNamespace(java.lang.String) */ public boolean isDefaultNamespace(String namespaceURI) { return false; } /** * @todo DOM level 3 isEqualNode() Not implemented. Returns false. * @see org.w3c.dom.Node#isEqualNode(org.w3c.dom.Node) */ public boolean isEqualNode(org.w3c.dom.Node arg) { return false; } /** * @todo DOM level 3 isSameNode() Not implemented. Returns false. 
* @see org.w3c.dom.Node#isSameNode(org.w3c.dom.Node) */ public boolean isSameNode(org.w3c.dom.Node other) { return false; } /** * @see org.w3c.dom.Node#lookupNamespaceURI(java.lang.String) */ public String lookupNamespaceURI(String prefix) { return null; } /** * @see org.w3c.dom.Node#lookupPrefix(java.lang.String) */ public String lookupPrefix(String namespaceURI) { return null; } /** * @todo DOM level 3 setTextContent() Not implemented. Throws NO_MODIFICATION_ALLOWED_ERR * @see org.w3c.dom.Node#setTextContent(java.lang.String) */ public void setTextContent(String textContent) throws DOMException { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, "Node is read only"); } /** * @todo DOM level 3 setUserData() Not implemented. Returns null. * @see org.w3c.dom.Node#setUserData(java.lang.String, java.lang.Object, org.w3c.dom.UserDataHandler) */ public Object setUserData(String key, Object data, UserDataHandler handler) { return null; } } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/OutJavaImpl.java���������������������������������������������������0000644�0001750�0001750�00000011536�10463645504�022102� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide 
Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. 
*
 *  The copyright holders and contributing author(s) specifically
 *  permit, without fee, and encourage the use of this source code
 *  as a component for supporting the Hypertext Markup Language in
 *  commercial products. If you use this source code in a product,
 *  acknowledgment is not required but would be appreciated.
 *
 */
package org.w3c.tidy;

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;


/**
 * Output implementation using java writers.
 * @author Fabrizio Giustina
 * @version $Revision: 807 $ ($Author: fgiust $)
 */
public class OutJavaImpl implements Out
{

    /**
     * Writer that receives the output.
     */
    private Writer writer;

    /**
     * Newline sequence, copied from the configuration when the instance is created.
     */
    private char[] newline;

    /**
     * Constructor. Wraps the given stream in an <code>OutputStreamWriter</code> using the named encoding.
     * @param configuration actual configuration instance (needed for newline configuration)
     * @param encoding encoding name
     * @param out output stream
     * @throws UnsupportedEncodingException if the underlying OutputStreamWriter doesn't support the requested
     * encoding.
     */
    protected OutJavaImpl(Configuration configuration, String encoding, OutputStream out)
        throws UnsupportedEncodingException
    {
        this.writer = new OutputStreamWriter(out, encoding);
        this.newline = configuration.newline;
    }

    /**
     * Constructor.
* @param configuration actual configuration instance (needed for newline configuration) * @param out Writer */ protected OutJavaImpl(Configuration configuration, Writer out) { this.writer = out; this.newline = configuration.newline; } /** * @see org.w3c.tidy.Out#outc(int) */ public void outc(int c) { try { writer.write(c); } catch (IOException e) { // @todo throws exception System.err.println("OutJavaImpl.outc: " + e.getMessage()); } } /** * @see org.w3c.tidy.Out#outc(byte) */ public void outc(byte c) { try { writer.write(c); } catch (IOException e) { // @todo throws exception System.err.println("OutJavaImpl.outc: " + e.getMessage()); } } /** * @see org.w3c.tidy.Out#newline() */ public void newline() { try { writer.write(this.newline); } catch (IOException e) { // @todo throws exception System.err.println("OutJavaImpl.newline: " + e.getMessage()); } } /** * @see org.w3c.tidy.Out#flush() */ public void flush() { try { writer.flush(); } catch (IOException e) { System.err.println("OutJavaImpl.flush: " + e.getMessage()); } } } ������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/main/java/org/w3c/tidy/ParsePropertyImpl.java���������������������������������������������0000644�0001750�0001750�00000064424�10463645504�023354� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett <dsr@w3.org> * Andy Quick <ac.quick@sympatico.ca> (translation to Java) * Gary L Peskin <garyp@firstech.com> (Java development) * Sami Lempinen <sami@lempinen.net> (release management) * Fabrizio Giustina <fgiust at users.sourceforge.net> * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
*
 */
package org.w3c.tidy;

import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;


/**
 * Property parser instances. Holds one shared <code>ParseProperty</code> instance per kind of configuration value;
 * the parser classes themselves are nested in this class.
 * @author Fabrizio Giustina
 * @version $Revision $ ($Author $)
 */
public final class ParsePropertyImpl
{

    /**
     * configuration parser for int values.
     */
    static final ParseProperty INT = new ParseInt();

    /**
     * configuration parser for boolean values.
     */
    static final ParseProperty BOOL = new ParseBoolean();

    /**
     * configuration parser for inverted boolean values.
     */
    static final ParseProperty INVBOOL = new ParseInvBoolean();

    /**
     * configuration parser for char encoding values.
     */
    static final ParseProperty CHAR_ENCODING = new ParseCharEncoding();

    /**
     * configuration parser for name values.
     */
    static final ParseProperty NAME = new ParseName();

    /**
     * configuration parser for tag names.
     */
    static final ParseProperty TAGNAMES = new ParseTagNames();

    /**
     * configuration parser for doctype property.
     */
    static final ParseProperty DOCTYPE = new ParseDocType();

    /**
     * configuration parser for repeated attribute property.
     */
    static final ParseProperty REPEATED_ATTRIBUTES = new ParseRepeatedAttribute();

    /**
     * configuration parser for String values.
     */
    static final ParseProperty STRING = new ParseString();

    /**
     * configuration parser for indent property.
     */
    static final ParseProperty INDENT = new ParseIndent();

    /**
     * configuration parser for css selectors.
     */
    static final ParseProperty CSS1SELECTOR = new ParseCSS1Selector();

    /**
     * configuration parser for new line bytes.
     */
    static final ParseProperty NEWLINE = new ParseNewLine();

    /**
     * don't instantiate.
     */
    private ParsePropertyImpl()
    {
        // unused
    }

    /**
     * parser for integer values.
*/
    static class ParseInt implements ParseProperty
    {

        /**
         * Turns the option text into an <code>Integer</code>. Text that is not a valid integer is reported as a bad
         * argument and yields <code>-1</code>.
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            try
            {
                return new Integer(Integer.parseInt(value));
            }
            catch (NumberFormatException malformed)
            {
                configuration.report.badArgument(value, option);
                return new Integer(-1);
            }
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "Integer";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "0, 1, 2, ...";
        }

        /**
         * Textual form of the stored value, or an empty string when nothing is stored.
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            if (value == null)
            {
                return "";
            }
            return value.toString();
        }
    }

    /**
     * parser for boolean values.
     */
    static class ParseBoolean implements ParseProperty
    {

        /**
         * Decides on the first character only: t/T/y/Y/1 mean true, f/F/n/N/0 mean false. A missing or empty value
         * counts as true; any other first character is reported as a bad argument and also counts as true.
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            if (value == null || value.length() == 0)
            {
                return Boolean.TRUE;
            }

            switch (value.charAt(0))
            {
                case 't' :
                case 'T' :
                case 'y' :
                case 'Y' :
                case '1' :
                    return Boolean.TRUE;

                case 'f' :
                case 'F' :
                case 'n' :
                case 'N' :
                case '0' :
                    return Boolean.FALSE;

                default :
                    configuration.report.badArgument(value, option);
                    return Boolean.TRUE;
            }
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "Boolean";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "y/n, yes/no, t/f, true/false, 1/0";
        }

        /**
         * "yes" for a true value, "no" for a false one, empty string when nothing is stored.
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            if (value == null)
            {
                return "";
            }

            boolean stored = ((Boolean) value).booleanValue();
            return stored ? "yes" : "no";
        }
    }

    /**
     * parser for inverted boolean values: the text is read by {@link #BOOL} and the result is negated.
     */
    static class ParseInvBoolean implements ParseProperty
    {

        /**
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            Boolean plain = (Boolean) BOOL.parse(value, option, configuration);
            if (plain.booleanValue())
            {
                return Boolean.FALSE;
            }
            return Boolean.TRUE;
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "Boolean";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "yes, no, true, false";
        }

        /**
         * "no" for a stored true value, "yes" for a stored false one, empty string when nothing is stored.
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            if (value == null)
            {
                return "";
            }

            boolean stored = ((Boolean) value).booleanValue();
            return stored ? "no" : "yes";
        }
    }

    /**
     * parse character encoding option. Can be any java encoding name supported by the runtime platform.
*/ static class ParseCharEncoding implements ParseProperty { /** * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration) */ public Object parse(String value, String option, Configuration configuration) { if ("raw".equalsIgnoreCase(value)) { // special value for compatibility with tidy c configuration.rawOut = true; } else if (!TidyUtils.isCharEncodingSupported(value)) { configuration.report.badArgument(value, option); } else if ("input-encoding".equalsIgnoreCase(option)) { configuration.setInCharEncodingName(value); } else if ("output-encoding".equalsIgnoreCase(option)) { configuration.setOutCharEncodingName(value); } else if ("char-encoding".equalsIgnoreCase(option)) { configuration.setInCharEncodingName(value); configuration.setOutCharEncodingName(value); } return null; } /** * @see org.w3c.tidy.ParseProperty#getType() */ public String getType() { return "Encoding"; } /** * @see org.w3c.tidy.ParseProperty#getOptionValues() */ public String getOptionValues() { // ascii, latin1, raw, utf-8, iso2022, mac, utf-16, utf-16be, utf-16le, big5, shiftjis return "Any valid java char encoding name"; } /** * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration) */ public String getFriendlyName(String option, Object value, Configuration configuration) { if ("output-encoding".equalsIgnoreCase(option)) { return configuration.getOutCharEncodingName(); } // for input-encoding or char-encoding return configuration.getInCharEncodingName(); } } /** * parser for name values (a string excluding whitespace). 
*/ static class ParseName implements ParseProperty { /** * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration) */ public Object parse(String value, String option, Configuration configuration) { StringTokenizer t = new StringTokenizer(value); String rs = null; if (t.countTokens() >= 1) { rs = t.nextToken(); } else { configuration.report.badArgument(value, option); } return rs; } /** * @see org.w3c.tidy.ParseProperty#getType() */ public String getType() { return "Name"; } /** * @see org.w3c.tidy.ParseProperty#getOptionValues() */ public String getOptionValues() { return "-"; } /** * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration) */ public String getFriendlyName(String option, Object value, Configuration configuration) { return value == null ? "" : value.toString(); } } /** * parser for name values. */ static class ParseTagNames implements ParseProperty { /** * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration) */ public Object parse(String value, String option, Configuration configuration) { short tagType = Dict.TAGTYPE_INLINE; if ("new-inline-tags".equals(option)) { tagType = Dict.TAGTYPE_INLINE; } else if ("new-blocklevel-tags".equals(option)) { tagType = Dict.TAGTYPE_BLOCK; } else if ("new-empty-tags".equals(option)) { tagType = Dict.TAGTYPE_EMPTY; } else if ("new-pre-tags".equals(option)) { tagType = Dict.TAGTYPE_PRE; } StringTokenizer t = new StringTokenizer(value, " \t\n\r,"); while (t.hasMoreTokens()) { configuration.definedTags |= tagType; configuration.tt.defineTag(tagType, t.nextToken()); } return null; } /** * @see org.w3c.tidy.ParseProperty#getType() */ public String getType() { return "Tag names"; } /** * @see org.w3c.tidy.ParseProperty#getOptionValues() */ public String getOptionValues() { return "tagX, tagY, ..."; } /** * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, 
java.lang.Object, Configuration) */ public String getFriendlyName(String option, Object value, Configuration configuration) { short tagType; if ("new-inline-tags".equals(option)) { tagType = Dict.TAGTYPE_INLINE; } else if ("new-blocklevel-tags".equals(option)) { tagType = Dict.TAGTYPE_BLOCK; } else if ("new-empty-tags".equals(option)) { tagType = Dict.TAGTYPE_EMPTY; } else if ("new-pre-tags".equals(option)) { tagType = Dict.TAGTYPE_PRE; } else { return ""; } List tagList = configuration.tt.findAllDefinedTag(tagType); if (tagList.isEmpty()) { return ""; } StringBuffer buffer = new StringBuffer(); Iterator iterator = tagList.iterator(); while (iterator.hasNext()) { buffer.append(iterator.next()); buffer.append(" "); } return buffer.toString(); } } /** * Parse doctype preference. doctype: <code>omit | auto | strict | loose | [fpi]</code> where the fpi is a string * similar to <code>"-//ACME//DTD HTML 3.14159//EN"</code>. */ static class ParseDocType implements ParseProperty { /** * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration) */ public Object parse(String value, String option, Configuration configuration) { value = value.trim(); /* "-//ACME//DTD HTML 3.14159//EN" or similar */ if (value.startsWith("\"")) { configuration.docTypeMode = Configuration.DOCTYPE_USER; return value; } /* read first word */ String word = ""; StringTokenizer t = new StringTokenizer(value, " \t\n\r,"); if (t.hasMoreTokens()) { word = t.nextToken(); } // #443663 - fix by Terry Teague 23 Jul 01 if ("auto".equalsIgnoreCase(word)) { configuration.docTypeMode = Configuration.DOCTYPE_AUTO; } else if ("omit".equalsIgnoreCase(word)) { configuration.docTypeMode = Configuration.DOCTYPE_OMIT; } else if ("strict".equalsIgnoreCase(word)) { configuration.docTypeMode = Configuration.DOCTYPE_STRICT; } else if ("loose".equalsIgnoreCase(word) || "transitional".equalsIgnoreCase(word)) { configuration.docTypeMode = Configuration.DOCTYPE_LOOSE; } else { 
configuration.report.badArgument(value, option); } return null; } /** * @see org.w3c.tidy.ParseProperty#getType() */ public String getType() { return "DocType"; } /** * @see org.w3c.tidy.ParseProperty#getOptionValues() */ public String getOptionValues() { return "omit | auto | strict | loose | [fpi]"; } /** * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration) */ public String getFriendlyName(String option, Object value, Configuration configuration) { String stringValue; switch (configuration.docTypeMode) { case Configuration.DOCTYPE_AUTO : stringValue = "auto"; break; case Configuration.DOCTYPE_OMIT : stringValue = "omit"; break; case Configuration.DOCTYPE_STRICT : stringValue = "strict"; break; case Configuration.DOCTYPE_LOOSE : stringValue = "transitional"; break; case Configuration.DOCTYPE_USER : stringValue = configuration.docTypeStr; break; default : stringValue = "unknown"; break; } return stringValue; } } /** * keep-first or keep-last? 
*/
    static class ParseRepeatedAttribute implements ParseProperty
    {

        /**
         * Maps "keep-first" / "keep-last" (case-insensitive) to the matching <code>Configuration</code> constant.
         * Anything else is reported as a bad argument and yields <code>-1</code>.
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            int dupAttr;

            if ("keep-first".equalsIgnoreCase(value))
            {
                dupAttr = Configuration.KEEP_FIRST;
            }
            else if ("keep-last".equalsIgnoreCase(value))
            {
                dupAttr = Configuration.KEEP_LAST;
            }
            else
            {
                configuration.report.badArgument(value, option);
                dupAttr = -1;
            }
            return new Integer(dupAttr);
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "Enum";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "keep-first, keep-last";
        }

        /**
         * Option keyword for the stored constant, "unknown" for any other number, empty string when nothing is
         * stored.
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            if (value == null)
            {
                return "";
            }

            int intValue = ((Integer) value).intValue();
            String stringValue;

            switch (intValue)
            {
                case Configuration.KEEP_FIRST :
                    stringValue = "keep-first";
                    break;

                case Configuration.KEEP_LAST :
                    stringValue = "keep-last";
                    break;

                default :
                    stringValue = "unknown";
                    break;
            }

            return stringValue;
        }
    }

    /**
     * Parser for String values.
     */
    static class ParseString implements ParseProperty
    {

        /**
         * Returns the value unchanged.
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            return value;
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "String";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "-";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            return value == null ? "" : (String) value;
        }
    }

    /**
     * Parser for indent values.
     */
    static class ParseIndent implements ParseProperty
    {

        /**
         * Reads the indent setting and updates <code>configuration.smartIndent</code> as a side effect. "auto" turns
         * indenting and smart indenting on; the boolean spellings turn smart indenting off. Every spelling listed by
         * {@link #getOptionValues()} is accepted (case-insensitive): yes/y/true/t/1 and no/n/false/f/0. Previously
         * only the long forms were recognized although the short ones were advertised. Any other value is reported
         * as a bad argument; the current <code>configuration.indentContent</code> is then returned and smart
         * indenting is left untouched.
         * @return <code>Boolean.TRUE</code> when content should be indented, <code>Boolean.FALSE</code> otherwise
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            boolean b = configuration.indentContent;

            if ("auto".equalsIgnoreCase(value))
            {
                b = true;
                configuration.smartIndent = true;
            }
            else if ("yes".equalsIgnoreCase(value)
                || "y".equalsIgnoreCase(value)
                || "true".equalsIgnoreCase(value)
                || "t".equalsIgnoreCase(value)
                || "1".equals(value))
            {
                b = true;
                configuration.smartIndent = false;
            }
            else if ("no".equalsIgnoreCase(value)
                || "n".equalsIgnoreCase(value)
                || "false".equalsIgnoreCase(value)
                || "f".equalsIgnoreCase(value)
                || "0".equals(value))
            {
                b = false;
                configuration.smartIndent = false;
            }
            else
            {
                configuration.report.badArgument(value, option);
            }
            return b ? Boolean.TRUE : Boolean.FALSE;
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "Indent";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "auto, y/n, yes/no, t/f, true/false, 1/0";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            return value == null ? "" : value.toString();
        }
    }

    /**
     * Parser for css selectors.
     */
    static class ParseCSS1Selector implements ParseProperty
    {

        /**
         * Returns the first whitespace-delimited word of the value with "-" appended. A value without any word is
         * reported once as a bad argument and yields <code>null</code>; the selector check is skipped in that case
         * so the same problem is not reported a second time. A value that fails
         * <code>Lexer.isCSS1Selector</code> is reported too, but the prefix is still returned, as before.
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            StringTokenizer t = new StringTokenizer(value);
            if (!t.hasMoreTokens())
            {
                configuration.report.badArgument(value, option);
                return null;
            }

            // Make sure any escaped Unicode is terminated so valid class names are
            // generated after Tidy appends last digits.
            String buf = t.nextToken() + "-";

            if (!Lexer.isCSS1Selector(value))
            {
                configuration.report.badArgument(value, option);
            }

            return buf;
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "Name";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "CSS1 selector";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            return value == null ? "" : (String) value;
        }
    }

    /**
     * Parser for newline bytes. Allows lf|crlf|cr.
     */
    static class ParseNewLine implements ParseProperty
    {

        /**
         * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
         */
        public Object parse(String value, String option, Configuration configuration)
        {
            // lf|crlf|cr
            if ("lf".equalsIgnoreCase(value))
            {
                configuration.newline = new char[]{'\n'};
            }
            else if ("cr".equalsIgnoreCase(value))
            {
                configuration.newline = new char[]{'\r'};
            }
            else if ("crlf".equalsIgnoreCase(value))
            {
                configuration.newline = new char[]{'\r', '\n'};
            }
            else
            {
                configuration.report.badArgument(value, option);
            }
            return null;
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getType()
         */
        public String getType()
        {
            return "Enum";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getOptionValues()
         */
        public String getOptionValues()
        {
            return "lf, crlf, cr";
        }

        /**
         * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
         */
        public String getFriendlyName(String option, Object value, Configuration configuration)
        {
            if (configuration.newline.length == 1)
            {
                return (configuration.newline[0] == '\n') ?
"lf" : "cr"; } return "crlf"; } } }��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/config/�����������������������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345035�014200� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/config/checkstyle.xml���������������������������������������������������������������������0000644�0001750�0001750�00000051427�10463343312�017063� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.1//EN" "http://www.puppycrawl.com/dtds/configuration_1_1.dtd"> <module name="Checker"> <module name="TreeWalker"> <module name="com.puppycrawl.tools.checkstyle.checks.ArrayTypeStyleCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.DescendantTokenCheck"> <property name="maximumDepth" value="2147483647"/> <property name="maximumNumber" value="2147483647"/> <property name="minimumDepth" value="0"/> <property name="minimumNumber" value="0"/> <property name="severity" value="ignore"/> </module> <module 
name="com.puppycrawl.tools.checkstyle.checks.FinalParametersCheck"> <property name="severity" value="ignore"/> <property name="tokens" value="METHOD_DEF, CTOR_DEF"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.GenericIllegalRegexpCheck"> <property name="format" value="System\.out\.println\("/> <property name="ignoreCase" value="false"/> <property name="message" value="Avoid System.out.println(), use log.debug()"/> <property name="severity" value="error"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.GenericIllegalRegexpCheck"> <property name="format" value="\.printStackTrace\(\)"/> <property name="ignoreCase" value="false"/> <property name="message" value="Avoid exception.printStackTrace()"/> <property name="severity" value="error"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.TodoCommentCheck"> <property name="format" value="@todo"/> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.UpperEllCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.blocks.AvoidNestedBlocksCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.blocks.EmptyBlockCheck"> <property name="option" value="text"/> <property name="severity" value="warning"/> <property name="tokens" value="LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_IF, LITERAL_FOR, LITERAL_TRY, LITERAL_WHILE, STATIC_INIT"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.blocks.LeftCurlyCheck"> <property name="maxLineLength" value="80"/> <property name="option" value="nl"/> <property name="severity" value="warning"/> <property name="tokens" value="CLASS_DEF, CTOR_DEF, INTERFACE_DEF, LITERAL_CATCH, LITERAL_DO, LITERAL_ELSE, LITERAL_FINALLY, LITERAL_FOR, LITERAL_IF, LITERAL_SWITCH, LITERAL_SYNCHRONIZED, LITERAL_TRY, LITERAL_WHILE, METHOD_DEF"/> </module> <module 
name="com.puppycrawl.tools.checkstyle.checks.blocks.NeedBracesCheck"> <property name="severity" value="warning"/> <property name="tokens" value="LITERAL_DO, LITERAL_ELSE, LITERAL_IF, LITERAL_FOR, LITERAL_WHILE"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.blocks.RightCurlyCheck"> <property name="option" value="alone"/> <property name="severity" value="warning"/> <property name="tokens" value="LITERAL_CATCH, LITERAL_ELSE, LITERAL_TRY"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.ArrayTrailingCommaCheck"> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.AvoidInlineConditionalsCheck"> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.CovariantEqualsCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.DeclarationOrderCheck"> <property name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.DoubleCheckedLockingCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.EmptyStatementCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.EqualsHashCodeCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.ExplicitInitializationCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.FinalLocalVariableCheck"> <property name="severity" value="ignore"/> <property name="tokens" value="VARIABLE_DEF"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.HiddenFieldCheck"> <property name="ignoreConstructorParameter" value="true"/> <property name="ignoreSetter" value="true"/> <property name="severity" value="ignore"/> 
<property name="tokens" value="PARAMETER_DEF, VARIABLE_DEF"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.IllegalCatchCheck"> <property name="illegalClassNames" value="java.lang.Exception, java.lang.Throwable, java.lang.RuntimeException"/> <property name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.IllegalInstantiationCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.IllegalTokenCheck"> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.IllegalTokenTextCheck"> <property name="format" value="^$"/> <property name="ignoreCase" value="false"/> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.IllegalTypeCheck"> <property name="severity" value="warning"/> <property name="tokens" value="METHOD_DEF, PARAMETER_DEF, VARIABLE_DEF"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.InnerAssignmentCheck"> <property name="severity" value="info"/> <property name="tokens" value="ASSIGN, BAND_ASSIGN, BOR_ASSIGN, BSR_ASSIGN, BXOR_ASSIGN, DIV_ASSIGN, MINUS_ASSIGN, MOD_ASSIGN, PLUS_ASSIGN, SL_ASSIGN, SR_ASSIGN, STAR_ASSIGN"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.JUnitTestCaseCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.MagicNumberCheck"> <property name="ignoreNumbers" value="-1, 0, 1, 2"/> <property name="severity" value="ignore"/> <property name="tokens" value="NUM_DOUBLE, NUM_FLOAT, NUM_INT, NUM_LONG"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.MissingSwitchDefaultCheck"> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.NestedIfDepthCheck"> <property name="max" value="3"/> <property 
name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.NestedTryDepthCheck"> <property name="max" value="2"/> <property name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.PackageDeclarationCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.ParameterAssignmentCheck"> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.RedundantThrowsCheck"> <property name="allowSubclasses" value="false"/> <property name="allowUnchecked" value="false"/> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.SimplifyBooleanExpressionCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.SimplifyBooleanReturnCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.StringLiteralEqualityCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.SuperCloneCheck"> <property name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.coding.SuperFinalizeCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.design.DesignForExtensionCheck"> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.design.FinalClassCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.design.HideUtilityClassConstructorCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.design.InterfaceIsTypeCheck"> <property name="severity" value="warning"/> </module> <module 
name="com.puppycrawl.tools.checkstyle.checks.design.MutableExceptionCheck"> <property name="format" value="^.*Exception$|^.*Error$"/> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.design.ThrowsCountCheck"> <property name="max" value="5"/> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.design.VisibilityModifierCheck"> <property name="packageAllowed" value="true"/> <property name="protectedAllowed" value="true"/> <property name="publicMemberPattern" value="^serialVersionUID"/> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.imports.AvoidStarImportCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.imports.IllegalImportCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.imports.ImportOrderCheck"> <property name="ordered" value="true"/> <property name="separated" value="false"/> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.imports.RedundantImportCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.imports.UnusedImportsCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.indentation.IndentationCheck"> <property name="basicOffset" value="4"/> <property name="braceAdjustment" value="0"/> <property name="caseIndent" value="4"/> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.EntityBeanCheck"> <property name="persistence" value="mixed"/> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.FinalStaticCheck"> <property name="severity" value="warning"/> </module> <module 
name="com.puppycrawl.tools.checkstyle.checks.j2ee.LocalHomeInterfaceCheck"> <property name="severity" value="error"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.LocalInterfaceCheck"> <property name="severity" value="error"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.MessageBeanCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.RemoteHomeInterfaceCheck"> <property name="severity" value="error"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.RemoteInterfaceCheck"> <property name="severity" value="error"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.SessionBeanCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.ThisParameterCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.j2ee.ThisReturnCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.javadoc.JavadocMethodCheck"> <property name="allowMissingParamTags" value="false"/> <property name="allowMissingReturnTag" value="false"/> <property name="allowMissingThrowsTags" value="false"/> <property name="allowThrowsTagsForSubclasses" value="false"/> <property name="allowUndeclaredRTE" value="false"/> <property name="excludeScope" value="nothing"/> <property name="scope" value="private"/> <property name="severity" value="warning"/> <property name="tokens" value="METHOD_DEF, CTOR_DEF"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.javadoc.JavadocStyleCheck"> <property name="checkFirstSentence" value="true"/> <property name="checkHtml" value="true"/> <property name="scope" value="private"/> <property name="severity" value="info"/> <property name="tokens" value="INTERFACE_DEF, CLASS_DEF, METHOD_DEF, CTOR_DEF, VARIABLE_DEF"/> </module> <module 
name="com.puppycrawl.tools.checkstyle.checks.javadoc.JavadocTypeCheck"> <property name="scope" value="private"/> <property name="severity" value="warning"/> <property name="tokens" value="CLASS_DEF, INTERFACE_DEF"/> <property name="versionFormat" value="(^\$A_Zevision\:)*(\(\$A-Zuthor: )*(\$\))$"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.javadoc.JavadocVariableCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.AbstractClassNameCheck"> <property name="format" value="^Abstract.*$|^.*Factory$"/> <property name="severity" value="ignore"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.ConstantNameCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.LocalFinalVariableNameCheck"> <property name="format" value="^[A-Z0-9_]*$"/> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.LocalVariableNameCheck"> <property name="format" value="^[a-z][a-zA-Z0-9]*$"/> <property name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.MemberNameCheck"> <property name="applyToPackage" value="true"/> <property name="applyToPrivate" value="true"/> <property name="applyToProtected" value="true"/> <property name="applyToPublic" value="true"/> <property name="format" value="^[a-z][a-zA-Z0-9]*$"/> <property name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.MethodNameCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.PackageNameCheck"> <property name="format" value="^[a-z]+(\.[a-z][a-z0-9]*)*$"/> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.ParameterNameCheck"> <property name="format" value="^[a-z][a-zA-Z0-9]*$"/> 
<property name="severity" value="info"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.StaticVariableNameCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.naming.TypeNameCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.sizes.AnonInnerLengthCheck"> <property name="max" value="20"/> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.sizes.ExecutableStatementCountCheck"> <property name="max" value="100"/> <property name="severity" value="info"/> <property name="tokens" value="INSTANCE_INIT, STATIC_INIT, METHOD_DEF, CTOR_DEF"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.sizes.FileLengthCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.sizes.LineLengthCheck"> <property name="ignorePattern" value="^$"/> <property name="max" value="122"/> <property name="severity" value="warning"/> <property name="tabWidth" value="4"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.sizes.MethodLengthCheck"> <property name="countEmpty" value="false"/> <property name="max" value="200"/> <property name="severity" value="info"/> <property name="tokens" value="METHOD_DEF, CTOR_DEF"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.sizes.ParameterNumberCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.EmptyForIteratorPadCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.NoWhitespaceAfterCheck"> <property name="allowLineBreaks" value="true"/> <property name="severity" value="warning"/> <property name="tokens" value="BNOT, DEC, DOT, INC, LNOT, UNARY_MINUS, UNARY_PLUS"/> </module> <module 
name="com.puppycrawl.tools.checkstyle.checks.whitespace.NoWhitespaceBeforeCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.OperatorWrapCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.ParenPadCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.TabCharacterCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.TypecastParenPadCheck"> <property name="option" value="nospace"/> <property name="severity" value="warning"/> <property name="tokens" value="RPAREN, TYPECAST"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.WhitespaceAfterCheck"> <property name="severity" value="warning"/> </module> <module name="com.puppycrawl.tools.checkstyle.checks.whitespace.WhitespaceAroundCheck"> <property name="severity" value="warning"/> </module> </module> </module> �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/config/clover.license���������������������������������������������������������������������0000644�0001750�0001750�00000016574�10463343312�017045� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Product: Clover License: Open Source License, 0.x, 1.x Issued: Tue Jan 6 2004 18:08:47 CST Expiry: Never Key: 4ef9fb753cdb84aa9cee31e1e Name: Fabrizio Giustina Org: JTidy 
Certificate: AAABom+Ow8B7/zEbxOMqqKwwrdpP+a1COmJGHco7sCNLjHkHnajPF+dQW Ct12PMy0uml0s9xuus5wKngJ9OFk5XFeh01dzQF66bhXH1bvegLfvja3Kle6BYtDv4LZgE gk3E0aJN4IbgTn+TgUckSevXDR4KzK77NWJfrVzkxV3/JepYER+IwCbX2gEysSyZKJen1e /KmvMnPcfTUNYOj6HT1483QqnhPOWFsBfC77ggTTr2uQmWq3MzTQfAKPFy3LHNEKtZUPnG Oayn9MYKn6lmaPQ7uwlnLrzG3xhP5oUU1LFdl2T/WJkv0K0aW4dJSu103bnXS7mbk/qDJ3 m2nrSTGvuM92O7Wz6BkUlp6OBju6ewTAk7T9ltmt9vL1lSVECEPtzRyAnPHUC5463ct+zq Y9OAR0wQp8sNE1vFv7IwJwP8SZCYaIJRQvz9A8ihKkbm/SqxJIQEly/Rh6PLwyNhg6p1/j Lv5I5jsx8i+fw5+cSMYorL3E1dGtvxTKuPB38xD8hk8FawqNgUs3FcvPaJ3K+46hKmPV9M ELu/STYf9K6nKWPkOXdc/LqxRRvGF7b4vr4r0GJ2WXQP+6D80kZOo8BBInRNqxQH+amzLe /xuEaWHztXp7bzi9vDaoWolbtSNULpuF5zHz17ibXkfjDT52uDNfs9X6kUxODkG88cRjMg YzeQIWBWhHIzT8T2rgF8RuVm0nb33i92PYLWPnJxP12Jk/A== License Agreement: CLOVER VERSION 1 (ONE) SOFTWARE LICENSE AGREEMENT 1. Licenses and Software Cortex eBusiness Pty Ltd, an Australian Proprietary Limited Company ("CENQUA") hereby grants to the purchaser (the "LICENSEE") a limited, revocable, worldwide, non-exclusive, non-transferable, non-sublicensable license to use the Clover version 1 (one) software (the "Software"), including any minor upgrades thereof during the Term (hereinafter defined) up to, but not including the next major version of the Software. The licensee shall not, or knowingly allow others to, reverse engineer, decompile, disassemble, modify, adapt, create derivative works from or otherwise attempt to derive source code from the Software provided. And, in accordance with the terms and conditions of this Software License Agreement (the "Agreement"), the Software shall be used solely by the licensed users in accordance with the following edition specific conditions: a) Server Edition A Server Edition license entitles the Licensee to execute one instance of Clover Server Edition on one (1) machine for the purposes of instrumenting source code and generating reports. 
There are no limitations on the use of the instrumented source code or generated reports produced by Server Edition. b) Workstation Edition A Workstation Edition license entitles the licensee to use Clover Workstation Edition on one (1) machine by one (1) individual end user. Workstation Edition does not permit the generation of reports for distribution. c) Team Edition A Team Edition license entitles the licensee to use Clover Team edition on any number of machines solely by the licensed number of users. Reports generated by Clover Team Edition are strictly for use only by the licensed number of individual end users. 2. License Fee In exchange for the License(s), the Licensee shall pay to Cenqua a one-time, up front, non-refundable license fee. At the sole discretion of Cenqua this fee will be waived for non-commercial projects. Notwithstanding the Licensee's payment of the License Fee, Cenqua reserves the right to terminate the License if Cenqua discovers that the Licensee and/or the Licensee's use of the Software is in breach of this Agreement. 3. Proprietary Rights Cenqua will retain all right, title and interest in and to the Software, all copies thereof, and Cenqua website(s), software, and other intellectual property, including, but not limited to, ownership of all copyrights, look and feel, trademark rights, design rights, trade secret rights and any and all other intellectual property and other proprietary rights therein. The Licensee will not directly or indirectly obtain or attempt to obtain at any time, any right, title or interest by registration or otherwise in or to the trademarks, service marks, copyrights, trade names, symbols, logos or designations or other intellectual property rights owned or used by Cenqua. All technical manuals or other information provided by Cenqua to the Licensee shall be the sole property of Cenqua. 4. 
Term and Termination Subject to the other provisions hereof, this Agreement shall commence upon the Licensee's opting into this Agreement and continue until the Licensee discontinues use of the Software or the Agreement terminates automatically upon the Licensee's breach of any term or condition of this Agreement (the "Term"). Upon any such termination, the Licensee will delete the Software immediately. 5. Copying & Transfer The Licensee may copy the Software for back-up purposes only. The Licensee may not assign or otherwise transfer the Software to any third party. 6. Specific Disclaimer of Warranty and Limitation of Liability THE SOFTWARE IS PROVIDED WITHOUT WARRANTY OF ANY KIND. CENQUA DISCLAIMS ALL WARRANTIES, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. CENQUA WILL NOT BE LIABLE FOR ANY DAMAGES ASSOCIATED WITH THE SOFTWARE, INCLUDING, WITHOUT LIMITATION, ORDINARY, INCIDENTAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING BUT NOT LIMITED TO DAMAGES RELATING TO LOST DATA OR LOST PROFITS, EVEN IF CENQUA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 7. Warranties and Representations Licensee Indemnification. CENQUA agrees to indemnify, defend and hold the Licensee harmless from and against any and all liabilities, damages, losses, claims, costs, and expenses (including reasonable legal fees) arising out of or resulting from the Software or the use thereof infringing upon, misappropriating or violating any patents, copyrights, trademarks, or trade secret rights or other proprietary rights of persons, firms or entities who are not parties to this Agreement. CENQUA Indemnification. 
The Licensee warrants and represents that the Licensee's actions with regard to the Software will be in compliance with all applicable laws; and the Licensee agrees to indemnify, defend, and hold CENQUA harmless from and against any and all liabilities, damages, losses, claims, costs, and expenses (including reasonable legal fees) arising out of or resulting from the Licensee's failure to observe the use restrictions set forth herein. 8. Publicity The Licensee grants permission for CENQUA to use Licensee's name solely in customer lists. CENQUA shall not, without prior consent in writing, use the Licensee's name, or that of its affiliates, in any form with the specific exception of customer lists. CENQUA agrees to remove Licensee's name from any and all materials within 7 days if notified by the Licensee in writing. 9. Governing Law This Agreement shall be governed by the laws of New South Wales, Australia. 10. Independent Contractors The parties are independent contractors with respect to each other, and nothing in this Agreement shall be construed as creating an employer-employee relationship, a partnership, agency relationship or a joint venture between the parties. 11. Assignment This Agreement is not assignable or transferable by the Licensee. CENQUA in its sole discretion may transfer a license to a third party at the written request of the Licensee. 12. Entire Agreement This Agreement constitutes the entire agreement between the parties concerning the Licensee's use of the Software. This Agreement supersedes any prior verbal understanding between the parties and any Licensee purchase order or other ordering document, regardless of whether such document is received by CENQUA before or after execution of this Agreement. This Agreement may be amended only in writing by CENQUA. 
������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/�������������������������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345035�013677� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/���������������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345035�015711� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/css/�����������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345035�016501� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/css/site.css���������������������������������������������������������������0000644�0001750�0001750�00000007503�10463206432�020157� 0����������������������������������������������������������������������������������������������������ustar 
�twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������body { min-width: 600px; width: 600px; width: auto !important; background-color: #ddd; } /* main layout */ #banner { color: #FFA500; border: none; margin: 0 0 0 0; border-bottom: 2px solid #D2B48C; background-color: #fff; padding: 10px 20px 10px 20px; } #leftColumn { background-color: transparent; position: absolute; top: 120px; left: 20px; width: 180px; margin: 0px; padding: 0px; border: none; overflow: hidden; } #bodyColumn { margin: 18px 20px 20px 220px; border: 2px solid #D2B48C; background-color: #fff; color: #333333; padding: 30px; position: relative; } #footer div.xright { color: #fff; margin-right: 10px; } /* end main layout */ .deprecated { text-decoration: line-through; } .comment { color: green; } .source pre { font-size: 8pt; font-family: courier; } body,div,span,td,p,h2,h3 { font-family: Georgia, Verdana, Geneva, Arial, Helvetica, sans-serif; font-size: 10pt; } table td,table th { font-size: 8pt; } table.bodyTable { border-collapse: collapse; } table.bodyTable td,table.bodyTable th { border: 1px solid #fff; padding: 3px 5px 3px 5px; } #breadcrumbs { background-color: transparent; border: none; height: 15px; } h2 { border: none; border-bottom: 1px solid #ccc; background-color: transparent; color: #900; font-size: 12pt; padding-left: 0; font-variant: small-caps; font-family: "Trebuchet MS", arial, sans-serif; text-transform: uppercase; letter-spacing: 3px; } h3 { border: none; background-color: transparent; color: #900; font-size: 10pt; font-weight: bolder; padding-left: 0; font-weight: light; font-family: "Trebuchet MS", arial, sans-serif; text-transform: capitalize; letter-spacing: 2px; } #navcolumn h5 { padding: 3px 3px 3px 10px; font-variant: small-caps; font-family: Arial, Helvetica, sans-serif; font-size: 
10px; font-weight: 900; letter-spacing: 1px; border: none; background-color: #fff; border: 1px solid #D2B48C; } #navcolumn { padding: 0; } #navcolumn ul { margin: 2px 0 12px 0; } #navcolumn li { margin: 0px 0 0px 3px; padding: 2px; list-style-position: outside; font-size: 7.5pt !important; padding-left: 16px !important; padding-left/**/: 2px !important; } #navcolumn a,#navcolumn strong { padding-left: 14px; text-decoration: underline; padding-bottom: 2px; } #navcolumn a img { margin-top: 0; } #navcolumn a#poweredBy img { margin: 0 0 0 20px; width: 88px; height: 31px; border: 1px solid #000; } #navcolumn a:hover { color: Olive; padding-left: 14px; text-decoration: underline; padding-bottom: 2px; } #breadcrumbs .xright,#breadcrumbs .xleft { color: #fff; padding: 4px; margin-right: 10px; display: inline; } #breadcrumbs .xleft { font-size: 7pt !important; color: #fff; } #banner #organizationLogo { display: none; } #banner a#projectLogo img { background-color: #fff !important; margin-right: 20px !important; } #navcolumn li { color: #fff; } #navcolumn li a.externalLink { background-image: none !important; padding: none; } div.source { background-color: #ddd; } div.source pre,code,td.code { font-size: 8pt !important; font-family: monospace; margin: 0; } td.code { font-size: 10pt !important; font-family: monospace; } div#legend { display: none; } table td.source { border: none !important; } table td,table th { font-size: 8pt !important; font-family: verdana; } table th { font-weight: bold; } .collapsed { background-image: url(../images/ico_collapsed.png) !important; } .expanded { background-image: url(../images/ico_expanded.png) !important; 
}���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/eclipse_formatter.xml������������������������������������������������������0000644�0001750�0001750�00000062225�10463343312�022143� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="UTF-8"?> <profiles version="6"> <profile name="project" version="6"> <setting id="comment_clear_blank_lines" value="true"/> <setting id="comment_format_comments" value="true"/> <setting id="comment_format_header" value="false"/> <setting id="comment_format_html" value="true"/> <setting id="comment_format_source_code" value="false"/> <setting id="comment_indent_parameter_description" value="true"/> <setting id="comment_indent_root_tags" value="false"/> <setting id="comment_line_length" value="120"/> <setting id="comment_new_line_for_parameter" value="false"/> <setting id="comment_separate_root_tags" value="false"/> <setting id="org.eclipse.jdt.core.formatter.align_type_members_on_columns" value="false"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant" value="16"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression" value="48"/> <setting 
id="org.eclipse.jdt.core.formatter.alignment_for_binary_expression" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_compact_if" value="52"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_conditional_expression" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_multiple_fields" value="16"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration" value="16"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration" value="16"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration" value="48"/> <setting id="org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration" value="16"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_after_imports" value="2"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_after_package" value="1"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_field" value="1"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration" value="1"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_imports" value="0"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_member_type" value="1"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_method" value="1"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk" 
value="1"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_before_package" value="0"/> <setting id="org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations" value="2"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration" value="end_of_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration" value="next_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_array_initializer" value="end_of_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_block" value="next_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_block_in_case" value="end_of_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration" value="next_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_constant" value="end_of_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration" value="end_of_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_method_declaration" value="next_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_switch" value="next_line"/> <setting id="org.eclipse.jdt.core.formatter.brace_position_for_type_declaration" value="next_line"/> <setting id="org.eclipse.jdt.core.formatter.compact_else_if" value="true"/> <setting id="org.eclipse.jdt.core.formatter.continuation_indentation" value="1"/> <setting id="org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer" value="1"/> <setting id="org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line" value="false"/> <setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header" value="true"/> <setting id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header" value="true"/> <setting 
id="org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header" value="true"/> <setting id="org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases" value="true"/> <setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_block" value="true"/> <setting id="org.eclipse.jdt.core.formatter.indent_statements_compare_to_body" value="true"/> <setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases" value="true"/> <setting id="org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch" value="true"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration" value="insert"/> 
<setting id="org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_binary_operator" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters" value="insert"/> <setting 
id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments" value="do not insert"/> <setting 
id="org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while" 
value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_after_unary_operator" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_binary_operator" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation" value="do not insert"/> <setting 
id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement" 
value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference" value="do not insert"/> <setting 
id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration" value="insert"/> <setting 
id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator" value="do not 
insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard" value="insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_before_unary_operator" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation" value="do not insert"/> <setting id="org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line" value="false"/> <setting id="org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line" value="false"/> <setting id="org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line" value="false"/> <setting 
id="org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line" value="false"/> <setting id="org.eclipse.jdt.core.formatter.lineSplit" value="120"/> <setting id="org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body" value="0"/> <setting id="org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve" value="1"/> <setting id="org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line" value="true"/> <setting id="org.eclipse.jdt.core.formatter.tabulation.char" value="space"/> <setting id="org.eclipse.jdt.core.formatter.tabulation.size" value="4"/> </profile> </profiles> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/nightly/�������������������������������������������������������������������0000755�0001750�0001750�00000000000�11617345035�017367� 5����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/nightly/.htaccess����������������������������������������������������������0000644�0001750�0001750�00000000244�10024424474�021161� 0����������������������������������������������������������������������������������������������������ustar 
�twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������IndexOptions DescriptionWidth=* IndexOptions NameWidth=* IndexOptions +SuppressHTMLPreamble HeaderName .header.html ReadmeName .footer.html Options Includes Indexes������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/nightly/.footer.html�������������������������������������������������������0000644�0001750�0001750�00000000046�10024424474�021625� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������� </div> </body> </html>������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/site/resources/nightly/.header.html�������������������������������������������������������0000644�0001750�0001750�00000003103�10027404227�021551� 
0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html> <head> <title>JTidy - Nightly builds

Nightly builds

The nightly builds on this page have been automatically generated and are not guaranteed to be stable. It is possible that these builds have major bugs and may not run at all. Be sure to check junit test results to understand what is working and what is not in this drop.

jtidy/src/site/resources/images/0000755000175000017500000000000011617345035017156 5ustar twernertwernerjtidy/src/site/resources/images/ico_file_pdf.png0000644000175000017500000000043010133721471022255 0ustar twernertwernerPNG  IHDR rtIME 91 pHYsnu>gAMA aPLTE8kkkJJssZtRNS@fpIDATxc`ede(dRa`Rb` `P*``4``R` w``(obi@"4$VTZ$7 1 QII˜@40a % E\\\9IENDB`jtidy/src/site/resources/images/ico_newwindow.png0000644000175000017500000000032710133721471022533 0ustar twernertwernerPNG  IHDRz+tEXtCreation Timesab 2 ott 2004 09:47:07 +0100:?tIME :7=I@ pHYs  ~gAMA aPLTE.tRNS@fIDATxc`dB0AIENDB`jtidy/src/site/resources/images/ico_expanded.png0000644000175000017500000000035210133721471022300 0ustar twernertwernerPNG  IHDR +tEXtCreation Timesab 2 ott 2004 15:12:56 +0100DdtIME  7K pHYs  ~gAMA a PLTERm= tRNS@f IDATxc`L #111pNBIbIENDB`jtidy/src/site/resources/images/logo.png0000644000175000017500000001661707776560522020652 0ustar twernertwernerPNG  IHDR?Tv/tEXtDescription(C) Sami Lempinen "tIME5 0 pHYs  ~gAMA acPLTE$C3b"|1JB`ԓFj/Xu9-FilPnX}J`+|=X{p'emU|22eefVGIHtRNS@fgIDATx\:dIX &rnHlz{+]Dhur{V ~އ7r~PoYeíToQe-S?l o}g?^vܼ߷Qe-:|պܼӆ?]ܾgO)fQ̟.;nw _@z_QKf.ϸ ߸rBiOaȵ?]< :#ʫK/O~ҷG?"!Q([(8`Bp; FE '>H[_q~o"9x蔷NLlx*tYmY2t+7.E,>8V緓j;9_,1!*='Gwϳz ˒m#CoWӶ}ʜ&ܒ_GpQӠpпTem{@x%<.==%ܮv-&hY4̤:جeTFEC^h Չ> , 0PN'WkhF<&dl9iIx Wa|o"AvyKo,B;XLDDJQw}r/& M<+Jɯ#Go~nIϿ PV\QNʟDz].crfLOVQtse-E8޶@"e>j{'d<+D-1hȥo|c1|l_sSdyg\klvkL(.@kN4GUm=0i/h0ֹ(lfѧ01d^uʘŇ10/d>YVba夶LLAIk`Ru5qxrd:)|`0+.#mn46 BЀwEPEYM,5_*A\Vu[ɺ_yl\Jv+p IAn(lg_ll%ͽmm>+C`\0m̦^WT}+)fʭ~jlj](cfB3w$3[X7BjEp&1o 0TCӇ-85pf"AS.=m ZrbǍKYɶ!S%Kg{ĴɭX`BhjP/h+MPfUBI\_X+TID!DnKH9pqmce~`lfl7YI>>%4چ0@Mk|pt~:555LߡGXmfL[{F8?( nc$҅h5(`/b&#Ip#d"4=Tlk+ola}8 GF.)N3W`\H%DApMp*$IVePw+We8.W5ck舃y3MsytYhklv"6 anۧ504\k]puݴ/꾃d HhV5$ߡV$sPKsg/Y.s{ $d@3#'DU&7c AR oу '"lё#M:BGrgEUĉ=w՝4M@-}?ϝ\@(y ynG&\k5&ګj:dY3YccnB `GXB2g7UY3~rZ)YIH"k-uY6]q_HnP8+p0 l E Qf vHZ6m̜FW k6`E*crH0ptfmBȪƌ ჵm|-|VL>Ő"(Ʋ\3kPغn-eޕisb6H 7a$ +┞aG?KG>jwe&2XT%:6='{& B|#W|JFz`iCaFGDe60ɋ[pCA.04/D-HBJI?\9`k$J eTy 
%5״}Y&f.>5`b*qD]]`V*cKGPB:9̘sEӕmg^Vv-ۙEߖ%푇 LRko@'8_I_C+³<-0e,*pr22q'+^Sm'){: dL8`Bif'~U :oz9$9%%LJIQ̟ XXw޲Q5BmYsܰf F2ۆ'l Xf0BrѪtg5jV Eua.͠rq"$ȩ *[? d[Vu[y,=c SXʼnQ`~^er2E Qd Yȥ?O gE`1U48F*:ERSpGDЏTvrnqIvʩULCE<7 VI ,G5Nk\Z+ <7HYJ;klkTyP(A7io\)&@h".5"s1<,w3_m $E5K AH +k0/47&Nx$r)@ޚ]g MD[ ª>>|Iy8! 4&h -ݻ{olBD -zfnzn<7i輇dM-'kT= L#L*Ri6*!eXj}OC- u35|J]CϾvOp Z̈) Ő0XSna[y3]WA[a|PrDg:S2vwj2KS 6#s[ pn0m#R%L"})h`=5ͤ㣾 uj,7,0Gf`TLMЬ,ъ@[DCY\KRe7&cHaw"(cS+,/J1tmcb:M=>nEQPlP8 <6EFq2m\]! J]"as1X2ZkGLjh N[IZc>H@M/;Ev#nU\NvulZSw9½;[h0Ȏ(r۸ i<[9&g:ʾZ^2(?YY &#`$+XܕƄ&/GhYzR#rVmq=,IJ0M1=Vц}_MӎMd{cMDؖ#Ug׵?&B%U,pćvUM&b4Ki#ZaG['tSΊaf(>f%;%sd#u6IrY}/Z+al"& &}÷lbqFplKUʜti[7#XFKZ2EmdɖY$ms6Afl-g6K-H}Vpރi7 {]%sl{e6fO{|9FVd}!V,{z<[ꡑE=l`z|9 UFǗ1tp=dpJ^FL6s!97z hr^j$1'A}S$Uю`KnÚ7: x]BhlB0jLۊ `ׅԥ%oI)(B;"o%<6j]*6'fVHHdlA >F,$)x<ÚuʶjNLnaʈzb偮m(e)K)n4,~BbƍR И Fv{= h&#uN%}mk!8RM\tsfj_:J*3uP(E#]6l@>V|H!>T 6 `_$q7yJ{ЯIC5v6N݅6HV#C284Qҧ*3) ÷K,|fmPo,As+/ʛA 4iS5JT?%HG$JY7B- )nؼ֢[zp0X&ca#tm^=n76^$1&baCǍ`e/zV}LQKkOdV^/z: wP pŨ[gܪhuDQ!nWH m4,#-[5OW%]=Q_%pW9EPRcY' 75ʽϠتXg"m 9^cs9.Ȁ5 Bͦek*u)rYA?/̬@9X%Ҁ_) 8#Z/Еt;#zIPmЯjpxG;duMMeAaOBV,|J\56] r+΢AM74u;I5 #IoّfJ/aچgLT!z-) qӷ7InP?V5 _ߐӳk/7AW-mj Iv.4&ה=%ѻ}s0N7𕾊ufN"G{[] 4՘.H#BQKU=HH4V@S[Z}朘m-]zA9S.PI IENDB`jtidy/src/site/resources/images/project-support.png0000644000175000017500000000614510133721471023044 0ustar twernertwernerPNG  IHDRX =<tIME 8JV[ pHYs  ~gAMA aPLTE    00wx}cccA?@ "**&&%(%%%+,*$%1/2544856/51668ƿóihf0./8963:3869798;;;03.̌CCC?=@ϖPNN%#+#@A>#?B@8B5+4*++ ' $3$+:,0;/1@/ʙȕ1  ' 3 ,sss J@ a \uw_{|}|GQHCQCQOQԼ܁LRKNQP)X*ZҿNMPSSSQQNd#X Z MUTXŵö  (S&YYVZT[YT[TKXG%M*VS#fjh[\[/=0NaP :WeXY[VTaT^]`a^a%<(9"aa^WAiijffhhfghfi s2tIDATxڭV|g?vmEm/ͶHH& j.mZp*שve6!-k.)e 13픚 cJ`jH+5vsǏ}/rxbdrXm%g}fX; ~0FlUu\i^R~X'?vK@Oy< &8ׁmzJO=(^b-Lۚn1z΂q5_T^\qORJtT$TzR%WT %Ywߓ+Nsk$'7'f~{56WS1HDbBz_{[ N&X_X|40HD&$TH̦.vLנH,b5" !K_$bV1Ӵ{zFݵt!x/e"6m\-D7LL8v˯8b<|fi6icM%dNZ#D xo?9E4,[х=:r#!8aC~p 'A4# ZH +C?|tn|CC%HCޠGxqj]-]F*ʪh߼ U,%Mx(\WWܶ/,uiZ 
݋(p@O!i?m!Lvuvs%*i!+.K#/($jJPfY:[)"!VT8v+IR,ЭB+3\Y*@/{frmB[ Hw|^ɟy Ffqkg"fH7fn&z;+KdA% pCPs2'_ 1HxT*#?Id'Pp fD:'.ŅĂ?JiqR$䔔vejd6?#E1V{|og@ c1,MQ{ʆk+`^XXM*.J''NOnx9Icx|3.WW־nc.kK#ưw4w}08ͼٷD* ?9Sé?Oy(Dƅhm5ptw\ghKik:/8hWbu]ꦾfz4:$>ԩ/E lbSmmm&jO,BSKʫ˖}~=jz _TBjAseȒZڶLi1iscCt,P$.*CI,VT|0 Re !j sM5B oihSniIIai%og/xGUJR$ɥpC#2.Y"a3*T@PʥK9*P+QAer2.>wfNzI6=12{p [r߅_}%aDIENDB`jtidy/src/site/resources/images/ico_external.png0000644000175000017500000000033310133721471022331 0ustar twernertwernerPNG  IHDRz+tEXtCreation Timesab 2 ott 2004 09:53:51 +0100ζtIME 72 pHYs  ~gAMA aPLTE.tRNS@fIDATxc`d`"0D##(! Y)pIENDB`jtidy/src/site/resources/images/ico_collapsed.png0000644000175000017500000000035410133721471022460 0ustar twernertwernerPNG  IHDR +tEXtCreation Timesab 2 ott 2004 15:12:44 +0100tIME  67i pHYs  ~gAMA a PLTERm= tRNS@f"IDATxc`L #111pbIj#*IENDB`jtidy/src/site/resources/images/logos/0000755000175000017500000000000011617345035020301 5ustar twernertwernerjtidy/src/site/resources/images/logos/maven_sized.png0000644000175000017500000000570410133721471023313 0ustar twernertwernerPNG  IHDRX#DtIME  pHYs  ~gAMA aPLTEfͮεҷӣǢǩˬͨʫ̤ȡƙzplkoquwy|ĦɟŝħʘÏ~trmвЄv{ѸԶӈϔڿټֽךf]a_\V|T{OwJsIqFoClAk~?h|4|ُaRQ迶ٻXiz: BFKШ\Ζ|cӳ|׷g19sՌj _7z1XE` Gz 2J.IIYQQNV)STjT*Ss/9&_.tųم՛=}W/Àϋ9Z}ѡ7Blv>t~o|+zs64<}bs=@ fi8fch$HV+ӔV%Wa5:[iR f\@RSƋeyRP $)F2abs9Zaӽg/9QlmzPD6lj___dwou*w~F!~1GXfNnoZ;ं.sR^j'7ݑ!%Z;-Qd[ssXu:rpwp^c7X-u80*`)$F&5kV/UaSCbB 4E#D"(-L23$"hJRO# ^D<k4&5 ^?2F~iIJ͚bc,~` 9~?Oqfsf]H$/~7]^qs|]= G9i&͸c1gO@[dx<EOthut:6 Z.ttwC#=Et"C_7XLU5pZC]X\lKrDVT5,V{ñzI9yb#0Uz-W{@ :jaԈ1H+ dA)(rId;zDDRPjDPj5"r"'5`3r$T() A J44]M/䡊 TB+HH4?<s-/j5wW Ey 8߶lZ>szIJ&]m7#.O$?LRDf9<-n-_[{/KbowknO泟<:15v.ǽ3>:kqSTqoiP(qrܹH0:>j8P_o`=4*{QjJ$z_PpJn>^Z,3ʎ$c6 r=.1@sT M| iAg@8N,#:jip P%~,y$" 9B.1$4QPT4#.%WL05u [&8C5fJ*F o0ՠb4! 
1 5laC*ADrXV#B[-GdpDJγ"*ci=R(PBN#5A5,8H")RLR0ݐaT*R(IR( $ AUJFC(2& 'ҒR9$Yq4JR*ŁVP-US (<<* Z.̔SIENDB`jtidy/src/site/xdoc/0000755000175000017500000000000011617345035014634 5ustar twernertwernerjtidy/src/site/xdoc/coding_conventions.xml0000644000175000017500000001520210463343312021240 0ustar twernertwerner Coding Conventions Fabrizio Giustina

This document describes a list of coding conventions that are required for code submissions to the project. By default, the coding conventions for most Open Source Projects should follow the existing coding conventions in the code that you are working on. For example, if the bracket is on the line after the if statement, then you should write all your code to have that convention.

Below is a list of coding conventions that are specific to this project. Anything else not specifically mentioned here should follow the official Sun Java Coding Conventions .

If you use Eclipse 3 you can download and import this code formatter preference file. Import it from Window-> Preferences -> Java -> Code Style -> Code Formatter -> import.

All brackets (class, method, if, try, etc) must begin and end on a new line. Example :

Brackets are mandatory, even for single line statements !

Keywords followed by a parenthesis should be separated by a space. Example :

Blank space should appear after commas in argument lists. Binary operators should be separated from their operands by spaces :

4 spaces. NO tabs . Period. We understand that a lot of you like to use tabs, but the fact of the matter is that in a distributed development environment, when the cvs commit messages get sent to a mailing list, they are almost impossible to read if you use tabs.

Javadoc SHOULD exist on all your class members (methods + class variables), including the private ones. Also, if you are working on existing code and there currently isn't a javadoc for that method/class/variable or whatever, then you should contribute and add it. This will improve the project as a whole.

Also add code comments when you think it's necessary (like assumptions), especially when the code is not obvious.

If you contribute to a file (code or documentation), add yourself to the top of the file (below the existing authors). For java files the preferred Javadoc format is:

Class variables should not have any prefix and must be referenced using the this object. Example :

Method parameters should not have any prefix. For example :

Avoid lines longer than 120 characters for Code, comments, ...

All .java files should have a @version tag like the one below.

All import statements should contain the full class name of the classes to import and should not use the "*" notation :

An example :

jtidy/src/site/xdoc/oldnews.xml0000644000175000017500000002452510460747603017043 0ustar twernertwerner JTidy home Fabrizio Giustina

JTidy is a Java port of HTML Tidy , a HTML syntax checker and pretty printer. Like its non-Java cousin, JTidy can be used as a tool for cleaning up malformed and faulty HTML. In addition, JTidy provides a DOM interface to the document that is being processed, which effectively makes you able to use JTidy as a DOM parser for real-world HTML.

JTidy was written by Andy Quick, who later stepped down from the maintainer position. Now JTidy is maintained by a group of volunteers.

More information on JTidy can be found on the JTidy SourceForge project page .

New subproject: jtidyservlet

The JTidy Servlet library is an open source suite of custom tags and servlets that integrate JTidy HTML syntax checker and pretty printer functionality into Servlet/JSP container.

JTidyservlet is managed by Vlad Skarzhevskyy, who recently joined the JTidy project.

Two major new features expected for the next release have been finally committed to cvs!

  • You can now use any supported Java character encoding for input or output , with the classic tidy encoding handling replaced by a new implementation that takes advantage of built-in java character encoding support.
  • You don't need anymore to parse a text output to extract JTidy messages in your application: you can simply attach a listener using Tidy.setMessageListener() and be notified for error, warnings and summary messages.

What is missing before a release?

  • Cleanup the support for new charsets (still to be refined)
  • Doctype handling (seems to be really different from the current tidy c release)
  • Some more tests, especially for the dom parser/prettyprinter
  • A working ant task and command line interface

More than 50% of the tests are now working, and hundreds of fixes and new features have been ported from the c version. Xml/xhtml output is now considerably more robust. Check out a nightly build and report any bugs found!

Nightly builds are now automatically generated daily and the whole website is refreshed at the same time. 1/3 of the implemented tests are working now. Two years of reported bugs are difficult to catch up on, but the change log is starting to become "important"...

Do you wanna play with a recent build? Get the source or binary distribution from the nightly builds page .

Site updated using the latest maven version: test report is a lot more readable now (formatting has been fixed in the latest junit-report plugin)... new site layout (using a tweaked version of the maven xdoc plugin: xhtml + tableless CSS)

183 test cases fully implemented now. All the test cases from Tidy and some new tests for JTidy have been added.

All the test cases which caused JTidy to crash or loop have been fixed! Priority (1) is done, now there are another 139 tests failing. Note that most of the tests are failing at the first lines because of differences in doctype handling and formatting in Tidy (the latest Tidy release has been used to produce output files for comparison).

These are the priorities before a release:

  • Doctype handling (needed for tests)
  • Formatting (needed for tests)
  • Xml output: making Jtidy always produce valid xml (the well known "duplicate attributes" bug)

Mh, formatting in maven-generated junit report is really bad, I just submitted a bug report to maven: error messages are escaped two times, newlines are not preserved and random whitespaces are added. I think I should spend some time in fixing junit report plugin bugs if I want to be able to fix JTidy bugs...

179 test cases for JTidy have been partially implemented and added!

All the test cases for the non java version of Tidy have been integrated. Partially because most of them don't check yet output or warnings produced by Tidy, but simply test that JTidy doesn't crash or loop.

Well, actually as you can see in the junit report we have 1 test causing a NPE and 4 causing infinite loops! These bugs will take precedence over any incorrect output bug (fixing these will probably be worth a new release; you don't want your software to hang using JTidy, right?).

Anyway, in the TidyCrashingBugsTest (tests that crashed the c version of Tidy) 21 of the 24 tests work without problems... not as bad as expected.

See testcases , if you wanna help JTidy supplying tests or fixes.

Thanks to the Clover team for the free license for the JTidy project! Code Coverage by Clover

JTidy new website is online!

The project is starting again after two years without a release. I (Fabrizio Giustina) just joined the project as new administrator and developer.

Main targets are now:

  • migrate to maven as a build system (done)
  • old code cleanup: remove unused code, clean up everything with the help of checkstyle and pmd , and update code to use new coding conventions .
  • add junit test coverage (started, see junit report and clover report ). I'm trying to integrate all the standard tidy testcases to check that jtidy will behave like its non-Java cousin
  • Finally: integrate all the patches supplied by users in these years and the fixes in the non-Java version

A note about mailing lists: there are two new mailing lists, specific to jtidy, see project mailing lists . You can find previous discussions in html-tidy@w3.org archives (common to tidy and jtidy).

jtidy/src/site/xdoc/issue-tracking.xml0000644000175000017500000000314510460717103020303 0ustar twernertwerner JTidy Fabrizio Giustina

Go to JTidy SourceForge bug and feature request tracker to see open bugs or to submit new requests.

Please always attach a valid test case if you find a wrong behaviour in JTidy. If possible try to isolate the problem and create a small file with only the tags causing the problem.

If you want to help us by supplying a patch for a known bug, attach the patch (cvs diff) to the bug in the sourceforge tracker.

Use this form if you want to find a bug for tidy/jtidy given the tracker number. The tracker number is also the name of the method in junit tests, so you can easily find what is working and what is not in jtidy.

item number:
jtidy/src/site/xdoc/howto.xml0000644000175000017500000000700510460717103016512 0ustar twernertwerner JTidy how to Fabrizio Giustina

You can use JTidy as an html checker/prettyprinter or as a DOM parser.

First of all, you will need to download a JTidy distribution. Inside it you will find a jtidy-{version}.jar, where {version} is the JTidy release number: this is the jar containing all the JTidy classes you need, and we will simply call it jtidy.jar in the following how-to. No other libraries are needed.

Now that you have JTidy you can use it in different ways.

Run java -jar jtidy.jar {options} to access JTidy command line interface.

java -jar jtidy.jar -h will output a short help on jtidy command line with a few examples.

java -jar jtidy.jar -help-config will output all the available configuration options and java -jar jtidy.jar -show-config the current (default) values.

Detailed instructions on how to use the JTidy ant task can be found in JTidyTask javadocs.

The entry point for accessing JTidy functionalities is the org.w3c.tidy.Tidy class. This is a simple example of use:

                    
                

Using parseDOM(java.io.InputStream in, java.io.OutputStream out) instead of parse() you will also obtain a DOM document you can parse and print out later using pprint(org.w3c.dom.Document doc, java.io.OutputStream out) (note that the JTidy DOM implementation is not fully-featured, and many DOM methods are not supported).

Starting from release r8, JTidy also provides a MessageListener interface you can implement to be notified of warnings and errors in your html code. For details on advanced uses refer to jtidy javadocs.

jtidy/src/site/xdoc/testcases.xml0000644000175000017500000001060710460717103017352 0ustar twernertwerner Test cases Fabrizio Giustina

JTidy provides some helper classes to test its functionalities. Having a wide test coverage will help JTidy developers to ensure code quality across releases. If you want to write a test case for JTidy (please do that if you are a developer and you find a JTidy bug) you can follow a few simple guidelines.

See the test xref report for test examples.

To write a JTidy test case your test class must extend org.w3c.tidy.TidyTestCase (you can find it in the src/test directory). This class offers different utility methods to load files and evaluate results.

Input file for testing should be placed in the src/test-resources directory. An optional configuration file can also be supplied: the file name must be the same name of the input file with the ".cfg" extension.

For testing an input file which causes a NPE or an infinite loop, simply call executeTidyTest(String fileName) where filename is the simple (without path) file name for your input file. This method will take care of loading the configuration (if a .cfg file with the same name of the input file exists) and call tidy.parse() .

Place a file with the same name of your input file but with the ".out" extension containing the expected result. Call executeTidyTest(String fileName) and the result will be automatically compared with the supplied file. An AssertionException will be thrown if files are different (the exception will include the different lines from both files).

Call parseDomTest(String fileName) : this will return a Document object so you can make assertions on its content.

Call the executeTidyTest(String fileName) method and check the number of warnings/errors on the Tidy instance. There are a couple of utility methods to test the number of warnings or errors reported ( assertNoWarnings(), assertNoErrors(), assertWarnings(int expectedNumber), assertErrors(int expectedNumber) ). This is enough to test simple documents (documents you know must produce one error or none). If you can't have a simple test and you need to test the output in the error log you can use the assertLogContains(String expectedString) method or directly check the output in the errorLog class instance (be careful with internationalization!).

A big effort is being put into making JTidy work with the test cases supplied for the non java version of Tidy. All the tests from the tidy code base have been integrated in JTidy tests. Most of them can't work now, since newest features from Tidy have not been implemented yet in JTidy. Many others don't work even though they should: many bugs have been fixed in the original Tidy but not yet in JTidy, since the Tidy project was a lot more active than JTidy in recent years.

If you can supply a patch to make a test case work as expected (as it works for Tidy) please do that! Non-developers should submit patches by attaching them to items in the sourceforge bug tracker. If your patch works and does not cause problems to other test cases you can be sure it will be applied in a short time.

jtidy/src/site/site.xml0000644000175000017500000000307110463212170015355 0ustar twernertwerner Sourceforge http://sourceforge.net/sflogo.php?group_id=13153&amp;type=2 http://sourceforge.net/projects/jtidy Displaytag http://jtidy.sf.net/images/logo.png http://jtidy.sf.net/ ${reports} jtidy/src/site/apt/0000755000175000017500000000000011617345035014463 5ustar twernertwernerjtidy/src/site/apt/download.apt0000644000175000017500000000145510463212170016774 0ustar twernertwerner -------------------- JTidy -------------------- Fabrizio Giustina -------------------- 07/07/2006 Download * Releases You can obtain JTidy from the {{{http://sourceforge.net/project/showfiles.php?group_id=13153}SourceForge download area}}. * Source code from SVN You can also obtain the source from the SourceForge SVN Server, see {{{source-repository.html}Source Repository}}. * Snapshots using Maven Snapshots are published to a repository at {{{http://jtidy.sourceforge.net/snapshots/}http://jtidy.sourceforge.net/snapshots/}}. You can link such snapshot repo in your maven 2 pom in order to use recent builds.jtidy/src/site/apt/index.apt0000644000175000017500000000173210460747603016305 0ustar twernertwerner -------------------- JTidy -------------------- Fabrizio Giustina -------------------- 07/07/2006 About JTidy ~~~~~~~~~~~~ JTidy is a Java port of {{{http://www.w3.org/People/Raggett/tidy/}HTML Tidy}}, a HTML syntax checker and pretty printer. Like its non-Java cousin, JTidy can be used as a tool for cleaning up malformed and faulty HTML. In addition, JTidy provides a DOM interface to the document that is being processed, which effectively makes you able to use JTidy as a DOM parser for real-world HTML. JTidy was written by Andy Quick, who later stepped down from the maintainer position. Now JTidy is maintained by a group of volunteers. More information on JTidy can be found on the {{{http://sourceforge.net/projects/jtidy/}JTidy SourceForge project page}} . 
jtidy/src/site/changes/0000755000175000017500000000000011617345035015307 5ustar twernertwernerjtidy/src/site/changes/changes.xml0000644000175000017500000004461610460713140017443 0ustar twernertwerner Changes Fabrizio Giustina Development is starting againg, with a new project admin and developer. The build system and documentation has been migrated to maven. Due to changes in directory structure, a new module in CVS has been created (tidy2): old files will left untouched and development will continue on this new module. Packages org.w3c.dom and org.xml.sax have been removed from the distribution. Junit tests added to build process. NullPointerException in parsing: Tidy crashes while trying to insert a node into markup tree before element when parent is null. A JTidy ant task has been added: org.w3c.tidy.ant.JTidyTask . NullPointerException in generating slides. Frameset followed by frame infinite loop. Typo in Configuration.parseProps(): indent-attributes parameter ignored. LI w/FRAME/FRAMESET/OPTGROUP/OPTION loop. Span causes infinite loop. Infinite loop after ]]> DOM2 methods should throw DOMException if unsupported: now unimplemented DOM methods properly return DOMException.NOT_SUPPORTED_ERR. Blank char inserted after first char. Space inserted before ]]> . Incorrect wrap behaviour. Remove empty lines between tags in XML. Slides not numbered in zero-padded fmt. XHTML requires form method="post" Tidy inserts entity in -xml mode. Anchor tag without attributes deleted. Nested anchor elements allowed. End tags containing whitespace warning. Proprietary elements not reported as err. Newline in URL attr value becomes space. Tidy adds newlines after ]]> . Output DOCTYPE/Namespace decl separately. Added "-//W3C//DTD HTML 3.2 Final//EN" to W3C_Version[] so FPI in a DOCTYPE will let document be recognized as HTML 3.2. Given doctype reported incorrectly. Well formed XSL xsl:text gives error. Table height="" not flagged as error. ]]> reported unknown attr. 
Missing / in title endtag makes 2 titles. Empty iframe elements trimmed. BLOCK/INLINE before TABLE parsed wrong. DTD not inserted, but DTD URI Namespace. Report methods are no more static. This will allow in future to add error listeners to a single tidy instance for better multithreading and IDE/tools integration. --body-only: print only body contents. Messages are now generated totally from message resources and no more partially composed in code. This will allow for better message localization. Missing doctype is now reported. Invalid values for table cell attributes are now reported. Identify attribute whose value is bad. Null value changed to "value" for -asxml. Problems nesting user defined inline tags. gets tidied into

.]]>
Missing attribute name garbles output. Single document element discarded. Single document element discarded. Reduce blank lines in output. Duplicate attribute removed. New configuration option: --drop-proprietary-attributes . New configuration option: --fix-uri . New configuration option: --lower-literals . New configuration option: --hide-comments . New configuration option: --indent-cdata . New configuration option: --force-output . New configuration option: --show-errors . New configuration option: --ascii-chars . New configuration options: --join-classes and --join-styles . New configuration option: --escape-cdata . New configuration option: --repeated-attributes . Add support for separate input and output character encodings, configuration options: --input-encoding and --output-encoding . Report current configuration settings. --help-config reports all available settings, --show-config reports actual values. Document Type Declaration for proprietary documents. Hex character references not handled. OPTION w/illegal FONT eats whitespace. Inline emphasis inconsistent propagation. XHTML Strict seen as Transitional w/div. Anchor enclosing Header tags is omitted. Nested <q></q>'s not handled correctly. Font tags handling different. XHTML 1.1 Support. end tags for empty elements in XHTML. </select> does not terminate <option>. Error actually reported as a warning. Bad head-endtag reported incorrectly. Extra endtags not detected. Fix for character references >= 32768. Case of attribute values. Microsoft Access exported HTML files. Parser misinterprets ?xml-stylesheet PI. XHTML TRANSITIONAL doctype set wrongly. <br clear="none"> should be output. Parser complains about xml:lang. Multiple <BODY>'s in <NOFRAMES> allowed. Frameset rows attr. not recognized. You can now use any supported Java character encoding for input or output. The standard tidy encoding handling derived from tidy c has been rewritten using the default java encoding support. 
This was a most requested feature after JTidy r7, thanks to all the people who sent patches and proposal for this. You can now attach listeners (implementing the TidyMessageListener interface) to be notified for warning, errors and summary message. You don't need anymore to parse a text output to extract jtidy messages in your application! Tidy DOM implementation has been updated with DOM level 3 methods signatures.
TagTable is no longer a Singleton, which brings us one step closer to having the ability to run multiple JTidy's in a single JVM. Tidy.java has been refactored and duplicate code has been removed. The license is stated explicitly in the file LICENSE. Fixed a bug in the parseAttribute() method of Lexer.java which generated an incorrect length for the UTF8 representation of characters above \u007f. This was causing a StringIndexOutOfBoundsException in method isValidAttributeName() of Lexer.java at line 2577. Thanks to dlgo@users.sourceforge.net for reporting this problem. Fixed a bug in the insertedToken() method of Lexer.java which generated a Negative ArraySizeException in the clone() method of Node.java. An ending value for a range in a character array was not being set correctly. Thanks again to dlgo@users.sourceforge.net for reporting this problem. Fixed a bug in Lexer.java where a "!" was omitted from an if condition, causing the reverse of expected behavior with namespaces. Fix is attributed to Randi Waki dlp@users.sourceforge.net contributed a fix so that hasAttributes() in DOMNodeImpl.java would return the correct boolean value instead of throwing a not implemented exception. Added support for CDATASections in the DOM model. Fixed a bug in the createProps() method of Clean.java which caused a StringOutOfBoundsException in line 156 due to trailing spaces in a CSS property name. Changed the behavior of DOMNodeImpl so that nodes added via the DOM interface have the correct node type. Previously, such nodes could not contain any content because they were added with a type of StartEndTag. Added methods to support DOM Level 2. Fixed a bug where createElement() was not properly setting Node.tag if the tag to be added could not be found in the the TagTable. A generic entry is now used instead of leaving the entry set to null. 
The getValue() method in DOMAttrImpl.java returns the attribute name if the attribute value is null (thanks to Brett Knights for reporting this) Initial implementation of a Windows .bat file to drive the build process has been added (thanks, Gary). Compiled .class files are not included in the distribution as they are already contained in the jar file. The 04aug2000r4 release fixes the bugs reported since r3. Thanks to Gary for implementing these changes. This release fixes a bug in the pre-compiled Tidy JAR. The messages property file is now included. No code changes. The package structure has been reorganised a little. In the binary release, the Java bytecode classes and the Tidy JAR can be found under the build/ subdirectory. The Ant build system is now the authoritative one. The old Makefiles (untouched) can be found under contrib/ should someone want to start maintaining them. A patch by Brett Knights has been incorporated. The patch affects the DOMNodeImpl class and ensures that CharacterData always returns at least an empty string (compared to null) as the value. This patch has not been verified with a DOM guru, so comments are welcome.
jtidy/src/test/0000755000175000017500000000000011617345027013713 5ustar twernertwernerjtidy/src/test/resources/0000755000175000017500000000000011617345034015723 5ustar twernertwernerjtidy/src/test/resources/433607.msg0000644000175000017500000000043011463516445017203 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/433359.html0000644000175000017500000000034607777327667017416 0ustar twernertwerner [ #433359 ] Empty iframe elements trimmed This is a test jtidy/src/test/resources/427844.html0000644000175000017500000000036510000046405017352 0ustar twernertwerner [ #427844 ] End tags containing whitespace warningjtidy/src/test/resources/1003994.msg0000644000175000017500000000066411463520166017272 0ustar twernertwerner 13 2 1 -3 ]]> -1 0 0 0 jtidy/src/test/resources/433012.cfg0000644000175000017500000000017310000613130017115 0ustar twernertwernerindent: auto indent-attributes: yes tidy-mark: no clean: yes indent= auto indent-attributes= yes tidy-mark= no clean= yes jtidy/src/test/resources/538536.msg0000644000175000017500000000175311461621360017212 0ustar twernertwerner 27 2 8 -3 8 2 9 -3 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/540296.html0000644000175000017500000000017507777327667017415 0ustar twernertwerner [ 540296 ] Tidy dumps
jtidy/src/test/resources/540571.out0000644000175000017500000000074610000613130017204 0ustar twernertwerner #540571 Inconsistent behaviour with span inline element

Hello World

The font inline is moved so it becomes a child of the h1 element.

Hello World

The span inline is not moved so it becomes a child of the h1 element, which is inconsistent and does not correspond with current browser behaviour any more.

jtidy/src/test/resources/431883.msg0000644000175000017500000000270211461621360017202 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/467863.html0000644000175000017500000000034607777327667017425 0ustar twernertwerner [ #467863 ] un-nest <a> A B C jtidy/src/test/resources/438658.out0000644000175000017500000000025010000613130017206 0ustar twernertwerner [ #438658 ] Missing / in title endtag makes 2 titles Test jtidy/src/test/resources/1020806b.out0000644000175000017500000000053310120313554017425 0ustar twernertwerner [1020806b] NPE when PPPrint'ing changed DOM tree jtidy/src/test/resources/935796.html0000644000175000017500000000066110044423725017377 0ustar twernertwerner Quote entities in title attribute problem jtidy/src/test/resources/538727.out0000644000175000017500000000035010033573374017232 0ustar twernertwerner [ 538727 ] setDocType uncorrectly adds "" doctype should not contain "" jtidy/src/test/resources/431958.cfg0000644000175000017500000000014610111224155017146 0ustar twernertwerner# Tidy configuration file for bug #431958 indent: auto tidy-mark: false indent= auto tidy-mark= falsejtidy/src/test/resources/588061.html0000644000175000017500000006211507777327667017421 0ustar twernertwerner TVNAV.COM Garmin GPS Home Page

TVNAV.COM

Toll Free 877-625-3546 (US only)



Garmin Logo





To track your package click here.

Check the current REBATE offers!

**NEW! GPSMAP 76S IN STOCK!**

**NEW! Rino 110/120 GPS/FRS/GMRS Expected September**

**NEW! GPSMAP 196 Coming Soon!**

**NEW! City Navigator Australia....$265.00 IN STOCK!**

**NEW! BlueChart software IN STOCK!**

**NEW! Europe MapSource: City Navigator, City Select, MetroGuide and Roads & Recreation IN STOCK!**

**NEW! We now have remanufactured GPS III ($150) in stock. 1 year warranty.
**

**NEW! GPS V IN STOCK!**

**NEW! eTrex/eMap/StreetPilot/ColorMap/StreetPilot III/GPSMAP 295 Bean Bag IN STOCK!**

**NEW! StreetPilot III IN STOCK!**

**NEW! StreetPilot/ColorMap/295 Deluxe Case IN STOCK!**

**NEW! Sunvisor for StreetPilot, ColorMap, StreetPilot III and GPSMAP 295....$20.00 IN STOCK!**

**We have R-A-M mounts now in stock for most Garmin units....Call or email us for prices and availability.**





Total Video became an authorized Garmin dealer in January 1999. We sold 300+ GPS units prior to becoming a Garmin direct dealer, picking them up from various distributors and individuals to sell. By becoming a Garmin direct dealer we now are able to sell for less! Total Video prides itself with *very quick shipping and a strong history of customer satisfaction. Comments from customers.

Want to learn more about GPS? Click here for further GPS information.


Rino 110/120 GPS-Integrated FRS/GMRS Radios....(MAP $169.99/$249.99) Call or email us for our current price....too low to advertise! Coming Soon!

eMap....$170.00 IN STOCK!

eMap with 8MB memory cartridge....$200.00 IN STOCK!

eMap with 8MB memory cartridge and USA MetroGuide MapSource....$215.00 IN STOCK!

eTrex....$115.00 IN STOCK!

eTrex Summit....$210.00 IN STOCK!

eTrex Camo....$125.00 IN STOCK!

eTrex Venture....(MAP $169.00) Call or email us for our current price....too low to advertise! IN STOCK!

eTrex Legend....(MAP $249.00) Call or email us for our current price....too low to advertise! IN STOCK!

eTrex Vista....(MAP $349.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPSMAP 76S....(MAP $449.99) Call or email us for our current price....too low to advertise! IN STOCK!

GPSMAP 76....(MAP $349.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPS 76....(MAP $219.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPSMAP 176....(MAP $499.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPSMAP 176C....(MAP $599.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPSMAP 2006....(MAP $1199.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPSMAP 2006C....(MAP $1999.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPSMAP 2010C....(MAP $2499.00) Call or email us for our current price....too low to advertise! IN STOCK!

GPS 12....$140.00 IN STOCK!

GPS 12XL with carrying case....$190.00 IN STOCK!

GPS 12 MAP with PC interface cable....$280.00 IN STOCK!

GPS II Plus....$190.00 IN STOCK!

GPS III Plus with PC interface cable....$280.00 IN STOCK!

GPS V Deluxe w/*new* City Select with all unlocks....(MAP $499.00) Call or email us for our current price....too low to advertise! IN STOCK!

*U.S. Roads and Recreation MapSource....$80.00 IN STOCK!

*WorldMap MapSource....$80.00 IN STOCK!

*TOPO MapSource....$85.00 IN STOCK!

*Fishing Hot Spots MapSource (includes one coverage area unlock)....$85.00 IN STOCK!

*U.S. Waterways & Lights MapSource....$60.00 IN STOCK!

StreetPilot with dash mount, cigarette power cable and PC interface cable....$385.00 IN STOCK!

StreetPilot ColorMap with dash mount, cigarette power cable and PC interface cable....$540.00 IN STOCK!

StreetPilot III Deluxe w/128MB, *new* City Navigator w/all unlocks and portable bean bag mount....(MAP $999.00) Call or email us for our current price....too low to advertise! IN STOCK!

*MetroGuide USA MapSource....$90.00 IN STOCK!

*MetroGuide USA MapSource w/Blank 8MB Memory Cartridge....$110.00 IN STOCK!

*MetroGuide USA MapSource w/Blank 16MB Memory Cartridge....$115.00 IN STOCK!

*MetroGuide Canada with Roads & Recreation....$80.00 IN STOCK!

*City Navigator Europe....$195.00 IN STOCK!

*City Navigator Europe "All" unlock....$215.00 IN STOCK!

*City Select Europe....$115.00 IN STOCK!

*City Select Europe "All" unlock....$115.00 IN STOCK!

*MetroGuide Europe....$115.00 IN STOCK!

*Roads & Recreation Europe....$85.00 IN STOCK!

*City Navigator Australia....$265.00 IN STOCK!

*BlueChart w/one coverage area....(MAP Americas-$139.00/Atlantic-$229.00/Pacific-$189.00) Call or email us for our current prices....too low to advertise! IN STOCK!

*BlueChart Single Region Unlock....Americas-$85.00/Atlantic-$145.00/Pacific-$105.00 IN STOCK!

Blank 8MB Memory Cartridge....$45.00 IN STOCK!

Blank 16MB Memory Cartridge....$55.00 IN STOCK!

Blank 32MB Memory Cartridge....$70.00 IN STOCK!

Blank 64MB Memory Cartridge....$105.00 IN STOCK!

Blank 128MB Memory Cartridge....$155.00 IN STOCK!

USB Data Card Programmer....$70.00 IN STOCK!

PC Download Kit (includes AC/DC adapter and 12V cigarette power/PC interface cable (for round, 4 pin connectors only)....$45.00

eMap/eTrex PC Download Kit (includes AC/DC adapter and cigarette power/PC interface cable....$50.00

GPSMAP 162....$355.00 (w/internal antenna)/$375.00 (w/external antenna) IN STOCK!

GPSMAP 168 Sounder....$495.00 (w/internal antenna)/$515.00 (w/external antenna) IN STOCK!

NavTalk Cellular phone/GPS (ver. 2.16)....$375.00

GBR 21 and GBR 23 Differential Receivers....$180.00

Videos available on many of the Garmin products as well as general GPS usage videos.

Garmin GPS and accessories catalog....$2.00 (for s/h in US. Refundable/free with order)

Online GARMIN manuals.


Stock status subject to change. We try to update the stock status continuously but we sometimes don't get it changed immediately. Check with us for current stock status.

Our Return Policy

Software Return Policy

Accessories and Miscellaneous Items

AVIATION GPS


We also sell Garmin GPS accessories such as mounts, cables, cases, etc.

We sell everything in the Garmin outdoor recreation, marine and cartography line. Email us for prices on any items you don't see listed above.



We charge a flat $10.00 shipping and handling charge (via UPS ground) per GPS order (not per item) in the 48/US.

$5.00 shipping and handling for accessories in the 48/US.

An additional $10.00 charge for COD orders (COD s/h must be credit card secured).

Faster shipping available.

*3 day select (usually arrives in 2 days!)--addtl. $3.00.
*2nd day air--addtl. $5.00.
*Next day air saver--addtl. $20.00.
*Next day air-Saturday delivery--addtl. $35.00
*More shipping may be required on larger packages for 3 day, 2nd day and next day air packages.

Click here to get UPS Ground delivery times. Our zip code is 67601 (Hays, Kansas).

Add $10 to UPS charges for FedEx shipping. (minimum FedEx s/h is $18)

$20 for Priority Mail s/h on GPS units and $15 for accessories in the US.

Email us for requirements/costs for out of 48/US sales or click here.



ORDERING INFORMATION



* Most orders received by 2:00 p.m. central time for in stock items will ship the same day (business days only).

All orders in Kansas must pay a 6.8% sales tax.

We accept payment by Discover/MasterCard/Visa/Pre-pay (orders paid by personal/company check orders held for 10 business days for check clearing. Cashier checks/money orders ship same day.). COD orders welcome (cashiers check or money order).

rescue,hunting,fishing,camping,adsfg,videogi,productshun,GPS III,Garmin,StreetPilot,gps3,gpsIII,gps2,gps2plus,gpsII+,Street Atlas,StreetAtlas,Osborne,Plainville,Stockton,Victoria,Ellis,Kansas,WaKeeney,Quinter,Russell,LaCrosse,Gorham,gpsIIIPLUS,gps3+,gps III+,gps 3+,DeLorme,GPS III Plus,GPS III Pilot,aviation gps,DeLorme Street Atlas,garmin aviation,metro guides,MetroGuides,truck navigation,semi navigation,over the road navigation,navigation aids,truck stops,metro gides,colormap streetpilot,color streetpilot,guidance by Garmin,color street pilot,color map,color map streetpilot,color map Street Pilot,MetroGuide,garmin international,garmond,garmund,magellan,gps12,gps 12,gps12xl,gps 12XL,gps12cx,RANS,gps 12CX,gps12CX,color streetpilot,streetpilot color map,garmin gps,Street Pilot,experimental aircraft,EEA,tvnav.com,gps 12 MAP,NavTalk,StreetPilot ColorMap,emap,ColorMap,Street Pilot ColorMap,Street Pilot Color Map,cell phone,cellular phone,cellar phone,cellar,cellular,EMAP,e map,Nav Talk,GPS,G P S,Global Positioning System,globalpositioningsystem,gps outfitters,gps video,cables,gps cables,navigation,mapsource,map source,MapSource,TOPO MapSource,MetroGuide MapSource



send email

Toll Free Order Line (877) 625-3546 (US only)

FAX (413) 383-8800

Information /International Order Line (785) 625-3546


Home Page


Subscribe to the TVNAV.COM GarminGPS mail list
Powered by groups.yahoo.com



Counter reset 2/1/99

This site last modified 7/25/02

jtidy/src/test/resources/431898.html0000644000175000017500000000054507777327667017425 0ustar twernertwerner ]> [ #431898 ] Tidy messes up X(HT)ML documents

Test

jtidy/src/test/resources/435919.msg0000644000175000017500000000146511461621360017213 0ustar twernertwerner 44 2 1 1 declaration]]> 40 2 6 28 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/517528.out0000644000175000017500000000045610114410047017220 0ustar twernertwerner test doc

A test document

jtidy/src/test/resources/566542.html0000644000175000017500000000056607777327667017423 0ustar twernertwerner[ 566542 ] parser hangs
  • Identify the member disks with ssaraid -H -lssa0 -n pdisk [n] -u -a use=member

  • Identify the hot spare with ssaraid -H -lssa0 -n pdisk [n]-u -a use=spare

    jtidy/src/test/resources/511679.out0000644000175000017500000000042510000613130017205 0ustar twernertwerner [ 511679 ] Block level elements in a <pre> section
    foo
    
    jtidy/src/test/resources/433672.html0000644000175000017500000000050107777327667017405 0ustar twernertwerner [ #433672 ] Anchor enclosing Header tags is omitted

    Section heading

    Another heading

    ack!

    Goto Another Heading jtidy/src/test/resources/994841.msg0000644000175000017500000000125511463516445017225 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427835.html0000644000175000017500000000054307777327667017417 0ustar twernertwerner Test input file for bug #427835

    Test input file for bug #427835

    Use with or without the -asxhtml option.

    -clean has no effect

    jtidy/src/test/resources/647255.cfg0000644000175000017500000000023110113414404017140 0ustar twernertwernerchar-encoding: utf16le newline: LF output-xhtml: yes tidy-mark:false wrap:0 char-encoding= utf16le newline= LF output-xhtml= yes tidy-mark=false wrap=0 jtidy/src/test/resources/543262.cfg0000644000175000017500000000032110000613130017121 0ustar twernertwernerdoctype: omit output-xhtml: yes char-encoding: latin1 numeric-entities: yes quiet: yes tidy-mark: false doctype= omit output-xhtml= yes char-encoding= latin1 numeric-entities= yes quiet= yes tidy-mark= false jtidy/src/test/resources/943559.out0000644000175000017500000000045310044423725017235 0ustar twernertwerner [943559] Form between td
    yyy
    jtidy/src/test/resources/620531.out0000644000175000017500000000031610000613130017170 0ustar twernertwerner [ 620531 ] br in pre must not cause line break

    foo

    bar
    baz
    jtidy/src/test/resources/441508.msg0000644000175000017500000000440311463516445017206 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 4 1 lacks "summary" attribute]]> 55 2 4 1 dropping value "right" for repeated attribute "align"]]> 12 2 7 1 ]]> 8 2 8 -3 ]]> 7 2 9 -3 before ]]> 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/572154.msg0000644000175000017500000000275511461621360017207 0ustar twernertwerner 44 2 1 1 declaration]]> 8 2 7 1 ]]> 111 0 1 1 -1 0 0 0 16 0 1 1 112 0 1 1 jtidy/src/test/resources/502346.out0000644000175000017500000000051010115166156017211 0ustar twernertwerner [502346] Leading and trailing space in attval whitespace in href should be removed, not escaped jtidy/src/test/resources/791933.out0000644000175000017500000000041310115433712017222 0ustar twernertwerner [ 791933 ] Why German special character converted to upper case (ü ->Ü) üüü jtidy/src/test/resources/427662.out0000644000175000017500000000035610013456267017232 0ustar twernertwerner [#427662] BLOCK/INLINE before TABLE parsed wrong Big and bold Big jtidy/src/test/resources/511679.msg0000644000175000017500000000264711463516445017225 0ustar twernertwerner 44 2 1 1 declaration]]> 39 2 4 36 in pre content]]> 39 2 5 1 in pre content]]> 39 2 6 1 in pre content]]> 6 2 7 1 ]]> 6 2 7 2 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/646946.msg0000644000175000017500000000043011463516445017217 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/433012.out0000644000175000017500000005615710000613130017202 0ustar twernertwerner [ #433012 ] Illegal ampersands/character entities

    id=ID =XX
    id=ID¡=XX
    id=ID¢=XX
    id=ID£=XX
    id=ID¤=XX
    id=ID¥=XX
    id=ID¦=XX
    id=ID§=XX
    id=ID¨=XX
    id=ID©=XX
    id=IDª=XX
    id=ID«=XX
    id=ID¬=XX
    id=ID­=XX
    id=ID®=XX
    id=ID¯=XX
    id=ID°=XX
    id=ID±=XX
    id=ID²=XX
    id=ID³=XX
    id=ID´=XX
    id=IDµ=XX
    id=ID¶=XX
    id=ID·=XX
    id=ID¸=XX
    id=ID¹=XX
    id=IDº=XX
    id=ID»=XX
    id=ID¼=XX
    id=ID½=XX
    id=ID¾=XX
    id=ID¿=XX
    id=IDÀ=XX
    id=IDÁ=XX
    id=IDÂ=XX
    id=IDÃ=XX
    id=IDÄ=XX
    id=IDÅ=XX
    id=IDÆ=XX
    id=IDÇ=XX
    id=IDÈ=XX
    id=IDÉ=XX
    id=IDÊ=XX
    id=IDË=XX
    id=IDÌ=XX
    id=IDÍ=XX
    id=IDÎ=XX
    id=IDÏ=XX
    id=IDÐ=XX
    id=IDÑ=XX
    id=IDÒ=XX
    id=IDÓ=XX
    id=IDÔ=XX
    id=IDÕ=XX
    id=IDÖ=XX
    id=ID×=XX
    id=IDØ=XX
    id=IDÙ=XX
    id=IDÚ=XX
    id=IDÛ=XX
    id=IDÜ=XX
    id=IDÝ=XX
    id=IDÞ=XX
    id=IDß=XX
    id=IDà=XX
    id=IDá=XX
    id=IDâ=XX
    id=IDã=XX
    id=IDä=XX
    id=IDå=XX
    id=IDæ=XX
    id=IDç=XX
    id=IDè=XX
    id=IDé=XX
    id=IDê=XX
    id=IDë=XX
    id=IDì=XX
    id=IDí=XX
    id=IDî=XX
    id=IDï=XX
    id=IDð=XX
    id=IDñ=XX
    id=IDò=XX
    id=IDó=XX
    id=IDô=XX
    id=IDõ=XX
    id=IDö=XX
    id=ID÷=XX
    id=IDø=XX
    id=IDù=XX
    id=IDú=XX
    id=IDû=XX
    id=IDü=XX
    id=IDý=XX
    id=IDþ=XX
    id=IDÿ=XX
    id=ID&fnof=XX
    id=ID&Alpha=XX
    id=ID&Beta=XX
    id=ID&Gamma=XX
    id=ID&Delta=XX
    id=ID&Epsilon=XX
    id=ID&Zeta=XX
    id=ID&Eta=XX
    id=ID&Theta=XX
    id=ID&Iota=XX
    id=ID&Kappa=XX
    id=ID&Lambda=XX
    id=ID&Mu=XX
    id=ID&Nu=XX
    id=ID&Xi=XX
    id=ID&Omicron=XX
    id=ID&Pi=XX
    id=ID&Rho=XX
    id=ID&Sigma=XX
    id=ID&Tau=XX
    id=ID&Upsilon=XX
    id=ID&Phi=XX
    id=ID&Chi=XX
    id=ID&Psi=XX
    id=ID&Omega=XX
    id=ID&alpha=XX
    id=ID&beta=XX
    id=ID&gamma=XX
    id=ID&delta=XX
    id=ID&epsilon=XX
    id=ID&zeta=XX
    id=ID&eta=XX
    id=ID&theta=XX
    id=ID&iota=XX
    id=ID&kappa=XX
    id=ID&lambda=XX
    id=ID&mu=XX
    id=ID&nu=XX
    id=ID&xi=XX
    id=ID&omicron=XX
    id=ID&pi=XX
    id=ID&rho=XX
    id=ID&sigmaf=XX
    id=ID&sigma=XX
    id=ID&tau=XX
    id=ID&upsilon=XX
    id=ID&phi=XX
    id=ID&chi=XX
    id=ID&psi=XX
    id=ID&omega=XX
    id=ID&thetasym=XX
    id=ID&upsih=XX
    id=ID&piv=XX
    id=ID&bull=XX
    id=ID&hellip=XX
    id=ID&prime=XX
    id=ID&Prime=XX
    id=ID&oline=XX
    id=ID&frasl=XX
    id=ID&weierp=XX
    id=ID&image=XX
    id=ID&real=XX
    id=ID&trade=XX
    id=ID&alefsym=XX
    id=ID&larr=XX
    id=ID&uarr=XX
    id=ID&rarr=XX
    id=ID&darr=XX
    id=ID&harr=XX
    id=ID&crarr=XX
    id=ID&lArr=XX
    id=ID&uArr=XX
    id=ID&rArr=XX
    id=ID&dArr=XX
    id=ID&hArr=XX
    id=ID&forall=XX
    id=ID&part=XX
    id=ID&exist=XX
    id=ID&empty=XX
    id=ID&nabla=XX
    id=ID&isin=XX
    id=ID&notin=XX
    id=ID&ni=XX
    id=ID&prod=XX
    id=ID&sum=XX
    id=ID&minus=XX
    id=ID&lowast=XX
    id=ID&radic=XX
    id=ID&prop=XX
    id=ID&infin=XX
    id=ID&ang=XX
    id=ID&and=XX
    id=ID&or=XX
    id=ID&cap=XX
    id=ID&cup=XX
    id=ID&int=XX
    id=ID&there4=XX
    id=ID&sim=XX
    id=ID&cong=XX
    id=ID&asymp=XX
    id=ID&ne=XX
    id=ID&equiv=XX
    id=ID&le=XX
    id=ID&ge=XX
    id=ID&sub=XX
    id=ID&sup=XX
    id=ID&nsub=XX
    id=ID&sube=XX
    id=ID&supe=XX
    id=ID&oplus=XX
    id=ID&otimes=XX
    id=ID&perp=XX
    id=ID&sdot=XX
    id=ID&lceil=XX
    id=ID&rceil=XX
    id=ID&lfloor=XX
    id=ID&rfloor=XX
    id=ID&lang=XX
    id=ID&rang=XX
    id=ID&loz=XX
    id=ID&spades=XX
    id=ID&clubs=XX
    id=ID&hearts=XX
    id=ID&diams=XX
    id=ID"=XX
    id=ID&=XX
    id=ID<=XX
    id=ID>=XX
    id=ID&OElig=XX
    id=ID&oelig=XX
    id=ID&Scaron=XX
    id=ID&scaron=XX
    id=ID&Yuml=XX
    id=ID&circ=XX
    id=ID&tilde=XX
    id=ID&ensp=XX
    id=ID&emsp=XX
    id=ID&thinsp=XX
    id=ID&zwnj=XX
    id=ID&zwj=XX
    id=ID&lrm=XX
    id=ID&rlm=XX
    id=ID&ndash=XX
    id=ID&mdash=XX
    id=ID&lsquo=XX
    id=ID&rsquo=XX
    id=ID&sbquo=XX
    id=ID&ldquo=XX
    id=ID&rdquo=XX
    id=ID&bdquo=XX
    id=ID&dagger=XX
    id=ID&Dagger=XX
    id=ID&permil=XX
    id=ID&lsaquo=XX
    id=ID&rsaquo=XX
    id=ID&euro=XX
    id=ID'=XX
    id=ID&foo=XX

    jtidy/src/test/resources/435909.out0000644000175000017500000000104510133721557017231 0ustar twernertwerner [ #435909 ] <noscript></noscript> in <head></head> Test jtidy/src/test/resources/431895.html0000644000175000017500000000135407777327667017421 0ustar twernertwerner Bug-2001-01-03-A [ #431895 ] gnu-emacs filename not set for XML or -q

    Some text.

    jtidy/src/test/resources/435922.msg0000644000175000017500000000224111461621360017176 0ustar twernertwerner 11 2 7 3 isn't allowed in elements]]> 15 2 7 3 ]]> 6 2 12 1 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/545772.html0000644000175000017500000000036507777327667017422 0ustar twernertwerner [ 547057 ] --output-xhtml hangs on most files test jtidy/src/test/resources/427830.msg0000644000175000017500000000120411463516445017204 0ustar twernertwerner 44 2 2 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/434047.html0000644000175000017500000000034307777327667017406 0ustar twernertwerner [ #434047 ] Mixed content in 4.01 Strict not allowed
    Some text.
    jtidy/src/test/resources/517550.out0000644000175000017500000000063510114367421017221 0ustar twernertwerner [ #517550 ] parser misinterprets ?xml-stylesheet PI

    A test document

    jtidy/src/test/resources/586555.cfg0000644000175000017500000000155510121370430017156 0ustar twernertwernerwrap: 68 tab-size: 4 repeated-attributes: keep-last alt-text: None, says tidy show-warnings: no quiet: yes indent: auto indent-attributes: yes output-xml: yes output-xhtml: yes add-xml-decl: yes bare: yes logical-emphasis: yes drop-proprietary-attributes: yes break-before-br: yes quote-nbsp: no assume-xml-procins: yes keep-time: no word-2000: yes tidy-mark: no literal-attributes: yes hide-comments: yes ascii-chars: no join-styles: no wrap= 68 tab-size= 4 repeated-attributes= keep-last alt-text= None, says tidy show-warnings= no quiet= yes indent= auto indent-attributes= yes output-xml= yes output-xhtml= yes add-xml-decl= yes bare= yes logical-emphasis= yes drop-proprietary-attributes= yes break-before-br= yes quote-nbsp= no assume-xml-procins= yes keep-time= no word-2000= yes tidy-mark= no literal-attributes= yes hide-comments= yes ascii-chars= no join-styles= no jtidy/src/test/resources/449348.html0000644000175000017500000000056507777327667017426 0ustar twernertwerner [ #449348 ] Whitespace added/removed to inline tags

    Make this wrap at the end of the line12345678: white-spacejoebob

    This is long enough a wrap at the next line text ...

    jtidy/src/test/resources/427827.html0000644000175000017500000000032010027352230017347 0ustar twernertwerner [ #427827 ] Nested anchor elements allowed link-1 link-2 plain jtidy/src/test/resources/431964.html0000644000175000017500000000040107777327667017406 0ustar twernertwerner [ #431964 ] table height="" not flagged as error
    A cell.
    jtidy/src/test/resources/427835.cfg0000644000175000017500000000024310111224155017143 0ustar twernertwerner# Tidy configuration file for bug #427835 output-xhtml: yes tidy-mark: false wrap: 0 indent-spaces: 0 output-xhtml= yes tidy-mark= false wrap= 0 indent-spaces= 0 jtidy/src/test/resources/917012.html0000644000175000017500000000047310033355672017360 0ustar twernertwerner [#917012] Spaces are moved from content to between tags

    big coloured text

    jtidy/src/test/resources/431739.msg0000644000175000017500000000243211463516445017213 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 7 2 before
    ]]>
    15 2 8 -1 ]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements. ]]>
    jtidy/src/test/resources/501669.msg0000644000175000017500000000126211461621360017202 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/438658.html0000644000175000017500000000016407777327667017423 0ustar twernertwerner [ #438658 ] Missing / in title endtag makes 2 titles<title> </head> <body> Test </body> </html>������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/test/resources/470663.cfg�����������������������������������������������������������������0000644�0001750�0001750�00000000154�10111224155�017141� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Tidy configuration file for bug #470663 word-2000: yes tidy-mark: false word-2000= yes tidy-mark= false ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/test/resources/427816.html����������������������������������������������������������������0000644�0001750�0001750�00000000230�07777327667�017407� 
0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<html> <head> <title>[ #427816 ] Mismatched quotes for attr segfaults blah jtidy/src/test/resources/431739.html0000644000175000017500000000026507777327667017416 0ustar twernertwerner [#431739] Spaces carried into empty block tags This is a test
    Example jtidy/src/test/resources/444834.html0000644000175000017500000015006510114204746017365 0ustar twernertwerner links




    National Technical University of Athens
    http://www.ntua.gr/

    National and Capodistrian University of Athens
    http://www.uoa.gr/

    Athens University of Economics & Business
    http://www.aueb.gr/

    Panteion University
    http://www.panteion.gr/

    University of Piraeus
    http://www.unipi.gr/

    Aristotle University of Thessaloniki
    http://www.auth.gr/

    University of Macedonia
    http://www.uom.gr/

    Democritus University of Thrace
    http://www.duth.gr/

    University of Thessaly
    http://www.uth.gr/
    University Of Crete
    http://www.uch.gr/
    Technical University of Crete
    http://www.tuc.gr/
    Aegean University
    http://www.aegean.gr/
    University of Patras
    http://www.upatras.gr/
    Ionian University
    http://www.uion.edu.gr/
    Athens School of Fine Arts
    http://www.asfa.gr/
    Harokopio Univercity
    http://www.hua.gr/
    Agricultural University of Athens
    http://www.aua.gr/
    Hellenic Naval Academy
    http://www.snd.edu.gr/

    University of Ioannina
    http://www.uoi.gr/



    http://www.eom.gr/

    (...) , . 2737 27 1999 . ... (, , ) , , , .

    ... . , , , .


    Technical Institute of Athens
    http://www.teiath.gr/

    Technical Institute of Heraklion
    http://www.cs.teiher.gr/

    Technical Institute of Thessaloniki
    http://www.teithe.gr/

    Technical Institute of Kozani
    http://www.teikoz.gr/

    Technical Institute of Larisa
    http://dias.teilar.gr/

    Technical Institute of Messologi
    http://www.teimes.gr/
    Technical Institute of Kavala
    http://www.teikav.edu.gr/
    Technical Institute of Piraeus
    http://www.teipir.gr/
    Technical Institute of Serres
    http://bic.the.forthnet.gr/tei.html
    Technological nstitute of Lamia
    http://www.teilam.gr/
    Technological nstitute of Hania
    http://eretria.teihan.gr/

    Technical Institute of Patras
    http://teipat-gw.teipat.gr/

    Altavista http://www.altavista.com/
    Hotbot http://www.hotbot.com/
    Yahoo http://www.yahoo.com/
    Lycos http://www.lycos.com/
    Excite http://www.excite.com/
    Google http://www.google.com/


     


    http://www.ypepth.gr/

    ,
    http://www.minenv.gr


    http://www.culture.gr/


    http://www.mod.gr/

    ,
    http://www.ypes.gr/


    http://www.yme.gr/


    http://www.ypai.aegean.gr/


    http://www.sport.gov.gr/

    ' &
    http://www.grnet.gr/

    http://www.labor-ministry.gr/intr1_gr.htm

    http://www.mathra.gr/mainpageministrygr.htm
    ( )
    http://www.mof-glk.gr/

    http://www.yen.gr/

    http://www.ydt.gr/

    http://www.minpress.gr/

    http://www.mfa.gr/
    ( & )
    http://www.gsrt.gr/

    http://www.ypetho.gr/ypourgeio/default.asp
     

    European Commission
    http://europa.eu.int/

    European Commission-Hellenic Representative
    http://www.ee.gr/


    http://www.europarl.eu.int/sg/tree/en/

    .
    http://ue.eu.int/jai/default.htm


    http://www.ces.eu.int/


    http://www.jrc.org


    http://eur-op.eu.int/


    http://www.eca.eu.int/
    Community Research & Development Information Service
    http://www.cordis.lu/
    - ECHO
    http://www.echo.lu/
    Resource Center for Access to Data on Europe (EUROSTAT)
    http://www-rcade.dur.ac.uk/
    Europe on line
    http://www.europeonline.com/
    European Political Resources
    http://www.agora.stm.it/politic/europe.htm
    Added Value Information Services on European Research results
    http://adviser.tno.it/adviser/
    IDEA the Electronic Directory of the European Institutions
    http://158.169.50.70/idea/ideaen.html


    - Public Power Corporation
    http://www.dei.gr/

    Athens 2004: candidate city for the Olympics

    http://www.athens.olympic.org/


    http://www.forthnet.gr/oaed/index.html

    - Social Security Institute

    http://www.ika.gr/

    OTE - Hellenic Telecomunications Organization

    http://www.ote.gr/

    (..)

    http://agn.hol.gr/info/olympic1.htm
    TEBE - Hellenic Fund for Craftsment & Small Traders
    http://www.tebe.gr/
    (....) -
    http://www.oasa.gr/
    (....) -
    http://www.oasa.gr/hsap.htm

    http://www.et.gr/

    http://www.otenet.gr/fire/






    Agricultural Bank of Greece
    http://www.ate.gr/

    Alpha Credit Bank
    http://www.alpha.gr/

    Citibank
    http://www.citibank.gr/

    Egnatia Bank
    http://www.egnatiabank.gr/

    ETBA S.A. Hellenic Industrial Development Bank

    http://www.etba.gr/etba/etbag.html
    Piraeus Bank
    http://www.piraeus-bank.gr/
    Xios Bank
    http://www.netor.gr/xiosbank/
    MACEDONIA-THRACE BANK
    http://www.mathrabank.gr/mt/macthrac.asp?Lang=En
    Ionian Bank
    http://www.kapatel.gr/ionian/
    National Bank of Greece
    http://www.ethniki.gr/


    Kefalaio - Magazine about the Hellenic and international money markets.
    http://www.addgr.com/kefalaio/
    Technica - Technical & Economics magazine
    http://www.topnet-computers.com/technica-magazine/
    Forologikos & Ergatikos Odigos
    http://www.infixnet.gr/forergat/
    Forologistiki
    http://www.forologistiki.gr/
    &
    http://www.topnet-computers.com/money_and_tourism_magazine/
    Epihirimatiki Anaptiksi - Bimonthly Economic Review
    http://www.gradus.com/infoplan/9601/index.html

    http://www.hrima.gr
    AGORA
    http://www.kerdos.gr/agora/
    EPILOGI
    http://business.hol.gr/~allmedia/
    BUSINESS TODAY
    http://www.direction.gr/btoday/

    Express - daily financial newspaper
    http://www.kapatel.gr/express/

    Isotimia - a Hellenic financial weekly (in Hellenic)

    http://www.isotimia.gr/

    New Europe - Information and contents of the current edition of New Europe weekly newspaper and business information about the emerging European markets

    http://www.new-europe.gr/

    http://www.pegasus.gr/imerissia/
    Financial Mirror - The on line edition of the business newspaper of Cyprus.
    http://www.cfm.com.cy/
    Naftemboriki - daily business newspaper that focuses on maritime
    http://www.naftemporiki.gr/naftemporiki/


    Papasotiriou
    http://www.papasotiriou.gr/
    Hellenic Bookstore
    http://www.ell-biblio.gr/
    Eleftheroudakis
    http://www.books.gr/
    Books in Greek
    http://www.books-in-greek.gr/


    Athens News
    http://athensnews.dolnet.gr/


    http://www.adesmeftos.aias.net/


    http://www.avgi.org/

    o
    http://www.pegasus.gr/ethnos/

    (daily newspaper)

    http://www.enet.gr/

    (announcements newspaper)

    http://www.pegasus.gr/ethnos/ergasia/ergasia.htm

    xpress (financial newspaper)

    http://www.kapatel.gr/express/


    http://www.kathimerini.gr/

    http://www.kariera.gr
    Mathitiki On Line (6th High School of Serres)
    http://www.6gymnasio.gr/efimer_1/main.htm
     

    http://www.naftemporiki.gr/naftemporiki/

    http://www.rizospastis.gr/
    Sport line
    http://www.sport.gr
    TO BHMA
    http://tovima.dolnet.gr/

    http://ta-nea.dolnet.gr/
    To Pontiki - Weely Newspaper
    http://members.tripod.com/~yiannich/HN/pontiki.htm

    http://www.serres.hol.gr/media/epi/



    Classifieds from newspaper Ta Nea
    http://ta-nea.dolnet.gr/ma/

    Super Agelies - Classifieds in Greece.

    http://www.superaggelies.gr/
    Classifieds from newspaper Kathimerini
    http://www.kathimerini.gr/forpublishing/kathagg/mainmenu.agg

    http://enet.gr/xe/


    Athinorama - weekly guide to events in Athens
    http://www.athinorama.gr/

    Madame Figaro - presents the first fashion website in Greece

    http://www.madamefigaro.gr/

    Men
    http://www.men.gr/

    4
    http://www.4troxoi.gr/

    2Wheels magazine

    http://www.2troxoi.gr/

    Photographiko Eidolo - Hellenic photography magazine.

    http://www.aperito.com/eidolo/index.shtml

    Avopolis music magazine

    http://behemoth.compulink.gr/avopolis/


    http://www.metro.gr/


    http://mag.dolnet.gr/Diakopes/

    Fit For Fun
    http://www.fitforfun.gr/


    http://www.arxaiologia.gr/

    Fun Magazine
    http://www.fan.gr/

    Zoom
    http://www.zoom.gr/

    STATUS

    http://www.statusnet.gr/

    Net life
    http://www.netlife.gr/
    Nemecis
    http://www.nemecis.com/
    Klik
    http://www.klik.gr/
    Sailing World
    http://www.techlink.gr/sailing/
    Sea & Yachting
    http://www.eexi.gr/SailingGr/ymag/
    Flight and Space
    http://www.ptisi.gr/
    Cinema - Cinema magazine
    http://www.pegasus.gr/sinema/
    Diver - Monthly magazine about fishing, sea and diving
    http://www.netplan.gr/hellas/vythos/
    Gefsi magazine - Recipes and more
    http://www.gefsi.gr/
    inologia
    http://www.addgr.com/wine/oinologia
    Menu - Food related magazine
    http://www.pegasus.gr/menu/
    Difono - Magazine for the Hellenic songs and music
    http://www.netplan.gr/difwno/
    JAZZ&TZAZ magazine
    http://www.hol.gr/business/jazz/
    Hxos & Hi-Fi
    http://www.hxos.gr/
    Metal Hammer & Heavy Metal - Hellenic heavy metal magazine
    http://www.netplan.gr/poprock/
     
     

    ROM
    http://www.rom.gr/

    Computer
    http://www.compulink.gr/media/cgo/

    http://www.techlink.gr/kybernografoi/
    Win on-line
    http://www.winmag.gr/index1.html


    T

    Antenna
    http://www.antenna.gr/

    ET3
    http://www.ert3.gr/

    Mega
    http://www.megatv.gr

    Skai
    http://www.webads.gr/skai/

    SAT
    http://www.ariadne-t.gr/sat.html
    MAD
    http://www.mad.gr/
    KID TV
    http://www.areianet.gr/nethold/kidtv/
    Super Sport
    http://www.areianet.gr/nethold/supersport/
    Filmnet
    http://www.filmnet.gr/
    Jeronimo Groovy
    http://www.jeronimogroovy.com/index_tv.html
     

    E
    http://www.877.gr/
    Flash 9.61

    http://www.flash.gr

    -FM
    http://www.klikfm.gr


    http://www.melodia.gr/listen-index.html

    Radio Gold

    http://www.radiogold.gr/radiogold/home.html
    GALAXY 92
    http://www.galaxy92.gr
    1
    http://ert.ntua.gr

    http://www.broadcast.com/radio/


    T - N

    Times Newspapers - The Times and The Sunday Times online, plus an interactive section, archive searching etc.
    http://www.the-times.co.uk/

    Electronic Telegraph - full online version of the daily newspaper, with searchable archives

    http://www.telegraph.co.uk

    Press Association latest UK headlines, weather, sport, listings...

    http://www.pa.press.net/
    BBC
    http://www.bbc.co.uk
    CNN Interactive is another well-presented, in-depth news site.
    http://www.cnn.com/
    World Radio Network - radio news broadcasts from around the world, in RealAudio format.
    http://www.wrn.org/ondemand/


    Athens News Agency
    http://www.ana.gr/

    Macedonian Press Agency
    http://www.mpa.gr/
    Omogeneia News Agency on Line
    http://www.kalami.net/
    New Europe Network
    http://www.new-europe.gr/


    Internet providers

    CompuLink Network
    http://www.compulink.gr/

    FORTHnet
    http://www.forthnet.gr
    Hellas On-Line
    http://www.hol.gr/
    OTEnet
    http://www.otenet.gr


    B

    Amazon.com (The Earth's biggest Bookstore)
    http://www.amazon.com

    Barnes & Noble Bookstores
    http://www.bkstore.com/

    ef="http://www.compulink.gr/">http://www.compulink.gr/
    FORTHnet
    http://www.forthnet.gr
    Hellas On-Line
    http://www.hol.gr/
    OTEnet
    http://www.otenet.gr


    B

    Amazon.com (The Earth's biggest Bookstore)
    http://www.amazon.com

    Barnes & Noble Bookstores
    http://www.bkstore.com/












     
     
     
     
     
     
     




    jtidy/src/test/resources/616744.msg0000644000175000017500000000042711463521405017206 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/525081.html0000644000175000017500000000055607777327667017413 0ustar twernertwerner [ 525081 ] frameset rows attr. not recognized jtidy/src/test/resources/503436.out0000644000175000017500000000016010027336406017212 0ustar twernertwerner Testcase #503436 first jtidy/src/test/resources/433359.msg0000644000175000017500000000126211461621360017202 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/647900.out0000644000175000017500000000117310013533125017214 0ustar twernertwerner [ 647900 ] tables are incorrectly merged
    Table data

    A paragraph

    Foo
    Foo

    Another paragraph

    Input:

    Yet another paragraph

    jtidy/src/test/resources/456596.html0000644000175000017500000000027107777327667017423 0ustar twernertwerner [ #456596 ] Missing attribute name garbles output System News
    jtidy/src/test/resources/467863.out0000644000175000017500000000035510000613130017214 0ustar twernertwerner [ #467863 ] un-nest <a> A B C jtidy/src/test/resources/828316.msg0000644000175000017500000000126211461621360017203 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/994841.html0000644000175000017500000000051310115150162017356 0ustar twernertwerner [994841] Whitespace is removed from value attributes
    jtidy/src/test/resources/660397.cfg0000644000175000017500000000017310115714674017165 0ustar twernertwernerchar-encoding: ibm858 tidy-mark: no output-xhtml: yes wrap:0 char-encoding= ibm858 tidy-mark= no output-xhtml= yes wrap=0 jtidy/src/test/resources/443576.html0000644000175000017500000000032307777327667017413 0ustar twernertwerner [ #443576 ] End script tag inside scripts problem jtidy/src/test/resources/427834.msg0000644000175000017500000000123011461621360017176 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/444394.msg0000644000175000017500000000464511463516445017224 0ustar twernertwerner 48 2 1 1 48 2 1 1 48 2 1 1 44 2 1 1 declaration]]> 49 2 40 1 lacks "type" attribute]]> 21 2 111 76 is not approved by W3C]]> 23 2 111 77 ]]> 48 2 138 35 21 2 141 57 is not approved by W3C]]> 23 2 141 57 ]]> 21 2 143 76 is not approved by W3C]]> 23 2 143 77 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1020806.html0000644000175000017500000000044610115714011017420 0ustar twernertwerner [1020806] NPE when PPPrint'ing changed DOM tree the blank lines are the problem jtidy/src/test/resources/433666.out0000644000175000017500000000046210000613130017205 0ustar twernertwerner [ #433666 ] Attempt to repair duplicate attributes
    Test
    jtidy/src/test/resources/578216.msg0000644000175000017500000000117511463516445017220 0ustar twernertwerner 44 2 2 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/647255.html0000644000175000017500000000101407777327667017411 0ustar twernertwerner<h1>This is a Samle UTF16 Little Cowboy</h1> <p>the next para is Hebrew</p> <p>    </p> <p>The Next one is Russian / crylic</p> <p>?@8;>65=85 4>;6=> >ACI5AB2;OBL</p> <p>The Next one is Greek</p> <p> - Feta, , Salads</p>jtidy/src/test/resources/539369.html0000644000175000017500000000044107777327667017422 0ustar twernertwerner [ 539369 ] Infinite loop </frame> after </frameset> jtidy/src/test/resources/502348.msg0000644000175000017500000000202111461621360017167 0ustar twernertwerner 50 2 7 1 51 2 9 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/475643.out0000644000175000017500000006476510114167510017242 0ustar twernertwerner HTML 4.0 Special Entities

    The Web Design Group

    Special Entities

    The following table gives the character entity reference, decimal character reference, and hexadecimal character reference for markup-significant and internationalization characters, as well as the rendering of each in your browser. Glyphs of the characters are available at the Unicode Consortium.

    With the exception of HTML 2.0's &quot;, &amp;, &lt;, and &gt;, browser support for these entities is generally quite poor, but recent browsers support some of the character entity references and decimal character references.

    Character Entity Decimal Hex Rendering in Your Browser
    Entity Decimal Hex
    quotation mark = APL quote &quot; &#34; &#x22; " " "
    ampersand &amp; &#38; &#x26; & & &
    less-than sign &lt; &#60; &#x3C; < < <
    greater-than sign &gt; &#62; &#x3E; > > >
    Latin capital ligature OE &OElig; &#338; &#x152; Œ Œ Œ
    Latin small ligature oe &oelig; &#339; &#x153; œ œ œ
    Latin capital letter S with caron &Scaron; &#352; &#x160; Š Š Š
    Latin small letter s with caron &scaron; &#353; &#x161; š š š
    Latin capital letter Y with diaeresis &Yuml; &#376; &#x178; Ÿ Ÿ Ÿ
    modifier letter circumflex accent &circ; &#710; &#x2C6; ˆ ˆ ˆ
    small tilde &tilde; &#732; &#x2DC; ˜ ˜ ˜
    en space &ensp; &#8194; &#x2002;
    em space &emsp; &#8195; &#x2003;
    thin space &thinsp; &#8201; &#x2009;
    zero width non-joiner &zwnj; &#8204; &#x200C;
    zero width joiner &zwj; &#8205; &#x200D;
    left-to-right mark &lrm; &#8206; &#x200E;
    right-to-left mark &rlm; &#8207; &#x200F;
    en dash &ndash; &#8211; &#x2013;
    em dash &mdash; &#8212; &#x2014;
    left single quotation mark &lsquo; &#8216; &#x2018;
    right single quotation mark &rsquo; &#8217; &#x2019;
    single low-9 quotation mark &sbquo; &#8218; &#x201A;
    left double quotation mark &ldquo; &#8220; &#x201C;
    right double quotation mark &rdquo; &#8221; &#x201D;
    double low-9 quotation mark &bdquo; &#8222; &#x201E;
    dagger &dagger; &#8224; &#x2020;
    double dagger &Dagger; &#8225; &#x2021;
    per mille sign &permil; &#8240; &#x2030;
    single left-pointing angle quotation mark &lsaquo; &#8249; &#x2039;
    single right-pointing angle quotation mark &rsaquo; &#8250; &#x203A;
    euro sign &euro; &#8364; &#x20AC;

    Latin-1 Entities

    The following table gives the character entity reference, decimal character reference, and hexadecimal character reference for 8-bit characters in the Latin-1 (ISO-8859-1) character set, as well as the rendering of each in your browser. Glyphs of the characters are available at the Unicode Consortium.

    Browser support is generally best for the decimal character references, except for the accented characters (decimal 192-214, 216-246, 248-255), where the character entity references hold a slight edge.

    Note that some Mac browsers will render fourteen Latin-1 characters incorrectly. These characters are decimal 166, 178, 179, 185, 188, 189, 190, 208, 215, 221, 222, 240, 253, and 254. See ISO-8859-1 and the Mac platform for more information.

    Character Entity Decimal Hex Rendering in Your Browser
    Entity Decimal Hex
    no-break space = non-breaking space &nbsp; &#160; &#xA0;      
    inverted exclamation mark &iexcl; &#161; &#xA1; ¡ ¡ ¡
    cent sign &cent; &#162; &#xA2; ¢ ¢ ¢
    pound sign &pound; &#163; &#xA3; £ £ £
    currency sign &curren; &#164; &#xA4; ¤ ¤ ¤
    yen sign = yuan sign &yen; &#165; &#xA5; ¥ ¥ ¥
    broken bar = broken vertical bar &brvbar; &#166; &#xA6; ¦ ¦ ¦
    section sign &sect; &#167; &#xA7; § § §
    diaeresis = spacing diaeresis &uml; &#168; &#xA8; ¨ ¨ ¨
    copyright sign &copy; &#169; &#xA9; © © ©
    feminine ordinal indicator &ordf; &#170; &#xAA; ª ª ª
    left-pointing double angle quotation mark = left pointing guillemet &laquo; &#171; &#xAB; « « «
    not sign &not; &#172; &#xAC; ¬ ¬ ¬
    soft hyphen = discretionary hyphen &shy; &#173; &#xAD; ­ ­ ­
    registered sign = registered trade mark sign &reg; &#174; &#xAE; ® ® ®
    macron = spacing macron = overline = APL overbar &macr; &#175; &#xAF; ¯ ¯ ¯
    degree sign &deg; &#176; &#xB0; ° ° °
    plus-minus sign = plus-or-minus sign &plusmn; &#177; &#xB1; ± ± ±
    superscript two = superscript digit two = squared &sup2; &#178; &#xB2; ² ² ²
    superscript three = superscript digit three = cubed &sup3; &#179; &#xB3; ³ ³ ³
    acute accent = spacing acute &acute; &#180; &#xB4; ´ ´ ´
    micro sign &micro; &#181; &#xB5; µ µ µ
    pilcrow sign = paragraph sign &para; &#182; &#xB6;
    middle dot = Georgian comma = Greek middle dot &middot; &#183; &#xB7; · · ·
    cedilla = spacing cedilla &cedil; &#184; &#xB8; ¸ ¸ ¸
    superscript one = superscript digit one &sup1; &#185; &#xB9; ¹ ¹ ¹
    masculine ordinal indicator &ordm; &#186; &#xBA; º º º
    right-pointing double angle quotation mark = right pointing guillemet &raquo; &#187; &#xBB; » » »
    vulgar fraction one quarter = fraction one quarter &frac14; &#188; &#xBC; ¼ ¼ ¼
    vulgar fraction one half = fraction one half &frac12; &#189; &#xBD; ½ ½ ½
    vulgar fraction three quarters = fraction three quarters &frac34; &#190; &#xBE; ¾ ¾ ¾
    inverted question mark = turned question mark &iquest; &#191; &#xBF; ¿ ¿ ¿
    Latin capital letter A with grave = Latin capital letter A grave &Agrave; &#192; &#xC0; À À À
    Latin capital letter A with acute &Aacute; &#193; &#xC1; Á Á Á
    Latin capital letter A with circumflex &Acirc; &#194; &#xC2; Â Â Â
    Latin capital letter A with tilde &Atilde; &#195; &#xC3; Ã Ã Ã
    Latin capital letter A with diaeresis &Auml; &#196; &#xC4; Ä Ä Ä
    Latin capital letter A with ring above = Latin capital letter A ring &Aring; &#197; &#xC5; Å Å Å
    Latin capital letter AE = Latin capital ligature AE &AElig; &#198; &#xC6; Æ Æ Æ
    Latin capital letter C with cedilla &Ccedil; &#199; &#xC7; Ç Ç Ç
    Latin capital letter E with grave &Egrave; &#200; &#xC8; È È È
    Latin capital letter E with acute &Eacute; &#201; &#xC9; É É É
    Latin capital letter E with circumflex &Ecirc; &#202; &#xCA; Ê Ê Ê
    Latin capital letter E with diaeresis &Euml; &#203; &#xCB; Ë Ë Ë
    Latin capital letter I with grave &Igrave; &#204; &#xCC; Ì Ì Ì
    Latin capital letter I with acute &Iacute; &#205; &#xCD; Í Í Í
    Latin capital letter I with circumflex &Icirc; &#206; &#xCE; Î Î Î
    Latin capital letter I with diaeresis &Iuml; &#207; &#xCF; Ï Ï Ï
    Latin capital letter ETH &ETH; &#208; &#xD0; Ð Ð Ð
    Latin capital letter N with tilde &Ntilde; &#209; &#xD1; Ñ Ñ Ñ
    Latin capital letter O with grave &Ograve; &#210; &#xD2; Ò Ò Ò
    Latin capital letter O with acute &Oacute; &#211; &#xD3; Ó Ó Ó
    Latin capital letter O with circumflex &Ocirc; &#212; &#xD4; Ô Ô Ô
    Latin capital letter O with tilde &Otilde; &#213; &#xD5; Õ Õ Õ
    Latin capital letter O with diaeresis &Ouml; &#214; &#xD6; Ö Ö Ö
    multiplication sign &times; &#215; &#xD7; × × ×
    Latin capital letter O with stroke = Latin capital letter O slash &Oslash; &#216; &#xD8; Ø Ø Ø
    Latin capital letter U with grave &Ugrave; &#217; &#xD9; Ù Ù Ù
    Latin capital letter U with acute &Uacute; &#218; &#xDA; Ú Ú Ú
    Latin capital letter U with circumflex &Ucirc; &#219; &#xDB; Û Û Û
    Latin capital letter U with diaeresis &Uuml; &#220; &#xDC; Ü Ü Ü
    Latin capital letter Y with acute &Yacute; &#221; &#xDD; Ý Ý Ý
    Latin capital letter THORN &THORN; &#222; &#xDE; Þ Þ Þ
    Latin small letter sharp s = ess-zed &szlig; &#223; &#xDF; ß ß ß
    Latin small letter a with grave = Latin small letter a grave &agrave; &#224; &#xE0; à à à
    Latin small letter a with acute &aacute; &#225; &#xE1; á á á
    Latin small letter a with circumflex &acirc; &#226; &#xE2; â â â
    Latin small letter a with tilde &atilde; &#227; &#xE3; ã ã ã
    Latin small letter a with diaeresis &auml; &#228; &#xE4; ä ä ä
    Latin small letter a with ring above = Latin small letter a ring &aring; &#229; &#xE5; å å å
    Latin small letter ae = Latin small ligature ae &aelig; &#230; &#xE6; æ æ æ
    Latin small letter c with cedilla &ccedil; &#231; &#xE7; ç ç ç
    Latin small letter e with grave &egrave; &#232; &#xE8; è è è
    Latin small letter e with acute &eacute; &#233; &#xE9; é é é
    Latin small letter e with circumflex &ecirc; &#234; &#xEA; ê ê ê
    Latin small letter e with diaeresis &euml; &#235; &#xEB; ë ë ë
    Latin small letter i with grave &igrave; &#236; &#xEC; ì ì ì
    Latin small letter i with acute &iacute; &#237; &#xED; í í í
    Latin small letter i with circumflex &icirc; &#238; &#xEE; î î î
    Latin small letter i with diaeresis &iuml; &#239; &#xEF; ï ï ï
    Latin small letter eth &eth; &#240; &#xF0; ð ð ð
    Latin small letter n with tilde &ntilde; &#241; &#xF1; ñ ñ ñ
    Latin small letter o with grave &ograve; &#242; &#xF2; ò ò ò
    Latin small letter o with acute &oacute; &#243; &#xF3; ó ó ó
    Latin small letter o with circumflex &ocirc; &#244; &#xF4; ô ô ô
    Latin small letter o with tilde &otilde; &#245; &#xF5; õ õ õ
    Latin small letter o with diaeresis &ouml; &#246; &#xF6; ö ö ö
    division sign &divide; &#247; &#xF7; ÷ ÷ ÷
    Latin small letter o with stroke = Latin small letter o slash &oslash; &#248; &#xF8; ø ø ø
    Latin small letter u with grave &ugrave; &#249; &#xF9; ù ù ù
    Latin small letter u with acute &uacute; &#250; &#xFA; ú ú ú
    Latin small letter u with circumflex &ucirc; &#251; &#xFB; û û û
    Latin small letter u with diaeresis &uuml; &#252; &#xFC; ü ü ü
    Latin small letter y with acute &yacute; &#253; &#xFD; ý ý ý
    Latin small letter thorn &thorn; &#254; &#xFE; þ þ þ
    Latin small letter y with diaeresis &yuml; &#255; &#xFF; ÿ ÿ ÿ
    jtidy/src/test/resources/991471.xml0000644000175000017500000000016110076523763017226 0ustar twernertwerner jtidy/src/test/resources/435903.out0000644000175000017500000000062610104011573017213 0ustar twernertwerner [ #435903 ] Script element w/body child to table bug>
    jtidy/src/test/resources/578216.cfg0000644000175000017500000000020610111224155017142 0ustar twernertwerner# Tidy configuration file for bug 578216 indent: yes tidy-mark: false indent-spaces: 0 indent= yes tidy-mark= false indent-spaces= 0 jtidy/src/test/resources/433666.html0000644000175000017500000000036110111330752017351 0ustar twernertwerner [ #433666 ] Attempt to repair duplicate attributes
    Test
    jtidy/src/test/resources/603128.msg0000644000175000017500000000122611461621360017173 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/505770.msg0000644000175000017500000000403511463516445017211 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 4 29 before 11 2 4 29 isn't allowed in elements]]> 8 2 4 57 ]]> 11 2 5 1 isn't allowed in elements]]> 11 2 6 2 isn't allowed in elements]]> 49 2 8 2 lacks "action" attribute]]> 4 2 29 37 4 2 30 37 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/435909.msg0000644000175000017500000000203111463516445017211 0ustar twernertwerner 11 2 22 -3 isn't allowed in elements]]> 11 2 23 1 isn't allowed in elements]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/517550.cfg0000644000175000017500000000016110111224155017134 0ustar twernertwerner# Tidy configuration file for bug #517550 output-xhtml: yes tidy-mark: false output-xhtml= yes tidy-mark= false jtidy/src/test/resources/586555.html0000644000175000017500000000112307777327667017417 0ustar twernertwerner [ 586555 ] Misplaced backslash caused by newline

    [ 586555 ] Misplaced backslash caused by newline

    jtidy/src/test/resources/788031.html0000644000175000017500000000022410000011414017332 0ustar twernertwerner jtidy/src/test/resources/438650.out0000644000175000017500000000033610000613130017203 0ustar twernertwerner [ #438650 ] Newline in URL attr value becomes space This is a test jtidy/src/test/resources/470688.msg0000644000175000017500000000275511461621360017220 0ustar twernertwerner 10 2 8 6 by ]]> 23 2 8 6 ]]> 24 1 8 30 is probably intended as ]]> 8 2 8 28 ]]> 8 2 8 32 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/545067.html0000644000175000017500000000016207777327667017412 0ustar twernertwerner [ 545067 ] Implicit closing of head broken

    jtidy/src/test/resources/441508.out0000644000175000017500000000044010000613130017173 0ustar twernertwerner [ #441508 ] parser.c: BadForm() function broken
    Test
    jtidy/src/test/resources/634889.html0000644000175000017500000000026507777327667017431 0ustar twernertwerner [ 634889 ] Problem with <o:p> ms word tag

    Probably OK, now that ParseTagNames() is fixed.

    jtidy/src/test/resources/431736.html0000644000175000017500000000033707777327667017413 0ustar twernertwerner [#431736] Doctype decl added before XML decl

    Run tidy w/ -asxhtml or -asxml options...

    jtidy/src/test/resources/427820.msg0000644000175000017500000000117511463516445017212 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/640474.out0000644000175000017500000000017110000613130017177 0ustar twernertwerner Bjrn Hhrmann Marc-Andr Lemburg jtidy/src/test/resources/480406.msg0000644000175000017500000000042711463520436017203 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/943559.msg0000644000175000017500000000404711463516445017227 0ustar twernertwerner 12 2 10 7 ]]> 8 2 11 13 ]]> 7 2 11 20 before ]]> 8 3 12 7 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 -1 0 0 0 113 0 1 1 and tags. HTML elements should be properly nested and form elements are no exception. For instance you should not place the
    in one table cell and the
    in another. If the
    is placed before a table, the
    cannot be placed inside the table! Note that one form can't be nested inside another! ]]>
    jtidy/src/test/resources/431739.out0000644000175000017500000000041510000613130017202 0ustar twernertwerner [#431739] Spaces carried into empty block tags This is a test
    Example jtidy/src/test/resources/441740.msg0000644000175000017500000000123011461621360017166 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/473490.msg0000644000175000017500000000015510120355471017200 0ustar twernertwerner jtidy/src/test/resources/559774.html0000644000175000017500000000046610133721557017403 0ustar twernertwerner [559774] tidy version rejects all id values

    header with a valid id

    jtidy/src/test/resources/431736.out0000644000175000017500000000050310115150162017205 0ustar twernertwerner [#431736] Doctype decl added before XML decl

    Run tidy w/ -asxhtml or -asxml options...

    jtidy/src/test/resources/435923.out0000644000175000017500000000030110000613130017173 0ustar twernertwerner [ #435923 ] Preserve case of attribute names jtidy/src/test/resources/531962.out0000644000175000017500000000046510123632513017222 0ustar twernertwerner [531962] Closing quotes around attribute values jtidy/src/test/resources/574158.out0000644000175000017500000000171010114173713017222 0ustar twernertwerner [574158] Error with FONT tag jtidy/src/test/resources/648768.msg0000644000175000017500000000124611461621360017220 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/708322.cfg0000644000175000017500000000016210034234737017147 0ustar twernertwernerdrop-proprietary-attributes= true tidy-mark=false wrap=0 drop-proprietary-attributes: true tidy-mark:false wrap:0jtidy/src/test/resources/435923.msg0000644000175000017500000000117411463516445017214 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/470663.html0000644000175000017500000000243607777327667017417 0ustar twernertwerner Test Input For Bug #470663

    Body doesn't matter. Problem occurs parsing <head> element.

    jtidy/src/test/resources/610244.html0000644000175000017500000015006510110502420017335 0ustar twernertwerner links




    National Technical University of Athens
    http://www.ntua.gr/

    National and Capodistrian University of Athens
    http://www.uoa.gr/

    Athens University of Economics & Business
    http://www.aueb.gr/

    Panteion University
    http://www.panteion.gr/

    University of Piraeus
    http://www.unipi.gr/

    Aristotle University of Thessaloniki
    http://www.auth.gr/

    University of Macedonia
    http://www.uom.gr/

    Democritus University of Thrace
    http://www.duth.gr/

    University of Thessaly
    http://www.uth.gr/
    University Of Crete
    http://www.uch.gr/
    Technical University of Crete
    http://www.tuc.gr/
    Aegean University
    http://www.aegean.gr/
    University of Patras
    http://www.upatras.gr/
    Ionian University
    http://www.uion.edu.gr/
    Athens School of Fine Arts
    http://www.asfa.gr/
    Harokopio Univercity
    http://www.hua.gr/
    Agricultural University of Athens
    http://www.aua.gr/
    Hellenic Naval Academy
    http://www.snd.edu.gr/

    University of Ioannina
    http://www.uoi.gr/



    http://www.eom.gr/

    (...) , . 2737 27 1999 . ... (, , ) , , , .

    ... . , , , .


    Technical Institute of Athens
    http://www.teiath.gr/

    Technical Institute of Heraklion
    http://www.cs.teiher.gr/

    Technical Institute of Thessaloniki
    http://www.teithe.gr/

    Technical Institute of Kozani
    http://www.teikoz.gr/

    Technical Institute of Larisa
    http://dias.teilar.gr/

    Technical Institute of Messologi
    http://www.teimes.gr/
    Technical Institute of Kavala
    http://www.teikav.edu.gr/
    Technical Institute of Piraeus
    http://www.teipir.gr/
    Technical Institute of Serres
    http://bic.the.forthnet.gr/tei.html
    Technological nstitute of Lamia
    http://www.teilam.gr/
    Technological nstitute of Hania
    http://eretria.teihan.gr/

    Technical Institute of Patras
    http://teipat-gw.teipat.gr/

    Altavista http://www.altavista.com/
    Hotbot http://www.hotbot.com/
    Yahoo http://www.yahoo.com/
    Lycos http://www.lycos.com/
    Excite http://www.excite.com/
    Google http://www.google.com/


     


    http://www.ypepth.gr/

    ,
    http://www.minenv.gr


    http://www.culture.gr/


    http://www.mod.gr/

    ,
    http://www.ypes.gr/


    http://www.yme.gr/


    http://www.ypai.aegean.gr/


    http://www.sport.gov.gr/

    ' &
    http://www.grnet.gr/

    http://www.labor-ministry.gr/intr1_gr.htm

    http://www.mathra.gr/mainpageministrygr.htm
    ( )
    http://www.mof-glk.gr/

    http://www.yen.gr/

    http://www.ydt.gr/

    http://www.minpress.gr/

    http://www.mfa.gr/
    ( & )
    http://www.gsrt.gr/

    http://www.ypetho.gr/ypourgeio/default.asp
     

    European Commission
    http://europa.eu.int/

    European Commission-Hellenic Representative
    http://www.ee.gr/


    http://www.europarl.eu.int/sg/tree/en/

    .
    http://ue.eu.int/jai/default.htm


    http://www.ces.eu.int/


    http://www.jrc.org


    http://eur-op.eu.int/


    http://www.eca.eu.int/
    Community Research & Development Information Service
    http://www.cordis.lu/
    - ECHO
    http://www.echo.lu/
    Resource Center for Access to Data on Europe (EUROSTAT)
    http://www-rcade.dur.ac.uk/
    Europe on line
    http://www.europeonline.com/
    European Political Resources
    http://www.agora.stm.it/politic/europe.htm
    Added Value Information Services on European Research results
    http://adviser.tno.it/adviser/
    IDEA the Electronic Directory of the European Institutions
    http://158.169.50.70/idea/ideaen.html


    - Public Power Corporation
    http://www.dei.gr/

    Athens 2004: candidate city for the Olympics

    http://www.athens.olympic.org/


    http://www.forthnet.gr/oaed/index.html

    - Social Security Institute

    http://www.ika.gr/

    OTE - Hellenic Telecomunications Organization

    http://www.ote.gr/

    (..)

    http://agn.hol.gr/info/olympic1.htm
    TEBE - Hellenic Fund for Craftsment & Small Traders
    http://www.tebe.gr/
    (....) -
    http://www.oasa.gr/
    (....) -
    http://www.oasa.gr/hsap.htm

    http://www.et.gr/

    http://www.otenet.gr/fire/






    Agricultural Bank of Greece
    http://www.ate.gr/

    Alpha Credit Bank
    http://www.alpha.gr/

    Citibank
    http://www.citibank.gr/

    Egnatia Bank
    http://www.egnatiabank.gr/

    ETBA S.A. Hellenic Industrial Development Bank

    http://www.etba.gr/etba/etbag.html
    Piraeus Bank
    http://www.piraeus-bank.gr/
    Xios Bank
    http://www.netor.gr/xiosbank/
    MACEDONIA-THRACE BANK
    http://www.mathrabank.gr/mt/macthrac.asp?Lang=En
    Ionian Bank
    http://www.kapatel.gr/ionian/
    National Bank of Greece
    http://www.ethniki.gr/


    Kefalaio - Magazine about the Hellenic and international money markets.
    http://www.addgr.com/kefalaio/
    Technica - Technical & Economics magazine
    http://www.topnet-computers.com/technica-magazine/
    Forologikos & Ergatikos Odigos
    http://www.infixnet.gr/forergat/
    Forologistiki
    http://www.forologistiki.gr/
    &
    http://www.topnet-computers.com/money_and_tourism_magazine/
    Epihirimatiki Anaptiksi - Bimonthly Economic Review
    http://www.gradus.com/infoplan/9601/index.html

    http://www.hrima.gr
    AGORA
    http://www.kerdos.gr/agora/
    EPILOGI
    http://business.hol.gr/~allmedia/
    BUSINESS TODAY
    http://www.direction.gr/btoday/

    Express - daily financial newspaper
    http://www.kapatel.gr/express/

    Isotimia - a Hellenic financial weekly (in Hellenic)

    http://www.isotimia.gr/

    New Europe - Information and contents of the current edition of New Europe weekly newspaper and business information about the emerging European markets

    http://www.new-europe.gr/

    http://www.pegasus.gr/imerissia/
    Financial Mirror - The on line edition of the business newspaper of Cyprus.
    http://www.cfm.com.cy/
    Naftemboriki - daily business newspaper that focuses on maritime
    http://www.naftemporiki.gr/naftemporiki/


    Papasotiriou
    http://www.papasotiriou.gr/
    Hellenic Bookstore
    http://www.ell-biblio.gr/
    Eleftheroudakis
    http://www.books.gr/
    Books in Greek
    http://www.books-in-greek.gr/


    Athens News
    http://athensnews.dolnet.gr/


    http://www.adesmeftos.aias.net/


    http://www.avgi.org/

    o
    http://www.pegasus.gr/ethnos/

    (daily newspaper)

    http://www.enet.gr/

    (announcements newspaper)

    http://www.pegasus.gr/ethnos/ergasia/ergasia.htm

    xpress (financial newspaper)

    http://www.kapatel.gr/express/


    http://www.kathimerini.gr/

    http://www.kariera.gr
    Mathitiki On Line (6th High School of Serres)
    http://www.6gymnasio.gr/efimer_1/main.htm
     

    http://www.naftemporiki.gr/naftemporiki/

    http://www.rizospastis.gr/
    Sport line
    http://www.sport.gr
    TO BHMA
    http://tovima.dolnet.gr/

    http://ta-nea.dolnet.gr/
    To Pontiki - Weely Newspaper
    http://members.tripod.com/~yiannich/HN/pontiki.htm

    http://www.serres.hol.gr/media/epi/



    Classifieds from newspaper Ta Nea
    http://ta-nea.dolnet.gr/ma/

    Super Agelies - Classifieds in Greece.

    http://www.superaggelies.gr/
    Classifieds from newspaper Kathimerini
    http://www.kathimerini.gr/forpublishing/kathagg/mainmenu.agg

    http://enet.gr/xe/


    Athinorama - weekly guide to events in Athens
    http://www.athinorama.gr/

    Madame Figaro - presents the first fashion website in Greece

    http://www.madamefigaro.gr/

    Men
    http://www.men.gr/

    4
    http://www.4troxoi.gr/

    2Wheels magazine

    http://www.2troxoi.gr/

    Photographiko Eidolo - Hellenic photography magazine.

    http://www.aperito.com/eidolo/index.shtml

    Avopolis music magazine

    http://behemoth.compulink.gr/avopolis/


    http://www.metro.gr/


    http://mag.dolnet.gr/Diakopes/

    Fit For Fun
    http://www.fitforfun.gr/


    http://www.arxaiologia.gr/

    Fun Magazine
    http://www.fan.gr/

    Zoom
    http://www.zoom.gr/

    STATUS

    http://www.statusnet.gr/

    Net life
    http://www.netlife.gr/
    Nemecis
    http://www.nemecis.com/
    Klik
    http://www.klik.gr/
    Sailing World
    http://www.techlink.gr/sailing/
    Sea & Yachting
    http://www.eexi.gr/SailingGr/ymag/
    Flight and Space
    http://www.ptisi.gr/
    Cinema - Cinema magazine
    http://www.pegasus.gr/sinema/
    Diver - Monthly magazine about fishing, sea and diving
    http://www.netplan.gr/hellas/vythos/
    Gefsi magazine - Recipes and more
    http://www.gefsi.gr/
    inologia
    http://www.addgr.com/wine/oinologia
    Menu - Food related magazine
    http://www.pegasus.gr/menu/
    Difono - Magazine for the Hellenic songs and music
    http://www.netplan.gr/difwno/
    JAZZ&TZAZ magazine
    http://www.hol.gr/business/jazz/
    Hxos & Hi-Fi
    http://www.hxos.gr/
    Metal Hammer & Heavy Metal - Hellenic heavy metal magazine
    http://www.netplan.gr/poprock/
     
     

    ROM
    http://www.rom.gr/

    Computer
    http://www.compulink.gr/media/cgo/

    http://www.techlink.gr/kybernografoi/
    Win on-line
    http://www.winmag.gr/index1.html


    T

    Antenna
    http://www.antenna.gr/

    ET3
    http://www.ert3.gr/

    Mega
    http://www.megatv.gr

    Skai
    http://www.webads.gr/skai/

    SAT
    http://www.ariadne-t.gr/sat.html
    MAD
    http://www.mad.gr/
    KID TV
    http://www.areianet.gr/nethold/kidtv/
    Super Sport
    http://www.areianet.gr/nethold/supersport/
    Filmnet
    http://www.filmnet.gr/
    Jeronimo Groovy
    http://www.jeronimogroovy.com/index_tv.html
     

    E
    http://www.877.gr/
    Flash 9.61

    http://www.flash.gr

    -FM
    http://www.klikfm.gr


    http://www.melodia.gr/listen-index.html

    Radio Gold

    http://www.radiogold.gr/radiogold/home.html
    GALAXY 92
    http://www.galaxy92.gr
    1
    http://ert.ntua.gr

    http://www.broadcast.com/radio/


    T - N

    Times Newspapers - The Times and The Sunday Times online, plus an interactive section, archive searching etc.
    http://www.the-times.co.uk/

    Electronic Telegraph - full online version of the daily newspaper, with searchable archives

    http://www.telegraph.co.uk

    Press Association latest UK headlines, weather, sport, listings...

    http://www.pa.press.net/
    BBC
    http://www.bbc.co.uk
    CNN Interactive is another well-presented, in-depth news site.
    http://www.cnn.com/
    World Radio Network - radio news broadcasts from around the world, in RealAudio format.
    http://www.wrn.org/ondemand/


    Athens News Agency
    http://www.ana.gr/

    Macedonian Press Agency
    http://www.mpa.gr/
    Omogeneia News Agency on Line
    http://www.kalami.net/
    New Europe Network
    http://www.new-europe.gr/


    Internet providers

    CompuLink Network
    http://www.compulink.gr/

    FORTHnet
    http://www.forthnet.gr
    Hellas On-Line
    http://www.hol.gr/
    OTEnet
    http://www.otenet.gr


    B

    Amazon.com (The Earth's biggest Bookstore)
    http://www.amazon.com

    Barnes & Noble Bookstores
    http://www.bkstore.com/

    ef="http://www.compulink.gr/">http://www.compulink.gr/
    FORTHnet
    http://www.forthnet.gr
    Hellas On-Line
    http://www.hol.gr/
    OTEnet
    http://www.otenet.gr


    B

    Amazon.com (The Earth's biggest Bookstore)
    http://www.amazon.com

    Barnes & Noble Bookstores
    http://www.bkstore.com/












     
     
     
     
     
     
     




    jtidy/src/test/resources/427846.html0000644000175000017500000000023707777327667017421 0ustar twernertwerner Test Input For Bug #427846
    text-one
    text-two
    jtidy/src/test/resources/640473.cfg0000644000175000017500000000100410115150162017132 0ustar twernertwerner# [640743] same as default config ... char-encoding: latin1 tidy-mark: no clean: yes drop-font-tags: yes logical-emphasis: yes indent-attributes: yes indent-spaces: 0 # + declared tags new-blocklevel-tags: foo new-inline-tags: bar new-empty-tags: zippo new-pre-tags: baz char-encoding= latin1 tidy-mark= no clean= yes drop-font-tags= yes logical-emphasis= yes indent-attributes= yes indent-spaces= 0 # + declared tags new-blocklevel-tags= foo new-inline-tags= bar new-empty-tags= zippo new-pre-tags= baz jtidy/src/test/resources/527118.msg0000644000175000017500000000166111461621360017202 0ustar twernertwerner 44 2 1 1 declaration]]> 12 2 7 1 ]]> 23 2 11 1 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/441568.out0000644000175000017500000000036010000613130017202 0ustar twernertwerner [ #441568 ] Font tags handling different
    text-one
    text-two jtidy/src/test/resources/540045.xhtml0000644000175000017500000000070007777327667017567 0ustar twernertwerner [ 540045 ] Tidy strips all the IMG tags out! USA flag jtidy/src/test/resources/517528.msg0000644000175000017500000000124611461621360017205 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427811.html0000644000175000017500000000053507777327667017412 0ustar twernertwerner [#427811] FRAME inside NOFRAME infinite loop <body bgcolor="#000000" text="#ffffff"> <h1>Need a Frame Capable Browser!</h1> <frame src="body.html" name="p2"> </body> jtidy/src/test/resources/578216.html0000644000175000017500000000134507777327667017420 0ustar twernertwerner [ 578216 ] Incorrect indent of <SPAN> elements
    Benefits: Using a new Project Profile Knowledge Base...
    Solutions: Comprehensive intranet-based knowledge base containing...
    Roles: Drove site and content management architecture...
    Technology: Visual InterDev, IIS, ...
    jtidy/src/test/resources/427823.html0000644000175000017500000000047207777327667017415 0ustar twernertwerner [ #427823 ] Multiple <BODY>'s in <NOFRAMES> allowed <body> Text in body 1. </body> <body> Text in illegal body 2. </body> Text in inferred illegal body 3. Text in inferred illegal body 4. jtidy/src/test/resources/427677.html0000644000175000017500000000036207777327667017422 0ustar twernertwerner [ #427677 ] TrimInitialSpace() can trim too much

    This is a Red link

    jtidy/src/test/resources/620531.html0000644000175000017500000000015107777327667017376 0ustar twernertwerner[ 620531 ] br in pre must not cause line break

    foo

    bar
    baz
    jtidy/src/test/resources/433856.html0000644000175000017500000000065107777327667017417 0ustar twernertwerner [ #433856 ] Access violation w/Word files w/font tag

    De kop

    Dit is een test.

    jtidy/src/test/resources/1058909.msg0000644000175000017500000000217611461621360017274 0ustar twernertwerner 23 2 11 -3 ]]> 6 2 13 8 ]]> 6 2 13 8 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/791933.msg0000644000175000017500000000126211461621360017207 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/647255.out0000644000175000017500000000164210113414404017217 0ustar twernertwerner<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <title></title> </head> <body> <h1>This is a Samle UTF16 Little Cowboy</h1> <p>the next para is Hebrew</p> <p>    </p> <p>The Next one is Russian / crylic</p> <p>?@8;>65=85 4>;6=> >ACI5AB2;OBL</p> <p>The Next one is Greek</p> <p> - Feta, , Salads</p> </body> </html> jtidy/src/test/resources/449348.cfg0000644000175000017500000000015210027341712017152 0ustar twernertwerneroutput-xhtml: yes tidy-mark: false indent-spaces: 0 output-xhtml= yes tidy-mark= false indent-spaces= 0 jtidy/src/test/resources/552861.html0000644000175000017500000000031507777327667017412 0ustar twernertwerner Test Input For Bug #552861
    Testing
    jtidy/src/test/resources/991471.msg0000644000175000017500000000042711463520474017216 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/603128.out0000644000175000017500000000042610000613130017175 0ustar twernertwerner [ 603128 ] tidy adds newlines after </html> There is exactly one line-ending after the </html> - older versions of Tidy will add an additional line-ending. jtidy/src/test/resources/763191.html0000644000175000017500000000055310110502420017343 0ustar twernertwerner [ 763191 ] Again DOM Parsing error (tidy removes spaces in attribute values)
    jtidy/src/test/resources/504206.out0000644000175000017500000001215410000613130017173 0ustar twernertwerner [ #504206] Tidy errors in processing forms.

    COMM 428: Feedback Survey

    Please fill out the following form based on YOUR experience in COMM 428 to date.

    Use the Send button at the bottom of the page to send your feedback to me via e-mail.


    Name (optional):

    E-Mail Address (optional):

    Where will you be this time next year?

    Area(s) of concentation? (Select all that Apply):
    MIS
    Finance
    Marketing
    Management
    Accounting
    Other

    Q1: The instructor gives appropriate consideration to the comments and questions of students.
    Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

    Q2: The instructor relates theoretical concepts covered in the course to practical applications.
    Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

    Q3: The instructor presents class material in a clear and organized manner.
    Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

    Q4: The instructor is prepared for class.
    Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

    Q5: The instructor demonstrates enthusiasm and interest in the subject matter.
    Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

    Q6: The instructor posts reasonable office hours and is available whenever I try to see him during those hours.
    Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

    Q7: The instructor stimulates me to think about the course.
    Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

    Stengths (class/instructor):

    Weaknesses (class/instructor):

    Suggestions for improvement (class/instructor; NOTE: This field is REQUIRED):


    jtidy/src/test/resources/553414.out0000644000175000017500000000053310115150162017206 0ustar twernertwerner [553414] XHTML strict accept '_target' attribute

    tidy should report a warning for target

    jtidy/src/test/resources/502346.html0000644000175000017500000000051110115166156017347 0ustar twernertwerner [502346] Leading and trailing space in attval whitespace in href should be removed, not escaped jtidy/src/test/resources/445557.out0000644000175000017500000000170610000613130017211 0ustar twernertwerner [ #445557 ] Convert Symbol font chars to Unicode

    The predicate calculus has a number of theorems and axioms for proving logical statements. Here are the main symbols used in predicate calculus:

    P(x) – proposition – a logical statement in the condition x.

    x – any condition in the set of possible conditions.

    c – a particular condition in the set of possible conditions.

    " – "For every"

    $ – "Exists"

    ® – Implication

    Ù – Conjunction (logical and)

    Ú – Disjunction (logical or)

    jtidy/src/test/resources/996484.cfg0000644000175000017500000000004310110645233017157 0ustar twernertwernertidy-mark: false tidy-mark= false jtidy/src/test/resources/487204.out0000644000175000017500000000053610000613130017204 0ustar twernertwerner [ #487204 ] Duplicate DIV style attribute generated
    1. One
    2. Two
    3. Three
    jtidy/src/test/resources/735603.out0000644000175000017500000000025310034237703017216 0ustar twernertwerner [ #735603 ] : drops attributes from declared elements. foo attributes shouldn't be removed jtidy/src/test/resources/511243.xhtml0000644000175000017500000000063207777327667017571 0ustar twernertwerner [ #511243 ] xhtml utf8 format bug

    How to…
    Place an extended-hours order: jtidy/src/test/resources/435903.msg0000644000175000017500000000466011463516445017215 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 4 5 lacks "summary" attribute]]> 49 2 10 9 lacks "type" attribute]]> 11 2 10 9 isn't allowed in elements]]> 8 2 17 1 ]]> 23 2 18 -1 ]]> 7 2 18 -1 before ]]> 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/435922.html0000644000175000017500000000040010000046405017334 0ustar twernertwerner [ #435922 ] Missing <form> around <input> no warning
    jtidy/src/test/resources/431731.msg0000644000175000017500000000173511461621360017177 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/427841.msg0000644000175000017500000000165211461621360017204 0ustar twernertwerner 44 2 1 1 declaration]]> 12 2 24 1 ]]> 6 2 32 1 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433604.out0000644000175000017500000000020510000613130017170 0ustar twernertwerner [ #433604 ] Tidy inserts &nbsp; entity in -xml mode. Use -xml on command line. Test of   jtidy/src/test/resources/434940b.cfg0000644000175000017500000000022610111224155017301 0ustar twernertwerner# Tidy configuration file for bug #434940 show-body-only: yes tidy-mark: false output-xml: true show-body-only= yes tidy-mark= false output-xml= truejtidy/src/test/resources/431889.out0000644000175000017500000000155210026616470017235 0ustar twernertwerner Bug-2000-12-27-B [ #431889 ] Config file options w/"param" don't work

    "Alternate"This image has no ALT attribute.

    jtidy/src/test/resources/471264.out0000644000175000017500000000041410000613130017176 0ustar twernertwerner [ #471264 ] Reduce blank lines in output
    • first element
    • second element
    jtidy/src/test/resources/434940.html0000644000175000017500000000024307777327667017407 0ustar twernertwerner [ #434940 ] --show-body-only: print only body contents Use "--show-body-only yes" on the command line jtidy/src/test/resources/634889.cfg0000644000175000017500000000074110115150162017157 0ustar twernertwernertidy-mark: no output-xml: yes tidy-mark: false drop-proprietary-attributes: no new-inline-tags: o:lock, o:p, v-f, v-formula, v-formulas, v-imagedata, v-path, v-shape, v-shapetype, v-stroke wrap-sections: no drop-empty-paras: no wrap: 0 tidy-mark= no output-xml= yes tidy-mark= false drop-proprietary-attributes= no new-inline-tags= o:lock, o:p, v-f, v-formula, v-formulas, v-imagedata, v-path, v-shape, v-shapetype, v-stroke wrap-sections= no drop-empty-paras= no wrap= 0 jtidy/src/test/resources/433021.msg0000644000175000017500000000314011461621360017161 0ustar twernertwerner 51 2 7 1 attribute "align" has invalid value "fizzle"]]> 51 2 10 1 attribute "valign" has invalid value "center"]]> 51 2 12 1 attribute "valign" has invalid value "fuzzle"]]> 51 2 14 1 attribute "valign" has invalid value "fuzzle"]]> 51 2 14 1 attribute "align" has invalid value "fuzzle"]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427836.out0000644000175000017500000000037010000613130017205 0ustar twernertwerner jtidy/src/test/resources/991469.xml0000644000175000017500000000010310076524731017225 0ustar twernertwerner jtidy/src/test/resources/427816.msg0000644000175000017500000000173411463532165017215 0ustar twernertwerner 44 2 1 1 declaration]]> 59 2 6 1 unexpected or duplicate quote mark]]> 59 2 6 1 unexpected or duplicate quote mark]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1416185.msg0000644000175000017500000000275611461621360017272 0ustar twernertwerner 44 2 1 1 declaration]]> 8 2 9 1 ]]> 111 0 1 1 -1 0 0 0 16 0 1 1 112 0 1 1 jtidy/src/test/resources/431895.cfg0000644000175000017500000000022610111224155017145 0ustar twernertwerner# Tidy configuration file for bug 
#431895 quiet: yes markup: no gnu-emacs: yes tidy-mark: false quiet= yes markup= no gnu-emacs= yes tidy-mark= falsejtidy/src/test/resources/552861.out0000644000175000017500000000041110013533125017207 0ustar twernertwerner Test Input For Bug #552861
    Testing
    jtidy/src/test/resources/680664.xhtml0000644000175000017500000000074007777327667017607 0ustar twernertwerner [ 680664 ] Malformed comment generates bad (X)HTML
    This is a test of some pre stuff.
    See what happens to this comment 
    
      
        28
        2
        1
        1
        
      
      
        110
        0
        1
        1
        
      
      
        111
        0
        1
        1
        
      
      
        -1
        0
        0
        0
        
      
    
    jtidy/src/test/resources/661606.html0000644000175000017500000000115407777327667017412 0ustar  twernertwerner
    
    
      [661606] Two bytes at the last line, w/ asian options
      
      
    
    
      

    18

    ԁ@ς
    ܂܂B

    uǂ悤cH

    jtidy/src/test/resources/634889.out0000644000175000017500000000025710000613130017221 0ustar twernertwerner [ 634889 ] Problem with <o:p> ms word tag

    Probably OK, now that ParseTagNames() is fixed.

    jtidy/src/test/resources/427820.out0000644000175000017500000000041610011474527017220 0ustar twernertwerner Test Input For Bug #427820

    jtidy/src/test/resources/765852.html0000644000175000017500000000027510024417324017366 0ustar twernertwerner #765852 Empty tag striping

    Text following italics without a blank after the i end tag is not cleaned up correctly (the bold blank is eliminated).

    jtidy/src/test/resources/788651.html0000644000175000017500000007211610110502420017357 0ustar twernertwerner Compatibility & Verification
    Skip Masthead Links developers.sun.com   » search tips  |  Search:     
    Carol McDonald, Staff Engineer, Code Camp Engineer in J2EE, Web Services and Integration
     Java
    The Source for Java Developers
    Sun Microsystems
     Skip to Content
     
     
    J2EE
    Compatibility & Verification
    Early Access
     
     
     
    Early Access
     
    Third Party Java Verified Licensees and Products
    Industry Support
    Documentation
    FAQs
    Compatibility
     
    Tutorial and Code Camps
     
     

    Java ؃vO

    Java Verified S

    ͂߂

    Java ؃vÓAJava 2 Platform, Enterprise Edition (J2EE) eNmWgĊJAقȂ J2EE ւ̈ڐA”\ȃG^[vCYAvP[VF肷vOłB

    ̃vOɂ́AJava ؔF󂯂邽߂ɕKvȃeXg̊TĂ܂BvO̎{ɂĎgpG^[vCY Java AvP[V؃Lbg (AVK) ́AJAvP[ṼKChCƋKɏ]ăeXg邽߂ɃT񋟂ĂeNmWłB

    G^[vCY Java AVK ̃eXgɍiƁAJava ؃vOуg[h}[NCZX\āAeXgς݂̃AvP[V Java Verified ̃Sƃg[h}[Ngp錠ƁAJava Verified ̃AvP[V̕y𑣐i邽߂̃}[PeBOT|[gvOɎQ錠𓾂邱Ƃł܂B Java Verified ̃G^[vCYAvP[V́Aڋqɑ΂āÃAvP[V̊J҂ J2EE ݊AvP[VT[õT|[gϋɓIɒ񋟂悤ƂĂ邱Ƃ܂B

    G^[vCY Java AVK ГœIɎgpꍇAeXgʂJȊO̖ړIŎgp邱Ƃ͂ł܂B\tgEFA̎gpeXg𖞂ĂƂO҂ɊJ邱Ƃ͂ł܂B܂AJava Verified ̃Sgp邱Ƃ͂ł܂B

    vO̖ړI

    J҂̏ꍇ:

    • sꍷʉ & rWlX`X̊g: JiɁuJava VerifiedvSt^邱ƂɂANXvbgtH[T|[g̐Mƌڋq̐Ml₷ȂAiƂ̍ʉ}邱Ƃł܂B

    • ̔TCŇ & RXg̍팸: G^[vCY Java AvP[V؃Lbg (AVK) ̃eXgvZXł́ÃAvP[VT[ogp邱ƂȂÃvbgtH[ŃAvP[V\zуeXĝƓ̂Ƃł܂B ̂߁A̔vZX̃TCNAڍsRXgƌڋq̃XN܂B J҂ɂƂāAJAvP[V Java Verified ̃uht邱Ƃ́AʓI—Lvȃ}[PeBOēɑ΂vコ邱ƂɂȂ܂B

    ڋq̏ꍇ:

    sɂ́AJava eNmW𗘗p푽lȐioĂ܂B̂ȂŌڋq́A萫ƁÃvbgtH[‹Ŏgpł\[VI邽߂̊߂Ă܂B ڋqG^[vCY\[VIۂɋ߂邱Ƃ́ÃAvP[V J2EE eNmWx[X̐iƌ݊A܂uWrite Once, Run AnywherevĂ邱ƂłAuJava VerifiedṽAvP[V͂̂ƂڋqɎƂł܂B

      vO̓T

      Java ؃vOւ̎Qi𓾂AvP[VJ҂́A̋}[PeBOT|[gvOɃANZXł܂B vOɂ́AȃG^[vCY\[Vsɂ Java Verified ̃\[V̔FmxƉ—p߂铭܂B

      Ƃ΁Â悤ȃvO܂B

      • Web v[V:
        • ̐݌ڋqIɃANZXT̃IC Java \[VJ^OɌf
      • pubN[ṼT|[g:
        • C^r[AڋqƂ̃~[eBOACxgȂǂʂāAvXAAiXgAьڋqւ̔Fmx̌
        • vX[Ẍp̏Aьڋq̃P[XX^fB̌f
      • }[PeBOT|[g:
        • JavaOne Developer Conference Ȃǂ̓ʂȃvOT㉇铯l̃Cxgʂăv[V̋@g

      vOւ̎Q@

      JAvP[V G^[vCY Java AvP[V؃LbggeXgɍiꍇAJava ؃vOւ̎QiƁAeXgς݃AvP[V Java Verified ̃g[h}[NƃSt錠𓾂邽߂̐\sƂł܂B

      vO̗v

      Java ؃vOւ̎Ql̏ꍇ́A̎菇ɏ]ĂB

      1. Java AvP[V؃Lbg (AVK) nhubNɋLڂĂɏ]ăAvP[ṼeXg{܂B
      2. Java Verified ̃g[h}[NƃS\邽߁ATƃg[h}[NCZX_܂B
      3. NԃCZXx܂B
      4. Java ؃vÕ`FbNXggĕKv`FbN܂B

      ̃AvP[VvZXɕKvȎ菇̏ڍׂɂ‚ẮAB

      G^[vCY Java AvP[V؃Lbg (AVK)

      G^[vCY Java AVK ́AJ2EE API 𐳂gpAJ2EE ݊AvP[VT[oԂňڐA”\ȃAvP[V\zуeXg邽߂ɊJꂽc[łB J2EE vbgtH[ɂ́AڐAƂ_܂B̗_́AꂳꂽAvP[VT[o̎dlƍL͈͂ɂ킽݊eXg̃Zbg̒񋟂ɂĎ̂łB Java BluePrints vÓAڐA”\ J2EE x[X̃AvP[V\z邽߂̃KChC񋟂܂B AڐÂȂAvP[VӐ}ɍ쐬Ă܂”\܂B G^[vCY Java AVK ́AJ҂̂悤ȎԂł悤ɐ݌vĂ܂B

      AvP[V̗Lp̃eXg

      AvP[Vɂ̓AvP[VT[oKvłBAvP[V́AJavaServer Pages eNmWAJava T[ubgAEnterprise JavaBeans eNmW܂ޕ̃AvP[VT[oŎsł悤ɐ݌vA1 ˆȏ .ear t@CƂăpbP[WKv܂B

      G^[vCY Java AVK ̃eXgIvV

      G^[vCY Java AVK ̃eXgv𖞂Ƃ́AJava ؃vOɎQ邽߂̑OłBƂ͕ʂɁAJAvP[VeXgAAvP[VT[o̔W@\ւ̈ˑxŏɂĈڍsRXg팸邽߂ɁAG^[vCY Java AVK ʓr肷邱Ƃł܂B

      AvP[ṼeXgAJava ؃vOɎQāAeXgς݃AvP[V Java Verified ̃g[h}[NƃSgp錠擾ꍇ́A܂̃vZXɕKvȂׂĂ̎菇mFKv܂Bڂ菇́AɋLڂĂ܂B

      G^[vCY Java AvP[V؃Lbg (AVK) ГœIɃeXgɎgpꍇA\tgEFÃCZXgpA܂̓\tgEFÅ𖞂ĂƂAT̏ʂɂ鎖Ő‚ȂO҂ɊJ邱Ƃ͂ł܂B ̂悤ȋ‚󂯂ɂ́ATƃg[h}[NCZX_A̗xKv܂B

      G^[vCY Java AVK ̒́AJava Administration Center ɂď܂B

      java.sun.com Web TCgɂo^Kv܂B ܂o^ĂȂꍇ́Ao^ĂB o^͕svłB Java ؃vOɊւ邲Aɂ́A蓖čς݂ JSC [U ID tLĂB

      ֘A

      G^[vCY Java AVK Ɋւ邲₪ꍇAAvP[VeXg̕Kvɂ‚ăT̒S҂ɑkꍇ́Averification-sales@sun.com ܂łAB

      ̑̃\[X

      j[X & L

      Java ؃vOɊւŐVmFAJava Verified ̐ĩfJ鍡̃T̃Cxg`FbNĂB

      i FAQ

      Java ؂̃vZXAAvP[VeXg̓KAJava Verified g[h}[NCZXȂǂɊւ邲₪ꍇ́A܂vO FAQmFB Kvȏ FAQ ɋLڂĂȂꍇ́Averification-sales@sun.com ܂łAB

      eNjJT|[g

      G^[vCY Java AVK ̃eNjJT|[ǵÃg[jO󂯂T̃XyVXg񋟂Ă܂B T|[gIvV̏ڍׂ́AɋLڂĂ܂B܂́AJ2EE_AVK_support@sun.com ܂ł₢킹B

      Java Verified ifAvP[V

      Deployathon ł́AJ2EE eNmWx[X̃AvP[V̈ڐÃf񋟂Ă܂BTvAvP[V Java Pet Store Smart Ticket ́Aǂ Java BluePrints vOɏ]ĊJꂽ̂łB̃AvP[V́AύXȂŁA J2EE ݊AvP[VT[oŎsł܂B ȂAǂ̃AvP[VAG^[vCY Java AVK ̗v𖞂Ă܂B


      Related Links
       



      Carol McDonald


      jtidy/src/test/resources/427839.cfg0000644000175000017500000000023410111224155017147 0ustar twernertwerner# Tidy configuration file for bug #427839 output-xhtml: yes doctype: omit tidy-mark: false wrap: 0 output-xhtml= yes doctype= omit tidy-mark= false wrap= 0jtidy/src/test/resources/431731.out0000644000175000017500000000050310103760042017201 0ustar twernertwerner [ #431731 ] Inline emphasis inconsistent propagation OUTSIDE
      OUTSIDE jtidy/src/test/resources/432677.msg0000644000175000017500000000121111461621360017176 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/516370.xhtml0000644000175000017500000000103310115166156017541 0ustar twernertwerner [ #516370 ] Invalid ID value?

      Test valid ID

      Test valid ID

      Test valid ID - : should only be used for namespaces

      Test invalid ID

      Test invalid ID

      Empty invalid ID

      jtidy/src/test/resources/427818.msg0000644000175000017500000000735411461621360017215 0ustar twernertwerner 65 2 7 1 discarding newline in URI reference]]> 58 2 7 1 attribute with missing trailing quote mark]]> 64 2 7 1 escaping malformed URI reference]]> 110 0 1 1 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 jtidy/src/test/resources/480843.xhtml0000644000175000017500000000056107777327667017605 0ustar twernertwerner [ #480843 ] Proposed change to FixID()

      Introduction

      New Introduction

      jtidy/src/test/resources/434940.out0000644000175000017500000000005710000613130017201 0ustar twernertwernerUse "--show-body-only yes" on the command line jtidy/src/test/resources/444834.msg0000644000175000017500000000015510120355471017200 0ustar twernertwerner jtidy/src/test/resources/540571.msg0000644000175000017500000000413711463516445017212 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 6 19 before

      ]]> 23 2 6 19 ]]> 15 2 6 23 ]]> 8 2 6 35 ]]> 7 2 10 19 before

      ]]> 23 2 10 19 ]]> 8 2 10 35 ]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements. ]]> jtidy/src/test/resources/445557.html0000644000175000017500000000166007777327667017421 0ustar twernertwerner [ #445557 ] Convert Symbol font chars to Unicode

      The predicate calculus has a number of theorems and axioms for proving logical statements. Here are the main symbols used in predicate calculus:

      P(x) – proposition – a logical statement in the condition x.

      x – any condition in the set of possible conditions.

      c – a particular condition in the set of possible conditions.

      " – "For every"

      $ – "Exists"

      ® – Implication

      Ù – Conjunction (logical and)

      Ú – Disjunction (logical or)

      jtidy/src/test/resources/533105.msg0000644000175000017500000000304111463516445017176 0ustar twernertwerner 32 2 12 26 32 2 12 71 32 2 12 94 32 2 13 210 32 2 13 231 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/540555.html0000644000175000017500000000011107777327667017401 0ustar twernertwerner <body> <p>#540555 Empty title tag is trimmed</p> </body>�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/test/resources/427811.msg�����������������������������������������������������������������0000644�0001750�0001750�00000002225�11461621360�017176� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="ISO-8859-1"?> <!-- expected messages for test 427811--> <messages> <message> <code>44</code> <level>2</level> <line>1</line> <column>1</column> <text><![CDATA[missing <!DOCTYPE> declaration]]></text> </message> <message> <code>8</code> <level>2</level> <line>10</line> <column>1</column> <text><![CDATA[discarding unexpected <noframes>]]></text> </message> <message> <code>11</code> <level>2</level> <line>11</line> <column>3</column> <text><![CDATA[<body> isn't allowed in <body> elements]]></text> </message> <message> <code>11</code> <level>2</level> <line>13</line> <column>3</column> <text><![CDATA[<frame> isn't allowed in <body> elements]]></text> 
</message> <message> <code>111</code> <level>0</level> <line>1</line> <column>1</column> <text><![CDATA[InputStream: Document content looks like HTML 4.01 Frameset]]></text> </message> <message> <code>-1</code> <level>0</level> <line>0</line> <column>0</column> <text><![CDATA[4 warnings, no errors were found!]]></text> </message> </messages> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/test/resources/444394.out�����������������������������������������������������������������0000644�0001750�0001750�00000000654�10000613130�017210� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<html> <head> <link rel="Edit-Time-Data" href="./Hello_files/editdata.mso"> <title>Hello

      Hello

      This is a nice document

      Test

      With a nice picture

      jtidy/src/test/resources/586555.msg0000644000175000017500000000015510120571012017176 0ustar twernertwerner jtidy/src/test/resources/431898.out0000644000175000017500000000053310112132455017223 0ustar twernertwerner ]> [ #431898 ] Tidy messes up X(HT)ML documents

      Test

      jtidy/src/test/resources/517550.msg0000644000175000017500000000124611461621360017200 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/909187.msg0000644000175000017500000000124511461621360017212 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433021.html0000644000175000017500000000100110027132442017324 0ustar twernertwerner [ #433021 ] Identify attribute whose value is bad

      text

      "valign" attr value can't be "center"

      "valign" attr value can't be "fuzzle"

      "align"/"valign" attr values can't be "fuzzle"

      jtidy/src/test/resources/1024661.msg0000644000175000017500000000154511461621360017257 0ustar twernertwerner 68 2 7 1 joining values of repeated attribute "style"]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/649812.msg0000644000175000017500000000124711463516445017221 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/735603.msg0000644000175000017500000000145311463516445017212 0ustar twernertwerner 44 2 1 1 declaration]]> 21 2 6 1 is not approved by W3C]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427819.msg0000644000175000017500000000270111461621360017205 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 6 1 lacks "action" attribute]]> 8 2 8 26 ]]> 8 2 8 46 ]]> 8 2 12 26 ]]> 8 2 12 45 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/445074.msg0000644000175000017500000000156711461621360017207 0ustar twernertwerner 70 2 8 1 attribute value "POST" for "method" must be lower case for XHTML]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/996484.out0000644000175000017500000000137510115150162017235 0ustar twernertwerner test whether xx<...>yy</...> is wrapped properly

      some text (a long URI).

      xxyy

      xxyy

      zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
      xx

      jtidy/src/test/resources/503436.cfg0000644000175000017500000000021510111224155017132 0ustar twernertwerner# Tidy configuration file for bug #503436 input-xml: yes tidy-mark: false indent-spaces: 0 input-xml= yes tidy-mark= false indent-spaces= 0 jtidy/src/test/resources/663197.out0000644000175000017500000000053210110502420017210 0ustar twernertwerner [ 663197 ] nbsp handling is wrong whitespaces between words are 0xA0 characters jtidy/src/test/resources/929936.html0000644000175000017500000000051210034326060017362 0ustar twernertwerner [ # ] Escape URI 1
      2
      3
      4
      5
      6
      jtidy/src/test/resources/427845.html0000644000175000017500000000032310104011573017350 0ustar twernertwerner [ #427845 ] Doctypes are output on multiple lines Use "--wrap 70" on the command line jtidy/src/test/resources/634889.msg0000644000175000017500000000171111463516445017225 0ustar twernertwerner 44 2 1 1 declaration]]> 21 2 7 3 is not approved by W3C]]> 23 2 7 4 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431956.cfg0000644000175000017500000000021310111224155017137 0ustar twernertwerner# Tidy configuration file for bug #431956 input-xml: yes output-xml: yes tidy-mark: false input-xml= yes output-xml= yes tidy-mark= false jtidy/src/test/resources/646946.cfg0000644000175000017500000000023410111224155017151 0ustar twernertwerner# Tidy configuration file for bug #640474 input-xml: yes output-xml: yes tidy-mark: false wrap: 0 input-xml= yes output-xml= yes tidy-mark= false wrap= 0 jtidy/src/test/resources/1003994.xml0000644000175000017500000000000610110421776017265 0ustar twernertwernerjtidy/src/test/resources/1033035.html0000644000175000017500000000037210125574065017432 0ustar twernertwerner [#1033035] EOF bug in parseAsp and parsePhp [ 540045 ] Tidy strips all the IMG tags out!

      USA flag

      jtidy/src/test/resources/443381.out0000644000175000017500000000045510013533125017213 0ustar twernertwerner [ #443381 ] end tags for empty elements in XHTML

      TestcoolTest

      jtidy/src/test/resources/435920.out0000644000175000017500000000040310000613130017173 0ustar twernertwerner [ #435920 ] Space inserted before </td> causes probs bla
       
      jtidy/src/test/resources/427662.msg0000644000175000017500000000245611461621360017210 0ustar twernertwerner 44 2 1 1 declaration]]> 11 2 7 1 isn't allowed in elements]]> 23 2 8 -3 ]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/450389.html0000644000175000017500000000521407777327667017417 0ustar twernertwerner[ #450389 ] Color attval check allows only black/#

      Test black
      Test green
      Test silver
      Test lime
      Test gray
      Test olive
      Test white
      Test yellow
      Test maroon
      Test navy
      Test red
      Test blue
      Test purple
      Test teal
      Test fuchsia
      Test aqua

      Test Red
      Test RED

      Test invalid reddish

      Test black #000000
      Test green #008000
      Test silver #C0C0C0
      Test lime #00FF00
      Test gray #808080
      Test olive #808000
      Test white #FFFFFF
      Test yellow #FFFF00
      Test maroon #800000
      Test navy #000080
      Test red #FF0000
      Test blue #0000FF
      Test purple #800080
      Test teal #008080
      Test fuchsia #FF00FF
      Test aqua #00FFFF

      Test red #ff0000
      Test red #fF0000

      Test invalid red #FF

      Test invalid grurple
      Test invalid #grurple
      Test invalid #1234567

      jtidy/src/test/resources/514893.out0000644000175000017500000000135310104011573017217 0ustar twernertwerner[ 514893 ] Incorrect http-equiv <meta> tag

      This document will test synchronization of the <meta http-equiv ...> tag. It contains the header <meta http-equiv="CONTENT-TYPE" content="TEXT/HTML; CHARSET=WINDOWS-1252" />.

      On output, the header should be modified to reflect whatever output encoding you have specified.

      jtidy/src/test/resources/616744.cfg0000644000175000017500000000040210026671162017150 0ustar twernertwernerinput-xml: yes new-pre-tags: programlisting new-inline-tags: literal indent: no indent-spaces: 0 wrap: 999999 tidy-mark: false input-xml= yes new-pre-tags= programlisting new-inline-tags= literal indent= no indent-spaces= 0 wrap= 999999 tidy-mark= falsejtidy/src/test/resources/538727.msg0000644000175000017500000000117311461621360017210 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/514893.msg0000644000175000017500000000124611463516445017220 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/434940b.html0000644000175000017500000000024710026660405017520 0ustar twernertwerner[ #434940 ] --show-body-only: print only body contents Use "--show-body-only yes" on the command line
      jtidy/src/test/resources/427838.html0000644000175000017500000000067210000613130017351 0ustar twernertwerner[ #427838 ] Name Anchor thrown away A fragment of html created by Frontpage.....

      Clipboard

      lots more stuff deleted.... some text some text jtidy/src/test/resources/616744.out0000644000175000017500000000042310000613130017202 0ustar twernertwerner This is some stuff in a para. There's a " command" in it.

      This line is indented 4 spaces. This (3rd) line is indented 8 spaces.
      jtidy/src/test/resources/661606.msg0000644000175000017500000000126211461621360017200 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/661606.cfg0000644000175000017500000000033710000613130017133 0ustar twernertwernerchar-encoding: shiftjis ncr: yes tidy-mark: no output-xhtml: yes clean: yes indent: auto logical-emphasis: yes char-encoding= shiftjis ncr= yes tidy-mark= no output-xhtml= yes clean= yes indent= auto logical-emphasis= yes jtidy/src/test/resources/598860.out0000644000175000017500000000040710133721557017240 0ustar twernertwerner #598860 script parsing fails with quote chars jtidy/src/test/resources/663197.html0000644000175000017500000000036410110502420017350 0ustar twernertwerner [ 663197 ] nbsp handling is wrong whitespacesbetweenwordsare0xA0characters jtidy/src/test/resources/427672.html0000644000175000017500000000026307777327667017415 0ustar twernertwerner [#427672] Non-std attrs w/multibyte names segfault text jtidy/src/test/resources/542029.msg0000644000175000017500000000117411461621360017177 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427677.msg0000644000175000017500000000117411461621360017212 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431721.msg0000644000175000017500000000760211463516445017206 0ustar twernertwerner 48 2 1 1 48 2 1 1 44 2 1 1 declaration]]> 49 2 46 1 lacks "type" attribute]]> 21 2 160 121 is not approved by W3C]]> 23 2 160 122 ]]> 21 2 168 74 is not approved by W3C]]> 23 2 168 74 ]]> 21 2 170 119 is not approved by W3C]]> 23 2 170 120 ]]> 21 2 175 46 is not approved by W3C]]> 23 2 175 46 ]]> 21 2 189 119 is not approved by W3C]]> 23 2 189 120 ]]> 21 2 202 57 is not approved by W3C]]> 23 2 202 57 ]]> 21 2 204 119 is not approved by W3C]]> 23 2 204 120 ]]> 20 2 212 1 by
    • ]]> 20 2 212 1 by
    • ]]> 20 2 212 1 by
    • ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427839.msg0000644000175000017500000000117411461621360017212 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/640473.out0000644000175000017500000000035310133721557017224 0ustar twernertwerner [ 640473 ] new-empty-tags doesn't work, breaks doc Foo bar foo foo foo

      This is a test

      This is a pre-formatted Baz! jtidy/src/test/resources/427833.msg0000644000175000017500000000177711463516445017226 0ustar twernertwerner 3 2 10 26 28 2 1 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/532535.msg0000644000175000017500000000200011461621360017165 0ustar twernertwerner 21 2 7 57 is not approved by W3C]]> 28 2 1 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/648768.out0000644000175000017500000000042110114201717017225 0ustar twernertwerner [648768] Fix for character references >= 32768

      jtidy/src/test/resources/426885.msg0000644000175000017500000001011111463516445017212 0ustar twernertwerner 44 2 1 1 declaration]]> 11 2 7 1 isn't allowed in elements]]> 15 2 7 1 ]]> 49 2 7 5 lacks "alt" attribute]]> 49 2 8 5 lacks "alt" attribute]]> 11 2 8 41 isn't allowed in
      elements]]> 23 2 8 41 ]]> 7 2 9 9 before

      ]]> 23 2 9 9 ]]> 23 2 9 9 ]]> 8 2 9 23 ]]> 12 2 11 1 ]]> 15 2 11 1 ]]> 49 2 11 5 lacks "alt" attribute]]> 49 2 12 5 lacks "alt" attribute]]> 11 2 12 41 isn't allowed in
      elements]]> 23 2 12 41 ]]> 7 2 13 -3 before ]]> 111 0 1 1 -1 0 0 0 1 0 1 1 112 0 1 1 jtidy/src/test/resources/695408.out0000644000175000017500000000040410000613130017205 0ustar twernertwerner [ 695408 ] Empty spans getting dropped, even if they have attrs

    • jtidy/src/test/resources/922302.out0000644000175000017500000000060410104011573017201 0ustar twernertwerner [ #922302 ] Add comment to script tag to produce valid XML jtidy/src/test/resources/475643.html0000644000175000017500000010071710114167510017363 0ustar twernertwerner HTML 4.0 Special Entities

      The Web Design Group

      Special Entities

      The following table gives the character entity reference, decimal character reference, and hexadecimal character reference for markup-significant and internationalization characters, as well as the rendering of each in your browser. Glyphs of the characters are available at the Unicode Consortium.

      With the exception of HTML 2.0's &quot;, &amp;, &lt;, and &gt;, browser support for these entities is generally quite poor, but recent browsers support some of the character entity references and decimal character references.

      Character Entity Decimal Hex Rendering in Your Browser
      Entity Decimal Hex
      quotation mark = APL quote &quot; &#34; &#x22; " " "
      ampersand &amp; &#38; &#x26; & & &
      less-than sign &lt; &#60; &#x3C; < < <
      greater-than sign &gt; &#62; &#x3E; > > >
      Latin capital ligature OE &OElig; &#338; &#x152; Œ Œ Œ
      Latin small ligature oe &oelig; &#339; &#x153; œ œ œ
      Latin capital letter S with caron &Scaron; &#352; &#x160; Š Š Š
      Latin small letter s with caron &scaron; &#353; &#x161; š š š
      Latin capital letter Y with diaeresis &Yuml; &#376; &#x178; Ÿ Ÿ Ÿ
      modifier letter circumflex accent &circ; &#710; &#x2C6; ˆ ˆ ˆ
      small tilde &tilde; &#732; &#x2DC; ˜ ˜ ˜
      en space &ensp; &#8194; &#x2002;
      em space &emsp; &#8195; &#x2003;
      thin space &thinsp; &#8201; &#x2009;
      zero width non-joiner &zwnj; &#8204; &#x200C;
      zero width joiner &zwj; &#8205; &#x200D;
      left-to-right mark &lrm; &#8206; &#x200E;
      right-to-left mark &rlm; &#8207; &#x200F;
      en dash &ndash; &#8211; &#x2013;
      em dash &mdash; &#8212; &#x2014;
      left single quotation mark &lsquo; &#8216; &#x2018;
      right single quotation mark &rsquo; &#8217; &#x2019;
      single low-9 quotation mark &sbquo; &#8218; &#x201A;
      left double quotation mark &ldquo; &#8220; &#x201C;
      right double quotation mark &rdquo; &#8221; &#x201D;
      double low-9 quotation mark &bdquo; &#8222; &#x201E;
      dagger &dagger; &#8224; &#x2020;
      double dagger &Dagger; &#8225; &#x2021;
      per mille sign &permil; &#8240; &#x2030;
      single left-pointing angle quotation mark &lsaquo; &#8249; &#x2039;
      single right-pointing angle quotation mark &rsaquo; &#8250; &#x203A;
      euro sign &euro; &#8364; &#x20AC;

      Latin-1 Entities

      The following table gives the character entity reference, decimal character reference, and hexadecimal character reference for 8-bit characters in the Latin-1 (ISO-8859-1) character set, as well as the rendering of each in your browser. Glyphs of the characters are available at the Unicode Consortium.

      Browser support is generally best for the decimal character references, except for the accented characters (decimal 192-214, 216-246, 248-255), where the character entity references hold a slight edge.

      Note that some Mac browsers will render fourteen Latin-1 characters incorrectly. These characters are decimal 166, 178, 179, 185, 188, 189, 190, 208, 215, 221, 222, 240, 253, and 254. See ISO-8859-1 and the Mac platform for more information.

      Character Entity Decimal Hex Rendering in Your Browser
      Entity Decimal Hex
      no-break space = non-breaking space &nbsp; &#160; &#xA0;      
      inverted exclamation mark &iexcl; &#161; &#xA1; ¡ ¡ ¡
      cent sign &cent; &#162; &#xA2; ¢ ¢ ¢
      pound sign &pound; &#163; &#xA3; £ £ £
      currency sign &curren; &#164; &#xA4; ¤ ¤ ¤
      yen sign = yuan sign &yen; &#165; &#xA5; ¥ ¥ ¥
      broken bar = broken vertical bar &brvbar; &#166; &#xA6; ¦ ¦ ¦
      section sign &sect; &#167; &#xA7; § § §
      diaeresis = spacing diaeresis &uml; &#168; &#xA8; ¨ ¨ ¨
      copyright sign &copy; &#169; &#xA9; © © ©
      feminine ordinal indicator &ordf; &#170; &#xAA; ª ª ª
      left-pointing double angle quotation mark = left pointing guillemet &laquo; &#171; &#xAB; « « «
      not sign &not; &#172; &#xAC; ¬ ¬ ¬
      soft hyphen = discretionary hyphen &shy; &#173; &#xAD; ­ ­ ­
      registered sign = registered trade mark sign &reg; &#174; &#xAE; ® ® ®
      macron = spacing macron = overline = APL overbar &macr; &#175; &#xAF; ¯ ¯ ¯
      degree sign &deg; &#176; &#xB0; ° ° °
      plus-minus sign = plus-or-minus sign &plusmn; &#177; &#xB1; ± ± ±
      superscript two = superscript digit two = squared &sup2; &#178; &#xB2; ² ² ²
      superscript three = superscript digit three = cubed &sup3; &#179; &#xB3; ³ ³ ³
      acute accent = spacing acute &acute; &#180; &#xB4; ´ ´ ´
      micro sign &micro; &#181; &#xB5; µ µ µ
      pilcrow sign = paragraph sign &para; &#182; &#xB6;
      middle dot = Georgian comma = Greek middle dot &middot; &#183; &#xB7; · · ·
      cedilla = spacing cedilla &cedil; &#184; &#xB8; ¸ ¸ ¸
      superscript one = superscript digit one &sup1; &#185; &#xB9; ¹ ¹ ¹
      masculine ordinal indicator &ordm; &#186; &#xBA; º º º
      right-pointing double angle quotation mark = right pointing guillemet &raquo; &#187; &#xBB; » » »
      vulgar fraction one quarter = fraction one quarter &frac14; &#188; &#xBC; ¼ ¼ ¼
      vulgar fraction one half = fraction one half &frac12; &#189; &#xBD; ½ ½ ½
      vulgar fraction three quarters = fraction three quarters &frac34; &#190; &#xBE; ¾ ¾ ¾
      inverted question mark = turned question mark &iquest; &#191; &#xBF; ¿ ¿ ¿
      Latin capital letter A with grave = Latin capital letter A grave &Agrave; &#192; &#xC0; À À À
      Latin capital letter A with acute &Aacute; &#193; &#xC1; Á Á Á
      Latin capital letter A with circumflex &Acirc; &#194; &#xC2; Â Â Â
      Latin capital letter A with tilde &Atilde; &#195; &#xC3; Ã Ã Ã
      Latin capital letter A with diaeresis &Auml; &#196; &#xC4; Ä Ä Ä
      Latin capital letter A with ring above = Latin capital letter A ring &Aring; &#197; &#xC5; Å Å Å
      Latin capital letter AE = Latin capital ligature AE &AElig; &#198; &#xC6; Æ Æ Æ
      Latin capital letter C with cedilla &Ccedil; &#199; &#xC7; Ç Ç Ç
      Latin capital letter E with grave &Egrave; &#200; &#xC8; È È È
      Latin capital letter E with acute &Eacute; &#201; &#xC9; É É É
      Latin capital letter E with circumflex &Ecirc; &#202; &#xCA; Ê Ê Ê
      Latin capital letter E with diaeresis &Euml; &#203; &#xCB; Ë Ë Ë
      Latin capital letter I with grave &Igrave; &#204; &#xCC; Ì Ì Ì
      Latin capital letter I with acute &Iacute; &#205; &#xCD; Í Í Í
      Latin capital letter I with circumflex &Icirc; &#206; &#xCE; Î Î Î
      Latin capital letter I with diaeresis &Iuml; &#207; &#xCF; Ï Ï Ï
      Latin capital letter ETH &ETH; &#208; &#xD0; Ð Ð Ð
      Latin capital letter N with tilde &Ntilde; &#209; &#xD1; Ñ Ñ Ñ
      Latin capital letter O with grave &Ograve; &#210; &#xD2; Ò Ò Ò
      Latin capital letter O with acute &Oacute; &#211; &#xD3; Ó Ó Ó
      Latin capital letter O with circumflex &Ocirc; &#212; &#xD4; Ô Ô Ô
      Latin capital letter O with tilde &Otilde; &#213; &#xD5; Õ Õ Õ
      Latin capital letter O with diaeresis &Ouml; &#214; &#xD6; Ö Ö Ö
      multiplication sign &times; &#215; &#xD7; × × ×
      Latin capital letter O with stroke = Latin capital letter O slash &Oslash; &#216; &#xD8; Ø Ø Ø
      Latin capital letter U with grave &Ugrave; &#217; &#xD9; Ù Ù Ù
      Latin capital letter U with acute &Uacute; &#218; &#xDA; Ú Ú Ú
      Latin capital letter U with circumflex &Ucirc; &#219; &#xDB; Û Û Û
      Latin capital letter U with diaeresis &Uuml; &#220; &#xDC; Ü Ü Ü
      Latin capital letter Y with acute &Yacute; &#221; &#xDD; Ý Ý Ý
      Latin capital letter THORN &THORN; &#222; &#xDE; Þ Þ Þ
      Latin small letter sharp s = ess-zed &szlig; &#223; &#xDF; ß ß ß
      Latin small letter a with grave = Latin small letter a grave &agrave; &#224; &#xE0; à à à
      Latin small letter a with acute &aacute; &#225; &#xE1; á á á
      Latin small letter a with circumflex &acirc; &#226; &#xE2; â â â
      Latin small letter a with tilde &atilde; &#227; &#xE3; ã ã ã
      Latin small letter a with diaeresis &auml; &#228; &#xE4; ä ä ä
      Latin small letter a with ring above = Latin small letter a ring &aring; &#229; &#xE5; å å å
      Latin small letter ae = Latin small ligature ae &aelig; &#230; &#xE6; æ æ æ
      Latin small letter c with cedilla &ccedil; &#231; &#xE7; ç ç ç
      Latin small letter e with grave &egrave; &#232; &#xE8; è è è
      Latin small letter e with acute &eacute; &#233; &#xE9; é é é
      Latin small letter e with circumflex &ecirc; &#234; &#xEA; ê ê ê
      Latin small letter e with diaeresis &euml; &#235; &#xEB; ë ë ë
      Latin small letter i with grave &igrave; &#236; &#xEC; ì ì ì
      Latin small letter i with acute &iacute; &#237; &#xED; í í í
      Latin small letter i with circumflex &icirc; &#238; &#xEE; î î î
      Latin small letter i with diaeresis &iuml; &#239; &#xEF; ï ï ï
      Latin small letter eth &eth; &#240; &#xF0; ð ð ð
      Latin small letter n with tilde &ntilde; &#241; &#xF1; ñ ñ ñ
      Latin small letter o with grave &ograve; &#242; &#xF2; ò ò ò
      Latin small letter o with acute &oacute; &#243; &#xF3; ó ó ó
      Latin small letter o with circumflex &ocirc; &#244; &#xF4; ô ô ô
      Latin small letter o with tilde &otilde; &#245; &#xF5; õ õ õ
      Latin small letter o with diaeresis &ouml; &#246; &#xF6; ö ö ö
      division sign &divide; &#247; &#xF7; ÷ ÷ ÷
      Latin small letter o with stroke = Latin small letter o slash &oslash; &#248; &#xF8; ø ø ø
      Latin small letter u with grave &ugrave; &#249; &#xF9; ù ù ù
      Latin small letter u with acute &uacute; &#250; &#xFA; ú ú ú
      Latin small letter u with circumflex &ucirc; &#251; &#xFB; û û û
      Latin small letter u with diaeresis &uuml; &#252; &#xFC; ü ü ü
      Latin small letter y with acute &yacute; &#253; &#xFD; ý ý ý
      Latin small letter thorn &thorn; &#254; &#xFE; þ þ þ
      Latin small letter y with diaeresis &yuml; &#255; &#xFF; ÿ ÿ ÿ
      jtidy/src/test/resources/527118.out0000644000175000017500000000061310114170147017213 0ustar twernertwerner [ 527118 ] Suppress duplicate attributes
      • blah blah blah
      • blah blah blah
      jtidy/src/test/resources/537604.xml0000644000175000017500000000034507777327667017247 0ustar twernertwerner this is a test of ©. &, <, >, ', " must be recognized. jtidy/src/test/resources/570027.msg0000644000175000017500000000345511463516445017213 0ustar twernertwerner 29 2 1 11 48 2 2 1 48 2 2 1 48 2 2 1 44 2 2 1 declaration]]> 1 2 22 33 65 2 27 44 discarding newline in URI reference]]> 21 2 29 42 is not approved by W3C]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431965.out0000644000175000017500000000042310112132511017205 0ustar twernertwerner [ #431965 ] XHTML Strict seen as Transitional w/div
      Test
      jtidy/src/test/resources/427819.out0000644000175000017500000000057510103722617017234 0ustar twernertwerner [ #427819 ] OPTION w/illegal FONT eats whitespace
      jtidy/src/test/resources/431731.html0000644000175000017500000000040207777327667017377 0ustar twernertwerner [ #431731 ] Inline emphasis inconsistent propagation OUTSIDE
      OUTSIDE jtidy/src/test/resources/434940b.out0000644000175000017500000000006611463535265017375 0ustar twernertwernerUse "--show-body-only yes" on the command line
      jtidy/src/test/resources/443362.msg0000644000175000017500000000152011461621360017172 0ustar twernertwerner 39 2 26 11 in pre content]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/463066.cfg0000644000175000017500000000017310111224155017141 0ustar twernertwerner# Tidy configuration file for bug #463066 word-2000: yes tidy-mark: false wrap: 0 word-2000= yes tidy-mark= false wrap= 0 jtidy/src/test/resources/540045.msg0000644000175000017500000000124611463516445017204 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1058909.out0000644000175000017500000000050210143246471017306 0ustar twernertwerner [ #1058909 ] Certain sites causing null pointer Exceptions

      NPE

      jtidy/src/test/resources/540571.cfg0000644000175000017500000000032210115714575017150 0ustar twernertwerner# Config for bug #540571 Inconsistent behaviour with span inline element output-xhtml: yes wrap: 255 clean: no indent: auto tidy-mark: false output-xhtml= yes wrap= 255 clean= no indent= auto tidy-mark= false jtidy/src/test/resources/765852.out0000644000175000017500000000042510024417324017226 0ustar twernertwerner #765852 Empty tag striping

      Text following italics without a blank after the i end tag is not cleaned up correctly (the bold blank is eliminated).

      jtidy/src/test/resources/427633.out0000644000175000017500000000036310001056777017225 0ustar twernertwerner [#427663] Line endings not supported correctly

      This is a carriage return This is a Unix line-ending This is a DOS line ending

      jtidy/src/test/resources/917012.out0000644000175000017500000000047310033355672017223 0ustar twernertwerner [#917012] Spaces are moved from content to between tags

      big coloured text

      jtidy/src/test/resources/431721.cfg0000644000175000017500000000054210111224155017132 0ustar twernertwerner# Tidy configuration file for bug #431721 new-inline-tags: o:p char-encoding: latin1 tidy-mark: no clean: yes drop-font-tags: yes logical-emphasis: yes word-2000: yes indent-attributes: yes wrap: 0 new-inline-tags= o:p char-encoding= latin1 tidy-mark= no clean= yes drop-font-tags= yes logical-emphasis= yes word-2000= yes indent-attributes= yes wrap= 0jtidy/src/test/resources/433656.out0000644000175000017500000000026310000613130017203 0ustar twernertwerner [ #433656 ] Improve support for PHP (some text) jtidy/src/test/resources/828316.html0000644000175000017500000000046410110502420017345 0ustar twernertwerner [ 828316 ] FRAMEBORDER attribute of IFRAME gives spurious warning jtidy/src/test/resources/678268.cfg0000644000175000017500000000047710000613130017154 0ustar twernertwernerindent: auto char-encoding: latin1 tidy-mark: no clean: yes drop-font-tags: yes logical-emphasis: yes indent-attributes: yes output-xhtml: yes tidy-mark:false indent= auto char-encoding= latin1 tidy-mark= no clean= yes drop-font-tags= yes logical-emphasis= yes indent-attributes= yes output-xhtml= yes tidy-mark= falsejtidy/src/test/resources/735603.html0000644000175000017500000000025310034237703017353 0ustar twernertwerner [ #735603 ] : drops attributes from declared elements. 
foo attributes shouldn't be removed jtidy/src/test/resources/427672.msg0000644000175000017500000000146511461621360017210 0ustar twernertwerner 44 2 1 1 declaration]]> 51 2 5 1 attribute "Ã1/2" has invalid value "xx"]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427837.xml0000644000175000017500000000014007777327667017246 0ustar twernertwerner Björn Höhrmann Marc-André Lemburg jtidy/src/test/resources/996484.msg0000644000175000017500000000124611461621360017221 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/538536.html0000644000175000017500000000030210111075545017354 0ustar twernertwerner #538536 Extra endtags not detected jtidy/src/test/resources/433672.out0000644000175000017500000000055610000613130017206 0ustar twernertwerner [ #433672 ] Anchor enclosing Header tags is omitted

      Section heading

      Another heading

      ack!

      Goto Another Heading

      jtidy/src/test/resources/663548.msg0000644000175000017500000000174311463516445017224 0ustar twernertwerner 44 2 1 1 declaration]]> 32 2 16 4 8 2 17 -3 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/929936.msg0000644000175000017500000001171611461621360017222 0ustar twernertwerner 62 2 7 1 converting backslash in URI to slash]]> 62 2 8 1 converting backslash in URI to slash]]> 64 2 8 1 escaping malformed URI reference]]> 62 2 9 1 converting backslash in URI to slash]]> 62 2 10 1 converting backslash in URI to slash]]> 64 2 10 1 escaping malformed URI reference]]> 62 2 11 1 converting backslash in URI to slash]]> 64 2 11 1 escaping malformed URI reference]]> 4 2 12 13 64 2 12 13 escaping malformed URI reference]]> 110 0 1 1 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 jtidy/src/test/resources/438658.msg0000644000175000017500000000173011461621360017211 0ustar twernertwerner 44 2 1 1 declaration]]> 24 1 3 61 is probably intended as ]]> 7 2 4 -3 before ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/640473.msg0000644000175000017500000000271711463516445017216 0ustar twernertwerner 44 2 1 1 declaration]]> 21 2 3 1 is not approved by W3C]]> 21 2 3 11 is not approved by W3C]]> 21 2 3 33 7 2 3 33 before zippo]]> 11 2 3 33 elements]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431889.html0000644000175000017500000000152007777327667017417 0ustar twernertwerner Bug-2000-12-27-B [ #431889 ] Config file options w/"param" don't work

      This image has no ALT attribute.

      jtidy/src/test/resources/640473.html0000644000175000017500000000032507777327667017410 0ustar twernertwerner[ 640473 ] new-empty-tags doesn't work, breaks doc Foo bar foo foo foo

      This is a test

      This is a pre-formatted Baz! jtidy/src/test/resources/586562.msg0000644000175000017500000000154311463516445017222 0ustar twernertwerner 34 2 5 3 isn't allowed after elements]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/444394.html0000644000175000017500000001054007777327667017414 0ustar twernertwerner Hello

      Hello

       

      This is a nice document

      Test

      With a nice picture

       

      jtidy/src/test/resources/470663.msg0000644000175000017500000000215411463516445017213 0ustar twernertwerner 48 2 1 1 48 2 1 1 48 2 1 1 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/553468.msg0000644000175000017500000000125411461621360017207 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/640474.xml0000644000175000017500000000014007777327667017240 0ustar twernertwerner Björn Höhrmann Marc-André Lemburg jtidy/src/test/resources/456596.out0000644000175000017500000000033510000613130017213 0ustar twernertwerner [ #456596 ] Missing attribute name garbles output System News
      jtidy/src/test/resources/531964.out0000644000175000017500000000044210115150162017213 0ustar twernertwerner [ 531964 ] <p /> gets tidied into <p /></p> jtidy/src/test/resources/708322.html0000644000175000017500000000032610034237311017345 0ustar twernertwerner [ #708322] : drop-proprietary-attributes leaves table height.
      jtidy/src/test/resources/445394.out0000644000175000017500000000033310034325524017221 0ustar twernertwerner [ #445394 ] Improve handling of missing trailing " link link jtidy/src/test/resources/431716.msg0000644000175000017500000000151311461621360017174 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 -1 0 0 0 jtidy/src/test/resources/991469.cfg0000644000175000017500000000014110076524731017166 0ustar twernertwernerinput-xml: yes output-xml: yes tidy-mark: false input-xml= yes output-xml= yes tidy-mark= false jtidy/src/test/resources/629885.html0000644000175000017500000000025007777327667017423 0ustar twernertwerner [629885] - Unbalanced quote in CSS Scrambles Doc

      Test

      jtidy/src/test/resources/431736.cfg0000644000175000017500000000020010111224155017127 0ustar twernertwerner# Tidy configuration file for bug #431736 output-xhtml: yes tidy-mark: false wrap: 0 output-xhtml= yes tidy-mark= false wrap=0 jtidy/src/test/resources/449348.msg0000644000175000017500000000123011463516445017213 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/559774.msg0000644000175000017500000000124611461621360017216 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/463066.msg0000644000175000017500000000737711463516445017226 0ustar twernertwerner 29 2 1 11 48 2 3 1 48 2 3 1 48 2 3 1 44 2 3 1 declaration]]> 49 2 79 1 lacks "type" attribute]]> 21 2 539 62 is not approved by W3C]]> 23 2 539 63 ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 20 2 553 1 by
    • ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/763191.out0000644000175000017500000000055110110502420017204 0ustar twernertwerner [ 763191 ] Again DOM Parsing error (tidy removes spaces in attribute values)
      jtidy/src/test/resources/922302.cfg0000644000175000017500000000016610033354416017143 0ustar twernertwerneradd-xml-decl=true output-xhtml=true tidy-mark=false wrap=0 add-xml-decl:true output-xhtml:true tidy-mark:false wrap:0jtidy/src/test/resources/503436.msg0000644000175000017500000000043011463516445017201 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/433670.msg0000644000175000017500000000071711463516445017213 0ustar twernertwerner 3 2 4 9 -1 0 0 0 jtidy/src/test/resources/533233.html0000644000175000017500000000136407777327667017407 0ustar twernertwerner Test for bug #533233

      Script sample 1

      Headline project—Link to offsite page.

      Input 1

      texttext

      jtidy/src/test/resources/909187.out0000644000175000017500000000043410125633100017221 0ustar twernertwerner [909187] JTidy should remove 0x0 from stream Here it is a 0x0 -- jtidy/src/test/resources/480406.cfg0000644000175000017500000000021310111224155017131 0ustar twernertwerner# Tidy configuration file for bug #480406 input-xml: yes output-xml: yes tidy-mark: false input-xml= yes output-xml= yes tidy-mark= false jtidy/src/test/resources/427826.cfg0000644000175000017500000000047710115150162017154 0ustar twernertwerner# Tidy configuration file for bug #427826 char-encoding: latin1 tidy-mark: no clean: yes drop-font-tags: yes logical-emphasis: yes indent-attributes: no output-xhtml: yes wrap: 0 char-encoding= latin1 tidy-mark= no clean= yes drop-font-tags= yes logical-emphasis= yes indent-attributes= no output-xhtml= yes wrap= 0 jtidy/src/test/resources/438650.html0000644000175000017500000000025607777327667017415 0ustar twernertwerner [ #438650 ] Newline in URL attr value becomes space This is a test jtidy/src/test/resources/586562.html0000644000175000017500000000056207777327667017423 0ustar twernertwerner [586562] Two Doctypes

      Two DOCTYPE's!

      jtidy/src/test/resources/427812.xhtml0000644000175000017500000000100307777110727017555 0ustar twernertwerner [ #427812 ] Reprocessing OBJECT removes PARAM jtidy/src/test/resources/502348.out0000644000175000017500000000051510114410350017203 0ustar twernertwerner [502348] <BR CLEAR="NONE"> should be output.


      jtidy/src/test/resources/427837.msg0000644000175000017500000000043011463516445017213 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/501230.msg0000644000175000017500000000177111461621360017167 0ustar twernertwerner 53 2 8 1 proprietary attribute "height"]]> 23 2 10 -3 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1097062.out0000644000175000017500000000037410167211525017304 0ustar twernertwerner [#1097062] trimInitialSpace does not handle nested inlines

      StartMiddle End

      jtidy/src/test/resources/680664.msg0000644000175000017500000000176111463516445017222 0ustar twernertwerner 29 2 13 38 6 2 18 1 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433360.out0000644000175000017500000000055210000613130017174 0ustar twernertwerner [ #433360 ] Tags with missing > can't be repaired

      There seems to be an error occurring when you don't end a tag with a >. Tidy won't fix it.

      jtidy/src/test/resources/433359.out0000644000175000017500000000034310000613130017202 0ustar twernertwerner [ #433359 ] Empty iframe elements trimmed This is a test jtidy/src/test/resources/532535.cfg0000644000175000017500000000015310111224155017135 0ustar twernertwerner# Tidy configuration file for bug #532535 word-2000: yes tidy-mark: false word-2000= yes tidy-mark= false jtidy/src/test/resources/935796.msg0000644000175000017500000000124711463516445017232 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/537604.cfg0000644000175000017500000000017710111224155017145 0ustar twernertwerner# Tidy configuration file for bug #537604 input-xml: yes clean: no tidy-mark: false input-xml= yes clean= no tidy-mark= false jtidy/src/test/resources/533233.out0000644000175000017500000000143710104011573017207 0ustar twernertwerner Test for bug #533233

      Script sample 1

      Headline project—Link to offsite page.

      Input 1

      texttext

      jtidy/src/test/resources/500236.out0000644000175000017500000000055011463534003017206 0ustar twernertwerner jtidy/src/test/resources/695408.msg0000644000175000017500000000512211463516445017217 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 6 3 lacks "summary" attribute]]> 53 2 8 11 proprietary attribute "datafld"]]> 23 2 8 23 ]]> 53 2 9 11 proprietary attribute "datafld"]]> 23 2 9 23 ]]> 53 2 10 11 proprietary attribute "datafld"]]> 23 2 10 23 ]]> 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/502348.html0000644000175000017500000000047210114410350017342 0ustar twernertwerner [502348] <BR CLEAR="NONE"> should be output.


      jtidy/src/test/resources/427826.html0000644000175000017500000000165507777327667017424 0ustar twernertwerner [#427826] Script source needs escaping/CDATA section

      If converted to XML/XHTML, the < in the javascript source above causes problems for XML tools.

      jtidy/src/test/resources/658230.cfg0000644000175000017500000000014610000613130017130 0ustar twernertwernerchar-encoding: big5 doctype: strict tidy-mark: no char-encoding= big5 doctype= strict tidy-mark= no jtidy/src/test/resources/433607.xml0000644000175000017500000000015407777327667017243 0ustar twernertwerner [ #433607 ] No warning for omitted end tag with -xml. Use -xml on command line. jtidy/src/test/resources/909187.html0000644000175000017500000000043510125633100017357 0ustar twernertwerner [909187] JTidy should remove 0x0 from stream Here it is a 0x0 -- jtidy/src/test/resources/431721.out0000644000175000017500000000313510115150162017203 0ustar twernertwerner Joe-Bob Briggs LLP

      Joe-Bob Briggs LLP

      Bryan Joe-Bob LLP is a leading national and international corporate, litigation and private client law firm.  We represent a wide variety of business, institutional and individual clients for whom our lawyers handle a wide range of matters.  As a result, our lawyers are well prepared to meet the needs of clients whether large or small, public or private, for-profit or not-for-profit.

      Joe-Bob Briggs has more offices than you can shake a stick at.  These locations give Joe-Bob the geographic reach to assist his clients where their needs are most pressing.

      • Estate Planning
      • Closely-Held Business Practice
      • Estate, Gift, Income and Other Tax Advice

      Joe-Bob joined the Firm in 1995 after 15 years with the Kansas City firm of Fish, Gill, Smoker & Butts, where he was a Shareholder/Director.  John is a past Chair of the Estate Planning, Probate and Trust Committee of the Kansas City Metropolitan Bar Association and co-authored the Drinking Procedures Manual for County Practitioners.  Currently, JB is a member of the Missouri Bar Probate and Trust Committee, the Estate Planning Society and the Mid-America Planned Giving Council.  A fellow of the American College of Trust and Estate Counsel, JB lectures frequently on Estate Planning topics for both legal and lay organizations. 

      jtidy/src/test/resources/427835.out0000644000175000017500000000071410104006273017217 0ustar twernertwerner Test input file for bug #427835

      Test input file for bug #427835

      Use with or without the -asxhtml option.

      -clean has no effect

      jtidy/src/test/resources/427839.html0000644000175000017500000000023207777327667017416 0ustar twernertwerner Test Input For Bug #427839 This is a test. Use "-asxhtml --doctype omit" on the command line. jtidy/src/test/resources/438956.html0000644000175000017500000000026410000046405017356 0ustar twernertwerner [ #438956 ] Bad head-endtag reported incorrectly Test jtidy/src/test/resources/540045.cfg0000644000175000017500000000057310111224155017136 0ustar twernertwerner# Tidy configuration file for bug #540045 wrap: 64 indent: no indent-spaces: 4 add-xml-decl: yes break-before-br: yes clean: yes logical-emphasis: yes enclose-text: yes enclose-block-text: yes tidy-mark: false wrap= 64 indent= no indent-spaces= 4 add-xml-decl= yes break-before-br= yes clean= yes logical-emphasis= yes enclose-text= yes enclose-block-text= yes tidy-mark= false jtidy/src/test/resources/553414.html0000644000175000017500000000051710115150162017345 0ustar twernertwerner [553414] XHTML strict accept '_target' attribute

      tidy should report a warning for target

      jtidy/src/test/resources/427822.html0000644000175000017500000000021507777327667017407 0ustar twernertwerner [ #427822 ] PopInLine() doesn't check stack
      abc
      jtidy/src/test/resources/446019.msg0000644000175000017500000000124611461621360017201 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/679135.html0000644000175000017500000000230207777327667017414 0ustar twernertwerner [ 679135 ] Crashes while checking attributes
      jtidy/src/test/resources/547976.msg0000644000175000017500000000156411461621360017222 0ustar twernertwerner 70 2 9 1 attribute value "TOP" for "valign" must be lower case for XHTML]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427664.html0000644000175000017500000000030207777327667017410 0ustar twernertwerner [#427664] Missing attr values cause NULL segfault text jtidy/src/test/resources/514348.msg0000644000175000017500000000126311463516445017212 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/629885.out0000644000175000017500000000032310133721557017237 0ustar twernertwerner [629885] - Unbalanced quote in CSS Scrambles Doc

      Test

      jtidy/src/test/resources/547976.out0000644000175000017500000000052710114171503017232 0ustar twernertwerner [547976] Case of attribute values
      valign value should be lowercase
      jtidy/src/test/resources/539369a.msg0000644000175000017500000000324411461621360017355 0ustar twernertwerner 7 2 14 9 before ]]> 8 2 15 -3 ]]> 8 2 18 -3 ]]> 8 2 20 -3 ]]> 6 2 22 1 ]]> 28 2 1 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433012.msg0000644000175000017500000034031411463516445017201 0ustar twernertwerner 44 2 1 1 declaration]]> 1 2 15 17 64 2 15 17 escaping malformed URI reference]]> 1 2 15 32 1 2 18 17 64 2 18 17 escaping malformed URI reference]]> 1 2 18 33 1 2 21 17 64 2 21 17 escaping malformed URI reference]]> 1 2 21 32 1 2 24 17 64 2 24 17 escaping malformed URI reference]]> 1 2 24 33 1 2 27 17 64 2 27 17 escaping malformed URI reference]]> 1 2 27 34 1 2 30 17 64 2 30 17 escaping malformed URI reference]]> 1 2 30 31 1 2 33 17 64 2 33 17 escaping malformed URI reference]]> 1 2 33 34 1 2 36 17 64 2 36 17 escaping malformed URI reference]]> 1 2 36 32 1 2 39 17 64 2 39 17 escaping malformed URI reference]]> 1 2 39 31 1 2 42 17 64 2 42 17 escaping malformed URI reference]]> 1 2 42 32 1 2 45 17 64 2 45 17 escaping malformed URI reference]]> 1 2 45 32 1 2 48 17 64 2 48 17 escaping malformed URI reference]]> 1 2 48 33 1 2 51 17 64 2 51 17 escaping malformed URI reference]]> 1 2 51 31 1 2 54 17 64 2 54 17 escaping malformed URI reference]]> 1 2 54 31 1 2 57 17 64 2 57 17 escaping malformed URI reference]]> 1 2 57 31 1 2 60 17 64 2 60 17 escaping malformed URI reference]]> 1 2 60 32 1 2 63 17 64 2 63 17 escaping malformed URI reference]]> 1 2 63 31 1 2 66 17 64 2 66 17 escaping malformed URI reference]]> 1 2 66 34 1 2 69 17 64 2 69 17 escaping malformed URI reference]]> 1 2 69 32 1 2 72 17 64 2 72 17 escaping malformed URI reference]]> 1 2 72 32 1 2 75 17 64 2 75 17 escaping malformed URI reference]]> 1 2 75 33 1 2 78 17 64 2 78 17 escaping malformed URI reference]]> 1 2 78 33 1 2 81 17 64 2 81 17 escaping malformed URI reference]]> 1 2 81 32 1 2 84 17 64 2 84 17 escaping malformed URI reference]]> 1 2 84 34 1 2 87 17 64 2 87 17 escaping 
malformed URI reference]]> 1 2 87 33 1 2 90 17 64 2 90 17 escaping malformed URI reference]]> 1 2 90 32 1 2 93 17 64 2 93 17 escaping malformed URI reference]]> 1 2 93 32 1 2 96 17 64 2 96 17 escaping malformed URI reference]]> 1 2 96 33 1 2 99 17 64 2 99 17 escaping malformed URI reference]]> 1 2 99 34 1 2 102 17 64 2 102 17 escaping malformed URI reference]]> 1 2 102 34 1 2 105 17 64 2 105 17 escaping malformed URI reference]]> 1 2 105 34 1 2 108 17 64 2 108 17 escaping malformed URI reference]]> 1 2 108 34 1 2 111 17 64 2 111 17 escaping malformed URI reference]]> 1 2 111 34 1 2 114 17 64 2 114 17 escaping malformed URI reference]]> 1 2 114 34 1 2 117 17 64 2 117 17 escaping malformed URI reference]]> 1 2 117 33 1 2 120 17 64 2 120 17 escaping malformed URI reference]]> 1 2 120 34 1 2 123 17 64 2 123 17 escaping malformed URI reference]]> 1 2 123 32 1 2 126 17 64 2 126 17 escaping malformed URI reference]]> 1 2 126 33 1 2 129 17 64 2 129 17 escaping malformed URI reference]]> 1 2 129 33 1 2 132 17 64 2 132 17 escaping malformed URI reference]]> 1 2 132 34 1 2 135 17 64 2 135 17 escaping malformed URI reference]]> 1 2 135 34 1 2 138 17 64 2 138 17 escaping malformed URI reference]]> 1 2 138 34 1 2 141 17 64 2 141 17 escaping malformed URI reference]]> 1 2 141 33 1 2 144 17 64 2 144 17 escaping malformed URI reference]]> 1 2 144 32 1 2 147 17 64 2 147 17 escaping malformed URI reference]]> 1 2 147 34 1 2 150 17 64 2 150 17 escaping malformed URI reference]]> 1 2 150 34 1 2 153 17 64 2 153 17 escaping malformed URI reference]]> 1 2 153 33 1 2 156 17 64 2 156 17 escaping malformed URI reference]]> 1 2 156 32 1 2 159 17 64 2 159 17 escaping malformed URI reference]]> 1 2 159 31 1 2 162 17 64 2 162 17 escaping malformed URI reference]]> 1 2 162 34 1 2 165 17 64 2 165 17 escaping malformed URI reference]]> 1 2 165 34 1 2 168 17 64 2 168 17 escaping malformed URI reference]]> 1 2 168 34 1 2 171 17 64 2 171 17 escaping malformed URI reference]]> 1 2 171 33 1 2 174 17 64 
2 174 17 escaping malformed URI reference]]> 1 2 174 34 1 2 177 17 64 2 177 17 escaping malformed URI reference]]> 1 2 177 32 1 2 180 17 64 2 180 17 escaping malformed URI reference]]> 1 2 180 33 1 2 183 17 64 2 183 17 escaping malformed URI reference]]> 1 2 183 34 1 2 186 17 64 2 186 17 escaping malformed URI reference]]> 1 2 186 34 1 2 189 17 64 2 189 17 escaping malformed URI reference]]> 1 2 189 34 1 2 192 17 64 2 192 17 escaping malformed URI reference]]> 1 2 192 33 1 2 195 17 64 2 195 17 escaping malformed URI reference]]> 1 2 195 32 1 2 198 17 64 2 198 17 escaping malformed URI reference]]> 1 2 198 34 1 2 201 17 64 2 201 17 escaping malformed URI reference]]> 1 2 201 33 1 2 204 17 64 2 204 17 escaping malformed URI reference]]> 1 2 204 33 1 2 207 17 64 2 207 17 escaping malformed URI reference]]> 1 2 207 34 1 2 210 17 64 2 210 17 escaping malformed URI reference]]> 1 2 210 34 1 2 213 17 64 2 213 17 escaping malformed URI reference]]> 1 2 213 33 1 2 216 17 64 2 216 17 escaping malformed URI reference]]> 1 2 216 34 1 2 219 17 64 2 219 17 escaping malformed URI reference]]> 1 2 219 32 1 2 222 17 64 2 222 17 escaping malformed URI reference]]> 1 2 222 33 1 2 225 17 64 2 225 17 escaping malformed URI reference]]> 1 2 225 33 1 2 228 17 64 2 228 17 escaping malformed URI reference]]> 1 2 228 34 1 2 231 17 64 2 231 17 escaping malformed URI reference]]> 1 2 231 34 1 2 234 17 64 2 234 17 escaping malformed URI reference]]> 1 2 234 34 1 2 237 17 64 2 237 17 escaping malformed URI reference]]> 1 2 237 33 1 2 240 17 64 2 240 17 escaping malformed URI reference]]> 1 2 240 32 1 2 243 17 64 2 243 17 escaping malformed URI reference]]> 1 2 243 34 1 2 246 17 64 2 246 17 escaping malformed URI reference]]> 1 2 246 34 1 2 249 17 64 2 249 17 escaping malformed URI reference]]> 1 2 249 33 1 2 252 17 64 2 252 17 escaping malformed URI reference]]> 1 2 252 32 1 2 255 17 64 2 255 17 escaping malformed URI reference]]> 1 2 255 31 1 2 258 17 64 2 258 17 escaping malformed URI 
reference]]> 1 2 258 34 1 2 261 17 64 2 261 17 escaping malformed URI reference]]> 1 2 261 34 1 2 264 17 64 2 264 17 escaping malformed URI reference]]> 1 2 264 34 1 2 267 17 64 2 267 17 escaping malformed URI reference]]> 1 2 267 33 1 2 270 17 64 2 270 17 escaping malformed URI reference]]> 1 2 270 34 1 2 273 17 64 2 273 17 escaping malformed URI reference]]> 1 2 273 32 1 2 276 17 64 2 276 17 escaping malformed URI reference]]> 1 2 276 34 1 2 279 17 64 2 279 17 escaping malformed URI reference]]> 1 2 279 34 1 2 282 17 64 2 282 17 escaping malformed URI reference]]> 1 2 282 34 1 2 285 17 64 2 285 17 escaping malformed URI reference]]> 1 2 285 34 1 2 288 17 64 2 288 17 escaping malformed URI reference]]> 1 2 288 33 1 2 291 17 64 2 291 17 escaping malformed URI reference]]> 1 2 291 32 1 2 294 17 64 2 294 17 escaping malformed URI reference]]> 1 2 294 34 1 2 297 17 64 2 297 17 escaping malformed URI reference]]> 1 2 297 33 1 2 300 17 64 2 300 17 escaping malformed URI reference]]> 1 2 300 32 3 2 308 17 3 2 308 32 3 2 311 17 3 2 311 33 3 2 314 17 3 2 314 32 3 2 317 17 3 2 317 33 3 2 320 17 3 2 320 33 3 2 323 17 3 2 323 35 3 2 326 17 3 2 326 32 3 2 329 17 3 2 329 31 3 2 332 17 3 2 332 33 3 2 335 17 3 2 335 32 3 2 338 17 3 2 338 33 3 2 341 17 3 2 341 34 3 2 344 17 3 2 344 30 3 2 347 17 3 2 347 30 3 2 350 17 3 2 350 30 3 2 353 17 3 2 353 35 3 2 356 17 3 2 356 30 3 2 359 17 3 2 359 31 3 2 362 17 3 2 362 33 3 2 365 17 3 2 365 31 3 2 368 17 3 2 368 35 3 2 371 17 3 2 371 31 3 2 374 17 3 2 374 31 3 2 377 17 3 2 377 31 3 2 380 17 3 2 380 33 3 2 383 17 3 2 383 33 3 2 386 17 3 2 386 32 3 2 389 17 3 2 389 33 3 2 392 17 3 2 392 33 3 2 395 17 3 2 395 35 3 2 398 17 3 2 398 32 3 2 401 17 3 2 401 31 3 2 404 17 3 2 404 33 3 2 407 17 3 2 407 32 3 2 410 17 3 2 410 33 3 2 413 17 3 2 413 34 3 2 416 17 3 2 416 30 3 2 419 17 3 2 419 30 3 2 422 17 3 2 422 30 3 2 425 17 3 2 425 35 3 2 428 17 3 2 428 30 3 2 431 17 3 2 431 31 3 2 434 17 3 2 434 34 3 2 437 17 3 2 437 33 3 2 440 17 3 2 440 31 3 2 
443 17 3 2 443 35 3 2 446 17 3 2 446 31 3 2 449 17 3 2 449 31 3 2 452 17 3 2 452 31 3 2 455 17 3 2 455 33 3 2 458 17 3 2 458 36 3 2 461 17 3 2 461 33 3 2 464 17 3 2 464 31 3 2 467 17 3 2 467 32 3 2 470 17 3 2 470 34 3 2 473 17 3 2 473 33 3 2 476 17 3 2 476 33 3 2 479 17 3 2 479 33 3 2 482 17 3 2 482 33 3 2 485 17 3 2 485 34 3 2 488 17 3 2 488 33 3 2 491 17 3 2 491 32 3 2 494 17 3 2 494 33 3 2 497 17 3 2 497 35 3 2 500 17 3 2 500 32 3 2 503 17 3 2 503 32 3 2 506 17 3 2 506 32 3 2 509 17 3 2 509 32 3 2 512 17 3 2 512 32 3 2 515 17 3 2 515 33 3 2 518 17 3 2 518 32 3 2 521 17 3 2 521 32 3 2 524 17 3 2 524 32 3 2 527 17 3 2 527 32 3 2 530 17 3 2 530 32 3 2 533 17 3 2 533 34 3 2 536 17 3 2 536 32 3 2 539 17 3 2 539 33 3 2 542 17 3 2 542 33 3 2 545 17 3 2 545 33 3 2 548 17 3 2 548 32 3 2 554 17 3 2 554 33 3 2 557 17 3 2 557 30 3 2 560 17 3 2 560 32 3 2 563 17 3 2 563 31 3 2 566 17 3 2 566 33 3 2 569 17 3 2 569 34 3 2 572 17 3 2 572 33 3 2 575 17 3 2 575 32 3 2 578 17 3 2 578 33 3 2 581 17 3 2 581 31 3 2 584 17 3 2 584 31 3 2 587 17 3 2 587 30 3 2 590 17 3 2 590 31 3 2 593 17 3 2 593 31 3 2 596 17 3 2 596 31 3 2 599 17 3 2 599 34 3 2 602 17 3 2 602 31 3 2 605 17 3 2 605 32 3 2 608 17 3 2 608 33 3 2 611 17 3 2 611 30 3 2 614 17 3 2 614 33 3 2 617 17 3 2 617 30 3 2 620 17 3 2 620 30 3 2 623 17 3 2 623 31 3 2 626 17 3 2 626 31 3 2 629 17 3 2 629 32 3 2 632 17 3 2 632 32 3 2 635 17 3 2 635 32 3 2 638 17 3 2 638 33 3 2 641 17 3 2 641 34 3 2 644 17 3 2 644 32 3 2 647 17 3 2 647 32 3 2 650 17 3 2 650 33 3 2 653 17 3 2 653 33 3 2 656 17 3 2 656 34 3 2 659 17 3 2 659 34 3 2 662 17 3 2 662 32 3 2 665 17 3 2 665 32 3 2 668 17 3 2 668 31 3 2 671 17 3 2 671 34 3 2 674 17 3 2 674 33 3 2 677 17 3 2 677 34 3 2 680 17 3 2 680 33 1 2 689 17 1 2 689 32 1 2 692 17 1 2 692 31 1 2 695 17 64 2 695 17 escaping malformed URI reference]]> 1 2 695 30 1 2 698 17 64 2 698 17 escaping malformed URI reference]]> 1 2 698 30 3 2 706 17 3 2 706 33 3 2 709 17 3 2 709 33 3 2 712 17 3 2 712 34 3 2 715 17 3 2 
715 34 3 2 718 17 3 2 718 32 3 2 721 17 3 2 721 32 3 2 724 17 3 2 724 33 3 2 727 17 3 2 727 32 3 2 730 17 3 2 730 32 3 2 733 17 3 2 733 34 3 2 736 17 3 2 736 32 3 2 739 17 3 2 739 31 3 2 742 17 3 2 742 31 3 2 745 17 3 2 745 31 3 2 748 17 3 2 748 33 3 2 751 17 3 2 751 33 3 2 754 17 3 2 754 33 3 2 757 17 3 2 757 33 3 2 760 17 3 2 760 33 3 2 763 17 3 2 763 33 3 2 766 17 3 2 766 33 3 2 769 17 3 2 769 33 3 2 772 17 3 2 772 34 3 2 775 17 3 2 775 34 3 2 778 17 3 2 778 34 3 2 781 17 3 2 781 34 3 2 784 17 3 2 784 34 3 2 790 17 3 2 790 32 5 2 798 3 3 2 798 17 5 2 798 27 3 2 798 32 3 2 801 17 3 2 801 31 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 jtidy/src/test/resources/433607.cfg0000644000175000017500000000015310111224155017135 0ustar twernertwerner# Tidy configuration file for bug #433607 input-xml: yes tidy-mark: false input-xml= yes tidy-mark= false jtidy/src/test/resources/917012.msg0000644000175000017500000000201011463516445017174 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 8 0 1 1 elements. ]]> jtidy/src/test/resources/791933.html0000644000175000017500000000036010110502420017344 0ustar twernertwerner [ 791933 ] Why German special character converted to upper case ( ->) jtidy/src/test/resources/545772.msg0000644000175000017500000000121111461621360017177 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427845.msg0000644000175000017500000000124511463516445017217 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/574158.msg0000644000175000017500000000275511463516445017226 0ustar twernertwerner 7 2 7 16 before
      ]]> 23 2 7 16 ]]> 15 2 7 36 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 8 0 1 1 elements. ]]> jtidy/src/test/resources/538727.html0000644000175000017500000000020710033573374017370 0ustar twernertwerner [ 538727 ] setDocType uncorrectly adds "" doctype should not contain "" jtidy/src/test/resources/431716.cfg0000644000175000017500000000014210111224155017132 0ustar twernertwerner# Tidy configuration file for bug #431716 split: yes tidy-mark: false split= yes tidy-mark= falsejtidy/src/test/resources/514348.html0000644000175000017500000000163407777327667017415 0ustar twernertwerner [ #514348 ] Incorrect wrap behaviour
      HomeNews
      jtidy/src/test/resources/500236.xml0000644000175000017500000000051407777327667017234 0ustar twernertwerner jtidy/src/test/resources/539369.msg0000644000175000017500000000200311461621360017204 0ustar twernertwerner 8 2 19 -3 ]]> 28 2 1 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/441508.html0000644000175000017500000000032307777327667017404 0ustar twernertwerner [ #441508 ] parser.c: BadForm() function broken
      Test
      jtidy/src/test/resources/443381.xhtml0000644000175000017500000000047107777327667017601 0ustar twernertwerner [ #443381 ] end tags for empty elements in XHTML

      TestcoolTest

      jtidy/src/test/resources/487204.html0000644000175000017500000000047107777327667017413 0ustar twernertwerner [ #487204 ] Duplicate DIV style attribute generated
        1. One
        2. Two
        3. Three
      jtidy/src/test/resources/431889.msg0000644000175000017500000000303711461621360017212 0ustar twernertwerner 49 2 35 4 lacks "alt" attribute]]> 110 0 1 1 111 0 1 1 -1 0 0 0 1 0 1 1 112 0 1 1 jtidy/src/test/resources/676205.html0000644000175000017500000000034407777327667017413 0ustar twernertwerner [676205] <img src="> crashes Tidy 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/441568.html0000644000175000017500000000030707777327667017414 0ustar twernertwerner [ #441568 ] Font tags handling different
      text-one
      text-two jtidy/src/test/resources/427822.out0000644000175000017500000000027110000613130017200 0ustar twernertwerner [ #427822 ] PopInLine() doesn't check stack
      abc
      jtidy/src/test/resources/647900.html0000644000175000017500000000111607777327667017411 0ustar twernertwerner [ 647900 ] tables are incorrectly merged
      Table data

      A paragraph

      Foo
      Foo

      Another paragraph

      Input:

      Yet another paragraph

      jtidy/src/test/resources/427677.out0000644000175000017500000000035710000613130017215 0ustar twernertwerner [ #427677 ] TrimInitialSpace() can trim too much

      This is a Red link

      jtidy/src/test/resources/427835.msg0000644000175000017500000000120311461621360017177 0ustar twernertwerner 44 2 2 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/538536.out0000644000175000017500000000027210111075545017225 0ustar twernertwerner #538536 Extra endtags not detected jtidy/src/test/resources/427839.out0000644000175000017500000000027710103710503017224 0ustar twernertwerner Test Input For Bug #427839 This is a test. Use "-asxhtml --doctype omit" on the command line. jtidy/src/test/resources/935796.cfg0000644000175000017500000000022110044423725017162 0ustar twernertwernertidy-mark=false clean= true output-xhtml= true wrap= 0 indent-spaces= 0 tidy-mark:false clean: true output-xhtml: true wrap: 0 indent-spaces: 0 jtidy/src/test/resources/706260.msg0000644000175000017500000000125411461621360017175 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/688746.html0000644000175000017500000000054007777327667017426 0ustar twernertwerner [ 688746 ] incorrect charset value for utf-8

      How to…
      Place an extended-hours order:

      jtidy/src/test/resources/647900.msg0000644000175000017500000000627411463516445017222 0ustar twernertwerner 44 2 1 1 declaration]]> 8 2 9 -1 ]]> 11 2 11 3 isn't allowed in elements]]> 8 2 13 3 ]]> 12 2 19 5 ]]> 8 2 20 5 ]]> 8 2 21 -1 ]]> 11 2 23 3 isn't allowed in elements]]> 12 2 25 3 ]]> 8 3 29 7 ]]> 8 3 29 14 ]]> 6 2 36 1 ]]> 6 2 36 1 ]]> 6 2 36 1 ]]> 111 0 1 1 -1 0 0 0 113 0 1 1 and tags. HTML elements should be properly nested and form elements are no exception. For instance you should not place the
      in one table cell and the
      in another. If the
      is placed before a table, the
      cannot be placed inside the table! Note that one form can't be nested inside another! ]]>
      jtidy/src/test/resources/991471.cfg0000644000175000017500000000014110076523763017163 0ustar twernertwernerinput-xml: yes output-xml: yes tidy-mark: false input-xml= yes output-xml= yes tidy-mark= false jtidy/src/test/resources/467865.out0000644000175000017500000000040310000613130017210 0ustar twernertwerner [ #467865 ] un-nesting is incorrect

      (foo bar). ...

      jtidy/src/test/resources/765852.msg0000644000175000017500000000143211463516445017220 0ustar twernertwerner 44 2 1 1 declaration]]> 23 2 2 75 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/441740.xhtml0000644000175000017500000000071407777327667017576 0ustar twernertwerner Sample XHTML 1.1 document with Ruby markup

      10 31 2002 Month Day Year Expiration Date

      jtidy/src/test/resources/500236.cfg0000644000175000017500000000025210111224155017126 0ustar twernertwerner# Tidy configuration file for bug #500236 word-2000: yes input-xml: yes output-xml: yes tidy-mark: false word-2000= yes input-xml= yes output-xml= yes tidy-mark= false jtidy/src/test/resources/504206.msg0000644000175000017500000000363611463516445017210 0ustar twernertwerner 44 2 1 1 declaration]]> 65 2 12 1 discarding newline in URI reference]]> 23 2 43 1 ]]> 23 2 139 1 ]]> 23 2 144 1 ]]> 49 2 152 1 lacks "action" attribute]]> 25 2 152 1 shouldn't be nested]]> 6 2 155 1 ]]> 6 2 155 1 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/511243.cfg0000644000175000017500000000016510111224155017131 0ustar twernertwerner# Tidy configuration file for bug #511243 char-encoding: utf8 tidy-mark: false char-encoding= utf8 tidy-mark= false jtidy/src/test/resources/435903.html0000644000175000017500000000056107777327667017412 0ustar twernertwerner [ #435903 ] Script element w/body child to table bug>
      jtidy/src/test/resources/531964.msg0000644000175000017500000000150111463516445017210 0ustar twernertwerner 23 2 9 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/514893.html0000644000175000017500000000137507777327667017424 0ustar twernertwerner [ 514893 ] Incorrect http-equiv <meta> tag

      This document will test synchronization of the <meta http-equiv ...> tag. It contains the header <meta http-equiv="CONTENT-TYPE" content="TEXT/HTML; CHARSET=WINDOWS-1252" />.

      On output, the header should be modified to reflect whatever output encoding you have specified.

      jtidy/src/test/resources/456596.msg0000644000175000017500000000276411461621360017222 0ustar twernertwerner 44 2 1 1 declaration]]> 69 2 6 6 unexpected =, expected attribute name]]> 59 2 6 6 unexpected or duplicate quote mark]]> 58 2 6 6 attribute with missing trailing quote mark]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/480701.cfg0000644000175000017500000000021710111224155017133 0ustar twernertwerner# Tidy configuration file for bug #480701 input-xml: yes output-xhtml: yes tidy-mark: false input-xml= yes output-xhtml= yes tidy-mark= false jtidy/src/test/resources/649812.html0000644000175000017500000000210010001541271017342 0ustar twernertwerner<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <title>[ 649812 ] Does TidyLib correctly handle Mac files?</title> </head> <body> <h1>This is a Samle UTF16 Little Cowboy</h1> <p>the next para is Hebrew</p> <p>    </p> <p>The Next one is Russian / crylic</p> <p>?@8;>65=85 4>;6=> >ACI5AB2;OBL</p> <p>The Next one is Greek</p> <p> - Feta, , Salads</p> </body> </html> jtidy/src/test/resources/542029.html0000644000175000017500000000023007777327667017401 0ustar twernertwerner [ 542029 ] PPrintXmlDecl reads outside array range Test jtidy/src/test/resources/501230.xhtml0000644000175000017500000000055407777327667017567 0ustar twernertwerner [ #501230 ] "0" (Zero) has to be lower case !
      jtidy/src/test/resources/487204.msg0000644000175000017500000000151011461621360017174 0ustar twernertwerner 12 2 6 2 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/696799.html0000644000175000017500000000032007777327667017431 0ustar twernertwerner [ 696799 ] Crash: <script language=""> jtidy/src/test/resources/502346.msg0000644000175000017500000000124011463516445017200 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/531962.html0000644000175000017500000000046410123373007017356 0ustar twernertwerner [531962] Closing quotes around attribute values jtidy/src/test/resources/433012.html0000644000175000017500000005503407777327667017404 0ustar twernertwerner [ #433012 ] Illegal ampersands/character entities

      id=ID =XX
      id=ID¡=XX
      id=ID¢=XX
      id=ID£=XX
      id=ID¤=XX
      id=ID¥=XX
      id=ID¦=XX
      id=ID§=XX
      id=ID¨=XX
      id=ID©=XX
      id=IDª=XX
      id=ID«=XX
      id=ID¬=XX
      id=ID­=XX
      id=ID®=XX
      id=ID¯=XX
      id=ID°=XX
      id=ID±=XX
      id=ID²=XX
      id=ID³=XX
      id=ID´=XX
      id=IDµ=XX
      id=ID¶=XX
      id=ID·=XX
      id=ID¸=XX
      id=ID¹=XX
      id=IDº=XX
      id=ID»=XX
      id=ID¼=XX
      id=ID½=XX
      id=ID¾=XX
      id=ID¿=XX
      id=IDÀ=XX
      id=IDÁ=XX
      id=IDÂ=XX
      id=IDÃ=XX
      id=IDÄ=XX
      id=IDÅ=XX
      id=IDÆ=XX
      id=IDÇ=XX
      id=IDÈ=XX
      id=IDÉ=XX
      id=IDÊ=XX
      id=IDË=XX
      id=IDÌ=XX
      id=IDÍ=XX
      id=IDÎ=XX
      id=IDÏ=XX
      id=IDÐ=XX
      id=IDÑ=XX
      id=IDÒ=XX
      id=IDÓ=XX
      id=IDÔ=XX
      id=IDÕ=XX
      id=IDÖ=XX
      id=ID×=XX
      id=IDØ=XX
      id=IDÙ=XX
      id=IDÚ=XX
      id=IDÛ=XX
      id=IDÜ=XX
      id=IDÝ=XX
      id=IDÞ=XX
      id=IDß=XX
      id=IDà=XX
      id=IDá=XX
      id=IDâ=XX
      id=IDã=XX
      id=IDä=XX
      id=IDå=XX
      id=IDæ=XX
      id=IDç=XX
      id=IDè=XX
      id=IDé=XX
      id=IDê=XX
      id=IDë=XX
      id=IDì=XX
      id=IDí=XX
      id=IDî=XX
      id=IDï=XX
      id=IDð=XX
      id=IDñ=XX
      id=IDò=XX
      id=IDó=XX
      id=IDô=XX
      id=IDõ=XX
      id=IDö=XX
      id=ID÷=XX
      id=IDø=XX
      id=IDù=XX
      id=IDú=XX
      id=IDû=XX
      id=IDü=XX
      id=IDý=XX
      id=IDþ=XX
      id=IDÿ=XX
      id=ID&fnof=XX
      id=ID&Alpha=XX
      id=ID&Beta=XX
      id=ID&Gamma=XX
      id=ID&Delta=XX
      id=ID&Epsilon=XX
      id=ID&Zeta=XX
      id=ID&Eta=XX
      id=ID&Theta=XX
      id=ID&Iota=XX
      id=ID&Kappa=XX
      id=ID&Lambda=XX
      id=ID&Mu=XX
      id=ID&Nu=XX
      id=ID&Xi=XX
      id=ID&Omicron=XX
      id=ID&Pi=XX
      id=ID&Rho=XX
      id=ID&Sigma=XX
      id=ID&Tau=XX
      id=ID&Upsilon=XX
      id=ID&Phi=XX
      id=ID&Chi=XX
      id=ID&Psi=XX
      id=ID&Omega=XX
      id=ID&alpha=XX
      id=ID&beta=XX
      id=ID&gamma=XX
      id=ID&delta=XX
      id=ID&epsilon=XX
      id=ID&zeta=XX
      id=ID&eta=XX
      id=ID&theta=XX
      id=ID&iota=XX
      id=ID&kappa=XX
      id=ID&lambda=XX
      id=ID&mu=XX
      id=ID&nu=XX
      id=ID&xi=XX
      id=ID&omicron=XX
      id=ID&pi=XX
      id=ID&rho=XX
      id=ID&sigmaf=XX
      id=ID&sigma=XX
      id=ID&tau=XX
      id=ID&upsilon=XX
      id=ID&phi=XX
      id=ID&chi=XX
      id=ID&psi=XX
      id=ID&omega=XX
      id=ID&thetasym=XX
      id=ID&upsih=XX
      id=ID&piv=XX
      id=ID&bull=XX
      id=ID&hellip=XX
      id=ID&prime=XX
      id=ID&Prime=XX
      id=ID&oline=XX
      id=ID&frasl=XX
      id=ID&weierp=XX
      id=ID&image=XX
      id=ID&real=XX
      id=ID&trade=XX
      id=ID&alefsym=XX
      id=ID&larr=XX
      id=ID&uarr=XX
      id=ID&rarr=XX
      id=ID&darr=XX
      id=ID&harr=XX
      id=ID&crarr=XX
      id=ID&lArr=XX
      id=ID&uArr=XX
      id=ID&rArr=XX
      id=ID&dArr=XX
      id=ID&hArr=XX
      id=ID&forall=XX
      id=ID&part=XX
      id=ID&exist=XX
      id=ID&empty=XX
      id=ID&nabla=XX
      id=ID&isin=XX
      id=ID¬in=XX
      id=ID&ni=XX
      id=ID&prod=XX
      id=ID&sum=XX
      id=ID&minus=XX
      id=ID&lowast=XX
      id=ID&radic=XX
      id=ID&prop=XX
      id=ID&infin=XX
      id=ID&ang=XX
      id=ID&and=XX
      id=ID&or=XX
      id=ID&cap=XX
      id=ID&cup=XX
      id=ID&int=XX
      id=ID&there4=XX
      id=ID&sim=XX
      id=ID&cong=XX
      id=ID&asymp=XX
      id=ID&ne=XX
      id=ID&equiv=XX
      id=ID&le=XX
      id=ID&ge=XX
      id=ID&sub=XX
      id=ID&sup=XX
      id=ID&nsub=XX
      id=ID&sube=XX
      id=ID&supe=XX
      id=ID&oplus=XX
      id=ID&otimes=XX
      id=ID&perp=XX
      id=ID&sdot=XX
      id=ID&lceil=XX
      id=ID&rceil=XX
      id=ID&lfloor=XX
      id=ID&rfloor=XX
      id=ID&lang=XX
      id=ID&rang=XX
      id=ID&loz=XX
      id=ID&spades=XX
      id=ID&clubs=XX
      id=ID&hearts=XX
      id=ID&diams=XX
      id=ID"=XX
      id=ID&=XX
      id=ID<=XX
      id=ID>=XX
      id=ID&OElig=XX
      id=ID&oelig=XX
      id=ID&Scaron=XX
      id=ID&scaron=XX
      id=ID&Yuml=XX
      id=ID&circ=XX
      id=ID&tilde=XX
      id=ID&ensp=XX
      id=ID&emsp=XX
      id=ID&thinsp=XX
      id=ID&zwnj=XX
      id=ID&zwj=XX
      id=ID&lrm=XX
      id=ID&rlm=XX
      id=ID&ndash=XX
      id=ID&mdash=XX
      id=ID&lsquo=XX
      id=ID&rsquo=XX
      id=ID&sbquo=XX
      id=ID&ldquo=XX
      id=ID&rdquo=XX
      id=ID&bdquo=XX
      id=ID&dagger=XX
      id=ID&Dagger=XX
      id=ID&permil=XX
      id=ID&lsaquo=XX
      id=ID&rsaquo=XX
      id=ID&euro=XX
      id=ID&apos=XX
      id=ID&foo=XX

      jtidy/src/test/resources/533233.cfg0000644000175000017500000000014110000613130017116 0ustar twernertwerneroutput-xhtml: yes indent: auto tidy-mark: false output-xhtml= yes indent= auto tidy-mark= false jtidy/src/test/resources/463066.out0000644000175000017500000000126110000613130017200 0ustar twernertwerner [ #463066 ] CleanWord2000 misses mso-list bullets

      Test 1

      1. Here
      2. We
      3. Go
      4. Again
       
      1. Ok
      2. That
      3. Worked
       
      • But
      • It
      • Does
      • Not
      • Work
      • With
      • Bullet
      • Points
      • Now
       
      • It
      • Is
      • Working
       
      • Try
      • It
      • Again
       
      • Do
      • It
      • Again
      • And

      jtidy/src/test/resources/438650.msg0000644000175000017500000000145511461621360017205 0ustar twernertwerner 44 2 1 1 declaration]]> 65 2 6 1 discarding newline in URI reference]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/695408.cfg0000644000175000017500000000040610027341712017154 0ustar twernertwernerindent: auto indent-attributes: yes tidy-mark: no clean: yes drop-font-tags: yes drop-proprietary-attributes: no indent-spaces: 0 indent= auto indent-attributes= yes tidy-mark= no clean= yes drop-font-tags= yes drop-proprietary-attributes= no indent-spaces= 0 jtidy/src/test/resources/527118.html0000644000175000017500000000033110033573374017356 0ustar twernertwerner [ 527118 ] Suppress duplicate attributes
        • blah blah blah
        • blah blah blah
        jtidy/src/test/resources/427838.out0000644000175000017500000000067010000613130017212 0ustar twernertwerner [ #427838 ] Name Anchor thrown away A fragment of html created by Frontpage.....

        Clipboard

        lots more stuff deleted.... some textsome text

        jtidy/src/test/resources/473490.cfg0000644000175000017500000000054610111224155017147 0ustar twernertwerner# Tidy configuration file for bug #473490 tidy-mark: no wrap: 0 output-xhtml: yes doctype: auto quote-nbsp: yes uppercase-tags: yes quote-ampersand: yes add-xml-space: no show-warnings:no quiet: yes tidy-mark= no wrap= 0 output-xhtml= yes doctype= auto quote-nbsp= yes uppercase-tags= yes quote-ampersand= yes add-xml-space= no show-warnings= no quiet= yes jtidy/src/test/resources/708322.msg0000644000175000017500000000370311463516445017210 0ustar twernertwerner 44 2 1 1 declaration]]> 53 2 6 1 proprietary attribute "bgproperties"]]> 49 2 6 1 lacks "summary" attribute]]> 53 2 6 1 proprietary attribute "height"]]> 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/656889.msg0000644000175000017500000000121211463516445017225 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1031865.out0000644000175000017500000000052510125630076017301 0ustar twernertwerner [1031865] Script parsing warning
        jtidy/src/test/resources/676205.xhtml0000644000175000017500000000056607777327667017611 0ustar twernertwerner [ 676205 ] <img src="> crashes Tidy [ #427821 ] XHTML TRANSITIONAL doctype set wrongly <body> This is a test - use "-asxml" on the command line. </body> jtidy/src/test/resources/511243.msg0000644000175000017500000000736611463516445017213 0ustar twernertwerner 78 2 11 48 110 0 1 1 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 8 0 1 1 elements. ]]> jtidy/src/test/resources/427834.html0000644000175000017500000000032107777327667017410 0ustar twernertwerner [ #427834 ] Warning given for newline in DOCTYPE jtidy/src/test/resources/991469.msg0000644000175000017500000000042711463520474017225 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/578216.out0000644000175000017500000000140210013533125017212 0ustar twernertwerner [ 578216 ] Incorrect indent of <SPAN> elements
        Benefits: Using a new Project Profile Knowledge Base...
        Solutions: Comprehensive intranet-based knowledge base containing...
        Roles: Drove site and content management architecture...
        Technology: Visual InterDev, IIS, ...
        jtidy/src/test/resources/433604.msg0000644000175000017500000000042711463520412017173 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/443576.msg0000644000175000017500000000174211463516445017220 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 6 1 lacks "type" attribute]]> 32 2 8 31 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433672.msg0000644000175000017500000000265511461621360017207 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 6 15 before

        ]]> 15 2 6 19 ]]> 8 2 6 35 ]]> 7 2 9 16 before
        ]]> 8 2 9 62 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427810.html0000644000175000017500000000061607777327667017411 0ustar twernertwerner [ #427810 ] Proprietary elements not reported as err

        Test inline element

        Proprietary inline element (blink) Proprietary inline element (wbr) - note starts on new line, doesn't need end tag Proprietary inline element (nobr)

        Test inline element

        jtidy/src/test/resources/552861.msg0000644000175000017500000000144011463516445017211 0ustar twernertwerner 44 2 1 1 declaration]]> 48 2 8 3 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/480843.cfg0000644000175000017500000000022310111224155017137 0ustar twernertwerner# Tidy configuration file for bug #480843 output-xhtml: yes tidy-mark: false indent-spaces: 0 output-xhtml= yes tidy-mark= false indent-spaces= 0 jtidy/src/test/resources/574158.html0000644000175000017500000000167010114173713017364 0ustar twernertwerner [574158] Error with FONT tag jtidy/src/test/resources/475643.msg0000644000175000017500000000122711461621360017205 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/570027.html0000644000175000017500000000172207777327667017407 0ustar twernertwerner [ 570027 ] Fixes crash in Word2000 cleanup

              ;   Introduction ;

        jtidy/src/test/resources/433656.msg0000644000175000017500000000117311461621360017203 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/434100.cfg0000644000175000017500000000015210111224155017121 0ustar twernertwerner# Tidy configuration file for bug #434100 input-xml: yes tidy-mark: false input-xml= yes tidy-mark= falsejtidy/src/test/resources/598860.html0000644000175000017500000000025107777327667017422 0ustar twernertwerner #598860 script parsing fails with quote chars jtidy/src/test/resources/1039641.html0000644000175000017500000000053410130602330017422 0ustar twernertwerner [1039641] pre should not change the inside text
        1:
        1:
        
        jtidy/src/test/resources/437468.html0000644000175000017500000000032107777327667017414 0ustar twernertwerner Test input file for iso-8859-1 character entities

        Phrase with numeric quotes expressly stated: “Dj conu l're de Cafne”

        jtidy/src/test/resources/837023.html0000644000175000017500000000017507777327667017412 0ustar twernertwerner [ 837023 ] segfault on doctype-like element Just text. jtidy/src/test/resources/427830.html0000644000175000017500000000040707777327667017411 0ustar twernertwerner Test Input For Bug #427830

        Tidy uses an incorrect XHTML 1.0 Namespace, even if the correct namespace is given.

        jtidy/src/test/resources/735603.cfg0000644000175000017500000000024410034237703017146 0ustar twernertwernerdrop-proprietary-attributes= true new-blocklevel-tags= foo tidy-mark=false wrap=0 drop-proprietary-attributes: true new-blocklevel-tags: foo tidy-mark:false wrap:0jtidy/src/test/resources/438956.msg0000644000175000017500000000203111463520412017203 0ustar twernertwerner 11 2 5 1 elements]]> 11 2 6 2 isn't allowed in elements]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/649812.out0000644000175000017500000000204010001542231017205 0ustar twernertwerner<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <title>[ 649812 ] Does TidyLib correctly handle Mac files?</title> </head> <body> <h1>This is a Samle UTF16 Little Cowboy</h1> <p>the next para is Hebrew</p> <p>    </p> <p>The Next one is Russian / crylic</p> <p>?@8;>65=85 4>;6=> >ACI5AB2;OBL</p> <p>The Next one is Greek</p> <p> - Feta, , Salads</p> </body> </html> jtidy/src/test/resources/837023.msg0000644000175000017500000000173011461621360017176 0ustar twernertwerner 44 2 1 1 declaration]]> 34 2 9 7 isn't allowed after elements]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/688746.out0000644000175000017500000000060310000613130017215 0ustar twernertwerner [ 688746 ] incorrect charset value for utf-8

        How to…
        Place an extended-hours order:

        jtidy/src/test/resources/431874.msg0000644000175000017500000000177011461621360017206 0ustar twernertwerner 7 2 5 15 before ]]> 8 2 5 70 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431889.cfg0000644000175000017500000000035310115714575017167 0ustar twernertwerner# Config file for bug [ #431889 ] Config file options w/"param" don't work doctype: "-//ACME//DTD HTML 3.14159//EN" alt-text: "Alternate" tidy-mark: false doctype= "-//ACME//DTD HTML 3.14159//EN" alt-text= "Alternate" tidy-mark= falsejtidy/src/test/resources/433670.cfg0000644000175000017500000000015310111224155017135 0ustar twernertwerner# Tidy configuration file for bug #433670 input-xml: yes tidy-mark: false input-xml= yes tidy-mark= false jtidy/src/test/resources/508245.html0000644000175000017500000000022710110502420017336 0ustar twernertwerner
        hello & world
        hello < world
        jtidy/src/test/resources/434940.msg0000644000175000017500000000072311461621360017200 0ustar twernertwerner 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427821.cfg0000644000175000017500000000016210111224155017136 0ustar twernertwerner# Tidy configuration file for bug #427821 output-xhtml: yes tidy-mark: false output-xhtml= yes tidy-mark= false jtidy/src/test/resources/660397.msg0000644000175000017500000007573711463516445017241 0ustar twernertwerner 17 2 5 -3 77 2 7 19 77 2 8 39 77 2 13 36 77 2 16 12 77 2 17 38 77 2 19 2 77 2 19 12 77 2 21 50 77 2 23 63 77 2 27 68 77 2 28 56 77 2 32 18 77 2 34 39 77 2 38 30 77 2 38 52 77 2 41 36 77 2 41 58 77 2 42 21 77 2 43 29 77 2 43 46 77 2 43 58 77 2 47 28 77 2 49 28 77 2 51 38 77 2 51 67 77 2 54 1 77 2 54 6 77 2 54 29 77 2 55 51 77 2 56 30 77 2 58 5 77 2 58 60 77 2 59 11 77 2 61 46 77 2 64 32 77 2 66 43 77 2 68 8 77 2 68 38 77 2 68 47 77 2 69 54 77 2 70 25 77 2 71 17 77 2 74 5 77 2 75 34 77 2 75 62 77 2 79 1 77 2 79 6 77 2 79 22 77 2 80 49 77 2 82 38 77 2 82 43 77 2 83 47 77 2 84 43 77 2 88 24 77 2 89 31 77 2 89 63 77 2 92 38 77 2 93 19 77 2 93 28 77 2 93 59 77 2 94 29 77 2 96 41 77 2 97 2 77 2 98 9 77 2 98 19 77 2 98 42 77 2 98 52 77 2 100 8 77 2 101 55 77 2 102 31 77 2 102 44 77 2 104 29 77 2 104 45 77 2 109 10 77 2 109 16 77 2 116 40 77 2 117 16 77 2 117 62 77 2 120 29 77 2 121 43 77 2 125 45 77 2 126 44 77 2 129 21 77 2 131 8 77 2 131 47 77 2 132 42 77 2 133 7 77 2 133 29 77 2 137 29 77 2 138 10 77 2 138 69 77 2 139 69 77 2 140 43 77 2 141 59 77 2 143 35 77 2 143 48 77 2 145 32 77 2 149 58 77 2 150 62 77 2 151 22 77 2 151 66 77 2 152 34 77 2 152 57 77 2 153 19 77 2 153 35 77 2 155 55 77 2 159 14 77 2 160 28 77 2 161 42 77 2 163 51 77 2 164 12 77 2 165 54 77 2 165 69 77 2 166 54 77 2 167 56 77 2 167 60 77 2 169 28 77 2 171 45 77 2 175 32 77 2 178 35 77 2 179 33 77 2 181 20 77 2 181 26 77 2 182 25 77 2 184 18 77 2 186 60 77 2 187 40 77 2 187 46 77 2 187 67 77 2 188 25 77 2 188 53 77 2 189 28 77 2 191 35 77 2 191 44 77 2 
196 49 77 2 199 14 77 2 199 65 77 2 200 22 77 2 201 8 77 2 202 20 77 2 203 10 77 2 203 13 77 2 203 25 77 2 209 66 77 2 212 7 77 2 212 32 77 2 214 28 77 2 215 1 77 2 215 33 77 2 218 1 77 2 218 46 77 2 219 28 77 2 219 42 77 2 222 37 77 2 222 41 77 2 224 9 77 2 227 35 77 2 231 24 77 2 235 45 77 2 237 13 110 0 1 1 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 jtidy/src/test/resources/540296.msg0000644000175000017500000000263211461621360017203 0ustar twernertwerner 44 2 1 1 declaration]]> 23 2 6 1 ]]> 23 2 7 -3 ]]> 23 2 9 1 ]]> 23 2 10 -3 ]]> 23 2 11 -3 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/655338.html0000644000175000017500000000035207777327667017416 0ustar twernertwerner [ 655338 ] Tidy leaves XML decl in wrong place

        foo jtidy/src/test/resources/427812.cfg0000644000175000017500000000024310111224155017136 0ustar twernertwerner# Tidy configuration file for bug #427812 output-xhtml: yes tidy-mark: false wrap: 0 indent-spaces: 0 output-xhtml= yes tidy-mark= false wrap= 0 indent-spaces= 0 jtidy/src/test/resources/473490.html0000644000175000017500000000031307777327667017410 0ustar twernertwerner [ #473490 ] DOCTYPE for Proprietary HTML to XHTML bad

        Test

        jtidy/src/test/resources/433856.cfg0000644000175000017500000000016410111224155017145 0ustar twernertwerner# Tidy configuration file for bug #433856 drop-font-tags: yes tidy-mark: false drop-font-tags= yes tidy-mark= falsejtidy/src/test/resources/427836.msg0000644000175000017500000001112611463516445017216 0ustar twernertwerner 44 2 1 1 declaration]]> 65 2 2 17 discarding newline in URI reference]]> 64 2 2 17 49 2 2 17 17 2 5 1 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 1 0 1 1 112 0 1 1 jtidy/src/test/resources/438954.out0000644000175000017500000000030710000613130017210 0ustar twernertwerner [ #438954 ] Body tag w/attributes omitted w/hide-end Use "--hide-endtags yes" on command line jtidy/src/test/resources/431883.html0000644000175000017500000000203607777327667017414 0ustar twernertwerner Bug-2000-12-27-A [ #431883 ] Given doctype reported incorrectly
        A cell.
        jtidy/src/test/resources/620531.msg0000644000175000017500000000117511463516445017204 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/test.dir0000644000175000017500000000004410123373031017365 0ustar twernertwernerneeded to find the test resource dirjtidy/src/test/resources/471264.msg0000644000175000017500000000126211461621360017177 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/480843.out0000644000175000017500000000062610115150162017216 0ustar twernertwerner [ #480843 ] Proposed change to FixID()

        Introduction

        New Introduction

        jtidy/src/test/resources/572154.html0000644000175000017500000000022010034327411017341 0ustar twernertwerner [ 572154 ] frame element outside of a frameset causes infinite loop jtidy/src/test/resources/427675.msg0000644000175000017500000000171111461621360017205 0ustar twernertwerner 44 2 1 1 declaration]]> 8 2 8 1 ]]> 6 2 11 2 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/443678.html0000644000175000017500000000052210133721557017367 0ustar twernertwerner [ #443678 ] Unclosed <script> in <head> messes Tidy jtidy/src/test/resources/427825.html0000644000175000017500000000030207777327667017407 0ustar twernertwerner Test user defined tags - bug #427825 Test-1Test-3 jtidy/src/test/resources/467863.msg0000644000175000017500000000177111463516445017227 0ustar twernertwerner 7 2 6 17 before ]]> 8 2 6 38 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/480406.xml0000644000175000017500000000016407777327667017243 0ustar twernertwerner jtidy/src/test/resources/433656.html0000644000175000017500000000020107777327667017404 0ustar twernertwerner [ #433656 ] Improve support for PHP (some text) jtidy/src/test/resources/655338.out0000644000175000017500000000052110000613130017203 0ustar twernertwerner [ 655338 ] Tidy leaves XML decl in wrong place

        foo

        jtidy/src/test/resources/427825.cfg0000644000175000017500000000020210111224155017135 0ustar twernertwerner# Tidy configuration file for bug #427825 new-inline-tags: lm:xcode tidy-mark: false new-inline-tags= lm:xcode tidy-mark= false jtidy/src/test/resources/434940.cfg0000644000175000017500000000016410111224155017140 0ustar twernertwerner# Tidy configuration file for bug #434940 show-body-only: yes tidy-mark: false show-body-only= yes tidy-mark= falsejtidy/src/test/resources/427840.html0000644000175000017500000000022307777327667017406 0ustar twernertwerner [ #427840 ] Span causes infinite loop

        Inside a span.

        jtidy/src/test/resources/427819.html0000644000175000017500000000061307777327667017417 0ustar twernertwerner [ #427819 ] OPTION w/illegal FONT eats whitespace
        jtidy/src/test/resources/470688.html0000644000175000017500000000041707777327667017423 0ustar twernertwerner [ #470688 ] doesn't cleanup badly nested tags right

        RIGHT TRIANGLES

        jtidy/src/test/resources/435909.html0000644000175000017500000000107110115150162017351 0ustar twernertwerner [ #435909 ] <noscript></noscript> in <head></head> Test jtidy/src/test/resources/537604.out0000644000175000017500000000033110000613130017175 0ustar twernertwerner this is a test of ©. &, <, >, ', " must be recognized. jtidy/src/test/resources/473490.out0000644000175000017500000000027010000613130017201 0ustar twernertwerner [ #473490 ] DOCTYPE for Proprietary HTML to XHTML bad

        Test

        jtidy/src/test/resources/547976.html0000644000175000017500000000052710114171503017367 0ustar twernertwerner [547976] Case of attribute values
        valign value should be lowercase
        jtidy/src/test/resources/433360.msg0000644000175000017500000000255111463516445017205 0ustar twernertwerner 44 2 1 1 declaration]]> 52 3 6 2 missing '>' for end of tag]]> 111 0 1 1 -1 0 0 0 -1 0 0 0 8 0 1 1 elements. ]]> jtidy/src/test/resources/480701.out0000644000175000017500000000056210115150162017206 0ustar twernertwerner jtidy/src/test/resources/505770.out0000644000175000017500000000153310000613130017201 0ustar twernertwerner [ #505770] Unclosed tag causing problems



        jtidy/src/test/resources/435923.html0000644000175000017500000000023107777327667017406 0ustar twernertwerner [ #435923 ] Preserve case of attribute names jtidy/src/test/resources/434100.html0000644000175000017500000000102207777327667017367 0ustar twernertwerner [ #434100 ] Error actually reported as a warning jtidy/src/test/resources/427823.out0000644000175000017500000000056110000613130017203 0ustar twernertwerner [ #427823 ] Multiple <BODY>'s in <NOFRAMES> allowed <body> Text in body 1. <div>Text in illegal body 2.</div> <p>Text in inferred illegal body 3.</p> <p>Text in inferred illegal body 4.</p> </body> jtidy/src/test/resources/467865.msg0000644000175000017500000000252711463516445017231 0ustar twernertwerner 24 1 6 13 is probably intended as ]]> 10 2 6 19 by ]]> 8 2 6 45 ]]> 8 2 6 52 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427633.msg0000644000175000017500000000117411463516445017213 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/471264.html0000644000175000017500000000040507777327667017407 0ustar twernertwerner [ #471264 ] Reduce blank lines in output
        • first element
        • second element
        jtidy/src/test/resources/487283.msg0000644000175000017500000000312011463532424017206 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 6 1 lacks "action" attribute]]> 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/640474.cfg0000644000175000017500000000026710111224155017145 0ustar twernertwerner# Tidy configuration file for bug #640474 input-xml: yes output-xml: yes char-encoding: latin1 tidy-mark: false input-xml= yes output-xml= yes char-encoding= latin1 tidy-mark= false jtidy/src/test/resources/646946.out0000644000175000017500000000032410000613130017211 0ustar twernertwerner jtidy/src/test/resources/533105.out0000644000175000017500000000200010001537662017200 0ustar twernertwerner [ 533105 ] Tidy confused: HTML in VBScript jtidy/src/test/resources/480701.msg0000644000175000017500000000042711463520436017201 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/1392829.html0000644000175000017500000000123710463217611017452 0ustar twernertwerner [1392829] Some cases for null pointer Exception 1 addFontSize does not do a null pointer check on size value
        jtidy/src/test/resources/427837.out0000644000175000017500000000017510013722351017223 0ustar twernertwerner Bjrn Hhrmann Marc-Andr Lemburg jtidy/src/test/resources/663197.msg0000644000175000017500000000126211461621360017207 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/593705.out0000644000175000017500000000050110133721557017224 0ustar twernertwerner [ 593705 ] Use of < comparison symbol confuses Tidy

        Does the script confuse Tidy?

        jtidy/src/test/resources/445557.msg0000644000175000017500000000730411463516445017221 0ustar twernertwerner 44 2 1 1 declaration]]> 77 2 2 32 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 8 0 1 1 elements. ]]> jtidy/src/test/resources/679135.msg0000644000175000017500000000554511461621360017216 0ustar twernertwerner 44 2 2 1 declaration]]> 48 2 12 1 49 2 12 1 lacks "alt" attribute]]> 55 2 17 156 dropping value "0" for repeated attribute "border"]]> 49 2 17 273 lacks "alt" attribute]]> 55 2 18 157 dropping value "0" for repeated attribute "border"]]> 49 2 18 274 lacks "alt" attribute]]> 111 0 1 1 -1 0 0 0 4 0 1 1 1 0 1 1 112 0 1 1 jtidy/src/test/resources/435920.msg0000644000175000017500000000173711463516445017216 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements. ]]> jtidy/src/test/resources/427840.msg0000644000175000017500000000301111463516445017203 0ustar twernertwerner 44 2 1 1 declaration]]> 52 3 4 1 missing '>' for end of tag]]> 7 2 5 2 before

        ]]> 23 2 5 2 ]]> 8 2 6 -3 ]]> 111 0 1 1 -1 0 0 0 -1 0 0 0 jtidy/src/test/resources/1403105.html0000644000175000017500000000001310463650235017420 0ustar twernertwerner

        jtidy/src/test/resources/449348.out0000644000175000017500000000072310104011573017221 0ustar twernertwerner [ #449348 ] Whitespace added/removed to inline tags

        Make this wrap at the end of the line12345678: white-spacejoebob

        This is long enough a wrap at the next line text ...

        jtidy/src/test/resources/688746.cfg0000644000175000017500000000011110000613130017137 0ustar twernertwernerchar-encoding: utf8 tidy-mark:false char-encoding= utf8 tidy-mark=false jtidy/src/test/resources/943559.html0000644000175000017500000000051210044423725017366 0ustar twernertwerner [943559] Form between td
        yyy
        jtidy/src/test/resources/443678.msg0000644000175000017500000000272011463535265017221 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 4 1 lacks "type" attribute]]> 32 2 7 28 49 2 11 1 lacks "type" attribute]]> 6 2 12 3 ]]> 49 2 15 2 lacks "type" attribute]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/593705.html0000644000175000017500000000042607777327667017417 0ustar twernertwerner [ 593705 ] Use of < comparison symbol confuses Tidy

        Does the script confuse Tidy?

        jtidy/src/test/resources/437468.out0000644000175000017500000000045710000613130017215 0ustar twernertwerner Test input file for iso-8859-1 character entities

        Phrase with numeric quotes expressly stated: “Déjà conçu à l'ère de Caféïne”

        jtidy/src/test/resources/427812.msg0000644000175000017500000000126211461621360017177 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/538727.cfg0000644000175000017500000000034010033573374017161 0ustar twernertwernertidy-mark: false wrap: 0 doctype: "-//W3C//DTD XHTML 1.0 Transitional//EN" "file:///E:/xhtml1-transitional.dtd" tidy-mark= false wrap= 0 doctype= "-//W3C//DTD XHTML 1.0 Transitional//EN" "file:///E:/xhtml1-transitional.dtd"jtidy/src/test/resources/427662.html0000644000175000017500000000033707777327667017416 0ustar twernertwerner [#427662] BLOCK/INLINE before TABLE parsed wrong Big and bold Big
        jtidy/src/test/resources/432677.html0000644000175000017500000000065107777327667017417 0ustar twernertwerner [ #432677 ] Null value changed to "value" for -asxml
        jtidy/src/test/resources/647255.msg0000644000175000017500000000145011463516445017214 0ustar twernertwerner 44 2 1 2 declaration]]> 17 2 1 2 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/676156.html0000644000175000017500000000020007777327667017407 0ustar twernertwernerfoo bar jtidy/src/test/resources/431958.out0000644000175000017500000000016010104011573017212 0ustar twernertwerner [ #431958 ] Comments always indented jtidy/src/test/resources/922302.html0000644000175000017500000000032310033354416017343 0ustar twernertwerner [ #922302 ] Add comment to script tag to produce valid XML jtidy/src/test/resources/514348.out0000644000175000017500000000156510000613130017207 0ustar twernertwerner [ #514348 ] Incorrect wrap behaviour
        HomeNews
        jtidy/src/test/resources/432677.out0000644000175000017500000000063710000613130017212 0ustar twernertwerner [ #432677 ] Null value changed to "value" for -asxml
        jtidy/src/test/resources/437468.msg0000644000175000017500000000117411463516445017222 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431958.msg0000644000175000017500000000117411463516445017220 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433666.msg0000644000175000017500000000152111461621360017201 0ustar twernertwerner 44 2 1 1 declaration]]> 55 2 4 1 dropping value "right" for repeated attribute "align"]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/656889.html0000644000175000017500000000036507777327667017436 0ustar twernertwerner [ 656889 ] textarea text and line wrapping
        jtidy/src/test/resources/676156.msg0000644000175000017500000000220611463521405017206 0ustar twernertwerner 44 2 1 1 declaration]]> 11 2 1 1 elements]]> 29 2 3 22 17 2 6 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1003994.cfg0000644000175000017500000000010110110421776017220 0ustar twernertwernertidy-mark: false input-xml: yes tidy-mark= false input-xml= yes jtidy/src/test/resources/427825.out0000644000175000017500000000026710000613130017210 0ustar twernertwerner Test user defined tags - bug #427825 Test-1Test-3 jtidy/src/test/resources/503436.xml0000644000175000017500000000020007777327667017231 0ustar twernertwerner Testcase #503436 first jtidy/src/test/resources/505770.html0000644000175000017500000000157707777327667017422 0ustar twernertwerner [ #505770] Unclosed <option> tag causing problems



        jtidy/src/test/resources/647900.cfg0000644000175000017500000000055210115714575017161 0ustar twernertwerner# HTML Tidy configuration file created by TidyGUI indent: auto tidy-mark: no clean: yes drop-font-tags: yes logical-emphasis: yes indent-attributes: yes force-output: yes tidy-mark: false indent-spaces: 0 indent= auto tidy-mark= no clean= yes drop-font-tags= yes logical-emphasis= yes indent-attributes= yes force-output= yes tidy-mark= false indent-spaces= 0 jtidy/src/test/resources/431874.html0000644000175000017500000000032010000046405017337 0ustar twernertwerner Test for bug #431874 Test for bug #431874 jtidy/src/test/resources/443362.html0000644000175000017500000000167507777327667017417 0ustar twernertwerner [ #443362 ] null-pointer except. for doctype in pre

        Unofficial W3C Validator FAQ

        This is a list of frequently asked questions and answers asked on the www-validator-css@w3.org mailing list.

        What does "org.xml.sax.SAXException: Please, fix your system identifier (URI) in the DOCTYPE rule." mean?

        Your XHTML document contains a document type declaration but the system identifier points at some non-W3C URI. Your document probably contains something like this:

         
         
        
        
        jtidy/src/test/resources/678268.msg0000644000175000017500000000511611463516445017227 0ustar twernertwerner 48 2 1 1 48 2 1 1 44 2 1 1 declaration]]> 29 2 3 17 49 2 12 2 lacks "summary" attribute]]> 7 2 12 2 before ]]> 8 2 12 37 ]]> 7 2 13 -3 before ]]> 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/1039641.out0000644000175000017500000000053410130602330017265 0ustar twernertwerner[1039641] pre should not change the inside text
        1:
        1:
        
        jtidy/src/test/resources/433604.cfg0000644000175000017500000000015310111224155017132 0ustar twernertwerner# Tidy configuration file for bug #433604 input-xml: yes tidy-mark: false input-xml= yes tidy-mark= false jtidy/src/test/resources/427838.msg0000644000175000017500000000345611463516445017227 0ustar twernertwerner 7 2 11 18 before

        ]]> 23 2 11 18 ]]> 15 2 11 22 ]]> 8 2 11 32 ]]> 24 1 16 25 is probably intended as ]]> 7 2 17 25 before ]]> 7 2 19 1 before ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431956.xml0000644000175000017500000000044007777327667017246 0ustar twernertwerner Test jtidy/src/test/resources/427837.cfg0000644000175000017500000000026710111224155017153 0ustar twernertwerner# Tidy configuration file for bug #427837 input-xml: yes output-xml: yes char-encoding: latin1 tidy-mark: false input-xml= yes output-xml= yes char-encoding= latin1 tidy-mark= false jtidy/src/test/resources/470688.out0000644000175000017500000000037310000613130017213 0ustar twernertwerner [ #470688 ] doesn't cleanup badly nested tags right

        RIGHT TRIANGLES

        jtidy/src/test/resources/656889.cfg0000644000175000017500000000020110000613130017142 0ustar twernertwernerindent: auto wrap: 55555 alt-text: pic tidy-mark: no clean: yes indent= auto wrap= 55555 alt-text= pic tidy-mark= no clean= yes jtidy/src/test/resources/427825.msg0000644000175000017500000000146011461621360017203 0ustar twernertwerner 44 2 2 1 declaration]]> 21 2 7 11 is not approved by W3C]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433040.msg0000644000175000017500000000126211461621360017165 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1020806b.html0000644000175000017500000000046610120313554017567 0ustar twernertwerner [1020806b] NPE when PPPrint'ing changed DOM tree jtidy/src/test/resources/658230.out0000644000175000017500000015047510000613130017213 0ustar twernertwerner

        BIG-5 r

        Here are some entities: & "  2

        A0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        C0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        C1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        C2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿

        C3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ

        C4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ġ ġ Ģ ģ Ĥ ĥ Ħ ħ Ĩ ĩ Ī ī Ĭ ĭ Į į İ ı IJ ij Ĵ ĵ Ķ ķ ĸ Ĺ ĺ Ļ ļ Ľ ľ Ŀ

        C5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Š š Ţ ţ Ť ť Ŧ ŧ Ũ ũ Ū ū Ŭ ŭ Ů ů Ű ű Ų ų Ŵ ŵ Ŷ ŷ Ÿ Ź ź Ż ż Ž ž ſ

        C6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ơ ơ Ƣ ƣ Ƥ ƥ Ʀ Ƨ ƨ Ʃ ƪ ƫ Ƭ ƭ Ʈ Ư ư Ʊ Ʋ Ƴ ƴ Ƶ ƶ Ʒ Ƹ ƹ ƺ ƻ Ƽ ƽ ƾ ƿ

        C7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ǡ ǡ Ǣ ǣ Ǥ ǥ Ǧ ǧ Ǩ ǩ Ǫ ǫ Ǭ ǭ Ǯ ǯ ǰ DZ Dz dz Ǵ ǵ Ƕ Ƿ Ǹ ǹ Ǻ ǻ Ǽ ǽ Ǿ ǿ

        C8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ƞ ȡ Ȣ ȣ Ȥ ȥ Ȧ ȧ Ȩ ȩ Ȫ ȫ Ȭ ȭ Ȯ ȯ Ȱ ȱ Ȳ ȳ ȴ ȵ ȶ ȷ ȸ ȹ Ⱥ Ȼ ȼ Ƚ Ⱦ ȿ

        C9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ɠ ɡ ɢ ɣ ɤ ɥ ɦ ɧ ɨ ɩ ɪ ɫ ɬ ɭ ɮ ɯ ɰ ɱ ɲ ɳ ɴ ɵ ɶ ɷ ɸ ɹ ɺ ɻ ɼ ɽ ɾ ɿ

        CA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ʠ ʡ ʢ ʣ ʤ ʥ ʦ ʧ ʨ ʩ ʪ ʫ ʬ ʭ ʮ ʯ ʰ ʱ ʲ ʳ ʴ ʵ ʶ ʷ ʸ ʹ ʺ ʻ ʼ ʽ ʾ ʿ

        CB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ˠ ˡ ˢ ˣ ˤ ˥ ˦ ˧ ˨ ˩ ˪ ˫ ˬ ˭ ˮ ˯ ˰ ˱ ˲ ˳ ˴ ˵ ˶ ˷ ˸ ˹ ˺ ˻ ˼ ˽ ˾ ˿

        CC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ̠ ̡ ̢ ̣ ̤ ̥ ̦ ̧ ̨ ̩ ̪ ̫ ̬ ̭ ̮ ̯ ̰ ̱ ̲ ̳ ̴ ̵ ̶ ̷ ̸ ̹ ̺ ̻ ̼ ̽ ̾ ̿

        CD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ͠ ͡ ͢ ͣ ͤ ͥ ͦ ͧ ͨ ͩ ͪ ͫ ͬ ͭ ͮ ͯ Ͱ ͱ Ͳ ͳ ʹ ͵ Ͷ ͷ ͸ ͹ ͺ ͻ ͼ ͽ ; Ϳ

        CE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Π Ρ ΢ Σ Τ Υ Φ Χ Ψ Ω Ϊ Ϋ ά έ ή ί ΰ α β γ δ ε ζ η θ ι κ λ μ ν ξ ο

        CF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ϡ ϡ Ϣ ϣ Ϥ ϥ Ϧ ϧ Ϩ ϩ Ϫ ϫ Ϭ ϭ Ϯ ϯ ϰ ϱ ϲ ϳ ϴ ϵ ϶ Ϸ ϸ Ϲ Ϻ ϻ ϼ Ͻ Ͼ Ͽ

        D0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я а б в г д е ж з и й к л м н о п

        D1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ѡ ѡ Ѣ ѣ Ѥ ѥ Ѧ ѧ Ѩ ѩ Ѫ ѫ Ѭ ѭ Ѯ ѯ Ѱ ѱ Ѳ ѳ Ѵ ѵ Ѷ ѷ Ѹ ѹ Ѻ ѻ Ѽ ѽ Ѿ ѿ

        D2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ҡ ҡ Ң ң Ҥ ҥ Ҧ ҧ Ҩ ҩ Ҫ ҫ Ҭ ҭ Ү ү Ұ ұ Ҳ ҳ Ҵ ҵ Ҷ ҷ Ҹ ҹ Һ һ Ҽ ҽ Ҿ ҿ

        D3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ӡ ӡ Ӣ ӣ Ӥ ӥ Ӧ ӧ Ө ө Ӫ ӫ Ӭ ӭ Ӯ ӯ Ӱ ӱ Ӳ ӳ Ӵ ӵ Ӷ ӷ Ӹ ӹ Ӻ ӻ Ӽ ӽ Ӿ ӿ

        D4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ԡ ԡ Ԣ ԣ Ԥ ԥ Ԧ ԧ Ԩ ԩ Ԫ ԫ Ԭ ԭ Ԯ ԯ ԰ Ա Բ Գ Դ Ե Զ Է Ը Թ Ժ Ի Լ Խ Ծ Կ

        D5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ՠ ա բ գ դ ե զ է ը թ ժ ի լ խ ծ կ հ ձ ղ ճ մ յ ն շ ո չ պ ջ ռ ս վ տ

        D6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ֠ ֡ ֢ ֣ ֤ ֥ ֦ ֧ ֨ ֩ ֪ ֫ ֬ ֭ ֮ ֯ ְ ֱ ֲ ֳ ִ ֵ ֶ ַ ָ ֹ ֺ ֻ ּ ֽ ־ ֿ

        D7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ נ ס ע ף פ ץ צ ק ר ש ת ׫ ׬ ׭ ׮ ׯ װ ױ ײ ׳ ״ ׵ ׶ ׷ ׸ ׹ ׺ ׻ ׼ ׽ ׾ ׿

        D8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ؠ ء آ أ ؤ إ ئ ا ب ة ت ث ج ح خ د ذ ر ز س ش ص ض ط ظ ع غ ػ ؼ ؽ ؾ ؿ

        D9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ٠ ١ ٢ ٣ ٤ ٥ ٦ ٧ ٨ ٩ ٪ ٫ ٬ ٭ ٮ ٯ ٰ ٱ ٲ ٳ ٴ ٵ ٶ ٷ ٸ ٹ ٺ ٻ ټ ٽ پ ٿ

        DA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ڠ ڡ ڢ ڣ ڤ ڥ ڦ ڧ ڨ ک ڪ ګ ڬ ڭ ڮ گ ڰ ڱ ڲ ڳ ڴ ڵ ڶ ڷ ڸ ڹ ں ڻ ڼ ڽ ھ ڿ

        DB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ۠ ۡ ۢ ۣ ۤ ۥ ۦ ۧ ۨ ۩ ۪ ۫ ۬ ۭ ۮ ۯ ۰ ۱ ۲ ۳ ۴ ۵ ۶ ۷ ۸ ۹ ۺ ۻ ۼ ۽ ۾ ۿ

        DC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ܠ ܡ ܢ ܣ ܤ ܥ ܦ ܧ ܨ ܩ ܪ ܫ ܬ ܭ ܮ ܯ ܰ ܱ ܲ ܳ ܴ ܵ ܶ ܷ ܸ ܹ ܺ ܻ ܼ ܽ ܾ ܿ

        DD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ݠ ݡ ݢ ݣ ݤ ݥ ݦ ݧ ݨ ݩ ݪ ݫ ݬ ݭ ݮ ݯ ݰ ݱ ݲ ݳ ݴ ݵ ݶ ݷ ݸ ݹ ݺ ݻ ݼ ݽ ݾ ݿ

        DE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ޠ ޡ ޢ ޣ ޤ ޥ ަ ާ ި ީ ު ޫ ެ ޭ ޮ ޯ ް ޱ ޲ ޳ ޴ ޵ ޶ ޷ ޸ ޹ ޺ ޻ ޼ ޽ ޾ ޿

        DF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ߠ ߡ ߢ ߣ ߤ ߥ ߦ ߧ ߨ ߩ ߪ ߫ ߬ ߭ ߮ ߯ ߰ ߱ ߲ ߳ ߴ ߵ ߶ ߷ ߸ ߹ ߺ ߻ ߼ ߽ ߾ ߿

        E0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        ED @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        jtidy/src/test/resources/504206.html0000644000175000017500000001177407777327667017413 0ustar twernertwerner [ #504206] Tidy errors in processing forms.

        COMM 428: Feedback Survey

        Please fill out the following form based on YOUR experience in COMM 428 to date.

        Use the Send button at the bottom of the page to send your feedback to me via e-mail.


        Name (optional):

        E-Mail Address (optional):

        Where will you be this time next year?

        Area(s) of concentation? (Select all that Apply):
        MIS
        Finance
        Marketing
        Management
        Accounting
        Other

        Q1: The instructor gives appropriate consideration to the comments and questions of students.
        Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

        Q2: The instructor relates theoretical concepts covered in the course to practical applications.
        Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

        Q3: The instructor presents class material in a clear and organized manner.
        Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

        Q4: The instructor is prepared for class.
        Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

        Q5: The instructor demonstrates enthusiasm and interest in the subject matter.
        Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

        Q6: The instructor posts reasonable office hours and is available whenever I try to see him during those hours.
        Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

        Q7: The instructor stimulates me to think about the course.
        Strongly Agree Agree Neither Agree Nor Disagree Disagree Strongly Disagree

        Stengths (class/instructor):

        Weaknesses (class/instructor):

        Suggestions for improvement (class/instructor; NOTE: This field is REQUIRED):


        jtidy/src/test/resources/640474.msg0000644000175000017500000000043011463516445017205 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/427845.cfg0000644000175000017500000000013610111224155017145 0ustar twernertwerner# Tidy configuration file for bug #427845 wrap: 70 tidy-mark: false wrap= 70 tidy-mark= falsejtidy/src/test/resources/649812.cfg0000644000175000017500000000032310000613130017133 0ustar twernertwernerchar-encoding: utf16le newline: CR output-xhtml: yes indent: auto indent-attributes: yes tidy-mark:false char-encoding= utf16le newline= CR output-xhtml= yes indent= auto indent-attributes= yes tidy-mark=false jtidy/src/test/resources/663197.cfg0000644000175000017500000000034410110502420017141 0ustar twernertwerneralt-text=" " add-xml-decl=true clean=true drop-font-tags=true output-xhtml=true tab-size=1 tidy-mark=false wrap=0 alt-text:" " add-xml-decl:true clean:true drop-font-tags:true output-xhtml:true tab-size:1 tidy-mark:false wrap:0jtidy/src/test/resources/433040.html0000644000175000017500000000035207777327667017376 0ustar twernertwerner [ #433040 ] Anchor tag without attributes deleted
        Tidy strips Anchor tags when there are no attributes jtidy/src/test/resources/508936.out0000644000175000017500000000072510000613130017212 0ustar twernertwerner [ #508936 ] Parse CSS Selector prefix in config file

        Allow user to specify prefix for class names Tidy generates with --clean yes option.

        jtidy/src/test/resources/443576.out0000644000175000017500000000045010104011573017213 0ustar twernertwerner [ #443576 ] End script tag inside scripts problem jtidy/src/test/resources/671087.msg0000644000175000017500000000125411461621360017205 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/676205.msg0000644000175000017500000001171011461621360017200 0ustar twernertwerner 44 2 3 1 declaration]]> 65 2 8 3 discarding newline in URI reference]]> 65 2 8 3 discarding newline in URI reference]]> 65 2 8 3 discarding newline in URI reference]]> 36 2 11 1 ]]> 64 2 11 1 escaping malformed URI reference]]> 49 2 11 1 lacks "alt" attribute]]> 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 1 0 1 1 112 0 1 1 jtidy/src/test/resources/763191.msg0000644000175000017500000000126311463516445017214 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1058909.html0000644000175000017500000000053210143246471017446 0ustar twernertwerner [ #1058909 ] Certain sites causing null pointer Exceptions

        NPE

        jtidy/src/test/resources/433670.out0000644000175000017500000000020110110504274017201 0ustar twernertwerner [ #433670 ] &apos not recognized as valid XML entity. Use -xml on command line. Test of ' jtidy/src/test/resources/671087.html0000644000175000017500000000057107777327667017420 0ustar twernertwerner tidy loops with --new-inline-tags table,tr,td

        table heading
        jtidy/src/test/resources/676156.cfg0000644000175000017500000000011110000613130017127 0ustar twernertwernerchar-encoding: utf8 tidy-mark:false char-encoding= utf8 tidy-mark=false jtidy/src/test/resources/532535.html0000644000175000017500000000045407777327667017412 0ustar twernertwerner [ 532535 ] Hang when in code <?xml />

         

        jtidy/src/test/resources/470663.out0000644000175000017500000000033210000613130017177 0ustar twernertwerner Test Input For Bug #470663

        Body doesn't matter. Problem occurs parsing <head> element.

        jtidy/src/test/resources/543262.html0000644000175000017500000000033507777327667017407 0ustar twernertwerner Preferences

        Test

        jtidy/src/test/resources/450389.msg0000644000175000017500000000361111461621360017204 0ustar twernertwerner 44 2 1 1 declaration]]> 51 2 44 1 attribute "color" has invalid value "reddish"]]> 51 2 72 1 attribute "color" has invalid value "#FF"]]> 51 2 76 1 attribute "color" has invalid value "grurple"]]> 51 2 77 1 attribute "color" has invalid value "#grurple"]]> 51 2 78 1 attribute "color" has invalid value "#1234567"]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/427833.out0000644000175000017500000000042210000613130017200 0ustar twernertwerner Escape sequences

        #include <stdio.h>

        #include <stdio.h>

        #include <stdio.h>

        jtidy/src/test/resources/427823.msg0000644000175000017500000000320411461621360017177 0ustar twernertwerner 44 2 1 1 declaration]]> 27 2 13 1 20 2 13 1 by
        ]]> 27 2 15 -1 26 2 18 1 27 2 18 1 6 2 19 2 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/433040.out0000644000175000017500000000035210000613130017165 0ustar twernertwerner [ #433040 ] Anchor tag without attributes deleted Tidy strips Anchor tags when there are no attributes jtidy/src/test/resources/default.cfg0000644000175000017500000000006310000613130020003 0ustar twernertwernertidy-mark: false wrap: 0 tidy-mark= false wrap= 0 jtidy/src/test/resources/540555.out0000644000175000017500000000022110000613130017172 0ustar twernertwerner </head> <body> <p>#540555 Empty title tag is trimmed</p> </body> </html> �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/test/resources/431965.msg�����������������������������������������������������������������0000644�0001750�0001750�00000001246�11461621360�017205� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="ISO-8859-1"?> <!-- expected messages for test 431965--> <messages> <message> <code>110</code> <level>0</level> <line>1</line> <column>1</column> <text><![CDATA[InputStream: Doctype given is "-//W3C//DTD XHTML 1.0 Strict//EN"]]></text> </message> <message> <code>111</code> <level>0</level> <line>1</line> <column>1</column> <text><![CDATA[InputStream: Document content looks like XHTML 1.0 Strict]]></text> </message> <message> <code>-1</code> <level>0</level> 
<line>0</line> <column>0</column> <text><![CDATA[no warnings or errors were found]]></text> </message> </messages> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/test/resources/427664.msg�����������������������������������������������������������������0000644�0001750�0001750�00000002532�11463520303�017202� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="ISO-8859-1"?> <!-- expected messages for test 427664--> <messages> <message> <code>44</code> <level>2</level> <line>1</line> <column>1</column> <text><![CDATA[missing <!DOCTYPE> declaration]]></text> </message> <message> <code>51</code> <level>2</level> <line>5</line> <column>1</column> <text><![CDATA[<body> attribute "Ã1/2" has invalid value "xx"]]></text> </message> <message> <code>59</code> <level>2</level> <line>5</line> <column>1</column> <text><![CDATA[<body> unexpected or duplicate quote mark]]></text> </message> <message> <code>59</code> <level>2</level> <line>5</line> <column>1</column> <text><![CDATA[<body> unexpected or duplicate quote mark]]></text> </message> <message> <code>51</code> <level>2</level> <line>5</line> <column>1</column> <text><![CDATA[<body> attribute "width" has invalid value "align="""]]></text> </message> <message> <code>111</code> <level>0</level> <line>1</line> <column>1</column> <text><![CDATA[InputStream: Document content looks like HTML 
3.2]]></text> </message> <message> <code>-1</code> <level>0</level> <line>0</line> <column>0</column> <text><![CDATA[5 warnings, no errors were found!]]></text> </message> </messages> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������jtidy/src/test/resources/1416185.html���������������������������������������������������������������0000644�0001750�0001750�00000000225�10463216210�017430� 0����������������������������������������������������������������������������������������������������ustar �twerner�������������������������twerner����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������<html> <head><title>test jtidy/src/test/resources/680664.out0000644000175000017500000000075310115150162017222 0ustar twernertwerner [ 680664 ] Malformed comment generates bad (X)HTML
        This is a test of some pre stuff.
        See what happens to this comment 
        
        jtidy/src/test/resources/708322.out0000644000175000017500000000023710034234737017222 0ustar twernertwerner [ #708322] : drop-proprietary-attributes leaves table height.
        jtidy/src/test/resources/525081.msg0000644000175000017500000000255311461621360017200 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 16 0 1 1 112 0 1 1 jtidy/src/test/resources/1024661.out0000644000175000017500000000052610120313524017265 0ustar twernertwerner [1024661] Error Parsing duplicate style
        ...
        jtidy/src/test/resources/427822.msg0000644000175000017500000000237111463516445017213 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 7 11 before ]]> 12 2 7 12 ]]> 8 2 7 12 ]]> 23 2 8 -3 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/763186.msg0000644000175000017500000000364711463516445017230 0ustar twernertwerner 44 2 1 1 declaration]]> 17 2 2 1 49 2 3 1 lacks "summary" attribute]]> 8 2 14 1 ]]> 111 0 1 1 -1 0 0 0 4 0 1 1 112 0 1 1 jtidy/src/test/resources/ant/0000755000175000017500000000000011617345033016504 5ustar twernertwernerjtidy/src/test/resources/ant/file1.html0000644000175000017500000000033710123373031020363 0ustar twernertwerner Ant test file 1 jtidy/src/test/resources/ant/file3.html0000644000175000017500000000043510123633012020362 0ustar twernertwerner Ant test file 3 this is an unparseable file jtidy/src/test/resources/ant/file2.html0000644000175000017500000000033710123373031020364 0ustar twernertwerner Ant test file 2 jtidy/src/test/resources/template.html0000644000175000017500000000033710114165646020427 0ustar twernertwerner [#] description jtidy/src/test/resources/547976.cfg0000644000175000017500000000015710114171503017161 0ustar twernertwernertidy-mark: false wrap: 0 output-xhtml: yes tidy-mark= false wrap= 0 output-xhtml= yes tidy-mark:false wrap:0 jtidy/src/test/resources/431898.msg0000644000175000017500000000154111463516445017221 0ustar twernertwerner 3 2 11 13 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427675.html0000644000175000017500000000031707777327667017420 0ustar twernertwerner This text belongs in a noframes element. jtidy/src/test/resources/511679.html0000644000175000017500000000025607777327667017420 0ustar twernertwerner[ 511679 ] Block level elements in a <pre> section
        foo
        jtidy/src/test/resources/445394.msg0000644000175000017500000000704611463516445017223 0ustar twernertwerner 44 2 1 1 declaration]]> 58 2 6 1 attribute with missing trailing quote mark]]> 64 2 6 1 escaping malformed URI reference]]> 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 jtidy/src/test/resources/788031.msg0000644000175000017500000000256611463521405017213 0ustar twernertwerner 44 2 1 1 declaration]]> 11 2 2 1 isn't allowed in elements]]> 52 3 3 1 missing '>' for end of tag]]> 17 2 7 1 111 0 1 1 -1 0 0 0 -1 0 0 0 jtidy/src/test/resources/516370.msg0000644000175000017500000000302411461621360017173 0ustar twernertwerner 71 2 10 1 " uses XML ID syntax]]> 71 2 11 1 " uses XML ID syntax]]> 51 2 13 1 attribute "id" has invalid value ".InvalidID1"]]> 51 2 14 1 attribute "id" has invalid value "2InvalidID2"]]> 50 2 15 1 attribute "id" lacks value]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427818.html0000644000175000017500000000033307777327667017415 0ustar twernertwerner [ #427818 ] Missing quotes cause segfaults link jtidy/src/test/resources/533105.html0000644000175000017500000000177707777327667017415 0ustar twernertwerner [ 533105 ] Tidy confused: HTML in VBScript jtidy/src/test/resources/427827.msg0000644000175000017500000000173311461621360017210 0ustar twernertwerner 7 2 7 21 before ]]> 8 2 8 6 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/514348.cfg0000644000175000017500000000026210111224155017140 0ustar twernertwerner# Tidy configuration file for bug #514348 uppercase-tags: true indent: auto indent-spaces: 2 tidy-mark: false uppercase-tags= true indent= auto indent-spaces= 2 tidy-mark= falsejtidy/src/test/resources/433604.xml0000644000175000017500000000020507777327667017235 0ustar twernertwerner [ #433604 ] Tidy inserts &nbsp; entity in -xml mode. Use -xml on command line. 
Test of   jtidy/src/test/resources/445394.html0000644000175000017500000000025507777327667017417 0ustar twernertwerner [ #445394 ] Improve handling of missing trailing " link jtidy/src/test/resources/663548.html0000644000175000017500000000066707777327667017431 0ustar twernertwerner [663548] Javascript and Tidy - missing code

        foo jtidy/src/test/resources/543262.msg0000644000175000017500000000042510120355471017173 0ustar twernertwerner 44 2 1 1 declaration]]> jtidy/src/test/resources/log4j.xml0000644000175000017500000000210010467623231017455 0ustar twernertwerner jtidy/src/test/resources/427845.out0000644000175000017500000000032410115433712017221 0ustar twernertwerner [ #427845 ] Doctypes are output on multiple lines Use "--wrap 70" on the command line jtidy/src/test/resources/426885.html0000644000175000017500000000061107777327667017417 0ustar twernertwerner [ #426885 ] Definition list w/Center crashes

        Heading 1

        Term 1
        Term 2

        Heading 2

          Term 3
          Term 4
        jtidy/src/test/resources/435917.html0000644000175000017500000000041710000046405017350 0ustar twernertwerner [ #435917 ] <input onfocus=""> reported unknown attr bug #1 bug #2 jtidy/src/test/resources/431721.html0000644000175000017500000001626407777327667017413 0ustar twernertwerner Joe-Bob Briggs LLP

        Joe-Bob Briggs LLP

         

        Bryan Joe-Bob LLP is a leading national and international corporate, litigation and private client law firm. We represent a wide variety of business, institutional and individual clients for whom our lawyers handle a wide range of matters. As a result, our lawyers are well prepared to meet the needs of clients whether large or small, public or private, for-profit or not-for-profit.

         

        Joe-Bob Briggs has more offices than you can shake a stick at. These locations give Joe-Bob the geographic reach to assist his clients where their needs are most pressing.

                 Estate Planning

                 Closely-Held Business Practice

                 Estate, Gift, Income and Other Tax Advice

         

        Joe-Bob joined the Firm in 1995 after 15 years with the Kansas City firm of Fish, Gill, Smoker & Butts, where he was a Shareholder/Director. John is a past Chair of the Estate Planning, Probate and Trust Committee of the Kansas City Metropolitan Bar Association and co-authored the Drinking Procedures Manual for County Practitioners. Currently, JB is a member of the Missouri Bar Probate and Trust Committee, the Estate Planning Society and the Mid-America Planned Giving Council. A fellow of the American College of Trust and Estate Counsel, JB lectures frequently on Estate Planning topics for both legal and lay organizations.

         

        jtidy/src/test/resources/501669.html0000644000175000017500000000046707777327667017422 0ustar twernertwerner [ #501669 ] width="n*" marked invalid on <COL>
        xx
        jtidy/src/test/resources/508245.msg0000644000175000017500000000531511461621360017202 0ustar twernertwerner 44 2 1 1 declaration]]> 17 2 2 1 52 3 4 9 missing '>' for end of tag]]> 49 2 4 9 lacks "alt" attribute]]> 4 2 5 16 111 0 1 1 -1 0 0 0 -1 0 0 0 4 0 1 1 1 0 1 1 112 0 1 1 jtidy/src/test/resources/435920.html0000644000175000017500000000030607777327667017406 0ustar twernertwerner [ #435920 ] Space inserted before </td> causes probs bla
         
        jtidy/src/test/resources/545772.cfg0000644000175000017500000000016110111224155017143 0ustar twernertwerner# Tidy configuration file for bug #547057 output-xhtml: yes tidy-mark: false output-xhtml= yes tidy-mark= false jtidy/src/test/resources/629885.msg0000644000175000017500000000117511463516445017231 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/586562.out0000644000175000017500000000045010000613130017206 0ustar twernertwerner [586562] Two Doctypes

        Two DOCTYPE's!

        jtidy/src/test/resources/434940b.msg0000644000175000017500000000072511461621360017344 0ustar twernertwerner 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427676.msg0000644000175000017500000000361511461621360017213 0ustar twernertwerner 44 2 1 1 declaration]]> 59 2 6 15 unexpected or duplicate quote mark]]> 58 2 6 15 attribute with missing trailing quote mark]]> 22 3 6 15 is not recognized!]]> 8 2 6 15 ]]> 8 2 6 44 ]]> 59 2 6 53 unexpected or duplicate quote mark]]> 111 0 1 1 -1 0 0 0 -1 0 0 0 jtidy/src/test/resources/696799.msg0000644000175000017500000000146311461621360017230 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 6 3 lacks "type" attribute]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427813.html0000644000175000017500000000020207777327667017403 0ustar twernertwerner [#427813] Missing = from attr value segfaults text jtidy/src/test/resources/444834.cfg0000644000175000017500000000012510114204746017147 0ustar twernertwernerquiet: yes show-errors:0 show-warnings:no quiet= yes show-errors=0 show-warnings=no jtidy/src/test/resources/1058909.cfg0000644000175000017500000000016010143246471017236 0ustar twernertwernertidy-mark: false wrap: 0 clean: true output-xhtml:true tidy-mark= false wrap= 0 clean= true output-xhtml= true jtidy/src/test/resources/427836.html0000644000175000017500000000020610000613130017340 0ustar twernertwerner jtidy/src/test/resources/531964.cfg0000644000175000017500000000022210111224155017137 0ustar twernertwerner# Tidy configuration file for bug 531964 output-xhtml: yes tidy-mark: false indent-spaces: 0 output-xhtml= yes tidy-mark= false indent-spaces= 0 jtidy/src/test/resources/432677.cfg0000644000175000017500000000021610111224155017143 0ustar twernertwerner# Tidy configuration file for bug #432677 output-xml: yes tidy-mark: false indent-spaces: 0 output-xml= yes tidy-mark= false indent-spaces= 0jtidy/src/test/resources/922302.msg0000644000175000017500000000117511463516445017205 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 
1 1 -1 0 0 0 jtidy/src/test/resources/616744.xml0000644000175000017500000000046207777327667017252 0ustar twernertwerner This is some stuff in a para. There's a "command" in it.
        This line is indented 4 spaces. This (3rd) line is indented 8 spaces.
        jtidy/src/test/resources/443678.out0000644000175000017500000000074011463534326017237 0ustar twernertwerner [ #443678 ] Unclosed <script> in <head> messes Tidy Just a test. jtidy/src/test/resources/706260.html0000644000175000017500000000057507777327667017414 0ustar twernertwerner #706260 size not accepted for input
        jtidy/src/test/resources/427833.html0000644000175000017500000000043207777327667017412 0ustar twernertwerner Escape sequences

        #include <stdio.h>

        #include <stdio.h>

        #include <stdio.h>

        jtidy/src/test/resources/435919.out0000644000175000017500000000036010000613130017205 0ustar twernertwerner [ #435919 ] Nested <q></q>'s not handled correctly So then I said to him, don't go there. jtidy/src/test/resources/480701.xml0000644000175000017500000000055607777327667017246 0ustar twernertwerner jtidy/src/test/resources/427830.out0000644000175000017500000000056010000613130017200 0ustar twernertwerner Test Input For Bug #427830

        Tidy uses an incorrect XHTML 1.0 Namespace, even if the correct namespace is given.

        jtidy/src/test/resources/553414.msg0000644000175000017500000000125411461621360017176 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/688746.msg0000644000175000017500000000704111463516445017230 0ustar twernertwerner 44 2 1 1 declaration]]> 29 2 4 13 78 2 9 15 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 jtidy/src/test/resources/1033035.msg0000644000175000017500000000124711463516445017262 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427671.html0000644000175000017500000000053107777327667017412 0ustar twernertwerner [#427671] <LI> w/FRAME/FRAMESET/OPTGROUP/OPTION loop
        • first item
        • frame item
        • frameset item
        • optgroup item
        • last item
        jtidy/src/test/resources/431965.xhtml0000644000175000017500000000043207777327667017603 0ustar twernertwerner [ #431965 ] XHTML Strict seen as Transitional w/div
        Test
        jtidy/src/test/resources/434100.msg0000644000175000017500000000125411461621360017164 0ustar twernertwerner 13 3 13 -3 in ]]> -1 0 0 0 -1 0 0 0 jtidy/src/test/resources/487283.out0000644000175000017500000000066010000613130017211 0ustar twernertwerner [ #487283 ] >/select< does not terminate >option<
        row 1, cell 1 row 1, cell 2
        row 2, cell 1 row 2, cell 2
        jtidy/src/test/resources/553468.xhtml0000644000175000017500000000053407777327667017611 0ustar twernertwerner [ #553468 ] Doesn't warn about <u> in XHTML strict

        Tidy doesn't complain about underlining in XHTML strict documents

        jtidy/src/test/resources/443381.msg0000644000175000017500000000124611461621360017200 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/658230.msg0000644000175000017500000000145011463516445017207 0ustar twernertwerner 44 2 1 1 declaration]]> 17 2 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427826.msg0000644000175000017500000000300211463516445017207 0ustar twernertwerner 32 2 13 1 8 2 14 -3 ]]> 32 2 38 3 8 2 39 -3 ]]> 28 2 1 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/586555.out0000644000175000017500000000123010000613130017205 0ustar twernertwerner [ 586555 ] Misplaced backslash caused by newline

        [ 586555 ] Misplaced backslash caused by newline

        jtidy/src/test/resources/648768.html0000644000175000017500000000042110114165646017374 0ustar twernertwerner [648768] Fix for character references >= 32768

        jtidy/src/test/resources/438954.html0000644000175000017500000000027307777327667017423 0ustar twernertwerner [ #438954 ] Body tag w/attributes omitted w/hide-end Use "--hide-endtags yes" on command line jtidy/src/test/resources/427846.out0000644000175000017500000000034210103731422017216 0ustar twernertwerner Test Input For Bug #427846
        text-one
        text-two jtidy/src/test/resources/676156.out0000644000175000017500000000035710166303474017237 0ustar twernertwerner foo bar jtidy/src/test/resources/431956.msg0000644000175000017500000000042711463520342017205 0ustar twernertwerner -1 0 0 0 jtidy/src/test/resources/540555.msg0000644000175000017500000000117411463516445017212 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427826.out0000644000175000017500000000221210115150162017211 0ustar twernertwerner [#427826] Script source needs escaping/CDATA section

        If converted to XML/XHTML, the < in the javascript source above causes problems for XML tools.

        jtidy/src/test/resources/570027.cfg0000644000175000017500000000020110111224155017125 0ustar twernertwerner# Tidy configuration file for bug 570027 clean: yes word-2000: yes tidy-mark: false clean= yes word-2000= yes tidy-mark= false jtidy/src/test/resources/427844.msg0000644000175000017500000000126111461621360017203 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/663548.out0000644000175000017500000000077210133721557017241 0ustar twernertwerner [663548] Javascript and Tidy - missing code

        foo

        jtidy/src/test/resources/660397.html0000644000175000017500000003223507777327667017424 0ustar twernertwerner
        Aufklrung ist der Ausgang des Menschen
        aus seiner selbstverschuldeten Unmndigkeit.
        /Immanuel Kant/

        TCPA und Palladium

        Ein weiterer Schritt in die Unmndigkeit
        oder ein Schritt hin zur Emanzipation?

        Von der ffentlichkeit weitgehend unbemerkt geistern seit einigen Wochen geheimnisvolle neue Schlagworte und Krzel durch die Welt der EDV-Profis: TCPA und Palladium. Interessiert habe ich mich damit nher beschftigt, denn allein die Tatsache, da die Bezeichnung Palladium in unserem Fall anders als in den Naturwissenschaften nicht ein wertvolles Metall meint, sondern eine Wortschpfung der Firma Microsoft darstellt, verheit nichts Gutes und weckte daher meinen Argwohn. Der wesentlich neutralere Begriff TCPA (das steht fr Trusted Computing Platform Alliance) dagegen scheint auf den ersten Blick positive oder zumindest neutrale Emotionen zu wecken - aber auch nur auf den ersten Blick, denn wenn man sich des Horrorszenarios bewut wird, das hinter TCPA in Kombination mit Palladium steckt, so drfte wohl selbst der unbedarfteste EDV-Anwender schlaflose Nchte bekommen.

        Was hat es also mit diesen Begriffen auf sich, welche Intentionen liegen diesen Schlagworten zugrunde und was bedeuten die dahintersteckenden neuen Techniken fr den EDV-Anwender?

        Die TCPA ist ein Zusammenschlu fhrender Hardwarehersteller, darunter IBM, HP, AMD und Intel, die sich vorgenommen haben, den Personal Computer durch Implementation neuer Hardwaretechnologien sicherer zu machen. Wie uns allen bewut ist, hat die Monokultur im Betriebssystemsektor dazu gefhrt, da durch das unsgliche Monopol von Microsoft in Kombination mit der grottenschlechten Software dieser Firma allerorten und allenthalben eine wahre Flut von Computerviren, sogenannten trojanischen Pferden, Wrmern und Sicherheitslchern entstanden ist und tglich neu auf den Anwender zurollt, die den Umgang mit dem PC immer wieder zum rgernis werden lt. Diesem bel wollen die an der TCPA beteiligten Konzerne nun durch den sogenannten Fritz-Chip (benannt nach dem US-Senator Fritz Hollings) abhelfen - ein hehrer Wunsch. Bei dem Fritz-Chip handelt es sich um einen Krypto-Baustein, der in zuknftige Generationen von Personal Computern integriert werden und allgemein die Systeme sicherer machen soll. Dieser Chip speichert mehrere Schlssel, die hardware- und anwenderspezifisch definiert sind. Sobald der PC eingeschaltet wird, nimmt der Fritz-Chip seine Arbeit auf und fragt einen Schlssel nach dem anderen ab: Zunchst wird das BIOS abgefragt, anschlieend alle im Rechner vorhandenen BIOS-Erweiterungen der Steckkarten. Danach wird die Festplatte berprft, und anschlieend prft der TCPA-Chip auch noch den Bootsektor, den Bootloader, den Kernel und die Gertetreiber. Da bei jedem dieser Schritte eine Prfsumme abgespeichert und ein 160 Bit langer eindeutiger Wert aus den gewonnenen Daten und einem speziellen Schlssel generiert wird, hat der Fritz-Chip jederzeit die vllige Kontrolle ber das Gesamtsystem.

        Damit taucht schon die erste Problematik fr den Anwender auf: Bereits ein Flash-Update des Rechner-BIOS legt das gesamte System lahm, da dann die generierten Werte des Fritz-Chip nicht mehr mit den gespeicherten Werten, die zertifiziert sind, bereinstimmen. In Zeiten, in denen aufgrund der oftmals schlampig implementierten BIOS-Versionen Flash-Updates derselben zumindest bei den blichen Consumer-Produkten an der Tagesordnung sind, ist also der Fritz-Chip eher hinderlich denn ein Segen fr den Anwender. Gleiches gilt brigens fr diejenigen Anwender, die beispielsweise eine neue Grafikkarte oder eine grere Festplatte einbauen wollen - auch fr sie bedeutet jede Hardware-Modifikation eine - vermutlich natrlich kostenpflichtige - Neuzertifizierung des Gesamtsystems, damit dieses wieder als TCPA-konform angesehen werden kann. Bei der Neuzertifizierung wird online anhand einer Liste mit geprfter Hardware (HCL) und einer weiteren Liste mit gesperrten Seriennummern (SRL) die Konformittstabelle des Rechners geprft und aktualisiert.

        Hat der Fritz-Chip beim Bootvorgang alle Komponenten als TCPA-konform berprft und erkannt, bergibt er die Kontrolle schlielich an das Betriebssystem. Ab diesem Punkt hakt nun - wie knnte es anders sein? - die Firma Microsoft ein mit ihrer Palladium-Technologie. Sobald der Anwender jetzt ein Programm startet, berprft das Betriebssystem dieses anhand der im Fritz-Chip gespeicherten Werte fr die SRL. Sollte sich herausstellen, da dieses Programm keine gltige Lizenz und/oder Seriennummer besitzt oder die Lizenz abgelaufen ist, wird es gar nicht erst gestartet. Stellt es sich als TCPA-konform heraus, so wird nach der Freigabe und dem anschlieenden Start erneut online eine Liste mit gesperrten Dokumenten fr dieses Programm abgerufen (DRL), um zu verhindern, da der Anwender fr ihn nicht vorgesehene Dateien ffnet oder unerlaubterweise nutzt.

        Was sich auf den ersten Blick tatschlich als wirksame Waffe gegen Viren, Trojaner, Wrmer und hnliche Probleme geriert, entmndigt jedoch den Anwender: Palladium stt vor allem bei der Unterhaltungsindustrie, die einen erbitterten Kampf gegen jegliche Weiterverbreitung urheberrechtlich geschtzter Produkte im Internet fhrt, auf groe Zustimmung, bietet sich hier jedoch erstmals vordergrndig die Mglichkeit, MP3-Tauschbrsen und hnliche Dienste effizient trockenzulegen dank Microsoft. Auch das Kopieren einzelner Musikstcke zu privaten Zwecken am heimischen PC wird damit unterbunden - dank Microsoft werden also vermutlich die ohnehin bervollen Kassen der Unterhaltungsindustrie zuknftig noch krftiger klingeln!

        Doch der Anwender hat natrlich noch die Mglichkeit, auch nicht TCPA-konforme Software auf seinem heimischen PC zu installieren und zu starten. Bemerkt Palladium eine solche Anwendung, wird das Gesamtsystem als kompromittiert gekennzeichnet und alle konformen Anwendungen samt Dateien werden geschlossen. Der Nutzwert eines solchen Systems drfte fr den Anwender dann wohl gegen Null tendieren.

        Doch gehen wir einen Schritt weiter und bedenken wir die Folgen dieser Technologie:

        1. Die SRL's, DRL's und HCL's, die fr die Konformitts-Authentifizierung eines PC's unbedingt bentigt werden, werden an zentraler Stelle im Internet gespeichert und abgerufen. Hacker brauchen jetzt also nicht mehr einzelne PC's anzugreifen, sondern knnen ihr Engagement auf diese Server konzentrieren - und damit unter Umstnden mit einem einzigen gelungenen Angriff Millionen von Rechnern unbrauchbar machen.
        2. Es bedarf keiner ausgesprochen ausgeprgten Phantasie, um sich auszumalen, wie Palladium mit unerwnschten Konkurrenzprodukten verfahren kann: Sollen bestimmte Softwareprodukte anderer Hersteller als Microsoft nicht als konform zertifiziert werden, so gengt es, sie auf die schwarze Liste des Palladium-Systems zu setzen. Bei einem Start solcher Software wre der Rechner nur noch sehr eingeschrnkt nutzbar, da kompromittiert - jeder Anwender wrde sich wohl zumindest berlegen, ob er beim nchsten Mal nicht doch zur drittklassigen Spyware aus dem Hause Microsoft greift.
        3. Die Zertifizierungen fr Software und Dateien kosten Geld: Schtzungen gehen von bis zu sechsstelligen Dollarsummen aus fr eine einzige Anwendung. Die Folge dieser Lizenzierungspraxis wre, da die Freewareszene von der Bildflche verschwindet. Viele tausend Programmierer, die unter oftmals groem persnlichen Engagament und erheblichem Zeitaufwand ansehnliche Projekte als Freeware entwickelt haben, htten keine Mglichkeit und auch keine Motivation mehr, ihre oft wirklich innovativen Projekte der Anwendergemeinde zur Verfgung zu stellen dank der kriminellen Krake Microsoft.
        4. Der gesamte von der GPL-Lizenz abgedeckte Bereich wrde ebenfalls sang- und klanglos vor dem Aus stehen, da auch hier zunchst erhebliche Betrge in eine Zertifizierung gesteckt werden mten, denen keine Einnahmen gegenberstehen. Die wohl gefhrlichste Konkurrenz fr Microsoft, nmlich die oftmals aus idealistischer und moralischer Intention heraus handelnden freien Entwickler, die ihre Software unter der GPL-Lizenz vertreiben, wre mit einem Schlag ausgeschaltet.
        5. Dem unsglichen Monopol der Firma Microsoft im Betriebssystemmarkt wrde ein weiterer nachhaltiger Schub verliehen, denn Konkurrenzsysteme mten, um mit dem Fritz-Chip und damit letztendlich auch mit Palladium zu harmonieren, ebenfalls TCPA-konform gestaltet werden. Fr OS/2 WARP ebenso wie fr die meisten Linux-Distributionen und auch Systeme wie FreeBSD, NetBSD oder auch BeOS und (mit Einschrnkungen) QNX wrde der Zwang zur TCPA- und Palladium-Konformitt das Verschwinden vom EDV-Markt bedeuten, denn ohne diese Konformitt wrden diese meist besseren Betriebssysteme als unsicher gelten. OS/2 WARP und die eComStation wrden aus den groen Banken und Versicherungen, bei TK-Dienstleistern und bei den anderen Anwendern im professionellen Umfeld gechtet und von den Festplatten verbannt zugunsten der wesentlich schlechteren Windows-Systeme. Die einzige Alternative bliebe nach dem derzeitigen Stand der Dinge HP-Linux, da HP bereits an der TCPA- und Palladium-Konformitt seines Linux arbeitet.
        6. Mit dem Palladium-System wrde die offene, basisdemokratische Struktur des Internet endgltig zu Grabe getragen und einem Meinungs- und Zensurmonopol der Firma Microsoft weichen. Microsoft knnte ber die variable Gestaltung von Zertifizierungsgebhren die Weiterverbreitung kritischer Dokumente im Internet oder im Rahmen von Software-Distributionen verhindern. Obendrein wren alle Newsdienste auer den Microsoft-eigenen davon betroffen - freiwillige, sehr anerkennenswerte Initiativen wie beispielsweise auch die VOICE, die sich der Aufklrung der OS/2-Gemeinde verschrieben hat, mten fr die einzelnen Beitrge Zertifizierungsgebhren an Microsoft zahlen fr die Palladium-Konformitt, um nicht auf der schwarzen Liste der DRL-Server zu landen. Das Internet als Transporteur basisdemokratischer Ideale wre zerstrt und zu einem Sprachrohr von Microsoft verkommen.
        7. Durch die enge Kopplung der Palladium-Technologie in Kooperation mit dem Fritz-Chip an Hard- und Software wrde der Gebrauchtsoftwaremarkt zum Erliegen kommen - weil bereits einmal durch TCPA und Palladium zertifizierte Software aufgrund der Verschlsselung untrennbar an die Hardware gekoppelt ist. Fr Microsoft erfllt sich damit ein lange gehegter Traum: Jeder Kufer eines PC mu Software aus Redmond neu kaufen, da die alte Software nur ber eine Seriennummern-Freigabe auf ein anderes Gert bertragbar wre - und davon steht nicht ein einziges Wort in den entsprechenden Standardisierungs-Richtlinien.

        Das hier geschilderte Horrorszenario erscheint keineswegs abwegig: Bill Gates hat mit der Unterhaltungsindustrie starke Kombattanten im Rcken, denen es genauso wie ihm um die Profitmaximierung um jeden Preis geht - auch wenn dabei demokratische und ethische Prinzipien nicht nur ausgehhlt, sondern offen mit Fen getreten werden und auf der Strecke bleiben. Offen diskutiert werden die Folgen seiner Technologie noch nicht; bislang hat Gates grten Wert darauf gelegt, sich stets ffentlich und lauthals als Vorkmpfer gegen Raubkopierertum zum Wohle der Software- und der Unterhaltungsindustrie und auch zum angeblichen Nutzen des Endverbrauchers zu gerieren - mit Palladium jedoch berschreitet Microsoft im Halbdunkel ein- fr allemal eine Grenze, die dem vermeintlichen Vorkmpfer Gates fr die Durchsetzung von Urheberrechten bislang Fesseln anlegte: Nun geht es um die vollkommene Kontrolle der Informationsgesellschaft durch einen Konzern, der krimineller Machenschaften mehrfach berfhrt ist - die Weltherrschaft einer einzelnen kleinen Clique im Mediensektor droht, und das auch noch mit blauugiger Zustimmung einiger international agierender Medienkonzerne, die bisher offenbar noch gar nicht realisiert haben, da sie sich mit ihrer offensichtlich blinden Profitsucht einem Mann ausliefern, den andere als den gefhrlichsten Zeitgenossen seit Adolf Hitler betrachten.

        Es wird Zeit, da die Demokraten unter den EDV-Profis und -Anwendern endlich aus ihrem Dornrschenschlaf aufwachen, denn:

        Man darf nicht warten, bis aus dem Schneeball eine Lawine geworden ist. Man mu den rollenden Schneeball zertreten. Die Lawine hlt keiner mehr auf. Sie ruht erst, wenn sie alles unter sich begraben hat...
        /Erich Kstner/
        jtidy/src/test/resources/1403105.msg0000644000175000017500000000432511463521405017251 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 1 7 before
        ]]> 23 2 1 7 ]]> 15 2 1 12 ]]> 6 2 1 12 ]]> 23 2 1 12 ]]> 6 2 1 13 ]]> 23 2 1 13 ]]> 17 2 1 13 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/533233.msg0000644000175000017500000000125511463516445017205 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/1097062.html0000644000175000017500000000101110167211525017426 0ustar twernertwerner [#1097062] trimInitialSpace does not handle nested inlines

        StartMiddle End

        jtidy/src/test/resources/433360.html0000644000175000017500000000044607777327667017407 0ustar twernertwerner [ #433360 ] Tags with missing > can't be repaired

        There seems to be an error occurring when you don't end a tag with a >. Tidy won't fix it.

        jtidy/src/test/resources/566542.msg0000644000175000017500000000765711461621360017221 0ustar twernertwerner 44 2 1 1 declaration]]> 11 2 4 1 isn't allowed in elements]]> 15 2 4 1 ]]> 23 2 4 5 ]]> 7 2 4 5 before

        ]]> 23 2 4 5 ]]> 7 2 6 7 before

        ]]> 15 2 7 1 ]]> 7 2 7 1 before

      • ]]> 23 2 7 1 ]]> 23 2 7 1 ]]> 11 2 7 1 isn't allowed in elements]]> 15 2 7 1 ]]> 15 2 7 5 ]]> 7 2 7 5 before

        ]]> 23 2 7 5 ]]> 23 2 7 5 ]]> 7 2 7 5 before

        ]]> 23 2 7 5 ]]> 15 2 7 8 ]]> 24 1 7 40 is probably intended as ]]> 23 2 9 1 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/678268.html0000644000175000017500000000065707777327667017435 0ustar twernertwerner Error: File Copy Error! File=C:\WinNT\ System32\PERFLIB_PERFDATA_<#>.DAT (copying to a '.fil'). Error: File Copy Error! File=C:\WinNT\ System32\PERFLIB_PERFDATA_ <#>.DAT (copying to a '.fil').
        id
        jtidy/src/test/resources/508936.html0000644000175000017500000000044607777327667017423 0ustar twernertwerner [ #508936 ] Parse CSS Selector prefix in config file

        Allow user to specify prefix for class names Tidy generates with --clean yes option.

        jtidy/src/test/resources/598860.msg0000644000175000017500000000217611463516445017231 0ustar twernertwerner 44 2 1 1 declaration]]> 49 2 4 1 lacks "type" attribute]]> 8 2 8 1 8 2 9 -3 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/603128.html0000644000175000017500000000042607777327667017406 0ustar twernertwerner [ 603128 ] tidy adds newlines after </html> There is exactly one line-ending after the </html> - older versions of Tidy will add an additional line-ending. jtidy/src/test/resources/678268.out0000644000175000017500000000123410000613130017214 0ustar twernertwerner Error: File Copy Error! File=C:\WinNT\ System32\PERFLIB_PERFDATA_<#>.DAT (copying to a '.fil'). Error: File Copy Error! File=C:\WinNT\ System32\PERFLIB_PERFDATA_ <#>.DAT (copying to a '.fil').
        id
        jtidy/src/test/resources/444394.cfg0000644000175000017500000000044710115150162017150 0ustar twernertwerner# Tidy configuration file for bug #444394 new-inline-tags: o:p char-encoding: latin1 tidy-mark: no clean: yes drop-font-tags: yes logical-emphasis: yes word-2000: yes new-inline-tags= o:p char-encoding= latin1 tidy-mark= no clean= yes drop-font-tags= yes logical-emphasis= yes word-2000= yes jtidy/src/test/resources/1097062.cfg0000644000175000017500000000012110167211525017222 0ustar twernertwernertidy-mark: false wrap: 0 word-2000: yes tidy-mark= false wrap= 0 word-2000= yes jtidy/src/test/resources/427671.msg0000644000175000017500000000371611461621360017210 0ustar twernertwerner 44 2 1 1 declaration]]> 8 2 8 5 ]]> 8 2 8 22 ]]> 8 2 9 5 ]]> 8 2 9 28 ]]> 8 2 10 5 ]]> 8 2 10 28 ]]> 8 2 11 5 ]]> 8 2 11 24 ]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/435919.html0000644000175000017500000000026007777327667017415 0ustar twernertwerner [ #435919 ] Nested <q></q>'s not handled correctly So then I said to him, don't go there. jtidy/src/test/resources/463066.html0000644000175000017500000002555707777327667017427 0ustar twernertwerner [ #463066 ] CleanWord2000 misses mso-list bullets

        Test 1

        1. Here
        2. We
        3. Go
        4. Again

         

        1. Ok
        2. That
        3. Worked

         

        v     But

        v     It

        v     Does

        v     Not

        v     Work

        v     With

        v     Bullet

        v     Points

         

        q       Now

        q       It

        q       Is

        q       Working

         

        • Try
        • It
        • Again

         

        • Do
        • It
        • Again
        • And

         

        jtidy/src/test/resources/480843.msg0000644000175000017500000000200211463516445017204 0ustar twernertwerner 11 2 6 1 isn't allowed in elements]]> 66 2 8 19 Anchor "newintro" already defined]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/540571.html0000644000175000017500000000070007777327667017403 0ustar twernertwerner #540571 Inconsistent behaviour with span inline element

        Hello World

        The font inline is moved so it becomes a child of the h1 element.

        Hello World

        The span inline is not moved so it becomes a child of the h1 element, which is inconsistent and does not correspond with current browser behaviour any more.

        jtidy/src/test/resources/467865.html0000644000175000017500000000040307777327667017421 0ustar twernertwerner [ #467865 ] un-nesting is incorrect

        (foo bar). ...

        jtidy/src/test/resources/433856.msg0000644000175000017500000000725211461621360017211 0ustar twernertwerner 44 2 1 1 declaration]]> 29 2 2 11 7 2 10 33 before

        ]]> 23 2 10 33 ]]> 7 2 10 33 before

        ]]> 23 2 10 33 ]]> 7 2 10 33 before

        ]]> 23 2 10 33 ]]> 15 2 10 36 ]]> 15 2 10 36 ]]> 15 2 10 36 ]]> 8 2 11 -3 ]]> 8 2 11 1 ]]> 8 2 11 5 ]]> 7 2 12 1 before

        ]]> 23 2 12 1 ]]> 15 2 12 4 ]]> 8 2 13 -3 ]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/487283.html0000644000175000017500000000050407777327667017417 0ustar twernertwerner [ #487283 ] >/select< does not terminate >option<

        row 1, cell 1 row 1, cell 2
        row 2, cell 1 row 2, cell 2
        jtidy/src/test/resources/435917.msg0000644000175000017500000000155711461621360017213 0ustar twernertwerner 58 2 11 2 attribute with missing trailing quote mark]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427846.msg0000644000175000017500000000314311461621360017206 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 7 1 before
        ]]> 23 2 7 1 ]]> 15 2 8 1 ]]> 15 2 10 -1 ]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/593705.msg0000644000175000017500000000117511463516445017220 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/660397.out0000644000175000017500000003457010104011573017227 0ustar twernertwerner
        »Aufklärung ist der Ausgang des Menschen
        aus seiner selbstverschuldeten Unmündigkeit.«
        /Immanuel Kant/

        »TCPA« und »Palladium«

        Ein weiterer Schritt in die Unmündigkeit
        oder ein Schritt hin zur Emanzipation? €

        Von der Öffentlichkeit weitgehend unbemerkt geistern seit einigen Wochen geheimnisvolle neue Schlagworte und Kürzel durch die Welt der EDV-Profis: »TCPA« und »Palladium«. Interessiert habe ich mich damit näher beschäftigt, denn allein die Tatsache, daß die Bezeichnung »Palladium« in unserem Fall anders als in den Naturwissenschaften nicht ein wertvolles Metall meint, sondern eine Wortschöpfung der Firma »Microsoft« darstellt, verheißt nichts Gutes und weckte daher meinen Argwohn. Der wesentlich neutralere Begriff »TCPA« (das steht für »Trusted Computing Platform Alliance«) dagegen scheint auf den ersten Blick positive oder zumindest neutrale Emotionen zu wecken - aber auch nur auf den ersten Blick, denn wenn man sich des Horrorszenarios bewußt wird, das hinter »TCPA« in Kombination mit »Palladium« steckt, so dürfte wohl selbst der unbedarfteste EDV-Anwender schlaflose Nächte bekommen.

        Was hat es also mit diesen Begriffen auf sich, welche Intentionen liegen diesen Schlagworten zugrunde und was bedeuten die dahintersteckenden neuen Techniken für den EDV-Anwender?

        Die »TCPA« ist ein Zusammenschluß führender Hardwarehersteller, darunter IBM, HP, AMD und Intel, die sich vorgenommen haben, den Personal Computer durch Implementation neuer Hardwaretechnologien sicherer zu machen. Wie uns allen bewußt ist, hat die Monokultur im Betriebssystemsektor dazu geführt, daß durch das unsägliche Monopol von »Microsoft« in Kombination mit der grottenschlechten Software dieser Firma allerorten und allenthalben eine wahre Flut von Computerviren, sogenannten trojanischen Pferden, Würmern und Sicherheitslöchern entstanden ist und täglich neu auf den Anwender zurollt, die den Umgang mit dem PC immer wieder zum Ärgernis werden läßt. Diesem Übel wollen die an der »TCPA« beteiligten Konzerne nun durch den sogenannten »Fritz«-Chip (benannt nach dem US-Senator Fritz Hollings) abhelfen - ein hehrer Wunsch. Bei dem Fritz-Chip handelt es sich um einen Krypto-Baustein, der in zukünftige Generationen von Personal Computern integriert werden und allgemein die Systeme sicherer machen soll. Dieser Chip speichert mehrere Schlüssel, die hardware- und anwenderspezifisch definiert sind. Sobald der PC eingeschaltet wird, nimmt der Fritz-Chip seine Arbeit auf und fragt einen Schlüssel nach dem anderen ab: Zunächst wird das BIOS abgefragt, anschließend alle im Rechner vorhandenen BIOS-Erweiterungen der Steckkarten. Danach wird die Festplatte überprüft, und anschließend prüft der TCPA-Chip auch noch den Bootsektor, den Bootloader, den Kernel und die Gerätetreiber. Da bei jedem dieser Schritte eine Prüfsumme abgespeichert und ein 160 Bit langer eindeutiger Wert aus den gewonnenen Daten und einem speziellen Schlüssel generiert wird, hat der Fritz-Chip jederzeit die völlige Kontrolle über das Gesamtsystem.

        Damit taucht schon die erste Problematik für den Anwender auf: Bereits ein Flash-Update des Rechner-BIOS legt das gesamte System lahm, da dann die generierten Werte des Fritz-Chip nicht mehr mit den gespeicherten Werten, die zertifiziert sind, übereinstimmen. In Zeiten, in denen aufgrund der oftmals schlampig implementierten BIOS-Versionen Flash-Updates derselben zumindest bei den üblichen Consumer-Produkten an der Tagesordnung sind, ist also der Fritz-Chip eher hinderlich denn ein Segen für den Anwender. Gleiches gilt übrigens für diejenigen Anwender, die beispielsweise eine neue Grafikkarte oder eine größere Festplatte einbauen wollen - auch für sie bedeutet jede Hardware-Modifikation eine - vermutlich natürlich kostenpflichtige - Neuzertifizierung des Gesamtsystems, damit dieses wieder als »TCPA-konform« angesehen werden kann. Bei der Neuzertifizierung wird online anhand einer Liste mit geprüfter Hardware (HCL) und einer weiteren Liste mit gesperrten Seriennummern (SRL) die Konformitätstabelle des Rechners geprüft und aktualisiert.

        Hat der Fritz-Chip beim Bootvorgang alle Komponenten als »TCPA-konform« überprüft und erkannt, übergibt er die Kontrolle schließlich an das Betriebssystem. Ab diesem Punkt hakt nun - wie könnte es anders sein? - die Firma »Microsoft« ein mit ihrer »Palladium«-Technologie. Sobald der Anwender jetzt ein Programm startet, überprüft das Betriebssystem dieses anhand der im Fritz-Chip gespeicherten Werte für die SRL. Sollte sich herausstellen, daß dieses Programm keine gültige Lizenz und/oder Seriennummer besitzt oder die Lizenz abgelaufen ist, wird es gar nicht erst gestartet. Stellt es sich als »TCPA-konform« heraus, so wird nach der Freigabe und dem anschließenden Start erneut online eine Liste mit gesperrten Dokumenten für dieses Programm abgerufen (DRL), um zu verhindern, daß der Anwender für ihn nicht vorgesehene Dateien öffnet oder unerlaubterweise nutzt.

        Was sich auf den ersten Blick tatsächlich als wirksame Waffe gegen Viren, Trojaner, Würmer und ähnliche Probleme geriert, entmündigt jedoch den Anwender: »Palladium« stößt vor allem bei der Unterhaltungsindustrie, die einen erbitterten Kampf gegen jegliche Weiterverbreitung urheberrechtlich geschützter Produkte im Internet führt, auf große Zustimmung, bietet sich hier jedoch erstmals vordergründig die Möglichkeit, MP3-Tauschbörsen und ähnliche Dienste effizient trockenzulegen dank »Microsoft«. Auch das Kopieren einzelner Musikstücke zu privaten Zwecken am heimischen PC wird damit unterbunden - dank »Microsoft« werden also vermutlich die ohnehin übervollen Kassen der Unterhaltungsindustrie zukünftig noch kräftiger klingeln!

        Doch der Anwender hat natürlich noch die Möglichkeit, auch nicht »TCPA-konforme« Software auf seinem heimischen PC zu installieren und zu starten. Bemerkt »Palladium« eine solche Anwendung, wird das Gesamtsystem als »kompromittiert« gekennzeichnet und alle konformen Anwendungen samt Dateien werden geschlossen. Der Nutzwert eines solchen Systems dürfte für den Anwender dann wohl gegen Null tendieren.

        Doch gehen wir einen Schritt weiter und bedenken wir die Folgen dieser Technologie:

        1. Die SRL's, DRL's und HCL's, die für die Konformitäts-Authentifizierung eines PC's unbedingt benötigt werden, werden an zentraler Stelle im Internet gespeichert und abgerufen. Hacker brauchen jetzt also nicht mehr einzelne PC's anzugreifen, sondern können ihr Engagement auf diese Server konzentrieren - und damit unter Umständen mit einem einzigen gelungenen Angriff Millionen von Rechnern unbrauchbar machen.
        2. Es bedarf keiner ausgesprochen ausgeprägten Phantasie, um sich auszumalen, wie »Palladium« mit unerwünschten Konkurrenzprodukten verfahren kann: Sollen bestimmte Softwareprodukte anderer Hersteller als »Microsoft« nicht als »konform« zertifiziert werden, so genügt es, sie auf die »schwarze Liste« des »Palladium«-Systems zu setzen. Bei einem Start solcher Software wäre der Rechner nur noch sehr eingeschränkt nutzbar, da »kompromittiert« - jeder Anwender würde sich wohl zumindest überlegen, ob er beim nächsten Mal nicht doch zur drittklassigen Spyware aus dem Hause »Microsoft« greift.
        3. Die Zertifizierungen für Software und Dateien kosten Geld: Schätzungen gehen von bis zu sechsstelligen Dollarsummen aus für eine einzige Anwendung. Die Folge dieser Lizenzierungspraxis wäre, daß die Freewareszene von der Bildfläche verschwindet. Viele tausend Programmierer, die unter oftmals großem persönlichen Engagament und erheblichem Zeitaufwand ansehnliche Projekte als Freeware entwickelt haben, hätten keine Möglichkeit und auch keine Motivation mehr, ihre oft wirklich innovativen Projekte der Anwendergemeinde zur Verfügung zu stellen dank der kriminellen Krake »Microsoft«.
        4. Der gesamte von der GPL-Lizenz abgedeckte Bereich würde ebenfalls sang- und klanglos vor dem Aus stehen, da auch hier zunächst erhebliche Beträge in eine Zertifizierung gesteckt werden müßten, denen keine Einnahmen gegenüberstehen. Die wohl gefährlichste Konkurrenz für »Microsoft«, nämlich die oftmals aus idealistischer und moralischer Intention heraus handelnden freien Entwickler, die ihre Software unter der GPL-Lizenz vertreiben, wäre mit einem Schlag ausgeschaltet.
        5. Dem unsäglichen Monopol der Firma »Microsoft« im Betriebssystemmarkt würde ein weiterer nachhaltiger Schub verliehen, denn Konkurrenzsysteme müßten, um mit dem Fritz-Chip und damit letztendlich auch mit »Palladium« zu harmonieren, ebenfalls »TCPA-konform« gestaltet werden. Für OS/2 WARP ebenso wie für die meisten Linux-Distributionen und auch Systeme wie FreeBSD, NetBSD oder auch BeOS und (mit Einschränkungen) QNX würde der Zwang zur »TCPA-« und »Palladium«-Konformität das Verschwinden vom EDV-Markt bedeuten, denn ohne diese Konformität würden diese meist besseren Betriebssysteme als »unsicher« gelten. OS/2 WARP und die eComStation würden aus den großen Banken und Versicherungen, bei TK-Dienstleistern und bei den anderen Anwendern im professionellen Umfeld geächtet und von den Festplatten verbannt zugunsten der wesentlich schlechteren »Windows«-Systeme. Die einzige Alternative bliebe nach dem derzeitigen Stand der Dinge HP-Linux, da HP bereits an der »TCPA-« und »Palladium«-Konformität seines Linux arbeitet.
        6. Mit dem »Palladium«-System würde die offene, basisdemokratische Struktur des Internet endgültig zu Grabe getragen und einem Meinungs- und Zensurmonopol der Firma »Microsoft« weichen. »Microsoft« könnte über die variable Gestaltung von Zertifizierungsgebühren die Weiterverbreitung kritischer Dokumente im Internet oder im Rahmen von Software-Distributionen verhindern. Obendrein wären alle Newsdienste außer den »Microsoft«-eigenen davon betroffen - freiwillige, sehr anerkennenswerte Initiativen wie beispielsweise auch die VOICE, die sich der Aufklärung der OS/2-Gemeinde verschrieben hat, müßten für die einzelnen Beiträge Zertifizierungsgebühren an »Microsoft« zahlen für die »Palladium«-Konformität, um nicht auf der »schwarzen Liste« der DRL-Server zu landen. Das Internet als Transporteur basisdemokratischer Ideale wäre zerstört und zu einem Sprachrohr von »Microsoft« verkommen.
        7. Durch die enge Kopplung der »Palladium«-Technologie in Kooperation mit dem Fritz-Chip an Hard- und Software würde der Gebrauchtsoftwaremarkt zum Erliegen kommen - weil bereits einmal durch TCPA und »Palladium« zertifizierte Software aufgrund der Verschlüsselung untrennbar an die Hardware gekoppelt ist. Für »Microsoft« erfüllt sich damit ein lange gehegter Traum: Jeder Käufer eines PC muß Software aus Redmond neu kaufen, da die alte Software nur über eine Seriennummern-Freigabe auf ein anderes Gerät übertragbar wäre - und davon steht nicht ein einziges Wort in den entsprechenden Standardisierungs-Richtlinien.

        Das hier geschilderte Horrorszenario erscheint keineswegs abwegig: Bill Gates hat mit der Unterhaltungsindustrie starke Kombattanten im Rücken, denen es genauso wie ihm um die Profitmaximierung um jeden Preis geht - auch wenn dabei demokratische und ethische Prinzipien nicht nur ausgehöhlt, sondern offen mit Füßen getreten werden und auf der Strecke bleiben. Offen diskutiert werden die Folgen seiner Technologie noch nicht; bislang hat Gates größten Wert darauf gelegt, sich stets öffentlich und lauthals als Vorkämpfer gegen Raubkopierertum zum Wohle der Software- und der Unterhaltungsindustrie und auch zum angeblichen Nutzen des Endverbrauchers zu gerieren - mit »Palladium« jedoch überschreitet »Microsoft« im Halbdunkel ein- für allemal eine Grenze, die dem vermeintlichen Vorkämpfer Gates für die Durchsetzung von Urheberrechten bislang Fesseln anlegte: Nun geht es um die vollkommene Kontrolle der Informationsgesellschaft durch einen Konzern, der krimineller Machenschaften mehrfach überführt ist - die Weltherrschaft einer einzelnen kleinen Clique im Mediensektor droht, und das auch noch mit blauäugiger Zustimmung einiger international agierender Medienkonzerne, die bisher offenbar noch gar nicht realisiert haben, daß sie sich mit ihrer offensichtlich blinden Profitsucht einem Mann ausliefern, den andere als den gefährlichsten Zeitgenossen seit Adolf Hitler betrachten.

        Es wird Zeit, daß die Demokraten unter den EDV-Profis und -Anwendern endlich aus ihrem Dornröschenschlaf aufwachen, denn:

        »Man darf nicht warten, bis aus dem Schneeball eine Lawine geworden ist. Man muß den rollenden Schneeball zertreten. Die Lawine hält keiner mehr auf. Sie ruht erst, wenn sie alles unter sich begraben hat...«
        /Erich Kästner/
        jtidy/src/test/resources/511243.out0000644000175000017500000000066310115433712017211 0ustar twernertwerner [ #511243 ] xhtml utf8 format bug

        How to…
        Place an extended-hours order:

        jtidy/src/test/resources/427676.html0000644000175000017500000000022107777327667017413 0ustar twernertwerner

        This is a Red link

        jtidy/src/test/resources/545067.msg0000644000175000017500000000146311461621360017205 0ustar twernertwerner 23 2 4 1 ]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/427812.out0000644000175000017500000000100010104006273017177 0ustar twernertwerner [ #427812 ] Reprocessing OBJECT removes PARAM jtidy/src/test/resources/427820.html0000644000175000017500000000044407777327667017411 0ustar twernertwerner Test Input For Bug #427820

        jtidy/src/test/resources/658230.html0000644000175000017500000014767507777327667017434 0ustar twernertwerner

        BIG-5 r

        Here are some entities: & " — ′

        A0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        A9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        AF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        B9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        BF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        C0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        C1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        C2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯ ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿

        C3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ

        C4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ġ ġ Ģ ģ Ĥ ĥ Ħ ħ Ĩ ĩ Ī ī Ĭ ĭ Į į İ ı IJ ij Ĵ ĵ Ķ ķ ĸ Ĺ ĺ Ļ ļ Ľ ľ Ŀ

        C5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Š š Ţ ţ Ť ť Ŧ ŧ Ũ ũ Ū ū Ŭ ŭ Ů ů Ű ű Ų ų Ŵ ŵ Ŷ ŷ Ÿ Ź ź Ż ż Ž ž ſ

        C6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ơ ơ Ƣ ƣ Ƥ ƥ Ʀ Ƨ ƨ Ʃ ƪ ƫ Ƭ ƭ Ʈ Ư ư Ʊ Ʋ Ƴ ƴ Ƶ ƶ Ʒ Ƹ ƹ ƺ ƻ Ƽ ƽ ƾ ƿ

        C7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ǡ ǡ Ǣ ǣ Ǥ ǥ Ǧ ǧ Ǩ ǩ Ǫ ǫ Ǭ ǭ Ǯ ǯ ǰ DZ Dz dz Ǵ ǵ Ƕ Ƿ Ǹ ǹ Ǻ ǻ Ǽ ǽ Ǿ ǿ

        C8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ƞ ȡ Ȣ ȣ Ȥ ȥ Ȧ ȧ Ȩ ȩ Ȫ ȫ Ȭ ȭ Ȯ ȯ Ȱ ȱ Ȳ ȳ ȴ ȵ ȶ ȷ ȸ ȹ Ⱥ Ȼ ȼ Ƚ Ⱦ ȿ

        C9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ɠ ɡ ɢ ɣ ɤ ɥ ɦ ɧ ɨ ɩ ɪ ɫ ɬ ɭ ɮ ɯ ɰ ɱ ɲ ɳ ɴ ɵ ɶ ɷ ɸ ɹ ɺ ɻ ɼ ɽ ɾ ɿ

        CA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ʠ ʡ ʢ ʣ ʤ ʥ ʦ ʧ ʨ ʩ ʪ ʫ ʬ ʭ ʮ ʯ ʰ ʱ ʲ ʳ ʴ ʵ ʶ ʷ ʸ ʹ ʺ ʻ ʼ ʽ ʾ ʿ

        CB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ˠ ˡ ˢ ˣ ˤ ˥ ˦ ˧ ˨ ˩ ˪ ˫ ˬ ˭ ˮ ˯ ˰ ˱ ˲ ˳ ˴ ˵ ˶ ˷ ˸ ˹ ˺ ˻ ˼ ˽ ˾ ˿

        CC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ̠ ̡ ̢ ̣ ̤ ̥ ̦ ̧ ̨ ̩ ̪ ̫ ̬ ̭ ̮ ̯ ̰ ̱ ̲ ̳ ̴ ̵ ̶ ̷ ̸ ̹ ̺ ̻ ̼ ̽ ̾ ̿

        CD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ͠ ͡ ͢ ͣ ͤ ͥ ͦ ͧ ͨ ͩ ͪ ͫ ͬ ͭ ͮ ͯ Ͱ ͱ Ͳ ͳ ʹ ͵ Ͷ ͷ ͸ ͹ ͺ ͻ ͼ ͽ ; Ϳ

        CE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Π Ρ ΢ Σ Τ Υ Φ Χ Ψ Ω Ϊ Ϋ ά έ ή ί ΰ α β γ δ ε ζ η θ ι κ λ μ ν ξ ο

        CF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ϡ ϡ Ϣ ϣ Ϥ ϥ Ϧ ϧ Ϩ ϩ Ϫ ϫ Ϭ ϭ Ϯ ϯ ϰ ϱ ϲ ϳ ϴ ϵ ϶ Ϸ ϸ Ϲ Ϻ ϻ ϼ Ͻ Ͼ Ͽ

        D0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Р С Т У Ф Х Ц Ч Ш Щ Ъ Ы Ь Э Ю Я а б в г д е ж з и й к л м н о п

        D1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ѡ ѡ Ѣ ѣ Ѥ ѥ Ѧ ѧ Ѩ ѩ Ѫ ѫ Ѭ ѭ Ѯ ѯ Ѱ ѱ Ѳ ѳ Ѵ ѵ Ѷ ѷ Ѹ ѹ Ѻ ѻ Ѽ ѽ Ѿ ѿ

        D2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ҡ ҡ Ң ң Ҥ ҥ Ҧ ҧ Ҩ ҩ Ҫ ҫ Ҭ ҭ Ү ү Ұ ұ Ҳ ҳ Ҵ ҵ Ҷ ҷ Ҹ ҹ Һ һ Ҽ ҽ Ҿ ҿ

        D3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ӡ ӡ Ӣ ӣ Ӥ ӥ Ӧ ӧ Ө ө Ӫ ӫ Ӭ ӭ Ӯ ӯ Ӱ ӱ Ӳ ӳ Ӵ ӵ Ӷ ӷ Ӹ ӹ Ӻ ӻ Ӽ ӽ Ӿ ӿ

        D4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ Ԡ ԡ Ԣ ԣ Ԥ ԥ Ԧ ԧ Ԩ ԩ Ԫ ԫ Ԭ ԭ Ԯ ԯ ԰ Ա Բ Գ Դ Ե Զ Է Ը Թ Ժ Ի Լ Խ Ծ Կ

        D5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ՠ ա բ գ դ ե զ է ը թ ժ ի լ խ ծ կ հ ձ ղ ճ մ յ ն շ ո չ պ ջ ռ ս վ տ

        D6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ֠ ֡ ֢ ֣ ֤ ֥ ֦ ֧ ֨ ֩ ֪ ֫ ֬ ֭ ֮ ֯ ְ ֱ ֲ ֳ ִ ֵ ֶ ַ ָ ֹ ֺ ֻ ּ ֽ ־ ֿ

        D7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ נ ס ע ף פ ץ צ ק ר ש ת ׫ ׬ ׭ ׮ ׯ װ ױ ײ ׳ ״ ׵ ׶ ׷ ׸ ׹ ׺ ׻ ׼ ׽ ׾ ׿

        D8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ؠ ء آ أ ؤ إ ئ ا ب ة ت ث ج ح خ د ذ ر ز س ش ص ض ط ظ ع غ ػ ؼ ؽ ؾ ؿ

        D9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ٠ ١ ٢ ٣ ٤ ٥ ٦ ٧ ٨ ٩ ٪ ٫ ٬ ٭ ٮ ٯ ٰ ٱ ٲ ٳ ٴ ٵ ٶ ٷ ٸ ٹ ٺ ٻ ټ ٽ پ ٿ

        DA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ڠ ڡ ڢ ڣ ڤ ڥ ڦ ڧ ڨ ک ڪ ګ ڬ ڭ ڮ گ ڰ ڱ ڲ ڳ ڴ ڵ ڶ ڷ ڸ ڹ ں ڻ ڼ ڽ ھ ڿ

        DB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ۠ ۡ ۢ ۣ ۤ ۥ ۦ ۧ ۨ ۩ ۪ ۫ ۬ ۭ ۮ ۯ ۰ ۱ ۲ ۳ ۴ ۵ ۶ ۷ ۸ ۹ ۺ ۻ ۼ ۽ ۾ ۿ

        DC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ܠ ܡ ܢ ܣ ܤ ܥ ܦ ܧ ܨ ܩ ܪ ܫ ܬ ܭ ܮ ܯ ܰ ܱ ܲ ܳ ܴ ܵ ܶ ܷ ܸ ܹ ܺ ܻ ܼ ܽ ܾ ܿ

        DD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ݠ ݡ ݢ ݣ ݤ ݥ ݦ ݧ ݨ ݩ ݪ ݫ ݬ ݭ ݮ ݯ ݰ ݱ ݲ ݳ ݴ ݵ ݶ ݷ ݸ ݹ ݺ ݻ ݼ ݽ ݾ ݿ

        DE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ޠ ޡ ޢ ޣ ޤ ޥ ަ ާ ި ީ ު ޫ ެ ޭ ޮ ޯ ް ޱ ޲ ޳ ޴ ޵ ޶ ޷ ޸ ޹ ޺ ޻ ޼ ޽ ޾ ޿

        DF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ ߠ ߡ ߢ ߣ ߤ ߥ ߦ ߧ ߨ ߩ ߪ ߫ ߬ ߭ ߮ ߯ ߰ ߱ ߲ ߳ ߴ ߵ ߶ ߷ ߸ ߹ ߺ ߻ ߼ ߽ ߾ ߿

        E0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        E9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        ED @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        EF @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F0 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F1 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F2 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F3 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F4 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F5 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F6 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F7 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F8 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        F9 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FA @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FB @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FC @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FD @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~

        FE @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ jtidy/src/test/resources/500236.msg0000644000175000017500000000070211463516445017176 0ustar twernertwerner 29 2 1 11 -1 0 0 0 jtidy/src/test/resources/929936.out0000644000175000017500000000053210034326060017227 0ustar twernertwerner [ # ] Escape URI 1
        2
        3
        4
        5
        6
        jtidy/src/test/resources/517550.xhtml0000644000175000017500000000063607777327667017604 0ustar twernertwerner [ #517550 ] parser misinterprets ?xml-stylesheet PI

        A test document

        jtidy/src/test/resources/537604.msg0000644000175000017500000000072011463516445017207 0ustar twernertwerner 3 2 5 46 -1 0 0 0 jtidy/src/test/resources/446019.xhtml0000644000175000017500000000056307777327667017604 0ustar twernertwerner [ #446019 ] <img name="foo"> allowed in XTHML-Strict

        TestTest

        jtidy/src/test/resources/763186.out0000644000175000017500000000037010110502420017207 0ustar twernertwerner
        Joint



        Standard
        jtidy/src/test/resources/655338.msg0000644000175000017500000000153711463516445017223 0ustar twernertwerner 57 2 3 1 has XML attribute "xml:lang"]]> 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/656889.out0000644000175000017500000000044110000613130017220 0ustar twernertwerner [ 656889 ] textarea text and line wrapping
        jtidy/src/test/resources/427633.html0000644000175000017500000000031510001055575017352 0ustar twernertwerner [#427663] Line endings not supported correctly

        This is a carriage return This is a Unix line-ending This is a DOS line ending jtidy/src/test/resources/929936.cfg0000644000175000017500000000005610034326060017160 0ustar twernertwernertidy-mark=false wrap=0 tidy-mark:false wrap:0jtidy/src/test/resources/1024661.html0000644000175000017500000000053510120310045017415 0ustar twernertwerner [1024661] Error Parsing duplicate style
        ...
        jtidy/src/test/resources/427810.msg0000644000175000017500000000264111461621360017177 0ustar twernertwerner 44 2 1 1 declaration]]> 21 2 8 1 is not approved by W3C]]> 21 2 9 2 is not approved by W3C]]> 21 2 10 2 is not approved by W3C]]> 111 0 1 1 -1 0 0 0 4 0 1 1 ...
        into the markup.]]> jtidy/src/test/resources/559774.out0000644000175000017500000000046710133721557017247 0ustar twernertwerner [559774] tidy version rejects all id values

        header with a valid id

        jtidy/src/test/resources/427841.html0000644000175000017500000000074007777327667017413 0ustar twernertwerner Test input for bug #427841

        Tidy crashes on badly formed HTML involving nested lists.

        • Merge adjacent lists
          jtidy/src/test/resources/508936.cfg0000644000175000017500000000064210000613130017140 0ustar twernertwernerclean: yes tidy-mark: false # Error 1: escaped number too long. Max 4 hex digits # css-prefix: \77777abc # Error 2: class prefix starts with digit # css-prefix: 77abc # Error 3: Unescaped invalid character # css-prefix: abc # OK 1: Plain old name # css-prefix: abc123 # OK 2: Begin w/ escaped number # css-prefix: \77abc # OK 3: escaped number css-prefix: abc\8 clean= yes tidy-mark= false css-prefix= abc\8 jtidy/src/test/resources/527118.cfg0000644000175000017500000000016610033573374017157 0ustar twernertwerneradd-xml-decl=true output-xhtml=true tidy-mark=false wrap=0 add-xml-decl:true output-xhtml:true tidy-mark:false wrap:0jtidy/src/test/resources/994841.out0000644000175000017500000000051310115150162017221 0ustar twernertwerner [994841] Whitespace is removed from value attributes
          jtidy/src/test/resources/431736.msg0000644000175000017500000000120311461621360017172 0ustar twernertwerner 44 2 2 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/431716.html0000644000175000017500000001442307777327667017412 0ustar twernertwerner civRights2

          Civil Rights #2

          Overview

          • Who are you protected from?
          • How are you supposed to do anything about it?
          • Why do protections grow and shrink?
          • Rights of women
          • Rights of disabled people
          • Native Americans

          Who are you protected from?

          • "State government" under 14th Amendment
            • Sex/race, not age!
            • People who act "on behalf" of state government

          Who are you protected from?

          • "State government" under 14th Amendment
            • Sex/race, not age!
            • People who act "on behalf" of state government
          • Federal Government Contractors

          Who are you protected from?

          • "State government" under 14th Amendment
            • Sex/race, not age!
            • People who act "on behalf" of state government
          • Federal Government Contractors
          • Anybody Congress can regulate under the commerce clause
            • Civil Rights Acts (1866, 1964, 1991)

          How are rules enforced?

          1. Criminal prosecution

          How are rules enforced?

          1. Criminal prosecution
          2. EEOC complaint
            1. Lawsuit for back pay and reinstatement
            2. Affirmative Action order.

          How are rules enforced?

          1. Criminal prosecution
          2. EEOC complaint
            1. Lawsuit for back pay and reinstatement
            2. Affirmative Action order.
          3. Private lawsuit--financial compensation

          What about Affirmative Action?

          • what is it?

          What about Affirmative Action?

          • what is it?
          • reverse discrimination

          What about Affirmative Action?

          • what is it?
          • reverse discrimination
          • diversity policy and the University
            • Bakke's lawsuit & followups
          • diversity in government contracting: quotas?

          Why do protections grow and shrink?

          • Courts have limited government intervention in society
          • Is protest necessary?
            •  Need to "get attention"
            • Maybe if you don't have a good lawyer...
          • Terrorism/violence usually "counter productive"

          Racial Minorities

          • 14th shrank in response to Court decisions and politics
          • Some protections won through legal action
          • Biggest protections result from nonviolent action

          Rights of Women

          • Long era of "protectionism"

          Rights of Women

          • Long era of "protectionism"
          • excluded from many professions

          Rights of Women

          • Long era of "protectionism"
          • excluded from many professions
          • couldn't vote
          • own property

          Equal Rights Amendments

          • Remember the 14th Amendment?

          Equal Rights Amendments

          • Remember the 14th Amendment?
          • ERA proposed by Congress 1972. Failed.

          Equal Rights Amendments

          • Remember the 14th Amendment?
          • ERA proposed by Congress 1972. Failed.
          • 1970s: Revival of 14th for women.
            • Sexual classifications no longer allowed by State without persuasive justification

          Women in the Labor Force

          • Why do women earn less?
            • Job type
            • Skill
            • Discrimination

          Women in the Labor Force

          • Why do women earn less?
            • Job type
            • Skill
            • Discrimination
          • 1964 CRA included "sex" but...
          • EEOC  initially refused to pursue sex discrimination cases

          Women in the Labor Force

          • Why do women earn less?
            • Job type
            • Skill
            • Discrimination
          • 1964 CRA included "sex" but...
          • EEOC  initially refused to pursue sex discrimination cases
          • 1991 CRA: restated 1866 CRA right to sue for $ damages for discrimination or sexual harassment.

          Higher Education

          • Higher Educ. Act, Title IX (1972)  Prohibit sex discrimination in fed. funded programs
          • Rising emphasis on women's athletics
          • Backlash in some states (California, Texas)

          Rights of disabled people

          • 1973 Federal law prohibited discrimination by federal contractors
            • must not discriminate against an otherwise qualified person solely by reason of handicap

          Rights of disabled people

          • 1973 Federal law prohibited discrimination by federal contractors
            • must not discriminate against an otherwise qualified person solely by reason of handicap
          • 1990 Americans with Disabilities Act
            • extends protection to businesses and public accommodations (commerce clause)
            • requires reasonable accommodation

          Politics and Native American Rights

          • Battle on 2 fronts
            • Tribal autonomy & relations with US/BIA
            • Opportunities within "mainstream" US
          • State Government and the 14th amendment
            • Complicated peyote story
          jtidy/src/test/resources/431719.msg0000644000175000017500000000123411461621360017177 0ustar twernertwerner 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/438954.cfg0000644000175000017500000000016010111224155017145 0ustar twernertwerner# Tidy configuration file for bug #438954 hide-endtags: yes tidy-mark: false hide-endtags= yes tidy-mark= falsejtidy/src/test/resources/480406.out0000644000175000017500000000016410077311615017217 0ustar twernertwerner jtidy/src/test/resources/542029.cfg0000644000175000017500000000022110111224155017130 0ustar twernertwerner# Tidy configuration file for bug #542029 add-xml-decl: yes output-xml: yes tidy-mark: false add-xml-decl= yes output-xml= yes tidy-mark= false jtidy/src/test/resources/996484.html0000644000175000017500000000140210110645233017364 0ustar twernertwerner test whether xx<...>yy</...> is wrapped properly

          some text (a long URI).

          xxyy

          xxyy

          zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
          xx

          jtidy/src/test/resources/508936.msg0000644000175000017500000000172111463516445017217 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements. ]]> jtidy/src/test/resources/431958.html0000644000175000017500000000016010104011573017347 0ustar twernertwerner [ #431958 ] Comments always indented jtidy/src/test/resources/433670.xml0000644000175000017500000000020607777327667017241 0ustar twernertwerner [ #433670 ] &apos not recognized as valid XML entity. Use -xml on command line. Test of ' jtidy/src/test/resources/445074.html0000644000175000017500000000062307777327667017411 0ustar twernertwerner [ #445074 ] XHTML requires form method="post"
          jtidy/src/test/resources/431719.html0000644000175000017500000000064607777327667017417 0ustar twernertwerner Test input for bug #431719

          Problem is spec want "HTML 3.2 Final", but everyone in the world, including Tidy, uses "HTML 3.2". So the software has to recognize both FPI's as equivalent.
          Missing table summary only applies to HTML 4.x
          jtidy/src/test/resources/431964.msg0000644000175000017500000000230211461621360017176 0ustar twernertwerner 51 2 7 1 attribute "height" has invalid value ""]]> 53 2 7 1 proprietary attribute "height"]]> 28 2 1 1 110 0 1 1 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/445074.out0000644000175000017500000000061510104011573017211 0ustar twernertwerner [ #445074 ] XHTML requires form method="post"

          jtidy/src/test/resources/588061.msg0000644000175000017500000004740211463532643017220 0ustar twernertwerner 44 2 1 1 declaration]]> 7 2 14 17 before

          ]]> 23 2 14 17 ]]> 15 2 14 21 ]]> 15 2 18 1 ]]> 7 2 20 2 before
          ]]> 15 2 20 9 ]]> 7 2 22 2 before

          ]]> 15 2 23 1 ]]> 3 2 25 96 64 2 25 96 escaping malformed URI reference]]> 4 2 43 128 10 2 46 223 by ]]> 3 2 66 104 7 2 73 1 before

          ]]> 7 2 73 1 before

          ]]> 3 2 74 263 64 2 74 263 escaping malformed URI reference]]> 66 2 102 104 Anchor "MAP details" already defined]]> 3 2 102 257 64 2 102 257 escaping malformed URI reference]]> 66 2 106 102 Anchor "MAP details" already defined]]> 3 2 106 254 64 2 106 254 escaping malformed URI reference]]> 66 2 110 100 Anchor "MAP details" already defined]]> 3 2 110 251 64 2 110 251 escaping malformed URI reference]]> 66 2 114 98 Anchor "MAP details" already defined]]> 3 2 114 245 64 2 114 245 escaping malformed URI reference]]> 66 2 118 96 Anchor "MAP details" already defined]]> 3 2 118 242 64 2 118 242 escaping malformed URI reference]]> 66 2 122 90 Anchor "MAP details" already defined]]> 3 2 122 227 64 2 122 227 escaping malformed URI reference]]> 66 2 126 88 Anchor "MAP details" already defined]]> 3 2 126 231 64 2 126 231 escaping malformed URI reference]]> 66 2 130 90 Anchor "MAP details" already defined]]> 3 2 130 233 64 2 130 233 escaping malformed URI reference]]> 66 2 134 90 Anchor "MAP details" already defined]]> 3 2 134 236 64 2 134 236 escaping malformed URI reference]]> 66 2 138 91 Anchor "MAP details" already defined]]> 3 2 138 237 64 2 138 237 escaping malformed URI reference]]> 66 2 142 91 Anchor "MAP details" already defined]]> 3 2 142 237 64 2 142 237 escaping malformed URI reference]]> 66 2 168 132 Anchor "MAP details" already defined]]> 3 2 168 280 64 2 168 280 escaping malformed URI reference]]> 4 2 188 76 8 2 196 153 ]]> 66 2 200 177 Anchor "MAP details" already defined]]> 3 2 200 339 64 2 200 339 escaping malformed URI reference]]> 4 2 216 90 4 2 240 67 66 2 248 95 Anchor "MAP details" already defined]]> 3 2 248 281 64 2 248 281 escaping malformed URI reference]]> 23 2 296 134 ]]> 8 2 300 177 ]]> 23 2 300 207 ]]> 7 2 328 61 before

          ]]>
          8 2 328 66 ]]> 11 2 339 1 isn't allowed in

          elements]]> 49 2 339 9 lacks "alt" attribute]]> 8 2 339 64 ]]> 49 2 341 1 lacks "alt" attribute]]> 49 2 345 2 lacks "alt" attribute]]> 64 2 349 2 escaping malformed URI reference]]> 11 2 353 2 isn't allowed in

          elements]]> 7 2 353 9 before

          ]]> 23 2 353 9 ]]> 8 2 353 88 ]]> 8 2 363 264 ]]> 49 2 371 79 lacks "alt" attribute]]> 7 2 383 52 before

          ]]>
          8 2 383 53 ]]> 8 2 383 62 ]]> 3 2 414 76 3 2 414 92 49 2 414 92 lacks "alt" attribute]]> 8 2 424 1 ]]> 111 0 1 1 -1 0 0 0 76 0 1 1 77 0 1 1 78 0 1 1 79 0 1 1 81 0 1 1 4 0 1 1 1 0 1 1 112 0 1 1 8 0 1 1 elements.]]> jtidy/src/test/resources/531964.xhtml0000644000175000017500000000045207777327667017605 0ustar twernertwerner [ 531964 ] <p /> gets tidied into <p /></p>

          jtidy/src/test/resources/438954.msg0000644000175000017500000000117411463516445017223 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/539369a.html0000644000175000017500000000047307777327667017570 0ustar twernertwerner [ 539369 ] Test </frameset> inside <noframes> <frameset> jtidy/src/test/resources/441568.msg0000644000175000017500000000171711461621360017210 0ustar twernertwerner 44 2 1 1 declaration]]> 111 0 1 1 -1 0 0 0 8 0 1 1 elements.]]> jtidy/src/test/resources/763186.html0000644000175000017500000000026210110502420017344 0ustar twernertwerner
          Joint



          Standard
          jtidy/src/test/resources/1031865.html0000644000175000017500000000050310125630076017432 0ustar twernertwerner [1031865] Script parsing warning

          jtidy/src/test/resources/427821.html0000644000175000017500000000041407777327667017407 0ustar twernertwerner [ #427821 ] XHTML TRANSITIONAL doctype set wrongly <body> This is a test - use "-asxml" on the command line. </body> jtidy/src/test/resources/427813.msg0000644000175000017500000000146711461621360017207 0ustar twernertwerner 44 2 1 1 declaration]]> 58 2 5 1 attribute with missing trailing quote mark]]> 111 0 1 1 -1 0 0 0 jtidy/src/test/resources/935796.out0000644000175000017500000000062710104011573017233 0ustar twernertwerner Quote entities in title attribute problem jtidy/src/test/java/0000755000175000017500000000000011617345026014633 5ustar twernertwernerjtidy/src/test/java/org/0000755000175000017500000000000011617345026015422 5ustar twernertwernerjtidy/src/test/java/org/w3c/0000755000175000017500000000000011617345026016116 5ustar twernertwernerjtidy/src/test/java/org/w3c/tidy/0000755000175000017500000000000011617345027017070 5ustar twernertwernerjtidy/src/test/java/org/w3c/tidy/LexerTest.java0000644000175000017500000000740410116675277021665 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.TestCase; /** * Tests for utility methods in Lexer. * @author Fabrizio Giustina * @version $Revision $ ($Author $) */ public class LexerTest extends TestCase { /** * Test for isCSS1Selector(). */ public final void testIsCSS1SelectorEscapedNumberTooLong() { assertFalse("\\77777abc is not a valid css selector", Lexer.isCSS1Selector("\\77777abc")); } /** * Test for isCSS1Selector(). 
*/ public final void testIsCSS1SelectorClassPrefixStartsWithDigit() { assertFalse("77abc is not a valid css selector", Lexer.isCSS1Selector("77abc")); } /** * Test for isCSS1Selector(). */ public final void testIsCSS1SelectorUnescapedInvalidCharacter() { assertFalse("abc\b is not a valid css selector", Lexer.isCSS1Selector("abc\b")); } /** * Test for isCSS1Selector(). */ public final void testIsCSS1SelectorPlainOldName() { assertTrue("abc123 is a valid css selector", Lexer.isCSS1Selector("abc123")); } /** * Test for isCSS1Selector(). */ public final void testIsCSS1SelectorBeginWithEscapedNumber() { assertTrue("\\77abc is a valid css selector", Lexer.isCSS1Selector("\\77abc")); } /** * Test for isCSS1Selector(). */ public final void testIsCSS1SelectorEscapedNumber() { assertTrue("abc\\8 is a valid css selector", Lexer.isCSS1Selector("abc\\8")); } }jtidy/src/test/java/org/w3c/tidy/TidyTestCase.java0000644000175000017500000005544610467623231022314 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; import java.io.StringWriter; import java.net.URL; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Properties; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import junit.framework.TestCase; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; /** * @author fgiust * @version $Revision: 812 $ ($Author: fgiust $) */ public class TidyTestCase extends TestCase { /** * Tidy executable name, if you want to produce output files for comparison. */ private static final String TIDY_EXECUTABLE = "tidy.exe"; /** * Logger used to enable/disable output file generation using tidy c executable. Setting this logger to * debug in your log4j configuration file will cause the TIDY_EXECUTABLE to be run against the actual * test file. If set to false the command line used to manually run tidy will appear in the log. */ private static final Logger RUN_TIDY_EXECUTABLE = LoggerFactory.getLogger("runtidy"); /** * Tidy test instance. */ protected Tidy tidy; /** * message listener. */ protected TestMessageListener messageListener; /** * Error out. */ protected StringWriter errorLog; /** * Tidy output. */ protected String tidyOut; /** * logger. */ protected Logger log = LoggerFactory.getLogger(getClass()); /** * write directly to out. Useful for debugging (but it will make the test fail!). */ private boolean writeToOut; /** * Instantiate a new Test case. 
* @param name test name */ public TidyTestCase(String name) { super(name); } /** * @see junit.framework.TestCase#setUp() */ protected void setUp() throws Exception { super.setUp(); // creates a new Tidy this.tidy = new Tidy(); } /** * @see junit.framework.TestCase#tearDown() */ protected void tearDown() throws Exception { this.tidy = null; this.errorLog = null; this.tidyOut = null; super.tearDown(); } /** * Executes a tidy test. This method simply requires the input file name. If a file with the same name but with a * ".cfg" extension is found is used as configuration file for the test, otherwise the default config will be used. * If a file with the same name, but with the ".out" extension is found, tidy will the result with the content of * such file. * @param fileName input file name * @throws Exception any exception generated during the test */ protected void executeTidyTest(String fileName) throws Exception { // set up Tidy using supplied configuration setUpTidy(fileName); // input file URL inputURL = getClass().getClassLoader().getResource(fileName); assertNotNull("Can't find input file [" + fileName + "]", inputURL); OutputStream out; // out if (!writeToOut) { out = new ByteArrayOutputStream(); } else { out = System.out; } // go! 
this.tidy.parse(inputURL.openStream(), out); if (log.isDebugEnabled()) { log.debug("out:\n---- out ----\n" + out + "\n---- out ----"); log.debug("log:\n---- log ----\n" + this.errorLog + "\n---- log ----"); } // existing file for comparison String outFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".out"; URL outFile = getClass().getClassLoader().getResource(outFileName); this.tidyOut = out.toString(); if (outFile != null) { log.debug("Comparing file using [" + outFileName + "]"); assertEquals(this.tidyOut, outFile); } // check messages String messagesFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".msg"; URL messagesFile = getClass().getClassLoader().getResource(messagesFileName); // save messages if (messagesFile == null) { if (log.isDebugEnabled()) { log.debug("Messages file doesn't exists, generating [" + messagesFileName + "] for reference"); } FileWriter fw = new FileWriter(inputURL.getFile().substring(0, inputURL.getFile().lastIndexOf(".")) + ".msg"); fw.write(this.messageListener.messagesToXml()); fw.close(); } else { // compare result to expected messages if (log.isDebugEnabled()) { log.debug("Comparing messages using [" + messagesFileName + "]"); } compareMsgXml(messagesFile); } } /** * Parse an existing msg file and assert that content is identical to current output. 
* @param messagesFile URL to mesage file * @throws Exception any exception generated during the test */ protected void compareMsgXml(URL messagesFile) throws Exception { // first parse existing file // avoid using DOM since if will need forking junit execution in maven (too slow) SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParser saxParser = factory.newSAXParser(); MsgXmlHandler handler = new MsgXmlHandler(); saxParser.parse(new InputSource(messagesFile.openStream()), handler); List expectedMsgs = handler.getMessages(); List tidyMsgs = this.messageListener.getReceived(); // assert size if (expectedMsgs.size() != tidyMsgs.size()) { StringBuffer messagesAsString = new StringBuffer(); for (Iterator iter = tidyMsgs.iterator(); iter.hasNext();) { TidyMessage message = (TidyMessage) iter.next(); messagesAsString.append("\n"); messagesAsString.append(message.getMessage()); } fail("Expected " + expectedMsgs.size() + " messages but got " + tidyMsgs.size() + ". Messages:" + messagesAsString.toString()); } // compare messages Iterator expectedMsgIt = expectedMsgs.iterator(); Iterator tidyMsgIt = tidyMsgs.iterator(); int count = 0; while (tidyMsgIt.hasNext()) { TidyMessage expectedOne = (TidyMessage) expectedMsgIt.next(); TidyMessage tidyOne = (TidyMessage) tidyMsgIt.next(); assertEquals("Error code for message [" + count + "] is different from expected", expectedOne .getErrorCode(), tidyOne.getErrorCode()); assertEquals( "Level for message [" + count + "] is different from expected", expectedOne.getLevel(), tidyOne.getLevel()); assertEquals("Line for message [" + count + "] is different from expected. Expected position: [" + expectedOne.getLine() + ":" + expectedOne.getColumn() + "] , current [" + tidyOne.getLine() + ":" + tidyOne.getColumn() + "]", expectedOne.getLine(), tidyOne.getLine()); assertEquals("Column for message [" + count + "] is different from expected. 
Expected position: [" + expectedOne.getLine() + ":" + expectedOne.getColumn() + "] , current [" + tidyOne.getLine() + ":" + tidyOne.getColumn() + "]", expectedOne.getColumn(), tidyOne.getColumn()); // don't assert text in respect for i18n count++; } } /** * Basic test for DOM parser. Test is set up using [fileName.cfg] configuration if the file exists. Calls * tidy.parseDOM and returns the Document to the caller. * @param fileName input file name * @return parsed Document * @throws Exception any exception generated during the test */ protected Document parseDomTest(String fileName) throws Exception { // creates a new Tidy setUpTidy(fileName); // input file URL inputURL = getClass().getClassLoader().getResource(fileName); assertNotNull("Can't find input file [" + fileName + "]", inputURL); // out OutputStream out = new ByteArrayOutputStream(); Document doc = this.tidy.parseDOM(inputURL.openStream(), out); this.tidyOut = out.toString(); return doc; } /** * assert generated output and test file are equals. * @param tidyOutput tidy output as string * @param correctFile URL used to load the file for comparison * @throws FileNotFoundException if test file is not found * @throws IOException in reading file */ protected void assertEquals(String tidyOutput, URL correctFile) throws FileNotFoundException, IOException { // assume the expected output has the same encoding tidy has in its configuration String encodingName = tidy.getConfiguration().getOutCharEncodingName(); diff( new BufferedReader((new InputStreamReader(new ByteArrayInputStream(tidyOutput.getBytes()), encodingName))), new BufferedReader(new InputStreamReader(new FileInputStream(correctFile.getPath()), encodingName))); } /** * Utility method: assert no warnings were reported in the last tidy run. 
*/ protected void assertNoWarnings() { int warningNum = this.tidy.getParseWarnings(); if (warningNum != 0) { fail("Test failed, [" + warningNum + "] false warnings were reported"); } } /** * Utility method: assert no errors were reported in the last tidy run. */ protected void assertNoErrors() { int errorNum = this.tidy.getParseErrors(); if (errorNum != 0) { fail("Test failed, [" + errorNum + "] false errors were reported"); } } /** * Utility method: assert no warnings were reported in the last tidy run. * @param expectedNumber expected number of warnings. */ protected void assertWarnings(int expectedNumber) { int warningNum = this.tidy.getParseWarnings(); if (warningNum != expectedNumber) { fail("Test failed, [" + expectedNumber + "] warnings expected, [" + warningNum + "] were reported"); } } /** * Utility method: assert no errors were reported in the last tidy run. * @param expectedNumber expected number of errors. */ protected void assertErrors(int expectedNumber) { int errorNum = this.tidy.getParseErrors(); if (errorNum != expectedNumber) { fail("Test failed, [" + expectedNumber + "] errors expected, [" + errorNum + "] were reported"); } } /** * Utility method: asserts a given String can be found in the error log. * @param expectedString expected String in error log. */ protected void assertLogContains(String expectedString) { String logString = this.errorLog.toString(); if (logString.indexOf(expectedString) == -1) { fail("Test failed, expected [" + expectedString + "] couldn't be found in error log."); } } /** * Utility method: asserts a given String can't be found in the error log. * @param expectedString expected String in error log. */ protected void assertLogDoesntContains(String expectedString) { String logString = this.errorLog.toString(); if (logString.indexOf(expectedString) != -1) { fail("Test failed, [" + expectedString + "] was found in error log."); } } /** * set up the tidy instance. 
* @param fileName input file name (needed to determine configuration file name) * @throws IOException in reading configuration file */ private void setUpTidy(String fileName) throws IOException { // config file names String configFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".cfg"; String messagesFileName = fileName.substring(0, fileName.lastIndexOf(".")); // input file URL inputURL = getClass().getClassLoader().getResource(fileName); assertNotNull("Can't find input file [" + fileName + "]", inputURL); // configuration file URL configurationFile = getClass().getClassLoader().getResource(configFileName); // debug runing test info if (log.isDebugEnabled()) { StringBuffer message = new StringBuffer(); message.append("Testing [" + fileName + "]"); if (configurationFile != null) { message.append(" using configuration file [" + configFileName + "]"); } log.debug(message.toString()); } if (configurationFile == null) { configurationFile = getClass().getClassLoader().getResource("default.cfg"); } generateOutputUsingTidyC(inputURL.getFile(), configurationFile.getFile(), RUN_TIDY_EXECUTABLE.isDebugEnabled()); // if configuration file exists load and set it Properties testProperties = new Properties(); testProperties.load(configurationFile.openStream()); this.tidy.setConfigurationFromProps(testProperties); // set up error log this.errorLog = new StringWriter(); this.tidy.setErrout(new PrintWriter(this.errorLog)); this.messageListener = new TestMessageListener(messagesFileName); this.tidy.setMessageListener(messageListener); } /** * Diff between two buffered readers. If comparison fails an AssertionFailedException is thrown with the line * number, actual and expected output. Content is tested to be identical (same wrapping). 
* @param tidyOutput reader for tidy generated output * @param correctFile reader for test file * @throws IOException in reading from readers */ private static void diff(BufferedReader tidyOutput, BufferedReader correctFile) throws IOException { String tidyLine, testLine; int i = 1; do { tidyLine = tidyOutput.readLine(); testLine = correctFile.readLine(); i++; } while ((tidyLine != null) && (testLine != null) && (tidyLine.equals(testLine))); tidyOutput.close(); correctFile.close(); if ((tidyLine != null) || (testLine != null)) { assertEquals("Wrong output, file comparison failed at line [" + (i - 1) + "]", testLine, tidyLine); } return; } /** * Run TIDY_EXECUTABLE to produce an output file. Used to generates output files using tidy c for comparison with * jtidy. A file ".out" will be written in the same folder of the input file. * @param inputFileName input file for tidy. * @param configurationFileName configuration file name (default if there is no not test-specific file). * @param runIt if true the output is generated using tidy, if false simply output the command line. */ private void generateOutputUsingTidyC(String inputFileName, String configurationFileName, boolean runIt) { String outputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(".")) + ".out"; String strCmd = TIDY_EXECUTABLE + " -config \"" + cleanUpFilePath(configurationFileName) + "\" -o \"" + cleanUpFilePath(outputFileName) + "\" \"" + cleanUpFilePath(inputFileName) + "\""; log.debug("cmd line:\n***\n" + strCmd + "\nw/o output:\n" + TIDY_EXECUTABLE + " -config \"" + cleanUpFilePath(configurationFileName) + "\" \"" + cleanUpFilePath(inputFileName) + "\"" + "\n***"); if (runIt) { log.debug("running " + TIDY_EXECUTABLE); try { Runtime.getRuntime().exec(strCmd); } catch (IOException e) { log.warn("Error running [" + strCmd + "] cmd: " + e.getMessage()); } } } /** * Utility method to clean up file path returned by URLs. 
* @param fileName file name as given by URL.getFile() * @return String fileName */ protected String cleanUpFilePath(String fileName) { if (fileName.length() > 3 && fileName.charAt(2) == ':') { // assuming something like ""/C:/program files/..." return fileName.substring(1); } else if (fileName.startsWith("file://")) { return fileName.substring(7); } return fileName; } /** * A simple SAX Content Handler used to parse .msg files. */ static class MsgXmlHandler extends DefaultHandler { /** * Parsed messages. */ private List messages = new ArrayList(); /** * Error code for the current message. */ private int code; /** * Level for the current message. */ private int level; /** * Column for the current message. */ private int column; /** * Line for the current message. */ private int line; /** * Message the current message. */ private StringBuffer textbuffer; /** * Actual parsing position. */ private int parsePosition = -100; /** * actually parsing a detail tag. */ private boolean intag; /** * @see org.xml.sax.ContentHandler#startElement(String, String, String, org.xml.sax.Attributes) */ public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("message".equals(qName)) { parsePosition = 0; textbuffer = new StringBuffer(); } else { parsePosition++; intag = true; } } /** * @see org.xml.sax.ContentHandler#endElement(String, String, String) */ public void endElement(String uri, String localName, String qName) throws SAXException { if ("message".equals(qName)) { TidyMessage message = new TidyMessage(code, line, column, TidyMessage.Level.fromCode(level), textbuffer .toString()); messages.add(message); } intag = false; } /** * @see org.xml.sax.ContentHandler#characters(char[], int, int) */ public void characters(char[] ch, int start, int length) throws SAXException { if (!intag) { return; } switch (parsePosition) { case 1 : this.code = Integer.parseInt(new String(ch, start, length)); break; case 2 : this.level = 
Integer.parseInt(new String(ch, start, length)); break; case 3 : this.line = Integer.parseInt(new String(ch, start, length)); break; case 4 : this.column = Integer.parseInt(new String(ch, start, length)); break; case 5 : textbuffer.append(new String(ch, start, length)); break; default : break; } } /** * Returns the list of parsed messages. * @return List containing TidyMessage elements */ public List getMessages() { return messages; } } } jtidy/src/test/java/org/w3c/tidy/TidyUtilsTest.java0000644000175000017500000001140310123333757022522 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. 
* * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.TestCase; /** * Test cases for TidyUtils. * @author Fabrizio Giustina * @version $Revision $ ($Author $) */ public class TidyUtilsTest extends TestCase { /** * instantiates a new test. * @param name test name */ public TidyUtilsTest(String name) { super(name); } /** * Tests isInValuesIgnoreCase with a valid string. */ public void testIsInValuesIgnoreCaseSuccessfull() { String[] validValues = new String[]{"first", "Second", "THIRD"}; String stringToCheck = "second"; assertTrue(TidyUtils.isInValuesIgnoreCase(validValues, stringToCheck)); } /** * Tests isInValuesIgnoreCase with an invalid string. */ public void testIsInValuesIgnoreCaseFail() { String[] validValues = new String[]{"first", "Second", "THIRD"}; String stringToCheck = "secon"; assertFalse(TidyUtils.isInValuesIgnoreCase(validValues, stringToCheck)); } /** * Test for isCharEncodingSupported(). 
*/ public void testIsCharEncodingSupported() { assertTrue(TidyUtils.isCharEncodingSupported("utf8")); assertTrue(TidyUtils.isCharEncodingSupported("UTF-8")); assertTrue(TidyUtils.isCharEncodingSupported("US-ASCII")); assertTrue(TidyUtils.isCharEncodingSupported("ASCII")); assertTrue(TidyUtils.isCharEncodingSupported("LATIN1")); assertTrue(TidyUtils.isCharEncodingSupported("ISO-8859-1")); assertTrue(TidyUtils.isCharEncodingSupported("WINDOWS-1252")); assertTrue(TidyUtils.isCharEncodingSupported("ISO2022")); assertTrue(TidyUtils.isCharEncodingSupported("ISO-2022-JP")); assertTrue(TidyUtils.isCharEncodingSupported("BIG5")); assertTrue(TidyUtils.isCharEncodingSupported("UTF16")); assertTrue(TidyUtils.isCharEncodingSupported("UTF16BE")); assertTrue(TidyUtils.isCharEncodingSupported("UTF16LE")); assertTrue(TidyUtils.isCharEncodingSupported("UTF-16")); assertTrue(TidyUtils.isCharEncodingSupported("UTF-16BE")); assertTrue(TidyUtils.isCharEncodingSupported("UTF-16LE")); assertTrue(TidyUtils.isCharEncodingSupported("CP858")); assertTrue(TidyUtils.isCharEncodingSupported("ibm858")); assertTrue(TidyUtils.isCharEncodingSupported("Macintosh Roman")); assertTrue(TidyUtils.isCharEncodingSupported("WiN1252")); assertTrue(TidyUtils.isCharEncodingSupported("SHIFTJIS")); } }jtidy/src/test/java/org/w3c/tidy/TestMessageListener.java0000644000175000017500000000510510120355471023656 0ustar twernertwernerpackage org.w3c.tidy; import java.util.ArrayList; import java.util.Iterator; import java.util.List; /** * @author fgiust * @version $Revision: 597 $ ($Author: fgiust $) */ public class TestMessageListener implements TidyMessageListener { /** * filename. */ private String filename; /** * Contains all the received TidyMessages. */ private List received = new ArrayList(); /** * Instantiate a new messag listener for the given test file. 
* @param filename actual test case name */ public TestMessageListener(String filename) { this.filename = filename; } /** * @see org.w3c.tidy.TidyMessageListener#messageReceived(org.w3c.tidy.TidyMessage) */ public void messageReceived(TidyMessage message) { received.add(message); } /** * Write received messages as xml. * @return xml containing message details. */ public String messagesToXml() { StringBuffer buffer = new StringBuffer(); buffer.append("\n"); buffer.append("\n"); buffer.append("\n"); Iterator iterator = received.iterator(); while (iterator.hasNext()) { TidyMessage msg = (TidyMessage) iterator.next(); buffer.append(" \n"); buffer.append(" "); buffer.append(msg.getErrorCode()); buffer.append("\n"); buffer.append(" "); buffer.append(msg.getLevel().getCode()); buffer.append("\n"); buffer.append(" "); buffer.append(msg.getLine()); buffer.append("\n"); buffer.append(" "); buffer.append(msg.getColumn()); buffer.append("\n"); buffer.append(" \n"); buffer.append(" \n"); } buffer.append("\n"); return buffer.toString(); } /** * Getter for filename. * @return Returns the filename. */ protected String getFilename() { return this.filename; } /** * Returns the list of received messages. * @return Returns the received messages. */ public List getReceived() { return this.received; } } jtidy/src/test/java/org/w3c/tidy/TidyWarningBugsTest.java0000644000175000017500000004210210133721557023650 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. 
This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Testcase for Tidy resolved bugs (Tidy warning and errors). *

          * see http://sourceforge.net/support/tracker.php?aid=(item number) *

          * @author fgiust * @version $Revision: 660 $ ($Author: fgiust $) */ public class TidyWarningBugsTest extends TidyTestCase { /** * Instantiate a new Test case. * @param name test name */ public TidyWarningBugsTest(String name) { super(name); } /** * test for Tidy [427810] : Proprietary elements not reported as err. * @throws Exception any exception generated during the test */ public void test427810() throws Exception { // line 1 column 1 - Warning: missing declaration // line 8 column 1 - Warning: is not approved by W3C // line 9 column 1 - Warning: is not approved by W3C // line 10 column 1 - Warning: is not approved by W3C // Info: Document content looks like HTML Proprietary // 4 warnings, 0 errors were found! executeTidyTest("427810.html"); assertWarnings(4); } /** * test for Tidy [431874] : Nested anchors not detected. * @throws Exception any exception generated during the test */ public void test431874() throws Exception { executeTidyTest("431874.html"); assertWarnings(2); } /** * test for Tidy [427827] : Nested anchor elements allowed. * @throws Exception any exception generated during the test */ public void test427827() throws Exception { // line 6 column 1 - Warning: missing before // line 7 column 6 - Warning: discarding unexpected // 2 warnings, 0 errors were found! executeTidyTest("427827.html"); assertWarnings(2); } /** * test for Tidy [427834] : Warning given for newline in DOCTYPE. * @throws Exception any exception generated during the test */ public void test427834() throws Exception { // no warnings executeTidyTest("427834.html"); assertNoWarnings(); } /** * test for Tidy [427844] : End tags containing whitespace warning. * @throws Exception any exception generated during the test */ public void test427844() throws Exception { executeTidyTest("427844.html"); assertNoWarnings(); } /** * test for Tidy [431719] : Spec want "HTML 3.2 Final", but everyone in the world, including Tidy, uses "HTML 3.2". 
* So the software has to recognize both FPI's as equivalent. * @throws Exception any exception generated during the test */ public void test431719() throws Exception { // line 11 column 3 - Warning: lacks "summary" attribute // Info: Doctype given is "-//W3C//DTD HTML 3.2//EN" // Info: Document content looks like HTML 3.2 // 1 warning, 0 errors were found! // still bad in tidy? executeTidyTest("431719.html"); assertNoWarnings(); } /** * test for Tidy [431883] : Given doctype reported incorrectly. * @throws Exception any exception generated during the test */ public void test431883() throws Exception { executeTidyTest("431883.html"); assertLogContains("Doctype given is \"-//W3C//DTD HTML 4.0"); } /** * test for Tidy [431956] : Well formed XSL xsl:text gives error. * @throws Exception any exception generated during the test */ public void test431956() throws Exception { // No warnings or errors were found. (-xml) executeTidyTest("431956.xml"); assertNoWarnings(); } /** * test for Tidy [431964] : table height="" not flagged as error. * @throws Exception any exception generated during the test */ public void test431964() throws Exception { // line 7 column 1 - Warning:
          attribute "height" lacks value // line 7 column 1 - Warning:
          proprietary attribute "height" // Info: Doctype given is "-//W3C//DTD HTML 4.01 Transitional//EN" // Info: Document content looks like HTML Proprietary // 2 warnings, 0 errors were found! executeTidyTest("431964.html"); assertWarnings(2); } /** * test for Tidy [433021] : Identify attribute whose value is bad. * @throws Exception any exception generated during the test */ public void test433021() throws Exception { executeTidyTest("433021.html"); assertLogContains("align"); assertLogContains("valign"); assertWarnings(5); } /** * test for Tidy [433607] : No warning for omitted end tag with -xml. * @throws Exception any exception generated during the test */ public void test433607() throws Exception { // No warnings or errors were found. (-xml) // still bad in tidy? executeTidyTest("433607.xml"); assertWarnings(1); } /** * test for Tidy [433670] : &apos not recognized as valid XML entity. * @throws Exception any exception generated during the test */ public void test433670() throws Exception { // No warnings or errors were found. (-xml) executeTidyTest("433670.xml"); assertNoWarnings(); } /** * test for Tidy [434047] : Mixed content in 4.01 Strict not allowed. * @throws Exception any exception generated during the test */ public void test434047() throws Exception { // Info: Doctype given is "-//W3C//DTD HTML 4.01//EN" // Info: Document content looks like HTML 4.01 Strict // No warnings or errors were found. executeTidyTest("434047.html"); assertLogContains("HTML 4.01 Strict"); } /** * test for Tidy [434100] : Error actually reported as a warning (-xml). * @throws Exception any exception generated during the test */ public void test434100() throws Exception { // -xml // line 13 column 1 - Error: unexpected in // 0 warnings, 1 error were found! executeTidyTest("434100.html"); assertErrors(1); assertNoWarnings(); } /** * test for Tidy [435917] : <input onfocus=""> reported unknown attr. 
* @throws Exception any exception generated during the test */ public void test435917() throws Exception { // should not report: invalid attribute "onfocus" executeTidyTest("435917.html"); assertWarnings(1); } /** * test for Tidy [435917] : missing "=" in attribute confuses tidy. * @throws Exception any exception generated during the test */ public void test435917b() throws Exception { // line 11 column 1 - Warning: attribute with missing trailing quote mark // should not report: // line 11 column 2 - Warning: unknown attribute value "null" executeTidyTest("435917.html"); assertLogDoesntContains("null"); } /** * test for Tidy [435922] : Missing <form> around <input> no warning. * @throws Exception any exception generated during the test */ public void test435922() throws Exception { // line 6 column 1 - Warning: isn't allowed in elements // line 7 column 3 - Warning: inserting implicit // line 7 column 3 - Warning: missing // line 7 column 3 - Warning: lacks "action" attribute // Info: Doctype given is "-//W3C//DTD HTML 4.01 Transitional//EN" // Info: Document content looks like HTML 4.01 Transitional // 4 warnings, 0 errors were found! executeTidyTest("435922.html"); assertWarnings(4); } /** * test for Tidy [438956] : Bad head-endtag reported incorrectly. * @throws Exception any exception generated during the test */ public void test438956() throws Exception { // line 3 column 1 - Warning: plain text isn't allowed in elements // line 6 column 1 - Warning: discarding unexpected // Info: Doctype given is "-//W3C//DTD HTML 4.01 Transitional//EN" // Info: Document content looks like HTML 4.01 Transitional // 2 warnings, 0 errors were found! executeTidyTest("438956.html"); assertWarnings(2); } /** * test for Tidy [446019] : <img name="foo"> allowed in XTHML-Strict. 
* @throws Exception any exception generated during the test
     */
    public void test446019() throws Exception
    {
        // Info: Doctype given is "-//W3C//DTD XHTML 1.0 Strict//EN"
        // Info: Document content looks like XHTML 1.0 Transitional
        // No warnings or errors were found.
        executeTidyTest("446019.xhtml");
        assertLogContains("XHTML 1.0 Transitional");
    }

    /**
     * test for Tidy [450389] : Color attval check allows only black/#.
     * @throws Exception any exception generated during the test
     */
    public void test450389() throws Exception
    {
        // NOTE(review): element names appear to be missing from the quoted output - confirm.
        // line 1 column 1 - Warning: missing declaration
        // line 44 column 1 - Warning: attribute "color" has invalid value "reddish"
        // line 72 column 1 - Warning: attribute "color" has invalid value "#FF"
        // line 76 column 1 - Warning: attribute "color" has invalid value "grurple"
        // line 77 column 1 - Warning: attribute "color" has invalid value "#grurple"
        // line 78 column 1 - Warning: attribute "color" has invalid value "#1234567"
        // Info: Document content looks like HTML 3.2
        executeTidyTest("450389.html");
        assertWarnings(6);
    }

    /**
     * test for Tidy [501230] : "0" (Zero) has to be lower case.
     * @throws Exception any exception generated during the test
     */
    public void test501230() throws Exception
    {
        executeTidyTest("501230.xhtml");
        assertLogDoesntContains("lower case");
    }

    /**
     * test for Tidy [501669] : width="n*" marked invalid on <COL>.
     * @throws Exception any exception generated during the test
     */
    public void test501669() throws Exception
    {
        // Info: Doctype given is "-//W3C//DTD HTML 4.01 Transitional//EN"
        // Info: Document content looks like HTML 4.01 Strict
        // No warnings or errors were found.
        executeTidyTest("501669.html");
        assertNoWarnings();
    }

    /**
     * test for Tidy [516370] : Invalid ID value.
     * @throws Exception any exception generated during the test
     */
    public void test516370() throws Exception
    {
        // NOTE(review): each warning below was split after "Warning:"; an element name
        // was probably lost at the split point - confirm against the real log.
        // line 10 column 1 - Warning: ID "_ValidID2" uses XML ID syntax
        // line 11 column 1 - Warning: ID ":ValidID3" uses XML ID syntax
        // line 13 column 1 - Warning: attribute "id" has invalid value ".InvalidID1"
        // line 14 column 1 - Warning: attribute "id" has invalid value "2InvalidID2"
        // line 15 column 1 - Warning: attribute "id" lacks value
        executeTidyTest("516370.xhtml");
        assertWarnings(5);
        assertLogContains("XML ID syntax");
    }

    /**
     * test for Tidy [517528] : Parser complains about xml:lang.
     * @throws Exception any exception generated during the test
     */
    public void test517528() throws Exception
    {
        executeTidyTest("517528.html");
        assertNoWarnings();
    }

    /**
     * test for Tidy [525081] : frameset rows attr. not recognized.
     * @throws Exception any exception generated during the test
     */
    public void test525081() throws Exception
    {
        // Info: Doctype given is "-//W3C//DTD HTML 4.01 Frameset//EN"
        // Info: Document content looks like HTML 4.01 Frameset
        // No warnings or errors were found.
        executeTidyTest("525081.html");
        assertNoWarnings();
    }

    /**
     * test for Tidy [538536] : Extra endtags not detected.
     * @throws Exception any exception generated during the test
     */
    public void test538536() throws Exception
    {
        // NOTE(review): element names appear to be missing from the quoted output - confirm.
        // line 5 column 1 - Warning: content occurs after end of body
        // line 9 column 1 - Warning: discarding unexpected
        // Info: Doctype given is "-//W3C//DTD HTML 4.01//EN"
        // Info: Document content looks like HTML 4.01 Strict
        // 2 warnings, 0 errors were found!
        executeTidyTest("538536.html");
        assertNoErrors();
        assertWarnings(2);
    }

    /**
     * test for Tidy [545067] : Implicit closing of head broken.
     * @throws Exception any exception generated during the test
     */
    public void test545067() throws Exception
    {
        // should NOT output:
        // line 3 column 1 - Warning: isn't allowed in elements
        // since body tags implicitly terminate the section
        // ok in jtidy r7
        executeTidyTest("545067.html");
        // NOTE(review): the argument below is an empty string, which cannot express the
        // check described above. It looks like an element name in angle brackets was
        // stripped from the literal - restore it from the original test source.
        assertLogDoesntContains("");
    }

    /**
     * test for Tidy [552861] : <td with=""> not recognized.
     * @throws Exception any exception generated during the test
     */
    public void test552861() throws Exception
    {
        executeTidyTest("552861.html");
        // should complain about invalid "with" attribute
        assertWarnings(1);
    }

    /**
     * test for Tidy [553414] : XHTML strict accept '_target' attribute.
     * @throws Exception any exception generated during the test
     */
    public void test553414() throws Exception
    {
        executeTidyTest("553414.html");
        // the declared doctype is Strict, but the content must be reported as Transitional
        assertLogContains("given is \"-//W3C//DTD XHTML 1.0 Strict//EN\"");
        assertLogContains("looks like XHTML 1.0 Transitional");
        assertWarnings(1);
    }

    /**
     * test for Tidy [553468] : Doesn't warn about <u> in XHTML strict.
     * @throws Exception any exception generated during the test
     */
    public void test553468() throws Exception
    {
        // Info: Doctype given is "-//W3C//DTD XHTML 1.0 Strict//EN"
        // Info: Document content looks like XHTML 1.0 Transitional
        // No warnings or errors were found.
        executeTidyTest("553468.xhtml");
        assertLogContains("XHTML 1.0 Transitional");
    }

    /**
     * test for Tidy [559774] : tidy version rejects all id values.
     * @throws Exception any exception generated during the test
     */
    public void test559774() throws Exception
    {
        executeTidyTest("559774.html");
        assertNoWarnings();
    }

    /**
     * test for Tidy [706260] : size not accepted for input.
     * @throws Exception any exception generated during the test
     */
    public void test706260() throws Exception
    {
        // Info: Doctype given is "-//W3C//DTD XHTML 1.0 Strict//EN"
        // Info: Document content looks like XHTML 1.0 Transitional
        // No warnings or errors were found.
executeTidyTest("706260.html"); assertNoWarnings(); } }jtidy/src/test/java/org/w3c/tidy/TidyCrashingBugsTest.java0000644000175000017500000003306110463650235024005 0ustar twernertwerner/** * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. 
*
 * The copyright holders and contributing author(s) specifically
 * permit, without fee, and encourage the use of this source code
 * as a component for supporting the Hypertext Markup Language in
 * commercial products. If you use this source code in a product,
 * acknowledgment is not required but would be appreciated.
 *
 */
package org.w3c.tidy;

import java.io.File;
import java.text.NumberFormat;


/**
 * Testcase for Tidy resolved bugs. Bugs that crashed tidy or caused infinite loops.
 * Most tests only run tidy on the input file named after the bug number and make no
 * assertion of their own in the test body.
 * @author fgiust
 * @version $Revision: 809 $ ($Author: fgiust $)
 */
public class TidyCrashingBugsTest extends TidyTestCase
{

    /**
     * Instantiate a new Test case.
     * @param name test name
     */
    public TidyCrashingBugsTest(String name)
    {
        super(name);
    }

    /**
     * test for Tidy [427664] : Missing attr values cause NULL segfault.
     * @throws Exception any exception generated during the test
     */
    public void test427664() throws Exception
    {
        // no assertion here beyond whatever executeTidyTest (defined outside this file) performs
        executeTidyTest("427664.html");
    }

    /**
     * test for Tidy [427671] : <LI> w/FRAME/FRAMESET/OPTGROUP/OPTION loop.
     * @throws Exception any exception generated during the test
     */
    public void test427671() throws Exception
    {
        executeTidyTest("427671.html");
    }

    /**
     * test for Tidy [427675] : Frameset followed by frame infinite loop.
     * @throws Exception any exception generated during the test
     */
    public void test427675() throws Exception
    {
        executeTidyTest("427675.html");
    }

    /**
     * test for Tidy [427676] : Missing = from attr value NULL segfault.
     * @throws Exception any exception generated during the test
     */
    public void test427676() throws Exception
    {
        executeTidyTest("427676.html");
    }

    /**
     * test for Tidy [427672] : Non-std attrs w/multibyte names segfault.
     * @throws Exception any exception generated during the test
     */
    public void test427672() throws Exception
    {
        executeTidyTest("427672.html");
    }

    /**
     * test for Tidy [426885] : Definition list w/Center crashes.
* @throws Exception any exception generated during the test
     */
    public void test426885() throws Exception
    {
        // no assertion here beyond whatever executeTidyTest (defined outside this file) performs
        executeTidyTest("426885.html");
    }

    /**
     * test for Tidy [427811] : FRAME inside NOFRAME infinite loop.
     * @throws Exception any exception generated during the test
     */
    public void test427811() throws Exception
    {
        executeTidyTest("427811.html");
    }

    /**
     * test for Tidy [427813] : Missing = from attr value segfaults.
     * @throws Exception any exception generated during the test
     */
    public void test427813() throws Exception
    {
        executeTidyTest("427813.html");
    }

    /**
     * test for Tidy [427816] : Mismatched quotes for attr segfaults.
     * @throws Exception any exception generated during the test
     */
    public void test427816() throws Exception
    {
        executeTidyTest("427816.html");
    }

    /**
     * test for Tidy [427818] : Missing quotes cause segfaults.
     * @throws Exception any exception generated during the test
     */
    public void test427818() throws Exception
    {
        executeTidyTest("427818.html");
    }

    /**
     * test for Tidy [427840] : Span causes infinite loop.
     * @throws Exception any exception generated during the test
     */
    public void test427840() throws Exception
    {
        executeTidyTest("427840.html");
    }

    /**
     * test for Tidy [427841] : Tidy crashes on badly formed HTML involving nested lists.
     * @throws Exception any exception generated during the test
     */
    public void test427841() throws Exception
    {
        executeTidyTest("427841.html");
    }

    /**
     * test for Tidy [431716] : -slides causes a seg fault.
* @throws Exception any exception generated during the test */ public void test431716() throws Exception { executeTidyTest("431716.html"); // delete generates slides (this should be done in tear down be I'm pretty confident tidy will not crash here) NumberFormat numberFormat = NumberFormat.getInstance(); numberFormat.setMinimumIntegerDigits(3); for (int j = 1; j <= 26; j++) { String slide = "slide" + numberFormat.format(j) + ".html"; (new File(slide)).delete(); } } /** * test for Tidy [443362] : null-pointer exception for doctype in pre. * @throws Exception any exception generated during the test */ public void test443362() throws Exception { executeTidyTest("443362.html"); } /** * test for Tidy [433856] : Access violation w/Word files w/font tag. * @throws Exception any exception generated during the test */ public void test433856() throws Exception { executeTidyTest("433856.html"); } /** * test for Tidy [532535] : Hang when in code <?xml />. * @throws Exception any exception generated during the test */ public void test532535() throws Exception { executeTidyTest("532535.html"); } /** * test for Tidy [539369] : Infinite loop </frame> after </frameset>. * @throws Exception any exception generated during the test */ public void test539369() throws Exception { executeTidyTest("539369.html"); } /** * test for Tidy [539369a] : Test </frameset> inside <noframes> (infinite loop). * @throws Exception any exception generated during the test */ public void test539369a() throws Exception { executeTidyTest("539369a.html"); } /** * test for Tidy [540296] : Tidy dumps. * @throws Exception any exception generated during the test */ public void test540296() throws Exception { executeTidyTest("540296.html"); } /** * test for Tidy [542029] : PPrintXmlDecl reads outside array range. 
* @throws Exception any exception generated during the test
     */
    public void test542029() throws Exception
    {
        // no assertion here beyond whatever executeTidyTest (defined outside this file) performs
        executeTidyTest("542029.html");
    }

    /**
     * test for Tidy [572154] : frame element outside of a frameset causes infinite loop.
     * @throws Exception any exception generated during the test
     */
    public void test572154() throws Exception
    {
        executeTidyTest("572154.html");
    }

    /**
     * test for Tidy [543262] : tidy eats all memory.
     * @throws Exception any exception generated during the test
     */
    public void test543262() throws Exception
    {
        executeTidyTest("543262.html");
    }

    /**
     * test for Tidy [545772] : --output-xhtml hangs on most files.
     * @throws Exception any exception generated during the test
     */
    public void test545772() throws Exception
    {
        executeTidyTest("545772.html");
    }

    /**
     * test for Tidy [566542] : parser hangs.
     * @throws Exception any exception generated during the test
     */
    public void test566542() throws Exception
    {
        executeTidyTest("566542.html");
    }

    /**
     * test for Tidy [570027] : Fixes crash in Word2000 cleanup.
     * @throws Exception any exception generated during the test
     */
    public void test570027() throws Exception
    {
        executeTidyTest("570027.html");
    }

    /**
     * test for Tidy [588061] : Crash on www.tvnav.com.
     * @throws Exception any exception generated during the test
     */
    public void test588061() throws Exception
    {
        executeTidyTest("588061.html");
    }

    /**
     * test for Tidy [661606] : Two bytes at the last line, w/ asian options.
     * @throws Exception any exception generated during the test
     */
    public void test661606() throws Exception
    {
        executeTidyTest("661606.html");
    }

    /**
     * test for Tidy [671087] : tidy loops with --new-inline-tags table,tr,td.
     * @throws Exception any exception generated during the test
     */
    public void test671087() throws Exception
    {
        executeTidyTest("671087.html");
    }

    /**
     * test for Tidy [676205] : <img src="> crashes Tidy.
* @throws Exception any exception generated during the test
     */
    public void test676205() throws Exception
    {
        // no assertion here beyond whatever executeTidyTest (defined outside this file) performs
        executeTidyTest("676205.html");
    }

    /**
     * test for Tidy [679135] : Crashes while checking attributes.
     * @throws Exception any exception generated during the test
     */
    public void test679135() throws Exception
    {
        executeTidyTest("679135.html");
    }

    /**
     * test for Tidy [696799] : Crash: <script language="">.
     * @throws Exception any exception generated during the test
     */
    public void test696799() throws Exception
    {
        executeTidyTest("696799.html");
    }

    /**
     * test for Tidy [788031] : tidy hangs on input.
     * @throws Exception any exception generated during the test
     */
    public void test788031() throws Exception
    {
        executeTidyTest("788031.html");
    }

    /**
     * test for Tidy [837023] : segfault on doctype-like element.
     * @throws Exception any exception generated during the test
     */
    public void test837023() throws Exception
    {
        executeTidyTest("837023.html");
    }

    /**
     * test for Tidy [991469] : goes wrong with option tag.
     * @throws Exception any exception generated during the test
     */
    public void test991469() throws Exception
    {
        executeTidyTest("991469.xml");
    }

    /**
     * test for Tidy [991471] : NPE on file with CDATA.
     * @throws Exception any exception generated during the test
     */
    public void test991471() throws Exception
    {
        executeTidyTest("991471.xml");
    }

    /**
     * test for Tidy [1003994] : wrong UNEXPECTED_ENDTAG message.
     * @throws Exception any exception generated during the test
     */
    public void test1003994() throws Exception
    {
        executeTidyTest("1003994.xml");
        // presumably a "{" in the log would be an unsubstituted message placeholder - verify
        assertLogDoesntContains("{");
    }

    /**
     * test for JTidy [1020806]: NPE when PPPrint'ing changed DOM tree.
     * @throws Exception any exception generated during the test
     */
    public void test1020806() throws Exception
    {
        // parseDomTest is defined outside this file; its result is pretty-printed to System.out
        org.w3c.dom.Document doc = parseDomTest("1020806.html");
        tidy.pprint(doc, System.out);
    }

    /**
     * test for JTidy [1020806b]: NPE when PPPrint'ing changed DOM tree.
* @throws Exception any exception generated during the test
     */
    public void test1020806b() throws Exception
    {
        // parseDomTest is defined outside this file; its result is pretty-printed to System.out
        org.w3c.dom.Document doc = parseDomTest("1020806b.html");
        tidy.pprint(doc, System.out);
    }

    /**
     * test for JTidy [1033035]: EOF bug in parseAsp and parsePhp.
     * @throws Exception any exception generated during the test
     */
    public void test1033035() throws Exception
    {
        executeTidyTest("1033035.html");
    }

    /**
     * test for JTidy [1392829]: Some cases for null pointer Exception.
     * @throws Exception any exception generated during the test
     */
    public void test1392829() throws Exception
    {
        executeTidyTest("1392829.html");
    }

    /**
     * test for JTidy [1416185]: infinite loop.
     * @throws Exception any exception generated during the test
     */
    public void test1416185() throws Exception
    {
        executeTidyTest("1416185.html");
    }

    /**
     * test for JTidy [1403105]: java.lang.StackOverflowError in Tidy.parseDOM().
     * @throws Exception any exception generated during the test
     */
    public void test1403105() throws Exception
    {
        // the enclose-block-text option is switched on before the run
        tidy.setEncloseBlockText(true);
        executeTidyTest("1403105.html");
    }

}
jtidy/src/test/java/org/w3c/tidy/EncodingNameMapperTest.java0000644000175000017500000001051711425047400024262 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you.
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.TestCase; /** * Test cases for EncodingNameMapper. * @author Fabrizio Giustina * @version $Revision $ ($Author $) */ public class EncodingNameMapperTest extends TestCase { /** * instantiates a new test. * @param name test name */ public EncodingNameMapperTest(String name) { super(name); } /** * Test for toJava(). 
*/
    public void testToJava()
    {
        // Each row is { expected Java encoding name, name handed to toJava() }.
        // The last row expects null for a name the mapper does not resolve.
        final String[][] expectations = {
            {"UTF8", "utf8"},
            {"UTF8", "UTF-8"},
            {"ASCII", "US-ASCII"},
            {"ASCII", "ASCII"},
            {"ISO8859_1", "LATIN1"},
            {"ISO8859_1", "ISO-8859-1"},
            {"Cp1252", "WINDOWS-1252"},
            {"JIS", "ISO2022"},
            {"JIS", "ISO-2022-JP"},
            {"Big5", "BIG5"},
            {"Unicode", "UTF16"},
            {"UnicodeBig", "UTF16BE"},
            {"UnicodeLittle", "UTF16LE"},
            {"Unicode", "UTF-16"},
            {"UnicodeBig", "UTF-16BE"},
            {"UnicodeLittle", "UTF-16LE"},
            {"Cp858", "CP858"},
            {"Cp858", "ibm858"},
            {"MacRoman", "Macintosh Roman"},
            {"Cp1252", "WiN1252"},
            {"SJIS", "SHIFTJIS"},
            {"MS932", "WINDOWS-31J"},
            {null, "IBM-"}
        };

        // checked in table order, so the first mismatch reported is the same as before
        for (int row = 0; row < expectations.length; row++)
        {
            String expectedJavaName = expectations[row][0];
            String inputName = expectations[row][1];
            assertEquals(expectedJavaName, EncodingNameMapper.toJava(inputName));
        }
    }

}
jtidy/src/test/java/org/w3c/tidy/TidyWordBugsTest.java0000644000175000017500000000734410115150162023153 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved.
* * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Testcase for Tidy resolved bugs (Word documents cleaning bugs). *

 * see http://sourceforge.net/support/tracker.php?aid=(item number)
 *
 * Each test only runs tidy on the input file named after the tracker item and makes
 * no assertion of its own in the test body.
 *
 * @author fgiust
 * @version $Revision: 565 $ ($Author: fgiust $)
 */
public class TidyWordBugsTest extends TidyTestCase
{

    /**
     * Instantiate a new Test case.
     * @param name test name
     */
    public TidyWordBugsTest(String name)
    {
        super(name);
    }

    /**
     * test for Tidy [431721] : Cleaning list bullets for Word 2000.
     * @throws Exception any exception generated during the test
     */
    public void test431721() throws Exception
    {
        executeTidyTest("431721.html");
    }

    /**
     * test for Tidy [444394] : Tidy strips images from Word2000 docs.
     * @throws Exception any exception generated during the test
     */
    public void test444394() throws Exception
    {
        executeTidyTest("444394.html");
    }

    /**
     * test for Tidy [463066] : CleanWord2000 misses mso-list bullets.
     * @throws Exception any exception generated during the test
     */
    public void test463066() throws Exception
    {
        executeTidyTest("463066.html");
    }

    /**
     * test for Tidy [634889] : Problem with <o:p> ms word tag.
     * @throws Exception any exception generated during the test
     */
    public void test634889() throws Exception
    {
        executeTidyTest("634889.html");
    }

}
jtidy/src/test/java/org/w3c/tidy/AllNotWorkingTests.java0000644000175000017500000002077010133721557023514 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you.
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.Test; import junit.framework.TestSuite; /** * Testsuite for all the test cases actually NOT working in JTidy. * @author Fabrizio Giustina * @version $Revision: 660 $ ($Author: fgiust $) */ public final class AllNotWorkingTests { /** * Should not be instantiated. */ private AllNotWorkingTests() { // unused } /** * Generates the suite. * @return junit test suite. 
*/ public static Test suite() { TestSuite suite = new TestSuite("All the NOT working test cases for JTidy"); suite.addTest(new JTidyBugsTest("test508245")); suite.addTest(new JTidyBugsTest("test531962")); suite.addTest(new JTidyBugsTest("test574158")); suite.addTest(new JTidyBugsTest("test763191")); suite.addTest(new JTidyBugsTest("test763186")); suite.addTest(new JTidyBugsTest("test917012")); suite.addTest(new JTidyBugsTest("test922302")); suite.addTest(new JTidyBugsTest("test943559")); suite.addTest(new JTidyBugsTest("test935796")); suite.addTest(new JTidyBugsTest("test1039641")); suite.addTest(new TidyEncodingBugsTest("test647255")); suite.addTest(new TidyEncodingBugsTest("test649812")); suite.addTest(new TidyEncodingBugsTest("test658230")); suite.addTest(new TidyEncodingBugsTest("test660397")); suite.addTest(new TidyEncodingBugsTest("test688746")); suite.addTest(new TidyOutputBugsTest("test427633")); suite.addTest(new TidyOutputBugsTest("test427820")); suite.addTest(new TidyOutputBugsTest("test427822")); suite.addTest(new TidyOutputBugsTest("test427826")); suite.addTest(new TidyOutputBugsTest("test427830")); suite.addTest(new TidyOutputBugsTest("test427833")); suite.addTest(new TidyOutputBugsTest("test427836")); suite.addTest(new TidyOutputBugsTest("test427838")); suite.addTest(new TidyOutputBugsTest("test427845")); suite.addTest(new TidyOutputBugsTest("test431739")); suite.addTest(new TidyOutputBugsTest("test431898")); suite.addTest(new TidyOutputBugsTest("test431958")); suite.addTest(new TidyOutputBugsTest("test433012")); suite.addTest(new TidyOutputBugsTest("test433360")); suite.addTest(new TidyOutputBugsTest("test435903")); suite.addTest(new TidyOutputBugsTest("test435909")); suite.addTest(new TidyOutputBugsTest("test435920")); suite.addTest(new TidyOutputBugsTest("test435923")); suite.addTest(new TidyOutputBugsTest("test437468")); suite.addTest(new TidyOutputBugsTest("test438954")); suite.addTest(new TidyOutputBugsTest("test441508")); suite.addTest(new 
TidyOutputBugsTest("test443576")); suite.addTest(new TidyOutputBugsTest("test445394")); suite.addTest(new TidyOutputBugsTest("test445557")); suite.addTest(new TidyOutputBugsTest("test449348")); suite.addTest(new TidyOutputBugsTest("test467863")); suite.addTest(new TidyOutputBugsTest("test467865")); suite.addTest(new TidyOutputBugsTest("test470663")); suite.addTest(new TidyOutputBugsTest("test480843")); suite.addTest(new TidyOutputBugsTest("test502346")); suite.addTest(new TidyOutputBugsTest("test503436")); suite.addTest(new TidyOutputBugsTest("test504206")); suite.addTest(new TidyOutputBugsTest("test505770")); suite.addTest(new TidyOutputBugsTest("test508936")); suite.addTest(new TidyOutputBugsTest("test511243")); suite.addTest(new TidyOutputBugsTest("test511679")); suite.addTest(new TidyOutputBugsTest("test514348")); suite.addTest(new TidyOutputBugsTest("test514893")); suite.addTest(new TidyOutputBugsTest("test533105")); suite.addTest(new TidyOutputBugsTest("test533233")); suite.addTest(new TidyOutputBugsTest("test537604")); suite.addTest(new TidyOutputBugsTest("test540045")); suite.addTest(new TidyOutputBugsTest("test540555")); suite.addTest(new TidyOutputBugsTest("test540571")); suite.addTest(new TidyOutputBugsTest("test578216")); suite.addTest(new TidyOutputBugsTest("test586555")); suite.addTest(new TidyOutputBugsTest("test586562")); suite.addTest(new TidyOutputBugsTest("test593705")); suite.addTest(new TidyOutputBugsTest("test598860")); suite.addTest(new TidyOutputBugsTest("test620531")); suite.addTest(new TidyOutputBugsTest("test629885")); suite.addTest(new TidyOutputBugsTest("test640473")); suite.addTest(new TidyOutputBugsTest("test640474")); suite.addTest(new TidyOutputBugsTest("test646946")); suite.addTest(new TidyOutputBugsTest("test647900")); suite.addTest(new TidyOutputBugsTest("test655338")); suite.addTest(new TidyOutputBugsTest("test656889")); suite.addTest(new TidyOutputBugsTest("test663548")); suite.addTest(new TidyOutputBugsTest("test678268")); 
suite.addTest(new TidyOutputBugsTest("test680664")); suite.addTest(new TidyOutputBugsTest("test695408")); suite.addTest(new TidyOutputBugsTest("test708322")); suite.addTest(new TidyOutputBugsTest("test735603")); suite.addTest(new TidyOutputBugsTest("test765852")); suite.addTest(new TidyOutputBugsTest("test994841")); suite.addTest(new TidyOutputBugsTest("test1031865")); suite.addTest(new TidyWarningBugsTest("test431964")); suite.addTest(new TidyWarningBugsTest("test433607")); suite.addTest(new TidyWarningBugsTest("test433670")); suite.addTest(new TidyWarningBugsTest("test434047")); suite.addTest(new TidyWarningBugsTest("test435922")); suite.addTest(new TidyWarningBugsTest("test446019")); suite.addTest(new TidyWarningBugsTest("test552861")); suite.addTest(new TidyWarningBugsTest("test553414")); suite.addTest(new TidyWordBugsTest("test431721")); suite.addTest(new TidyWordBugsTest("test444394")); suite.addTest(new TidyWordBugsTest("test463066")); suite.addTest(new TidyWordBugsTest("test634889")); // previously working suite.addTest(new TidyOutputBugsTest("test531964")); return suite; } }jtidy/src/test/java/org/w3c/tidy/ReportTest.java0000644000175000017500000007033310134011630022033 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.TestCase; /** * Test for Report messages. This test case actually requires EN locale to run successfully. . * @author fgiust * @version $Revision: 661 $ ($Author: fgiust $) */ public class ReportTest extends TestCase { /** * report instance. */ private Report report; /** * lexer instance. */ private Lexer lexer; /** * instantiates a new test. 
* @param name test name */ public ReportTest(String name) { super(name); } /** * @see junit.framework.TestCase#setUp() */ protected void setUp() throws Exception { super.setUp(); this.report = new Report(); this.lexer = new Lexer(null, new Configuration(report), this.report); lexer.lines = 12; lexer.columns = 34; } /** * test getMessage with the missing_endtag_for key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingEndtagFor() throws Exception { String message = this.report.getMessage( -1, lexer, "missing_endtag_for", new Object[]{"test"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: missing ", message); } /** * test getMessage with the missing_endtag_before key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingEndtagBefore() throws Exception { String message = this.report.getMessage( -1, lexer, "missing_endtag_before", new Object[]{"test", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: missing before bee", message); } /** * test getMessage with the discarding_unexpected key. * @throws Exception any Exception generated during test */ public void testGetMessageDiscardingUnexpected() throws Exception { String message = this.report.getMessage( -1, lexer, "discarding_unexpected", new Object[]{"test"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: discarding unexpected test", message); } /** * test getMessage with the nested_emphasis key. * @throws Exception any Exception generated during test */ public void testGetMessageNestedEmphasis() throws Exception { String message = this.report.getMessage( -1, lexer, "nested_emphasis", new Object[]{"test"}, TidyMessage.Level.INFO); assertEquals("line 12 column 34 - nested emphasis test", message); } /** * test getMessage with the coerce_to_endtag key. 
* @throws Exception any Exception generated during test */ public void testGetMessageCoerceToEndtag() throws Exception { String message = this.report.getMessage( -1, lexer, "coerce_to_endtag", new Object[]{"test"}, TidyMessage.Level.INFO); assertEquals("line 12 column 34 - is probably intended as ", message); } /** * test getMessage with the non_matching_endtag key. * @throws Exception any Exception generated during test */ public void testGetMessageNonMatchingEndtag() throws Exception { String message = this.report.getMessage( -1, lexer, "non_matching_endtag", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: replacing unexpected by ", message); } /** * test getMessage with the tag_not_allowed_in key. * @throws Exception any Exception generated during test */ public void testGetMessageTagNonAllowedIn() throws Exception { String message = this.report.getMessage( -1, lexer, "tag_not_allowed_in", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: isn't allowed in elements", message); } /** * test getMessage with the doctype_after_tags key. * @throws Exception any Exception generated during test */ public void testGetMessageDoctypeAfterTags() throws Exception { String message = this.report.getMessage(-1, lexer, "doctype_after_tags", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: isn't allowed after elements", message); } /** * test getMessage with the missing_starttag key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingStarttag() throws Exception { String message = this.report.getMessage( -1, lexer, "missing_starttag", new Object[]{"test"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: missing ", message); } /** * test getMessage with the using_br_inplace_of key. 
* @throws Exception any Exception generated during test */ public void testGetMessageUsingBrInPlaceOf() throws Exception { String message = this.report.getMessage( -1, lexer, "using_br_inplace_of", new Object[]{"test"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: using
          in place of test", message); } /** * test getMessage with the inserting_tag key. * @throws Exception any Exception generated during test */ public void testGetMessageInsertingTag() throws Exception { String message = this.report.getMessage( -1, lexer, "inserting_tag", new Object[]{"test"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: inserting implicit ", message); } /** * test getMessage with the cant_be_nested key. * @throws Exception any Exception generated during test */ public void testGetMessageCantBeNested() throws Exception { String message = this.report.getMessage( -1, lexer, "cant_be_nested", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: can't be nested", message); } /** * test getMessage with the proprietary_element key. * @throws Exception any Exception generated during test */ public void testGetMessageProprietaryElement() throws Exception { String message = this.report.getMessage( -1, lexer, "proprietary_element", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: is not approved by W3C", message); } /** * test getMessage with the obsolete_element key. * @throws Exception any Exception generated during test */ public void testGetMessageObsoleteElement() throws Exception { String message = this.report.getMessage( -1, lexer, "obsolete_element", new Object[]{"", ""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: replacing obsolete element by ", message); } /** * test getMessage with the replacing_element key. * @throws Exception any Exception generated during test */ public void testGetMessageReplacingElement() throws Exception { String message = this.report.getMessage( -1, lexer, "replacing_element", new Object[]{"", ""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: replacing element by ", message); } /** * test getMessage with the trim_empty_element key. 
* @throws Exception any Exception generated during test */ public void testGetMessageTrimEmptyElement() throws Exception { String message = this.report.getMessage( -1, lexer, "trim_empty_element", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: trimming empty ", message); } /** * test getMessage with the missing_title_element key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingTitleElement() throws Exception { String message = this.report.getMessage(-1, lexer, "missing_title_element", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: inserting missing 'title' element", message); } /** * test getMessage with the illegal_nesting key. * @throws Exception any Exception generated during test */ public void testGetMessageIllegalNesting() throws Exception { String message = this.report.getMessage( -1, lexer, "illegal_nesting", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: shouldn't be nested", message); } /** * test getMessage with the noframes_content key. * @throws Exception any Exception generated during test */ public void testGetMessageNoframesContent() throws Exception { String message = this.report.getMessage( -1, lexer, "noframes_content", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: not inside 'noframes' element", message); } /** * test getMessage with the inconsistent_version key. * @throws Exception any Exception generated during test */ public void testGetMessageInconsistentVersion() throws Exception { String message = this.report.getMessage(-1, lexer, "inconsistent_version", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: html doctype doesn't match content", message); } /** * test getMessage with the malformed_doctype key. 
* @throws Exception any Exception generated during test */ public void testGetMessageMalformedDoctype() throws Exception { String message = this.report.getMessage(-1, lexer, "malformed_doctype", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: expected \"html PUBLIC\" or \"html SYSTEM\"", message); } /** * test getMessage with the content_after_body key. * @throws Exception any Exception generated during test */ public void testGetMessageContentAfterBody() throws Exception { String message = this.report.getMessage(-1, lexer, "content_after_body", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: content occurs after end of body", message); } /** * test getMessage with the malformed_comment key. * @throws Exception any Exception generated during test */ public void testGetMessageMalformedComment() throws Exception { String message = this.report.getMessage(-1, lexer, "malformed_comment", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: adjacent hyphens within comment", message); } /** * test getMessage with the bad_comment_chars key. * @throws Exception any Exception generated during test */ public void testGetMessageBadCommentChars() throws Exception { String message = this.report.getMessage(-1, lexer, "bad_comment_chars", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: expecting -- or >", message); } /** * test getMessage with the bad_xml_comment key. * @throws Exception any Exception generated during test */ public void testGetMessageBadXmlComment() throws Exception { String message = this.report.getMessage(-1, lexer, "bad_xml_comment", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: XML comments can't contain --", message); } /** * test getMessage with the bad_cdata_content key. 
* @throws Exception any Exception generated during test */ public void testGetMessageBadCdataComment() throws Exception { String message = this.report.getMessage(-1, lexer, "bad_cdata_content", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: '<' + '/' + letter not allowed here", message); } /** * test getMessage with the inconsistent_namespace key. * @throws Exception any Exception generated during test */ public void testGetMessageInconsistentNamespace() throws Exception { String message = this.report.getMessage(-1, lexer, "inconsistent_namespace", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: html namespace doesn't match content", message); } /** * test getMessage with the dtype_not_upper_case key. * @throws Exception any Exception generated during test */ public void testGetMessageDtypeNotUpperCase() throws Exception { String message = this.report.getMessage(-1, lexer, "dtype_not_upper_case", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: SYSTEM, PUBLIC, W3C, DTD, EN must be upper case", message); } /** * test getMessage with the unexpected_end_of_file key. * @throws Exception any Exception generated during test */ public void testGetMessageUnexpectedEndOfFile() throws Exception { String message = this.report.getMessage( -1, lexer, "unexpected_end_of_file", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: end of file while parsing attributes ", message); } /** * test getMessage with the suspected_missing_quote key. * @throws Exception any Exception generated during test */ public void testGetMessageSuspectedMissingQuote() throws Exception { String message = this.report.getMessage(-1, lexer, "suspected_missing_quote", null, TidyMessage.Level.ERROR); assertEquals("line 12 column 34 - Error: missing quotemark for attribute value", message); } /** * test getMessage with the duplicate_frameset key. 
* @throws Exception any Exception generated during test */ public void testGetMessageDuplicateFrameset() throws Exception { String message = this.report.getMessage(-1, lexer, "duplicate_frameset", null, TidyMessage.Level.ERROR); assertEquals("line 12 column 34 - Error: repeated FRAMESET element", message); } /** * test getMessage with the unknown_element key. * @throws Exception any Exception generated during test */ public void testGetMessageUnknownElement() throws Exception { String message = this.report.getMessage( -1, lexer, "unknown_element", new Object[]{""}, TidyMessage.Level.ERROR); assertEquals("line 12 column 34 - Error: is not recognized!", message); } /** * test getMessage with the unexpected_endtag key. * @throws Exception any Exception generated during test */ public void testGetMessageUnexpectedEndtag() throws Exception { String message = this.report.getMessage( -1, lexer, "unexpected_endtag", new Object[]{"test"}, TidyMessage.Level.ERROR); assertEquals("line 12 column 34 - Error: unexpected ", message); } /** * test getMessage with the unexpected_endtag_in key. * @throws Exception any Exception generated during test */ public void testGetMessageUnexpectedEndtagIn() throws Exception { String message = this.report.getMessage( -1, lexer, "unexpected_endtag_in", new Object[]{"test", "bee"}, TidyMessage.Level.ERROR); assertEquals("line 12 column 34 - Error: unexpected in ", message); } /** * test getMessage with the too_many_elements key. * @throws Exception any Exception generated during test */ public void testGetMessageTooManyElements() throws Exception { String message = this.report.getMessage( -1, lexer, "too_many_elements", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: too many elements", message); } /** * test getMessage with the too_many_elements_in key. 
* @throws Exception any Exception generated during test */ public void testGetMessageTooManyElementsIn() throws Exception { String message = this.report.getMessage( -1, lexer, "too_many_elements_in", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: too many elements in ", message); } /** * test getMessage with the unknown_attribute key. * @throws Exception any Exception generated during test */ public void testGetMessageUnknownAttribute() throws Exception { String message = this.report.getMessage( -1, lexer, "unknown_attribute", new Object[]{"test"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: unknown attribute \"test\"", message); } /** * test getMessage with the missing_attribute key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingAttribute() throws Exception { String message = this.report.getMessage( -1, lexer, "missing_attribute", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: lacks \"bee\" attribute", message); } /** * test getMessage with the missing_attr_value key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingAttrValue() throws Exception { String message = this.report.getMessage( -1, lexer, "missing_attr_value", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: attribute \"bee\" lacks value", message); } /** * test getMessage with the missing_imagemap key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingImagemap() throws Exception { String message = this.report.getMessage( -1, lexer, "missing_imagemap", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: should use client-side image map", message); } /** * test getMessage with the bad_attribute_value key. 
* @throws Exception any Exception generated during test */ public void testGetMessageBadAttributeValue() throws Exception { String message = this.report.getMessage( -1, lexer, "bad_attribute_value", new Object[]{"", "bee", "ant"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: attribute \"bee\" has invalid value \"ant\"", message); } /** * test getMessage with the xml_attribute_value key. * @throws Exception any Exception generated during test */ public void testGetMessageXmlAttributeValue() throws Exception { String message = this.report.getMessage( -1, lexer, "xml_attribute_value", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: has XML attribute \"bee\"", message); } /** * test getMessage with the unexpected_gt key. * @throws Exception any Exception generated during test */ public void testGetMessageUnexpectedGt() throws Exception { String message = this.report.getMessage( -1, lexer, "unexpected_gt", new Object[]{""}, TidyMessage.Level.ERROR); assertEquals("line 12 column 34 - Error: missing '>' for end of tag", message); } /** * test getMessage with the unexpected_quotemark key. * @throws Exception any Exception generated during test */ public void testGetMessageUnexpectedQuotemark() throws Exception { String message = this.report.getMessage( -1, lexer, "unexpected_quotemark", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: unexpected or duplicate quote mark", message); } /** * test getMessage with the repeated_attribute key. * @throws Exception any Exception generated during test */ public void testGetMessageRepeatedAttribute() throws Exception { String message = this.report.getMessage( -1, lexer, "repeated_attribute", new Object[]{"", "bee", "ant"}, TidyMessage.Level.WARNING); assertEquals( "line 12 column 34 - Warning: dropping value \"bee\" for repeated attribute \"ant\"", message); } /** * test getMessage with the proprietary_attr_value key. 
* @throws Exception any Exception generated during test */ public void testGetMessageProprietaryAttrValue() throws Exception { String message = this.report.getMessage( -1, lexer, "proprietary_attr_value", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: proprietary attribute value \"bee\"", message); } /** * test getMessage with the proprietary_attribute key. * @throws Exception any Exception generated during test */ public void testGetMessageProprietaryAttribute() throws Exception { String message = this.report.getMessage( -1, lexer, "proprietary_attribute", new Object[]{"", "bee"}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: proprietary attribute \"bee\"", message); } /** * test getMessage with the id_name_mismatch key. * @throws Exception any Exception generated during test */ public void testGetMessageIdNameMismatch() throws Exception { String message = this.report.getMessage( -1, lexer, "id_name_mismatch", new Object[]{""}, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: id and name attribute value mismatch", message); } /** * test getMessage with the missing_doctype key. * @throws Exception any Exception generated during test */ public void testGetMessageMissingDoctype() throws Exception { String message = this.report.getMessage(-1, lexer, "missing_doctype", null, TidyMessage.Level.WARNING); assertEquals("line 12 column 34 - Warning: missing declaration", message); } /** * test getMessage with the doctype_given key. * @throws Exception any Exception generated during test */ public void testGetMessageDoctypeGiven() throws Exception { String message = this.report.getMessage( -1, lexer, "doctype_given", new Object[]{"test", "bee"}, TidyMessage.Level.SUMMARY); assertEquals("test: Doctype given is \"bee\"", message); } /** * test getMessage with the report_version key. 
* @throws Exception any Exception generated during test */ public void testGetMessageReportVersion() throws Exception { String message = this.report.getMessage( -1, lexer, "report_version", new Object[]{"test", "bee"}, TidyMessage.Level.SUMMARY); assertEquals("test: Document content looks like bee", message); } /** * test getMessage with the xml_attribute_value key. * @throws Exception any Exception generated during test */ public void testGetMessageNumWarning() throws Exception { String message = this.report.getMessage( -1, lexer, "num_warnings", new Object[]{new Integer(0), new Integer(33)}, TidyMessage.Level.SUMMARY); assertEquals("no warnings, 33 errors were found!", message); } }jtidy/src/test/java/org/w3c/tidy/JTidyParserBugsTest.java0000644000175000017500000000562310110502420023575 0ustar twernertwerner/** * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */
package org.w3c.tidy;

/**
 * Regression tests for resolved JTidy bugs that involve the DOM parser.
 * @author fgiust
 * @version $Revision: 446 $ ($Author: fgiust $)
 */
public class JTidyParserBugsTest extends TidyTestCase
{

    /**
     * Creates the test case.
     * @param name test name
     */
    public JTidyParserBugsTest(final String name)
    {
        super(name);
    }

    /**
     * JTidy [788651]: DOM parser hangs (page using shift_jis charset). Runs the DOM parsing check on the fixture
     * recorded for that bug.
     * @throws Exception any exception generated during the test
     */
    public void test788651() throws Exception
    {
        final String fixture = "788651.html";
        parseDomTest(fixture);
    }

}jtidy/src/test/java/org/w3c/tidy/TidyBugsTest.java0000644000175000017500000000664010463651222022326 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved.
* * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import java.io.PrintWriter; import java.io.StringWriter; import java.net.URL; /** * Testcase for Tidy resolved bugs. *

          * see http://sourceforge.net/support/tracker.php?aid=(item number) *

          * @author fgiust * @version $Revision: 811 $ ($Author: fgiust $) */ public class TidyBugsTest extends TidyTestCase { /** * Instantiate a new Test case. * @param name test name */ public TidyBugsTest(String name) { super(name); } /** * test for Tidy [431895] : Filename not set when processing XML or if the "-quiet" option is used when processing * HTML; "(null)" is reported instead. * @throws Exception any exception generated during the test */ public void test431895() throws Exception { URL inputURL = getClass().getClassLoader().getResource("431895.html"); // set up error log this.errorLog = new StringWriter(); this.tidy.setErrout(new PrintWriter(this.errorLog)); tidy.mainExec(new String[]{"-qe", cleanUpFilePath(inputURL.getFile())}); assertLogDoesntContains("null"); } }jtidy/src/test/java/org/w3c/tidy/AllWorkingTests.java0000644000175000017500000002010710167211525023021 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.Test; import junit.framework.TestSuite; /** * Testsuite for all the test cases actually working in JTidy. Must be used to assure there are no regressions after a * change. * @author Fabrizio Giustina * @version $Revision: 765 $ ($Author: fgiust $) */ public final class AllWorkingTests { /** * Should not be instantiated. */ private AllWorkingTests() { // unused } /** * Generates the suite. * @return junit test suite. 
*/
    public static Test suite()
    {
        // Registration order matters for reporting, so it is kept exactly: whole test classes first,
        // then hand-picked test methods grouped by the test case class that declares them.
        TestSuite suite = new TestSuite("All the working test cases for JTidy");

        suite.addTestSuite(TidyCrashingBugsTest.class);
        suite.addTestSuite(TidyBugsTest.class);
        suite.addTestSuite(JTidyParserBugsTest.class);
        suite.addTestSuite(JTidyWarningBugsTest.class);

        final String[] jtidyBugs = {
            "test475643", "test527118", "test538727", "test547976", "test610244",
            "test648768", "test663197", "test791933", "test909187", "test929936",
            "test1024661", "test1058909", "test1097062"};
        for (int i = 0; i < jtidyBugs.length; i++)
        {
            suite.addTest(new JTidyBugsTest(jtidyBugs[i]));
        }

        final String[] outputBugs = {
            "test427812", "test427662", "test427677", "test427819", "test427821",
            "test427823", "test427825", "test427835", "test427837", "test427839",
            "test427846", "test431731", "test431889", "test431965", "test431736",
            "test432677", "test433040", "test433359", "test433604", "test433656",
            "test433666", "test433672", "test434940", "test434940b", "test435919",
            "test438650", "test438658", "test441568", "test441740", "test443381",
            "test443678", "test445074", "test470688", "test471264", "test473490",
            "test456596", "test480406", "test480701", "test487204", "test487283",
            "test500236", "test502348", "test517550", "test603128", "test616744",
            "test996484"};
        for (int i = 0; i < outputBugs.length; i++)
        {
            suite.addTest(new TidyOutputBugsTest(outputBugs[i]));
        }

        final String[] warningBugs = {
            "test427810", "test431874", "test427827", "test427834", "test427844",
            "test431719", "test431883", "test431956", "test433021", "test434100",
            "test435917", "test435917b", "test438956", "test450389", "test501230",
            "test501669", "test516370", "test517528", "test525081", "test538536",
            "test545067", "test553468", "test706260"};
        for (int i = 0; i < warningBugs.length; i++)
        {
            suite.addTest(new TidyWarningBugsTest(warningBugs[i]));
        }

        suite.addTest(new TidyEncodingBugsTest("test676156"));

        return suite;
    }

}
jtidy/src/test/java/org/w3c/tidy/AllFailingBlanklinesTests.java0000644000175000017500000000750310133721557024766 0ustar twernertwerner
/*
 * Java HTML Tidy - JTidy
 * HTML parser and pretty printer
 *
 * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 * Institute of Technology, Institut National de Recherche en
 * Informatique et en Automatique, Keio University). All Rights
 * Reserved.
 *
 * Contributing Author(s):
 *
 * Dave Raggett
 * Andy Quick (translation to Java)
 * Gary L Peskin (Java development)
 * Sami Lempinen (release management)
 * Fabrizio Giustina
 *
 * The contributing author(s) would like to thank all those who
 * helped with testing, bug fixes, and patience. This wouldn't
 * have been possible without all of you.
 *
 * COPYRIGHT NOTICE:
 *
 * This software and documentation is provided "as is," and
 * the copyright holders and contributing author(s) make no
 * representations or warranties, express or implied, including
 * but not limited to, warranties of merchantability or fitness
 * for any particular purpose or that the use of the software or
 * documentation will not infringe any third party patents,
 * copyrights, trademarks or other rights.
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.Test; import junit.framework.TestSuite; /** * Testsuite for all the test cases tests failing with unneeded blank lines or spaces. * @author Fabrizio Giustina * @version $Revision: 660 $ ($Author: fgiust $) */ public final class AllFailingBlanklinesTests { /** * Should not be instantiated. */ private AllFailingBlanklinesTests() { } /** * Generates the suite. * @return junit test suite. 
*/ public static Test suite() { TestSuite suite = new TestSuite("All the tests failing with unneeded blank lines or spaces"); suite.addTest(new JTidyBugsTest("test922302")); suite.addTest(new TidyOutputBugsTest("test427826")); suite.addTest(new TidyOutputBugsTest("test427838")); suite.addTest(new TidyOutputBugsTest("test435903")); suite.addTest(new TidyOutputBugsTest("test435909")); suite.addTest(new TidyOutputBugsTest("test435920")); suite.addTest(new TidyOutputBugsTest("test438954")); suite.addTest(new TidyOutputBugsTest("test533105")); suite.addTest(new TidyOutputBugsTest("test533233")); suite.addTest(new TidyOutputBugsTest("test593705")); suite.addTest(new TidyOutputBugsTest("test598860")); suite.addTest(new TidyOutputBugsTest("test620531")); suite.addTest(new TidyOutputBugsTest("test629885")); suite.addTest(new TidyOutputBugsTest("test640473")); suite.addTest(new TidyOutputBugsTest("test663548")); return suite; } }jtidy/src/test/java/org/w3c/tidy/AllFailingDoctypeTests.java0000644000175000017500000001116210115435613024302 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; import junit.framework.Test; import junit.framework.TestSuite; /** * Testsuite for all the test cases failing with a bad dtd. * @author Fabrizio Giustina * @version $Revision: 572 $ ($Author: fgiust $) */ public final class AllFailingDoctypeTests { /** * Should not be instantiated. */ private AllFailingDoctypeTests() { } /** * Generates the suite. * @return junit test suite. 
*/ public static Test suite() { TestSuite suite = new TestSuite("All the tests failing because of a wrong doctype"); suite.addTest(new JTidyBugsTest("test763186")); suite.addTest(new TidyOutputBugsTest("test427633")); suite.addTest(new TidyOutputBugsTest("test427820")); suite.addTest(new TidyOutputBugsTest("test427822")); suite.addTest(new TidyOutputBugsTest("test427830")); suite.addTest(new TidyOutputBugsTest("test427833")); suite.addTest(new TidyOutputBugsTest("test431739")); suite.addTest(new TidyOutputBugsTest("test431958")); suite.addTest(new TidyOutputBugsTest("test433012")); suite.addTest(new TidyOutputBugsTest("test437468")); suite.addTest(new TidyOutputBugsTest("test441508")); suite.addTest(new TidyOutputBugsTest("test445557")); suite.addTest(new TidyOutputBugsTest("test470663")); suite.addTest(new TidyOutputBugsTest("test502346")); suite.addTest(new TidyOutputBugsTest("test504206")); suite.addTest(new TidyOutputBugsTest("test508936")); suite.addTest(new TidyOutputBugsTest("test514893")); suite.addTest(new TidyOutputBugsTest("test540555")); suite.addTest(new TidyOutputBugsTest("test540571")); suite.addTest(new TidyOutputBugsTest("test578216")); suite.addTest(new TidyOutputBugsTest("test586562")); suite.addTest(new TidyOutputBugsTest("test646946")); suite.addTest(new TidyOutputBugsTest("test647900")); suite.addTest(new TidyOutputBugsTest("test655338")); suite.addTest(new TidyOutputBugsTest("test656889")); suite.addTest(new TidyOutputBugsTest("test765852")); suite.addTest(new TidyOutputBugsTest("test994841")); suite.addTest(new TidyWarningBugsTest("test516370")); suite.addTest(new TidyWarningBugsTest("test552861")); return suite; } }jtidy/src/test/java/org/w3c/tidy/JTidyBugsTest.java0000644000175000017500000002163610167211525022441 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et 
en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * testcase for JTidy resolved bugs. 
* @author fgiust * @version $Revision: 765 $ ($Author: fgiust $) */ public class JTidyBugsTest extends TidyTestCase { /** * Instantiate a new Test case. * @param name test name */ public JTidyBugsTest(String name) { super(name); } /** * test for JTidy [475643]: Hex character references not handled. * @throws Exception any exception generated during the test */ public void test475643() throws Exception { executeTidyTest("475643.html"); // no warnings for unknown entities assertNoWarnings(); } /** * test for JTidy [547976]: Case of attribute values. * @throws Exception any exception generated during the test */ public void test547976() throws Exception { executeTidyTest("547976.html"); } /** * test for JTidy [508245]: Do not convert the & or < to Entity Ref. Actually is "Tidy fails in completing * unclosed tags" * @throws Exception any exception generated during the test */ public void test508245() throws Exception { executeTidyTest("508245.html"); assertNoErrors(); } /** * test for JTidy [527118]: Suppress duplicate attributes. * @throws Exception any exception generated during the test */ public void test527118() throws Exception { executeTidyTest("527118.html"); } /** * test for JTidy [531962]: Closing quotes around attribute values. * @throws Exception any exception generated during the test */ public void test531962() throws Exception { // wish: missing quote should be detected and handled better: // original: width="10 height="10" // now: width="10 height=" // desired: width="10" height="10" executeTidyTest("531962.html"); } /** * test for JTidy [538727]: setDocType uncorrectly adds "". * @throws Exception any exception generated during the test */ public void test538727() throws Exception { // this has the same result of setting it in the config file // tidy.setDocType("\"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"file:///E:/xhtml1-transitional.dtd\""); executeTidyTest("538727.html"); } /** * test for JTidy [574158]: Error with FONT tag. 
* @throws Exception any exception generated during the test */ public void test574158() throws Exception { executeTidyTest("574158.html"); } /** * test for JTidy [610244]: NullPointerException in parsing. * @throws Exception any exception generated during the test */ public void test610244() throws Exception { executeTidyTest("610244.html"); } /** * test for JTidy [648768]: Fix for character references >= 32768. * @throws Exception any exception generated during the test */ public void test648768() throws Exception { executeTidyTest("648768.html"); assertNoWarnings(); } /** * test for JTidy [791933]: German special character converted to upper case. * @throws Exception any exception generated during the test */ public void test791933() throws Exception { executeTidyTest("791933.html"); } /** * test for JTidy [663197]: nbsp handling is wrong. * @throws Exception any exception generated during the test */ public void test663197() throws Exception { executeTidyTest("663197.html"); } /** * test for JTidy [763191]: Again DOM Parsing error (tidy removes spaces in attribute values). * @throws Exception any exception generated during the test */ public void test763191() throws Exception { executeTidyTest("763191.html"); } /** * test for JTidy [763186]: Another DOM Parsing error (tidy inserting whitespaces). * @throws Exception any exception generated during the test */ public void test763186() throws Exception { executeTidyTest("763186.html"); } /** * test for JTidy [909187]: JTidy should remove 0x0 from stream. * @throws Exception any exception generated during the test */ public void test909187() throws Exception { executeTidyTest("909187.html"); } /** * test for JTidy [917012]: Spaces are moved from content to between tags. * @throws Exception any exception generated during the test */ public void test917012() throws Exception { executeTidyTest("917012.html"); } /** * test for JTidy [922302]: Add comment to script tag to produce valid XML. 
* @throws Exception any exception generated during the test */ public void test922302() throws Exception { executeTidyTest("922302.html"); } /** * test for JTidy [929936]: escape URLs. * @throws Exception any exception generated during the test */ public void test929936() throws Exception { executeTidyTest("929936.html"); // 10 escaped "\" or chars assertWarnings(10); } /** * test for JTidy [943559]: Form between td. Tidy C mark this as an error, Jtidy adds a useless table. * @throws Exception any exception generated during the test */ public void test943559() throws Exception { executeTidyTest("943559.html"); } /** * test for JTidy [935796]: Quote entities converted to literals. * @throws Exception any exception generated during the test */ public void test935796() throws Exception { executeTidyTest("935796.html"); } /** * test for JTidy [1024661]: Error Parsing duplicate style. * @throws Exception any exception generated during the test */ public void test1024661() throws Exception { executeTidyTest("1024661.html"); } /** * test for JTidy [1039641]: Pre should not change the inside text. * @throws Exception any exception generated during the test */ public void test1039641() throws Exception { executeTidyTest("1039641.html"); } /** * test for JTidy [1058909]: Certain sites causing null pointer Exceptions. * @throws Exception any exception generated during the test */ public void test1058909() throws Exception { executeTidyTest("1058909.html"); } /** * test for JTidy [1097062]: trimInitialSpace does not handle nested inlines. 
* @throws Exception any exception generated during the test */ public void test1097062() throws Exception { executeTidyTest("1097062.html"); } }jtidy/src/test/java/org/w3c/tidy/ant/0000755000175000017500000000000011617345027017652 5ustar twernertwernerjtidy/src/test/java/org/w3c/tidy/ant/JTidyTaskTest.java0000644000175000017500000003306010463645504023226 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. 
Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy.ant; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.Reader; import junit.framework.TestCase; import org.apache.tools.ant.BuildException; import org.apache.tools.ant.Project; import org.apache.tools.ant.types.FileSet; import org.apache.tools.ant.types.Parameter; import org.apache.tools.ant.util.FileUtils; /** * @author Fabrizio Giustina * @version $Revision: 807 $ ($Author: fgiust $) */ public class JTidyTaskTest extends TestCase { /** * test instance. */ private JTidyTask task; /** * Temp dir used for output. */ private String tempDir; /** * Test dir. */ private String testDir; /** * @see junit.framework.TestCase#setUp() */ protected void setUp() throws Exception { super.setUp(); task = new JTidyTask(); Project p = new Project(); task.setProject(p); task.init(); tempDir = System.getProperty("java.io.tmpdir"); testDir = new File(getClass().getClassLoader().getResource("test.dir").getPath()).getParent(); } /** * Test with invalid parameters. */ public void testExceptionMissingParameters() { try { task.execute(); fail("Invalid parameters not detected"); } catch (BuildException e) { // ok } } /** * Test with invalid parameters. 
*/ public void testExceptionBothSrcfileAndFilesets() { try { task.setSrcfile(new File(".")); task.addFileset(new FileSet()); task.validateParameters(); fail("Invalid parameters not detected"); } catch (BuildException e) { // ok } } /** * Test with invalid parameters. */ public void testDestFileAndDestDirNull() { try { task.setSrcfile(new File(".")); task.validateParameters(); fail("Invalid parameters not detected"); } catch (BuildException e) { // ok } } /** * Test with invalid parameters. */ public void testDestFileAndFilesets() { try { task.addFileset(new FileSet()); task.setDestfile(new File(".")); task.validateParameters(); fail("Invalid parameters not detected"); } catch (BuildException e) { // ok } } /** * Test with invalid parameters. */ public void testScrFileIsADir() { try { task.setSrcfile(new File("/")); task.setDestfile(new File("test.out")); task.validateParameters(); fail("Invalid parameters not detected"); } catch (BuildException e) { // ok } } /** * Test with invalid parameters. */ public void testScrFileDoesntExist() { task.setSrcfile(new File("xyz.123")); task.setDestfile(new File("test.out")); task.validateParameters(); try { task.execute(); fail("Missing source file not detected"); } catch (BuildException e) { // ok } } /** * Test with invalid parameters. */ public void testInvalidProperties() { try { task.setSrcfile(new File("test.in")); task.setDestfile(new File("test.out")); task.setProperties(new File("x2ui34")); task.validateParameters(); fail("Invalid parameters not detected"); } catch (BuildException e) { // ok } } /** * Test with a fileset. 
*/ public void testFileset() { FileSet fileset = new FileSet(); fileset.setDir(new File(testDir, "ant")); task.addFileset(fileset); task.setDestdir(new File(tempDir)); task.execute(); assertTrue("Expected output file not created", new File(tempDir, "file1.html").exists()); assertTrue("Expected output file not created", new File(tempDir, "file2.html").exists()); new File(tempDir, "file1.html").delete(); new File(tempDir, "file2.html").delete(); } /** * Test with a fileset. */ public void testFilesetWithDirStructure() { FileSet fileset = new FileSet(); fileset.setDir(new File(testDir)); fileset.setIncludes("ant/*.html"); task.addFileset(fileset); task.setDestdir(new File(tempDir)); task.execute(); assertTrue("Expected output file not created", new File(tempDir, "ant/file1.html").exists()); assertTrue("Expected output file not created", new File(tempDir, "ant/file2.html").exists()); new File(tempDir, "ant/file1.html").delete(); new File(tempDir, "ant/file2.html").delete(); new File(tempDir, "ant").delete(); } /** * Test with a fileset. */ public void testFilesetWithDirStructureFlatten() { FileSet fileset = new FileSet(); fileset.setDir(new File(testDir)); fileset.setIncludes("ant/*.html"); task.addFileset(fileset); task.setDestdir(new File(tempDir)); task.setFlatten(true); task.execute(); assertTrue("Expected output file not created", new File(tempDir, "file1.html").exists()); assertTrue("Expected output file not created", new File(tempDir, "file2.html").exists()); new File(tempDir, "file1.html").delete(); new File(tempDir, "file2.html").delete(); } /** * Test nested parameter element. 
*/ public void testWithParameters() { FileSet fileset = new FileSet(); fileset.setDir(new File(testDir)); fileset.setIncludes("ant/*1.html"); task.addFileset(fileset); task.setDestdir(new File(tempDir)); task.setFlatten(true); Parameter parameter = new Parameter(); parameter.setName("tidy-mark"); parameter.setValue("false"); task.addConfiguredParameter(parameter); task.execute(); assertTrue("Expected output file not created", new File(tempDir, "file1.html").exists()); try { Reader reader = new FileReader(new File(tempDir, "file1.html")); String output = FileUtils.readFully(reader); reader.close(); // output file should not contain "generator" assertTrue("Configured parameter doesn't have effect on output.", output.indexOf("generator") == -1); } catch (IOException e) { fail("Unable to read generated file."); } new File(tempDir, "file1.html").delete(); } /** * Test with a properties file. */ public void testWithProperties() { FileSet fileset = new FileSet(); fileset.setDir(new File(testDir)); fileset.setIncludes("ant/*1.html"); task.addFileset(fileset); task.setDestdir(new File(tempDir)); task.setFlatten(true); task.setProperties(new File(testDir, "default.cfg")); task.execute(); assertTrue("Expected output file not created", new File(tempDir, "file1.html").exists()); try { Reader reader = new FileReader(new File(tempDir, "file1.html")); String output = FileUtils.readFully(reader); reader.close(); // output file should not contain "generator" assertTrue("Configured parameter doesn't have effect on output.", output.indexOf("generator") == -1); } catch (IOException e) { fail("Unable to read generated file."); } new File(tempDir, "file1.html").delete(); } /** * Test with a fileset. */ public void testFailonerrorFalse() { task.setSrcfile(new File(testDir, "ant/file3.html")); task.setDestdir(new File(tempDir)); task.setFailonerror(false); task.execute(); // ok if no buildexception is thrown } /** * Test with a fileset. 
*/ public void testFailonerrorTrue() { task.setSrcfile(new File(testDir, "ant/file3.html")); task.setDestdir(new File(tempDir)); task.setFailonerror(true); try { task.execute(); fail("Expected BuildException not thrown."); } catch (BuildException e) { // ok if buildexception IS thrown } } /** * Test with srcfile/destdir. */ public void testSrcfileDestDir() { task.setSrcfile(new File(testDir, "ant/file1.html")); task.setDestdir(new File(tempDir)); task.setFailonerror(true); task.execute(); assertTrue("Expected output file not created", new File(tempDir, "file1.html").exists()); new File(tempDir, "file1.html").delete(); } /** * Test with srcfile/destfile. */ public void testSrcfileDestFile() { task.setSrcfile(new File(testDir, "ant/file1.html")); task.setDestfile(new File(tempDir, "newfile.html")); task.setFailonerror(true); task.execute(); assertTrue("Expected output file not created", new File(tempDir, "newfile.html").exists()); assertFalse("Expected output file is a dir!", new File(tempDir, "newfile.html").isDirectory()); new File(tempDir, "newfile.html").delete(); } /** * Test with srcfile/destfile. */ public void testMissingSrcFile() { try { task .processFile(new File(testDir, "non/existing/file.html"), new File(tempDir, "non/existing/output.html")); fail("Expected BuildException not thrown"); } catch (BuildException e) { // ok, this is expected } } /** * Test with srcfile/destfile. */ public void testMissingOutputFile() { try { task.processFile(new File(testDir, "ant/file1.html"), new File(tempDir, "///::non/existing/output.html")); fail("Expected BuildException not thrown"); } catch (BuildException e) { // ok, this is expected } } /** * Test with invalid properties file. 
*/ public void testMissingProperties() { task.setProperties(new File(testDir, "non/existing/propertyfile.properties")); task.setSrcfile(new File(testDir, "ant/file1.html")); task.setDestfile(new File(tempDir, "newfile.html")); try { task.execute(); fail("Expected BuildException not thrown"); } catch (BuildException e) { // ok, this is expected } } /** * Test with srcfile/destfile. */ public void testPropertiesIsADir() { task.setProperties(new File(testDir)); task.setSrcfile(new File(testDir, "ant/file1.html")); task.setDestfile(new File(tempDir, "newfile.html")); try { task.execute(); fail("Expected BuildException not thrown"); } catch (BuildException e) { // ok, this is expected } } }jtidy/src/test/java/org/w3c/tidy/TidyOutputBugsTest.java0000644000175000017500000007670510125630076023557 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. 
* * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Testcase for Tidy resolved bugs (wrong output). *

          * see http://sourceforge.net/support/tracker.php?aid=(item number) *

          * @author fgiust * @version $Revision: 648 $ ($Author: fgiust $) */ public class TidyOutputBugsTest extends TidyTestCase { /** * Instantiate a new Test case. * @param name test name */ public TidyOutputBugsTest(String name) { super(name); } /** * test for Tidy [427812] : Reprocessing OBJECT removes PARAM. * @throws Exception any exception generated during the test */ public void test427812() throws Exception { executeTidyTest("427812.xhtml"); } /** * test for Tidy [427662] : BLOCK/INLINE before TABLE parsed wrong. * @throws Exception any exception generated during the test */ public void test427662() throws Exception { executeTidyTest("427662.html"); } /** * test for Tidy [427677] : TrimInitialSpace() can trim too much. * @throws Exception any exception generated during the test */ public void test427677() throws Exception { executeTidyTest("427677.html"); } /** * test for Tidy [427633] : Line endings not supported correctly. * @throws Exception any exception generated during the test */ public void test427633() throws Exception { // check line endings executeTidyTest("427633.html"); } /** * test for Tidy [427819] : OPTION w/illegal FONT eats whitespace. * @throws Exception any exception generated during the test */ public void test427819() throws Exception { executeTidyTest("427819.html"); } /** * test for Tidy [427820] : indent config options ignored for <BR>. [fixed] * @throws Exception any exception generated during the test */ public void test427820() throws Exception { executeTidyTest("427820.html"); } /** * test for Tidy [427821] : XHTML TRANSITIONAL doctype set wrongly. * @throws Exception any exception generated during the test */ public void test427821() throws Exception { executeTidyTest("427821.html"); } /** * test for Tidy [427822] : PopInLine() doesn't check stack. 
* @throws Exception any exception generated during the test */ public void test427822() throws Exception { // should be already fixed in jtidy executeTidyTest("427822.html"); } /** * test for Tidy [427823] : Multiple <BODY>'s in <NOFRAMES> allowed. * @throws Exception any exception generated during the test */ public void test427823() throws Exception { executeTidyTest("427823.html"); } /** * test for Tidy [427825] : Test user defined tags. * @throws Exception any exception generated during the test */ public void test427825() throws Exception { executeTidyTest("427825.html"); } /** * test for Tidy [427826] : Script source needs escaping/CDATA section. * @todo HIGH priority * @throws Exception any exception generated during the test */ public void test427826() throws Exception { executeTidyTest("427826.html"); } /** * test for Tidy [427830] : Tidy uses an incorrect XHTML 1.0 Namespace, even if the correct namespace is given. * @throws Exception any exception generated during the test */ public void test427830() throws Exception { executeTidyTest("427830.html"); } /** * test for Tidy [427833] : Uppercase &#X; escape sequences ignored. * @todo high priority * @throws Exception any exception generated during the test */ public void test427833() throws Exception { executeTidyTest("427833.html"); } /** * test for Tidy [427835] : DTD not inserted, but DTD URI Namespace. * @throws Exception any exception generated during the test */ public void test427835() throws Exception { executeTidyTest("427835.html"); } /** * test for Tidy [427836] : OBJECT should be wrapped in BODY. * @throws Exception any exception generated during the test */ public void test427836() throws Exception { executeTidyTest("427836.html"); } /** * test for Tidy [427837] : No encoding specified in XML output. * @throws Exception any exception generated during the test */ public void test427837() throws Exception { executeTidyTest("427837.xml"); } /** * test for Tidy [427838] : Name Anchor thrown away. 
* @throws Exception any exception generated during the test */ public void test427838() throws Exception { executeTidyTest("427838.html"); } /** * test for Tidy [427839] : Output DOCTYPE/Namespace decl separately. [fixed] * @throws Exception any exception generated during the test */ public void test427839() throws Exception { executeTidyTest("427839.html"); } /** * test for Tidy [427845] : Doctypes are output on multiple lines. * @throws Exception any exception generated during the test */ public void test427845() throws Exception { // still 2 lines on tidy? executeTidyTest("427845.html"); } /** * test for Tidy [427846] : Blank char inserted after first char. [fixed] * @throws Exception any exception generated during the test */ public void test427846() throws Exception { executeTidyTest("427846.html"); } /** * test for Tidy [431731] : Inline emphasis inconsistent propagation. * @throws Exception any exception generated during the test */ public void test431731() throws Exception { executeTidyTest("431731.html"); } /** * test for Tidy [431736] : Doctype decl added before XML decl. * @throws Exception any exception generated during the test */ public void test431736() throws Exception { executeTidyTest("431736.html"); } /** * test for Tidy [431739] : Spaces carried into empty block tags. [fixed] * @throws Exception any exception generated during the test */ public void test431739() throws Exception { executeTidyTest("431739.html"); } /** * test for Tidy [431889] : The "alt-text:" and "doctype: <fpi>" options do not work when specified in a config * file with a quoted string parameter. * @throws Exception any exception generated during the test */ public void test431889() throws Exception { executeTidyTest("431889.html"); } /** * test for Tidy [431898] : Tidy messes up X(HT)ML documents. 
* @throws Exception any exception generated during the test
     */
    public void test431898() throws Exception {
        executeTidyTest("431898.html");
    }

    /**
     * test for Tidy [431958] : Comments always indented.
     * @throws Exception any exception generated during the test
     */
    public void test431958() throws Exception {
        executeTidyTest("431958.html");
    }

    /**
     * test for Tidy [431965] : XHTML Strict seen as Transitional w/div.
     * @throws Exception any exception generated during the test
     */
    public void test431965() throws Exception {
        executeTidyTest("431965.xhtml");
    }

    /**
     * test for Tidy [432677] : Null value changed to "value" for -asxml.
     * @throws Exception any exception generated during the test
     */
    public void test432677() throws Exception {
        executeTidyTest("432677.html");
    }

    /**
     * test for Tidy [433012] : Illegal ampersands/character entities.
     * @throws Exception any exception generated during the test
     */
    public void test433012() throws Exception {
        executeTidyTest("433012.html");
    }

    /**
     * test for Tidy [433040] : Anchor tag without attributes deleted. [fixed]
     * @throws Exception any exception generated during the test
     */
    public void test433040() throws Exception {
        executeTidyTest("433040.html");
    }

    /**
     * test for Tidy [433359] : Empty iframe elements trimmed.
     * @throws Exception any exception generated during the test
     */
    public void test433359() throws Exception {
        executeTidyTest("433359.html");
    }

    /**
     * test for Tidy [433360] : Tags with missing &gt; can't be repaired.
     * @throws Exception any exception generated during the test
     */
    public void test433360() throws Exception {
        executeTidyTest("433360.html");
    }

    /**
     * test for Tidy [433604] : Tidy inserts &amp;nbsp; entity in -xml mode. [fixed]
     * @throws Exception any exception generated during the test
     */
    public void test433604() throws Exception {
        executeTidyTest("433604.xml");
    }

    /**
     * test for Tidy [433656] : Improve support for PHP.
* @throws Exception any exception generated during the test
     */
    public void test433656() throws Exception {
        executeTidyTest("433656.html");
    }

    /**
     * test for Tidy [433666] : Attempt to repair duplicate attributes.
     * @throws Exception any exception generated during the test
     */
    public void test433666() throws Exception {
        executeTidyTest("433666.html");
    }

    /**
     * test for Tidy [433672] : Anchor enclosing Header tags is omitted.
     * @throws Exception any exception generated during the test
     */
    public void test433672() throws Exception {
        executeTidyTest("433672.html");
    }

    /**
     * test for Tidy [434940] : --show-body-only: print only body contents.
     * @throws Exception any exception generated during the test
     */
    public void test434940() throws Exception {
        executeTidyTest("434940.html");
    }

    /**
     * test for Tidy [434940b] : --show-body-only: print only body contents (used with output-xml).
     * @throws Exception any exception generated during the test
     */
    public void test434940b() throws Exception {
        executeTidyTest("434940b.html");
    }

    /**
     * test for Tidy [435903] : Script element w/body child to table bug.
     * @throws Exception any exception generated during the test
     */
    public void test435903() throws Exception {
        executeTidyTest("435903.html");
    }

    /**
     * test for Tidy [435909] : &lt;noscript&gt;&lt;/noscript&gt; in &lt;head&gt;&lt;/head&gt;.
     * @throws Exception any exception generated during the test
     */
    public void test435909() throws Exception {
        executeTidyTest("435909.html");
    }

    /**
     * test for Tidy [435919] : Nested &lt;q&gt;&lt;/q&gt;'s not handled correctly.
     * @throws Exception any exception generated during the test
     */
    public void test435919() throws Exception {
        executeTidyTest("435919.html");
    }

    /**
     * test for Tidy [435920] : Space inserted before &lt;/td&gt; causes probs. [fixed]
     * @throws Exception any exception generated during the test
     */
    public void test435920() throws Exception {
        executeTidyTest("435920.html");
    }

    /**
     * test for Tidy [435923] : Preserve case of attribute names.
* @throws Exception any exception generated during the test */ public void test435923() throws Exception { executeTidyTest("435923.html"); } /** * test for Tidy [437468] : Test input file for iso-8859-1 character entities. * @throws Exception any exception generated during the test */ public void test437468() throws Exception { executeTidyTest("437468.html"); } /** * test for Tidy [438650] : Newline in URL attr value becomes space. [fixed] * @throws Exception any exception generated during the test */ public void test438650() throws Exception { executeTidyTest("438650.html"); } /** * test for Tidy [438658] : Missing / in title endtag makes 2 titles. * @throws Exception any exception generated during the test */ public void test438658() throws Exception { executeTidyTest("438658.html"); } /** * test for Tidy [438954] : Body tag w/attributes omitted w/hide-end. * @throws Exception any exception generated during the test */ public void test438954() throws Exception { executeTidyTest("438954.html"); } /** * test for Tidy [441508] : parser.c: BadForm() function broken. * @throws Exception any exception generated during the test */ public void test441508() throws Exception { executeTidyTest("441508.html"); } /** * test for Tidy [441568] : Font tags handling different. * @throws Exception any exception generated during the test */ public void test441568() throws Exception { executeTidyTest("441568.html"); } /** * test for Tidy [441740] : XHTML 1.1 Support. * @throws Exception any exception generated during the test */ public void test441740() throws Exception { executeTidyTest("441740.xhtml"); assertLogContains("XHTML 1.1"); } /** * test for Tidy [443381] : end tags for empty elements in XHTML. * @throws Exception any exception generated during the test */ public void test443381() throws Exception { executeTidyTest("443381.xhtml"); // should not complain about assertNoWarnings(); } /** * test for Tidy [443576] : End script tag inside scripts problem. 
* @throws Exception any exception generated during the test */ public void test443576() throws Exception { executeTidyTest("443576.html"); } /** * test for Tidy [443678] : Unclosed <script> in <head> messes Tidy. * @throws Exception any exception generated during the test */ public void test443678() throws Exception { executeTidyTest("443678.html"); } /** * test for Tidy [445074] : XHTML requires form method="post". * @throws Exception any exception generated during the test */ public void test445074() throws Exception { executeTidyTest("445074.html"); // line 8 column 1 - Warning: attribute value "POST" must be lower case for XHTML assertWarnings(1); } /** * test for Tidy [445394] : Improve handling of missing trailing ". STill bad in tidy. * @throws Exception any exception generated during the test */ public void test445394() throws Exception { executeTidyTest("445394.html"); } /** * test for Tidy [445557] : Convert Symbol font chars to Unicode. * @throws Exception any exception generated during the test */ public void test445557() throws Exception { executeTidyTest("445557.html"); } /** * test for Tidy [449348] : Whitespace added/removed to inline tags. * @throws Exception any exception generated during the test */ public void test449348() throws Exception { executeTidyTest("449348.html"); } /** * test for Tidy [456596] : Missing attribute name garbles output. * @throws Exception any exception generated during the test */ public void test456596() throws Exception { executeTidyTest("456596.html"); } /** * test for Tidy [467863] : un-nest <a>. * @throws Exception any exception generated during the test */ public void test467863() throws Exception { executeTidyTest("467863.html"); } /** * test for Tidy [467865] : un-nesting is incorrect. * @throws Exception any exception generated during the test */ public void test467865() throws Exception { executeTidyTest("467865.html"); } /** * test for Tidy [470663] : Unterminated tags in <head>confuse pars. 
* @throws Exception any exception generated during the test */ public void test470663() throws Exception { executeTidyTest("470663.html"); } /** * test for Tidy [470688] : doesn't cleanup badly nested tags right. * @throws Exception any exception generated during the test */ public void test470688() throws Exception { executeTidyTest("470688.html"); } /** * test for Tidy [471264] : Reduce blank lines in output. * @throws Exception any exception generated during the test */ public void test471264() throws Exception { executeTidyTest("471264.html"); } /** * test for Tidy [473490] : DOCTYPE for Proprietary HTML to XHTML bad. * @throws Exception any exception generated during the test */ public void test473490() throws Exception { executeTidyTest("473490.html"); } /** * test for Tidy [480406] : Single document element discarded. * @throws Exception any exception generated during the test */ public void test480406() throws Exception { executeTidyTest("480406.xml"); } /** * test for Tidy [480701] : -xml conflicts with -output-xhtml. * @throws Exception any exception generated during the test */ public void test480701() throws Exception { executeTidyTest("480701.xml"); } /** * test for Tidy [480843] : Proposed change to FixID(). * @throws Exception any exception generated during the test */ public void test480843() throws Exception { // still open in tidy executeTidyTest("480843.xhtml"); } /** * test for Tidy [487204] : Duplicate DIV style attribute generated. * @throws Exception any exception generated during the test */ public void test487204() throws Exception { executeTidyTest("487204.html"); } /** * test for Tidy [487283] : >/select< does not terminate >option<. * @throws Exception any exception generated during the test */ public void test487283() throws Exception { executeTidyTest("487283.html"); } /** * test for Tidy [500236] : Test case for MS Access files failing with Error: missing quote mark for attribute * value. 
* @throws Exception any exception generated during the test */ public void test500236() throws Exception { executeTidyTest("500236.xml"); } /** * test for Tidy [502346] : Leading and trailing space in attval . * @throws Exception any exception generated during the test */ public void test502346() throws Exception { executeTidyTest("502346.html"); // this is actually another bug, but reported doctype is wrong for this test assertLogContains("Doctype given is \"-//W3C//DTD XHTML 1.0 Strict//EN\""); } /** * test for Tidy [502348] : <br clear="none"> should be output. * @throws Exception any exception generated during the test */ public void test502348() throws Exception { executeTidyTest("502348.html"); assertWarnings(2); } /** * test for Tidy [503436] : Detect duplicate attribute specifications in XML docs. * @todo HIGH PRIORITY. * @throws Exception any exception generated during the test */ public void test503436() throws Exception { executeTidyTest("503436.xml"); } /** * test for Tidy [504206] : Tidy errors in processing forms. * @throws Exception any exception generated during the test */ public void test504206() throws Exception { executeTidyTest("504206.html"); } /** * test for Tidy [505770] : Unclosed %lt;option> tag causing problems. * @throws Exception any exception generated during the test */ public void test505770() throws Exception { executeTidyTest("505770.html"); } /** * test for Tidy [508936] : Parse CSS Selector prefix in config file. * @throws Exception any exception generated during the test */ public void test508936() throws Exception { executeTidyTest("508936.html"); } /** * test for Tidy [511243] : xhtml utf8 format bug. * @throws Exception any exception generated during the test */ public void test511243() throws Exception { // need wrapping executeTidyTest("511243.xhtml"); } /** * test for Tidy [511679] : No end tag for PRE. 
* @throws Exception any exception generated during the test
     */
    public void test511679() throws Exception {
        executeTidyTest("511679.html");
    }

    /**
     * test for Tidy [514348] : Incorrect wrap behaviour. [fixed]
     * @throws Exception any exception generated during the test
     */
    public void test514348() throws Exception {
        executeTidyTest("514348.html");
    }

    /**
     * test for Tidy [514893] : Incorrect http-equiv &lt;meta&gt; tag.
     * @throws Exception any exception generated during the test
     */
    public void test514893() throws Exception {
        executeTidyTest("514893.html");
    }

    /**
     * test for Tidy [517550] : parser misinterprets ?xml-stylesheet PI.
     * @throws Exception any exception generated during the test
     */
    public void test517550() throws Exception {
        executeTidyTest("517550.xhtml");
    }

    /**
     * test for Tidy [531964] : &lt;p /&gt; gets tidied into &lt;p /&gt;&lt;/p&gt;.
     * @throws Exception any exception generated during the test
     */
    public void test531964() throws Exception {
        executeTidyTest("531964.xhtml");
    }

    /**
     * test for Tidy [533105] : Tidy confused: HTML in VBScript.
     * @throws Exception any exception generated during the test
     */
    public void test533105() throws Exception {
        executeTidyTest("533105.html");
    }

    /**
     * test for Tidy [533233] : Extra newline around inline &lt;script&gt;.
     * @throws Exception any exception generated during the test
     */
    public void test533233() throws Exception {
        executeTidyTest("533233.html");
    }

    /**
     * test for Tidy [537604] : Expansion of entity references in -xml.
     * @throws Exception any exception generated during the test
     */
    public void test537604() throws Exception {
        executeTidyTest("537604.xml");
    }

    /**
     * test for Tidy [540045] : Tidy strips all the IMG tags out.
     * @throws Exception any exception generated during the test
     */
    public void test540045() throws Exception {
        executeTidyTest("540045.xhtml");
    }

    /**
     * test for Tidy [540555] : Empty title tag is trimmed.
* @throws Exception any exception generated during the test
     */
    public void test540555() throws Exception {
        executeTidyTest("540555.html");
    }

    /**
     * test for Tidy [540571] : Inconsistent behaviour with span inline element.
     * @throws Exception any exception generated during the test
     */
    public void test540571() throws Exception {
        executeTidyTest("540571.html");
    }

    /**
     * test for Tidy [578216] : Incorrect indent of &lt;SPAN&gt; elements.
     * @throws Exception any exception generated during the test
     */
    public void test578216() throws Exception {
        executeTidyTest("578216.html");
    }

    /**
     * test for Tidy [586555] : Misplaced backslash caused by newline.
     * @throws Exception any exception generated during the test
     */
    public void test586555() throws Exception {
        executeTidyTest("586555.html");
    }

    /**
     * test for Tidy [586562] : Two Doctypes.
     * @throws Exception any exception generated during the test
     */
    public void test586562() throws Exception {
        executeTidyTest("586562.html");
    }

    /**
     * test for Tidy [593705] : Use of &lt; comparison symbol confuses Tidy.
     * @throws Exception any exception generated during the test
     */
    public void test593705() throws Exception {
        executeTidyTest("593705.html");
    }

    /**
     * test for Tidy [598860] : Script parsing fails with quote chars.
     * @throws Exception any exception generated during the test
     */
    public void test598860() throws Exception {
        executeTidyTest("598860.html");
    }

    /**
     * test for Tidy [603128] : tidy adds newlines after &lt;/html&gt;. [fixed]
     * @throws Exception any exception generated during the test
     */
    public void test603128() throws Exception {
        executeTidyTest("603128.html");
    }

    /**
     * test for Tidy [616744] : Incorrect line break after closing tag.
     * @throws Exception any exception generated during the test
     */
    public void test616744() throws Exception {
        executeTidyTest("616744.xml");
    }

    /**
     * test for Tidy [620531] : br in pre must not cause line break.
* @throws Exception any exception generated during the test
     */
    public void test620531() throws Exception {
        executeTidyTest("620531.html");
    }

    /**
     * test for Tidy [629885] : Unbalanced quote in CSS Scrambles Doc.
     * @throws Exception any exception generated during the test
     */
    public void test629885() throws Exception {
        executeTidyTest("629885.html");
    }

    /**
     * test for Tidy [640473] : new-empty-tags doesn't work, breaks doc.
     * @throws Exception any exception generated during the test
     */
    public void test640473() throws Exception {
        executeTidyTest("640473.html");
    }

    /**
     * test for Tidy [640474] : XML decl, attributes in wrong order.
     * @throws Exception any exception generated during the test
     */
    public void test640474() throws Exception {
        executeTidyTest("640474.xml");
    }

    /**
     * test for Tidy [646946] : Bad doctype guessing in XML mode.
     * @throws Exception any exception generated during the test
     */
    public void test646946() throws Exception {
        executeTidyTest("646946.xml");
    }

    /**
     * test for Tidy [647900] : tables are incorrectly merged.
     * @throws Exception any exception generated during the test
     */
    public void test647900() throws Exception {
        executeTidyTest("647900.html");
    }

    /**
     * test for Tidy [655338] : Tidy leaves XML decl in wrong place.
     * @throws Exception any exception generated during the test
     */
    public void test655338() throws Exception {
        executeTidyTest("655338.html");
    }

    /**
     * test for Tidy [656889] : textarea text and line wrapping.
     * @throws Exception any exception generated during the test
     */
    public void test656889() throws Exception {
        executeTidyTest("656889.html");
    }

    /**
     * test for Tidy [663548] : Javascript and Tidy - missing code.
     * @throws Exception any exception generated during the test
     */
    public void test663548() throws Exception {
        executeTidyTest("663548.html");
    }

    /**
     * test for Tidy [678268] : --output-xhtml produces bad xml.
     * @todo HIGH PRIORITY.
* @throws Exception any exception generated during the test
     */
    public void test678268() throws Exception {
        executeTidyTest("678268.html");
    }

    /**
     * test for Tidy [680664] : Malformed comment generates bad (X)HTML.
     * @throws Exception any exception generated during the test
     */
    public void test680664() throws Exception {
        executeTidyTest("680664.xhtml");
    }

    /**
     * test for Tidy [695408] : Empty spans getting dropped, even if they have attrs.
     * @todo HIGH PRIORITY.
     * @throws Exception any exception generated during the test
     */
    public void test695408() throws Exception {
        executeTidyTest("695408.html");
    }

    /**
     * test for Tidy [708322] : drop-proprietary-attributes leaves table height.
     * @throws Exception any exception generated during the test
     */
    public void test708322() throws Exception {
        executeTidyTest("708322.html");
    }

    /**
     * test for Tidy [735603] : drops attributes from declared elements.
     * @throws Exception any exception generated during the test
     */
    public void test735603() throws Exception {
        executeTidyTest("735603.html");
    }

    /**
     * test for Tidy [765852] : Empty tag stripping.
     * @throws Exception any exception generated during the test
     */
    public void test765852() throws Exception {
        executeTidyTest("765852.html");
    }

    /**
     * test for Tidy [994841] : Whitespace is removed from value attributes.
     * @throws Exception any exception generated during the test
     */
    public void test994841() throws Exception {
        executeTidyTest("994841.html");
        assertNoWarnings();
    }

    /**
     * test for Tidy [996484] : Tidy add spurious space when indenting long URIs.
     * @throws Exception any exception generated during the test
     */
    public void test996484() throws Exception {
        executeTidyTest("996484.html");
    }

    /**
     * test for Tidy [1031865] : Script parsing warning.
* @throws Exception any exception generated during the test */ public void test1031865() throws Exception { executeTidyTest("1031865.html"); assertNoWarnings(); } }jtidy/src/test/java/org/w3c/tidy/JTidyWarningBugsTest.java0000644000175000017500000000654110114204746023764 0ustar twernertwerner/** * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. 
This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * testcase for JTidy resolved bugs (Tidy warning and errors). * @author fgiust * @version $Revision: 532 $ ($Author: fgiust $) */ public class JTidyWarningBugsTest extends TidyTestCase { /** * Instantiate a new Test case. * @param name test name */ public JTidyWarningBugsTest(String name) { super(name); } /** * test for JTidy [828316]: FRAMEBORDER attribute of IFRAME gives spurious warning. * @throws Exception any exception generated during the test */ public void test828316() throws Exception { executeTidyTest("828316.html"); assertNoWarnings(); assertNoErrors(); } /** * test for JTidy [444834]: Silent Option. * @throws Exception any exception generated during the test */ public void test444834() throws Exception { // only need to test the "quiet" option executeTidyTest("444834.html"); assertEquals("errorLog should be empty, size doesn't match", 0, this.errorLog.toString().length()); } }jtidy/src/test/java/org/w3c/tidy/ConfigurationTest.java0000644000175000017500000002353710467623231023412 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */ package org.w3c.tidy; import java.io.StringWriter; import junit.framework.TestCase; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author Fabrizio Giustina * @version $Revision: 812 $ ($Author: fgiust $) */ public class ConfigurationTest extends TestCase { /** * logger. */ private static Logger log = LoggerFactory.getLogger(ConfigurationTest.class); /** * Test for -help-config. * @throws Exception any exception thrown during test */ public void testPrintConfig() throws Exception { Tidy tidy = new Tidy(); Configuration configuration = tidy.getConfiguration(); StringWriter writer = new StringWriter(); configuration.printConfigOptions(writer, false); String result = writer.toString(); // just test that no exception occurred and that something was printed assertTrue(result.length() > 200); log.debug(result); } /** * Test for -show-config. * @throws Exception any exception thrown during test */ public void testPrintActualConfig() throws Exception { Tidy tidy = new Tidy(); tidy.getConfiguration().tt.defineTag(Dict.TAGTYPE_INLINE, "something"); tidy.getConfiguration().tt.defineTag(Dict.TAGTYPE_INLINE, "second"); Configuration configuration = tidy.getConfiguration(); StringWriter writer = new StringWriter(); configuration.printConfigOptions(writer, true); String result = writer.toString(); // just test that no exception occurred and that something was printed assertTrue(result.length() > 200); log.debug(result); } /** * Test for configuration getters and setters. 
* @throws Exception any exception thrown during test */ public void testGetSet() throws Exception { Tidy tidy = new Tidy(); tidy.setAltText("alt"); assertEquals("alt", tidy.getAltText()); tidy.setAsciiChars(false); assertEquals(false, tidy.getAsciiChars()); tidy.setBreakBeforeBR(true); assertEquals(true, tidy.getBreakBeforeBR()); tidy.setBurstSlides(true); assertEquals(true, tidy.getBurstSlides()); tidy.setDropEmptyParas(false); assertEquals(false, tidy.getDropEmptyParas()); tidy.setDropFontTags(true); assertEquals(true, tidy.getDropFontTags()); tidy.setDropProprietaryAttributes(true); assertEquals(true, tidy.getDropProprietaryAttributes()); tidy.setEmacs(true); assertEquals(true, tidy.getEmacs()); tidy.setEncloseBlockText(true); assertEquals(true, tidy.getEncloseBlockText()); tidy.setEncloseText(true); assertEquals(true, tidy.getEncloseText()); tidy.setEscapeCdata(true); assertEquals(true, tidy.getEscapeCdata()); tidy.setFixBackslash(true); assertEquals(true, tidy.getFixBackslash()); tidy.setFixComments(true); assertEquals(true, tidy.getFixComments()); tidy.setFixUri(true); assertEquals(true, tidy.getFixUri()); tidy.setForceOutput(true); assertEquals(true, tidy.getForceOutput()); tidy.setHideComments(true); assertEquals(true, tidy.getHideComments()); tidy.setHideEndTags(true); assertEquals(true, tidy.getHideEndTags()); tidy.setIndentAttributes(true); assertEquals(true, tidy.getIndentAttributes()); tidy.setIndentCdata(true); assertEquals(true, tidy.getIndentCdata()); tidy.setIndentContent(true); assertEquals(true, tidy.getIndentContent()); tidy.setJoinClasses(true); assertEquals(true, tidy.getJoinClasses()); tidy.setJoinStyles(true); assertEquals(true, tidy.getJoinStyles()); tidy.setKeepFileTimes(true); assertEquals(true, tidy.getKeepFileTimes()); tidy.setLiteralAttribs(true); assertEquals(true, tidy.getLiteralAttribs()); tidy.setLogicalEmphasis(true); assertEquals(true, tidy.getLogicalEmphasis()); tidy.setLowerLiterals(true); assertEquals(true, 
tidy.getLowerLiterals()); tidy.setMakeBare(true); assertEquals(true, tidy.getMakeBare()); tidy.setMakeClean(true); assertEquals(true, tidy.getMakeClean()); tidy.setNumEntities(true); assertEquals(true, tidy.getNumEntities()); tidy.setOnlyErrors(true); assertEquals(true, tidy.getOnlyErrors()); tidy.setPrintBodyOnly(true); assertEquals(true, tidy.getPrintBodyOnly()); tidy.setQuiet(true); assertEquals(true, tidy.getQuiet()); tidy.setQuoteAmpersand(true); assertEquals(true, tidy.getQuoteAmpersand()); tidy.setQuoteMarks(true); assertEquals(true, tidy.getQuoteMarks()); tidy.setQuoteNbsp(true); assertEquals(true, tidy.getQuoteNbsp()); tidy.setRawOut(true); assertEquals(true, tidy.getRawOut()); tidy.setReplaceColor(true); assertEquals(true, tidy.getReplaceColor()); tidy.setShowWarnings(true); assertEquals(true, tidy.getShowWarnings()); tidy.setSmartIndent(true); assertEquals(true, tidy.getSmartIndent()); tidy.setTidyMark(true); assertEquals(true, tidy.getTidyMark()); tidy.setTrimEmptyElements(true); assertEquals(true, tidy.getTrimEmptyElements()); tidy.setUpperCaseAttrs(true); assertEquals(true, tidy.getUpperCaseAttrs()); tidy.setUpperCaseTags(true); assertEquals(true, tidy.getUpperCaseTags()); tidy.setWord2000(true); assertEquals(true, tidy.getWord2000()); tidy.setWrapAsp(true); assertEquals(true, tidy.getWrapAsp()); tidy.setWrapAttVals(true); assertEquals(true, tidy.getWrapAttVals()); tidy.setWrapJste(true); assertEquals(true, tidy.getWrapJste()); tidy.setWrapPhp(true); assertEquals(true, tidy.getWrapPhp()); tidy.setWrapScriptlets(true); assertEquals(true, tidy.getWrapScriptlets()); tidy.setWrapSection(true); assertEquals(true, tidy.getWrapSection()); tidy.setWraplen(5); assertEquals(5, tidy.getWraplen()); tidy.setWriteback(true); assertEquals(true, tidy.getWriteback()); tidy.setXHTML(true); assertEquals(true, tidy.getXHTML()); tidy.setXmlOut(true); assertEquals(true, tidy.getXmlOut()); tidy.setXmlPi(true); assertEquals(true, tidy.getXmlPi()); tidy.setXmlPIs(true); 
assertEquals(true, tidy.getXmlPIs()); tidy.setXmlSpace(true); assertEquals(true, tidy.getXmlSpace()); tidy.setXmlTags(true); assertEquals(true, tidy.getXmlTags()); tidy.setTabsize(5); assertEquals(5, tidy.getTabsize()); tidy.setOutputEncoding("UTF8"); assertEquals("UTF8", tidy.getOutputEncoding()); tidy.setInputEncoding("UTF8"); assertEquals("UTF8", tidy.getInputEncoding()); tidy.setRepeatedAttributes(Configuration.KEEP_FIRST); assertEquals(Configuration.KEEP_FIRST, tidy.getRepeatedAttributes()); tidy.setShowErrors(10); assertEquals(10, tidy.getShowErrors()); tidy.setDocType("strict"); assertEquals("strict", tidy.getDocType()); tidy.setErrfile("errfile"); assertEquals("errfile", tidy.getErrfile()); tidy.setSpaces(5); assertEquals(5, tidy.getSpaces()); tidy.setInputStreamName("inputname"); assertEquals("inputname", tidy.getInputStreamName()); } }jtidy/src/test/java/org/w3c/tidy/TidyEncodingBugsTest.java0000644000175000017500000001040611463541057023775 0ustar twernertwerner/* * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. * * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. 
* * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. * */ package org.w3c.tidy; /** * Testcase for Tidy resolved bugs (encodings). *

          * see http://sourceforge.net/support/tracker.php?aid=(item number) *

          * @author Fabrizio Giustina
 * @version $Revision: 1163 $ ($Author: fgiust $)
 */
public class TidyEncodingBugsTest extends TidyTestCase
{

    // Every test below hands one fixture file, named after its tracker item number, to executeTidyTest.
    // That helper is not declared in this class, so it is inherited through TidyTestCase; what it actually
    // verifies is defined there and is not visible from this file.

    /**
     * Instantiate a new Test case. The name is passed straight through to the superclass constructor.
     * @param name test name
     */
    public TidyEncodingBugsTest(String name)
    {
        super(name);
    }

    /**
     * test for Tidy [647255] : UTF16. Runs the fixture 647255.html through executeTidyTest; this method adds no
     * assertions of its own.
     * @throws Exception any exception generated during the test
     */
    public void test647255() throws Exception
    {
        executeTidyTest("647255.html");
    }

    /**
     * test for Tidy [649812] : Does TidyLib correctly handle Mac files?. (test is UTF16) Runs the fixture
     * 649812.html through executeTidyTest; this method adds no assertions of its own.
     * @throws Exception any exception generated during the test
     */
    public void test649812() throws Exception
    {
        // doesn't work for missing encoding support in test case!
        // NOTE(review): despite the remark above, the call below is a live statement - confirm whether this
        // test is currently expected to pass.
        executeTidyTest("649812.html");
    }

    /**
     * test for Tidy [658230] : Big5. Runs the fixture 658230.html through executeTidyTest; this method adds no
     * assertions of its own.
     * @throws Exception any exception generated during the test
     */
    public void test658230() throws Exception
    {
        executeTidyTest("658230.html");
    }

    /**
     * test for Tidy [660397] : Add support for IBM-858 and ISO-8859-15. Runs the fixture 660397.html through
     * executeTidyTest; this method adds no assertions of its own.
     * @throws Exception any exception generated during the test
     */
    public void test660397() throws Exception
    {
        executeTidyTest("660397.html");
    }

    /**
     * test for Tidy [676156] : tidy --input-encoding is broken. Runs the fixture 676156.html through
     * executeTidyTest and then calls assertWarnings(4).
     * @throws Exception any exception generated during the test
     */
    public void test676156() throws Exception
    {
        executeTidyTest("676156.html");
        // presumably checks that the run above reported 4 warnings - confirm against assertWarnings in
        // TidyTestCase
        assertWarnings(4);
    }

    /**
     * test for Tidy [688746] : incorrect charset value for utf-8. Runs the fixture 688746.html through
     * executeTidyTest; this method adds no assertions of its own.
     * @throws Exception any exception generated during the test
     */
    public void test688746() throws Exception
    {
        executeTidyTest("688746.html");
    }

}jtidy/LICENSE.txt0000644000175000017500000000435707776241520014005 0ustar twernertwerner/** * Java HTML Tidy - JTidy * HTML parser and pretty printer * * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts * Institute of Technology, Institut National de Recherche en * Informatique et en Automatique, Keio University). All Rights * Reserved. 
* * Contributing Author(s): * * Dave Raggett * Andy Quick (translation to Java) * Gary L Peskin (Java development) * Sami Lempinen (release management) * Fabrizio Giustina * * The contributing author(s) would like to thank all those who * helped with testing, bug fixes, and patience. This wouldn't * have been possible without all of you. * * COPYRIGHT NOTICE: * * This software and documentation is provided "as is," and * the copyright holders and contributing author(s) make no * representations or warranties, express or implied, including * but not limited to, warranties of merchantability or fitness * for any particular purpose or that the use of the software or * documentation will not infringe any third party patents, * copyrights, trademarks or other rights. * * The copyright holders and contributing author(s) will not be * liable for any direct, indirect, special or consequential damages * arising out of any use of the software or documentation, even if * advised of the possibility of such damage. * * Permission is hereby granted to use, copy, modify, and distribute * this source code, or portions hereof, documentation and executables, * for any purpose, without fee, subject to the following restrictions: * * 1. The origin of this source code must not be misrepresented. * 2. Altered versions must be plainly marked as such and must * not be misrepresented as being the original source. * 3. This Copyright notice may not be removed or altered from any * source or altered source distribution. * * The copyright holders and contributing author(s) specifically * permit, without fee, and encourage the use of this source code * as a component for supporting the Hypertext Markup Language in * commercial products. If you use this source code in a product, * acknowledgment is not required but would be appreciated. 
* */jtidy/pom.xml0000644000175000017500000002256211463510374013467 0ustar twernertwerner 4.0.0 net.sf.jtidy jtidy JTidy 8.0-SNAPSHOT JTidy is a Java port of HTML Tidy, a HTML syntax checker and pretty printer. Like its non-Java cousin, JTidy can be used as a tool for cleaning up malformed and faulty HTML. In addition, JTidy provides a DOM interface to the document that is being processed, which effectively makes you able to use JTidy as a DOM parser for real-world HTML. http://jtidy.sourceforge.net http://sourceforge.net/tracker/?group_id=13153
          jtidy-continuum@lists.sourceforge.net
          2000 JTidy User Mailing list http://lists.sourceforge.net/lists/listinfo/jtidy-user http://lists.sourceforge.net/lists/listinfo/jtidy-user http://sourceforge.net/mailarchive/forum.php?forum=jtidy-user JTidy Developer Mailing list http://lists.sourceforge.net/lists/listinfo/jtidy-devel http://lists.sourceforge.net/lists/listinfo/jtidy-devel http://sourceforge.net/mailarchive/forum.php?forum=jtidy-devel JTidy Cvs Mailing list http://lists.sourceforge.net/lists/listinfo/jtidy-cvs http://lists.sourceforge.net/lists/listinfo/jtidy-cvs http://sourceforge.net/mailarchive/forum.php?forum=jtidy-cvs fgiust Fabrizio Giustina fgiust AT users.sourceforge.net Sourceforge Current project admin and developer +1 atripp Andy Tripp atripp AT users.sourceforge.net Sourceforge Project admin 0 russgold Russell Gold russgold AT users.sourceforge.net Sourceforge Project admin 0 garypeskin Gary L Peskin garypeskin AT users.sourceforge.net Sourceforge developer 0 lempinen Sami Lempinen lempinen AT users.sourceforge.net Sourceforge release manager 0 scm:svn:https://jtidy.svn.sourceforge.net/svnroot/jtidy/trunk/jtidy/ scm:svn:https://jtidy.svn.sourceforge.net/svnroot/jtidy/trunk/jtidy/ https://jtidy.svn.sourceforge.net/svnroot/jtidy/trunk/jtidy/ sourceforge http://sourceforge.net org.apache.maven.plugins maven-surefire-plugin 2.5 true **/*Test.* -Duser.language=en org.apache.maven.plugins maven-compiler-plugin 2.3.1 true 1.4 1.4 UTF8 org.apache.maven.plugins maven-javadoc-plugin 2.7 -breakiterator To Do: todo a org.apache.maven.plugins maven-clover-plugin src/config/clover.license 2.4 pre-site instrument org.apache.maven.plugins maven-jar-plugin 2.3.1 org.w3c.tidy.Tidy org.apache.ant ant 1.7.0 true org.slf4j slf4j-log4j12 1.6.1 test log4j log4j 1.2.15 test junit junit 4.8.2 test org.apache.maven.plugins maven-checkstyle-plugin 2.5 src/config/checkstyle.xml false org.apache.maven.plugins maven-javadoc-plugin 2.7 true org.apache.maven.plugins maven-pmd-plugin 2.5 
org.apache.maven.plugins maven-project-info-reports-plugin 2.2 org.apache.maven.plugins maven-jxr-plugin 2.2 org.apache.maven.plugins maven-surefire-report-plugin 2.5 org.apache.maven.plugins maven-changelog-plugin 2.2 org.apache.maven.plugins maven-clover-plugin 2.4 src/config/clover.license org.codehaus.mojo jdepend-maven-plugin 2.0-beta-1 jtidy http://jtidy.sourceforge.net/maven2/ jtidy scp://shell.sourceforge.net/home/groups/j/jt/jtidy/htdocs/maven2/ jtidy scp://shell.sourceforge.net/home/groups/j/jt/jtidy/htdocs/snapshots/ jtidy scp://shell.sourceforge.net/home/groups/j/jt/jtidy/htdocs/
          jtidy/build.xml0000755000175000017500000000362511304252373013771 0ustar twernertwerner