pax_global_header00006660000000000000000000000064132062730410014510gustar00rootroot0000000000000052 comment=3a30f6a4300417674026f6dddea5973debc6b808 tidy-html5-5.6.0/000077500000000000000000000000001320627304100135205ustar00rootroot00000000000000tidy-html5-5.6.0/.gitignore000066400000000000000000000002601320627304100155060ustar00rootroot00000000000000/autom4te.cache/ /console/.deps/ /console/.libs/ /src/.deps/ /src/.libs/ *.user *.suo *.sdf /test /test/testall.log /test/tmp/ /test/tmp2/ *~ temp* *.bak .DS_Store .idea *.old tidy-html5-5.6.0/CMakeLists.txt000066400000000000000000000564361320627304100162760ustar00rootroot00000000000000############################################################################## # @file CMakeLists.txt # Build executables, static and dylibs, packages, build systems, etc., for # HTML Tidy. # # Read this file or use cmake-gui (Windows) or ccmake (everything else) for # guided build. # # @author Geoff McLane [ubuntu@geoffair.info] # @author HTACG, et al (consult git log) # # @copyright # Copyright (c) 1998-2017 HTACG # @copyright # See tidy.h for license. # # @date Consult git log. ############################################################################## cmake_minimum_required (VERSION 2.8.12) set(LIB_NAME tidy) set(LIBTIDY_DESCRIPTION "${LIB_NAME} - HTML syntax checker") set(LIBTIDY_URL "http://www.html-tidy.org") project (${LIB_NAME}) ################################################# # Setup ################################################# #------------------------------------------------------------------------ # Release Information # Release version and date are found in `version.txt`; update *that* # file when required. It will be read into variable `versionFile` # (stripping any newlines or spaces). This file must be formatted into # two lines: the dot-separated MAJOR.MINOR.POINT version, followed by # the date separated YEAR.MONTH.DAY release date. 
#------------------------------------------------------------------------
# version.txt is expected to hold exactly two lines:
#   line 1: MAJOR.MINOR.POINT   (e.g. 5.6.0)
#   line 2: YEAR.MONTH.DAY      (e.g. 2017.11.25)
# The file may use LF or CRLF line endings.
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/version.txt" versionFile)

if (NOT versionFile)
    message(FATAL_ERROR "Unable to determine libtidy version. version.txt file is missing.")
endif ()

string(STRIP "${versionFile}" VERSION_TEXT)

# Split on any run of CR/LF characters so that a CRLF file does not leave a
# stray carriage return at the end of the version string.
string(REGEX REPLACE "[\r\n]+" ";" VERSION_LINES "${VERSION_TEXT}")
list(LENGTH VERSION_LINES VERSION_LINE_COUNT)

# Start from empty values so that a short file falls through to the
# FATAL_ERROR branches below instead of yielding a bogus version or date.
set(LIBTIDY_VERSION "")
set(LIBTIDY_DATE "")
if (VERSION_LINE_COUNT GREATER 0)
    list(GET VERSION_LINES 0 LIBTIDY_VERSION)
    string(STRIP "${LIBTIDY_VERSION}" LIBTIDY_VERSION)
endif ()
if (VERSION_LINE_COUNT GREATER 1)
    list(GET VERSION_LINES 1 LIBTIDY_DATE)
    string(STRIP "${LIBTIDY_DATE}" LIBTIDY_DATE)
endif ()

# Establish version number
if (LIBTIDY_VERSION)
    string(REPLACE "." ";" VERSION_LIST "${LIBTIDY_VERSION}")
    list(LENGTH VERSION_LIST VERSION_PART_COUNT)
    if (VERSION_PART_COUNT LESS 3)
        # list(GET) below would otherwise abort with an index error.
        message(FATAL_ERROR "*** FAILED to get a VERSION from version.txt!")
    endif ()
    list(GET VERSION_LIST 0 TIDY_MAJOR_VERSION)
    list(GET VERSION_LIST 1 TIDY_MINOR_VERSION)
    list(GET VERSION_LIST 2 TIDY_POINT_VERSION)
else ()
    message(FATAL_ERROR "*** FAILED to get a VERSION from version.txt!")
endif ()

# Establish version date
if (LIBTIDY_DATE)
    string(REPLACE "." ";" DATE_LIST "${LIBTIDY_DATE}")
    list(LENGTH DATE_LIST DATE_PART_COUNT)
    if (DATE_PART_COUNT LESS 3)
        message(FATAL_ERROR "*** FAILED to get a DATE from version.txt!")
    endif ()
    list(GET DATE_LIST 0 tidy_YEAR)
    list(GET DATE_LIST 1 tidy_MONTH)
    list(GET DATE_LIST 2 tidy_DAY)
else ()
    message(FATAL_ERROR "*** FAILED to get a DATE from version.txt!")
endif ()


#------------------------------------------------------------------------
# Library Types and Linking
#   By default, *both* static and dynamic library types are built. The
#   shared library can be turned off if not needed. The console program
#   can be configured for static linking or dynamic linking.
#------------------------------------------------------------------------
set( LIB_TYPE STATIC )    # set default message

option( BUILD_SHARED_LIB "Set OFF to NOT build shared library" ON )

# Issue #326 - Allow linkage choice of console app tidy
option( TIDY_CONSOLE_SHARED "Set ON to link with shared(DLL) lib." OFF )
if (TIDY_CONSOLE_SHARED)
    # Linking the console app against the shared library requires that the
    # shared library is actually being built.
    if (NOT BUILD_SHARED_LIB)
        message(FATAL_ERROR "Enable shared build for this tidy linkage!")
    endif ()
endif ()


#------------------------------------------------------------------------
# Miscellaneous Options
#------------------------------------------------------------------------
option( BUILD_TAB2SPACE "Set ON to build utility app, tab2space" OFF )
option( BUILD_SAMPLE_CODE "Set ON to build the sample code" OFF )
option( TIDY_COMPAT_HEADERS "Set ON to include compatibility headers" OFF )


#------------------------------------------------------------------------
# Man Page
#   Allow building with non-default man page directory.
#------------------------------------------------------------------------
if (NOT MAN_INSTALL_DIR)
    set(MAN_INSTALL_DIR share/man/man1)
endif ()


#------------------------------------------------------------------------
# Localization
#   Allow building without extra language support.
#------------------------------------------------------------------------
option( SUPPORT_LOCALIZATIONS "Set OFF to build without additional languages." ON )
if (SUPPORT_LOCALIZATIONS)
    add_definitions ( -DSUPPORT_LOCALIZATIONS=1 )
else ()
    add_definitions ( -DSUPPORT_LOCALIZATIONS=0 )
endif ()


#------------------------------------------------------------------------
# Console Application
#   Allow building without console support, which mostly prevents
#   console strings from existing in the library. Note that this will
#   prevent the console application from being built, since it can't be
#   linked.
#------------------------------------------------------------------------
option( SUPPORT_CONSOLE_APP "Set OFF to build libraries only without console application support." ON )
if (SUPPORT_CONSOLE_APP)
    add_definitions ( -DSUPPORT_CONSOLE_APP=1 )
else ()
    add_definitions ( -DSUPPORT_CONSOLE_APP=0 )
endif ()


#------------------------------------------------------------------------
# Diagnostics
#   Enable building with logs, some memory diagnostics.
#------------------------------------------------------------------------
option( ENABLE_DEBUG_LOG    "Set ON to output debugging messages." OFF )
option( ENABLE_ALLOC_DEBUG  "Set ON to output node allocation diagnostics." OFF )
option( ENABLE_MEMORY_DEBUG "Set ON to output some memory diagnostics." OFF )

# Each diagnostic switch maps to one preprocessor definition and reports its
# state during configuration.
if (ENABLE_DEBUG_LOG)
    add_definitions(-DENABLE_DEBUG_LOG)
    message(STATUS "*** Debug Logging is enabled.")
else ()
    message(STATUS "*** Debug Logging is NOT enabled.")
endif ()

if (ENABLE_ALLOC_DEBUG)
    # see lexer.c for details
    add_definitions(-DDEBUG_ALLOCATION)
    message(STATUS "*** Note, lexer.c node allocation diagnostics are ON")
endif ()

if (ENABLE_MEMORY_DEBUG)
    # see alloc.c for details
    add_definitions(-DDEBUG_MEMORY)
    message(STATUS "*** Note, alloc.c memory diagnostics are ON")
endif ()

if (WIN32)
    option( ENABLE_CRTDBG_MEMORY "Set ON to enable the Windows CRT debug library." OFF )
    if (ENABLE_CRTDBG_MEMORY)
        # see tidy.c for details
        add_definitions(-D_CRTDBG_MAP_ALLOC)
        message(STATUS "*** Note, tidy.c Windows CRT memory debug is ON")
    endif ()
endif ()


#------------------------------------------------------------------------
# Compiler Flags
#   Setup other compiler-specific and platform-specific compiler flags.
#------------------------------------------------------------------------
if (CMAKE_COMPILER_IS_GNUCXX)
    set(WARNING_FLAGS -Wall)
endif ()

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
    set(WARNING_FLAGS "-Wall -Wno-overloaded-virtual")
endif ()

if (WIN32 AND MSVC)
    # Silence the following MSVC warnings:
    #   C4996: The compiler encountered a deprecated declaration.
    #   C4090: 'function' : different 'const' qualifiers
    #   C4244: '=' : conversion from '__int64' to 'uint', possible loss of data
    #   C4267: 'function' : conversion from 'size_t' to 'uint', possible loss of data
    set(WARNING_FLAGS "${WARNING_FLAGS} /wd4996 /wd4090 /wd4244 /wd4267")
    set(MSVC_FLAGS "-DNOMINMAX -D_USE_MATH_DEFINES -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS -D__CRT_NONSTDC_NO_WARNINGS")
    # to distinguish between debug and release lib in windows
    set(CMAKE_DEBUG_POSTFIX "d") # little effect in unix
endif ()

# Append the collected flags to the global C and C++ flags.
set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${WARNING_FLAGS} ${MSVC_FLAGS} -D_REENTRANT")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WARNING_FLAGS} ${MSVC_FLAGS} -D_REENTRANT")
# NOTE(review): MSVC_LD_FLAGS is not assigned anywhere in the visible part of
# this file, so this normally appends only a space.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MSVC_LD_FLAGS}")


#------------------------------------------------------------------------
# Static Windows Runtime
#   Option to statically link to the Windows runtime. Maybe only
#   applies to WIN32/MSVC.
#------------------------------------------------------------------------
if (MSVC)
    option( USE_STATIC_RUNTIME "Set ON to change /MD(DLL) to /MT(static)" OFF )
    if (USE_STATIC_RUNTIME)
        # Rewrite /MD to /MT in every C and C++ flag variable listed here.
        foreach (flag_var
                 CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
                 CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE)
            string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
        endforeach ()
        message(STATUS "Using /MT STATIC runtime")
    else ()
        message(STATUS "Using /MD DYNAMIC runtime")
    endif ()
endif ()


#------------------------------------------------------------------------
# Macro Values
#   These additional macros are set in Tidy's source code. It is *very*
#   seldom that you would ever have to change any of these in order to
#   achieve a functioning build.
#------------------------------------------------------------------------
# Version string and release date, as parsed from version.txt earlier in
# this file.
add_definitions(
    -DLIBTIDY_VERSION="${LIBTIDY_VERSION}"
    -DRELEASE_DATE="${tidy_YEAR}/${tidy_MONTH}/${tidy_DAY}"
)

# Optionally specify an extra version point for pre-release/debug versions.
if (TIDY_RC_NUMBER)
    add_definitions(-DRC_NUMBER="${TIDY_RC_NUMBER}")
endif ()

# If your OS doesn't have native ISO2022 support, then build with this flag.
if (NO_NATIVE_ISO2022_SUPPORT)
    add_definitions(-DNO_NATIVE_ISO2022_SUPPORT=1)
endif ()

# If your OS doesn't have library function access(), build with this flag.
if (NO_ACCESS_SUPPORT)
    add_definitions(-DNO_ACCESS_SUPPORT=1)
endif ()

# Delete me? Not used in Tidy source!
add_definitions(-DHAVE_CONFIG_H)


#------------------------------------------------------------------------
# Runtime Configuration File Support
#   By default on Unix-like systems when building for the console program,
#   support runtime configuration files in /etc/ and in ~/. To prevent this,
#   set ENABLE_CONFIG_FILES to NO. Specify -DTIDY_CONFIG_FILE and/or
#   -DTIDY_USER_CONFIG_FILE to override the default paths in tidyplatform.h.
# @note: this section refactored to support #584.
#------------------------------------------------------------------------
if (UNIX AND SUPPORT_CONSOLE_APP)
    option( ENABLE_CONFIG_FILES "Set to OFF to disable Tidy runtime configuration file support" ON )
    # All Unixes support getpwnam(); undef'd in tidyplatform.h if necessary.
    add_definitions(-DSUPPORT_GETPWNAM=1)
else ()
    option( ENABLE_CONFIG_FILES "Set to ON to enable Tidy runtime configuration file support" OFF )
    # Elsewhere getpwnam() support is opt-in via -DSUPPORT_GETPWNAM=ON.
    if (SUPPORT_GETPWNAM)
        add_definitions(-DSUPPORT_GETPWNAM=1)
    endif ()
endif ()

if (ENABLE_CONFIG_FILES)
    message(STATUS "*** Building support for runtime configuration files.")
    add_definitions(-DTIDY_ENABLE_CONFIG_FILES)

    # Defaults are defined here so that they can also be passed to XSL.
    if (NOT TIDY_CONFIG_FILE)
        set(TIDY_CONFIG_FILE "/etc/tidy.conf")
    endif ()
    if (NOT TIDY_USER_CONFIG_FILE)
        set(TIDY_USER_CONFIG_FILE "~/.tidyrc")
    endif ()

    # do *not* add these unless ENABLE_CONFIG_FILES!
    add_definitions(
        -DTIDY_CONFIG_FILE="${TIDY_CONFIG_FILE}"
        -DTIDY_USER_CONFIG_FILE="${TIDY_USER_CONFIG_FILE}"
    )
endif ()


#------------------------------------------------------------------------
# Shared Library
#   Setup whether or not we will build the shared library.
#------------------------------------------------------------------------
if (BUILD_SHARED_LIB)
    set(LIB_TYPE SHARED)
    message(STATUS "*** Also building DLL library ${LIB_TYPE}, version ${LIBTIDY_VERSION}, date ${LIBTIDY_DATE}")
else ()
    message(STATUS "*** Only building static library ${LIB_TYPE}, version ${LIBTIDY_VERSION}, date ${LIBTIDY_DATE}")
endif ()


#################################################
# Build
#################################################

#------------------------------------------------------------------------
# File Locations and File Lists
#   Set up the include paths and the source/header file lists.
#------------------------------------------------------------------------
include_directories(
    "${PROJECT_SOURCE_DIR}/include"
    "${PROJECT_SOURCE_DIR}/src"
)

set(SRCDIR src)
set(INCDIR include)

# Library sources. The order of the base names below is the order in which
# the files are handed to add_library().
set(CFILES)
foreach (base
         access attrs istack parser tags entities lexer pprint charsets
         clean message config alloc attrdict buffio fileio streamio tagask
         tmbstr utf8 tidylib mappedio gdoc language messageobj sprtf)
    list(APPEND CFILES ${SRCDIR}/${base}.c)
endforeach ()

# Public headers; these are the files installed to INCLUDE_INSTALL_DIR.
set(HFILES
    ${INCDIR}/tidyplatform.h
    ${INCDIR}/tidy.h
    ${INCDIR}/tidyenum.h
    ${INCDIR}/tidybuffio.h
)
if (TIDY_COMPAT_HEADERS)
    # Optional compatibility headers, enabled with TIDY_COMPAT_HEADERS.
    list(APPEND HFILES
        ${INCDIR}/buffio.h
        ${INCDIR}/platform.h
    )
endif ()

# Private library headers, listed alongside the sources of both library
# targets.
set(LIBHFILES)
foreach (base
         access attrs attrdict charsets clean config entities fileio forward
         lexer mappedio message parser pprint streamio tags tmbstr utf8
         tidy-int version gdoc language language_en sprtf)
    list(APPEND LIBHFILES ${SRCDIR}/${base}.h)
endforeach ()


#------------------------------------------------------------------------
# Target Locations
#   Each install directory can be overridden on the command line; the
#   values below are only the defaults.
#------------------------------------------------------------------------
if (NOT LIB_INSTALL_DIR)
    set(LIB_INSTALL_DIR lib${LIB_SUFFIX})
endif ()

if (NOT BIN_INSTALL_DIR)
    set(BIN_INSTALL_DIR bin)
endif ()

if (NOT INCLUDE_INSTALL_DIR)
    set(INCLUDE_INSTALL_DIR include)
endif ()


#------------------------------------------------------------------------
# Static Library
#   The static library always builds.
#------------------------------------------------------------------------
set(name tidy-static)
add_library ( ${name} STATIC ${CFILES} ${HFILES} ${LIBHFILES} )
# The static archive gets an "s" suffix so that it does not clash with the
# shared library's import library / .so of the same base name.
set_target_properties( ${name} PROPERTIES
                       OUTPUT_NAME ${LIB_NAME}s )
if (NOT TIDY_CONSOLE_SHARED)    # user wants default static linkage
    list ( APPEND add_LIBS ${name} )
endif ()
install(TARGETS ${name}
        RUNTIME DESTINATION ${BIN_INSTALL_DIR}
        ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
        LIBRARY DESTINATION ${LIB_INSTALL_DIR}
        )
install( FILES ${HFILES} DESTINATION ${INCLUDE_INSTALL_DIR} )


#------------------------------------------------------------------------
# Dynamic Library
#   If the user option is still on.
#------------------------------------------------------------------------
if (BUILD_SHARED_LIB)
    set(name tidy-share)
    if (UNIX AND APPLE)
        set(CMAKE_MACOSX_RPATH 1)
    endif ()
    add_library ( ${name} SHARED ${CFILES} ${HFILES} ${LIBHFILES} )
    set_target_properties( ${name} PROPERTIES
                           OUTPUT_NAME ${LIB_NAME} )
    set_target_properties( ${name} PROPERTIES
                           VERSION   ${LIBTIDY_VERSION}
                           SOVERSION ${TIDY_MAJOR_VERSION} )
    # COMPILE_FLAGS is a single string property: setting it twice keeps only
    # the last value. Both definitions are therefore passed in one call so
    # that -DBUILD_SHARED_LIB is not silently dropped.
    set_target_properties( ${name} PROPERTIES
                           COMPILE_FLAGS "-DBUILD_SHARED_LIB -DBUILDING_SHARED_LIB" )
    install(TARGETS ${name}
            RUNTIME DESTINATION ${BIN_INSTALL_DIR}
            ARCHIVE DESTINATION ${LIB_INSTALL_DIR}
            LIBRARY DESTINATION ${LIB_INSTALL_DIR}
            )
    if (TIDY_CONSOLE_SHARED)    # user wants shared/dll linkage
        list ( APPEND add_LIBS ${name} )
    endif ()
endif ()


#------------------------------------------------------------------------
# Main Executable
#   The main executable will be linked with either the static or the
#   shared library.
#------------------------------------------------------------------------
# Directory holding the console sources. Set unconditionally because the
# tab2space target below uses it even when SUPPORT_CONSOLE_APP is OFF.
set ( BINDIR console )

if (SUPPORT_CONSOLE_APP)
    set(name ${LIB_NAME})
    add_executable( ${name} ${BINDIR}/tidy.c )
    target_link_libraries( ${name} ${add_LIBS} )
    if (MSVC)
        set_target_properties( ${name} PROPERTIES DEBUG_POSTFIX d )
    endif ()
    if (NOT TIDY_CONSOLE_SHARED)
        set_target_properties( ${name} PROPERTIES
                               COMPILE_FLAGS "-DTIDY_STATIC" )
    endif ()
    # Honour BIN_INSTALL_DIR (default "bin") like the library targets do.
    install (TARGETS ${name} DESTINATION ${BIN_INSTALL_DIR})
endif ()


#------------------------------------------------------------------------
# Miscellaneous Targets
#------------------------------------------------------------------------
if (BUILD_TAB2SPACE)
    set(name tab2space)
    add_executable( ${name} ${BINDIR}/tab2space.c )
    if (MSVC)
        set_target_properties( ${name} PROPERTIES DEBUG_POSTFIX d )
    endif ()
    # no INSTALL of this 'local' tool - use deprecated
endif ()

if (BUILD_SAMPLE_CODE)
    set(name test71)
    set(dir console)
    add_executable( ${name} ${dir}/${name}.cxx )
    if (MSVC)
        set_target_properties( ${name} PROPERTIES DEBUG_POSTFIX d )
    endif ()
    target_link_libraries( ${name} ${add_LIBS} )
    # no INSTALL of this 'local' sample
endif ()


#################################################
# Create man pages
#################################################
if (UNIX AND SUPPORT_CONSOLE_APP)
    find_program( XSLTPROC_FOUND xsltproc )
    if (XSLTPROC_FOUND)
        ## NOTE: man name must match exe ie currently `${LIB_NAME}.1` not `tidy.1`
        ## also could use `manpath` command output to determine target install path
        set(TIDY_MANFILE ${LIB_NAME}.1)
        message(STATUS "*** Generating man ${TIDY_MANFILE} custom commands...")
        set(TIDY1XSL ${CMAKE_CURRENT_BINARY_DIR}/tidy1.xsl)
        set(TIDYHELP ${CMAKE_CURRENT_BINARY_DIR}/tidy-help.xml)
        set(TIDYCONFIG ${CMAKE_CURRENT_BINARY_DIR}/tidy-config.xml)
        add_custom_target(man ALL DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}")

        ## Populate the @VARIABLES@ in the input file.
        configure_file(
            ${CMAKE_CURRENT_SOURCE_DIR}/man/tidy1.xsl.in
            ${TIDY1XSL}
        )

        # The three commands below are attached to the `man` target and run
        # in the order they are declared. The TARGET signature of
        # add_custom_command() takes a build-event keyword and no DEPENDS.

        # Run the built EXE to generate xml output.
        add_custom_command(
            TARGET man POST_BUILD
            COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} -xml-help > ${TIDYHELP}
            COMMENT "Generate ${TIDYHELP}"
            VERBATIM
        )

        # Run the built EXE to generate more xml output.
        add_custom_command(
            TARGET man POST_BUILD
            COMMAND ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME} -xml-config > ${TIDYCONFIG}
            COMMENT "Generate ${TIDYCONFIG}"
            VERBATIM
        )

        # Run xsltproc to generate the install files. The path located by
        # find_program() above is used rather than relying on PATH again.
        add_custom_command(
            TARGET man POST_BUILD
            COMMAND ${XSLTPROC_FOUND} ${TIDY1XSL} ${TIDYHELP} > ${CMAKE_CURRENT_BINARY_DIR}/${TIDY_MANFILE}
            COMMENT "Generate ${TIDY_MANFILE}"
            VERBATIM
        )

        install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${TIDY_MANFILE} DESTINATION ${MAN_INSTALL_DIR})
    else ()
        message(STATUS "*** NOTE: xsltproc NOT FOUND! Can NOT generate man page.")
        message(STATUS "*** You need to install xsltproc in your system.")
    endif ()
endif ()


#################################################
# Create MSI,EXE, DMG, DEB/RPM
# TODO: Check each of these builds
#################################################
# Pointer size decides the "32bit"/"64bit" tag used in the package file name.
set(BITNESS 32)
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(BITNESS 64)
endif ()

#------------------------------------------------------------------------
# System Runtime Libraries
#   Need to ensure that system DLLs get included in a binary
#   distribution, but since it can miss some - seems incomplete - make
#   optional.
#------------------------------------------------------------------------
option( ADD_SYSTEM_RUNTIMES "Set ON to include system runtime DLLS in distribution" OFF )
if (MSVC AND ADD_SYSTEM_RUNTIMES)
    if (NOT DEFINED CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS)
        # Visual Studio Express does include redistributable components so
        # squelch the warning.
        set (CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
    endif ()
    set (CMAKE_INSTALL_DEBUG_LIBRARIES OFF)
    include (InstallRequiredSystemLibraries)
endif ()

#------------------------------------------------------------------------
# Windows
#   MSI - this needs WiX Toolset installed and a path to candle.exe
#   EXE - this needs NSIS tools to be in path
#------------------------------------------------------------------------
if (WIN32)
    set(CPACK_GENERATOR "NSIS;WIX;ZIP")
    set(CPACK_SOURCE_GENERATOR "ZIP")
    set(CPACK_WIX_UPGRADE_GUID "D809598A-B513-4752-B268-0BAC403B00E4")
elseif ( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" )
    set(CPACK_GENERATOR "productbuild")
    set(CPACK_SOURCE_GENERATOR "TGZ")
else ()
    set(CPACK_GENERATOR "DEB;RPM")
    set(CPACK_SOURCE_GENERATOR "TGZ")
endif ()

#------------------------------------------------------------------------
# General
#------------------------------------------------------------------------
set(CPACK_PACKAGE_NAME "${LIB_NAME}")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${LIBTIDY_DESCRIPTION}")
set(CPACK_PACKAGE_VENDOR "HTML Tidy Advocacy Community Group")
set(CPACK_PACKAGE_CONTACT "maintainer@htacg.org")

set(CPACK_PACKAGE_VERSION ${LIBTIDY_VERSION})
set(CPACK_PACKAGE_VERSION_MAJOR "${TIDY_MAJOR_VERSION}")
set(CPACK_PACKAGE_VERSION_MINOR "${TIDY_MINOR_VERSION}")
set(CPACK_PACKAGE_VERSION_PATCH "${TIDY_POINT_VERSION}")

set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README/README.html")

# use one compatible license file for all
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/README/LICENSE.txt")
set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README/README.html")
set(CPACK_RESOURCE_FILE_WELCOME "${CMAKE_CURRENT_SOURCE_DIR}/README/README.html")

#------------------------------------------------------------------------
# Debian
#------------------------------------------------------------------------
set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${CPACK_PACKAGE_CONTACT})
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE
${LIBTIDY_URL}) #set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc") set(CPACK_DEBIAN_PACKAGE_SECTION "Libraries") set(CPACK_SOURCE_IGNORE_FILES "${PROJECT_SOURCE_DIR}/build" ) #------------------------------------------------------------------------ # RPM config #------------------------------------------------------------------------ set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/usr/share/man" "/usr/share/man/man1") set(CPACK_SOURCE_IGNORE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/test/;${CMAKE_CURRENT_SOURCE_DIR}/build/;${CMAKE_CURRENT_SOURCE_DIR}/.git/") if (NOT WIN32 AND NOT APPLE) set( CPACK_PACKAGE_FILE_NAME "${LIB_NAME}-${CPACK_PACKAGE_VERSION}-${BITNESS}bit" ) endif () include(CPack) #------------------------------------------------------------------------ # pkg-config #------------------------------------------------------------------------ configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/${LIB_NAME}.pc.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc" @ONLY ) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}.pc" DESTINATION "${LIB_INSTALL_DIR}/pkgconfig" ) # eof tidy-html5-5.6.0/README.md000066400000000000000000000067551320627304100150140ustar00rootroot00000000000000# HTACG HTML Tidy All other READMEs and related materials can be found in [README/][100]. Although all of our materials should be linked in this README, be sure to check this directory for documents we’ve not yet added to this document. ## Building HTML Tidy - For build instructions please see [README/BUILD.md][115]. ## Branches and Versions Learn about which branches are available, which branch you should use, and how HTML Tidy’s versioning scheme works. - Learn about version numbering in [README/VERSION.md][160]. - Learn about our repository branches in [README/BRANCHES.md][110]. ## Contributing and Development Guides We gladly accept PRs! 
Read about some of our contribution guidelines, and check out some of the additional explanatory documents that will aid your understanding of how to accomplish certain things in HTML Tidy. ### General Contribution Guidelines These are some general guidelines that will help you help us when it comes to making your own contributions to HTML Tidy. - Learn about our contributing guidelines in [README/CONTRIBUTING.md][125]. - Understand HTML Tidy’s source code style in [README/CODESTYLE.md][120]. ### Adding Features Guides When you’re ready to add a great new feature, these write-ups may be useful. - Learn how to add new element attributes to HTML Tidy by reading [README/ATTRIBUTES.md][105]. - Discover how to add new tags to Tidy in [README/TAGS.md][130]. - If you want to add new messages to Tidy, read [README/MESSAGE.md][150]. - Configuration options can be added according to [README/OPTIONS.md][155]. ### Language Localization Guides Tidy supports localization, and welcomes translations into various languages. Please read up on how to localize HTML Tidy. - The general README for localizing can be found in [/README/LOCALIZE.md][140]. - And [/localize/README.md][145] contains specific instructions for localizing. ## Other Important Links - site: [http://www.html-tidy.org/][4] - source: [https://github.com/htacg/tidy-html5][5] - binaries: [http://binaries.html-tidy.org][6] - bugs: [https://github.com/htacg/tidy-html5/issues][7] - list: [https://lists.w3.org/Archives/Public/html-tidy/][8] - api and quickref: [http://api.html-tidy.org/][9] [4]: http://www.html-tidy.org/ [5]: https://github.com/htacg/tidy-html5 [6]: http://binaries.html-tidy.org [7]: https://github.com/htacg/tidy-html5/issues [8]: https://lists.w3.org/Archives/Public/html-tidy/ [9]: http://api.html-tidy.org/ ## History This repository should be considered canonical for HTML Tidy as of 2015-January-15. 
- This repository originally transferred from [w3c.github.com/tidy-html5][20], now redirected to the current site. - First moved to Github from [tidy.sourceforge.net][21]. Note, this site is kept only for historic reasons, and is not now well maintained. **Tidy is the granddaddy of HTML tools, with support for modern standards.** Have fun... [20]: http://w3c.github.com/tidy-html5/ [21]: http://tidy.sourceforge.net ## License HTML Tidy and LibTidy are free and open source software with a permissive license. - You can read the complete license in [README/LICENSE.md][135]. [100]: README/ [105]: README/ATTRIBUTES.md [110]: README/BRANCHES.md [115]: README/BUILD.md [120]: README/CODESTYLE.md [125]: README/CONTRIBUTING.md [130]: README/TAGS.md [135]: README/LICENSE.md [140]: /README/LOCALIZE.md [145]: /localize/README.md [150]: README/MESSAGE.md [155]: README/OPTIONS.md [160]: README/VERSION.md tidy-html5-5.6.0/README/000077500000000000000000000000001320627304100144555ustar00rootroot00000000000000tidy-html5-5.6.0/README/API_AND_NAMESPACE.md000066400000000000000000000142641320627304100174550ustar00rootroot00000000000000# The `LibTidy` API and Namespacing ## Introduction If you're just getting started working with `LibTidy`, some of the design choices may seem overwhelming if you're not a seasoned C veteran. Hopefully this article will give a decent overview, encouraging you to explore and contribute to the `LibTidy` code. This article will discuss briefly: - How `LibTidy` achieves namespacing in C - Explanations for some of the bizarre, do-nothing macros. - Opaque types - How to add new functions to the `LibTidy` API. # Namespacing The C language does not support built in namespacing, but it is subject to namespace collision, especially when a library is statically linked. `LibTidy` tries to get around this by making a compromise between human-readable names and making the names random enough to avoid a collision. 
As you browse Tidy's code, you'll notice many uses of a macro function – `TY_()` – applied to the function names of non-static functions. The preprocessor thus resolves all of these function names to `prvTidyFunction`, thus ensuring a clear namespace and avoiding the possibility of collisions (unless some other library has thoughtlessly borrowed our prefix for the same). For example, `TY_(getNextOptionPick)` will resolve to `prvTidygetNextOptionPick` when compiled. Of course, `static` functions are immune to the issue of namespace pollution, so in general you will really only use this technique for functions that must be accessible from outside of your new file, such as functions that you want to expose to the API. # Macros for documentation `TIDY_EXPORT` and `TIDY_CALL` are defined to be `NULL`, i.e., when compiled they resolve to nothing. These are used exclusively for documenting functions that are part of the API defined in `tidy.h` and the implementation in `tidylib.c`. For example, in `tidy.h`: ~~~ TIDY_EXPORT TidyIterator TIDY_CALL getWindowsLanguageList(); ~~~ The `TIDY_EXPORT` call clearly indicates that this function prototype is meant to be exported from the API, and `TIDY_CALL` clearly indicates that the function is called from within `LibTidy`. Although this makes things obvious from the documentation perspective, the truth is a little murkier. In some environments one might define `TIDY_EXPORT` and `TIDY_CALL` differently in order to control compiler behavior, especially in environments that have special requirements for dynamic libraries. In general, though, you shouldn't have to worry about this. 
The preferred use of pointer operators when documenting with macros is this: ~~~ const tidyLocaleMapItem* TIDY_CALL getNextWindowsLanguage( TidyIterator* iter ) ~~~ …instead of this: ~~~ const tidyLocaleMapItem TIDY_CALL *getNextWindowsLanguage( TidyIterator* iter ) ~~~ # External types are opaque In several spots the source code indicates that a particular structure is "opaque." This simply means that API users cannot see inside of them, and they have to depend on accessor functions to gain access to the sweet fruit that is within. This is a design choice that makes `LibTidy` highly portable and makes it accessible to multitudes of other languages that can communicate with a C API. Take `tidyDoc` for example, as it's the most fundamental datatype within `LibTidy`. As an API user, you can have a reference to a `tidyDoc`, and you're going to pass it around a lot to accessor functions (such as `tidyCleanAndRepair`), and you know that it contains lots of good stuff, but you're not allowed to peek inside of it unless an accessor function is provided. Think of it as a token that you pass around, and nothing more. Internally, the type is cast to a native C structure of type `tidyDocImpl`, and so if you decide to become a Tidy developer, you have the choice to access the item fully. If you extend Tidy's API, it's important to respect this design choice, even if only writing functionality for the console application (which is, of course, simply an implementor of `LibTidy`). # How to add new functions to `LibTidy` All of the information above is useful for anyone who wants to browse Tidy's source code, or use the API, or understand Tidy better, but it all comes together nicely when you want to extend the API. This quick lesson will show you how to do so, using `tidyLocalizedString()` as an example. ## Behind the scenes The first thing we need to do is have the internal version of the function that we want to add. Tidy has a module that handles localization: `language.h/c`. 
The header is where we define the interface to LibTidy, which should be namespaced according to the discussion above. We can declare: ~~~ ctmbstr TY_(tidyLocalizedString)( uint messageType ); ~~~ …and of course implement it in the `.c` file. Now you have a decision to make: if you plan to use this function internally, you're going to have to import the header into other modules that require the function. This can lead to painful compile-time consequences. However, since we want to expose this particular function to the API, it will be visible within `LibTidy`, so we can use the public API internally, too. ## The API Once implemented, we want a pretty, public-facing name for our internal `TY_(tidyLocalizedString)` function, which appropriately is `tidyLocalizedString()`. Add the declaration to `tidy.h`: ~~~ TIDY_EXPORT ctmbstr TIDY_CALL tidyLocalizedString( uint messageType ); ~~~ …and now the publicly exposed interface knows that your function exists. All that's left to do is add the `language.h` header to `tidylib.c`, and then implement it there: ~~~ ctmbstr TIDY_CALL tidyLocalizedString( uint messageType ) { return TY_(tidyLocalizedString)( messageType ); } ~~~ Congratulations, you can now expose new functionality to the API. ## API functions for opaque types For a more complicated example that demonstrates how to use opaque types (and also the `TidyIterator` type) have a look at the implementation of `getWindowsLanguageList()`, and its partners `*getNextWindowsLanguage()`, `TidyLangWindowsName()`, and `TidyLangPosixName()`. These demonstrate how to: - implement iteration for structures with multiple records. - write a function in `tidylib.c` that converts between the exposed, opaque type and the internal, implementation type. - further reinforce how functionality is added to the API. 
tidy-html5-5.6.0/README/ATTRIBUTES.md000066400000000000000000000040621320627304100164670ustar00rootroot00000000000000# Tidy Element Attributes This is about adding a **new** HTML attribute to one or more HTML tags, i.e., a new attribute such as `attribute=value`. Tidy’s large number of attributes are supported via number of files: - `tidyenum.h` is where you first define a new attribute in order to give it an internal value. - `attrs.c` is where you give a unique **string** name to the attribute, as well as a **function** to verify the **value**. - `attrdict.c` further refines the definition of your attribute, specifying which version(s) of HTML support this attribute. - `tags.c`, finally, determines which tags support the attribute, in the `tag_defs[]` table. So, to add a new `attribute=value`, on one or more existing tags, consists of the following simple steps - 1. `tidyenum.h` - Give the attribute an internal name, like `TidyAttr_XXXX`, and thus a value. Please try to keep this enumeration in alphabetical order. 2. `attrs.c` - Assign the string value of the attribute. Of course this must be unique. And then assign a `function` to verify the attribute value. There are already a considerable number of defined functions to verify specific attribute values, but maybe this new attribute requires a new function, so that should be written, and defined. 3. `attrdict.c` - If this attribute only relates to specific tags, then it should be added to their list. There are some general attributes that are allowed on every, or most tags, so this new attribute and value should be added accordingly. 4. `tags.c` - Now the new attribute will be verified for each tag it is associated with in the `tag_defs[]` table. Like for example the `