pax_global_header00006660000000000000000000000064146372737650014536gustar00rootroot0000000000000052 comment=947d2e9596ac1ff9a7c61b27e964db2db0722c88 luaexpat-1.5.2/000077500000000000000000000000001463727376500133665ustar00rootroot00000000000000luaexpat-1.5.2/.busted000066400000000000000000000001571463727376500146600ustar00rootroot00000000000000return { default = { verbose = true, lazy = true, coverage = false, output = "gtest", }, } luaexpat-1.5.2/.editorconfig000066400000000000000000000010121463727376500160350ustar00rootroot00000000000000root = true [*] end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true charset = utf-8 [*.{lua,rockspec}] indent_style = tab indent_size = 4 [Makefile] indent_style = tab indent_size = 4 [*.html] indent_style = tab indent_size = 4 [*.c] indent_style = space indent_size = 2 [*.h] indent_style = tab indent_size = 8 luaexpat-1.5.2/.github/000077500000000000000000000000001463727376500147265ustar00rootroot00000000000000luaexpat-1.5.2/.github/workflows/000077500000000000000000000000001463727376500167635ustar00rootroot00000000000000luaexpat-1.5.2/.github/workflows/deploy.yml000066400000000000000000000023311463727376500210010ustar00rootroot00000000000000name: Deploy on: [ push, workflow_dispatch ] jobs: affected: uses: lunarmodules/.github/.github/workflows/list_affected_rockspecs.yml@main build: needs: affected if: ${{ needs.affected.outputs.rockspecs }} uses: lunarmodules/.github/.github/workflows/test_build_rock.yml@main with: rockspecs: ${{ needs.affected.outputs.rockspecs }} upload: needs: [ affected, build ] # Only run upload if: # 1. We are on the canonical repository (no uploads from forks) # 2. The current commit is either tagged or on the default branch (the workflow will upload dev/scm rockspecs any # time they are touched, tagged ones whenever the edited rockspec and tag match) # 3. 
Some rockspecs were changed — this implies the commit changing the rockspec is the same one that gets tagged if: >- ${{ github.repository == 'lunarmodules/luaexpat' && ( github.ref_name == 'master' || startsWith(github.ref, 'refs/tags/') ) && needs.affected.outputs.rockspecs }} uses: lunarmodules/.github/.github/workflows/upload_to_luarocks.yml@main with: rockspecs: ${{ needs.affected.outputs.rockspecs }} secrets: apikey: ${{ secrets.LUAROCKS_APIKEY }} luaexpat-1.5.2/.github/workflows/docs_lint.yml000066400000000000000000000007731463727376500214730ustar00rootroot00000000000000name: Docs on: [push, pull_request] jobs: DocLint: runs-on: ubuntu-20.04 steps: - name: Checkout uses: actions/checkout@v3 - uses: anishathalye/proof-html@v1 with: directory: ./docs check_favicon: false enforce_https: false url_ignore: | http://www.fabricadigital.com.br http://www.saxproject.org/ url_ignore_re: | ^http://files.luaforge.net/ ^https://opensource.org/ luaexpat-1.5.2/.github/workflows/luacheck.yml000066400000000000000000000003311463727376500212620ustar00rootroot00000000000000name: Luacheck on: [push, pull_request] jobs: luacheck: runs-on: ubuntu-20.04 steps: - name: Checkout uses: actions/checkout@v3 - name: Luacheck uses: lunarmodules/luacheck@v0 luaexpat-1.5.2/.github/workflows/unix_build.yml000066400000000000000000000016321463727376500216520ustar00rootroot00000000000000name: "Unix build" on: [push, pull_request] jobs: test: runs-on: ubuntu-latest strategy: fail-fast: false matrix: luaVersion: ["5.1", "5.2", "5.3", "5.4", "luajit-2.1.0-beta3", "luajit-openresty"] expatVersion: ["2.4.4", "2.6.2"] steps: - uses: actions/checkout@v2 - uses: leafo/gh-actions-lua@v8 with: luaVersion: ${{ matrix.luaVersion }} - uses: leafo/gh-actions-luarocks@v4 - name: dependencies run: | luarocks install busted - name: build expat run: | git clone -b R_$(echo ${{ matrix.expatVersion }} | sed "s/\./_/g") --single-branch https://github.com/libexpat/libexpat.git cd libexpat/expat ./buildconf.sh 
./configure sudo make install - name: build luaexpat run: | luarocks make - name: test suite run: | busted --Xoutput "--color" luaexpat-1.5.2/.gitignore000066400000000000000000000005701463727376500153600ustar00rootroot00000000000000.vscode # Compiled Lua sources luac.out # luarocks build files *.rock *.zip *.tar.gz # luacov report files *.report.out *.stats.out # Object files *.o *.os *.ko *.obj *.elf # Precompiled Headers *.gch *.pch # Libraries *.lib *.a *.la *.lo *.def *.exp # Shared objects (inc. Windows DLLs) *.dll *.so *.so.* *.dylib # Executables *.exe *.out *.app *.i*86 *.x86_64 *.hex luaexpat-1.5.2/.luacheckrc000066400000000000000000000006041463727376500154730ustar00rootroot00000000000000unused_args = false redefined = false max_line_length = false globals = { "ngx", } not_globals = { "string.len", "table.getn", } include_files = { "**/*.lua", "**/*.rockspec", ".busted", ".luacheckrc", } files["spec/**/*.lua"] = { std = "+busted", } exclude_files = { -- GH Actions Lua Environment ".lua", ".luarocks", ".install", } luaexpat-1.5.2/LICENSE000066400000000000000000000021031463727376500143670ustar00rootroot00000000000000Copyright (C) 2003-2007 The Kepler Project, 2013-2024 Matthew Wild Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. luaexpat-1.5.2/Makefile000066400000000000000000000020021463727376500150200ustar00rootroot00000000000000CC ?= cc RM ?= rm -rf INSTALL ?= install INSTALL_PROGRAM ?= $(INSTALL) INSTALL_DATA ?= $(INSTALL) -m 644 LUA_V ?= 5.4 LUA_LDIR ?= /usr/share/lua/$(LUA_V) LUA_CDIR ?= /usr/lib/lua/$(LUA_V) T = lxp LIBNAME = $(T).so COMMON_CFLAGS = -g -pedantic -Wall -O2 -fPIC -DPIC LUA_INC ?= -I/usr/include/lua$(LUA_V) EXPAT_INC ?= -I/usr/include CF = $(LUA_INC) $(EXPAT_INC) $(COMMON_CFLAGS) $(CFLAGS) EXPAT_LIB = -lexpat COMMON_LDFLAGS = -shared LF = $(COMMON_LDFLAGS) $(EXPAT_LIB) $(LDFLAGS) OBJS = src/lxplib.o lib: src/$(LIBNAME) src/$(LIBNAME): export MACOSX_DEPLOYMENT_TARGET="10.3"; $(CC) $(CF) $(RPM_OPT_FLAGS) $(RPM_LD_FLAGS) -o $@ src/$(T)lib.c $(LF) install: $(INSTALL_PROGRAM) -D src/$(LIBNAME) $(DESTDIR)$(LUA_CDIR)/$(LIBNAME) $(INSTALL_DATA) -D src/$T/lom.lua $(DESTDIR)$(LUA_LDIR)/$T/lom.lua $(INSTALL_DATA) -D src/$T/totable.lua $(DESTDIR)$(LUA_LDIR)/$T/totable.lua $(INSTALL_DATA) -D src/$T/threat.lua $(DESTDIR)$(LUA_LDIR)/$T/threat.lua clean: $(RM) src/$(LIBNAME) $(OBJS) $(RM) ./$(LIBNAME) luaexpat-1.5.2/README.md000066400000000000000000000041021463727376500146420ustar00rootroot00000000000000LuaExpat -------- [![Unix build](https://img.shields.io/github/actions/workflow/status/lunarmodules/luaexpat/unix_build.yml?branch=master&label=Unix%20build&logo=linux)](https://github.com/lunarmodules/luaexpat/actions/workflows/unix_build.yml) [![Luacheck](https://github.com/lunarmodules/luaexpat/actions/workflows/luacheck.yml/badge.svg)](https://github.com/lunarmodules/luaexpat/actions/workflows/luacheck.yml) [![License](https://img.shields.io/badge/license-MIT-success)](https://lunarmodules.github.io/luaexpat/license.html) # 
Overview LuaExpat is a SAX XML parser based on the Expat library. LuaExpat is free software and uses the same license as Lua 5.1. ## Download LuaExpat source can be downloaded from the [github releases](https://github.com/lunarmodules/luaexpat/releases) or from [LuaRocks](https://luarocks.org/search?q=luaexpat). ## History For version history please [see the documentation](https://lunarmodules.github.io/luaexpat/index.html#history) ### Release instructions: - ensure [the changelog](https://lunarmodules.github.io/luaexpat/index.html#history) is up to date and has the correct version and release date. - update the [status](https://lunarmodules.github.io/luaexpat/index.html#status) section - update copyright years at the [license page](https://lunarmodules.github.io/luaexpat/license.html) and the [LICENSE file](https://github.com/lunarmodules/luaexpat/blob/master/LICENSE). - update version info and copyright in file [`lxplib.h`](https://github.com/lunarmodules/luaexpat/blob/master/src/lxplib.h) - create a new rockspec file for the new version - commit the above changes and create a PR - after merging the PR tag it in `x.y.z` format, and push the tag (make sure the rockspec file is touched in the same commit that gets the version tag) - the Github actions CI will automatically push a new LuaRocks release - test the uploaded rock using: `luarocks install luaexpat` - add the new release to the [Github releases](https://github.com/lunarmodules/luaexpat/releases) ## License [MIT license](https://lunarmodules.github.io/luaexpat/license.html) luaexpat-1.5.2/docs/000077500000000000000000000000001463727376500143165ustar00rootroot00000000000000luaexpat-1.5.2/docs/doc.css000066400000000000000000000077431463727376500156100ustar00rootroot00000000000000body { margin-left: 1em; margin-right: 1em; font-family: arial, helvetica, geneva, sans-serif; background-color:#ffffff; margin:0px; } code { font-family: "Andale Mono", monospace; } tt { font-family: "Andale Mono", monospace; } 
body, td, th { font-size: 11pt; } h1, h2, h3, h4 { margin-left: 0em; } textarea, pre, tt { font-size:10pt; } body, td, th { color:#000000; } small { font-size:0.85em; } h1 { font-size:1.5em; } h2 { font-size:1.25em; } h3 { font-size:1.15em; } h4 { font-size:1.06em; } a:link { font-weight:bold; color: #004080; text-decoration: none; } a:visited { font-weight:bold; color: #006699; text-decoration: none; } a:link:hover { text-decoration:underline; } hr { color:#cccccc } img { border-width: 0px; } h3 { padding-top: 1em; } p { margin-left: 1em; } p.name { font-family: "Andale Mono", monospace; padding-top: 1em; margin-left: 0em; } blockquote { margin-left: 3em; } .example { background-color: rgb(245, 245, 245); border-top-width: 1px; border-right-width: 1px; border-bottom-width: 1px; border-left-width: 1px; border-top-style: solid; border-right-style: solid; border-bottom-style: solid; border-left-style: solid; border-top-color: silver; border-right-color: silver; border-bottom-color: silver; border-left-color: silver; padding: 1em; margin-left: 1em; margin-right: 1em; font-family: "Andale Mono", monospace; font-size: smaller; } hr { margin-left: 0em; background: #00007f; border: 0px; height: 1px; } ul { list-style-type: disc; } table.index { border: 1px #00007f; } table.index td { text-align: left; vertical-align: top; } table.index ul { padding-top: 0em; margin-top: 0em; } table { border: 1px solid black; border-collapse: collapse; margin-left: auto; margin-right: auto; } th { border: 1px solid black; padding: 0.5em; } td { border: 1px solid black; padding: 0.5em; } div.header, div.footer { margin-left: 0em; } #container { margin-left: 1em; margin-right: 1em; background-color: #f0f0f0; } #product { text-align: center; border-bottom: 1px solid #cccccc; background-color: #ffffff; } #product big { font-size: 2em; } #product_logo { } #product_name { } #product_description { } #main { background-color: #f0f0f0; border-left: 2px solid #cccccc; } #navigation { float: left; 
width: 12em; margin: 0; vertical-align: top; background-color: #f0f0f0; overflow:visible; } #navigation h1 { background-color:#e7e7e7; font-size:1.1em; color:#000000; text-align:left; margin:0px; padding:0.2em; border-top:1px solid #dddddd; border-bottom:1px solid #dddddd; } #navigation ul { font-size:1em; list-style-type: none; padding: 0; margin: 1px; } #navigation li { text-indent: -1em; margin: 0em 0em 0em 0.5em; display: block; padding: 3px 0px 0px 12px; } #navigation li li a { padding: 0px 3px 0px -1em; } #content { margin-left: 12em; padding: 1em; border-left: 2px solid #cccccc; border-right: 2px solid #cccccc; background-color: #ffffff; } #about { clear: both; margin: 0; padding: 5px; border-top: 2px solid #cccccc; background-color: #ffffff; } @media print { body { font: 10pt "Times New Roman", "TimeNR", Times, serif; } a { font-weight:bold; color: #004080; text-decoration: underline; } #main { background-color: #ffffff; border-left: 0px; } #container { margin-left: 2%; margin-right: 2%; background-color: #ffffff; } #content { margin-left: 0px; padding: 1em; border-left: 0px; border-right: 0px; background-color: #ffffff; } #navigation { display: none; } #product_logo { display: none; } #about img { display: none; } .example { font-family: "Andale Mono", monospace; font-size: 8pt; page-break-inside: avoid; } } luaexpat-1.5.2/docs/examples.html000066400000000000000000000120301463727376500170160ustar00rootroot00000000000000 LuaExpat: XML Expat parsing for the Lua programming language
LuaExpat
XML Expat parsing for the Lua programming language

Examples

The code excerpt below creates a parser with 2 callbacks and feeds a test string to it. The parsing of the test string triggers the callbacks, printing the results.

require"lxp"

local count = 0
callbacks = {
    StartElement = function (parser, name)
        io.write("+ ", string.rep(" ", count), name, "\n")
        count = count + 1
    end,
    EndElement = function (parser, name)
        count = count - 1
        io.write("- ", string.rep(" ", count), name, "\n")
    end
}

p = lxp.new(callbacks)

for l in io.lines() do  -- iterate lines
    p:parse(l)          -- parses the line
    p:parse("\n")       -- parses the end of line
end
p:parse()               -- finishes the document
p:close()               -- closes the parser

For a test string like

<elem1>
    text
    <elem2/>
    more text
</elem1>

The example would print

+ elem1
    + elem2
    - elem2
- elem1

Note that the text parts are not handled since the corresponding callback (CharacterData) has not been defined. Also note that defining this callback after the call to lxp.new would make no difference. But had the callback table been defined as

callbacks = {
    StartElement = function (parser, name)
        io.write("+ ", string.rep(" ", count), name, "\n")
        count = count + 1
    end,
    EndElement = function (parser, name)
        count = count - 1
        io.write("- ", string.rep(" ", count), name, "\n")
    end,
    CharacterData = function (parser, string)
        io.write("* ", string.rep(" ", count), string, "\n")
    end
}

The results would have been

+ elem1
* text
    + elem2
    - elem2
* more text
- elem1

Another example would be the use of false as a placeholder for the callback. Suppose that we would like to print only the text associated with elem2 elements and that the XML sample is

 <elem1>
    text
    <elem2>
        inside text
    </elem2>
    more text
</elem1>

We could define the new callback table as

callbacks = {
    StartElement = function (parser, name)
      if name == "elem2" then
        -- redefines CharacterData behaviour
        callbacks.CharacterData = function (parser, string)
          io.write(string, "\n")
        end
      end
    end,

    EndElement = function (parser, name)
      if name == "elem2" then
        callbacks.CharacterData = false -- restores placeholder
      end
    end,

    CharacterData = false               -- placeholder
}

The results would have been

inside text

Note that this example assumes no other elements are present inside elem2 tags.

luaexpat-1.5.2/docs/index.html000066400000000000000000000207601463727376500163200ustar00rootroot00000000000000 LuaExpat: XML Expat parsing for the Lua programming language
LuaExpat
XML Expat parsing for the Lua programming language

Overview

LuaExpat is a SAX XML parser based on the Expat library.

LuaExpat is free software and uses the same MIT license as Lua 5.1.

Status

Current version is 1.5.2. It was developed for Lua 5.1 to Lua 5.4, and has been tested on Linux and MacOS X with Expat 2.4.0+.

Download

The package can be downloaded using LuaRocks, or from the Github repository.

Older LuaExpat releases can be downloaded from LuaForge page.

Binaries for Windows are bundled with the Lua for Windows project.

History

Version 1.5.2 [27/Jun/2024]
  • Fix: threat protection works with documents that declare empty namespace URIs
  • Chore: Add version 2.6.2 of libExpat to the CI matrix
Version 1.5.1 [03/Oct/2022]
  • Fix: makefile no longer sets .lua files as executable
  • Fix: makefile honors $RPM_OPT_FLAGS and $RPM_LD_FLAGS
  • Chore: added BLA test
Version 1.5.0 [26/Aug/2022]
  • warning: this update requires a minimum libExpat version of 2.4.0. Though at the time of writing a minimum version of 2.4.6 is recommended due to CVE's fixed in the intermediate versions.
  • Added option "allowDTD" to the threat protection parser.
  • Add configuration for Billion Laughs Attack prevention. This includes adding #include "expat_config.h", since these functions are conditionally included in the exposed API of Expat. This means that LuaExpat will now be compiled using the same options used to compile Expat itself.
  • Expose Expat compile time constants (lxp._EXPAT_FEATURES), see Expat documentation.
Version 1.4.1 [01/Apr/2022]
  • Fix bad buffer size calculation in threat parser
Version 1.4.0 [22/Mar/2022]
  • Improved Lua version support (5.1 - 5.4)
  • Fix memory leak when callbacks reference a parser object
  • Expose Expat library version (lxp._EXPAT_VERSION)
  • Added 'lxp.totable' module (thanks Tomás Guisasola Gorham)
  • Fix integers being returned as floats on Lua 5.3+ (thanks Kim Alvefur)
  • Fix XmlDecl callback can also return 'nil' for 'standalone'
  • Many documentation updates
  • Added triplet namespace setting (returnnstriplet)
  • Added EntityDecl handler
  • Added AttlistDecl handler
  • Added ElementDecl handler
  • Added SkippedEntity handler
  • Added EndDoctypeDecl handler
  • More tests and new test setup; Busted, LuaCheck, Github actions
  • Improved finishing, multiple nil-calls no longer throw errors
  • Fix 'lxp.lom' to properly handle input as function, table, or file
  • Added option for namespace aware parsing to the 'lxp.lom' module
  • Repository moved to lunarmodules
  • Added threat protection parser (protects against excessively large inputs), with options for the 'lxp.lom' and 'lxp.totable' to use it.
Version 1.3.2 and Version 1.3.3
  • These versions were from a different fork by Tomás Guisasola Gorham
  • Both these versions are available from LuaRocks
  • The work done there has been integrated in the 1.4.0 version
Version 1.3.0 [02/Apr/2014]
  • Lua 5.2 support (thanks Tomás Guisasola Gorham)
  • support for the XmlDecl handler
  • add parser:getcurrentbytecount() (XML_GetCurrentByteCount)
  • ability to disable CharacterData merging
  • Makefile improvements (thanks Vadim Misbakh-Soloviov)
Version 1.2.0 [02/Jun/2011]
New maintainer: Matthew Wild
  • support for the StartDoctypeDecl handler
  • add parser:stop() to abort parsing inside a callback
Version 1.1.0 [05/Jun/2007]
  • adapted to work on both Lua 5.0 and Lua 5.1
  • updated to use Expat 2.0.0
Version 1.0.2 [23/Mar/2006]
  • minor bugfix, lom correct module name is lxp.lom
Version 1.0.1 [06/Jun/2005]
  • minor changes for compatibility with Expat version 1.95.8
Version 1.0 [2/Dec/2004]
Version 1.0 Beta [4/Apr/2004]
Version 1.0 Alpha [10/Dec/2003]

References

LuaExpat uses the Expat library. For details on the C API please refer to the article "Using Expat".

LuaExpat implements the SAX API.

Credits

Version 1.0 was designed by Roberto Ierusalimschy, André Carregal and Tomás Guisasola as part of the Kepler Project which holds its copyright. The implementation was coded by Roberto Ierusalimschy, based on a previous design by Jay Carlson.

LuaExpat development was sponsored by Fábrica Digital and FINEP.

luaexpat-1.5.2/docs/license.html000066400000000000000000000100751463727376500166310ustar00rootroot00000000000000 LuaExpat: XML Expat parsing for the Lua programming language
LuaExpat
XML Expat parsing for the Lua programming language

License

LuaExpat is free software: it can be used for both academic and commercial purposes at absolutely no cost. There are no royalties or GNU-like "copyleft" restrictions. LuaExpat qualifies as Open Source software.

The spirit of the license is that you are free to use LuaExpat for any purpose at no cost without having to ask us. The only requirement is that if you do use LuaExpat, then you should give us credit by including the appropriate copyright notice somewhere in your product or its documentation.

The original LuaExpat library is designed and implemented by Roberto Ierusalimschy. The implementation is not derived from licensed software.


MIT license

Copyright © 2003-2007 The Kepler Project, 2013-2024 Matthew Wild.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

luaexpat-1.5.2/docs/lom.html000066400000000000000000000151551463727376500160020ustar00rootroot00000000000000 LuaExpat: XML Expat parsing for the Lua programming language
LuaExpat
XML Expat parsing for the Lua programming language

Introduction

Lua Object Model (LOM) is a representation of XML elements through Lua data types. Currently it is not supposed to be 100% complete, but simple. LuaExpat provides an implementation of LOM that gets an XML document and transforms it to a Lua table.

Characteristics

The model represents each XML element as a Lua table. A LOM table has three special characteristics:

  • a special field called tag that holds the element's name;
  • an optional field called attr that stores the element's attributes; and
  • the element's children are stored at the array-part of the table. A child could be an ordinary string or another XML element that will be represented by a Lua table following these same rules.

The special field attr is a Lua table that stores the XML element's attributes as pairs <key>=<value>. To assure an order (if necessary), the sequence of keys could be placed at the array-part of this same table.

Functions

lom.parse(string|function|table|file[, opts])
Parses the input into the LOM table format and returns it. The input can be;
  • string: the entire XML document as a string
  • function: an iterator that returns the next chunk of the XML document on each call, and returns nil when finished
  • table: an array like table that contains the chunks that combined make up the XML document
  • file: an open file handle from which the XML document will be read line-by-line, using read(). Note: the file will not be closed when done.
The second parameter opts is an options table that supports the following options;
  • separator (string): the namespace separator character to use, setting this will enable namespace aware parsing.
  • threat (table): a threat protection options table. If provided the threat protection parser will be used instead of the regular lxp parser.
Upon parsing errors it will return nil, err, line, col, pos.
lom.find_elem(node, tag)
Traverses the tree recursively, and returns the first element that matches the tag. Parameter tag (string) is the tag name to look for. The node table can be the result from the parse function, or any of its children.
lom.list_children(node[, tag])
Iterator returning all child tags of a node (non-recursive). It will only children that are tags, and will skip text-nodes. The node table can be the result from the parse function, or any of its children. If the optional parameter tag (string) is given, then the iterator will only return tags that match the tag name.

Examples

For a simple string like

    s = [[<abc a1="A1" a2="A2">inside tag `abc'</abc>]]

A call like

    tab = lxp.lom.parse (s))

Would result in a table equivalent to

tab = {
        ["attr"] = {
                [1] = "a1",
                [2] = "a2",
                ["a2"] = "A2",
                ["a1"] = "A1",
        },
        [1] = "inside tag `abc'",
        ["tag"] = "abc",
}

Now an example with an element nested inside another element

tab = lxp.lom.parse(
[[<qwerty q1="q1" q2="q2">
    <asdf>some text</asdf>
</qwerty>]]
)

The result would have been a table equivalent to

tab = {
        [1] = "\
        ",
        [2] = {
                ["attr"] = {
                },
                [1] = "some text",
                ["tag"] = "asdf",
        },
        ["attr"] = {
                [1] = "q1",
                [2] = "q2",
                ["q2"] = "q2",
                ["q1"] = "q1",
        },
        [3] = "\
",
        ["tag"] = "qwerty",
}

Note that even the new-line and tab characters are stored on the table.

luaexpat-1.5.2/docs/luaexpat.png000066400000000000000000000173571463727376500166640ustar00rootroot00000000000000PNG  IHDRL\nIDATxyXSWMHš-v ,"`Ej(X[|mgu(Vi (eDD&aߗ@ȞPHro}=s=!~C`u~SMz7U`ppPߩZZZΟ?_Ѕ{]@ $ ϗJNVs'kmm%H>>>8533c08:4jΜVAeee'fffnnn6mDW\ٻwN;abb 777SSSݻd겳% @7o䵖>0AX,q fӦMw B$d2Ͷ%xOD+++bg(Rlx?2"` cQп*+++**|իB͛7駽{oذahh`|<blmmq8 #2'))IiYY/33!YD7}bc Pup RԔ> +VxEnnߢE/^hkkAбcbcc,X0~` W0ccc4 ǫlTv\_iqتxOrfK722R bggi%QQQAAA111#@$|>_٧AEC&;::_b=<<֮],a'lɺ 1$Y{:gdSrMMM'OljjdXWd2,)__=z+++. A+zLV^^ՕADzɭ[JբE{o?8..m H_Nzzu\q03RRÇKJJ.] ߄F&966FPGϯp\CCCCBBrrr:;;B!Bٽ{ӧOq8AD"Q?oa0*:ޘ]{qHvlݚYmZn*Azzzҥ jjju떥 F( ;A999&&&၁X,ٳgw511x Y`V;i Ywu~V&\P$iGaOOϵk\\\~my>D"H +***//_bw^^^\\lA777Ayi;z6`tTx?H%ǧfwx<իW&˔ŋ۷ӳR"hKKKH?`V\ rrVXR LPuّ;wLn FFF(::z֭d2׷I$iѣ>_}<\&34r/\pggd**0::*\ŔwА=_WQUV*JMLLݻw׭[7g2n >D222:x\\zzzD5#݌`Jʺ7҇ oe'x|D `h54YZ[Y;;i4 uPD ҁneIIڵkL|||ZZO?k.p6oᡮP^w2]O QHQ. s w" 5ȝE06V0'qG\aLLrS[;Bѣ8rvv(.T"[W+,$SǏ>|rJy%AEBFFqqKK{+VXTByy[--O@TS3qWcAkk""|h42 &K ֯_oooqttT &&W2X0*~wHF13Kvl銺hLLLܹsd8p@@ TT1sE7}"Si 23?`BBWY?RK4l!X,vƍ CJ;d_Z;Gbx :ӫOLq#IEgf2###hj!A@7omTTȑǩezؘn_ ex<b ;&rH&hh"y󦭭U(UD"HTMK\KKKo+ԥD mV-\&lmmϟ_PP !*U@yyyjjjiiUڶ6ݞoq`< ^Ao[dɯ!ر#GMzĐJ@?^\\< (SA))wg@ sܶ_q8SqS lΝR~~~#Fb=y򤮮-^xɒ%O"Yӌe$]}{ju+X,600@lQ Fuu5  v획MBBccBo{{Q֙ٙҲeE99oqmMkk&sfJy||~'N * =Jիݴ>VSWRYXWIU{011QVVswO|B{ΝKxD)7 }$j&&&&QQQʤc J''F%Hyk׮ul~BqjuvP6qB rȨ[Y+ N`;lٰf2tDUT]]J: qQA&BVP(hvr Y %uu,eO{{_#<==GGGw xq4W$d CYs ,ݡCY) +J"R$ MMnpZuw|60Q@xK5=g*欱ՠ:̫WǏŧDA4 P^ckuK)A塱2 ...hJ@_$'nk#G?xjS NuV33z@wp8ի/% (-9u/U;#əAnMMmlWgᐐs "Kǂ;;Q": VVVݻZT$I$} :*gO @TX'OV###FͨZKKcG̎Hde?X Y[`0 Abqʕ49 ̢P\\fkk:XG0ė.\TH05bm7B*J1 2=jƫWW#>9::*4QKRT!ON?yU)@&IR4t7hST)@,CFB\j1kh}U 7nDc_7Giffx [4r׮]Xjq8/pՕ|S,uz/lޙ3mXjfF`@ ֲm>VZG}>;nxlssH  X kh`+T@JJc~eLdѲeN˖9ɷ^L}'Y}3A88e2xb5btƥK alD Nw#GV患RyLo4~}侩 ѷj+B$''ϟvp9v8ڜ=|3GUUfY OU[o}M i۶-D?lGs|K>OPp]]]ϟ?54DQN+H$ѣ+_/"(җd@ˑիWx|bb"$Bt\՞=w7`N@*^2 J%K78 pdÜ{;ogg-|OV>{NElp!lQd$-6mZO.Œʕ+^^^VIif6Jpi4.D1_}}}~~虩Fx:A,r!/=EFFd a27oޤR۶mCm HBAW^ۿj6!zіf455={7ވy /]ӳlٲf}wj,UA2D"1!!a>1 B*0K?#Yxg~~}f?|pxxx:llFQ 
)..Rγ0=WUUU[Ք'Zb\:;;ۭ-ZDӕJ<DcB!jg2sbb*++>#]4===MOgN(F*NC뫫stttpprNi2o߮p>>>M9)477?|bIR=yH$b fze-JGGGGGG\0L2i GD". y;wp8,o>pN> LbC:{֘%@5cccB>{ҥ H8H444{ U5j7o 3/BUx LuaExpat: XML Expat parsing for the Lua programming language
LuaExpat
XML Expat parsing for the Lua programming language

Introduction

LuaExpat is a SAX XML parser based on the Expat library. SAX is the Simple API for XML and allows programs to:

  • process a XML document incrementally, thus being able to handle huge documents without memory penalties;
  • register handler functions which are called by the parser during the processing of the document, handling the document elements or text.

With an event-based API like SAX the XML document can be fed to the parser in chunks, and the parsing begins as soon as the parser receives the first document chunk. LuaExpat reports parsing events (such as the start and end of elements) directly to the application through callbacks. The parsing of huge documents can benefit from this piecemeal operation.

LuaExpat is distributed as a library and a file lom.lua that implements the Lua Object Model.

Building

LuaExpat could be built for Lua 5.1 to 5.4. The language library and headers files for the desired version must be installed properly. LuaExpat also depends on Expat 2.0.0+ which should also be installed.

The simplest way of building and installing LuaExpat is through LuaRocks.

LuaExpat also offers a Makefile. The file has some definitions like paths to the external libraries, compiler options and the like. One important definition is the version of Lua language, which is not obtained from the installed software.

Installation

installation can be done using LuaRocks or make install.

Manually, the compiled binary file should be copied to a directory in your C path. The Lua files ./src/lxp/*.lua should be copied to a directory in your Lua path.

Parser objects

Usually SAX implementations base all operations on the concept of a parser that allows the registration of callback functions. LuaExpat offers the same functionality but uses a different registration method, based on a table of callbacks. This table contains references to the callback functions which are responsible for the handling of the document parts. The parser will assume no behaviour for any undeclared callbacks.

Finishing parsing

Since the parser is a streaming parser, handling chunks of input at a time, the following input will parse just fine (despite the unbalanced tags);

    <one><two>some text</two>

Only when making the final call (with no data) to the parse method, the document will be closed and an error will be returned;

    assert(lxp.parse())

Closing the document is important to ensure the document being complete and valid.

Constructor

lxp.new(callbacks [, separator[, merge_character_data]])
The parser is created by a call to the function lxp.new, which returns the created parser or raises a Lua error. It receives the callbacks table and optionally the parser separator character used in the namespace expanded element names. If merge_character_data is false then LuaExpat will not combine multiple CharacterData calls into one. For more info on this behaviour see CharacterData below.

Methods

parser:close()
Closes the parser, freeing all memory used by it. A call to parser:close() without a previous call to parser:parse() could result in an error. Returns the parser object on success.
parser:getbase()
Returns the base for resolving relative URIs.
parser:getcallbacks()
Returns the callbacks table.
parser:parse(s)
Parse some more of the document. The string s contains part (or perhaps all) of the document. When called without arguments the document is closed (but the parser still has to be closed).
The function returns the parser object when the parser has been successful. If the parser finds an error it returns five results: nil, msg, line, col, and pos, which are the error message, the line number, column number and absolute position of the error in the XML document.
local cb = {}    -- table with callbacks
local doc = "<root>xml doc</root>"
lxp.new(cb):setencoding("UTF-8"):parse(doc):parse():close()
parser:pos()
Returns three results: the current parsing line, column, and absolute position.
parser:getcurrentbytecount()
Return the number of bytes of input corresponding to the current event. This function can only be called inside a handler, in other contexts it will return 0. Do not use inside a CharacterData handler unless CharacterData merging has been disabled (see lxp.new).
parser:returnnstriplet(bool)
Instructs the parser to return namespaces in triplet (true), or only duo (false). Setting this must be done before calling parse, and will only have effect if the parser was created with a separator. Returns the parser object.
parser:setbase(base)
Sets the base to be used for resolving relative URIs in system identifiers. Returns the parser object on success.
parser:setblamaxamplification(max_amp)
Sets the maximum amplification (float) to be allowed. This protects against the Billion Laughs Attack. The libexpat default is 100. Returns the parser object on success.
parser:setblathreshold(threshold)
Sets the threshold (int, in bytes) after which the protection starts. This protects against the Billion Laughs Attack. The libexpat default is 8 MiB. Returns the parser object on success.
parser:setencoding(encoding)
Set the encoding to be used by the parser. There are four built-in encodings, passed as strings: "US-ASCII", "UTF-8", "UTF-16", and "ISO-8859-1". Returns the parser object on success.
parser:stop()
Abort the parser and prevent it from parsing any further through the data it was last passed. Use to halt parsing the document when an error is discovered inside a callback, for example. The parser object cannot accept more data after this call.

Callbacks

The Lua callbacks define the handlers of the parser events. The use of a table in the parser constructor has some advantages over the registration of callbacks, since there is no need for for the API to provide a way to manipulate callbacks.

Another difference lies in the behaviour of the callbacks during the parsing itself. The callback table contains references to the functions that can be redefined at will. The only restriction is that only the callbacks present in the table at creation time will be called.

The callbacks table indices are named after the equivalent Expat callbacks:
AttlistDecl, CharacterData, Comment, Default, DefaultExpand, ElementDecl, EndCdataSection, EndElement, EndNamespaceDecl, EntityDecl, ExternalEntityRef, NotStandalone, NotationDecl, ProcessingInstruction, SkippedEntity, StartCdataSection, StartElement, StartNamespaceDecl, UnparsedEntityDecl, XmlDecl and StartDoctypeDecl.

These indices can be references to functions with specific signatures, as seen below. The parser constructor also checks the presence of a field called _nonstrict in the callbacks table. If _nonstrict is absent, only valid callback names are accepted as indices in the table (Defaultexpanded would be considered an error for example). If _nonstrict is defined, any other fieldnames can be used (even if not called at all).

The callbacks can optionally be defined as false, acting thus as placeholders for future assignment of functions.

Every callback function receives as the first parameter the calling parser itself, thus allowing the same functions to be used for more than one parser for example.

callbacks.AttlistDecl = function(parser, elementName, attrName, attrType, default, required)
The Attlist declaration handler is called for each attribute. So a single Attlist declaration with multiple attributes declared will generate multiple calls to this handler. The default parameter may be nil in the case of the "#IMPLIED" or "#REQUIRED" keyword. The required parameter will be true and the default value will be nil in the case of "#REQUIRED". If required is true and default is non-nil, then this is a "#FIXED" default.
callbacks.CharacterData = function(parser, string)
Called when the parser recognizes an XML CDATA string. Note that LuaExpat automatically combines multiple CharacterData events from Expat into a single call to this handler, unless merge_character_data is set to false when calling lxp.new().
callbacks.Comment = function(parser, string)
Called when the parser recognizes an XML comment string.
callbacks.Default = function(parser, string)
Called when the parser has a string corresponding to any characters in the document which wouldn't otherwise be handled. Using this handler has the side effect of turning off expansion of references to internally defined general entities. Instead these references are passed to the default handler.
callbacks.DefaultExpand = function(parser, string)
Called when the parser has a string corresponding to any characters in the document which wouldn't otherwise be handled. Using this handler doesn't affect expansion of internal entity references.
callbacks.ElementDecl = function(parser, name, type, quantifier, children)
Called when the parser detects an Element declaration in the DTD. The type parameter will be any of; "EMPTY", "ANY", "MIXED", "NAME", "CHOICE", or "SEQUENCE". The quantifier parameter will be any of "?", "*", "+", or nil. The array children can be nil if there are no children. The child elements in the array will be objects with the following fields; name, type, quantifier, and children. Those fields will have the same values as the similarly named parameters for this call.
callbacks.EndCdataSection = function(parser)
Called when the parser detects the end of a CDATA section.
callbacks.EndDoctypeDecl = function(parser)
Called when the parser detects the end of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset.
callbacks.EndElement = function(parser, elementName)
Called when the parser detects the ending of an XML element with elementName.
callbacks.EndNamespaceDecl = function(parser, namespaceName)
Called when the parser detects the ending of an XML namespace with namespaceName. The handling of the end namespace is done after the handling of the end tag for the element the namespace is associated with.
callbacks.EntityDecl = function(parser, entityName, is_parameter, value, base, systemId, publicId, notationName)
This is called for entity declarations. The is_parameter argument will be true if the entity is a parameter entity, false otherwise. For internal entities (<!ENTITY foo "bar">), value will be a string and systemId, publicID, and notationName will be nil. The value string can be nil, as well as an empty string, which is a valid value. For external entities, value will be nil and systemId will be a string. The publicId argument will be nil unless a public identifier was provided. The notationName argument will have a string value only for unparsed entity declarations.
callbacks.ExternalEntityRef = function(parser, subparser, base, systemId, publicId)
Called when the parser detects an external entity reference.

The subparser is a LuaExpat parser created with the same callbacks and Expat context as the parser and should be used to parse the external entity.
The base parameter is the base to use for relative system identifiers. It is set by parser:setbase and may be nil.
The systemId parameter is the system identifier specified in the entity declaration and is never nil.
The publicId parameter is the public id given in the entity declaration and may be nil.
callbacks.NotStandalone = function(parser)
This callback expects a return value, if the callback returns a falsy value, parsing will be aborted. The callback will be called when the parser detects that the document is not "standalone". This happens when there is an external subset or a reference to a parameter entity, but the document does not have standalone set to "yes" in an XML declaration.
callbacks.NotationDecl = function(parser, notationName, base, systemId, publicId)
Called when the parser detects XML notation declarations with notationName
The base parameter is the base to use for relative system identifiers. It is set by parser:setbase and may be nil.
The systemId parameter is the system identifier specified in the entity declaration and is never nil.
The publicId parameter is the public id given in the entity declaration and may be nil.
callbacks.ProcessingInstruction = function(parser, target, data)
Called when the parser detects XML processing instructions. The target is the first word in the processing instruction. The data is the rest of the characters in it after skipping all whitespace after the initial word.
callbacks.SkippedEntity = function(parser, name, isParameter)
This is called in two situations. One; An entity reference is encountered for which no declaration has been read *and* this is not an error. Two; An internal entity reference is read, but not expanded, because the Default handler has been called. Note: skipped parameter entities in declarations and skipped general entities in attribute values cannot be reported, because the event would be out of sync with the reporting of the declarations or attribute values.
callbacks.StartCdataSection = function(parser)
Called when the parser detects the beginning of an XML CDATA section.
callbacks.StartDoctypeDecl = function(parser, name, sysid, pubid, has_internal_subset)
Called when the parser detects the beginning of an XML DTD (DOCTYPE) section. These precede the XML root element and take the form:
<!DOCTYPE root_elem PUBLIC "example">
callbacks.StartElement = function(parser, elementName, attributes)
Called when the parser detects the beginning of an XML element with elementName.
The attributes parameter is a Lua table with all the element attribute names and values. The table contains an entry for every attribute in the element start tag and entries for the default attributes for that element.
The attributes are listed by name (including the inherited ones) and by position (inherited attributes are not considered in the position list).
As an example if the book element has attributes author, title and an optional format attribute (with "printed" as default value),
<book author="Ierusalimschy, Roberto" title="Programming in Lua">
would be represented as
{[1] = "Ierusalimschy, Roberto",
 [2] = "Programming in Lua",
 author = "Ierusalimschy, Roberto",
 format = "printed",
 title = "Programming in Lua"}
callbacks.StartNamespaceDecl = function(parser, namespaceName, namespaceUri)
Called when the parser detects an XML namespace declaration. The namespaceName can be nil. Namespace declarations occur inside start tags, but the StartNamespaceDecl handler is called before the StartElement handler for each namespace declared in that start tag.
callbacks.UnparsedEntityDecl = function(parser, entityName, base, systemId, publicId, notationName)
Obsolete: use EntityDecl instead. Called when the parser receives declarations of unparsed entities. These are entity declarations that have a notation (NDATA) field.
As an example, in the chunk
<!ENTITY logo SYSTEM "images/logo.gif" NDATA gif>
entityName would be "logo", systemId would be "images/logo.gif" and notationName would be "gif". For this example the publicId parameter would be nil. The base parameter would be whatever has been set with parser:setbase. If not set, it would be nil.
callbacks.XmlDecl = function(parser, version, encoding, standalone)
Called when the parser encounters an XML document declaration (these are optional, and valid only at the start of the document). The callback receives the declared XML version and document encoding as strings, and standalone as a boolean (or nil if it was not specified).

The separator character

The optional separator character in the parser constructor defines the character used in the namespace expanded element names. The separator character is optional (if not defined the parser will not handle namespaces) but if defined it must be different from the character '\0'.

luaexpat-1.5.2/docs/threat.html000066400000000000000000000163541463727376500165040ustar00rootroot00000000000000 LuaExpat: XML Expat parsing for the Lua programming language
LuaExpat
XML Expat parsing for the Lua programming language

Introduction

Threat protection enables validation of structure and size of a document while parsing it.

The threat parser is identical to the regular parser.

  • Has the same methods
  • Uses the same signature for creating it through new
  • The callbacks table should get another entry threat containing the configuration of the limits
  • Any callback not defined by the user will be added using a no-op function in the callbacks table (exceptions are Default and DefaultExpand)
  • The separator parameter for the constructor is required when any of the following checks have been added (since they require namespace aware parsing);
    • maxNamespaces
    • prefix
    • namespaceUri

Limitations

Due to the way the parser works, the elements of a document must first be parsed before a callback is issued that verifies its maximum size. For example even if the maximum size for an attribute is set to 50 bytes, a 2mb attribute will first be entirely parsed before the parser bails out with a size error. To protect against this make sure to set the maximum buffer size (option buffer).

Options

Structural checks:

  • depth max depth of tags, child elements like Text or Comments are not counted as another level. Default 50.
  • allowDTD boolean indicating whether DTDs are allowed. Default true.
  • maxChildren max number of children (Element, Text, Comment, ProcessingInstruction, CDATASection).
    NOTE: adjacent text/CDATA sections are counted as 1 (so text-cdata-text-cdata is 1 child). Default 100.
  • maxAttributes max number of attributes (including default ones).
    NOTE: if not parsing namespaces, then the namespaces will be counted as attributes. Default 100.
  • maxNamespaces max number of namespaces defined on a tag. Default 20.

Size limits (per element, in bytes)

  • document size of entire document. Default 10 mb.
  • buffer size of the unparsed buffer (see below). Default 1 mb.
  • comment size of comment. Default 1 kb.
  • localName size of localname applies to tags and attributes.
    NOTE: If not parsing namespaces, this limit will count against the full name (prefix + localName). Default 1 kb.
  • prefix size of prefix, applies to tags and attributes. Default 1 kb.
  • namespaceUri size of namespace uri. Default 1 kb.
  • attribute size of attribute value. Default 1 mb.
  • text text inside tags (counted over all adjacent text/CDATA combined). Default 1 mb.
  • PITarget size of processing instruction target. Default 1 kb.
  • PIData size of processing instruction data. Default 1 kb.
  • entityName size of entity name in EntityDecl in bytes. Default 1 kb.
  • entity size of entity value in EntityDecl in bytes. Default 1 kb.
  • entityProperty size of systemId, publicId, or notationName in EntityDecl in bytes. Default 1 kb.

The buffer setting is the maximum size of unparsed data. The unparsed buffer is from the last byte delivered through a callback to the end of the current data fed into the parser.

As an example assume we have set a maximum of 1 attribute, with name max 20 and value max 20. This means that the maximum allowed opening tag could look like this (take or leave some white space);

<abcde12345abcde12345 ABCDE12345ABCDE12345="12345678901234567890">

But because of the way Expat works, a user could pass in a 2mb attribute value and it would have to be parsed completely before the callback for the new element fires. In this case the maximum expected buffer would be 2x 20 (attr+tag name) + 1x 20 (attr value) + 50 (account for whitespace and other overhead characters) == 110. If this value is set and the parser is fed in chunks, it will bail out after hitting the first 110 characters of the faulty oversized tag.

Example of threat protected parsing

local threat_parser = require "lxp.threat"

local separator = "\1"
local callbacks = {
	-- add your regular callbacks here
}

local threat = {

	-- structure
	depth = 3,
	maxChildren = 3,
	maxAttributes = 3,
	maxNamespaces = 3,

	-- sizes
	document = 2000,
	buffer = 1000,
	comment = 20,
	localName = 20,
	prefix = 20,
	namespaceUri = 20,
	attribute = 20,
	text = 20,
	PITarget = 20,
	PIData = 20,
}

callbacks.threat = threat

local parser = threat_parser.new(callbacks, separator)

assert(parser.parse(xml_data))
luaexpat-1.5.2/docs/totable.html000066400000000000000000000136021463727376500166400ustar00rootroot00000000000000 LuaExpat: XML Expat parsing for the Lua programming language
LuaExpat
XML Expat parsing for the Lua programming language

Introduction

The "table" parser is another way of representing XML data in Lua tables.

Characteristics

The model represents each XML element as a Lua table. The characteristics of this format are:

  • The XML nodes are represented as a table.
  • child elements are in the array part of the table; where Text nodes are strings, and Child nodes are sub-tables
  • The Tag name is stored in the array at index 0, so outside the regular array range in Lua
  • Attributes are stored in the hash-part of the table, as key-value pairs

Functions

totable.parse(string|function|table|file[, opts])
Parses the input into the table format and returns it. The input can be;
  • string: the entire XML document as a string
  • function: an iterator that returns the next chunk of the XML document on each call, and returns nil when finished
  • table: an array like table that contains the chunks that combined make up the XML document
  • file: an open file handle from which the XML document will be read line-by-line, using read(). Note: the file will not be closed when done.
The second parameter opts is an options table that supports the following options;
  • separator (string): the namespace separator character to use, setting this will enable namespace aware parsing.
  • threat (table): a threat protection options table. If provided the threat protection parser will be used instead of the regular lxp parser.
Upon parsing errors it will return nil, err, line, col, pos.
totable.clean(t)
Traverses the tree recursively, and drops all whitespace-only Text nodes. Returns the (modified) input table.
totable.torecord()
Traverses the tree recursively, and will update every entry that is a Tag with only 1 Text node as child element, to a key-value entry. Note: Any attributes on the converted element will be lost! If the key already exists (duplicate tag names, or an attribute by that name) then it will not update the entry. Returns the (modified) input table.

Examples

For a string like

s = [[
<person id="123">
	<first>John</first>
	<last>Doe</last>
</abc>
]]

A call like

    tab = lxp.totable.parse (s)

Would result in a table equivalent to

tab = {
	[0] = "person",     -- tag names go into array index 0
	id = "123",         -- attribute names go into the hash part of the table
	[1] = "\n\t"        -- Note that the new-line and tab characters are preserved
	on the table
	[2] = {
		[0] = "first",
		[1] = "John",
	},
	[3] = "\n\t"
	[4] = {
		[0] = "last",
		[1] = "Doe",
	},
	[5] = "\n"
}

After a call to clean like this lxp.totable.clean (tab) the empty whitespace elements will be removed:

tab = {
	[0] = "person",
	id = "123",
	[1] = {
		[0] = "first",
		[1] = "John",
	},
	[3] = {
		[0] = "last",
		[1] = "Doe",
	},
}

After a call to torecord like this lxp.totable.torecord (tab) the single-textfield nodes are turned into key-value pairs:

tab = {
	[0] = "person",
	id = "123",
	first = "John",
	last = "Doe",
}
luaexpat-1.5.2/luaexpat-scm-1.rockspec000066400000000000000000000022321463727376500176610ustar00rootroot00000000000000local package_name = "luaexpat" local package_version = "scm" local rockspec_revision = "1" local github_account_name = "lunarmodules" local github_repo_name = package_name package = package_name version = package_version .. "-" .. rockspec_revision source = { url = "git+https://github.com/"..github_account_name.."/"..github_repo_name..".git", branch = (package_version == "scm") and "master" or nil, tag = (package_version ~= "scm") and package_version or nil, } description = { summary = "XML Expat parsing", detailed = [[ LuaExpat is a SAX (Simple API for XML) XML parser based on the Expat library. ]], license = "MIT/X11", homepage = "https://"..github_account_name..".github.io/"..github_repo_name, } dependencies = { "lua >= 5.1" } external_dependencies = { EXPAT = { header = "expat.h" } } build = { type = "builtin", modules = { lxp = { sources = { "src/lxplib.c" }, libraries = { "expat" }, incdirs = { "$(EXPAT_INCDIR)", "src/" }, libdirs = { "$(EXPAT_LIBDIR)" }, }, ["lxp.lom"] = "src/lxp/lom.lua", ["lxp.totable"] = "src/lxp/totable.lua", ["lxp.threat"] = "src/lxp/threat.lua", }, copy_directories = { "docs" } } luaexpat-1.5.2/makefile.win000066400000000000000000000014321463727376500156620ustar00rootroot00000000000000LUA_INC=c:\lua5.1\include LUA_DIR=c:\lua5.1\lua LUA_LIBDIR=c:\lua5.1 LUA_LIB=c:\lua5.1\lua5.1.lib OBJS= src\lxplib.obj EXPAT_INCLUDE=C:\Expat-2.0.0\Source\lib EXPAT_LIB=C:\Expat-2.0.0\Libs\libexpat.lib .c.obj: cl /c /Fo$@ /O2 /I$(LUA_INC) /I$(EXPAT_INCLUDE) /D_CRT_SECURE_NO_DEPRECATE $< src\lxp.dll: $(OBJS) link /dll /def:src\lxp.def /out:$@ $(OBJS) $(EXPAT_LIB) $(LUA_LIB) install: src\lxp.dll IF NOT EXIST $(LUA_LIBDIR) mkdir $(LUA_LIBDIR) copy src\lxp.dll $(LUA_LIBDIR) IF NOT EXIST $(LUA_DIR)\lxp mkdir $(LUA_DIR)\lxp copy src\lxp\lom.lua $(LUA_DIR)\lxp copy src\lxp\totable.lua $(LUA_DIR)\lxp copy src\lxp\threat.lua $(LUA_DIR)\lxp clean: 
del src\lxp.dll del src\*.obj del src\lxp.exp del src\lxp.lib # $Id: makefile.win,v 1.3 2007/06/05 20:03:12 carregal Exp $ luaexpat-1.5.2/rockspecs/000077500000000000000000000000001463727376500153625ustar00rootroot00000000000000luaexpat-1.5.2/rockspecs/luaexpat-1.4.0-1.rockspec000066400000000000000000000022111463727376500215300ustar00rootroot00000000000000local package_name = "luaexpat" local package_version = "1.4.0" local rockspec_revision = "1" local github_account_name = "lunarmodules" local github_repo_name = package_name local git_checkout = package_version == "scm" and "master" or package_version package = package_name version = package_version .. "-" .. rockspec_revision source = { url = "git+https://github.com/"..github_account_name.."/"..github_repo_name..".git", branch = git_checkout } description = { summary = "XML Expat parsing", detailed = [[ LuaExpat is a SAX (Simple API for XML) XML parser based on the Expat library. ]], license = "MIT/X11", homepage = "https://"..github_account_name..".github.io/"..github_repo_name, } dependencies = { "lua >= 5.1" } external_dependencies = { EXPAT = { header = "expat.h" } } build = { type = "builtin", modules = { lxp = { sources = { "src/lxplib.c" }, libraries = { "expat" }, incdirs = { "$(EXPAT_INCDIR)", "src/" }, libdirs = { "$(EXPAT_LIBDIR)" }, }, ["lxp.lom"] = "src/lxp/lom.lua", ["lxp.totable"] = "src/lxp/totable.lua", ["lxp.threat"] = "src/lxp/threat.lua", }, copy_directories = { "docs" } } luaexpat-1.5.2/rockspecs/luaexpat-1.4.1-1.rockspec000066400000000000000000000022111463727376500215310ustar00rootroot00000000000000local package_name = "luaexpat" local package_version = "1.4.1" local rockspec_revision = "1" local github_account_name = "lunarmodules" local github_repo_name = package_name local git_checkout = package_version == "scm" and "master" or package_version package = package_name version = package_version .. "-" .. 
rockspec_revision source = { url = "git+https://github.com/"..github_account_name.."/"..github_repo_name..".git", branch = git_checkout } description = { summary = "XML Expat parsing", detailed = [[ LuaExpat is a SAX (Simple API for XML) XML parser based on the Expat library. ]], license = "MIT/X11", homepage = "https://"..github_account_name..".github.io/"..github_repo_name, } dependencies = { "lua >= 5.1" } external_dependencies = { EXPAT = { header = "expat.h" } } build = { type = "builtin", modules = { lxp = { sources = { "src/lxplib.c" }, libraries = { "expat" }, incdirs = { "$(EXPAT_INCDIR)", "src/" }, libdirs = { "$(EXPAT_LIBDIR)" }, }, ["lxp.lom"] = "src/lxp/lom.lua", ["lxp.totable"] = "src/lxp/totable.lua", ["lxp.threat"] = "src/lxp/threat.lua", }, copy_directories = { "docs" } } luaexpat-1.5.2/rockspecs/luaexpat-1.5.0-1.rockspec000066400000000000000000000022341463727376500215360ustar00rootroot00000000000000local package_name = "luaexpat" local package_version = "1.5.0" local rockspec_revision = "1" local github_account_name = "lunarmodules" local github_repo_name = package_name package = package_name version = package_version .. "-" .. rockspec_revision source = { url = "git+https://github.com/"..github_account_name.."/"..github_repo_name..".git", branch = (package_version == "scm") and "master" or nil, tag = (package_version ~= "scm") and package_version or nil, } description = { summary = "XML Expat parsing", detailed = [[ LuaExpat is a SAX (Simple API for XML) XML parser based on the Expat library. 
]], license = "MIT/X11", homepage = "https://"..github_account_name..".github.io/"..github_repo_name, } dependencies = { "lua >= 5.1" } external_dependencies = { EXPAT = { header = "expat.h" } } build = { type = "builtin", modules = { lxp = { sources = { "src/lxplib.c" }, libraries = { "expat" }, incdirs = { "$(EXPAT_INCDIR)", "src/" }, libdirs = { "$(EXPAT_LIBDIR)" }, }, ["lxp.lom"] = "src/lxp/lom.lua", ["lxp.totable"] = "src/lxp/totable.lua", ["lxp.threat"] = "src/lxp/threat.lua", }, copy_directories = { "docs" } } luaexpat-1.5.2/rockspecs/luaexpat-1.5.1-1.rockspec000066400000000000000000000022341463727376500215370ustar00rootroot00000000000000local package_name = "luaexpat" local package_version = "1.5.1" local rockspec_revision = "1" local github_account_name = "lunarmodules" local github_repo_name = package_name package = package_name version = package_version .. "-" .. rockspec_revision source = { url = "git+https://github.com/"..github_account_name.."/"..github_repo_name..".git", branch = (package_version == "scm") and "master" or nil, tag = (package_version ~= "scm") and package_version or nil, } description = { summary = "XML Expat parsing", detailed = [[ LuaExpat is a SAX (Simple API for XML) XML parser based on the Expat library. 
]], license = "MIT/X11", homepage = "https://"..github_account_name..".github.io/"..github_repo_name, } dependencies = { "lua >= 5.1" } external_dependencies = { EXPAT = { header = "expat.h" } } build = { type = "builtin", modules = { lxp = { sources = { "src/lxplib.c" }, libraries = { "expat" }, incdirs = { "$(EXPAT_INCDIR)", "src/" }, libdirs = { "$(EXPAT_LIBDIR)" }, }, ["lxp.lom"] = "src/lxp/lom.lua", ["lxp.totable"] = "src/lxp/totable.lua", ["lxp.threat"] = "src/lxp/threat.lua", }, copy_directories = { "docs" } } luaexpat-1.5.2/rockspecs/luaexpat-1.5.2-1.rockspec000066400000000000000000000022341463727376500215400ustar00rootroot00000000000000local package_name = "luaexpat" local package_version = "1.5.2" local rockspec_revision = "1" local github_account_name = "lunarmodules" local github_repo_name = package_name package = package_name version = package_version .. "-" .. rockspec_revision source = { url = "git+https://github.com/"..github_account_name.."/"..github_repo_name..".git", branch = (package_version == "scm") and "master" or nil, tag = (package_version ~= "scm") and package_version or nil, } description = { summary = "XML Expat parsing", detailed = [[ LuaExpat is a SAX (Simple API for XML) XML parser based on the Expat library. 
]], license = "MIT/X11", homepage = "https://"..github_account_name..".github.io/"..github_repo_name, } dependencies = { "lua >= 5.1" } external_dependencies = { EXPAT = { header = "expat.h" } } build = { type = "builtin", modules = { lxp = { sources = { "src/lxplib.c" }, libraries = { "expat" }, incdirs = { "$(EXPAT_INCDIR)", "src/" }, libdirs = { "$(EXPAT_LIBDIR)" }, }, ["lxp.lom"] = "src/lxp/lom.lua", ["lxp.totable"] = "src/lxp/totable.lua", ["lxp.threat"] = "src/lxp/threat.lua", }, copy_directories = { "docs" } } luaexpat-1.5.2/spec/000077500000000000000000000000001463727376500143205ustar00rootroot00000000000000luaexpat-1.5.2/spec/01-lxp_spec.lua000066400000000000000000000711641463727376500170670ustar00rootroot00000000000000local d = require("pl.stringx").dedent or require("pl.text").dedent local preamble = [[ ]> ]] describe("lxp:", function() local lxp -- create a test parser. -- table 'cbs' can contain array elements, where the values are callback -- names. The results of those callbacks will be stored in 'cbdata'. -- The hash part, can have callback names as keys, and callback functions -- as values. -- Returns the new parser object. local cbdata, _p local function test_parser(cbs, separator) assert(type(cbs) == "table", "expected arg #1 to be a table") local t = {} for k,v in pairs(cbs) do if type(k) == "number" then assert(type(v) == "string", "array entries must have string values") k = v v = function(p, ...) --assert(p == _p, "parser mismatch (self)") cbdata[#cbdata+1] = { k, ... 
} end elseif type(k) == "string" then assert(type(v) == "function", "string keys must have function values") else error("bad entry, expected string or numeric keys, got "..tostring(k)) end assert(t[k] == nil, "key '"..k.."' was provided more than once") t[k] = v end cbdata = {} _p = lxp.new(t, separator) return _p end before_each(function() lxp = require "lxp" end) describe("basics", function() it("exports constants", function() assert.is.string(lxp._VERSION) assert.matches("^LuaExpat %d%.%d%.%d$", lxp._VERSION) assert.is.string(lxp._DESCRIPTION) assert.is.string(lxp._COPYRIGHT) assert.is.string(lxp._EXPAT_VERSION) assert.is.table(lxp._EXPAT_FEATURES) assert.is.number(lxp._EXPAT_FEATURES.XML_CONTEXT_BYTES) end) it("exports 'new' constructor", function() assert.is_function(lxp.new) end) it("new() creates a working parser", function() local p = lxp.new{} p:setencoding("ISO-8859-1") assert(p:parse[[hi]]) p:close() end) it("setbase, setblamaxamplification, setblathreshold, setencoding, close, and parse return parser upon success", function() assert.has.no.error(function() lxp.new({}):setbase("/base"): setblamaxamplification(55.55): setblathreshold(1024*1024): setencoding("ISO-8859-1"): parse(""): parse(): close(): close() end) end) -- test based on https://github.com/tomasguisasola/luaexpat/issues/2 it("reloads module if dropped", function() package.loaded.lxp = nil local first_lxp = require "lxp" assert.is_table(first_lxp) assert.is_function(first_lxp.new) package.loaded.lxp = nil local second_lxp = require "lxp" assert.is_table(second_lxp) assert.is_function(second_lxp.new) assert.not_equal(first_lxp, second_lxp) end) end) describe("_nonstrict", function() it("doesn't allow unknown entries if not set", function() assert.matches.error(function() lxp.new{ something = "something" } end, "invalid option 'something'") end) it("allows unknown entries if set", function() assert.no.error(function() lxp.new{ _nonstrict = true, something = "something", } end) end) end) 
describe("getcallbacks()", function() it("returns the callbacks", function() local t = {} local p = lxp.new(t) assert.equal(t, p:getcallbacks()) end) end) it("callbacks can be updated while parsing", function() local p = test_parser { "CharacterData" } assert(p:parse(preamble)) assert(p:parse("a basic text")) assert.same({ { "CharacterData", "a basic text" }, }, cbdata) -- update callback p:getcallbacks().CharacterData = "error" assert.matches.error(function() assert(p:parse("a basic text")) end, "lxp 'CharacterData' callback is not a function") end) describe("parsing", function() it("allows multiple finishing calls", function() local p = test_parser { "CharacterData" } assert(p:parse(preamble)) assert(p:parse("a basic text")) assert(p:parse()) assert.has.no.error(function() assert(p:parse()) end) p:close() end) it("handles XML declaration", function() local p = test_parser { "XmlDecl" } assert(p:parse('')) assert.same({ { "XmlDecl", "1.0", "ISO-8859-1", true }, }, cbdata) local p = test_parser { "XmlDecl" } assert(p:parse('')) assert.same({ { "XmlDecl", "1.0", "ISO-8859-1", false }, }, cbdata) local p = test_parser { "XmlDecl" } assert(p:parse('')) assert.same({ { "XmlDecl", "1.0", "ISO-8859-1", nil }, }, cbdata) end) it("handles start/end tags", function() local p = test_parser { "StartElement", "EndElement" } assert(p:parse(preamble)) assert(p:parse(d[[ ]])) assert.same({ { "StartElement", "to", { "priority", "xu", priority = "10", xu = "hi", method = "POST" }}, }, cbdata) assert(p:parse("")) assert(p:parse()) p:close() assert.same({ { "StartElement", "to", { "priority", "xu", priority = "10", xu = "hi", method = "POST" }}, { "EndElement", "to" }, }, cbdata) end) it("handles CharacterData/CDATA", function() local p = test_parser { "CharacterData" } assert(p:parse(preamble)) assert(p:parse(d[=[ a basic text<>]]> ]=])) assert.same({ { "CharacterData", "a basic text<<>" }, }, cbdata) assert(p:parse()) p:close() end) it("handles CDATA sections", function() local p = 
test_parser { "CharacterData", "StartCdataSection", "EndCdataSection" } assert(p:parse(preamble)) assert(p:parse"") assert(p:parse"") assert(p:parse"") p:close() assert.same({ { "StartCdataSection" }, { "CharacterData", "hi" }, { "EndCdataSection" }, }, cbdata) end) it("handles Processing Instructions", function() local p = test_parser { "ProcessingInstruction" } assert(p:parse(preamble)) assert(p:parse(d[[ ? ?> ]])) p:close() assert.same({ { "ProcessingInstruction", "lua", "how is this passed to ? " }, }, cbdata) end) it("handles Comments", function() local p = test_parser { "Comment", "CharacterData" } assert(p:parse(preamble)) assert(p:parse(d[[ some text some more text]] )) p:close() assert.same({ { "CharacterData", "some text\n" }, { "Comment", " with some & symbols " }, { "CharacterData", "\nsome more text" }, }, cbdata) end) it("Default handler", function() local root = [[ hi &xuxu; ]] local r = "" local p = test_parser { Default = function(p, data) r = r .. data end, } assert(p:parse(preamble)) assert(p:parse(root)) p:close() assert.equal(preamble..root, r) end) it("DefaultExpand handler", function() local root = [[ hi &xuxu; ]] local r = "" local p = test_parser { DefaultExpand = function(p, data) r = r .. 
data end, } assert(p:parse(preamble)) assert(p:parse(root)) p:close() assert.equal((preamble..root):gsub("&xuxu;", "is this a xuxu?"), r) end) it("handles notation declarations and unparsed entities", function() local p = test_parser { "UnparsedEntityDecl", "NotationDecl" } p:setbase("/base") assert(p:parse(preamble)) assert(p:parse[[]]) p:close() assert.same({ { "NotationDecl", "TXT", "/base", "txt" }, { "UnparsedEntityDecl", "test-unparsed", "/base", "unparsed.txt", nil, "txt" }, }, cbdata) end) it("handles entity declarations", function() local p = test_parser { "EntityDecl" } p:setbase("/base") assert(p:parse(preamble)) assert(p:parse[[]]) p:close() assert.same({ { "EntityDecl", "xuxu", false, "is this a xuxu?", "/base" }, { "EntityDecl", "test-entity", false, nil, "/base", "entity1.xml" }, { "EntityDecl", "test-unparsed", false, nil, "/base", "unparsed.txt", nil, "txt" }, { "EntityDecl", "myParameterEntity", true, "myElement | myElement2 | myElement3", "/base" }, { "EntityDecl", "emptyValue", true, "", "/base" }, }, cbdata) end) it("handles attribute list declarations", function() local p = test_parser { "AttlistDecl" } p:setbase("/base") assert(p:parse(preamble)) assert(p:parse[[]]) p:close() assert.same({ { "AttlistDecl", "to", "method", "CDATA", "POST", true }, { "AttlistDecl", "hihi", "explanation", "ENTITY", nil, true }, }, cbdata) end) it("handles attribute list declarations; multiple attributes", function() local p = test_parser { "AttlistDecl" } p:setbase("/base") assert(p:parse(d[[ ]> ]])) p:close() assert.same({ { "AttlistDecl", "student_name", "student_no", "ID", nil, true }, { "AttlistDecl", "student_name", "tutor_1", "IDREF", nil, false }, { "AttlistDecl", "student_name", "tutor_2", "IDREF", nil, false }, }, cbdata) end) it("handles attribute list declarations with namespaces", function() local p = test_parser({ "AttlistDecl", "StartNamespaceDecl", "EndNamespaceDecl", "StartElement", "EndElement" }, "?") p:setbase("/base") assert(p:parse(d[[ ]> 
valid doc ]])) -- example from: https://www.informit.com/articles/article.aspx?p=31837&seqNum=6 p:close() assert.same({ { 'AttlistDecl', 'kbs:myRoot', 'xmlns:kbs', 'CDATA', 'http://www.example.com/', true }, { 'StartNamespaceDecl', 'kbs', 'http://www.example.com/' }, { 'StartElement', 'http://www.example.com/?myRoot', {} }, { 'StartElement', 'http://www.example.com/?child1', {} }, { 'EndElement', 'http://www.example.com/?child1' }, { 'StartElement', 'http://www.example.com/?child2', {} }, { 'EndElement', 'http://www.example.com/?child2' }, { 'EndElement', 'http://www.example.com/?myRoot' }, { 'EndNamespaceDecl', 'kbs' }, }, cbdata) end) it("handles namespace declarations", function() local p = test_parser({ "StartNamespaceDecl", "EndNamespaceDecl", "StartElement", "EndElement" }, "?") assert(p:parse(d[[ defined namespace on x named namespace for a default namespace on y inherited namespace for b empty namespace ]])) p:close() assert.same({ { "StartElement", "root", {} }, { "StartNamespaceDecl", "space", "a/namespace" }, { "StartElement", "x", {} }, { "StartElement", "a/namespace?a", { "attr1", "a/namespace?attr2", ["attr1"] = "1", ["a/namespace?attr2"] = "2", } }, { "EndElement", "a/namespace?a" }, { "EndElement", "x" }, { "EndNamespaceDecl", "space" }, { "StartNamespaceDecl", nil, "b/namespace" }, { "StartElement", "b/namespace?y", {} }, { "StartElement", "b/namespace?b", {} }, { "EndElement", "b/namespace?b" }, { "EndElement", "b/namespace?y" }, { "EndNamespaceDecl", nil }, { "StartNamespaceDecl", nil }, -- emptynamespace results in no namespace; nil { "StartElement", "z", {} }, { "EndElement", "z" }, { "EndNamespaceDecl", nil }, { "EndElement", "root" }, }, cbdata) end) it("handles namespace triplet", function() local p = test_parser({ "StartNamespaceDecl", "EndNamespaceDecl", "StartElement", "EndElement" }, "?") assert(p:returnnstriplet(true):parse(d[[ defined namespace on x named namespace for a default namespace on y inherited namespace for b empty namespace 
]])) p:close() assert.same({ { "StartElement", "root", {} }, { "StartNamespaceDecl", "space", "a/namespace" }, { "StartElement", "x", {} }, { "StartElement", "a/namespace?a?space", { "attr1", "a/namespace?attr2?space", ["attr1"] = "1", ["a/namespace?attr2?space"] = "2", } }, { "EndElement", "a/namespace?a?space" }, { "EndElement", "x" }, { "EndNamespaceDecl", "space" }, { "StartNamespaceDecl", nil, "b/namespace" }, { "StartElement", "b/namespace?y", {} }, { "StartElement", "b/namespace?b", {} }, { "EndElement", "b/namespace?b" }, { "EndElement", "b/namespace?y" }, { "EndNamespaceDecl", nil }, { "StartNamespaceDecl", nil }, -- emptynamespace results in no namespace; nil { "StartElement", "z", {} }, { "EndElement", "z" }, { "EndNamespaceDecl", nil }, { "EndElement", "root" }, }, cbdata) end) it("handles doctype declarations", function() local p = test_parser { "StartDoctypeDecl", "EndDoctypeDecl"} assert(p:parse([[]])) assert(p:parse[[]]) p:close() assert.same({ { "StartDoctypeDecl", "root", "hello-world", "foo", false }, { "EndDoctypeDecl" }, }, cbdata) end) it("skipped entity handler", function() local p = test_parser { "Default", "SkippedEntity", "CharacterData", "StartElement", "EndElement", } -- skip default handler during preamble local cb = p:getcallbacks().Default p:getcallbacks().Default = function() end assert(p:parse(preamble)) p:getcallbacks().Default = cb assert(p:parse[[body start: &xuxu; :body end]]) p:close() assert.same({ { 'StartElement', 'root', { 'attr1', attr1 = 'attr: is this a xuxu?', -- expanded } }, { 'CharacterData', 'body start: ' }, { 'SkippedEntity', 'xuxu', false }, -- reported { 'CharacterData', ' :body end' }, { 'EndElement', 'root' }, }, cbdata) end) it("handles ExternalEntity", function() local entities = { ["entity1.xml"] = "" } local p = test_parser { "StartElement", "EndElement", ExternalEntityRef = function (p, context, base, systemID, publicId) assert.equal("/base", base) return context:parse(entities[systemID]) end } 
p:setbase("/base") assert(p:parse(preamble)) assert(p:parse(d[[ &test-entity; ]])) assert(p:getbase() == "/base") p:close() assert.same({ { "StartElement", "to", { method = "POST" } }, { "StartElement", "hi", {} }, { "EndElement", "hi" }, { "EndElement", "to" }, }, cbdata) end) describe("Element Declarations", function() -- test data from examples on this page: -- https://xmlwriter.net/xml_guide/element_declaration.shtml local data = { { desc = "PCDATA", xml = [[ ]> ]], expected = { { "ElementDecl", "bar", "MIXED" }, }, }, { desc = "EMPTY", xml = [[ ]> ]], expected = { { "ElementDecl", "bar", "EMPTY" }, }, }, { desc = "ANY", xml = [[ ]> ]], expected = { { "ElementDecl", "bar", "ANY" }, }, }, { desc = "children", xml = [[ ]> 9216735 ]], expected = { { "ElementDecl", "student", "SEQUENCE", nil, { { name = "id", type = "NAME", } } }, { "ElementDecl", "id", "MIXED" }, }, }, { desc = "sequence of children", xml = [[ ]> 9216735 Smith Jo ]], expected = { { "ElementDecl", "student", "SEQUENCE", nil, { { name = "id", type = "NAME", }, { name = "surname", type = "NAME", }, { name = "firstname", type = "NAME", }, } }, { "ElementDecl", "id", "MIXED" }, { "ElementDecl", "firstname", "MIXED" }, { "ElementDecl", "surname", "MIXED" }, }, }, { desc = "children with qualifiers", xml = [[ ]> 19.06.74 ]], expected = { { "ElementDecl", "student", "SEQUENCE", nil, { { name = "dob", quantifier = "?", type = "NAME", }, { name = "subject", quantifier = "*", type = "NAME", }, { name = "dummy", quantifier = "+", type = "NAME", }, } }, { "ElementDecl", "dob", "MIXED" }, { "ElementDecl", "subject", "MIXED" }, { "ElementDecl", "dummy", "MIXED" }, }, }, { desc = "choice of children", xml = [[ ]> 9216735 ]], expected = { { "ElementDecl", "student", "CHOICE", nil, { { name = "id", type = "NAME", }, { name = "surname", type = "NAME", }, } }, { "ElementDecl", "id", "MIXED" }, }, }, { desc = "nested children 1", xml = [[ ]> Smith Jo Sephine female ]], expected = { { "ElementDecl", "student", 
"SEQUENCE", nil, { { name = "surname", type = "NAME", }, { name = "firstname", quantifier = "*", type = "NAME", }, { name = "dob", quantifier = "?", type = "NAME", }, { quantifier = "?", type = "CHOICE", children = { { name = "origin", type = "NAME", }, { name = "sex", type = "NAME", }, } }, } }, { "ElementDecl", "surname", "MIXED" }, { "ElementDecl", "firstname", "MIXED" }, { "ElementDecl", "sex", "MIXED" }, }, }, { desc = "nested children 2", xml = [[ ]> Smith Josephine Jo ]], expected = { { "ElementDecl", "student", "SEQUENCE", nil, { { name = "surname", type = "NAME", }, { name = "firstname", type = "NAME", }, } }, { "ElementDecl", "firstname", "SEQUENCE", nil, { { name = "fullname", type = "NAME", }, { name = "nickname", type = "NAME", } } }, { "ElementDecl", "surname", "MIXED" }, { "ElementDecl", "fullname", "MIXED" }, { "ElementDecl", "nickname", "MIXED" }, }, }, { desc = "nested children 3", xml = [[ ]> ]], expected = { { "ElementDecl", "student", "CHOICE", nil, { { name = "sex", type = "NAME", }, { name = "maritalstatus", quantifier = "*", type = "NAME", }, } }, }, }, { desc = "nested children 4", xml = [[ ]> ]], expected = { { "ElementDecl", "student", "SEQUENCE", nil, { { quantifier = "*", type = "SEQUENCE", children = { { name = "sex", type = "NAME", }, { name = "maritalstatus", type = "NAME", }, }, }, } }, }, }, { desc = "mixed content 1", xml = [[ ]> Here's a bit of text mixed up with the child element. 9216735 You can put text anywhere, before or after the child element. You don't even have to include the 'id' element. ]], expected = { { "ElementDecl", "student", "MIXED", "*", { { name = "id", type = "NAME", }, } }, { "ElementDecl", "id", "MIXED" }, }, }, { desc = "mixed content 2", xml = [[ ]> ]], expected = { { "ElementDecl", "student", "MIXED" }, }, }, { desc = "mixed content 3", xml = [[ ]> You can put text anywhere. You can also put the elements in any order in the document. 
Smith And, you don't have to include all the elements listed in the element declaration. 9216735 ]], expected = { { "ElementDecl", "student", "MIXED", "*", { { name = "id", type = "NAME", }, { name = "surname", type = "NAME", }, { name = "dob", type = "NAME", } } }, { "ElementDecl", "id", "MIXED" }, { "ElementDecl", "surname", "MIXED" }, }, } } for i, case in ipairs(data) do it(case.desc, function() assert:set_parameter("TableFormatLevel", -1) -- display full table depth local p = test_parser { "ElementDecl" } assert(p:parse(d(case.xml))) p:close() assert.same(case.expected, cbdata) end) end end) describe("error handling", function() it("bad xml", function() local p = test_parser {} local status, msg, line, col, byte = p:parse(d[[ ]] ) assert.same({ status = nil, msg = "not well-formed (invalid token)", line = 2, col = 9, byte = 15, },{ status = status, msg = msg, line = line, col = col, byte = byte, }) end) it("incomplete doc (early finish)", function() local p = test_parser {} assert(p:parse("")) local status, msg, line, col, byte = p:parse() assert.same({ status = nil, msg = "no element found", line = 1, col = 5, byte = 5, },{ status = status, msg = msg, line = line, col = col, byte = byte, }) end) it("invalid sequences; parse after finish", function() local p = test_parser {} assert(p:parse[[]]) assert(p:parse()) local r = { p:parse(" ") } assert.same({nil, "cannot parse - document is finished" }, r) end) it("closing unfinshed doc", function() local p = test_parser {} assert(p:parse[[]]) assert.has.error(function() p:close() end, "error closing parser: no element found") end) it("calling parser:stop() to abort", function() local stopped local p = test_parser { StartElement = function (parser, name, attr) if name == "stop" then parser:stop() stopped = true else stopped = false end end, } local r = { p:parse[[ Hello here ]] } assert.is_true(stopped) assert.same({ nil, "parsing aborted", 3, -- line 13, -- column 56, -- position }, r) end) end) it("position 
reporting", function() local pos local p = test_parser { ProcessingInstruction = function(p) pos = { p:pos() } end, } assert(p:parse(d[[ ]] )) p:close() assert.same({ 1, -- line 6, -- column 6, -- position }, pos) end) end) describe("BLA protection", function() local bla_body = [[ ]> &lolb; ]] it("fails if amplification too big", function() local p = test_parser { "StartElement", "EndElement", "CharacterData" } local ok, err = p:parse(bla_body) assert.is_nil(ok) assert.equal("limit on input amplification factor (from DTD and entities) breached", err) end) end) describe("garbage collection", function() local gcinfo = function() return collectgarbage"count" end it("normal", function() for i=1,100000 do -- due to a small bug in Lua... if (math.mod or math.fmod)(i, 100) == 0 then collectgarbage() end lxp.new {} end collectgarbage() collectgarbage() local x = gcinfo() for i=1,100000 do -- due to a small bug in Lua... if (math.mod or math.fmod)(i, 100) == 0 then collectgarbage() end lxp.new {} end collectgarbage() collectgarbage() assert.near(x, gcinfo(), 2) end) it("circular references", function() collectgarbage() collectgarbage() for i=1,100000 do -- due to a small bug in Lua... if (math.mod or math.fmod)(i, 100) == 0 then collectgarbage() end local p, x -- luacheck: ignore p = lxp.new { StartElement = function() x = tostring(p) end } end collectgarbage() collectgarbage() local x = gcinfo() for i=1,100000 do -- due to a small bug in Lua... 
if (math.mod or math.fmod)(i, 100) == 0 then collectgarbage() end local p, x -- luacheck: ignore p = lxp.new { StartElement = function() x = tostring(p) end } end collectgarbage() collectgarbage() assert.near(x, gcinfo(), 2) end) end) end) luaexpat-1.5.2/spec/02-threat_spec.lua000066400000000000000000001326721463727376500175560ustar00rootroot00000000000000local d = require("pl.stringx").dedent or require("pl.text").dedent describe("threats", function() local sep = string.char(1) local threat = { depth = 3, -- depth of tags -- counts maxChildren = 3, -- max number of children (DOM2; Element, Text, Comment, -- ProcessingInstruction, CDATASection). NOTE: adjacent text/CDATA -- sections are counted as 1 (so text-cdata-text-cdata is 1 child). maxAttributes = 3, -- max number of attributes (including default ones), if not parsing -- namespaces, then the namespaces will be counted as attributes. maxNamespaces = 3, -- max number of namespaces defined on a tag -- size limits document = 200, -- size of entire document in bytes buffer = 100, -- size of the unparsed buffer comment = 20, -- size of comment in bytes localName = 20, -- size of localname (or full name if not parsing namespaces) in bytes, -- applies to tags and attributes prefix = 20, -- size of prefix in bytes (only if parsing namespaces), applies to -- tags and attributes namespaceUri = 20, -- size of namespace uri in bytes (only if parsing namespaces) attribute = 20, -- size of attribute value in bytes text = 20, -- text inside tags (counted over all adjacent text/CDATA) PITarget = 20, -- size of processing instruction target in bytes PIData = 20, -- size of processing instruction data in bytes entityName = 20, -- size of entity name in EntityDecl in bytes entity = 20, -- size of entity value in EntityDecl in bytes entityProperty = 20, -- size of systemId, publicId, or notationName in EntityDecl in bytes } local threat_no_ns = {} -- same as above, except without namespaces for k,v in pairs(threat) do 
threat_no_ns[k] = v end threat_no_ns.maxNamespaces = nil threat_no_ns.prefix = nil threat_no_ns.namespaceUri = nil local callbacks_def = { -- all callbacks and their parameters AttlistDecl = { "parser", "elementName", "attrName", "attrType", "default", "required" }, CharacterData = { "parser", "data" }, Comment = { "parser", "data" }, Default = { "parser", "data" }, --DefaultExpand = { "parser", "data" }, -- overrides "Default" if set ElementDecl = { "parser", "name", "type", "quantifier", "children" }, EndCdataSection = { "parser" }, EndDoctypeDecl = { "parser" }, EndElement = { "parser", "elementName" }, EndNamespaceDecl = { "parser", "namespaceName" }, EntityDecl = { "parser", "entityName", "is_parameter", "value", "base", "systemId", "publicId", "notationName" }, ExternalEntityRef = { "parser", "subparser", "base", "systemId", "publicId" }, NotStandalone = { "parser" }, NotationDecl = { "parser", "notationName", "base", "systemId", "publicId" }, ProcessingInstruction = { "parser", "target", "data" }, StartCdataSection = { "parser" }, StartDoctypeDecl = { "parser", "name", "sysid", "pubid", "has_internal_subset" }, StartElement = { "parser", "elementName", "attributes" }, StartNamespaceDecl = { "parser", "namespaceName", "namespaceUri" }, --UnparsedEntityDecl = { "parser", "entityName", "base", "systemId", "publicId", "notationName" }, -- superseded by EntityDecl XmlDecl = { "parser", "version", "encoding", "standalone" }, } local callbacks = {} local cbdata for cb, params in pairs(callbacks_def) do -- generate callbacks that just store the parameters callbacks[cb] = function(parser, ...) local info = {cb, ...} --print(cb, ...) 
cbdata[#cbdata+1] = info end end local p before_each(function() cbdata = {} callbacks.threat = threat p = require("lxp.threat").new(callbacks, sep, false) end) it("parses a simple xml", function() local r, err = p:parse(d[[ hello ]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", "UTF-8", true }, { "Default", "\n"}, { "StartElement", "root", {} }, { "CharacterData", "hello" }, { "EndElement", "root" }, { "Default", "\n\n"}, }, cbdata) end) it("doesn't accept maxNamespaces, prefix, or namespaceUri without separator", function() callbacks.threat = {} for k,v in pairs(threat_no_ns) do callbacks.threat[k] = v end callbacks.threat.maxNamespaces = 1 assert.has.error(function() require("lxp.threat").new(callbacks, nil, false) end, "expected separator to be set when checking maxNamespaces, prefix, and/or namespaceUri") callbacks.threat.maxNamespaces = nil callbacks.threat.prefix = 1 assert.has.error(function() require("lxp.threat").new(callbacks, nil, false) end, "expected separator to be set when checking maxNamespaces, prefix, and/or namespaceUri") callbacks.threat.prefix = nil callbacks.threat.namespaceUri = 1 assert.has.error(function() require("lxp.threat").new(callbacks, nil, false) end, "expected separator to be set when checking maxNamespaces, prefix, and/or namespaceUri") callbacks.threat.namespaceUri = nil end) describe("depth:", function() it("accepts on the edge (3)", function() local r, err = p:parse(d[[hello]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "StartElement", "l2", {} }, { "StartElement", "l3", {} }, { "CharacterData", "hello" }, { "EndElement", "l3" }, { "EndElement", "l2" }, { "EndElement", "root" }, { "Default", "\n"}, }, cbdata) end) it("blocks over the edge (4)", function() local r, err = p:parse(d[[hello]]) assert.equal("structure is too deep", err) assert.falsy(r) end) end) describe("allowDTD:", function() it("accepts if allowed", function() local r, err = p:parse(d[[ ]> 
]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n"}, { "StartDoctypeDecl", "test_doc", nil, nil, true }, { "Default", "\n\t"}, { "ElementDecl", "br", "EMPTY" }, { "Default", "\n"}, { "EndDoctypeDecl" }, { "Default", "\n\n"}, }, cbdata) end) it("blocks if not allowed", function() local old_dtd = threat.allowDTD finally(function() threat.allowDTD = old_dtd end) threat.allowDTD = false local r, err = p:parse(d[[ ]> ]]) assert.equal("DTD is not allowed", err) assert.falsy(r) end) end) describe("children:", function() it("accepts on the edge (3)", function() local r, err = p:parse(d[[]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "StartElement", "c1", {} }, { "EndElement", "c1" }, { "StartElement", "c2", {} }, { "EndElement", "c2" }, { "StartElement", "c3", {} }, { "EndElement", "c3" }, { "EndElement", "root" }, { "Default", "\n"}, }, cbdata) end) it("treats adjacent text/CDATA as 1 child", function() local r, err = p:parse(d[=[txttxt]=]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "StartElement", "c1", {} }, { "EndElement", "c1" }, { "StartElement", "c2", {} }, { "EndElement", "c2" }, { "CharacterData", "txt" }, { "StartCdataSection" }, { "CharacterData", "in the middle" }, { "EndCdataSection" }, { "CharacterData", "txt" }, { "EndElement", "root" }, { "Default", "\n"}, }, cbdata) end) describe("blocks over the edge, counts:", function() it("element nodes", function() local r, err = p:parse(d[[]]) assert.equal("too many children", err) assert.falsy(r) end) it("Text nodes", function() local r, err = p:parse(d[[c4 as text]]) assert.equal("too many children", err) assert.falsy(r) end) it("Comment nodes", function() local r, err = p:parse(d[[]]) assert.equal("too many children", err) assert.falsy(r) end) it("ProcessingInstruction nodes", function() local r, err = p:parse(d[[]]) assert.equal("too many children", err) 
assert.falsy(r) end) it("CDATASection nodes", function() local r, err = p:parse(d[=[]=]) assert.equal("too many children", err) assert.falsy(r) end) end) end) describe("maxAttributes", function() describe("accepts on the edge", function() it("plain attributes", function() local r, err = p:parse(d[[txt]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", { "attra", "attrb", "attrc", attra = "a", attrb = "b", attrc = "c", } }, { "CharacterData", "txt" }, { "EndElement", "root" }, { "Default", "\n"}, }, cbdata) end) it("attr+namespaces, separator", function() local r, err = p:parse(d[[ txt ]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartNamespaceDecl", nil, "http://ns" }, { "StartNamespaceDecl", "hello", "http://hello" }, { "StartElement", "http://ns"..sep.."root", { "attra", "attrb", "attrc", attra = "a", attrb = "b", attrc = "c", } }, { "CharacterData", "txt" }, { "EndElement", "http://ns"..sep.."root" }, { "EndNamespaceDecl", "hello" }, { "EndNamespaceDecl" }, { "Default", "\n\n"}, }, cbdata) end) it("attr+namespaces, no separator", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local r, err = p:parse(d[[ txt ]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", { "attra", "xmlns", "xmlns:hello", attra = "a", xmlns = "http://ns", ["xmlns:hello"] = "http://hello", } }, { "CharacterData", "txt" }, { "EndElement", "root" }, { "Default", "\n\n"}, }, cbdata) end) end) describe("blocks over the edge", function() it("plain attributes", function() local r, err = p:parse(d[[txt]]) assert.equal("too many attributes", err) assert.falsy(r) end) it("attr+namespaces, separator", function() local r, err = p:parse(d[[ txt ]]) assert.equal("too many attributes", err) assert.falsy(r) end) it("attr+namespaces, no separator", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new 
parser without separator local r, err = p:parse(d[[ txt ]]) assert.equal("too many attributes", err) assert.falsy(r) end) end) end) describe("maxNamespaces", function() it("accepts on the edge", function() local r, err = p:parse(d[[ txt ]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartNamespaceDecl", nil, "http://ns" }, { "StartNamespaceDecl", "hello", "http://hello" }, { "StartNamespaceDecl", "world", "http://world" }, { "StartElement", "http://ns"..sep.."root", {} }, { "CharacterData", "txt" }, { "EndElement", "http://ns"..sep.."root" }, { "EndNamespaceDecl", "world" }, { "EndNamespaceDecl", "hello" }, { "EndNamespaceDecl" }, { "Default", "\n\n"}, }, cbdata) end) it("blocks over the edge", function() local r, err = p:parse(d[[ txt ]]) assert.equal("too many namespaces", err) assert.falsy(r) end) end) describe("document size", function() local old_buffer setup(function() old_buffer = threat.buffer threat.buffer = nil -- disable unparsed buffer checks with these tests end) teardown(function() threat.buffer = old_buffer -- reenable old setting end) it("accepts on the edge as one", function() local doc = "txt" local txt = (" "):rep(200-#doc) doc = txt..doc assert.equal(200, #doc) local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "Default", txt}, { "StartElement", "root", {} }, { "CharacterData", "txt" }, { "EndElement", "root" }, }, cbdata) end) it("accepts on the edge chunked", function() local doc = "txt" local txt = (" "):rep(200-#doc) doc = txt..doc assert.equal(200, #doc) local r, err = p:parse(doc:sub(1,100)) assert.equal(nil, err) assert.truthy(r) local r, err = p:parse(doc:sub(101,190)) assert.equal(nil, err) assert.truthy(r) local r, err = p:parse(doc:sub(191,-1)) assert.equal(nil, err) assert.truthy(r) assert.same({ { "Default", txt:sub(1,100)}, { "Default", txt:sub(101,-1)}, { "StartElement", "root", {} }, { "CharacterData", "txt" }, { "EndElement", "root" }, }, cbdata) end) it("blocks over the edge, as 
one", function() local doc = "txt" local txt = (" "):rep(200-#doc + 1) -- +1; over the edge doc = txt..doc assert.equal(201, #doc) local r, err = p:parse(doc) assert.equal("document too large", err) assert.falsy(r) end) it("blocks over the edge, chunked", function() local doc = "txt" local txt = (" "):rep(200-#doc + 1) -- +1; over the edge doc = txt..doc assert.equal(201, #doc) local r, err = p:parse(doc:sub(1,100)) assert.equal(nil, err) assert.truthy(r) local r, err = p:parse(doc:sub(101,190)) assert.equal(nil, err) assert.truthy(r) local r, err = p:parse(doc:sub(191,-1)) assert.equal("document too large", err) assert.falsy(r) end) end) describe("comment size", function() it("accepts on the edge", function() local doc = "" local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "Comment", "01234567890123456789" }, { "EndElement", "root" }, }, cbdata) end) it("blocks over the edge", function() local doc = "" local r, err = p:parse(doc) assert.equal("comment too long", err) assert.falsy(r) end) end) describe("localName size", function() describe("element, plain", function() it("accepts on the edge", function() local doc = "txt" local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "roota12345abcde12345", {} }, { "CharacterData", "txt" }, { "EndElement", "roota12345abcde12345" }, }, cbdata) end) it("blocks over the edge", function() local doc = "txt" local r, err = p:parse(doc) assert.equal("element localName too long", err) assert.falsy(r) end) end) describe("element, namespaced with separator", function() it("accepts on the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartNamespaceDecl", "cool", "http://cool" }, { "StartElement", "http://cool"..sep.."roota12345abcde12345", {} }, { "CharacterData", "txt" }, { "EndElement", "http://cool"..sep.."roota12345abcde12345" }, { 
"EndNamespaceDecl", "cool" } }, cbdata) end) it("blocks over the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal("element localName too long", err) assert.falsy(r) end) end) describe("element, namespaced without separator", function() it("accepts on the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = "" local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "space:root12345abcde", {} }, { "EndElement", "space:root12345abcde" }, }, cbdata) end) it("blocks over the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = "" local r, err = p:parse(doc) assert.equal("element name too long", err) assert.falsy(r) end) end) describe("attribute, plain", function() it("accepts on the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", { "attra12345abcde12345", attra12345abcde12345 = "value", } }, { "CharacterData", "txt" }, { "EndElement", "root" }, }, cbdata) end) it("blocks over the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal("attribute localName too long", err) assert.falsy(r) end) end) describe("attribute, namespaced with separator", function() it("accepts on the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartNamespaceDecl", "yummy", "http://nice" }, { "StartElement", "root", { "http://nice"..sep.."attra12345abcde12345", ["http://nice"..sep.."attra12345abcde12345"] = "value", } }, { "CharacterData", "txt" }, { "EndElement", "root" }, { "EndNamespaceDecl", "yummy" }, }, cbdata) end) it("blocks over the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal("attribute localName too 
long", err) assert.falsy(r) end) end) describe("attribute, namespaced without separator", function() it("accepts on the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", { "yummy:attr12345abcde", ["yummy:attr12345abcde"] = "value", } }, { "CharacterData", "txt" }, { "EndElement", "root" }, }, cbdata) end) it("blocks over the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = [[txt]] local r, err = p:parse(doc) assert.equal("attribute name too long", err) assert.falsy(r) end) end) describe("ElementDecl", function() local old_doc1, old_buffer1, old_doc2, old_buffer2 setup(function() old_doc1 = threat.document old_buffer1 = threat.buffer old_doc2 = threat_no_ns.document old_buffer2 = threat_no_ns.buffer threat.document = nil -- disable document checks with these tests threat.buffer = nil threat_no_ns.document = nil -- disable document checks with these tests threat_no_ns.buffer = nil end) teardown(function() threat.document = old_doc1 -- reenable old setting threat.buffer = old_buffer1 threat_no_ns.document = old_doc2 -- reenable old setting threat_no_ns.buffer = old_buffer2 end) local xmldoc = function(elemPref, elemName, childPref, childName) local elem = (elemPref and (elemPref .. ":") or "")..elemName local attr = (childPref and (childPref .. 
":") or "")..childName return string.format(d[[ ]> ]], elem, attr) end describe("plain", function() it("accepts on the edge", function() local doc = xmldoc(nil, "student345abcde12345", nil, "surname345abcde12345") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "ElementDecl", "student345abcde12345", "CHOICE", nil, { { name = "id", type = "NAME" }, { name = "surname345abcde12345", type = "NAME" }, } }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks over the edge", function() local doc = xmldoc(nil, "student345abcde12345x", nil, "surname345abcde12345") local r, err = p:parse(doc) assert.equal("elementDecl localName too long", err) assert.falsy(r) end) it("blocks child over the edge", function() local doc = xmldoc(nil, "student345abcde12345", nil, "surname345abcde12345x") local r, err = p:parse(doc) assert.equal("elementDecl localName too long", err) assert.falsy(r) end) end) describe("namespaced with separator", function() it("accepts localName+prefix on the edge", function() local doc = xmldoc("prefix2345abcde12345", "student345abcde12345", "prefix2345abcde12345", "surname345abcde12345") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "ElementDecl", "prefix2345abcde12345:student345abcde12345", "CHOICE", nil, { { name = "id", type = "NAME" }, { name = "prefix2345abcde12345:surname345abcde12345", type = "NAME" }, } }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks localName over the edge", function() local doc = xmldoc("prefix2345abcde12345", "student345abcde12345x", "prefix2345abcde12345", "surname345abcde12345") local r, err = p:parse(doc) 
assert.equal("elementDecl localName too long", err) assert.falsy(r) end) it("blocks localName child over the edge", function() local doc = xmldoc("prefix2345abcde12345", "student345abcde12345x", "prefix2345abcde12345", "surname345abcde12345") local r, err = p:parse(doc) assert.equal("elementDecl localName too long", err) assert.falsy(r) end) end) describe("namespaced without separator", function() it("accepts localName+prefix on the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = xmldoc("prefix2345", "student34", "prefix2345", "surname34") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "ElementDecl", "prefix2345:student34", "CHOICE", nil, { { name = "id", type = "NAME" }, { name = "prefix2345:surname34", type = "NAME" }, } }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks localName+prefix over the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = xmldoc("prefix2345", "student345", "prefix2345", "surname34") local r, err = p:parse(doc) assert.equal("elementDecl name too long", err) assert.falsy(r) end) it("blocks localName+prefix child over the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = xmldoc("prefix2345", "student34", "prefix2345", "surname345") local r, err = p:parse(doc) assert.equal("elementDecl name too long", err) assert.falsy(r) end) end) end) describe("AttlistDecl", function() local old_doc1, old_buffer1, old_doc2, old_buffer2 setup(function() old_doc1 = threat.document old_buffer1 = threat.buffer old_doc2 = threat_no_ns.document old_buffer2 = 
threat_no_ns.buffer threat.document = nil -- disable document checks with these tests threat.buffer = nil threat_no_ns.document = nil -- disable document checks with these tests threat_no_ns.buffer = nil end) teardown(function() threat.document = old_doc1 -- reenable old setting threat.buffer = old_buffer1 threat_no_ns.document = old_doc2 -- reenable old setting threat_no_ns.buffer = old_buffer2 end) local xmldoc = function(ePref, eName, aPref, aName) local elem = (ePref and (ePref .. ":") or "")..eName local attr = (aPref and (aPref .. ":") or "")..aName return string.format(d[[ ]> ]], elem, attr) end describe("element, plain", function() it("accepts on the edge", function() local doc = xmldoc(nil, "roota12345abcde12345", nil, "attra12345abcde12345") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "AttlistDecl", "roota12345abcde12345", "attra12345abcde12345", "CDATA", "www.example.com", true }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks over the edge", function() local doc = xmldoc(nil, "roota12345abcde12345x", nil, "attra12345abcde12345") local r, err = p:parse(doc) assert.equal("element localName too long", err) assert.falsy(r) end) end) describe("element, namespaced with separator", function() it("accepts localName+prefix on the edge", function() local doc = xmldoc("prefix2345abcde12345", "roota12345abcde12345", "prefix2345abcde12345", "attra12345abcde12345") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "AttlistDecl", "prefix2345abcde12345:roota12345abcde12345", "prefix2345abcde12345:attra12345abcde12345", "CDATA", "www.example.com", true }, { "Default", "\n" }, { "EndDoctypeDecl" }, 
{ "Default", "\n\n" }, }, cbdata) end) it("blocks localName over the edge", function() local doc = xmldoc("prefix2345abcde12345", "roota12345abcde12345x", "prefix2345abcde12345", "attra12345abcde12345") local r, err = p:parse(doc) assert.equal("element localName too long", err) assert.falsy(r) end) it("blocks prefix over the edge", function() local doc = xmldoc("prefix2345abcde12345x", "roota12345abcde12345", "prefix2345abcde12345", "attra12345abcde12345") local r, err = p:parse(doc) assert.equal("element prefix too long", err) assert.falsy(r) end) end) describe("element, namespaced without separator", function() it("accepts localName+prefix on the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = xmldoc(nil, "prefix2345:roota1234", nil, "prefix2345:attra1234") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "AttlistDecl", "prefix2345:roota1234", "prefix2345:attra1234", "CDATA", "www.example.com", true }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks localName+prefix over the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = xmldoc(nil, "prefix2345:roota1234x", nil, "prefix2345:attra1234") local r, err = p:parse(doc) assert.equal("elementName too long", err) assert.falsy(r) end) end) describe("attribute, plain", function() it("accepts on the edge", function() local doc = xmldoc(nil, "roota12345abcde12345", nil, "attra12345abcde12345") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "AttlistDecl", 
"roota12345abcde12345", "attra12345abcde12345", "CDATA", "www.example.com", true }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks over the edge", function() local doc = xmldoc(nil, "roota12345abcde12345", nil, "attra12345abcde12345x") local r, err = p:parse(doc) assert.equal("attribute localName too long", err) assert.falsy(r) end) end) describe("attribute, namespaced with separator", function() it("accepts localName+prefix on the edge", function() local doc = xmldoc("prefix2345abcde12345", "roota12345abcde12345", "prefix2345abcde12345", "attra12345abcde12345") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "AttlistDecl", "prefix2345abcde12345:roota12345abcde12345", "prefix2345abcde12345:attra12345abcde12345", "CDATA", "www.example.com", true }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks localName over the edge", function() local doc = xmldoc("prefix2345abcde12345", "roota12345abcde12345", "prefix2345abcde12345", "attra12345abcde12345x") local r, err = p:parse(doc) assert.equal("attribute localName too long", err) assert.falsy(r) end) it("blocks prefix over the edge", function() local doc = xmldoc("prefix2345abcde12345", "roota12345abcde12345", "prefix2345abcde12345x", "attra12345abcde12345") local r, err = p:parse(doc) assert.equal("attribute prefix too long", err) assert.falsy(r) end) end) describe("attribute, namespaced without separator", function() it("accepts localName+prefix on the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = xmldoc(nil, "prefix2345:roota1234", nil, "prefix2345:attra1234") local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true 
}, { "Default", "\n" }, { "StartDoctypeDecl", "lab_group", nil, nil, true }, { "Default", "\n\t" }, { "AttlistDecl", "prefix2345:roota1234", "prefix2345:attra1234", "CDATA", "www.example.com", true }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks localName+prefix over the edge", function() callbacks.threat = threat_no_ns p = require("lxp.threat").new(callbacks, nil, false) -- new parser without separator local doc = xmldoc(nil, "prefix2345:roota1234", nil, "prefix2345:attra1234x") local r, err = p:parse(doc) assert.equal("attributeName too long", err) assert.falsy(r) end) end) end) end) describe("prefix size", function() describe("tag", function() it("accepts on the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartNamespaceDecl", "coola12345abcde12345", "http://cool" }, { "StartElement", "http://cool"..sep.."root", {} }, { "CharacterData", "txt" }, { "EndElement", "http://cool"..sep.."root" }, { "EndNamespaceDecl", "coola12345abcde12345" } }, cbdata) end) it("blocks over the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal("prefix too long", err) assert.falsy(r) end) end) describe("attribute", function() it("accepts on the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartNamespaceDecl", "coola12345abcde12345", "http://cool" }, { "StartElement", "root", { "http://cool"..sep.."attra", ["http://cool"..sep.."attra"] = "a", } }, { "CharacterData", "txt" }, { "EndElement", "root" }, { "EndNamespaceDecl", "coola12345abcde12345" } }, cbdata) end) it("blocks over the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal("prefix too long", err) assert.falsy(r) end) end) end) describe("namespaceUri size", function() it("accepts on the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, 
err) assert.truthy(r) assert.same({ { "StartNamespaceDecl", "cool", "http://cool2345abcde" }, { "StartElement", "root", {} }, { "CharacterData", "txt" }, { "EndElement", "root" }, { "EndNamespaceDecl", "cool" } }, cbdata) end) it("blocks over the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal("namespaceUri too long", err) assert.falsy(r) end) it("accepts empty URI", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartNamespaceDecl" }, { "StartElement", "root", {} }, { "CharacterData", "txt" }, { "EndElement", "root" }, { "EndNamespaceDecl" } }, cbdata) end) end) describe("attribute value size", function() it("accepts on the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", { "attr", attr = "abcde12345abcde12345" } }, { "CharacterData", "txt" }, { "EndElement", "root" }, }, cbdata) end) it("blocks over the edge", function() local doc = [[txt]] local r, err = p:parse(doc) assert.equal("attribute value too long", err) assert.falsy(r) end) end) describe("text size", function() describe("text-node", function() it("accepts on the edge", function() local r, err = p:parse(d[[abcde12345abcde12345]]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "CharacterData", "abcde12345abcde12345" }, { "EndElement", "root" }, { "Default", "\n" }, }, cbdata) end) it("blocks over the edge", function() local r, err = p:parse(d[[abcde12345abcde12345x]]) assert.equal("text/CDATA node(s) too long", err) assert.falsy(r) end) end) describe("CDATA-node", function() it("accepts on the edge", function() local r, err = p:parse(d[=[]=]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "StartCdataSection" }, { "CharacterData", "abcde12345abcde12345" }, { "EndCdataSection" }, { "EndElement", "root" }, { "Default", "\n" }, }, 
cbdata) end) it("blocks over the edge", function() local r, err = p:parse(d[=[]=]) assert.equal("text/CDATA node(s) too long", err) assert.falsy(r) end) end) describe("mixed text/CDATA", function() it("accepts on the edge", function() local r, err = p:parse(d[=[txttxt!]=]) assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "CharacterData", "txt" }, { "StartCdataSection" }, { "CharacterData", "in the middle" }, { "EndCdataSection" }, { "CharacterData", "txt!" }, { "EndElement", "root" }, { "Default", "\n" }, }, cbdata) end) it("blocks over the edge", function() local r, err = p:parse(d[=[txttxt!!]=]) assert.equal("text/CDATA node(s) too long", err) assert.falsy(r) end) describe("doesn't block if interleaved with other types: ", function() for t, sub in pairs { element = "", comment = "", process_instruction = "" } do it(t, function() local doc = [=[abcde12345abcde12345%s]=] doc = doc:format(sub) local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) end) end end) end) end) describe("PITarget size", function() it("accepts on the edge", function() local r, err = p:parse("") assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "ProcessingInstruction", "target2345abcde12345", "instructions" }, { "EndElement", "root" }, }, cbdata) end) it("blocks over the edge", function() local r, err = p:parse("") assert.equal("processing instruction target too long", err) assert.falsy(r) end) end) describe("PIData size", function() it("accepts on the edge", function() local r, err = p:parse("") assert.equal(nil, err) assert.truthy(r) assert.same({ { "StartElement", "root", {} }, { "ProcessingInstruction", "target", "instructions345abcde" }, { "EndElement", "root" }, }, cbdata) end) it("blocks over the edge", function() local r, err = p:parse("") assert.equal("processing instruction data too long", err) assert.falsy(r) end) end) describe("entity", function() local old_doc1, old_buffer1, old_doc2, 
old_buffer2 setup(function() old_doc1 = threat.document old_buffer1 = threat.buffer old_doc2 = threat_no_ns.document old_buffer2 = threat_no_ns.buffer threat.document = nil -- disable document checks with these tests threat.buffer = nil threat_no_ns.document = nil -- disable document checks with these tests threat_no_ns.buffer = nil end) teardown(function() threat.document = old_doc1 -- reenable old setting threat.buffer = old_buffer1 threat_no_ns.document = old_doc2 -- reenable old setting threat_no_ns.buffer = old_buffer2 end) local xmldoc = function(entity) return string.format(d[[ ]], entity) end describe("entityName size", function() it("accepts on the edge", function() local doc = xmldoc([[]]) local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "greeting", nil, nil, true }, { "Default", "\n\t" }, { "EntityDecl", "xuxu5abcde12345abcde", false, "is this a xuxu?12345" }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks over the edge", function() local doc = xmldoc([[]]) local r, err = p:parse(doc) assert.equal("entityName too long", err) assert.falsy(r) end) end) describe("entity size", function() it("accepts on the edge", function() local doc = xmldoc([[]]) local r, err = p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "greeting", nil, nil, true }, { "Default", "\n\t" }, { "EntityDecl", "xuxu5abcde12345abcde", false, "is this a xuxu?12345" }, { "Default", "\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks over the edge", function() local doc = xmldoc([[]]) local r, err = p:parse(doc) assert.equal("entity value too long", err) assert.falsy(r) end) end) describe("entityProperty size", function() it("accepts on the edge", function() p:setbase("/base") local doc = xmldoc(d[[ ]]) local r, err = 
p:parse(doc) assert.equal(nil, err) assert.truthy(r) assert.same({ { "XmlDecl", "1.0", nil, true }, { "Default", "\n" }, { "StartDoctypeDecl", "greeting", nil, nil, true }, { "Default", "\n\t" }, { "EntityDecl", "test1", false, nil, "/base", "uri_e12345abcde12345", nil, "txt45abcde1234512345" }, { "Default", "\n " }, { "EntityDecl", "test2", false, nil, "/base", "uri_e12345abcde12345", "public_id5abcde12345", "txt45abcde1234512345" }, { "Default", "\n\n" }, { "EndDoctypeDecl" }, { "Default", "\n\n" }, }, cbdata) end) it("blocks systemId over the edge", function() p:setbase("/base") local doc = xmldoc(d[[ ]]) local r, err = p:parse(doc) assert.equal("systemId too long", err) assert.falsy(r) end) it("blocks publicId over the edge", function() p:setbase("/base") local doc = xmldoc(d[[ ]]) local r, err = p:parse(doc) assert.equal("publicId too long", err) assert.falsy(r) end) it("blocks notationName over the edge", function() p:setbase("/base") local doc = xmldoc(d[[ ]]) local r, err = p:parse(doc) assert.equal("notationName too long", err) assert.falsy(r) end) end) end) describe("buffer size", function() local old_doc, old_mchild setup(function() old_doc = threat.document old_mchild = threat.maxChildren threat.document = nil -- disable document checks with these tests threat.maxChildren = nil -- and max children checks end) teardown(function() -- reenable old setting threat.document = old_doc threat.maxChildren = old_mchild end) it("blocks over the edge", function() local attrs = {} for i = 1,50 do attrs[i] = "attr"..i.."='abcde12345abcde12345'" end local doc = "text" local i = 0 local r, err repeat -- parse in chunks of 10 bytes i = i + 1 local s = (i-1) * 10 + 1 local e = s + 9 r, err = p:parse(doc:sub(s, e)) until not r assert.equal("unparsed buffer too large", err) assert.falsy(r) end) it("passes with complexer xml", function() local child = "hello" -- child within constraints local doc = ""..child:rep(100).."" local i = 0 local r, err local chunk_size = 10 -- 
parse in chunks of 10 bytes repeat i = i + 1 local s = (i-1) * chunk_size + 1 local e = s + chunk_size - 1 r, err = p:parse(doc:sub(s, e)) until (not r) or (s > #doc) assert.is_nil(err) assert.truthy(r) end) end) end) luaexpat-1.5.2/spec/03-lom_spec.lua000066400000000000000000000141631463727376500170510ustar00rootroot00000000000000local u_acute_utf8 = string.char(195)..string.char(186) -- C3 BA local u_acute_latin1 = string.char(250) -- FA describe("Lua object model:", function() local lom before_each(function() lom = require "lxp.lom" end) -- run all tests twice; using plain and threat protected parser for _, parser in ipairs { "lxp", "lxp.threat"} do local opts = { separator = "?", threat = parser == "lxp.threat" and {} or nil, } describe(parser..".parse()", function() local tests = { { root_elem = [[inside tag 'abc']], lom = { tag="abc", attr = { "a1", "a2", a1 = "A1", a2 = "A2", }, "inside tag 'abc'", }, }, { root_elem = [[ some text ]], lom = { tag = "qwerty", attr = { "q1", "q2", q1 = "q1", q2 = "q2", }, "\n\t", { tag = "asdf", attr = {}, "some text", }, "\n", }, }, { root_elem = [[
  • conteudo 1
  • conte]]..u_acute_utf8..[[do 2
]], encoding = "UTF-8", lom = { tag = "ul", attr = {}, { tag = "li", attr = {}, "conteudo 1", }, { tag = "li", attr = {}, "conteúdo 2", }, }, }, { root_elem = [[
  • Conteudo 1
  • Conte]]..u_acute_latin1..[[do 2
  • Conteúdo 3
]], encoding = "ISO-8859-1", doctype = [[]>]], -- Ok! lom = { tag = "ul", attr = {}, { tag = "li", attr = {}, "Conteudo 1", }, { tag = "li", attr = {}, "Conteúdo 2", -- Latin-1 becomes UTF-8 }, { tag = "li", attr = {}, "Conteúdo 3", -- entity becomes a UTF-8 character }, }, }, { root_elem = [[
  • Conteúdo
]], --doctype = [[]], --> ignora as entidades --doctype = [[]], --> ignora as entidades --doctype = [[]], --> undefined entity --doctype = [[]], --> syntax error --doctype = [[]], --> syntax error --doctype = [[]], --> syntax error --doctype = [[]], --> ignora entidades --doctype = [[]], --> ignora entidades doctype = [[]>]], -- Ok! encoding = "UTF-8", lom = { tag = "ul", attr = {}, { tag = "li", attr = {}, "Conteúdo", -- entity becomes a UTF-8 character }, }, }, { root_elem = [[inside tag 'abc']], lom = { -- namespace parsing, assumes separator to be set to "?" tag="http://expat?abc", attr = { "a1", "http://expat?a2", a1 = "A1", ["http://expat?a2"] = "A2", }, "inside tag 'abc'", }, }, } for i, test in pairs(tests) do local encoding = test.encoding or "ISO-8859-1" local header = [[]]..(test.doctype or '') local doc = header..test.root_elem it("test case " .. i .. ": string (all at once)", function() local o = assert(lom.parse(doc, opts)) assert.same(test.lom, o) end) it("test case " .. i .. ": iterator", function() local o = assert(lom.parse(string.gmatch(doc, ".-%>"), opts)) assert.same(test.lom, o) end) it("test case " .. i .. ": file", function() local fn = assert(require("pl.path").tmpname()) finally(function() os.remove(fn) end) assert(require("pl.utils").writefile(fn, doc)) local o = assert(lom.parse(assert(io.open(fn)), opts)) assert.same(test.lom, o) end) it("test case " .. i .. 
": table", function() local t = {} for i = 1, #doc, 10 do t[#t+1] = doc:sub(i, i+9) end local o = assert(lom.parse(t, opts)) assert.same(test.lom, o) end) end end) end local input = [[ t111 t112 t113 t121 t122 ]] describe("find_elem()", function() it("returns element", function() local output = assert(lom.parse(input)) local c1 = lom.find_elem (output, "c1") assert (type(c1) == "table") assert (c1.tag == "c1") assert (c1[1] == "t111") end) end) describe("list_children()", function() it("returns all children if no tag specified", function() local output = assert(lom.parse(input)) local children = {} -- output[1] is whitespace before tag , output[2] is the table -- for . for child in lom.list_children(output[2]) do children[#children+1] = child.tag end assert.same({ "c1", "c2", "c1" }, children) end) it("returns all matching children if tag specified", function() local output = assert(lom.parse(input)) local children = {} -- output[1] is whitespace before tag , output[2] is the table -- for . 
for child in lom.list_children(output[2], "c1") do children[#children+1] = child.tag end assert.same({ "c1", "c1" }, children) children = {} for child in lom.list_children(output[2], "c2") do children[#children+1] = child.tag end assert.same({ "c2" }, children) end) it("returns nothing when run on a text-node", function() local children = {} -- test on whitespace, typically before a tag for child in lom.list_children(" ") do children[#children+1] = child.tag end assert.same({}, children) end) end) end) luaexpat-1.5.2/spec/04-totable_spec.lua000066400000000000000000000222161463727376500177130ustar00rootroot00000000000000local tests = { { input = [[inside tag 'abc']], totable = { [0] = "abc", a1 = "A1", a2 = "A2", "inside tag 'abc'", }, clean = { -- no whitesapce, no changes [0] = "abc", a1 = "A1", a2 = "A2", "inside tag 'abc'", }, torecord = { -- no single entries, no changes [0] = "abc", a1 = "A1", a2 = "A2", "inside tag 'abc'", }, }, { input = [[inside tag 'abc']], totable = { [0] = "http://expat?abc", a1 = "A1", ["http://expat?a2"] = "A2", "inside tag 'abc'", }, clean = { -- no whitesapce, no changes [0] = "http://expat?abc", a1 = "A1", ["http://expat?a2"] = "A2", "inside tag 'abc'", }, torecord = { -- no single entries, no changes [0] = "http://expat?abc", a1 = "A1", ["http://expat?a2"] = "A2", "inside tag 'abc'", }, }, { input = [[ some text some text ]], totable = { [0] = "qwerty", q1 = "q1", q2 = "q2", "\n\t", { [0] = "q2", "some text", }, "\n\t", { [0] = "asdf", "some text", attr = "value", }, "\n", }, clean = { [0] = "qwerty", q1 = "q1", q2 = "q2", { [0] = "q2", "some text", }, { [0] = "asdf", "some text", attr = "value", }, }, torecord = { [0] = "qwerty", q1 = "q1", q2 = "q2", { [0] = "q2", -- does not overwrite the existing q2 entry "some text", }, asdf = "some text", -- entry moved to a field, NOTE: attribute dropped!! 
}, }, { input = [[ Belgian Waffles $5.95 Two of our famous Belgian Waffles with plenty of real maple syrup 650 Strawberry Belgian Waffles $7.95 Light Belgian waffles covered with strawberries and whipped cream 900 Berry-Berry Belgian Waffles $8.95 Light Belgian waffles covered with an assortment of fresh berries and whipped cream 900 French Toast $4.50 Thick slices made from our homemade sourdough bread 600 Homestyle Breakfast $6.95 Two eggs, bacon or sausage, toast, and our ever-popular hash browns 950 ]], totable = { [0] = "breakfast_menu", [1] = "\n\t", [2] = { [0] = "food", [1] = "\n\t\t", [2] = { [0] = "name", [1] = "Belgian Waffles", }, [3] = "\n\t\t", [4] = { [0] = "price", [1] = "$5.95", }, [5] = "\n\t\t", [6] = { [0] = "description", [1] = "Two of our famous Belgian Waffles with plenty of real maple syrup", }, [7] = "\n\t\t", [8] = { [0] = "calories", [1] = "650", }, [9] = "\n\t", }, [3] = "\n\t", [4] = { [0] = "food", [1] = "\n\t\t", [2] = { [0] = "name", [1] = "Strawberry Belgian Waffles", }, [3] = "\n\t\t", [4] = { [0] = "price", [1] = "$7.95", }, [5] = "\n\t\t", [6] = { [0] = "description", [1] = "Light Belgian waffles covered with strawberries and whipped cream", }, [7] = "\n\t\t", [8] = { [0] = "calories", [1] = "900", }, [9] = "\n\t", }, [5] = "\n\t", [6] = { [0] = "food", [1] = "\n\t\t", [2] = { [0] = "name", [1] = "Berry-Berry Belgian Waffles", }, [3] = "\n\t\t", [4] = { [0] = "price", [1] = "$8.95", }, [5] = "\n\t\t", [6] = { [0] = "description", [1] = "Light Belgian waffles covered with an assortment of fresh berries and whipped cream", }, [7] = "\n\t\t", [8] = { [0] = "calories", [1] = "900", }, [9] = "\n\t", }, [7] = "\n\t", [8] = { [0] = "food", [1] = "\n\t\t", [2] = { [0] = "name", [1] = "French Toast", }, [3] = "\n\t\t", [4] = { [0] = "price", [1] = "$4.50", }, [5] = "\n\t\t", [6] = { [0] = "description", [1] = "Thick slices made from our homemade sourdough bread", }, [7] = "\n\t\t", [8] = { [0] = "calories", [1] = "600", }, [9] = "\n\t", 
}, [9] = "\n\t", [10] = { [0] = "food", [1] = "\n\t\t", [2] = { [0] = "name", [1] = "Homestyle Breakfast", }, [3] = "\n\t\t", [4] = { [0] = "price", [1] = "$6.95", }, [5] = "\n\t\t", [6] = { [0] = "description", [1] = "Two eggs, bacon or sausage, toast, and our ever-popular hash browns", }, [7] = "\n\t\t", [8] = { [0] = "calories", [1] = "950", }, [9] = "\n\t", }, [11] = "\n", }, clean = { [0] = "breakfast_menu", [1] = { [0] = "food", [1] = { [0] = "name", [1] = "Belgian Waffles", }, [2] = { [0] = "price", [1] = "$5.95", }, [3] = { [0] = "description", [1] = "Two of our famous Belgian Waffles with plenty of real maple syrup", }, [4] = { [0] = "calories", [1] = "650", }, }, [2] = { [0] = "food", [1] = { [0] = "name", [1] = "Strawberry Belgian Waffles", }, [2] = { [0] = "price", [1] = "$7.95", }, [3] = { [0] = "description", [1] = "Light Belgian waffles covered with strawberries and whipped cream", }, [4] = { [0] = "calories", [1] = "900", }, }, [3] = { [0] = "food", [1] = { [0] = "name", [1] = "Berry-Berry Belgian Waffles", }, [2] = { [0] = "price", [1] = "$8.95", }, [3] = { [0] = "description", [1] = "Light Belgian waffles covered with an assortment of fresh berries and whipped cream", }, [4] = { [0] = "calories", [1] = "900", }, }, [4] = { [0] = "food", [1] = { [0] = "name", [1] = "French Toast", }, [2] = { [0] = "price", [1] = "$4.50", }, [3] = { [0] = "description", [1] = "Thick slices made from our homemade sourdough bread", }, [4] = { [0] = "calories", [1] = "600", }, }, [5] = { [0] = "food", [1] = { [0] = "name", [1] = "Homestyle Breakfast", }, [2] = { [0] = "price", [1] = "$6.95", }, [3] = { [0] = "description", [1] = "Two eggs, bacon or sausage, toast, and our ever-popular hash browns", }, [4] = { [0] = "calories", [1] = "950", }, }, }, torecord = { [0] = "breakfast_menu", [1] = { [0] = "food", name = "Belgian Waffles", price = "$5.95", description = "Two of our famous Belgian Waffles with plenty of real maple syrup", calories = "650", }, [2] = { [0] = 
"food", name = "Strawberry Belgian Waffles", price = "$7.95", description = "Light Belgian waffles covered with strawberries and whipped cream", calories = "900", }, [3] = { [0] = "food", name = "Berry-Berry Belgian Waffles", price = "$8.95", description = "Light Belgian waffles covered with an assortment of fresh berries and whipped cream", calories = "900", }, [4] = { [0] = "food", name = "French Toast", price = "$4.50", description = "Thick slices made from our homemade sourdough bread", calories = "600", }, [5] = { [0] = "food", name = "Homestyle Breakfast", price = "$6.95", description = "Two eggs, bacon or sausage, toast, and our ever-popular hash browns", calories = "950", }, }, }, } describe("totable:", function() local totable before_each(function() totable = require "lxp.totable" end) for i, test in ipairs(tests) do describe("case " .. i .. ":", function() local preamble = [[]] local doc = preamble .. test.input -- run all tests twice; using plain and threat protected parser for _, parser in ipairs { "lxp", "lxp.threat"} do local opts = { separator = "?", threat = parser == "lxp.threat" and {} or nil, } describe(parser..".parse()", function() it("string (all at once)", function() local o = assert(totable.parse(doc, opts)) assert.same(test.totable, o) end) it("iterator", function() local o = assert(totable.parse(string.gmatch(doc, ".-%>"), opts)) assert.same(test.totable, o) end) it("file", function() local fn = assert(require("pl.path").tmpname()) finally(function() os.remove(fn) end) assert(require("pl.utils").writefile(fn, doc)) local o = assert(totable.parse(assert(io.open(fn)), opts)) assert.same(test.totable, o) end) it("table", function() local t = {} for i = 1, #doc, 10 do t[#t+1] = doc:sub(i, i+9) end local o = assert(totable.parse(t, opts)) assert.same(test.totable, o) end) end) end it("clean", function() local result = assert(totable.parse(doc, { separator = "?" 
})) totable.clean(result) assert.same(test.clean, result) end) it("torecord", function() local result = assert(totable.parse(doc, { separator = "?" })) totable.torecord(totable.clean(result)) assert.same(test.torecord, result) end) end) end end) luaexpat-1.5.2/src/000077500000000000000000000000001463727376500141555ustar00rootroot00000000000000luaexpat-1.5.2/src/lxp/000077500000000000000000000000001463727376500147605ustar00rootroot00000000000000luaexpat-1.5.2/src/lxp/lom.lua000066400000000000000000000053021463727376500162520ustar00rootroot00000000000000-- See Copyright Notice in license.html local table = require"table" local tinsert, tremove = table.insert, table.remove local assert, type = assert, type -- auxiliary functions ------------------------------------------------------- local function starttag (p, tag, attr) local stack = p:getcallbacks().stack local newelement = {tag = tag, attr = attr} tinsert(stack, newelement) end local function endtag (p, tag) local stack = p:getcallbacks().stack local element = tremove(stack) assert(element.tag == tag) local level = #stack tinsert(stack[level], element) end local function text (p, txt) local stack = p:getcallbacks().stack local element = stack[#stack] local n = #element if type(element[n]) == "string" then element[n] = element[n] .. 
txt else tinsert(element, txt) end end -- main function ------------------------------------------------------------- local function parse (o, opts) local opts = opts or {} local c = { StartElement = starttag, EndElement = endtag, CharacterData = text, _nonstrict = true, stack = {{}} } local p if opts.threat then c.threat = opts.threat p = require("lxp.threat").new(c, opts.separator) else p = require("lxp").new(c, opts.separator) end local to = type(o) if to == "string" then local status, err, line, col, pos = p:parse(o) if not status then return nil, err, line, col, pos end else local iter if to == "table" then local i = 0 iter = function() i = i + 1; return o[i] end elseif to == "function" then iter = o elseif to == "userdata" and o.read then iter = function() local l = o:read() if l then return l.."\n" end end else error ("Bad argument #1 to parse: expected a string, a table, a function or a file, but got "..to, 2) end for l in iter do local status, err, line, col, pos = p:parse(l) if not status then return nil, err, line, col, pos end end end local status, err, line, col, pos = p:parse() -- close document if not status then return nil, err, line, col, pos end p:close() return c.stack[1][1] end -- utility functions --------------------------------------------------------- local function find_elem (self, tag) if self.tag == tag then return self end for i = 1, #self do local v = self[i] if type(v) == "table" then local found = find_elem (v, tag) if found then return found end end end return nil end local function list_children (self, tag) local i = 0 return function () i = i+1 local v = self[i] while v do if type (v) == "table" and (tag == nil or tag == v.tag) then return v end i = i+1 v = self[i] end return nil end end return { find_elem = find_elem, list_children = list_children, parse = parse, } luaexpat-1.5.2/src/lxp/threat.lua000066400000000000000000000413741463727376500167630ustar00rootroot00000000000000--- Expat Parser wrapper which protects against XML 
threats. --[[ inspired by; - https://docs.sensedia.com/en/api-platform-guide/4.2.x.x/interceptors/security_xml-threat-protection.html - https://tech.forums.softwareag.com/t/xml-threat-protection/236959 - https://docs.mulesoft.com/api-manager/2.x/apply-configure-xml-threat-task - https://github.com/Trust1Team/kong-plugin-xml-threat-protection ]] local lxp = require "lxp" local threat = {} local defaults = { depth = 50, -- depth of tags allowDTD = true, -- is a DTD allowed -- counts maxChildren = 100, -- max number of children (DOM2; Element, Text, Comment, -- ProcessingInstruction, CDATASection). NOTE: adjacent text/CDATA -- sections are counted as 1 (so text-cdata-text-cdata is 1 child). maxAttributes = 100, -- max number of attributes (including default ones), if not parsing -- namespaces, then the namespaces will be counted as attributes. maxNamespaces = 20, -- max number of namespaces defined on a tag -- size limits document = 10*1024*1024, -- 10 mb; size of entire document in bytes buffer = 1024*1024, -- 1 mb; size of the unparsed buffer comment = 1024, -- 1 kb; size of comment in bytes localName = 1024, -- 1 kb; size of localname (or full name if not parsing namespaces) in bytes, -- applies to tags and attributes prefix = 1024, -- 1 kb; size of prefix in bytes (only if parsing namespaces), applies to -- tags and attributes namespaceUri = 1024, -- 1 kb; size of namespace uri in bytes (only if parsing namespaces) attribute = 1024*1024, -- 1 mb; size of attribute value in bytes text = 1024*1024, -- 1 mb; text inside tags (counted over all adjacent text/CDATA) PITarget = 1024, -- 1 kb; size of processing instruction target in bytes PIData = 1024, -- 1 kb; size of processing instruction data in bytes entityName = 1024, -- 1 kb; size of entity name in EntityDecl in bytes entity = 1024, -- 1 kb; size of entity value in EntityDecl in bytes entityProperty = 1024, -- 1 kb; size of systemId, publicId, or notationName in EntityDecl in bytes } --- Creates a parser that 
--- Creates a parser that implements xml threat protection.
-- Drop-in alternative to `lxp.new`: `callbacks` must contain a `threat`
-- sub-table with the limits to enforce (missing entries are filled in from
-- the module `defaults`). The returned object wraps a real lxp parser,
-- exposes the same method set, and aborts the parse (via `parser:stop`)
-- with a descriptive error as soon as any limit is exceeded.
-- @param callbacks table of lxp callbacks, including the `threat` checks table
-- @param separator namespace separator; required when maxNamespaces, prefix,
--   or namespaceUri checks are enabled
-- @param merge_character_data passed straight through to `lxp.new`
-- @return the wrapping parser object, or nil plus an error message
function threat.new(callbacks, separator, merge_character_data)
	assert(type(callbacks) == "table", "expected arg #1 to be a table with callbacks")
	local checks = callbacks.threat
	assert(type(checks) == "table", "expected entry 'threat' in callbacks table to be a table with checks")
	if checks.maxNamespaces or checks.prefix or checks.namespaceUri then
		assert(separator ~= nil, "expected separator to be set when checking maxNamespaces, prefix, and/or namespaceUri")
	end
	-- apply defaults
	for setting, value in pairs(defaults) do
		if checks[setting] == nil then
			checks[setting] = value
		end
	end
	-- namespace-related checks are meaningless without a separator; disable them
	if separator == nil then
		checks.maxNamespaces = nil
		checks.prefix = nil
		checks.namespaceUri = nil
	end

	do -- add missing callbacks so we get all checks
		local callbacks_def = {
			"CharacterData", "Comment",
			--"Default",
			--"DefaultExpand",
			"EndCdataSection", "EndDoctypeDecl", "EndElement", "EndNamespaceDecl",
			"ExternalEntityRef", "NotStandalone", "NotationDecl", "ProcessingInstruction",
			"StartCdataSection", "StartDoctypeDecl", "StartElement", "StartNamespaceDecl",
			--"UnparsedEntityDecl", -- superseded by EntityDecl
			"EntityDecl", "AttlistDecl", "ElementDecl", "SkippedEntity", "XmlDecl",
		}
		local nop = function() end
		for _, cbname in ipairs(callbacks_def) do
			if not callbacks[cbname] then
				callbacks[cbname] = nop
			end
		end
	end

	local parser -- the standard expat parser; forward declaration
	local p = {} -- the parser object to return
	local new_cbs = {} -- new callbacks
	local threat_error_data -- error data to return

	-- Records the threat error (message + position triple) and stops the
	-- underlying parser; p:parse picks the data up afterwards.
	local function threat_error(msg)
		threat_error_data = { msg, parser:pos() } -- total 4 results
		parser:stop()
	end

	-- Method delegation: each wrapper forwards to the real parser and, where
	-- the real parser returns itself, substitutes `p` so chaining keeps
	-- operating on the protected wrapper.
	function p:close()
		local ok, err = parser:close()
		return ok == parser and p or ok, err
	end

	function p:getbase()
		return parser:getbase()
	end

	function p:getcallbacks()
		return callbacks
	end

	function p:pos()
		return parser:pos()
	end

	function p:getcurrentbytecount()
		return parser:getcurrentbytecount()
	end

	function p:setbase(base)
		local ok, err = parser:setbase(base)
		return ok == parser and p or ok, err
	end

	function p:setblamaxamplification(amp)
		local ok, err = parser:setblamaxamplification(amp)
		return ok == parser and p or ok, err
	end

	function p:setblathreshold(threshold)
		local ok, err = parser:setblathreshold(threshold)
		return ok == parser and p or ok, err
	end

	function p:setencoding(encoding)
		local ok, err = parser:setencoding(encoding)
		return ok == parser and p or ok, err
	end

	function p:stop()
		local ok, err = parser:stop()
		return ok == parser and p or ok, err
	end

	function p:returnnstriplet(enable)
		local ok, err = parser:returnnstriplet(enable)
		return ok == parser and p or ok, err
	end

	do
		local size = 0 -- running total of bytes fed in so far
		-- Wrapped parse: enforces the document and buffer size limits and
		-- converts a threat-stop into a nil+msg+line+col+pos error return.
		function p:parse(s)
			size = size + #(s or "")
			if checks.document and size > checks.document then
				return nil, "document too large"
			end
			local a,b,c,d,e = parser:parse(s)
			if threat_error_data then
				return nil, threat_error_data[1], threat_error_data[2], threat_error_data[3], threat_error_data[4]
			end
			if checks.buffer then
				-- bytes fed minus bytes consumed = size of the unparsed buffer
				local _, _, pos = parser:pos()
				if size - pos > checks.buffer then
					return nil, "unparsed buffer too large"
				end
			end
			if a == parser then
				return p,b,c,d,e
			end
			return a,b,c,d,e
		end
	end

	-- stats to track
	local context = { -- current context
		children = 0,
	}
	local stack = { context } -- tracking depth of context

	-- Wrap every user callback with its corresponding threat check. Note the
	-- wrappers ignore their own `parser` argument and hand `p` to the user
	-- callbacks instead, so user code only ever sees the protected wrapper.
	for key, cb in pairs(callbacks) do
		local ncb
		if key == "CharacterData" then
			ncb = function(parser, data)
				local l = context.charcount
				if not l then
					-- first text chunk after a non-text sibling: counts as one child
					l = #data
					if checks.maxChildren then
						context.children = context.children + 1
						if context.children > checks.maxChildren then
							return threat_error("too many children")
						end
					end
				else
					-- adjacent text/CDATA: accumulate length, not child count
					l = l + #data
				end
				if checks.text and l > checks.text then
					return threat_error("text/CDATA node(s) too long")
				end
				context.charcount = l
				return callbacks.CharacterData(p, data)
			end

		elseif key == "Comment" then
			ncb = function(parser, data)
				if checks.comment and #data > checks.comment then
					return threat_error("comment too long")
				end
				context.children = context.children + 1
				if checks.maxChildren and context.children > checks.maxChildren then
					return threat_error("too many children")
				end
				context.charcount = nil -- reset text-length counter
				return callbacks.Comment(p, data)
			end

		elseif key == "Default" then
			ncb = function(parser, data)
				return callbacks.Default(p, data)
			end

		elseif key == "DefaultExpand" then
			ncb = function(parser, data)
				return callbacks.DefaultExpand(p, data)
			end

		elseif key == "EndCdataSection" then
			ncb = function(parser)
				return callbacks.EndCdataSection(p)
			end

		elseif key == "EndElement" then
			ncb = function(parser, elementName)
				local d = #stack
				context = stack[d-1] -- revert to previous level context
				stack[d] = nil -- delete last context
				return callbacks.EndElement(p, elementName)
			end

		elseif key == "EndNamespaceDecl" then
			ncb = function(parser, namespaceName)
				return callbacks.EndNamespaceDecl(p, namespaceName)
			end

		elseif key == "ExternalEntityRef" then -- TODO: implement
			ncb = function(parser, subparser, base, systemId, publicId)
				-- subparser must be wrapped...
				-- do we need to pass current depth as its initial depth?
				-- NOTE(review): the subparser is currently handed over UNWRAPPED,
				-- so threat checks do not apply inside external entities.
				return callbacks.ExternalEntityRef(p, subparser, base, systemId, publicId)
			end

		elseif key == "NotStandalone" then
			ncb = function(parser)
				return callbacks.NotStandalone(p)
			end

		elseif key == "NotationDecl" then -- TODO: implement
			ncb = function(parser, notationName, base, systemId, publicId)
				return callbacks.NotationDecl(p, notationName, base, systemId, publicId)
			end

		elseif key == "ProcessingInstruction" then
			ncb = function(parser, target, data)
				if checks.PITarget and checks.PITarget < #target then
					return threat_error("processing instruction target too long")
				end
				if checks.PIData and checks.PIData < #data then
					return threat_error("processing instruction data too long")
				end
				context.children = context.children + 1
				if checks.maxChildren and context.children > checks.maxChildren then
					return threat_error("too many children")
				end
				context.charcount = nil -- reset text-length counter
				return callbacks.ProcessingInstruction(p, target, data)
			end

		elseif key == "StartCdataSection" then
			ncb = function(parser)
				return callbacks.StartCdataSection(p)
			end

		elseif key == "StartDoctypeDecl" then -- TODO: implement
			ncb = function(parser, name, sysid, pubid, has_internal_subset)
				if not checks.allowDTD then
					return threat_error("DTD is not allowed")
				end
				return callbacks.StartDoctypeDecl(p, name, sysid, pubid, has_internal_subset)
			end

		elseif key == "StartElement" then
			ncb = function(parser, elementName, attributes)
				-- the new element is a child of the current context
				context.children = context.children + 1
				if checks.maxChildren and context.children > checks.maxChildren then
					return threat_error("too many children")
				end
				context.charcount = nil -- reset text-length counter
				-- push a fresh context for the element's own children
				context = { children = 0 }
				local d = #stack
				if checks.depth and d > checks.depth then
					return threat_error("structure is too deep")
				end
				d = d + 1
				stack[d] = context
				if separator then -- handle namespaces
					local l
					local s,e = elementName:find(separator, 1, true)
					if s then
						l = #elementName - e -- namespaced
					else
						l = #elementName -- not namespaced, entire key
					end
					if checks.localName and l > checks.localName then
						return threat_error("element localName too long")
					end
				else
					if checks.localName and #elementName > checks.localName then
						return threat_error("element name too long")
					end
				end
				local count = 0
				for key, value in pairs(attributes) do
					if type(key) == "string" then -- we only check the hash entries to prevent doubles
						count = count + 1
						if separator then -- handle namespaces
							local l
							local s,e = key:find(separator, 1, true)
							if s then
								l = #key - e -- namespaced
							else
								l = #key -- not namespaced, entire key
							end
							if checks.localName and l > checks.localName then
								return threat_error("attribute localName too long")
							end
						else -- no namespaces
							if checks.localName and #key > checks.localName then
								return threat_error("attribute name too long")
							end
						end
						if checks.attribute and #value > checks.attribute then
							return threat_error("attribute value too long")
						end
					end
				end
				if checks.maxAttributes and count > checks.maxAttributes then
					return threat_error("too many attributes")
				end
				return callbacks.StartElement(p, elementName, attributes)
			end

		elseif key == "StartNamespaceDecl" then
			ncb = function(parser, namespaceName, namespaceUri)
				-- we're storing in the current context, which is one level up
				-- from the tag they are intended for. Because the namespace callbacks
				-- happen before the element callbacks. But for our purposes
				-- this is fine.
				context.ns = (context.ns or 0) + 1
				if checks.maxNamespaces and context.ns > checks.maxNamespaces then
					return threat_error("too many namespaces")
				end
				if checks.prefix and #(namespaceName or "") > checks.prefix then
					return threat_error("prefix too long")
				end
				if checks.namespaceUri and namespaceUri and #namespaceUri > checks.namespaceUri then
					return threat_error("namespaceUri too long")
				end
				return callbacks.StartNamespaceDecl(p, namespaceName, namespaceUri)
			end

		-- elseif key == "UnparsedEntityDecl" then -- TODO: implement?? superseded by "EntityDecl"
		-- 	ncb = function(parser, entityName, base, systemId, publicId, notationName)
		-- 		return callbacks.UnparsedEntityDecl(p, entityName, base, systemId, publicId, notationName)
		-- 	end

		elseif key == "EndDoctypeDecl" then
			ncb = function(parser)
				return callbacks.EndDoctypeDecl(p)
			end

		elseif key == "XmlDecl" then
			ncb = function(parser, version, encoding, standalone)
				return callbacks.XmlDecl(p, version, encoding, standalone)
			end

		elseif key == "EntityDecl" then
			ncb = function(parser, entityName, is_parameter, value, base, systemId, publicId, notationName)
				if checks.entityName and checks.entityName < #entityName then
					return threat_error("entityName too long")
				end
				if checks.entity and value and checks.entity < #value then
					return threat_error("entity value too long")
				end
				if checks.entityProperty then
					if systemId and checks.entityProperty < #systemId then
						return threat_error("systemId too long")
					end
					if publicId and checks.entityProperty < #publicId then
						return threat_error("publicId too long")
					end
					if notationName and checks.entityProperty < #notationName then
						return threat_error("notationName too long")
					end
				end
				return callbacks.EntityDecl(p, entityName, is_parameter, value, base, systemId, publicId, notationName)
			end

		elseif key == "AttlistDecl" then
			ncb = function(parser, elementName, attrType, default, required) -- luacheck: ignore
				-- NOTE(review): DTD names arrive in raw "prefix:name" form, hence
				-- the literal ":" split below instead of the configured separator.
				return nil
			end
			-- (the real wrapper follows; the stub above is never assigned)
			ncb = function(parser, elementName, attrName, attrType, default, required)
				if separator then
					local ePrefix, eName, aPrefix, aName
					if checks.prefix or checks.localName then
						-- namespace based parsing, check against localName+prefix
						local colon = elementName:find(":", 1, true) or 0
						ePrefix = elementName:sub(1, colon-1)
						eName = elementName:sub(colon+1, -1)
						colon = attrName:find(":", 1, true) or 0
						aPrefix = attrName:sub(1, colon-1)
						aName = attrName:sub(colon+1, -1)
						if checks.localName then
							if checks.localName < #eName then
								return threat_error("element localName too long")
							end
							if checks.localName < #aName then
								return threat_error("attribute localName too long")
							end
						end
						if checks.prefix then
							if checks.prefix < #ePrefix then
								return threat_error("element prefix too long")
							end
							if checks.prefix < #aPrefix then
								return threat_error("attribute prefix too long")
							end
						end
					end
				else
					-- no namespace parsing, check against localName
					if checks.localName then
						if checks.localName < #elementName then
							return threat_error("elementName too long")
						end
						if checks.localName < #attrName then
							return threat_error("attributeName too long")
						end
					end
				end
				if default and checks.attribute then
					if checks.attribute < #default then
						return threat_error("attribute default too long")
					end
				end
				return callbacks.AttlistDecl(p, elementName, attrName, attrType, default, required)
			end

		elseif key == "ElementDecl" then
			ncb = function(parser, name, type, quantifier, children)
				if name or children then
					-- build a name checker matching the namespace configuration,
					-- then apply it recursively to the declared content model
					local checkName
					if separator then
						if checks.localName or checks.prefix then
							-- namespace based parsing, check against localName+prefix
							checkName = function(name)
								local colon = name:find(":", 1, true) or 0
								local ePrefix = name:sub(1, colon-1)
								local eName = name:sub(colon+1, -1)
								if checks.localName and checks.localName < #eName then
									return threat_error("elementDecl localName too long")
								end
								if checks.prefix and checks.prefix < #ePrefix then
									return threat_error("elementDecl prefix too long")
								end
								return true
							end
						end
					elseif checks.localName then
						checkName = function(name)
							if checks.localName < #name then
								return threat_error("elementDecl name too long")
							end
							return true
						end
					end
					if checkName then
						local function checkChild(child)
							if child.name and not checkName(child.name) then
								return
							end
							for _, subchild in ipairs(child.children or {}) do
								if not checkChild(subchild) then
									return
								end
							end
							return true
						end
						if not checkChild { name = name, children = children } then
							return
						end
					end
				end
				return callbacks.ElementDecl(p, name, type, quantifier, children)
			end

		elseif key == "SkippedEntity" then
			ncb = function(parser, name, isParameter)
				return callbacks.SkippedEntity(p, name, isParameter)
			end

		elseif key == "threat" then -- threat protection config table, remove, do not pass on
			ncb = nil

		else -- unknown entry, just copy
			ncb = cb
		end
		new_cbs[key] = ncb
	end

	-- create final parser with updated/wrapped callbacks
	local err
	parser, err = lxp.new(new_cbs, separator, merge_character_data)
	if not parser then
		return parser, err
	end
	return p
end

return threat
-- See Copyright Notice in license.html
-- Based on Luiz Henrique de Figueiredo's lxml:
-- http://www.tecgraf.puc-rio.br/~lhf/ftp/lua/#lxml

local table = require"table"
local tinsert, tremove = table.insert, table.remove
local assert, tostring, type = assert, tostring, type

-- auxiliary functions -------------------------------------------------------
local function starttag (p, tag, attr)
	local stack = p:getcallbacks().stack
	local newelement = {[0] = tag}
	for i = 1, #attr do
		local attrname = attr[i]
		local attrvalue = attr[attrname]
		newelement[attrname] = attrvalue
	end
	tinsert(stack, newelement)
end

local function endtag (p, tag)
	local stack = p:getcallbacks().stack
	local element = tremove(stack)
	assert(element[0] == tag,
		"Error while closing element: table[0] should be `"..
		tostring(tag).."' but is `"..tostring(element[0]).."'")
	local level = #stack
	tinsert(stack[level], element)
end

local function text (p, txt)
	local stack = p:getcallbacks().stack
	local element = stack[#stack]
	local n = #element
	if type(element[n]) == "string" and n > 0 then
		element[n] = element[n] ..
txt else tinsert(element, txt) end end -- main function ------------------------------------------------------------- local function parse (o, opts) local opts = opts or {} local c = { StartElement = starttag, EndElement = endtag, CharacterData = text, _nonstrict = true, stack = {{}}, } local p if opts.threat then c.threat = opts.threat p = require("lxp.threat").new(c, opts.separator) else p = require("lxp").new(c, opts.separator) end local to = type(o) if to == "string" then local status, err, line, col, pos = p:parse(o) if not status then return nil, err, line, col, pos end else local iter if to == "table" then local i = 0 iter = function() i = i + 1; return o[i] end elseif to == "function" then iter = o elseif to == "userdata" and o.read then iter = function() local l = o:read() if l then return l.."\n" end end else error ("Bad argument #1 to parse: expected a string, a table, a function or a file, but got "..to, 2) end for l in iter do local status, err, line, col, pos = p:parse(l) if not status then return nil, err, line, col, pos end end end local status, err, line, col, pos = p:parse() -- close document if not status then return nil, err, line, col, pos end p:close() return c.stack[1][1] end -- utility functions --------------------------------------------------------- local function compact (t) -- remove empty entries local n = 0 for i = 1, #t do local v = t[i] if v then n = n+1 if n ~= i then t[n] = v t[i] = nil end else t[i] = nil end end return t end local function clean (t) -- remove empty strings for i = 1, #t do local v = t[i] local tv = type(v) if tv == "table" then clean (v) elseif tv == "string" and v:match"^%s*$" then t[i] = false end end return compact (t) end local function torecord (t) -- move 1-value subtables to table entries for i = 1, #t do local v = t[i] if type(v) == "table" then if #v == 1 and type(v[1]) == "string" and t[v[0]] == nil then t[v[0]] = v[1] t[i] = false else torecord (v) end end end return compact (t) end return { clean = 
clean, compact = compact, -- TODO: internal only, should not be exported parse = parse, torecord = torecord, } luaexpat-1.5.2/src/lxplib.c000066400000000000000000000601631463727376500156210ustar00rootroot00000000000000/* ** $Id: lxplib.c,v 1.16 2007/06/05 20:03:12 carregal Exp $ ** LuaExpat: Lua bind for Expat library ** See Copyright Notice in license.html */ #include #include #include #include "expat_config.h" #include "expat.h" #if (XML_MAJOR_VERSION == 2 && XML_MINOR_VERSION < 4) || (XML_MAJOR_VERSION < 2) #error Expat 2.4 or newer is required #endif #include "lua.h" #include "lauxlib.h" #include "lxplib.h" #if (LUA_VERSION_NUM == 501) #define lua_getuservalue(L, i) lua_getfenv(L, i) #define lua_setuservalue(L, i) lua_setfenv(L, i) #define luaL_setfuncs(L, R, N) luaL_register(L, NULL, R) #endif #if !defined(lua_pushliteral) #define lua_pushliteral(L, s) \ lua_pushstring(L, "" s, (sizeof(s)/sizeof(char))-1) #endif enum XPState { XPSpre, /* parser just initialized */ XPSok, /* state while parsing */ XPSfinished, /* state after finished parsing */ XPSerror, XPSstring /* state while reading a string */ }; struct lxp_userdata { lua_State *L; XML_Parser parser; /* associated expat parser */ int errorref; /* reference to error message if state is XPSerror */ enum XPState state; luaL_Buffer *b; /* to concatenate sequences of cdata pieces */ int bufferCharData; /* whether to buffer cdata pieces */ }; typedef struct lxp_userdata lxp_userdata; static int reporterror (lxp_userdata *xpu) { lua_State *L = xpu->L; XML_Parser p = xpu->parser; lua_pushnil(L); lua_pushstring(L, XML_ErrorString(XML_GetErrorCode(p))); lua_pushinteger(L, XML_GetCurrentLineNumber(p)); lua_pushinteger(L, XML_GetCurrentColumnNumber(p) + 1); lua_pushinteger(L, XML_GetCurrentByteIndex(p) + 1); return 5; } static lxp_userdata *createlxp (lua_State *L) { lxp_userdata *xpu = (lxp_userdata *)lua_newuserdata(L, sizeof(lxp_userdata)); xpu->errorref = LUA_REFNIL; xpu->parser = NULL; xpu->L = NULL; xpu->state = 
XPSpre; luaL_getmetatable(L, ParserType); lua_setmetatable(L, -2); return xpu; } static void lxpclose (lua_State *L, lxp_userdata *xpu) { luaL_unref(L, LUA_REGISTRYINDEX, xpu->errorref); xpu->errorref = LUA_REFNIL; if (xpu->parser) XML_ParserFree(xpu->parser); xpu->parser = NULL; } /* ** Auxiliary function to call a Lua handle */ static void docall (lxp_userdata *xpu, int nargs, int nres) { lua_State *L = xpu->L; assert(xpu->state == XPSok); if (lua_pcall(L, nargs + 1, nres, 0) != 0) { xpu->state = XPSerror; xpu->errorref = luaL_ref(L, LUA_REGISTRYINDEX); /* error message */ } } /* ** Check whether there is pending Cdata, and call its handle if necessary */ static void dischargestring (lxp_userdata *xpu) { assert(xpu->state == XPSstring); xpu->state = XPSok; luaL_pushresult(xpu->b); docall(xpu, 1, 0); } /* ** Check whether there is a Lua handle for a given event: If so, ** put it on the stack (to be called later), and also push `self' */ static int getHandle (lxp_userdata *xpu, const char *handle) { lua_State *L = xpu->L; if (xpu->state == XPSstring) dischargestring(xpu); if (xpu->state == XPSerror) return 0; /* some error happened before; skip all handles */ lua_pushstring(L, handle); lua_gettable(L, 3); if (lua_toboolean(L, -1) == 0) { lua_pop(L, 1); return 0; } if (!lua_isfunction(L, -1)) { luaL_error(L, "lxp '%s' callback is not a function", handle); } lua_pushvalue(L, 1); /* first argument in every call (self) */ return 1; } /* ** {====================================================== ** Handles ** ======================================================= */ static void f_StartCdata (void *ud) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, StartCdataKey) == 0) return; /* no handle */ docall(xpu, 0, 0); } static void f_EndCdataKey (void *ud) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, EndCdataKey) == 0) return; /* no handle */ docall(xpu, 0, 0); } static void f_CharData (void *ud, const char *s, int len) { lxp_userdata *xpu = 
(lxp_userdata *)ud; if (xpu->state == XPSok) { if (getHandle(xpu, CharDataKey) == 0) return; /* no handle */ if(xpu->bufferCharData != 0) { xpu->state = XPSstring; luaL_buffinit(xpu->L, xpu->b); } else { lua_pushlstring(xpu->L, s, len); docall(xpu, 1, 0); } } if (xpu->state == XPSstring) luaL_addlstring(xpu->b, s, len); } static void f_Comment (void *ud, const char *data) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, CommentKey) == 0) return; /* no handle */ lua_pushstring(xpu->L, data); docall(xpu, 1, 0); } static void f_Default (void *ud, const char *data, int len) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, DefaultKey) == 0) return; /* no handle */ lua_pushlstring(xpu->L, data, len); docall(xpu, 1, 0); } static void f_DefaultExpand (void *ud, const char *data, int len) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, DefaultExpandKey) == 0) return; /* no handle */ lua_pushlstring(xpu->L, data, len); docall(xpu, 1, 0); } static void f_StartElement (void *ud, const char *name, const char **attrs) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; int lastspec = XML_GetSpecifiedAttributeCount(xpu->parser) / 2; int i = 1; if (getHandle(xpu, StartElementKey) == 0) return; /* no handle */ lua_pushstring(L, name); lua_newtable(L); while (*attrs) { if (i <= lastspec) { lua_pushinteger(L, i++); lua_pushstring(L, *attrs); lua_settable(L, -3); } lua_pushstring(L, *attrs++); lua_pushstring(L, *attrs++); lua_settable(L, -3); } docall(xpu, 2, 0); /* call function with self, name, and attributes */ } static void f_EndElement (void *ud, const char *name) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, EndElementKey) == 0) return; /* no handle */ lua_pushstring(xpu->L, name); docall(xpu, 1, 0); } static int f_ExternaEntity (XML_Parser p, const char *context, const char *base, const char *systemId, const char *publicId) { lxp_userdata *xpu = (lxp_userdata *)XML_GetUserData(p); lua_State *L = xpu->L; 
lxp_userdata *child; int status; if (getHandle(xpu, ExternalEntityKey) == 0) return 1; /* no handle */ child = createlxp(L); child->parser = XML_ExternalEntityParserCreate(p, context, NULL); if (!child->parser) luaL_error(L, "XML_ParserCreate failed"); lua_getuservalue(L, 1); lua_setuservalue(L, -2); /* child uses the same table of its father */ lua_pushstring(L, base); lua_pushstring(L, systemId); lua_pushstring(L, publicId); docall(xpu, 4, 1); status = lua_toboolean(L, -1); lua_pop(L, 1); lxpclose(L, child); return status; } static void f_StartNamespaceDecl (void *ud, const char *prefix, const char *uri) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, StartNamespaceDeclKey) == 0) return; /* no handle */ lua_pushstring(L, prefix); lua_pushstring(L, uri); docall(xpu, 2, 0); } static void f_EndNamespaceDecl (void *ud, const char *prefix) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, EndNamespaceDeclKey) == 0) return; /* no handle */ lua_pushstring(xpu->L, prefix); docall(xpu, 1, 0); } static void f_NotationDecl (void *ud, const char *notationName, const char *base, const char *systemId, const char *publicId) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, NotationDeclKey) == 0) return; /* no handle */ lua_pushstring(L, notationName); lua_pushstring(L, base); lua_pushstring(L, systemId); lua_pushstring(L, publicId); docall(xpu, 4, 0); } static int f_NotStandalone (void *ud) { int status; lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, NotStandaloneKey) == 0) return 1; /* no handle */ docall(xpu, 0, 1); status = lua_toboolean(L, -1); lua_pop(L, 1); return status; } static void f_ProcessingInstruction (void *ud, const char *target, const char *data) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, ProcessingInstructionKey) == 0) return; /* no handle */ lua_pushstring(L, target); lua_pushstring(L, data); docall(xpu, 
2, 0); } static void f_UnparsedEntityDecl (void *ud, const char *entityName, const char *base, const char *systemId, const char *publicId, const char *notationName) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, UnparsedEntityDeclKey) == 0) return; /* no handle */ lua_pushstring(L, entityName); lua_pushstring(L, base); lua_pushstring(L, systemId); lua_pushstring(L, publicId); lua_pushstring(L, notationName); docall(xpu, 5, 0); } static void f_EntityDecl (void *ud, const char *entityName, int is_parameter_entity, const char *value, int value_length, const char *base, const char *systemId, const char *publicId, const char *notationName) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, EntityDeclKey) == 0) return; /* no handle */ lua_pushstring(L, entityName); lua_pushboolean(L, is_parameter_entity); if (value == NULL) { lua_pushnil(L); } else { lua_pushlstring(L, value, value_length); } lua_pushstring(L, base); lua_pushstring(L, systemId); lua_pushstring(L, publicId); lua_pushstring(L, notationName); docall(xpu, 7, 0); } static void PushElementDeclType(lua_State *L, XML_Content *model) { switch(model->type) { case XML_CTYPE_EMPTY: lua_pushliteral(L, "EMPTY"); return; case XML_CTYPE_ANY: lua_pushliteral(L, "ANY"); return; case XML_CTYPE_MIXED: lua_pushliteral(L, "MIXED"); return; case XML_CTYPE_NAME: lua_pushliteral(L, "NAME"); return; case XML_CTYPE_CHOICE: lua_pushliteral(L, "CHOICE"); return; case XML_CTYPE_SEQ: lua_pushliteral(L, "SEQUENCE"); return; default: /* safe guard, should not happen */ lua_pushliteral(L, "unknown"); return; } } static int PushElementDeclQuant(lua_State *L, XML_Content *model) { switch(model->quant) { case XML_CQUANT_NONE: return 0; case XML_CQUANT_OPT: lua_pushliteral(L, "?"); return 1; case XML_CQUANT_REP: lua_pushliteral(L, "*"); return 1; case XML_CQUANT_PLUS: lua_pushliteral(L, "+"); return 1; default: /* safe guard, should not happen */ lua_pushliteral(L, 
"unknown"); return 1; } } static void PushElementDeclChildren(lua_State *L, XML_Content *model) { lua_checkstack(L, 4); int i; XML_Content *child; for (i = 0; i < model->numchildren; i++) { child = model->children+i; lua_newtable(L); PushElementDeclType(L, child); lua_setfield(L, -2, "type"); if (PushElementDeclQuant(L, child) != 0) { lua_setfield(L, -2, "quantifier"); } if (child->name != NULL) { lua_pushstring(L, child->name); lua_setfield(L, -2, "name"); } if (child->numchildren > 0 ) { lua_newtable(L); PushElementDeclChildren(L, child); lua_setfield(L, -2, "children"); } lua_rawseti(L, -2, i+1); } } static void f_ElementDecl (void *ud, const char *name, XML_Content *model) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, ElementDeclKey) == 0) { /* no handle */ XML_FreeContentModel(xpu->parser, model); return; } lua_pushstring(L, name); PushElementDeclType(L, model); if (PushElementDeclQuant(L, model) == 0) { lua_pushnil(L); } if (model->numchildren == 0) { XML_FreeContentModel(xpu->parser, model); docall(xpu, 3, 0); } else { lua_newtable(L); PushElementDeclChildren(L, model); XML_FreeContentModel(xpu->parser, model); docall(xpu, 4, 0); } } static void f_AttlistDecl (void *ud, const char *elName, const char *attName, const char *attType, const char *dflt, int isRequired) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, AttlistDeclKey) == 0) return; /* no handle */ lua_pushstring(L, elName); lua_pushstring(L, attName); lua_pushstring(L, attType); lua_pushstring(L, dflt); lua_pushboolean(L, isRequired); docall(xpu, 5, 0); } static void f_SkippedEntity (void *ud, const char *entityName, int isParameter) { lxp_userdata *xpu = (lxp_userdata *)ud; lua_State *L = xpu->L; if (getHandle(xpu, SkippedEntityKey) == 0) return; /* no handle */ lua_pushstring(L, entityName); lua_pushboolean(L, isParameter); docall(xpu, 2, 0); } static void f_StartDoctypeDecl (void *ud, const XML_Char *doctypeName, const 
XML_Char *sysid, const XML_Char *pubid, int has_internal_subset) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, StartDoctypeDeclKey) == 0) return; /* no handle */ lua_pushstring(xpu->L, doctypeName); lua_pushstring(xpu->L, sysid); lua_pushstring(xpu->L, pubid); lua_pushboolean(xpu->L, has_internal_subset); docall(xpu, 4, 0); } static void f_EndDoctypeDecl (void *ud) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, EndDoctypeDeclKey) == 0) return; /* no handle */ docall(xpu, 0, 0); } static void f_XmlDecl (void *ud, const XML_Char *version, const XML_Char *encoding, int standalone) { lxp_userdata *xpu = (lxp_userdata *)ud; if (getHandle(xpu, XmlDeclKey) == 0) return; /* no handle */ lua_pushstring(xpu->L, version); lua_pushstring(xpu->L, encoding); if (standalone >= 0) { lua_pushboolean(xpu->L, standalone); docall(xpu, 3, 0); } else { docall(xpu, 2, 0); } } /* }====================================================== */ static int hasfield (lua_State *L, const char *fname) { int res; lua_pushstring(L, fname); lua_gettable(L, 1); res = !lua_isnil(L, -1); lua_pop(L, 1); return res; } static void checkcallbacks (lua_State *L) { static const char *const validkeys[] = { "StartCdataSection", "EndCdataSection", "CharacterData", "Comment", "Default", "DefaultExpand", "StartElement", "EndElement", "ExternalEntityRef", "StartNamespaceDecl", "EndNamespaceDecl", "NotationDecl", "NotStandalone", "ProcessingInstruction", "UnparsedEntityDecl", "EntityDecl", "StartDoctypeDecl", "EndDoctypeDecl", "XmlDecl", "AttlistDecl", "SkippedEntity", "ElementDecl", NULL}; if (hasfield(L, "_nonstrict")) return; lua_pushnil(L); while (lua_next(L, 1)) { lua_pop(L, 1); /* remove value */ #if ! 
defined (LUA_VERSION_NUM) || LUA_VERSION_NUM < 501 if (lua_type(L, -1) != LUA_TSTRING || luaL_findstring(lua_tostring(L, -1), validkeys) < 0) luaL_error(L, "invalid key `%s' in callback table", lua_tostring(L, -1)); #else luaL_checkoption(L, -1, NULL, validkeys); #endif } } static int lxp_make_parser (lua_State *L) { XML_Parser p; int bufferCharData = (lua_type(L, 3) != LUA_TBOOLEAN) || (lua_toboolean(L, 3) != 0); char sep = *luaL_optstring(L, 2, ""); lxp_userdata *xpu = createlxp(L); xpu->bufferCharData = bufferCharData; p = xpu->parser = (sep == '\0') ? XML_ParserCreate(NULL) : XML_ParserCreateNS(NULL, sep); if (!p) luaL_error(L, "XML_ParserCreate failed"); luaL_checktype(L, 1, LUA_TTABLE); checkcallbacks(L); lua_pushvalue(L, 1); lua_setuservalue(L, -2); XML_SetUserData(p, xpu); if (hasfield(L, StartCdataKey) || hasfield(L, EndCdataKey)) XML_SetCdataSectionHandler(p, f_StartCdata, f_EndCdataKey); if (hasfield(L, CharDataKey)) XML_SetCharacterDataHandler(p, f_CharData); if (hasfield(L, CommentKey)) XML_SetCommentHandler(p, f_Comment); if (hasfield(L, DefaultKey)) XML_SetDefaultHandler(p, f_Default); if (hasfield(L, DefaultExpandKey)) XML_SetDefaultHandlerExpand(p, f_DefaultExpand); if (hasfield(L, StartElementKey) || hasfield(L, EndElementKey)) XML_SetElementHandler(p, f_StartElement, f_EndElement); if (hasfield(L, ExternalEntityKey)) XML_SetExternalEntityRefHandler(p, f_ExternaEntity); if (hasfield(L, StartNamespaceDeclKey) || hasfield(L, EndNamespaceDeclKey)) XML_SetNamespaceDeclHandler(p, f_StartNamespaceDecl, f_EndNamespaceDecl); if (hasfield(L, NotationDeclKey)) XML_SetNotationDeclHandler(p, f_NotationDecl); if (hasfield(L, NotStandaloneKey)) XML_SetNotStandaloneHandler(p, f_NotStandalone); if (hasfield(L, ProcessingInstructionKey)) XML_SetProcessingInstructionHandler(p, f_ProcessingInstruction); if (hasfield(L, UnparsedEntityDeclKey)) XML_SetUnparsedEntityDeclHandler(p, f_UnparsedEntityDecl); if (hasfield(L, EntityDeclKey)) XML_SetEntityDeclHandler(p, 
f_EntityDecl); if (hasfield(L, AttlistDeclKey)) XML_SetAttlistDeclHandler(p, f_AttlistDecl); if (hasfield(L, SkippedEntityKey)) XML_SetSkippedEntityHandler(p, f_SkippedEntity); if (hasfield(L, StartDoctypeDeclKey)) XML_SetStartDoctypeDeclHandler(p, f_StartDoctypeDecl); if (hasfield(L, EndDoctypeDeclKey)) XML_SetEndDoctypeDeclHandler(p, f_EndDoctypeDecl); if (hasfield(L, XmlDeclKey)) XML_SetXmlDeclHandler(p, f_XmlDecl); if (hasfield(L, ElementDeclKey)) XML_SetElementDeclHandler(p, f_ElementDecl); return 1; } static lxp_userdata *checkparser (lua_State *L, int idx) { lxp_userdata *xpu = (lxp_userdata *)luaL_checkudata(L, idx, ParserType); luaL_argcheck(L, xpu, idx, "expat parser expected"); luaL_argcheck(L, xpu->parser, idx, "parser is closed"); return xpu; } static int parser_gc (lua_State *L) { lxp_userdata *xpu = (lxp_userdata *)luaL_checkudata(L, 1, ParserType); luaL_argcheck(L, xpu, 1, "expat parser expected"); lxpclose(L, xpu); return 0; } static int setbase (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); if (XML_SetBase(xpu->parser, luaL_checkstring(L, 2)) == 0) luaL_error(L, "no memory to store base"); lua_settop(L, 1); return 1; } static int getbase (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); lua_pushstring(L, XML_GetBase(xpu->parser)); return 1; } static int getcallbacks (lua_State *L) { checkparser(L, 1); lua_getuservalue(L, 1); return 1; } static int parse_aux (lua_State *L, lxp_userdata *xpu, const char *s, size_t len) { luaL_Buffer b; int status; xpu->L = L; xpu->state = XPSok; xpu->b = &b; lua_settop(L, 2); getcallbacks(L); status = XML_Parse(xpu->parser, s, (int)len, s == NULL); if (xpu->state == XPSstring) dischargestring(xpu); if (xpu->state == XPSerror) { /* callback error? */ lua_rawgeti(L, LUA_REGISTRYINDEX, xpu->errorref); /* get original msg. 
*/ lua_error(L); } if (s == NULL) xpu->state = XPSfinished; if (status) { lua_settop(L, 1); /* return parser userdata on success */ return 1; } else { /* error */ return reporterror(xpu); } } static int lxp_parse (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); size_t len; const char *s = luaL_optlstring(L, 2, NULL, &len); if (xpu->state == XPSfinished) { if (s != NULL) { lua_pushnil(L); lua_pushliteral(L, "cannot parse - document is finished"); return 2; } else { lua_settop(L, 1); return 1; } } return parse_aux(L, xpu, s, len); } static int lxp_close (lua_State *L) { int status = 1; lxp_userdata *xpu = (lxp_userdata *)luaL_checkudata(L, 1, ParserType); luaL_argcheck(L, xpu, 1, "expat parser expected"); if (xpu->state != XPSfinished) status = parse_aux(L, xpu, NULL, 0); lxpclose(L, xpu); if (status > 1) luaL_error(L, "error closing parser: %s", lua_tostring(L, -status+1)); lua_settop(L, 1); return 1; } static int lxp_pos (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); XML_Parser p = xpu->parser; lua_pushinteger(L, XML_GetCurrentLineNumber(p)); lua_pushinteger(L, XML_GetCurrentColumnNumber(p) + 1); lua_pushinteger(L, XML_GetCurrentByteIndex(p) + 1); return 3; } static int lxp_setreturnnstriplet (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); luaL_argcheck(L, xpu->state == XPSpre, 1, "invalid parser state"); XML_SetReturnNSTriplet(xpu->parser, lua_toboolean(L, 2)); lua_settop(L, 1); return 1; } static int lxp_setencoding (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); const char *encoding = luaL_checkstring(L, 2); luaL_argcheck(L, xpu->state == XPSpre, 1, "invalid parser state"); XML_SetEncoding(xpu->parser, encoding); lua_settop(L, 1); return 1; } static int lxp_stop (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); lua_pushboolean(L, XML_StopParser(xpu->parser, XML_FALSE) == XML_STATUS_OK); return 1; } /* Billion Laughs Attack mitigation from Expat 2.4.0+ */ #ifdef XML_DTD static int lxp_bla_maximum_amplification (lua_State *L) 
{ lxp_userdata *xpu = checkparser(L, 1); if (! XML_SetBillionLaughsAttackProtectionMaximumAmplification(xpu->parser, luaL_checknumber(L, 2))) { lua_pushnil(L); lua_pushliteral(L, "failed to set BLA maximum amplification"); return 2; } lua_settop(L, 1); return 1; } static int lxp_bla_activation_threshold (lua_State *L) { lxp_userdata *xpu = checkparser(L, 1); if (! XML_SetBillionLaughsAttackProtectionActivationThreshold(xpu->parser, luaL_checkinteger(L, 2))) { lua_pushnil(L); lua_pushliteral(L, "failed to set BLA activation threshold"); return 2; } lua_settop(L, 1); return 1; } #endif #if !defined LUA_VERSION_NUM /* Lua 5.0 */ #define luaL_Reg luaL_reg #endif static int lxp_getcurrentbytecount (lua_State* L) { lxp_userdata *xpu = checkparser(L, 1); lua_pushinteger(L, XML_GetCurrentByteCount(xpu->parser)); return 1; } static const struct luaL_Reg lxp_meths[] = { {"parse", lxp_parse}, {"close", lxp_close}, {"__gc", parser_gc}, {"pos", lxp_pos}, {"getcurrentbytecount", lxp_getcurrentbytecount}, {"setencoding", lxp_setencoding}, {"getcallbacks", getcallbacks}, {"getbase", getbase}, {"setbase", setbase}, {"returnnstriplet", lxp_setreturnnstriplet}, {"stop", lxp_stop}, #ifdef XML_DTD {"setblamaxamplification", lxp_bla_maximum_amplification}, {"setblathreshold", lxp_bla_activation_threshold}, #endif {NULL, NULL} }; static const struct luaL_Reg lxp_funcs[] = { {"new", lxp_make_parser}, {NULL, NULL} }; /* ** Assumes the table is on top of the stack. 
*/ static void set_info (lua_State *L) { lua_pushliteral (L, "_COPYRIGHT"); lua_pushliteral (L, LuaExpatCopyright); lua_settable (L, -3); lua_pushliteral (L, "_DESCRIPTION"); lua_pushliteral (L, "LuaExpat is a SAX XML parser based on the Expat library"); lua_settable (L, -3); lua_pushliteral (L, "_VERSION"); lua_pushliteral (L, LuaExpatVersion); lua_settable (L, -3); lua_pushliteral (L, "_EXPAT_VERSION"); lua_pushstring (L, XML_ExpatVersion()); lua_settable (L, -3); /* create feature list */ lua_pushliteral (L, "_EXPAT_FEATURES"); lua_newtable (L); const XML_Feature *features; for (features = XML_GetFeatureList (); features->name != NULL; features++) { lua_pushstring (L, features->name); lua_pushinteger (L, features->value); lua_settable (L, -3); } lua_settable (L, -3); } int luaopen_lxp (lua_State *L) { luaL_newmetatable(L, ParserType); lua_pushliteral(L, "__index"); lua_pushvalue(L, -2); lua_rawset(L, -3); luaL_setfuncs (L, lxp_meths, 0); lua_pop (L, 1); /* remove metatable */ lua_newtable (L); /* push library table */ luaL_setfuncs (L, lxp_funcs, 0); set_info (L); return 1; } luaexpat-1.5.2/src/lxplib.h000066400000000000000000000022211463727376500156150ustar00rootroot00000000000000/* ** See Copyright Notice in license.html */ #define LuaExpatCopyright "Copyright (C) 2003-2007 The Kepler Project, 2013-2024 Matthew Wild" #define LuaExpatVersion "LuaExpat 1.5.2" #define ParserType "Expat" #define StartCdataKey "StartCdataSection" #define EndCdataKey "EndCdataSection" #define CharDataKey "CharacterData" #define CommentKey "Comment" #define DefaultKey "Default" #define DefaultExpandKey "DefaultExpand" #define StartElementKey "StartElement" #define EndElementKey "EndElement" #define ExternalEntityKey "ExternalEntityRef" #define StartNamespaceDeclKey "StartNamespaceDecl" #define EndNamespaceDeclKey "EndNamespaceDecl" #define NotationDeclKey "NotationDecl" #define NotStandaloneKey "NotStandalone" #define ProcessingInstructionKey "ProcessingInstruction" #define 
UnparsedEntityDeclKey "UnparsedEntityDecl" #define EntityDeclKey "EntityDecl" #define AttlistDeclKey "AttlistDecl" #define SkippedEntityKey "SkippedEntity" #define StartDoctypeDeclKey "StartDoctypeDecl" #define EndDoctypeDeclKey "EndDoctypeDecl" #define XmlDeclKey "XmlDecl" #define ElementDeclKey "ElementDecl" int luaopen_lxp (lua_State *L);