pax_global_header 0000666 0000000 0000000 00000000064 14544351356 0014524 g ustar 00root root 0000000 0000000 52 comment=d46b199f3e7d1148500f49bab0c36c33230b8632
etree-1.3.0/ 0000775 0000000 0000000 00000000000 14544351356 0012631 5 ustar 00root root 0000000 0000000 etree-1.3.0/.github/ 0000775 0000000 0000000 00000000000 14544351356 0014171 5 ustar 00root root 0000000 0000000 etree-1.3.0/.github/workflows/ 0000775 0000000 0000000 00000000000 14544351356 0016226 5 ustar 00root root 0000000 0000000 etree-1.3.0/.github/workflows/go.yml 0000664 0000000 0000000 00000000767 14544351356 0017370 0 ustar 00root root 0000000 0000000 name: Go
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
go-version: [ '1.13', '1.20', '1.21.x' ]
steps:
- uses: actions/checkout@v4
- name: Setup Go ${{ matrix.go-version }}
uses: actions/setup-go@v4
with:
go-version: ${{ matrix.go-version }}
- name: Build
run: go build -v ./...
- name: Test
run: go test -v ./...
etree-1.3.0/CONTRIBUTORS 0000664 0000000 0000000 00000000525 14544351356 0014513 0 ustar 00root root 0000000 0000000 Brett Vickers (beevik)
Felix Geisendörfer (felixge)
Kamil Kisiel (kisielk)
Graham King (grahamking)
Matt Smith (ma314smith)
Michal Jemala (michaljemala)
Nicolas Piganeau (npiganeau)
Chris Brown (ccbrown)
Earncef Sequeira (earncef)
Gabriel de Labachelerie (wuzuf)
Martin Dosch (mdosch)
Hugo Wetterberg (hugowetterberg)
Tobias Theel (nerzal)
etree-1.3.0/LICENSE 0000664 0000000 0000000 00000002415 14544351356 0013640 0 ustar 00root root 0000000 0000000 Copyright 2015-2023 Brett Vickers. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
etree-1.3.0/README.md 0000664 0000000 0000000 00000013000 14544351356 0014102 0 ustar 00root root 0000000 0000000 [](https://godoc.org/github.com/beevik/etree)
[![Go](https://github.com/beevik/etree/actions/workflows/go.yml/badge.svg)](https://github.com/beevik/etree/actions/workflows/go.yml)
etree
=====
The etree package is a lightweight, pure go package that expresses XML in
the form of an element tree. Its design was inspired by the Python
[ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html)
module.
Some of the package's capabilities and features:
* Represents XML documents as trees of elements for easy traversal.
* Imports, serializes, modifies or creates XML documents from scratch.
* Writes and reads XML to/from files, byte slices, strings and io interfaces.
* Performs simple or complex searches with lightweight XPath-like query APIs.
* Auto-indents XML using spaces or tabs for better readability.
* Implemented in pure go; depends only on standard go libraries.
* Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml)
package.
### Creating an XML document
The following example creates an XML document from scratch using the etree
package and outputs its indented contents to stdout.
```go
doc := etree.NewDocument()
doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
people := doc.CreateElement("People")
people.CreateComment("These are all known people")
jon := people.CreateElement("Person")
jon.CreateAttr("name", "Jon")
sally := people.CreateElement("Person")
sally.CreateAttr("name", "Sally")
doc.Indent(2)
doc.WriteTo(os.Stdout)
```
Output:
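```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="style.xsl"?>
<People>
  <!--These are all known people-->
  <Person name="Jon"/>
  <Person name="Sally"/>
</People>
```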
### Reading an XML file
Suppose you have a file on disk called `bookstore.xml` containing the
following data:
```xml
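<bookstore xmlns:p="urn:schemas-books-com:prices">

  <book category="COOKING">
    <title lang="en">Everyday Italian</title>
    <author>Giada De Laurentiis</author>
    <year>2005</year>
    <p:price>30.00</p:price>
  </book>

  <book category="CHILDREN">
    <title lang="en">Harry Potter</title>
    <author>J K. Rowling</author>
    <year>2005</year>
    <p:price>29.99</p:price>
  </book>

  <book category="WEB">
    <title lang="en">XQuery Kick Start</title>
    <author>James McGovern</author>
    <author>Per Bothner</author>
    <author>Kurt Cagle</author>
    <author>James Linn</author>
    <author>Vaidyanathan Nagarajan</author>
    <year>2003</year>
    <p:price>49.99</p:price>
  </book>

  <book category="WEB">
    <title lang="en">Learning XML</title>
    <author>Erik T. Ray</author>
    <year>2003</year>
    <p:price>39.95</p:price>
  </book>

</bookstore>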
```
This code reads the file's contents into an etree document.
```go
doc := etree.NewDocument()
if err := doc.ReadFromFile("bookstore.xml"); err != nil {
panic(err)
}
```
You can also read XML from a string, a byte slice, or an `io.Reader`.
### Processing elements and attributes
This example illustrates several ways to access elements and attributes using
etree selection queries.
```go
root := doc.SelectElement("bookstore")
fmt.Println("ROOT element:", root.Tag)
for _, book := range root.SelectElements("book") {
fmt.Println("CHILD element:", book.Tag)
if title := book.SelectElement("title"); title != nil {
lang := title.SelectAttrValue("lang", "unknown")
fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang)
}
for _, attr := range book.Attr {
fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value)
}
}
```
Output:
```
ROOT element: bookstore
CHILD element: book
TITLE: Everyday Italian (en)
ATTR: category=COOKING
CHILD element: book
TITLE: Harry Potter (en)
ATTR: category=CHILDREN
CHILD element: book
TITLE: XQuery Kick Start (en)
ATTR: category=WEB
CHILD element: book
TITLE: Learning XML (en)
ATTR: category=WEB
```
### Path queries
This example uses etree's path functions to select all book titles that fall
into the category of 'WEB'. The double-slash prefix in the path causes the
search for book elements to occur recursively; book elements may appear at any
level of the XML hierarchy.
```go
for _, t := range doc.FindElements("//book[@category='WEB']/title") {
fmt.Println("Title:", t.Text())
}
```
Output:
```
Title: XQuery Kick Start
Title: Learning XML
```
This example finds the first book element under the root bookstore element and
outputs the tag and text of each of its child elements.
```go
for _, e := range doc.FindElements("./bookstore/book[1]/*") {
fmt.Printf("%s: %s\n", e.Tag, e.Text())
}
```
Output:
```
title: Everyday Italian
author: Giada De Laurentiis
year: 2005
price: 30.00
```
This example finds all books with a price of 49.99 and outputs their titles.
```go
path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title")
for _, e := range doc.FindElementsPath(path) {
fmt.Println(e.Text())
}
```
Output:
```
XQuery Kick Start
```
Note that this example uses the FindElementsPath function, which takes as an
argument a pre-compiled path object. Use precompiled paths when you plan to
search with the same path more than once.
### Other features
These are just a few examples of the things the etree package can do. See the
[documentation](http://godoc.org/github.com/beevik/etree) for a complete
description of its capabilities.
### Contributing
This project accepts contributions. Just fork the repo and submit a pull
request!
etree-1.3.0/RELEASE_NOTES.md 0000664 0000000 0000000 00000014044 14544351356 0015206 0 ustar 00root root 0000000 0000000 Release v1.3.0
==============
**New Features**
* Add support for double-quotes in filter path queries.
* Add `PreserveDuplicateAttrs` to `ReadSettings`.
* Add `ReindexChildren` to `Element`.
Release v1.2.0
==============
**New Features**
* Add the ability to write XML fragments using Token WriteTo functions.
* Add the ability to re-indent an XML element as though it were the root of
the document.
* Add a ReadSettings option to preserve CDATA blocks when reading an XML
document.
Release v1.1.4
==============
**New Features**
* Add the ability to preserve whitespace in leaf elements during indent.
* Add the ability to suppress a document-trailing newline during indent.
* Add choice of XML attribute quoting style (single-quote or double-quote).
**Removed Features**
* Removed the CDATA preservation change introduced in v1.1.3. It was
implemented in a way that broke the ability to process XML documents
encoded using non-UTF8 character sets.
Release v1.1.3
==============
* XML reads now preserve CDATA sections instead of converting them to
standard character data.
Release v1.1.2
==============
* Fixed a path parsing bug.
* The `Element.Text` function now handles comments embedded between
character data spans.
Release v1.1.1
==============
* Updated go version in `go.mod` to 1.20
Release v1.1.0
==============
**New Features**
* New attribute helpers.
* Added the `Element.SortAttrs` method, which lexicographically sorts an
element's attributes by key.
* New `ReadSettings` properties.
* Added `Entity` for the support of custom entity maps.
* New `WriteSettings` properties.
* Added `UseCRLF` to allow the output of CR-LF newlines instead of the
default LF newlines. This is useful on Windows systems.
* Additional support for text and CDATA sections.
* The `Element.Text` method now returns the concatenation of all consecutive
character data tokens immediately following an element's opening tag.
* Added `Element.SetCData` to replace the character data immediately
following an element's opening tag with a CDATA section.
* Added `Element.CreateCData` to create and add a CDATA section child
`CharData` token to an element.
* Added `Element.CreateText` to create and add a child text `CharData` token
to an element.
* Added `NewCData` to create a parentless CDATA section `CharData` token.
* Added `NewText` to create a parentless text `CharData`
token.
* Added `CharData.IsCData` to detect if the token contains a CDATA section.
* Added `CharData.IsWhitespace` to detect if the token contains whitespace
inserted by one of the document Indent functions.
* Modified `Element.SetText` so that it replaces a run of consecutive
character data tokens following the element's opening tag (instead of just
the first one).
* New "tail text" support.
* Added the `Element.Tail` method, which returns the text immediately
following an element's closing tag.
* Added the `Element.SetTail` method, which modifies the text immediately
following an element's closing tag.
* New element child insertion and removal methods.
* Added the `Element.InsertChildAt` method, which inserts a new child token
before the specified child token index.
* Added the `Element.RemoveChildAt` method, which removes the child token at
the specified child token index.
* New element and attribute queries.
* Added the `Element.Index` method, which returns the element's index within
its parent element's child token list.
* Added the `Element.NamespaceURI` method to return the namespace URI
associated with an element.
* Added the `Attr.NamespaceURI` method to return the namespace URI
associated with an attribute.
* Added the `Attr.Element` method to return the element that an attribute
belongs to.
* New Path filter functions.
* Added `[local-name()='val']` to keep elements whose unprefixed tag matches
the desired value.
* Added `[name()='val']` to keep elements whose full tag matches the desired
value.
* Added `[namespace-prefix()='val']` to keep elements whose namespace prefix
matches the desired value.
* Added `[namespace-uri()='val']` to keep elements whose namespace URI
matches the desired value.
**Bug Fixes**
* A default XML `CharSetReader` is now used to prevent failed parsing of XML
documents using certain encodings.
([Issue](https://github.com/beevik/etree/issues/53)).
* All characters are now properly escaped according to XML parsing rules.
([Issue](https://github.com/beevik/etree/issues/55)).
* The `Document.Indent` and `Document.IndentTabs` functions no longer insert
empty string `CharData` tokens.
**Deprecated**
* `Element`
* The `InsertChild` method is deprecated. Use `InsertChildAt` instead.
* The `CreateCharData` method is deprecated. Use `CreateText` instead.
* `CharData`
* The `NewCharData` method is deprecated. Use `NewText` instead.
Release v1.0.1
==============
**Changes**
* Added support for absolute etree Path queries. An absolute path begins with
`/` or `//` and begins its search from the element's document root.
* Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath)
and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath)
functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element)
type.
**Breaking changes**
* A path starting with `//` is now interpreted as an absolute path.
Previously, it was interpreted as a relative path starting from the element
whose
[`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement)
method was called. To remain compatible with this release, all paths
prefixed with `//` should be prefixed with `.//` when called from any
element other than the document's root.
* [**edit 2/1/2019**]: Minor releases should not contain breaking changes.
Even though this breaking change was very minor, it was a mistake to include
it in this minor release. In the future, all breaking changes will be
limited to major releases (e.g., version 2.0.0).
Release v1.0.0
==============
Initial release.
etree-1.3.0/etree.go 0000664 0000000 0000000 00000131115 14544351356 0014266 0 ustar 00root root 0000000 0000000 // Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package etree provides XML services through an Element Tree
// abstraction.
package etree
import (
"bufio"
"bytes"
"encoding/xml"
"errors"
"io"
"os"
"sort"
"strings"
)
const (
// NoIndent is used with the IndentSettings record to remove all
// indenting. Assign it to IndentSettings.Spaces: any negative Spaces value
// makes the indent function return an empty string for every depth when
// UseTabs is false.
NoIndent = -1
)
// ErrXML is returned when XML parsing fails due to incorrect formatting.
// It is a package-level sentinel value, so callers can compare a returned
// error against it.
var ErrXML = errors.New("etree: invalid XML format")
// cdataPrefix is used to detect CDATA text when ReadSettings.PreserveCData is
// true.
var cdataPrefix = []byte(". If false, XML character references
// are also produced for " and '. Default: false.
CanonicalText bool
// CanonicalAttrVal forces the production of XML character references for
// attribute value characters &, < and ". If false, XML character
// references are also produced for > and '. Default: false.
CanonicalAttrVal bool
// AttrSingleQuote causes attributes to use single quotes (attr='example')
// instead of double quotes (attr = "example") when set to true. Default:
// false.
AttrSingleQuote bool
// UseCRLF causes the document's Indent* methods to use a carriage return
// followed by a linefeed ("\r\n") when outputting a newline. If false,
// only a linefeed is used ("\n"). Default: false.
//
// Deprecated: UseCRLF is deprecated. Use IndentSettings.UseCRLF instead.
UseCRLF bool
}
// newWriteSettings returns a WriteSettings record with every option at its
// default value. All defaults are false, so the zero value is the default
// record.
func newWriteSettings() WriteSettings {
	var defaults WriteSettings
	return defaults
}
// dup returns a copy of the WriteSettings record.
func (s *WriteSettings) dup() WriteSettings {
	clone := *s
	return clone
}
// IndentSettings determine the behavior of the Document's Indent* methods.
type IndentSettings struct {
	// Spaces is the number of spaces inserted per indentation level. Set it
	// to etree.NoIndent to remove all indentation. Ignored when UseTabs is
	// true. Default: 4.
	Spaces int

	// UseTabs selects tabs instead of spaces for indentation. Default:
	// false.
	UseTabs bool

	// UseCRLF causes newlines to be written as a carriage return followed by
	// a linefeed ("\r\n"). If false, only a linefeed character is output
	// for a newline ("\n"). Default: false.
	UseCRLF bool

	// PreserveLeafWhitespace causes indent methods to preserve whitespace
	// within XML elements containing only non-CDATA character data. Default:
	// false.
	PreserveLeafWhitespace bool

	// SuppressTrailingWhitespace suppresses the generation of trailing
	// whitespace characters (such as newlines) at the end of the indented
	// document. Default: false.
	SuppressTrailingWhitespace bool
}

// NewIndentSettings creates a default IndentSettings record: four spaces per
// level and every boolean option switched off.
func NewIndentSettings() *IndentSettings {
	settings := new(IndentSettings)
	settings.Spaces = 4
	return settings
}
// indentFunc returns the indentation string to emit for a nesting depth.
type indentFunc func(depth int) string

// getIndentFunc selects the indentFunc matching the indent settings: tabs or
// spaces, CR-LF or LF newlines, or an always-empty string when Spaces is
// negative (and tabs are not requested). The space-based functions read
// s.Spaces each time they are called.
func getIndentFunc(s *IndentSettings) indentFunc {
	switch {
	case s.UseTabs && s.UseCRLF:
		return func(depth int) string { return indentCRLF(depth, indentTabs) }
	case s.UseTabs:
		return func(depth int) string { return indentLF(depth, indentTabs) }
	case s.Spaces < 0:
		return func(int) string { return "" }
	case s.UseCRLF:
		return func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) }
	default:
		return func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) }
	}
}
// Writer is the interface that wraps the Write* methods called by each token
// type's WriteTo function. It is the union of io.StringWriter, io.ByteWriter
// and io.Writer; Document.WriteTo passes a *bufio.Writer to the tokens.
type Writer interface {
io.StringWriter
io.ByteWriter
io.Writer
}
// A Token is an interface type used to represent XML elements, character
// data, CDATA sections, XML comments, XML directives, and XML processing
// instructions.
type Token interface {
// Parent returns the element that currently owns the token, or nil when
// the token is unparented.
Parent() *Element
// Index returns the token's position in its parent's Child slice.
// RemoveChildAt resets it to -1 when the token is detached.
Index() int
// WriteTo serializes the token to w using the write settings s.
WriteTo(w Writer, s *WriteSettings)
// dup duplicates the token. The parent argument is presumably the element
// the copy gets bound to (Element.Copy passes nil) — confirm against the
// implementations.
dup(parent *Element) Token
// setParent and setIndex update the token's bookkeeping when it is
// attached to, moved within, or detached from an element's child list.
setParent(parent *Element)
setIndex(index int)
}
// A Document is a container holding a complete XML tree.
//
// A document has a single embedded element, which contains zero or more child
// tokens, one of which is usually the root element. The embedded element may
// include other children such as processing instruction tokens or character
// data tokens. The document's embedded element is never directly serialized;
// only its children are.
//
// A document also contains read and write settings, which influence the way
// the document is deserialized, serialized, and indented.
type Document struct {
// Element is the embedded container element; Root returns its first child
// that is an *Element.
Element
// ReadSettings is passed to the reader by ReadFrom.
ReadSettings ReadSettings
// WriteSettings is passed to every child token by WriteTo.
WriteSettings WriteSettings
}
// An Element represents an XML element, its attributes, and its child tokens.
// The unexported parent and index fields are bookkeeping maintained by the
// child insertion and removal methods; call ReindexChildren after editing
// Child by hand.
type Element struct {
Space, Tag string // namespace prefix and tag
Attr []Attr // key-value attribute pairs
Child []Token // child tokens (elements, comments, etc.)
parent *Element // parent element
index int // token index in parent's children
}
// An Attr represents a key-value attribute within an XML element.
// Namespace declarations are stored as attributes too: the namespace lookups
// match Space "xmlns" with Key equal to the prefix, or an empty Space with
// Key "xmlns" for the default namespace.
type Attr struct {
Space, Key string // The attribute's namespace prefix and key
Value string // The attribute value string
element *Element // element containing the attribute
}
// charDataFlags are used with CharData tokens to store additional settings.
// The values are single bits (1 << iota).
type charDataFlags uint8
const (
// The CharData contains only whitespace. (bit value 1)
whitespaceFlag charDataFlags = 1 << iota
// The CharData contains a CDATA section. (bit value 2)
cdataFlag
)
// CharData may be used to represent simple text data or a CDATA section
// within an XML document. The Data property should never be modified
// directly; use the SetData method instead.
type CharData struct {
Data string // the simple text or CDATA section content
parent *Element // element owning this token, nil when unparented
index int // position in the parent's Child slice
flags charDataFlags // whitespaceFlag / cdataFlag bits
}
// A Comment represents an XML comment. Element.Text skips over comment
// tokens that sit between an element's leading character data tokens.
type Comment struct {
Data string // the comment's text
parent *Element // element owning this token, nil when unparented
index int // position in the parent's Child slice
}
// A Directive represents an XML directive.
type Directive struct {
Data string // the directive string
parent *Element // element owning this token, nil when unparented
index int // position in the parent's Child slice
}
// A ProcInst represents an XML processing instruction.
type ProcInst struct {
Target string // the processing instruction target
Inst string // the processing instruction value
parent *Element // element owning this token, nil when unparented
index int // position in the parent's Child slice
}
// NewDocument creates an XML document without a root element. The document
// starts with an empty (non-nil) child list and default read and write
// settings.
func NewDocument() *Document {
	doc := new(Document)
	doc.Child = make([]Token, 0)
	doc.ReadSettings = newReadSettings()
	doc.WriteSettings = newWriteSettings()
	return doc
}
// NewDocumentWithRoot creates a fresh XML document whose root element is
// 'e'. If 'e' currently has a parent, it is detached from that parent
// before being bound to the new document.
func NewDocumentWithRoot(e *Element) *Document {
	doc := NewDocument()
	doc.SetRoot(e)
	return doc
}
// Copy returns a recursive, deep copy of the document, including its read
// and write settings.
func (d *Document) Copy() *Document {
	c := &Document{
		Element:       *(d.Element.dup(nil).(*Element)),
		ReadSettings:  d.ReadSettings.dup(),
		WriteSettings: d.WriteSettings.dup(),
	}

	// The duplicated element is copied by value into the new document, so
	// back-pointers recorded while duplicating would refer to the discarded
	// temporary rather than to the element embedded in 'c'. Rebind the
	// top-level children and attributes so that methods that compare a
	// token's parent with the document element (e.g. RemoveChild) work on
	// the copy.
	root := &c.Element
	for _, t := range c.Child {
		t.setParent(root)
	}
	for i := range c.Attr {
		c.Attr[i].element = root
	}
	return c
}
// Root returns the document's root element, i.e. the first child token that
// is an element. It returns nil when the document has no element child.
func (d *Document) Root() *Element {
	for i := range d.Child {
		if root, isElem := d.Child[i].(*Element); isElem {
			return root
		}
	}
	return nil
}
// SetRoot makes the element 'e' the document's root element. An existing
// root element is unbound from the document (its parent is cleared and its
// index reset to -1) and 'e' takes over its slot. When the document has no
// root element yet, 'e' is appended to the document's children. If 'e'
// already has a parent, it is removed from that parent first.
func (d *Document) SetRoot(e *Element) {
	if owner := e.parent; owner != nil {
		owner.RemoveChild(e)
	}

	docElem := &d.Element
	for pos := 0; pos < len(docElem.Child); pos++ {
		tok := docElem.Child[pos]
		if _, isElem := tok.(*Element); !isElem {
			continue
		}

		// Found the current root: detach it and put 'e' in its place.
		tok.setParent(nil)
		tok.setIndex(-1)
		docElem.Child[pos] = e
		e.setParent(docElem)
		e.setIndex(pos)
		return
	}

	// No root element yet.
	docElem.addChild(e)
}
// ReadFrom parses XML from the reader 'r' into this document using the
// document's ReadSettings. It returns the number of bytes read and any
// error encountered.
func (d *Document) ReadFrom(r io.Reader) (n int64, err error) {
	n, err = d.Element.readFrom(r, d.ReadSettings)
	return n, err
}
// ReadFromFile opens the local file at path 'filepath' and reads its XML
// content into this document. The file is closed before returning.
func (d *Document) ReadFromFile(filepath string) error {
	file, openErr := os.Open(filepath)
	if openErr != nil {
		return openErr
	}
	defer file.Close()

	if _, readErr := d.ReadFrom(file); readErr != nil {
		return readErr
	}
	return nil
}
// ReadFromBytes reads XML from the byte slice 'b' into this document.
func (d *Document) ReadFromBytes(b []byte) error {
	src := bytes.NewReader(b)
	if _, err := d.ReadFrom(src); err != nil {
		return err
	}
	return nil
}
// ReadFromString reads XML from the string 's' into this document.
func (d *Document) ReadFromString(s string) error {
	src := strings.NewReader(s)
	if _, err := d.ReadFrom(src); err != nil {
		return err
	}
	return nil
}
// WriteTo serializes the document out to the writer 'w'. Only the children
// of the document's embedded element are written. The function returns the
// number of bytes written and any error encountered while flushing the
// buffered output.
func (d *Document) WriteTo(w io.Writer) (n int64, err error) {
	xw := newXmlWriter(w)
	b := bufio.NewWriter(xw)
	for _, c := range d.Child {
		c.WriteTo(b, &d.WriteSettings)
	}

	// Flush first, then read the byte counter. Combining both in a single
	// tuple assignment would leave the order of the Flush call and the field
	// read unspecified by the language, so the count could miss the bytes
	// still sitting in the buffer.
	err = b.Flush()
	n = xw.bytes
	return n, err
}
// WriteToFile serializes the document out to the file at path 'filepath',
// creating or truncating it. It returns the first error encountered while
// creating, writing or closing the file; a failed Close is reported because
// it can mean the written data never reached the disk.
func (d *Document) WriteToFile(filepath string) error {
	f, err := os.Create(filepath)
	if err != nil {
		return err
	}

	if _, err = d.WriteTo(f); err != nil {
		// The write error is the one worth reporting; closing is best effort.
		_ = f.Close()
		return err
	}
	return f.Close()
}
// WriteToBytes serializes this document into a slice of bytes. On error the
// returned slice is nil.
func (d *Document) WriteToBytes() (b []byte, err error) {
	out := new(bytes.Buffer)
	_, err = d.WriteTo(out)
	if err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// WriteToString serializes this document into a string. On error the
// returned string is empty.
func (d *Document) WriteToString() (s string, err error) {
	raw, err := d.WriteToBytes()
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// Indent re-indents the document's element tree, inserting character data
// tokens made of newlines and spaces. 'spaces' is the number of spaces used
// per depth level; every other option keeps its default IndentSettings
// value.
func (d *Document) Indent(spaces int) {
	settings := NewIndentSettings()
	settings.Spaces = spaces
	d.IndentWithSettings(settings)
}
// IndentTabs re-indents the document's element tree, inserting CharData
// tokens made of newlines and tabs, one tab per indentation level. Every
// other option keeps its default IndentSettings value.
func (d *Document) IndentTabs() {
	settings := NewIndentSettings()
	settings.UseTabs = true
	d.IndentWithSettings(settings)
}
// IndentWithSettings modifies the document's element tree by inserting
// character data tokens containing newlines and indentation. The behavior
// of the indentation algorithm is configured by the indent settings 's',
// which is not modified by this call.
func (d *Document) IndentWithSettings(s *IndentSettings) {
	// Work on a private copy so the deprecated-setting override below does
	// not leak into the caller's record, which may be reused for other
	// documents.
	cfg := *s

	// WriteSettings.UseCRLF is deprecated. Until removed from the package, it
	// overrides IndentSettings.UseCRLF when true.
	if d.WriteSettings.UseCRLF {
		cfg.UseCRLF = true
	}

	d.Element.indent(0, getIndentFunc(&cfg), &cfg)

	if cfg.SuppressTrailingWhitespace {
		d.Element.stripTrailingWhitespace()
	}
}
// Unindent modifies the document's element tree by removing character data
// tokens containing only whitespace. It runs the indent algorithm with
// Spaces set to NoIndent and all other IndentSettings at their defaults.
func (d *Document) Unindent() {
	settings := NewIndentSettings()
	settings.Spaces = NoIndent
	d.IndentWithSettings(settings)
}
// NewElement creates an element without a parent. 'tag' is the element's
// name and may carry a namespace prefix followed by a colon.
func NewElement(tag string) *Element {
	prefix, local := spaceDecompose(tag)
	return newElement(prefix, local, nil)
}
// newElement builds an element with empty attribute and child lists and,
// when 'parent' is non-nil, appends it to that parent's children.
func newElement(space, tag string, parent *Element) *Element {
	elem := new(Element)
	elem.Space, elem.Tag = space, tag
	elem.Attr = make([]Attr, 0)
	elem.Child = make([]Token, 0)
	elem.parent = parent
	elem.index = -1 // not yet placed in any child list

	if parent == nil {
		return elem
	}
	parent.addChild(elem)
	return elem
}
// Copy creates a recursive, deep copy of the element and all its attributes
// and children. The returned element has no parent but can be parented to
// another element using AddChild, or added to a document with SetRoot or
// NewDocumentWithRoot.
func (e *Element) Copy() *Element {
	clone := e.dup(nil)
	return clone.(*Element)
}
// FullTag returns the element's complete tag: "prefix:tag" when the element
// has a namespace prefix, otherwise just the tag.
func (e *Element) FullTag() string {
	if e.Space != "" {
		return e.Space + ":" + e.Tag
	}
	return e.Tag
}
// NamespaceURI returns the XML namespace URI associated with the element.
// For an element without a prefix, this is the value of the nearest default
// namespace declaration (xmlns="...") on the element or its ancestors. For
// a prefixed element, it is the value of the nearest matching xmlns:prefix
// declaration. The empty string is returned when no declaration is found.
func (e *Element) NamespaceURI() string {
	switch e.Space {
	case "":
		return e.findDefaultNamespaceURI()
	default:
		return e.findLocalNamespaceURI(e.Space)
	}
}
// findLocalNamespaceURI walks from the element up through its ancestors and
// returns the value of the first xmlns:prefix attribute that matches the
// requested prefix, or the empty string when there is none.
func (e *Element) findLocalNamespaceURI(prefix string) string {
	for cur := e; cur != nil; cur = cur.parent {
		for i := range cur.Attr {
			attr := &cur.Attr[i]
			if attr.Space == "xmlns" && attr.Key == prefix {
				return attr.Value
			}
		}
	}
	return ""
}
// findDefaultNamespaceURI walks from the element up through its ancestors
// and returns the value of the first unprefixed xmlns attribute, or the
// empty string when there is none.
func (e *Element) findDefaultNamespaceURI() string {
	for cur := e; cur != nil; cur = cur.parent {
		for i := range cur.Attr {
			attr := &cur.Attr[i]
			if attr.Space == "" && attr.Key == "xmlns" {
				return attr.Value
			}
		}
	}
	return ""
}
// namespacePrefix returns the element's namespace prefix (its Space field).
func (e *Element) namespacePrefix() string {
	prefix := e.Space
	return prefix
}
// name returns the element's tag without its namespace prefix (its Tag
// field).
func (e *Element) name() string {
	tag := e.Tag
	return tag
}
// ReindexChildren assigns every child token its current position in the
// element's `Child` array. Call it only after manipulating `Child` by hand;
// the element's own insertion and removal methods keep indexes up to date.
func (e *Element) ReindexChildren() {
	for pos, tok := range e.Child {
		tok.setIndex(pos)
	}
}
// Text returns the concatenation of the character data tokens that
// immediately follow the element's opening tag. Comments interleaved with
// that character data are skipped; any other token ends the run.
func (e *Element) Text() string {
	var sb strings.Builder
	for _, tok := range e.Child {
		switch t := tok.(type) {
		case *CharData:
			sb.WriteString(t.Data)
		case *Comment:
			// Comments do not interrupt the leading text.
		default:
			return sb.String()
		}
	}
	return sb.String()
}
// SetText replaces the run of character data tokens at the start of the
// element's children with a single plain-text token holding 'text'. An
// empty string removes the run.
func (e *Element) SetText(text string) {
	const plainText charDataFlags = 0
	e.replaceText(0, text, plainText)
}
// SetCData replaces the run of character data tokens at the start of the
// element's children with a single token flagged as a CDATA section holding
// 'text'. An empty string removes the run.
func (e *Element) SetCData(text string) {
	const startIndex = 0
	e.replaceText(startIndex, text, cdataFlag)
}
// Tail returns the concatenation of the character data tokens that
// immediately follow the element's end tag, i.e. the run of CharData
// siblings right after the element in its parent's child list. It returns
// the empty string for an element without a parent.
func (e *Element) Tail() string {
	owner := e.Parent()
	if owner == nil {
		return ""
	}

	var sb strings.Builder
	for _, sibling := range owner.Child[e.Index()+1:] {
		cd, isText := sibling.(*CharData)
		if !isText {
			break
		}
		sb.WriteString(cd.Data)
	}
	return sb.String()
}
// SetTail replaces the run of character data tokens that immediately
// follows the element's end tag with 'text'. It does nothing when the
// element has no parent.
func (e *Element) SetTail(text string) {
	owner := e.Parent()
	if owner == nil {
		return
	}
	next := e.Index() + 1
	owner.replaceText(next, text, 0)
}
// replaceText replaces the run of consecutive chardata tokens that starts at
// child index i with a single chardata token holding 'text' and 'flags'.
// An empty 'text' removes the run without leaving a token behind; when
// there is no run at i and 'text' is non-empty, a new token is inserted.
func (e *Element) replaceText(i int, text string, flags charDataFlags) {
	end := e.findTermCharDataIndex(i)

	// No chardata token at i: insert one if there is anything to store.
	if end == i {
		if text != "" {
			e.InsertChildAt(i, newCharData(text, flags, nil))
		}
		return
	}

	// Exactly one chardata token at i: drop it or overwrite it in place.
	if end == i+1 {
		if text == "" {
			e.RemoveChildAt(i)
			return
		}
		only := e.Child[i].(*CharData)
		only.Data, only.flags = text, flags
		return
	}

	// Several chardata tokens: optionally reuse the first one for the new
	// text, then close the gap left by the remaining ones.
	keep := i
	if text != "" {
		first := e.Child[i].(*CharData)
		first.Data, first.flags = text, flags
		keep = i + 1
	}
	copy(e.Child[keep:], e.Child[end:])
	dropped := end - keep
	e.Child = e.Child[:len(e.Child)-dropped]
	for pos := keep; pos < len(e.Child); pos++ {
		e.Child[pos].setIndex(pos)
	}
}
// findTermCharDataIndex scans the child tokens from index 'start' and
// returns the index of the first one that is not a CharData token. It
// returns the number of children when every remaining token is CharData.
func (e *Element) findTermCharDataIndex(start int) int {
	for pos := start; pos < len(e.Child); pos++ {
		switch e.Child[pos].(type) {
		case *CharData:
			continue
		default:
			return pos
		}
	}
	return len(e.Child)
}
// CreateElement creates a new element named 'tag' and appends it as the
// last child token of this element. The tag may include a namespace prefix
// followed by a colon.
func (e *Element) CreateElement(tag string) *Element {
	prefix, local := spaceDecompose(tag)
	return newElement(prefix, local, e)
}
// AddChild appends the token 't' to the element's children. A token that
// already has a parent is removed from that parent first.
func (e *Element) AddChild(t Token) {
	if owner := t.Parent(); owner != nil {
		owner.RemoveChild(t)
	}
	e.addChild(t)
}
// InsertChild inserts the token 't' into this element's list of children
// just before the existing child token 'ex'. If 'ex' is nil or is not a
// child of this element, 't' is appended to the end of the list instead. A
// token 't' that already has a parent is removed from that parent first.
//
// Deprecated: InsertChild is deprecated. Use InsertChildAt instead.
func (e *Element) InsertChild(ex Token, t Token) {
	if ex == nil || ex.Parent() != e {
		e.AddChild(t)
		return
	}

	if owner := t.Parent(); owner != nil {
		owner.RemoveChild(t)
	}
	t.setParent(e)

	// Read the anchor's index only after 't' has been detached, since the
	// removal may have shifted it.
	pos := ex.Index()

	// Open a slot at pos and drop 't' into it.
	e.Child = append(e.Child, nil)
	copy(e.Child[pos+1:], e.Child[pos:])
	e.Child[pos] = t

	for offset, tok := range e.Child[pos:] {
		tok.setIndex(pos + offset)
	}
}
// InsertChildAt inserts the token 't' into this element's list of child
// tokens just before the child currently located at 'index'. If the index
// is greater than or equal to the length of the list of child tokens, then
// the token 't' is added to the end of the list of child tokens. If 't'
// already has a parent (including this element), it is first removed from
// that parent.
func (e *Element) InsertChildAt(index int, t Token) {
	if index >= len(e.Child) {
		e.AddChild(t)
		return
	}

	if t.Parent() != nil {
		// Removing 't' from this element shifts every later sibling down by
		// one slot, so the insertion point only moves when 't' currently
		// sits before it. (Adjusting in the opposite case would place 't'
		// one slot too early and drive index to -1 for index 0.)
		if t.Parent() == e && t.Index() < index {
			index--
		}
		t.Parent().RemoveChild(t)
	}

	t.setParent(e)

	// Open a slot at index and drop 't' into it.
	e.Child = append(e.Child, nil)
	copy(e.Child[index+1:], e.Child[index:])
	e.Child[index] = t

	// Every token from the insertion point onward has a new position.
	for j := index; j < len(e.Child); j++ {
		e.Child[j].setIndex(j)
	}
}
// RemoveChild removes the token 't' from this element's child tokens and
// returns it. If 't' is not a child of this element, nothing is removed and
// nil is returned.
func (e *Element) RemoveChild(t Token) Token {
	if t.Parent() == e {
		return e.RemoveChildAt(t.Index())
	}
	return nil
}
// RemoveChildAt removes the child token appearing in slot 'index' of this
// element's list of child tokens. The removed child token is then returned.
// If the index is out of bounds (negative, or not less than the number of
// child tokens), no child is removed and nil is returned.
func (e *Element) RemoveChildAt(index int) Token {
	// Reject both ends of the range; a negative index would otherwise panic
	// on the slice access below.
	if index < 0 || index >= len(e.Child) {
		return nil
	}

	t := e.Child[index]

	// Every sibling after the removed slot moves down by one.
	for j := index + 1; j < len(e.Child); j++ {
		e.Child[j].setIndex(j - 1)
	}
	e.Child = append(e.Child[:index], e.Child[index+1:]...)

	// Mark the removed token as detached.
	t.setIndex(-1)
	t.setParent(nil)
	return t
}
// readFrom reads XML from the reader 'ri' and stores the parsed tokens as
// new children of this element. It returns the value reported by the
// underlying xmlReader's Bytes method along with any error. ErrXML is
// returned when start and end tags do not balance.
func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) {
// The peek reader is used only when CDATA sections must be told apart from
// ordinary text; otherwise the simple reader is used.
var r xmlReader
var pr *xmlPeekReader
if settings.PreserveCData {
pr = newXmlPeekReader(ri)
r = pr
} else {
r = newXmlSimpleReader(ri)
}
dec := xml.NewDecoder(r)
dec.CharsetReader = settings.CharsetReader
dec.Strict = !settings.Permissive
dec.Entity = settings.Entity
// The stack holds the chain of currently open elements; its bottom entry is
// the receiver.
var stack stack
stack.push(e)
for {
// Arrange to capture the bytes at the start of the next token so that a
// CDATA prefix can be recognized after the token is decoded.
if pr != nil {
pr.PeekPrepare(dec.InputOffset(), len(cdataPrefix))
}
t, err := dec.RawToken()
switch {
case err == io.EOF:
// At end of input only the receiver may remain on the stack; anything
// more means an element was left unclosed.
if len(stack.data) != 1 {
return r.Bytes(), ErrXML
}
return r.Bytes(), nil
case err != nil:
return r.Bytes(), err
case stack.empty():
// A token arrived after the receiver itself was popped.
return r.Bytes(), ErrXML
}
top := stack.peek().(*Element)
switch t := t.(type) {
case xml.StartElement:
// Note: this 'e' shadows the receiver for the rest of the case.
e := newElement(t.Name.Space, t.Name.Local, top)
for _, a := range t.Attr {
e.createAttr(a.Name.Space, a.Name.Local, a.Value, e, settings.PreserveDuplicateAttrs)
}
stack.push(e)
case xml.EndElement:
// The end tag must match the innermost open element exactly.
if top.Tag != t.Name.Local || top.Space != t.Name.Space {
return r.Bytes(), ErrXML
}
stack.pop()
case xml.CharData:
data := string(t)
var flags charDataFlags
if pr != nil {
// The CDATA check takes precedence over the whitespace check.
peekBuf := pr.PeekFinalize()
if bytes.Equal(peekBuf, cdataPrefix) {
flags = cdataFlag
} else if isWhitespace(data) {
flags = whitespaceFlag
}
} else {
if isWhitespace(data) {
flags = whitespaceFlag
}
}
newCharData(data, flags, top)
case xml.Comment:
newComment(string(t), top)
case xml.Directive:
newDirective(string(t), top)
case xml.ProcInst:
newProcInst(t.Target, string(t.Inst), top)
}
}
}
// SelectAttr looks up the attribute identified by 'key' on this element and
// returns a pointer to it, or nil when no attribute matches. The key may
// carry a namespace prefix separated from the name by a colon.
func (e *Element) SelectAttr(key string) *Attr {
	prefix, name := spaceDecompose(key)
	for i := range e.Attr {
		attr := &e.Attr[i]
		if spaceMatch(prefix, attr.Space) && attr.Key == name {
			return attr
		}
	}
	return nil
}
// SelectAttrValue returns the value of the attribute identified by 'key'. If
// this element has no matching attribute, 'dflt' is returned instead. The
// key may carry a namespace prefix separated from the name by a colon.
func (e *Element) SelectAttrValue(key, dflt string) string {
	prefix, name := spaceDecompose(key)
	for i := range e.Attr {
		if spaceMatch(prefix, e.Attr[i].Space) && e.Attr[i].Key == name {
			return e.Attr[i].Value
		}
	}
	return dflt
}
// ChildElements returns, in order, every child token of this element that is
// itself an element. The result is nil when there are none.
func (e *Element) ChildElements() []*Element {
	var result []*Element
	for i := range e.Child {
		child, isElem := e.Child[i].(*Element)
		if !isElem {
			continue
		}
		result = append(result, child)
	}
	return result
}
// SelectElement returns the first child element whose name matches 'tag', or
// nil when no child element matches. The tag may carry a namespace prefix
// separated from the name by a colon.
func (e *Element) SelectElement(tag string) *Element {
	prefix, local := spaceDecompose(tag)
	for _, tok := range e.Child {
		child, isElem := tok.(*Element)
		if !isElem {
			continue
		}
		if spaceMatch(prefix, child.Space) && child.Tag == local {
			return child
		}
	}
	return nil
}
// SelectElements returns every child element whose name matches 'tag', in
// document order. The tag may carry a namespace prefix separated from the
// name by a colon.
func (e *Element) SelectElements(tag string) []*Element {
	prefix, local := spaceDecompose(tag)
	var matches []*Element
	for _, tok := range e.Child {
		child, isElem := tok.(*Element)
		if !isElem {
			continue
		}
		if spaceMatch(prefix, child.Space) && child.Tag == local {
			matches = append(matches, child)
		}
	}
	return matches
}
// FindElement compiles the XPath-like 'path' string and returns the first
// element it matches, or nil when nothing matches. The path is compiled with
// MustCompilePath, so an invalid path string causes a panic.
func (e *Element) FindElement(path string) *Element {
	compiled := MustCompilePath(path)
	return e.FindElementPath(compiled)
}
// FindElementPath evaluates the compiled 'path' against this element and
// returns the first matching element, or nil when there is no match.
func (e *Element) FindElementPath(path Path) *Element {
	walker := newPather()
	matches := walker.traverse(e, path)
	if len(matches) == 0 {
		return nil
	}
	return matches[0]
}
// FindElements compiles the XPath-like 'path' string and returns all the
// elements it matches. The path is compiled with MustCompilePath, so an
// invalid path string causes a panic.
func (e *Element) FindElements(path string) []*Element {
	compiled := MustCompilePath(path)
	return e.FindElementsPath(compiled)
}
// FindElementsPath evaluates the compiled 'path' against this element and
// returns the slice of matching elements.
func (e *Element) FindElementsPath(path Path) []*Element {
	walker := newPather()
	matches := walker.traverse(e, path)
	return matches
}
// GetPath returns the absolute path of the element: the tags of the element
// and each of its ancestors, root first, each preceded by a slash. Elements
// with an empty tag are left out of the path.
func (e *Element) GetPath() string {
	// Collect tags from this element up to the root.
	tags := []string{}
	for cur := e; cur != nil; cur = cur.Parent() {
		if cur.Tag == "" {
			continue
		}
		tags = append(tags, cur.Tag)
	}

	// The tags were gathered leaf-first; flip them into root-first order.
	for lo, hi := 0, len(tags)-1; lo < hi; {
		tags[lo], tags[hi] = tags[hi], tags[lo]
		lo++
		hi--
	}

	return "/" + strings.Join(tags, "/")
}
// GetRelativePath returns the path of this element relative to the 'source'
// element. It returns the empty string when 'source' is nil or when the two
// elements do not share a tree, and "." when the two are the same element.
func (e *Element) GetRelativePath(source *Element) string {
	if source == nil {
		return ""
	}

	// Walk from this element toward the root, recording each element
	// visited, until the source (or the top of the tree) is reached.
	var chain []*Element
	cur := e
	for cur != nil && cur != source {
		chain = append(chain, cur)
		cur = cur.Parent()
	}

	// The source is this element or one of its ancestors, so the relative
	// path only descends.
	if cur == source {
		if len(chain) == 0 {
			return "."
		}
		tags := []string{}
		for k := len(chain) - 1; k >= 0; k-- {
			tags = append(tags, chain[k].Tag)
		}
		return "./" + strings.Join(tags, "/")
	}

	// Otherwise climb from the source toward the root, counting steps, until
	// an element already recorded in the chain is found.
	indexInChain := func(target *Element) int {
		for k := range chain {
			if chain[k] == target {
				return k
			}
		}
		return -1
	}

	steps := 0
	for cur = source; cur != nil; cur = cur.Parent() {
		if k := indexInChain(cur); k >= 0 {
			// Keep only the part of the chain below the shared ancestor.
			chain = chain[:k]
			break
		}
		steps++
	}

	// The climb ran off the top of the tree without meeting the chain, so
	// the two elements are not in the same tree.
	if cur == nil {
		return ""
	}

	// Emit one ".." per step climbed, followed by the descent to this
	// element.
	tags := []string{}
	for k := 0; k < steps; k++ {
		tags = append(tags, "..")
	}
	for k := len(chain) - 1; k >= 0; k-- {
		tags = append(tags, chain[k].Tag)
	}
	return strings.Join(tags, "/")
}
// IndentWithSettings rewrites the element and its child tree by inserting
// character data tokens that hold newlines and indentation, as configured by
// the indent settings 's'. The element is indented as though it were the
// root of a document, which makes this most useful just before writing the
// element out as an XML fragment with WriteTo.
func (e *Element) IndentWithSettings(s *IndentSettings) {
	fn := getIndentFunc(s)
	e.indent(1, fn, s)
}
// indent recursively inserts proper indentation between an XML element's
// child tokens. 'depth' is the nesting level passed to the 'indent' function
// to produce the whitespace string for this element's children; 's' is
// forwarded to stripIndent and to the recursive calls.
func (e *Element) indent(depth int, indent indentFunc, s *IndentSettings) {
// Start from a clean slate by removing whitespace tokens first.
e.stripIndent(s)
n := len(e.Child)
if n == 0 {
return
}
// Rebuild the child list from scratch. The capacity leaves room for one
// whitespace token before each child plus one trailing token.
oldChild := e.Child
e.Child = make([]Token, 0, n*2+1)
isCharData, firstNonCharData := false, true
for _, c := range oldChild {
// Insert NL+indent before child if it's not character data.
// Exceptions: when it's the first non-character-data child, or when
// the child is at root depth.
_, isCharData = c.(*CharData)
if !isCharData {
if !firstNonCharData || depth > 0 {
// Note: this 's' (the indent string) shadows the settings parameter
// within this inner block only.
s := indent(depth)
if s != "" {
newCharData(s, whitespaceFlag, e)
}
}
firstNonCharData = false
}
e.addChild(c)
// Recursively process child elements.
if ce, ok := c.(*Element); ok {
ce.indent(depth+1, indent, s)
}
}
// Insert NL+indent after the last child (i.e., ahead of this element's end
// tag), one level shallower than the children. Skipped when the last child
// is character data.
if !isCharData {
if !firstNonCharData || depth > 0 {
s := indent(depth - 1)
if s != "" {
newCharData(s, whitespaceFlag, e)
}
}
}
}
// stripIndent removes every whitespace-flagged CharData child from the
// element and renumbers the children that remain. A single whitespace child
// is left in place when s.PreserveLeafWhitespace is set.
func (e *Element) stripIndent(s *IndentSettings) {
	isIndent := func(t Token) bool {
		cd, ok := t.(*CharData)
		return ok && cd.IsWhitespace()
	}

	// Work out how many children will survive the strip.
	keep := 0
	for _, c := range e.Child {
		if !isIndent(c) {
			keep++
		}
	}

	// Nothing to strip.
	if keep == len(e.Child) {
		return
	}

	// A lone whitespace child is retained when leaf whitespace is preserved.
	if keep == 0 && len(e.Child) == 1 && s.PreserveLeafWhitespace {
		return
	}

	// Rebuild the child list without whitespace tokens, renumbering as we
	// go.
	kept := make([]Token, 0, keep)
	for _, c := range e.Child {
		if isIndent(c) {
			continue
		}
		c.setIndex(len(kept))
		kept = append(kept, c)
	}
	e.Child = kept
}
// stripTrailingWhitespace truncates the element's child list just after the
// last child that is not a whitespace CharData token. If every child is a
// whitespace CharData token (or there are no children), the list is left
// unchanged.
func (e *Element) stripTrailingWhitespace() {
	last := len(e.Child) - 1
	for last >= 0 {
		cd, isCharData := e.Child[last].(*CharData)
		if !isCharData || !cd.IsWhitespace() {
			e.Child = e.Child[:last+1]
			return
		}
		last--
	}
}
// dup returns a deep copy of the element with 'parent' as the copy's parent.
// Child tokens are duplicated recursively and attributes are copied by
// value. The copy keeps the original's index.
func (e *Element) dup(parent *Element) Token {
	ne := &Element{
		Space:  e.Space,
		Tag:    e.Tag,
		Attr:   make([]Attr, len(e.Attr)),
		Child:  make([]Token, len(e.Child)),
		parent: parent,
		index:  e.index,
	}
	for i, t := range e.Child {
		ne.Child[i] = t.dup(ne)
	}
	copy(ne.Attr, e.Attr)
	// The copied attributes still point at the source element; rebind them
	// so that Attr.Element reports the duplicate that now owns them.
	for i := range ne.Attr {
		ne.Attr[i].element = ne
	}
	return ne
}
// Parent reports the element that contains this element, or nil when this
// element has no parent.
func (e *Element) Parent() *Element {
	return e.parent
}
// Index reports this element's position within its parent element's list of
// child tokens. An element without a parent reports -1.
func (e *Element) Index() int {
	return e.index
}
// WriteTo serializes the element, its attributes, and its child tokens to
// the writer 'w'. An element without children is written as a self-closing
// tag unless s.CanonicalEndTags is set, in which case an explicit end tag is
// emitted.
func (e *Element) WriteTo(w Writer, s *WriteSettings) {
	// Start tag and attributes.
	w.WriteByte('<')
	w.WriteString(e.FullTag())
	for i := range e.Attr {
		w.WriteByte(' ')
		e.Attr[i].WriteTo(w, s)
	}

	switch {
	case len(e.Child) > 0:
		// Children followed by the end tag.
		w.WriteByte('>')
		for _, c := range e.Child {
			c.WriteTo(w, s)
		}
		w.Write([]byte{'<', '/'})
		w.WriteString(e.FullTag())
		w.WriteByte('>')
	case s.CanonicalEndTags:
		// Empty element with an explicit end tag.
		w.Write([]byte{'>', '<', '/'})
		w.WriteString(e.FullTag())
		w.WriteByte('>')
	default:
		// Empty element in self-closing form.
		w.Write([]byte{'/', '>'})
	}
}
// setParent records 'parent' as the element that contains this element.
func (e *Element) setParent(parent *Element) {
	e.parent = parent
}
// setIndex records this element's position within its parent's Child slice.
func (e *Element) setIndex(index int) {
	e.index = index
}
// addChild appends the token 't' to element e's child list, setting the
// token's parent to e and its index to its new position. Unlike AddChild, it
// does not detach 't' from any previous parent.
func (e *Element) addChild(t Token) {
	pos := len(e.Child)
	e.Child = append(e.Child, t)
	t.setParent(e)
	t.setIndex(pos)
}
// CreateAttr sets the attribute named by 'key' to 'value' on this element
// and returns a pointer to it. An existing attribute with the same key has
// its value replaced; otherwise a new attribute is added. The key may carry
// a namespace prefix separated from the name by a colon.
func (e *Element) CreateAttr(key, value string) *Attr {
	prefix, name := spaceDecompose(key)
	attr := e.createAttr(prefix, name, value, e, false)
	return attr
}
// createAttr is the shared implementation for adding attributes. Unless
// 'preserveDups' is set, an existing attribute with the same space and key
// is updated in place and returned. Otherwise a new attribute owned by
// 'parent' is appended and a pointer to it is returned.
func (e *Element) createAttr(space, key, value string, parent *Element, preserveDups bool) *Attr {
	if !preserveDups {
		for i := range e.Attr {
			existing := &e.Attr[i]
			if existing.Space == space && existing.Key == key {
				existing.Value = value
				return existing
			}
		}
	}

	e.Attr = append(e.Attr, Attr{
		Space:   space,
		Key:     key,
		Value:   value,
		element: parent,
	})
	return &e.Attr[len(e.Attr)-1]
}
// RemoveAttr deletes the first attribute of this element whose space and key
// match 'key' and returns a detached copy of it (its element pointer is
// nil). It returns nil when no attribute matches. The key may carry a
// namespace prefix separated from the name by a colon.
func (e *Element) RemoveAttr(key string) *Attr {
	prefix, name := spaceDecompose(key)
	for i := range e.Attr {
		if e.Attr[i].Space != prefix || e.Attr[i].Key != name {
			continue
		}
		// Capture the values before the slice is compacted over them.
		removed := &Attr{
			Space:   e.Attr[i].Space,
			Key:     e.Attr[i].Key,
			Value:   e.Attr[i].Value,
			element: nil,
		}
		e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...)
		return removed
	}
	return nil
}
// SortAttrs sorts this element's attributes in place, ordering them first by
// namespace prefix and then by key.
func (e *Element) SortAttrs() {
	attrs := byAttr(e.Attr)
	sort.Sort(attrs)
}
// byAttr adapts a slice of attributes to sort.Interface, ordering by
// namespace prefix and then by key.
type byAttr []Attr

// Len returns the number of attributes.
func (a byAttr) Len() int { return len(a) }

// Swap exchanges the attributes at positions i and j.
func (a byAttr) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

// Less reports whether the attribute at i sorts before the attribute at j.
// The Key is consulted only when both attributes share the same Space.
func (a byAttr) Less(i, j int) bool {
	if a[i].Space != a[j].Space {
		return a[i].Space < a[j].Space
	}
	return a[i].Key < a[j].Key
}
// FullKey returns the attribute's key qualified with its namespace prefix,
// in the form "prefix:key". When the attribute has no prefix, the bare key
// is returned.
func (a *Attr) FullKey() string {
	if a.Space != "" {
		return a.Space + ":" + a.Key
	}
	return a.Key
}
// Element reports the element that owns this attribute.
func (a *Attr) Element() *Element {
	return a.element
}
// NamespaceURI returns the XML namespace URI associated with this attribute.
// The function returns the empty string if the attribute is unprefixed, if
// the attribute is part of the XML default namespace, or if the attribute is
// not attached to an element.
func (a *Attr) NamespaceURI() string {
	if a.Space == "" {
		return ""
	}
	// A detached attribute (for example, one returned by RemoveAttr) has no
	// element whose namespace declarations could be searched.
	if a.element == nil {
		return ""
	}
	return a.element.findLocalNamespaceURI(a.Space)
}
// WriteTo serializes the attribute to the writer as key=value, with the
// value escaped and quoted. s.AttrSingleQuote selects single quotes instead
// of double quotes, and s.CanonicalAttrVal selects the canonical attribute
// escaping mode.
func (a *Attr) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString(a.FullKey())

	// Pick the matching opening and closing quote up front.
	opening, closing := `="`, byte('"')
	if s.AttrSingleQuote {
		opening, closing = `='`, byte('\'')
	}
	w.WriteString(opening)

	var mode escapeMode
	switch {
	case s.CanonicalAttrVal:
		mode = escapeCanonicalAttr
	default:
		mode = escapeNormal
	}
	escapeString(w, a.Value, mode)

	w.WriteByte(closing)
}
// NewText returns a new CharData token holding the simple text 'text'. The
// token has no parent.
func NewText(text string) *CharData {
	return newCharData(text, 0, nil)
}
// NewCData returns a new CharData token flagged as a CDATA section with
// 'data' as its content. The token has no parent.
func NewCData(data string) *CharData {
	return newCharData(data, cdataFlag, nil)
}
// NewCharData returns a new CharData token holding the simple text 'data'.
// The token has no parent.
//
// Deprecated: NewCharData is deprecated. Instead, use NewText, which does the
// same thing.
func NewCharData(data string) *CharData {
	return newCharData(data, 0, nil)
}
// newCharData builds a CharData token with the given data and flags. When
// 'parent' is non-nil the token is appended to the parent's children;
// otherwise it is returned unbound, with a nil parent and an index of -1.
func newCharData(data string, flags charDataFlags, parent *Element) *CharData {
	c := &CharData{Data: data, flags: flags, index: -1}
	if parent == nil {
		return c
	}
	parent.addChild(c)
	return c
}
// CreateText appends a new CharData token holding the simple text 'text' to
// this element's child tokens and returns it.
func (e *Element) CreateText(text string) *CharData {
	return newCharData(text, 0, e)
}
// CreateCData appends a new CharData token flagged as a CDATA section, with
// 'data' as its content, to this element's child tokens and returns it.
func (e *Element) CreateCData(data string) *CharData {
	return newCharData(data, cdataFlag, e)
}
// CreateCharData appends a new CharData token holding the simple text 'data'
// to this element's child tokens and returns it.
//
// Deprecated: CreateCharData is deprecated. Instead, use CreateText, which
// does the same thing.
func (e *Element) CreateCharData(data string) *CharData {
	cd := e.CreateText(data)
	return cd
}
// SetData replaces the content of the CharData token with 'text', whether
// the token holds simple text or a CDATA section. The token's whitespace
// flag is updated to reflect the new content.
func (c *CharData) SetData(text string) {
	c.Data = text
	if !isWhitespace(text) {
		c.flags &^= whitespaceFlag
		return
	}
	c.flags |= whitespaceFlag
}
// IsCData reports whether this CharData token contains a CDATA section. It
// returns false if the token contains simple text.
func (c *CharData) IsCData() bool {
	return c.flags&cdataFlag != 0
}
// IsWhitespace reports whether this CharData token is flagged as containing
// only whitespace.
func (c *CharData) IsWhitespace() bool {
	return c.flags&whitespaceFlag != 0
}
// Parent reports the element that contains this CharData token, or nil when
// the token has no parent.
func (c *CharData) Parent() *Element {
	return c.parent
}
// Index reports this CharData token's position within its parent element's
// list of child tokens. A token without a parent reports -1.
func (c *CharData) Index() int {
	return c.index
}
// WriteTo serializes character data to the writer. A CDATA token is written
// verbatim between the "<![CDATA[" and "]]>" delimiters. Simple text is
// escaped, using the canonical text mode when s.CanonicalText is set.
func (c *CharData) WriteTo(w Writer, s *WriteSettings) {
	if c.IsCData() {
		// CDATA content is emitted unescaped inside its delimiters.
		w.WriteString(`<![CDATA[`)
		w.WriteString(c.Data)
		w.WriteString(`]]>`)
		return
	}

	var m escapeMode
	if s.CanonicalText {
		m = escapeCanonicalText
	} else {
		m = escapeNormal
	}
	escapeString(w, c.Data, m)
}
// dup returns a copy of the character data token with 'parent' as the copy's
// parent. The data, flags, and index are carried over unchanged.
func (c *CharData) dup(parent *Element) Token {
	clone := &CharData{
		Data:   c.Data,
		flags:  c.flags,
		parent: parent,
		index:  c.index,
	}
	return clone
}
// setParent records 'parent' as the element that contains this character
// data token.
func (c *CharData) setParent(parent *Element) {
	c.parent = parent
}
// setIndex records the CharData token's position within its parent element's
// Child slice.
func (c *CharData) setIndex(index int) {
	c.index = index
}
// NewComment returns a new comment token holding 'comment'. The token has no
// parent.
func NewComment(comment string) *Comment {
	return newComment(comment, nil)
}
// newComment builds a comment token holding 'comment'. When 'parent' is
// non-nil the token is appended to the parent's children; otherwise it is
// returned unbound, with a nil parent and an index of -1.
func newComment(comment string, parent *Element) *Comment {
	c := &Comment{Data: comment, index: -1}
	if parent == nil {
		return c
	}
	parent.addChild(c)
	return c
}
// CreateComment appends a new comment token holding the 'comment' string to
// this element's child tokens and returns it.
func (e *Element) CreateComment(comment string) *Comment {
	return newComment(comment, e)
}
// dup returns a copy of the comment with 'parent' as the copy's parent. The
// data and index are carried over unchanged.
func (c *Comment) dup(parent *Element) Token {
	clone := &Comment{
		Data:   c.Data,
		parent: parent,
		index:  c.index,
	}
	return clone
}
// Parent reports the element that contains this comment token, or nil when
// the token has no parent.
func (c *Comment) Parent() *Element {
	return c.parent
}
// Index reports this Comment token's position within its parent element's
// list of child tokens. A token without a parent reports -1.
func (c *Comment) Index() int {
	return c.index
}
// WriteTo serializes the comment to the writer, wrapping the comment's data
// in the XML comment delimiters "<!--" and "-->". The settings 's' are not
// used.
func (c *Comment) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString("<!--")
	w.WriteString(c.Data)
	w.WriteString("-->")
}
// setParent records 'parent' as the element that contains this comment
// token.
func (c *Comment) setParent(parent *Element) {
	c.parent = parent
}
// setIndex records the Comment token's position within its parent element's
// Child slice.
func (c *Comment) setIndex(index int) {
	c.index = index
}
// NewDirective returns a new XML directive token holding 'data'. The token
// has no parent.
func NewDirective(data string) *Directive {
	return newDirective(data, nil)
}
// newDirective builds an XML directive token holding 'data'. When 'parent'
// is non-nil the token is appended to the parent's children; otherwise it is
// returned unbound, with a nil parent and an index of -1.
func newDirective(data string, parent *Element) *Directive {
	d := &Directive{Data: data, index: -1}
	if parent == nil {
		return d
	}
	parent.addChild(d)
	return d
}
// CreateDirective appends a new XML directive token holding 'data' to this
// element's child tokens and returns it.
func (e *Element) CreateDirective(data string) *Directive {
	return newDirective(data, e)
}
// dup returns a copy of the directive with 'parent' as the copy's parent.
// The data and index are carried over unchanged.
func (d *Directive) dup(parent *Element) Token {
	clone := &Directive{
		Data:   d.Data,
		parent: parent,
		index:  d.index,
	}
	return clone
}
// Parent reports the element that contains this directive token, or nil when
// the token has no parent.
func (d *Directive) Parent() *Element {
	return d.parent
}
// Index reports this Directive token's position within its parent element's
// list of child tokens. A token without a parent reports -1.
func (d *Directive) Index() int {
	return d.index
}
// WriteTo serializes the XML directive to the writer, wrapping the
// directive's data in the "<!" and ">" delimiters. The settings 's' are not
// used.
func (d *Directive) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString("<!")
	w.WriteString(d.Data)
	w.WriteString(">")
}
// setParent records 'parent' as the element that contains this directive
// token.
func (d *Directive) setParent(parent *Element) {
	d.parent = parent
}
// setIndex records the Directive token's position within its parent
// element's Child slice.
func (d *Directive) setIndex(index int) {
	d.index = index
}
// NewProcInst returns a new XML processing instruction token with the given
// 'target' and instruction 'inst'. The token has no parent.
func NewProcInst(target, inst string) *ProcInst {
	return newProcInst(target, inst, nil)
}
// newProcInst builds an XML processing instruction token with the given
// 'target' and 'inst'. When 'parent' is non-nil the token is appended to the
// parent's children; otherwise it is returned unbound, with a nil parent and
// an index of -1.
func newProcInst(target, inst string, parent *Element) *ProcInst {
	p := &ProcInst{Target: target, Inst: inst, index: -1}
	if parent == nil {
		return p
	}
	parent.addChild(p)
	return p
}
// CreateProcInst appends a new XML processing instruction token, with the
// given 'target' and instruction 'inst', to this element's child tokens and
// returns it.
func (e *Element) CreateProcInst(target, inst string) *ProcInst {
	return newProcInst(target, inst, e)
}
// dup returns a copy of the processing instruction with 'parent' as the
// copy's parent. The target, instruction, and index are carried over
// unchanged.
func (p *ProcInst) dup(parent *Element) Token {
	clone := &ProcInst{
		Target: p.Target,
		Inst:   p.Inst,
		parent: parent,
		index:  p.index,
	}
	return clone
}
// Parent reports the element that contains this processing instruction
// token, or nil when the token has no parent.
func (p *ProcInst) Parent() *Element {
	return p.parent
}
// Index reports this ProcInst token's position within its parent element's
// list of child tokens. A token without a parent reports -1.
func (p *ProcInst) Index() int {
	return p.index
}
// WriteTo serializes the processing instruction to the writer in the form
// "<?target inst?>". The separating space and the instruction are omitted
// when Inst is empty. The settings 's' are not used.
func (p *ProcInst) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString("<?")
	w.WriteString(p.Target)
	if p.Inst != "" {
		w.WriteByte(' ')
		w.WriteString(p.Inst)
	}
	w.WriteString("?>")
}
// setParent records 'parent' as the element that contains this processing
// instruction token.
func (p *ProcInst) setParent(parent *Element) {
	p.parent = parent
}
// setIndex records the processing instruction token's position within its
// parent element's Child slice.
func (p *ProcInst) setIndex(index int) {
	p.index = index
}
etree-1.3.0/etree_test.go 0000664 0000000 0000000 00000115330 14544351356 0015326 0 ustar 00root root 0000000 0000000 // Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package etree
import (
"bytes"
"encoding/xml"
"io"
"math/rand"
"strings"
"testing"
)
// newDocumentFromString parses 's' into a new Document using default read
// settings. Parsing is delegated to newDocumentFromString2.
func newDocumentFromString(t *testing.T, s string) *Document {
	var defaults ReadSettings
	return newDocumentFromString2(t, s, defaults)
}
// newDocumentFromString2 parses 's' into a new Document using the supplied
// read settings. The test is failed immediately if parsing returns an error.
func newDocumentFromString2(t *testing.T, s string, settings ReadSettings) *Document {
	t.Helper()

	doc := NewDocument()
	doc.ReadSettings = settings
	if err := doc.ReadFromString(s); err != nil {
		t.Fatal("etree: failed to parse document")
	}
	return doc
}
// checkStrEq reports a test error, showing both strings, when 'got' differs
// from 'want'.
func checkStrEq(t *testing.T, got, want string) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected result.\nGot:\n%s\nWanted:\n%s\n", got, want)
}
// checkStrBinaryEq reports a test error when 'got' differs from 'want',
// printing both strings as byte slices.
func checkStrBinaryEq(t *testing.T, got, want string) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected result.\nGot:\n%v\nWanted:\n%v\n", []byte(got), []byte(want))
}
// checkIntEq reports a test error when the integer 'got' differs from
// 'want'.
func checkIntEq(t *testing.T, got, want int) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected integer. Got: %d. Wanted: %d\n", got, want)
}
// checkBoolEq reports a test error when the boolean 'got' differs from
// 'want'.
func checkBoolEq(t *testing.T, got, want bool) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected boolean. Got: %v. Wanted: %v\n", got, want)
}
// checkElementEq reports a test error when 'got' and 'want' are not the same
// element pointer.
func checkElementEq(t *testing.T, got, want *Element) {
	t.Helper()
	if got == want {
		return
	}
	t.Errorf("etree: unexpected element. Got: %v. Wanted: %v.\n", got, want)
}
// checkDocEq re-indents the document with NoIndent, serializes it to a
// string, and reports a test error when serialization fails or the result
// differs from 'expected'. Note that the document is modified by the
// indentation step.
func checkDocEq(t *testing.T, doc *Document, expected string) {
	t.Helper()

	doc.Indent(NoIndent)
	got, err := doc.WriteToString()
	if err != nil {
		t.Error("etree: failed to serialize document")
	}
	if got == expected {
		return
	}
	t.Errorf("etree: unexpected document.\nGot:\n%s\nWanted:\n%s\n", got, expected)
}
// checkIndexes verifies that every child token of 'e' reports an Index equal
// to its position in e.Child, recursing into child elements.
func checkIndexes(t *testing.T, e *Element) {
	t.Helper()
	for pos, child := range e.Child {
		if got := child.Index(); got != pos {
			t.Errorf("Child index mismatch. Got %d, expected %d.", got, pos)
		}
		if sub, isElem := child.(*Element); isElem {
			checkIndexes(t, sub)
		}
	}
}
// TestDocument builds a small document through the Create* API, serializes
// it, and then checks the tree structure and the basic select/remove
// operations on elements and attributes.
func TestDocument(t *testing.T) {
// Create a document
doc := NewDocument()
doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
store := doc.CreateElement("store")
store.CreateAttr("xmlns:t", "urn:books-com:titles")
store.CreateDirective("Directive")
store.CreateComment("This is a comment")
book := store.CreateElement("book")
// The second CreateAttr call with the same key replaces the first value.
book.CreateAttr("lang", "fr")
book.CreateAttr("lang", "en")
title := book.CreateElement("t:title")
title.SetText("Nicholas Nickleby")
title.SetText("Great Expectations")
author := book.CreateElement("author")
author.CreateCharData("Charles Dickens")
review := book.CreateElement("review")
review.CreateCData("<<< Will be replaced")
review.SetCData(">>> Excellent book")
doc.IndentTabs()
checkIndexes(t, &doc.Element)
// Serialize the document to a string
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
// Make sure the serialized XML matches expectation.
// NOTE(review): the literal below appears to have lost its XML markup (only
// the text content and a CDATA tail remain) - confirm against the upstream
// file before relying on this comparison.
expected := `
Great ExpectationsCharles Dickens>> Excellent book]]>
`
checkStrEq(t, s, expected)
// Test the structure of the XML
if doc.Root() != store {
t.Error("etree: root mismatch")
}
// Child counts include the whitespace tokens inserted by IndentTabs.
if len(store.ChildElements()) != 1 || len(store.Child) != 7 {
t.Error("etree: incorrect tree structure")
}
if len(book.ChildElements()) != 3 || len(book.Attr) != 1 || len(book.Child) != 7 {
t.Error("etree: incorrect tree structure")
}
if len(title.ChildElements()) != 0 || len(title.Child) != 1 || len(title.Attr) != 0 {
t.Error("etree: incorrect tree structure")
}
if len(author.ChildElements()) != 0 || len(author.Child) != 1 || len(author.Attr) != 0 {
t.Error("etree: incorrect tree structure")
}
if len(review.ChildElements()) != 0 || len(review.Child) != 1 || len(review.Attr) != 0 {
t.Error("etree: incorrect tree structure")
}
if book.parent != store || store.parent != &doc.Element || doc.parent != nil {
t.Error("etree: incorrect tree structure")
}
if title.parent != book || author.parent != book {
t.Error("etree: incorrect tree structure")
}
// Perform some basic queries on the document
elements := doc.SelectElements("store")
if len(elements) != 1 || elements[0] != store {
t.Error("etree: incorrect SelectElements result")
}
element := doc.SelectElement("store")
if element != store {
t.Error("etree: incorrect SelectElement result")
}
elements = store.SelectElements("book")
if len(elements) != 1 || elements[0] != book {
t.Error("etree: incorrect SelectElements result")
}
element = store.SelectElement("book")
if element != book {
t.Error("etree: incorrect SelectElement result")
}
attr := book.SelectAttr("lang")
if attr == nil || attr.Key != "lang" || attr.Value != "en" {
t.Error("etree: incorrect SelectAttr result")
}
if book.SelectAttrValue("lang", "unknown") != "en" {
t.Error("etree: incorrect SelectAttrValue result")
}
if book.SelectAttrValue("t:missing", "unknown") != "unknown" {
t.Error("etree: incorrect SelectAttrValue result")
}
attr = book.RemoveAttr("lang")
if attr.Value != "en" {
t.Error("etree: incorrect RemoveAttr result")
}
book.CreateAttr("lang", "de")
attr = book.RemoveAttr("lang")
if attr.Value != "de" {
t.Error("etree: incorrect RemoveAttr result")
}
// The title element is expected to be found with its own prefix or with no
// prefix, but not with a different prefix.
element = book.SelectElement("t:title")
if element != title || element.Text() != "Great Expectations" || len(element.Attr) != 0 {
t.Error("etree: incorrect SelectElement result")
}
element = book.SelectElement("title")
if element != title {
t.Error("etree: incorrect SelectElement result")
}
element = book.SelectElement("p:title")
if element != nil {
t.Error("etree: incorrect SelectElement result")
}
// After removal the title must no longer be selectable.
element = book.RemoveChildAt(title.Index()).(*Element)
if element != title {
t.Error("etree: incorrect RemoveElement result")
}
element = book.SelectElement("title")
if element != nil {
t.Error("etree: incorrect SelectElement result")
}
element = book.SelectElement("review")
if element != review || element.Text() != ">>> Excellent book" || len(element.Attr) != 0 {
t.Error("etree: incorrect SelectElement result")
}
}
// TestImbalancedXML checks that ReadFromString returns an error for each
// input in 'cases'.
func TestImbalancedXML(t *testing.T) {
// NOTE(review): most of the case literals below are empty or reduced to bare
// text, which suggests their XML markup was lost - confirm against the
// upstream file.
cases := []string{
``,
``,
``,
``,
``,
`malformed`,
`malformed`,
``,
``,
``,
``,
}
for _, c := range cases {
doc := NewDocument()
err := doc.ReadFromString(c)
if err == nil {
t.Errorf("etree: imbalanced XML should have failed:\n%s", c)
}
}
}
// TestDocumentCharsetReader checks that a CharsetReader supplied through
// ReadSettings is invoked with the charset label "lowercase" and that the
// text of the parsed elements has passed through the returned reader.
func TestDocumentCharsetReader(t *testing.T) {
// NOTE(review): the input literal below appears to have lost its XML markup
// (including the declaration that would carry the encoding label) - confirm
// against the upstream file.
s := `
Great ExpectationsCharles Dickens`
charsetLabel := ""
doc := newDocumentFromString2(t, s, ReadSettings{
CharsetReader: func(label string, input io.Reader) (io.Reader, error) {
// Record the label so the test can assert on it afterwards.
charsetLabel = label
return &lowercaseCharsetReader{input}, nil
},
})
if charsetLabel != "lowercase" {
t.Fatalf("etree: incorrect charset encoding, expected lowercase, got %s", charsetLabel)
}
cases := []struct {
path string
text string
}{
{"/store/book/title", "great expectations"},
{"/store/book/author", "charles dickens"},
}
for _, c := range cases {
e := doc.FindElement(c.path)
if e == nil {
t.Errorf("etree: failed to find element '%s'", c.path)
} else if e.Text() != c.text {
t.Errorf("etree: expected path '%s' to contain '%s', got '%s'", c.path, c.text, e.Text())
}
}
}
// lowercaseCharsetReader is a test io.Reader that converts the ASCII
// uppercase letters read from the wrapped reader 'r' to lowercase.
type lowercaseCharsetReader struct {
	r io.Reader
}

// Read reads from the wrapped reader into 'p' and lowercases the ASCII
// letters A-Z among the n bytes read. It returns the byte count and error
// reported by the wrapped reader.
func (c *lowercaseCharsetReader) Read(p []byte) (n int, err error) {
	n, err = c.r.Read(p)
	// Convert whatever was read even when err is non-nil: an io.Reader may
	// return n > 0 together with an error such as io.EOF, and those bytes
	// are still data the caller will consume.
	for i := 0; i < n; i++ {
		if p[i] >= 'A' && p[i] <= 'Z' {
			p[i] += 'a' - 'A'
		}
	}
	return n, err
}
// TestDocumentReadPermissive checks that the same input is rejected with
// default read settings and accepted once ReadSettings.Permissive is
// enabled.
func TestDocumentReadPermissive(t *testing.T) {
// NOTE(review): the input literal is empty here, which suggests its XML
// markup was lost - confirm against the upstream file.
s := ""
doc := NewDocument()
err := doc.ReadFromString(s)
if err == nil {
t.Fatal("etree: incorrect ReadFromString result")
}
doc.ReadSettings.Permissive = true
err = doc.ReadFromString(s)
if err != nil {
t.Fatal("etree: incorrect ReadFromString result")
}
}
// TestEmbeddedComment parses a document and checks that the Text of its 'a'
// element is "123456".
func TestEmbeddedComment(t *testing.T) {
// NOTE(review): judging by the test name and the SelectElement("a") call,
// the literal below presumably held an <a> element with a comment embedded
// in its text; the markup appears to have been lost - confirm against the
// upstream file.
s := `123456`
doc := NewDocument()
err := doc.ReadFromString(s)
if err != nil {
t.Fatal("etree: incorrect ReadFromString result")
}
a := doc.SelectElement("a")
checkStrEq(t, a.Text(), "123456")
}
// TestDocumentReadHTMLEntities checks that the same input is rejected with
// default read settings and accepted once ReadSettings.Entity is set to
// xml.HTMLEntity.
func TestDocumentReadHTMLEntities(t *testing.T) {
// NOTE(review): the literal below appears to have lost its XML markup, and
// what was presumably an HTML entity reference now shows as a literal arrow
// character - confirm against the upstream file.
s := `→ Great ExpectationsCharles Dickens`
doc := NewDocument()
err := doc.ReadFromString(s)
if err == nil {
t.Fatal("etree: incorrect ReadFromString result")
}
doc.ReadSettings.Entity = xml.HTMLEntity
err = doc.ReadFromString(s)
if err != nil {
t.Fatal("etree: incorrect ReadFromString result")
}
}
func TestEscapeCodes(t *testing.T) {
cases := []struct {
input string
normal string
attrCanonical string
textCanonical string
}{
{
"&<>'\"\t\n\r",
"&<>'"\t\n\r",
"'"
\">&<>'"\t\n\r",
"&<>'\"\t\n
",
},
{
"\x00\x1f\x08\x09\x0a\x0d",
"���\t\n\r",
"���\t\n\r",
"���\t\n
",
},
}
for _, c := range cases {
doc := NewDocument()
e := doc.CreateElement("e")
e.SetText(c.input)
e.CreateAttr("a", c.input)
doc.WriteSettings.CanonicalText = false
doc.WriteSettings.CanonicalAttrVal = false
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: Escape test produced inocrrect result.")
}
checkStrEq(t, s, c.normal)
doc.WriteSettings.CanonicalText = false
doc.WriteSettings.CanonicalAttrVal = true
s, err = doc.WriteToString()
if err != nil {
t.Error("etree: Escape test produced inocrrect result.")
}
checkStrEq(t, s, c.attrCanonical)
doc.WriteSettings.CanonicalText = true
doc.WriteSettings.CanonicalAttrVal = false
s, err = doc.WriteToString()
if err != nil {
t.Error("etree: Escape test produced inocrrect result.")
}
checkStrEq(t, s, c.textCanonical)
}
}
// TestCanonical builds a document with the CanonicalEndTags, CanonicalText
// and CanonicalAttrVal write settings all enabled, indents it, and compares
// the serialized output with the expected string.
func TestCanonical(t *testing.T) {
BOM := "\xef\xbb\xbf"
doc := NewDocument()
doc.WriteSettings.CanonicalEndTags = true
doc.WriteSettings.CanonicalText = true
doc.WriteSettings.CanonicalAttrVal = true
// The BOM bytes are added as the document's first character data token.
doc.CreateCharData(BOM)
doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
people := doc.CreateElement("People")
people.CreateComment("These are all known people")
jon := people.CreateElement("Person")
jon.CreateAttr("name", "Jon O'Reilly")
jon.SetText("\r<'\">&\u0004\u0005\u001f�")
sally := people.CreateElement("Person")
sally.CreateAttr("name", "Sally")
sally.CreateAttr("escape", "\r\n\t<'\">&")
doc.Indent(2)
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: WriteSettings WriteTo produced incorrect result.")
}
expected := BOM + `
<'">&����
`
checkStrEq(t, s, expected)
}
// TestCopy verifies that Document.Copy yields a document that serializes
// identically to the original, consists of distinct element instances, and
// does not change when the original is modified afterwards.
func TestCopy(t *testing.T) {
s := `Great ExpectationsCharles Dickens`
doc := newDocumentFromString(t, s)
s1, err := doc.WriteToString()
if err != nil {
t.Error("etree: incorrect WriteToString result")
}
doc2 := doc.Copy()
checkIndexes(t, &doc2.Element)
s2, err := doc2.WriteToString()
if err != nil {
t.Error("etree: incorrect Copy result")
}
if s1 != s2 {
t.Error("etree: mismatched Copy result")
t.Error("wanted:\n" + s1)
t.Error("got:\n" + s2)
}
// The same path must resolve to different element instances in each document.
e1 := doc.FindElement("./store/book/title")
e2 := doc2.FindElement("./store/book/title")
if e1 == nil || e2 == nil || e1.parent == nil || e1 == e2 {
t.Error("etree: incorrect FindElement result")
}
// Removing the element from the original must make the two outputs differ.
e1.parent.RemoveChildAt(e1.Index())
s1, _ = doc.WriteToString()
s2, _ = doc2.WriteToString()
if s1 == s2 {
t.Error("etree: incorrect result after RemoveElement")
}
}
// TestGetPath checks GetRelativePath and GetPath against a table of
// (from, to) element pairs. For each case, relpath is the expected result of
// to.GetRelativePath(from) and topath is the expected result of to.GetPath().
func TestGetPath(t *testing.T) {
s := ``
doc := newDocumentFromString(t, s)
cases := []struct {
from string
to string
relpath string
topath string
}{
{"a", ".", "..", "/"},
{".", "a", "./a", "/a"},
{"a/b1/c1/d1", ".", "../../../..", "/"},
{".", "a/b1/c1/d1", "./a/b1/c1/d1", "/a/b1/c1/d1"},
{"a", "a", ".", "/a"},
{"a/b1", "a/b1/c1", "./c1", "/a/b1/c1"},
{"a/b1/c1", "a/b1", "..", "/a/b1"},
{"a/b1/c1", "a/b1/c1", ".", "/a/b1/c1"},
{"a", "a/b1", "./b1", "/a/b1"},
{"a/b1", "a", "..", "/a"},
{"a", "a/b1/c1", "./b1/c1", "/a/b1/c1"},
{"a/b1/c1", "a", "../..", "/a"},
{"a/b1/c1/d1", "a", "../../..", "/a"},
{"a", "a/b1/c1/d1", "./b1/c1/d1", "/a/b1/c1/d1"},
{"a/b1", "a/b2", "../b2", "/a/b2"},
{"a/b2", "a/b1", "../b1", "/a/b1"},
{"a/b1/c1/d1", "a/b2/c2/d2", "../../../b2/c2/d2", "/a/b2/c2/d2"},
{"a/b2/c2/d2", "a/b1/c1/d1", "../../../b1/c1/d1", "/a/b1/c1/d1"},
{"a/b1/c1/d1", "a/b1/c1/d1a", "../d1a", "/a/b1/c1/d1a"},
}
for _, c := range cases {
// fe and te are the elements found at the "from" and "to" paths.
fe := doc.FindElement(c.from)
te := doc.FindElement(c.to)
rp := te.GetRelativePath(fe)
if rp != c.relpath {
t.Errorf("GetRelativePath from '%s' to '%s'. Expected '%s', got '%s'.\n", c.from, c.to, c.relpath, rp)
}
p := te.GetPath()
if p != c.topath {
t.Errorf("GetPath for '%s'. Expected '%s', got '%s'.\n", c.to, c.topath, p)
}
}
}
// TestInsertChild inserts a "year" element into a book element at several
// positions with InsertChildAt (before the title, before the author, at
// len(book.Child), and at an index past the end) and compares the indented
// output after each insertion.
func TestInsertChild(t *testing.T) {
s := `Great ExpectationsCharles Dickens
`
doc := newDocumentFromString(t, s)
year := NewElement("year")
year.SetText("1861")
book := doc.FindElement("//book")
// Insert at the index currently held by the title element.
book.InsertChildAt(book.SelectElement("t:title").Index(), year)
expected1 := `1861Great ExpectationsCharles Dickens
`
doc.Indent(2)
s1, _ := doc.WriteToString()
checkStrEq(t, s1, expected1)
// Move year to the index currently held by the author element.
book.RemoveChildAt(year.Index())
book.InsertChildAt(book.SelectElement("author").Index(), year)
expected2 := `Great Expectations1861Charles Dickens
`
doc.Indent(2)
s2, _ := doc.WriteToString()
checkStrEq(t, s2, expected2)
// Move year to index len(book.Child).
book.RemoveChildAt(year.Index())
book.InsertChildAt(len(book.Child), year)
expected3 := `Great ExpectationsCharles Dickens1861
`
doc.Indent(2)
s3, _ := doc.WriteToString()
checkStrEq(t, s3, expected3)
// Insert at an index well past the end of the child list.
book.RemoveChildAt(year.Index())
book.InsertChildAt(999, year)
expected4 := `Great ExpectationsCharles Dickens1861
`
doc.Indent(2)
s4, _ := doc.WriteToString()
checkStrEq(t, s4, expected4)
}
// TestCdata parses each input document and verifies that Text() on the "tag"
// element returns the expected string.
func TestCdata(t *testing.T) {
var tests = []struct {
in, out string
}{
{`1234567`, "1234567"},
{``, "1234567"},
{`1357`, "1234567"},
{`13457`, "123"},
{`1457`, "1"},
{`457`, "1"},
}
for _, test := range tests {
doc := NewDocument()
err := doc.ReadFromString(test.in)
if err != nil {
t.Fatal("etree ReadFromString: " + err.Error())
}
tag := doc.FindElement("tag")
if tag.Text() != test.out {
t.Fatalf("etree invalid cdata. Expected: %v. Got: %v\n", test.out, tag.Text())
}
}
}
// TestAddChild passes every element matched by "//book/*" in doc1 to AddChild
// on the root of doc2, then compares the indented output of both documents
// with the expected strings.
func TestAddChild(t *testing.T) {
s := `Great ExpectationsCharles Dickens
`
doc1 := newDocumentFromString(t, s)
doc2 := NewDocument()
root := doc2.CreateElement("root")
for _, e := range doc1.FindElements("//book/*") {
root.AddChild(e)
}
expected1 := `
`
doc1.Indent(2)
s1, _ := doc1.WriteToString()
checkStrEq(t, s1, expected1)
expected2 := `Great ExpectationsCharles Dickens
`
doc2.Indent(2)
s2, _ := doc2.WriteToString()
checkStrEq(t, s2, expected2)
}
// TestSetRoot exercises Document.SetRoot: replacing the root with a new
// element, restoring the original root, setting a root on a document that
// has only a processing instruction, and setting a root element taken from
// another document. Output and parent links are checked along the way.
func TestSetRoot(t *testing.T) {
s := `
Great ExpectationsCharles Dickens
`
doc := newDocumentFromString(t, s)
origroot := doc.Root()
if origroot.Parent() != &doc.Element {
t.Error("Root incorrect")
}
// Replace the root; the previous root must end up with no parent.
newroot := NewElement("root")
doc.SetRoot(newroot)
if doc.Root() != newroot {
t.Error("doc.Root() != newroot")
}
if origroot.Parent() != nil {
t.Error("origroot.Parent() != nil")
}
expected1 := `
`
doc.Indent(2)
s1, _ := doc.WriteToString()
checkStrEq(t, s1, expected1)
// Restoring the original root must reproduce the original input.
doc.SetRoot(origroot)
doc.Indent(2)
expected2 := s
s2, _ := doc.WriteToString()
checkStrEq(t, s2, expected2)
// SetRoot on a document that so far holds only a processing instruction.
doc2 := NewDocument()
doc2.CreateProcInst("test", `a="wow"`)
doc2.SetRoot(NewElement("root"))
doc2.Indent(2)
expected3 := expected1
s3, _ := doc2.WriteToString()
checkStrEq(t, s3, expected3)
// Use doc's current root as doc2's root, then check both documents.
doc2.SetRoot(doc.Root())
doc2.Indent(2)
expected4 := s
s4, _ := doc2.WriteToString()
checkStrEq(t, s4, expected4)
expected5 := `
`
doc.Indent(2)
s5, _ := doc.WriteToString()
checkStrEq(t, s5, expected5)
}
// TestSortAttrs calls SortAttrs on the root element and compares the
// indented, serialized document with the expected string.
func TestSortAttrs(t *testing.T) {
s := ``
doc := newDocumentFromString(t, s)
doc.Root().SortAttrs()
doc.Indent(2)
out, _ := doc.WriteToString()
checkStrEq(t, out, ``+"\n")
}
// TestCharsetReaderEncoding verifies that ReadFromBytes succeeds on each of
// the listed input documents with default read settings.
func TestCharsetReaderEncoding(t *testing.T) {
cases := []string{
``,
``,
``,
}
for _, c := range cases {
doc := NewDocument()
if err := doc.ReadFromBytes([]byte(c)); err != nil {
t.Error(err)
}
}
}
// TestCharData builds an element from a mix of text and CDATA tokens, created
// both through the Create* helpers and through NewText/NewCData plus
// AddChild. It checks the serialized form, then re-parses that output and
// checks the root element's combined text.
func TestCharData(t *testing.T) {
doc := NewDocument()
root := doc.CreateElement("root")
root.CreateCharData("This ")
root.CreateCData("is ")
e1 := NewText("a ")
e2 := NewCData("text ")
root.AddChild(e1)
root.AddChild(e2)
root.CreateCharData("Element!!")
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
checkStrEq(t, s, `This a Element!!`)
// Check we can parse the output
err = doc.ReadFromString(s)
if err != nil {
t.Fatal("etree: incorrect ReadFromString result")
}
if doc.Root().Text() != "This is a text Element!!" {
t.Error("etree: invalid text")
}
}
// TestIndentSimple checks Unindent, Indent(NoIndent), IndentTabs and
// Indent(i) for i in [0, 256) on a small three-level document, with
// WriteSettings.UseCRLF both off and on.
func TestIndentSimple(t *testing.T) {
doc := NewDocument()
root := doc.CreateElement("root")
ch1 := root.CreateElement("child1")
ch1.CreateElement("child2")
// First test Unindent.
doc.Unindent()
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
expected := ""
checkStrEq(t, s, expected)
// Now test Indent with NoIndent (which should produce the same result
// as Unindent).
doc.Indent(NoIndent)
s, err = doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
checkStrEq(t, s, expected)
// Run all indent test cases.
// ws is the indent unit and nl the newline sequence expected for each
// combination of useTabs and useCRLF.
tests := []struct {
useTabs, useCRLF bool
ws, nl string
}{
{false, false, " ", "\n"},
{false, true, " ", "\r\n"},
{true, false, "\t", "\n"},
{true, true, "\t", "\r\n"},
}
for _, test := range tests {
doc.WriteSettings.UseCRLF = test.useCRLF
if test.useTabs {
doc.IndentTabs()
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
tab := test.ws
expected := "" + test.nl + tab + "" + test.nl +
tab + tab + "" + test.nl + tab +
"" + test.nl + "" + test.nl
checkStrEq(t, s, expected)
} else {
// Try every indent width from 0 through 255 spaces.
for i := 0; i < 256; i++ {
doc.Indent(i)
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
tab := strings.Repeat(test.ws, i)
expected := "" + test.nl + tab + "" + test.nl +
tab + tab + "" + test.nl + tab +
"" + test.nl + "" + test.nl
checkStrEq(t, s, expected)
}
}
}
}
// TestIndentWithDefaultSettings indents a parsed document using the settings
// returned by NewIndentSettings, without modification, and compares the
// serialized result with the expected string.
func TestIndentWithDefaultSettings(t *testing.T) {
input := ``
doc := NewDocument()
err := doc.ReadFromString(input)
if err != nil {
t.Error("etree: failed to read string")
}
settings := NewIndentSettings()
doc.IndentWithSettings(settings)
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
expected := "\n \n \n \n\n"
checkStrEq(t, s, expected)
}
// TestIndentWithSettings checks IndentWithSettings on a small three-level
// document: first with Spaces set to NoIndent, then with tabs, and with every
// space width in [0, 256), each with UseCRLF off and on.
func TestIndentWithSettings(t *testing.T) {
doc := NewDocument()
root := doc.CreateElement("root")
ch1 := root.CreateElement("child1")
ch1.CreateElement("child2")
// First test with NoIndent.
settings := NewIndentSettings()
settings.UseCRLF = false
settings.UseTabs = false
settings.Spaces = NoIndent
doc.IndentWithSettings(settings)
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
expected := ""
checkStrEq(t, s, expected)
// Run all indent test cases.
// ws is the indent unit and nl the newline sequence expected for each
// combination of useTabs and useCRLF.
tests := []struct {
useTabs, useCRLF bool
ws, nl string
}{
{false, false, " ", "\n"},
{false, true, " ", "\r\n"},
{true, false, "\t", "\n"},
{true, true, "\t", "\r\n"},
}
for _, test := range tests {
if test.useTabs {
settings := NewIndentSettings()
settings.UseTabs = true
settings.UseCRLF = test.useCRLF
doc.IndentWithSettings(settings)
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
tab := test.ws
expected := "" + test.nl + tab + "" + test.nl +
tab + tab + "" + test.nl + tab +
"" + test.nl + "" + test.nl
checkStrEq(t, s, expected)
} else {
// Try every indent width from 0 through 255 spaces.
for i := 0; i < 256; i++ {
settings := NewIndentSettings()
settings.Spaces = i
settings.UseTabs = false
settings.UseCRLF = test.useCRLF
doc.IndentWithSettings(settings)
s, err := doc.WriteToString()
if err != nil {
t.Error("etree: failed to serialize document")
}
tab := strings.Repeat(test.ws, i)
expected := "" + test.nl + tab + "" + test.nl +
tab + tab + "" + test.nl + tab +
"" + test.nl + "" + test.nl
checkStrEq(t, s, expected)
}
}
}
}
// TestIndentPreserveWhitespace indents each input with two spaces while
// PreserveLeafWhitespace and SuppressTrailingWhitespace are enabled, and
// compares the serialized output with the expected string.
func TestIndentPreserveWhitespace(t *testing.T) {
tests := []struct {
input string
expected string
}{
{"", ""},
{"", ""},
{"\t", "\t"},
{"\t\n \t", "\t\n \t"},
{"", ""},
{"", ""},
{"", "\n \n"},
}
for _, test := range tests {
doc := NewDocument()
err := doc.ReadFromString(test.input)
if err != nil {
t.Error("etree: failed to read string")
}
s := NewIndentSettings()
s.Spaces = 2
s.PreserveLeafWhitespace = true
s.SuppressTrailingWhitespace = true
doc.IndentWithSettings(s)
output, err := doc.WriteToString()
if err != nil {
// This failure comes from writing, not reading, so report it as such.
t.Error("etree: failed to serialize document")
}
checkStrEq(t, output, test.expected)
}
}
// TestPreserveCData reads each input twice, once with
// ReadSettings.PreserveCData enabled and once with it disabled, and compares
// the re-serialized document with the matching expected string.
func TestPreserveCData(t *testing.T) {
tests := []struct {
input string
expectedWithPreserve string
expectedWithoutPreserve string
}{
{
"",
"",
"x",
},
{
"foo]]>",
"foo]]>",
"x <b>foo</b>",
},
{
"name",
"name",
"My name is",
},
}
// First pass: PreserveCData enabled.
for _, test := range tests {
doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: true})
output, _ := doc.WriteToString()
checkStrEq(t, output, test.expectedWithPreserve)
}
// Second pass: PreserveCData disabled.
for _, test := range tests {
doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: false})
output, _ := doc.WriteToString()
checkStrEq(t, output, test.expectedWithoutPreserve)
}
}
// TestTokenIndexing runs checkIndexes on the document after each of several
// mutations: SetText, Indent(4), Indent(NoIndent), InsertChildAt and
// RemoveChildAt.
func TestTokenIndexing(t *testing.T) {
s := `
Great ExpectationsCharles Dickens`
doc := newDocumentFromString(t, s)
review := doc.FindElement("/store/book/review")
review.SetText("Excellent")
checkIndexes(t, &doc.Element)
doc.Indent(4)
checkIndexes(t, &doc.Element)
doc.Indent(NoIndent)
checkIndexes(t, &doc.Element)
e := NewElement("foo")
store := doc.SelectElement("store")
store.InsertChildAt(0, e)
checkIndexes(t, &doc.Element)
store.RemoveChildAt(0)
checkIndexes(t, &doc.Element)
}
// TestSetText exercises SetText and SetCData on an element whose children are
// a changing mix of text, CDATA and element tokens. After each step it checks
// the serialized document, the result of Text(), and the number of children.
func TestSetText(t *testing.T) {
doc := NewDocument()
root := doc.CreateElement("root")
checkDocEq(t, doc, ``)
checkStrEq(t, root.Text(), "")
checkIntEq(t, len(root.Child), 0)
root.SetText("foo")
checkDocEq(t, doc, `foo`)
checkStrEq(t, root.Text(), "foo")
checkIntEq(t, len(root.Child), 1)
root.SetText("bar")
checkDocEq(t, doc, `bar`)
checkStrEq(t, root.Text(), "bar")
checkIntEq(t, len(root.Child), 1)
root.CreateCData("cdata")
checkDocEq(t, doc, `bar`)
checkStrEq(t, root.Text(), "barcdata")
checkIntEq(t, len(root.Child), 2)
// SetText with text followed by CDATA: the child count drops back to one.
root.SetText("qux")
checkDocEq(t, doc, `qux`)
checkStrEq(t, root.Text(), "qux")
checkIntEq(t, len(root.Child), 1)
root.CreateCData("cdata")
checkDocEq(t, doc, `qux`)
checkStrEq(t, root.Text(), "quxcdata")
checkIntEq(t, len(root.Child), 2)
root.SetCData("baz")
checkDocEq(t, doc, ``)
checkStrEq(t, root.Text(), "baz")
checkIntEq(t, len(root.Child), 1)
root.CreateText("corge")
root.CreateCData("grault")
root.CreateText("waldo")
root.CreateCData("fred")
root.CreateElement("child")
checkDocEq(t, doc, `corgewaldo`)
checkStrEq(t, root.Text(), "bazcorgegraultwaldofred")
checkIntEq(t, len(root.Child), 6)
// SetText with a trailing child element: two children remain afterwards.
root.SetText("plugh")
checkDocEq(t, doc, `plugh`)
checkStrEq(t, root.Text(), "plugh")
checkIntEq(t, len(root.Child), 2)
root.SetText("")
checkDocEq(t, doc, ``)
checkStrEq(t, root.Text(), "")
checkIntEq(t, len(root.Child), 1)
// Setting empty text a second time leaves the same result.
root.SetText("")
checkDocEq(t, doc, ``)
checkStrEq(t, root.Text(), "")
checkIntEq(t, len(root.Child), 1)
root.RemoveChildAt(0)
root.CreateText("corge")
root.CreateCData("grault")
root.CreateText("waldo")
root.CreateCData("fred")
root.CreateElement("child")
checkDocEq(t, doc, `corgewaldo`)
checkStrEq(t, root.Text(), "corgegraultwaldofred")
checkIntEq(t, len(root.Child), 5)
root.SetText("")
checkDocEq(t, doc, ``)
checkStrEq(t, root.Text(), "")
checkIntEq(t, len(root.Child), 1)
}
// TestSetTail exercises Tail and SetTail on a child element. After each step
// it checks the serialized document, the result of Tail(), and the number of
// children held by the parent and by the child.
func TestSetTail(t *testing.T) {
doc := NewDocument()
root := doc.CreateElement("root")
child := root.CreateElement("child")
root.CreateText("\n\t")
child.SetText("foo")
checkDocEq(t, doc, "foo\n\t")
checkStrEq(t, child.Tail(), "\n\t")
checkIntEq(t, len(root.Child), 2)
checkIntEq(t, len(child.Child), 1)
// A CDATA token added after the text shows up in the tail as well.
root.CreateCData(" ")
checkDocEq(t, doc, "foo\n\t")
checkStrEq(t, child.Tail(), "\n\t ")
checkIntEq(t, len(root.Child), 3)
checkIntEq(t, len(child.Child), 1)
// Setting an empty tail leaves the child as root's only token.
child.SetTail("")
checkDocEq(t, doc, "foo")
checkStrEq(t, child.Tail(), "")
checkIntEq(t, len(root.Child), 1)
checkIntEq(t, len(child.Child), 1)
child.SetTail("\t\t\t")
checkDocEq(t, doc, "foo\t\t\t")
checkStrEq(t, child.Tail(), "\t\t\t")
checkIntEq(t, len(root.Child), 2)
checkIntEq(t, len(child.Child), 1)
child.SetTail("\t\n\n\t")
checkDocEq(t, doc, "foo\t\n\n\t")
checkStrEq(t, child.Tail(), "\t\n\n\t")
checkIntEq(t, len(root.Child), 2)
checkIntEq(t, len(child.Child), 1)
child.SetTail("")
checkDocEq(t, doc, "foo")
checkStrEq(t, child.Tail(), "")
checkIntEq(t, len(root.Child), 1)
checkIntEq(t, len(child.Child), 1)
}
// TestAttrParent verifies that Attr.Element() returns the owning element for
// attributes made with CreateAttr and for attributes read from a parsed
// document, and returns nil for attributes returned by RemoveAttr.
func TestAttrParent(t *testing.T) {
doc := NewDocument()
root := doc.CreateElement("root")
attr1 := root.CreateAttr("bar", "1")
attr2 := root.CreateAttr("qux", "2")
checkIntEq(t, len(root.Attr), 2)
checkElementEq(t, attr1.Element(), root)
checkElementEq(t, attr2.Element(), root)
// Removed attributes no longer report an owning element.
attr1 = root.RemoveAttr("bar")
attr2 = root.RemoveAttr("qux")
checkElementEq(t, attr1.Element(), nil)
checkElementEq(t, attr2.Element(), nil)
s := ``
err := doc.ReadFromString(s)
if err != nil {
t.Error("etree: failed to parse document")
}
// Attributes produced by parsing must point at their element too.
root = doc.SelectElement("root")
for i := range root.Attr {
checkElementEq(t, root.Attr[i].Element(), root)
}
}
// TestDefaultNamespaceURI checks NamespaceURI() for the document, for several
// nested elements and for their attributes, then checks that path queries
// using the namespace-uri() filter return the expected elements in order.
func TestDefaultNamespaceURI(t *testing.T) {
s := `
`
doc := newDocumentFromString(t, s)
root := doc.SelectElement("root")
child1 := root.SelectElement("child1")
child2 := root.SelectElement("child2")
grandchild1 := child1.SelectElement("grandchild1")
grandchild2 := child1.SelectElement("grandchild2")
greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
// Element namespace URIs.
checkStrEq(t, doc.NamespaceURI(), "")
checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://child.example.com")
// Attribute namespace URIs.
checkStrEq(t, root.Attr[0].NamespaceURI(), "")
checkStrEq(t, root.Attr[1].NamespaceURI(), "")
checkStrEq(t, root.Attr[2].NamespaceURI(), "https://attrib.example.com")
checkStrEq(t, root.Attr[3].NamespaceURI(), "")
checkStrEq(t, child1.Attr[0].NamespaceURI(), "")
checkStrEq(t, child1.Attr[1].NamespaceURI(), "https://attrib.example.com")
checkStrEq(t, child2.Attr[0].NamespaceURI(), "")
checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "")
checkStrEq(t, grandchild1.Attr[1].NamespaceURI(), "")
checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "")
checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "https://attrib.example.com")
// Path queries filtered by namespace-uri().
f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
if len(f) != 2 || f[0] != root || f[1] != child2 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
if len(f) != 1 || f[0] != grandchild1 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='']")
if len(f) != 0 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='foo']")
if len(f) != 0 {
t.Error("etree: failed namespace-uri test")
}
}
// TestLocalNamespaceURI checks NamespaceURI() for the document and several
// nested elements, including ones expected to have no namespace, then checks
// that path queries using the namespace-uri() filter return the expected
// elements in order.
func TestLocalNamespaceURI(t *testing.T) {
s := `
`
doc := newDocumentFromString(t, s)
root := doc.SelectElement("root")
child1 := root.SelectElement("child1")
child2 := root.SelectElement("child2")
child3 := root.SelectElement("child3")
grandchild1 := child1.SelectElement("grandchild1")
grandchild2 := child1.SelectElement("grandchild2")
grandchild3 := child1.SelectElement("grandchild3")
grandchild4 := child1.SelectElement("grandchild4")
greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
// Element namespace URIs.
checkStrEq(t, doc.NamespaceURI(), "")
checkStrEq(t, root.NamespaceURI(), "https://root.example.com")
checkStrEq(t, child1.NamespaceURI(), "https://child.example.com")
checkStrEq(t, child2.NamespaceURI(), "https://root.example.com")
checkStrEq(t, child3.NamespaceURI(), "")
checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com")
checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com")
checkStrEq(t, grandchild3.NamespaceURI(), "https://root.example.com")
checkStrEq(t, grandchild4.NamespaceURI(), "")
checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://root.example.com")
// Path queries filtered by namespace-uri().
f := doc.FindElements("//*[namespace-uri()='https://root.example.com']")
if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='https://child.example.com']")
if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']")
if len(f) != 1 || f[0] != grandchild1 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='']")
if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 {
t.Error("etree: failed namespace-uri test")
}
f = doc.FindElements("//*[namespace-uri()='foo']")
if len(f) != 0 {
t.Error("etree: failed namespace-uri test")
}
}
// TestWhitespace checks the CharData tokens produced by parsing (their data
// and IsWhitespace result), then checks that IsWhitespace tracks the value
// assigned through SetData.
func TestWhitespace(t *testing.T) {
s := "\n\t\n\t\t x\n \n"
doc := newDocumentFromString(t, s)
root := doc.Root()
checkIntEq(t, len(root.Child), 3)
// The first and last children of root are expected to be whitespace CharData.
cd := root.Child[0].(*CharData)
checkBoolEq(t, cd.IsWhitespace(), true)
checkStrBinaryEq(t, cd.Data, "\n\t")
cd = root.Child[2].(*CharData)
checkBoolEq(t, cd.IsWhitespace(), true)
checkStrBinaryEq(t, cd.Data, "\n")
child := root.SelectElement("child")
checkIntEq(t, len(child.Child), 3)
cd = child.Child[0].(*CharData)
checkBoolEq(t, cd.IsWhitespace(), true)
checkStrBinaryEq(t, cd.Data, "\n\t\t")
cd = child.Child[2].(*CharData)
checkBoolEq(t, cd.IsWhitespace(), true)
checkStrBinaryEq(t, cd.Data, "\n ")
grandchild := child.SelectElement("grandchild")
checkIntEq(t, len(grandchild.Child), 1)
cd = grandchild.Child[0].(*CharData)
checkBoolEq(t, cd.IsWhitespace(), false)
// IsWhitespace must reflect whatever SetData last stored.
cd.SetData(" ")
checkBoolEq(t, cd.IsWhitespace(), true)
cd.SetData(" x")
checkBoolEq(t, cd.IsWhitespace(), false)
cd.SetData("\t\n\r ")
checkBoolEq(t, cd.IsWhitespace(), true)
cd.SetData("\uFFFD")
checkBoolEq(t, cd.IsWhitespace(), false)
// Empty data is expected to count as whitespace.
cd.SetData("")
checkBoolEq(t, cd.IsWhitespace(), true)
}
// TestTokenWriteTo indents individual elements with tab-based IndentSettings,
// writes each one to a buffer through Element.WriteTo, and compares the
// buffer with the expected string.
func TestTokenWriteTo(t *testing.T) {
s := `Great Expectations`
doc := newDocumentFromString(t, s)
writeSettings := WriteSettings{}
indentSettings := IndentSettings{UseTabs: true}
tests := []struct {
path string
expected string
}{
{"//store", "\n\t\n\t\n\t\tGreat Expectations\n\t\n"},
{"//store/book", "\n\tGreat Expectations\n"},
{"//store/book/title", "Great Expectations"},
}
for _, test := range tests {
var buffer bytes.Buffer
c := doc.FindElement(test.path)
c.IndentWithSettings(&indentSettings)
c.WriteTo(&buffer, &writeSettings)
checkStrEq(t, buffer.String(), test.expected)
}
}
// TestReindexChildren shuffles the root element's Child slice directly and
// verifies that ReindexChildren makes every child's Index() match its
// position in the slice again.
func TestReindexChildren(t *testing.T) {
s := ``
doc := newDocumentFromString(t, s)
doc.Unindent()
root := doc.Root()
if root == nil || root.Tag != "root" || len(root.Child) != 5 {
t.Error("etree: expected root element not found")
}
// Indexes must be consistent before the shuffle.
for i := 0; i < len(root.Child); i++ {
if root.Child[i].Index() != i {
t.Error("etree: incorrect child index found in root element child")
}
}
// Reorder the slice in place, then ask the element to reindex.
rand.Shuffle(len(root.Child), func(i, j int) {
root.Child[i], root.Child[j] = root.Child[j], root.Child[i]
})
root.ReindexChildren()
for i := 0; i < len(root.Child); i++ {
if root.Child[i].Index() != i {
t.Error("etree: incorrect child index found in root element child")
}
}
}
// TestPreserveDuplicateAttrs parses the same input with
// ReadSettings.PreserveDuplicateAttrs enabled, disabled, and left at its
// default. It expects two "attr" attributes when enabled, and otherwise a
// single "attr" attribute with the value "test2".
func TestPreserveDuplicateAttrs(t *testing.T) {
s := ``
// checkAttrCount reports an error unless e has exactly n attributes.
checkAttrCount := func(e *Element, n int) {
if len(e.Attr) != n {
t.Errorf("etree: expected %d attributes, got %d", n, len(e.Attr))
}
}
// checkAttr reports an error unless e.Attr[i] exists and has the given key
// and value.
checkAttr := func(e *Element, i int, key, value string) {
if i >= len(e.Attr) {
t.Errorf("etree: attr[%d] out of bounds", i)
return
}
if e.Attr[i].Key != key {
t.Errorf("etree: attr[%d] expected key %s, got %s", i, key, e.Attr[i].Key)
}
if e.Attr[i].Value != value {
t.Errorf("etree: attr[%d] expected value %s, got %s", i, value, e.Attr[i].Value)
}
}
t.Run("enabled", func(t *testing.T) {
doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: true})
e := doc.FindElement("element")
checkAttrCount(e, 2)
checkAttr(e, 0, "attr", "test")
checkAttr(e, 1, "attr", "test2")
})
t.Run("disabled", func(t *testing.T) {
doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: false})
e := doc.FindElement("element")
checkAttrCount(e, 1)
checkAttr(e, 0, "attr", "test2")
})
t.Run("default", func(t *testing.T) {
doc := newDocumentFromString(t, s)
e := doc.FindElement("element")
checkAttrCount(e, 1)
checkAttr(e, 0, "attr", "test2")
})
}
etree-1.3.0/example_test.go 0000664 0000000 0000000 00000003172 14544351356 0015655 0 ustar 00root root 0000000 0000000 // Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package etree
import "os"
// Create an etree Document, add XML entities to it, and serialize it
// to stdout.
func ExampleDocument_creating() {
doc := NewDocument()
// Two processing instructions are added ahead of the root element.
doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`)
doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`)
people := doc.CreateElement("People")
people.CreateComment("These are all known people")
jon := people.CreateElement("Person")
jon.CreateAttr("name", "Jon O'Reilly")
sally := people.CreateElement("Person")
sally.CreateAttr("name", "Sally")
// Indent with two spaces before writing.
doc.Indent(2)
doc.WriteTo(os.Stdout)
// Output:
//
//
//
//
//
//
//
}
// Read a file named "document.xml" into a new etree Document, panicking if
// the read fails.
func ExampleDocument_reading() {
doc := NewDocument()
if err := doc.ReadFromFile("document.xml"); err != nil {
panic(err)
}
}
// Find every book element whose author child has the text "Charles Dickens",
// copy each match into a document of its own, and write that document to
// stdout with two-space indentation.
func ExamplePath() {
xml := `
Great ExpectationsCharles DickensUlyssesJames Joyce`
doc := NewDocument()
doc.ReadFromString(xml)
for _, e := range doc.FindElements(".//book[author='Charles Dickens']") {
// This doc shadows the outer one; it holds only a copy of the match.
doc := NewDocumentWithRoot(e.Copy())
doc.Indent(2)
doc.WriteTo(os.Stdout)
}
// Output:
//
// Great Expectations
// Charles Dickens
//
}
etree-1.3.0/go.mod 0000664 0000000 0000000 00000000050 14544351356 0013732 0 ustar 00root root 0000000 0000000 module github.com/beevik/etree
go 1.13
etree-1.3.0/helpers.go 0000664 0000000 0000000 00000020563 14544351356 0014630 0 ustar 00root root 0000000 0000000 // Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package etree
import (
"io"
"strings"
"unicode/utf8"
)
// stack is a minimal last-in-first-out container of arbitrary values,
// backed by a slice.
type stack struct {
	data []interface{}
}

// empty reports whether the stack currently holds no values.
func (s *stack) empty() bool {
	return len(s.data) == 0
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes the top value from the stack and returns it. It panics when
// the stack is empty.
func (s *stack) pop() interface{} {
	top := len(s.data) - 1
	value := s.data[top]
	// Clear the vacated slot so the slice no longer references the value.
	s.data[top] = nil
	s.data = s.data[:top]
	return value
}

// peek returns the top value without removing it. It panics when the stack
// is empty.
func (s *stack) peek() interface{} {
	top := len(s.data) - 1
	return s.data[top]
}
// fifo is a first-in-first-out queue of arbitrary values stored in a
// circular buffer. head is the index of the next value to remove and tail is
// the index at which the next value will be stored.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, growing the buffer first when
// fewer than two slots are free (so head == tail always means "empty").
func (f *fifo) add(value interface{}) {
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	f.tail++
	if f.tail == len(f.data) {
		f.tail = 0 // wrap around
	}
}

// remove takes the value at the front of the queue and returns it. The
// caller is expected to check len() first.
func (f *fifo) remove() interface{} {
	front := f.data[f.head]
	// Clear the slot so the buffer no longer references the value.
	f.data[f.head] = nil
	f.head++
	if f.head == len(f.data) {
		f.head = 0 // wrap around
	}
	return front
}

// len returns the number of values currently queued.
func (f *fifo) len() int {
	n := f.tail - f.head
	if n < 0 {
		// The occupied region wraps past the end of the buffer.
		n += len(f.data)
	}
	return n
}

// grow doubles the buffer capacity (starting at 4) and moves the queued
// values to the start of the new buffer.
func (f *fifo) grow() {
	size := 4
	if len(f.data) > 0 {
		size = 2 * len(f.data)
	}
	count := f.len()
	buf := make([]interface{}, size)
	if f.head <= f.tail {
		copy(buf, f.data[f.head:f.tail])
	} else {
		// Copy the part up to the end of the old buffer, then the wrapped part.
		n := copy(buf, f.data[f.head:])
		copy(buf[n:], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}
// xmlReader is the interface through which an XML byte stream is consumed.
// In addition to Read it reports the running total of bytes read.
type xmlReader interface {
	Bytes() int64
	Read(p []byte) (n int, err error)
}

// xmlSimpleReader is a pass-through reader that keeps a count of the bytes
// read from the reader it wraps.
type xmlSimpleReader struct {
	r     io.Reader
	bytes int64
}

// newXmlSimpleReader wraps r in a byte-counting reader whose count starts
// at zero.
func newXmlSimpleReader(r io.Reader) xmlReader {
	return &xmlSimpleReader{r: r}
}

// Bytes returns the total number of bytes read so far.
func (xr *xmlSimpleReader) Bytes() int64 {
	return xr.bytes
}

// Read forwards to the wrapped reader and adds the number of bytes it
// returned to the running total.
func (xr *xmlSimpleReader) Read(p []byte) (int, error) {
	count, readErr := xr.r.Read(p)
	xr.bytes += int64(count)
	return count, readErr
}
// xmlPeekReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader. It also allows the caller to
// "peek" at the previous portions of the buffer after they have been
// parsed.
type xmlPeekReader struct {
	r          io.Reader
	bytes      int64  // total bytes read by the Read function
	buf        []byte // internal read buffer
	bufSize    int    // total bytes used in the read buffer
	bufOffset  int64  // total bytes read when buf was last filled
	window     []byte // current read buffer window
	peekBuf    []byte // buffer used to store data to be peeked at later
	peekOffset int64  // total read offset of the start of the peek buffer
	pendingErr error  // error that r returned together with data; reported by the next fill
}

// newXmlPeekReader wraps r in a peek reader with a 4096-byte internal
// buffer and no peek range prepared.
func newXmlPeekReader(r io.Reader) *xmlPeekReader {
	buf := make([]byte, 4096)
	return &xmlPeekReader{
		r:          r,
		bytes:      0,
		buf:        buf,
		bufSize:    0,
		bufOffset:  0,
		window:     buf[0:0],
		peekBuf:    make([]byte, 0),
		peekOffset: -1,
	}
}

// Bytes returns the total number of bytes handed out by Read so far.
func (xr *xmlPeekReader) Bytes() int64 {
	return xr.bytes
}

// Read copies up to len(p) bytes from the internal buffer into p, refilling
// the buffer from the wrapped reader when it has been fully consumed.
func (xr *xmlPeekReader) Read(p []byte) (n int, err error) {
	if len(xr.window) == 0 {
		err = xr.fill()
		if err != nil {
			return 0, err
		}
		if len(xr.window) == 0 {
			return 0, nil
		}
	}

	if len(xr.window) < len(p) {
		n = len(xr.window)
	} else {
		n = len(p)
	}

	copy(p, xr.window)
	xr.window = xr.window[n:]
	xr.bytes += int64(n)

	return n, err
}

// PeekPrepare starts recording up to maxLen bytes of the stream beginning at
// the absolute read offset given by offset. Bytes of that range already held
// in the internal buffer are captured immediately; the rest are captured as
// the buffer is refilled.
func (xr *xmlPeekReader) PeekPrepare(offset int64, maxLen int) {
	if maxLen > cap(xr.peekBuf) {
		xr.peekBuf = make([]byte, 0, maxLen)
	}
	xr.peekBuf = xr.peekBuf[0:0]
	xr.peekOffset = offset
	xr.updatePeekBuf()
}

// PeekFinalize returns the bytes recorded since the last PeekPrepare call.
func (xr *xmlPeekReader) PeekFinalize() []byte {
	xr.updatePeekBuf()
	return xr.peekBuf
}

// fill reloads the internal buffer from the wrapped reader. When the reader
// returns data and an error in the same call (for example the final bytes
// together with io.EOF), the data is kept and the error is held back until
// the next fill, so no bytes are lost.
func (xr *xmlPeekReader) fill() error {
	xr.bufOffset = xr.bytes
	xr.window, xr.bufSize = xr.buf[0:0], 0

	// Report an error that arrived alongside the previous batch of data.
	if err := xr.pendingErr; err != nil {
		xr.pendingErr = nil
		return err
	}

	n, err := xr.r.Read(xr.buf)
	if n == 0 && err != nil {
		return err
	}

	xr.pendingErr = err
	xr.window, xr.bufSize = xr.buf[:n], n
	xr.updatePeekBuf()
	return nil
}

// updatePeekBuf appends to the peek buffer any bytes of the requested peek
// range that are present in the internal buffer and have not been recorded
// yet. It may safely be called more than once against the same buffer
// contents.
func (xr *xmlPeekReader) updatePeekBuf() {
	if xr.peekOffset < 0 || len(xr.peekBuf) == cap(xr.peekBuf) {
		return
	}

	// Absolute stream range that is still missing from the peek buffer.
	// Starting after the bytes already recorded keeps a repeated call from
	// appending the same bytes twice.
	rangeMin := xr.peekOffset + int64(len(xr.peekBuf))
	rangeMax := xr.peekOffset + int64(cap(xr.peekBuf))

	// Clamp the range to what the internal buffer currently holds.
	bufMin := xr.bufOffset
	bufMax := xr.bufOffset + int64(xr.bufSize)
	if rangeMin < bufMin {
		rangeMin = bufMin
	}
	if rangeMax > bufMax {
		rangeMax = bufMax
	}

	if rangeMax > rangeMin {
		xr.peekBuf = append(xr.peekBuf, xr.buf[rangeMin-bufMin:rangeMax-bufMin]...)
	}
}
// xmlWriter is a pass-through writer that keeps a running total of the bytes
// written through it.
type xmlWriter struct {
	w     io.Writer
	bytes int64
}

// newXmlWriter wraps w in a byte-counting writer whose count starts at zero.
func newXmlWriter(w io.Writer) *xmlWriter {
	xw := new(xmlWriter)
	xw.w = w
	return xw
}

// Write forwards p to the wrapped writer and adds the number of bytes it
// reports as written to the running total.
func (xw *xmlWriter) Write(p []byte) (int, error) {
	written, writeErr := xw.w.Write(p)
	xw.bytes += int64(written)
	return written, writeErr
}
// isWhitespace reports whether every byte of s is a space, tab, newline or
// carriage return. An empty string counts as whitespace.
func isWhitespace(s string) bool {
	for _, c := range []byte(s) {
		switch c {
		case ' ', '\t', '\n', '\r':
			// whitespace: keep scanning
		default:
			return false
		}
	}
	return true
}
// spaceMatch reports whether namespace a matches namespace b. An empty a
// matches every b; otherwise the two must be identical.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
// spaceDecompose splits a "namespace:tag" identifier at its first ':' and
// returns the part before it as space and the part after it as key. When str
// contains no ':', space is empty and key is the whole string.
func spaceDecompose(str string) (space, key string) {
	if i := strings.IndexByte(str, ':'); i >= 0 {
		return str[:i], str[i+1:]
	}
	return "", str
}
// Strings used by indentCRLF and indentLF
// Each string starts with "\r\n" followed by the indent character. The
// indent helpers return a slice of the string when it is long enough and
// otherwise extend it by repeating the character at index 2.
const (
indentSpaces = "\r\n "
indentTabs = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
// indentCRLF returns the first two bytes of source (the CRLF newline)
// followed by n copies of the indent character found at source[2]. A
// negative n yields the newline alone. When source is long enough the result
// is a slice of source; otherwise source is extended.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	end := n + 2
	if end <= len(source) {
		return source[:end]
	}
	// source is too short: append the missing indent characters.
	return source + strings.Repeat(source[2:3], end-len(source))
}
// indentLF returns the LF newline at source[1] followed by n copies of the
// indent character found at source[2]; the leading byte of source is
// skipped. A negative n yields the newline alone. When source is long enough
// the result is a slice of source; otherwise source is extended.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	end := n + 2
	if end <= len(source) {
		return source[1:end]
	}
	// source is too short: append the missing indent characters.
	return source[1:] + strings.Repeat(source[2:3], end-len(source))
}
// nextIndex returns the index of the next occurrence of byte ch in s,
// starting the search at offset. It returns -1 if the byte is not found or
// if offset lies outside the range [0, len(s)].
func nextIndex(s string, ch byte, offset int) int {
	// Slicing with an out-of-range offset would panic; treat it as
	// "nothing left to search" instead.
	if offset < 0 || offset > len(s) {
		return -1
	}
	if i := strings.IndexByte(s[offset:], ch); i >= 0 {
		return offset + i
	}
	return -1
}
// isInteger reports whether s is a decimal integer: an optional leading '-'
// followed by one or more ASCII digits. The empty string and a lone "-" are
// not integers.
func isInteger(s string) bool {
	start := 0
	if len(s) > 0 && s[0] == '-' {
		start = 1
	}
	// Require at least one digit after the optional sign.
	if start == len(s) {
		return false
	}
	for i := start; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
// escapeMode selects which characters escapeString replaces with entity or
// character references. '&' and '<' are escaped in every mode.
type escapeMode byte
const (
// escapeNormal additionally escapes '>', '\'' and '"'; tab, line feed and
// carriage return pass through unchanged.
escapeNormal escapeMode = iota
// escapeCanonicalText additionally escapes '>' and carriage return.
escapeCanonicalText
// escapeCanonicalAttr additionally escapes '"', tab, line feed and
// carriage return; '>' passes through unchanged.
escapeCanonicalAttr
)
// escapeString writes an escaped version of a string to the writer.
func escapeString(w Writer, s string, m escapeMode) {
var esc []byte
last := 0
for i := 0; i < len(s); {
r, width := utf8.DecodeRuneInString(s[i:])
i += width
switch r {
case '&':
esc = []byte("&")
case '<':
esc = []byte("<")
case '>':
if m == escapeCanonicalAttr {
continue
}
esc = []byte(">")
case '\'':
if m != escapeNormal {
continue
}
esc = []byte("'")
case '"':
if m == escapeCanonicalText {
continue
}
esc = []byte(""")
case '\t':
if m != escapeCanonicalAttr {
continue
}
esc = []byte(" ")
case '\n':
if m != escapeCanonicalAttr {
continue
}
esc = []byte("
")
case '\r':
if m == escapeNormal {
continue
}
esc = []byte("
")
default:
if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
esc = []byte("\uFFFD")
break
}
continue
}
w.WriteString(s[last : i-width])
w.Write(esc)
last = i
}
w.WriteString(s[last:])
}
// isInCharacterRange reports whether r is tab, line feed, carriage return,
// or falls in one of the ranges U+0020-U+D7FF, U+E000-U+FFFD or
// U+10000-U+10FFFF.
func isInCharacterRange(r rune) bool {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return true
	case r >= 0x20 && r <= 0xD7FF:
		return true
	case r >= 0xE000 && r <= 0xFFFD:
		return true
	case r >= 0x10000 && r <= 0x10FFFF:
		return true
	}
	return false
}
etree-1.3.0/path.go 0000664 0000000 0000000 00000037440 14544351356 0014124 0 ustar 00root root 0000000 0000000 // Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package etree
import (
"strconv"
"strings"
)
/*
A Path is a string that represents a search path through an etree starting
from the document root or an arbitrary element. Paths are used with the
Element object's Find* methods to locate and return desired elements.
A Path consists of a series of slash-separated "selectors", each of which may
be modified by one or more bracket-enclosed "filters". Selectors are used to
traverse the etree from element to element, while filters are used to narrow
the list of candidate elements at each node.
Although etree Path strings are structurally and behaviorally similar to XPath
strings (https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more
limited set of selectors and filtering options.
The following selectors are supported by etree paths:
. Select the current element.
.. Select the parent of the current element.
* Select all child elements of the current element.
/ Select the root element when used at the start of a path.
// Select all descendants of the current element.
tag Select all child elements with a name matching the tag.
The following basic filters are supported:
[@attrib] Keep elements with an attribute named attrib.
[@attrib='val'] Keep elements with an attribute named attrib and value matching val.
[tag] Keep elements with a child element named tag.
[tag='val'] Keep elements with a child element named tag and text matching val.
[n] Keep the n-th element, where n is a numeric index starting from 1.
The following function-based filters are supported:
[text()] Keep elements with non-empty text.
[text()='val'] Keep elements whose text matches val.
[local-name()='val'] Keep elements whose un-prefixed tag matches val.
[name()='val'] Keep elements whose full tag exactly matches val.
[namespace-prefix()] Keep elements with non-empty namespace prefixes.
[namespace-prefix()='val'] Keep elements whose namespace prefix matches val.
[namespace-uri()] Keep elements with non-empty namespace URIs.
[namespace-uri()='val'] Keep elements whose namespace URI matches val.
Below are some examples of etree path strings.
Select the bookstore child element of the root element:
/bookstore
Beginning from the root element, select the title elements of all descendant
book elements having a 'category' attribute of 'WEB':
//book[@category='WEB']/title
Beginning from the current element, select the first descendant book element
with a title child element containing the text 'Great Expectations':
.//book[title='Great Expectations'][1]
Beginning from the current element, select all child elements of book elements
with an attribute 'language' set to 'english':
./book/*[@language='english']
Beginning from the current element, select all child elements of book elements
containing the text 'special':
./book/*[text()='special']
Beginning from the current element, select all descendant book elements whose
title child element has a 'language' attribute of 'french':
.//book/title[@language='french']/..
Beginning from the current element, select all descendant book elements
belonging to the http://www.w3.org/TR/html4/ namespace:
.//book[namespace-uri()='http://www.w3.org/TR/html4/']
*/
type Path struct {
// segments holds the compiled path steps; the pather applies them in order,
// one per traversal step.
segments []segment
}
// ErrPath is the error type reported by path functions when an etree path
// string is invalid. Its value is the message without the package prefix.
type ErrPath string

// Error implements the error interface. It returns the message prefixed
// with "etree: ".
func (err ErrPath) Error() string {
	const prefix = "etree: "
	return prefix + string(err)
}
// CompilePath parses an XPath-like path string into a Path that can be used
// to query elements in an element tree. If the string is not a valid etree
// path, it returns an empty Path and an ErrPath describing the problem.
func CompilePath(path string) (Path, error) {
	var c compiler
	segs := c.parsePath(path)
	if c.err == ErrPath("") {
		return Path{segs}, nil
	}
	return Path{nil}, c.err
}
// MustCompilePath is like CompilePath but panics with the compile error
// instead of returning it. Use it for paths known to be valid, such as
// hard-coded path strings.
func MustCompilePath(path string) Path {
	compiled, err := CompilePath(path)
	if err == nil {
		return compiled
	}
	panic(err)
}
// A segment is the part of a path between two "/" characters: exactly one
// selector followed by zero or more [filters].
type segment struct {
	sel     selector // picks the initial candidates relative to an element
	filters []filter // narrow the candidates, applied in order
}

// apply runs the segment's selector against e and then passes the resulting
// candidate list through each filter in turn.
func (seg *segment) apply(e *Element, p *pather) {
	seg.sel.apply(e, p)
	for i := range seg.filters {
		seg.filters[i].apply(p)
	}
}
// A selector selects XML elements for consideration by the
// path traversal.
type selector interface {
// apply appends to p.candidates the elements the selector chooses
// relative to e.
apply(e *Element, p *pather)
}
// A filter pares down a list of candidate XML elements based
// on a path filter in [brackets].
type filter interface {
// apply replaces p.candidates with the subset that passes the filter,
// using p.scratch as working storage.
apply(p *pather)
}
// A pather is a helper object that traverses an element tree using
// a Path object. It collects and deduplicates all elements matching
// the path query.
type pather struct {
// queue holds the nodes (element plus remaining segments) still to be
// evaluated.
queue fifo
// results accumulates the matched elements in the order they are found.
results []*Element
// inResults records which elements are already in results so each is
// added only once.
inResults map[*Element]bool
// candidates is the working list filled by selectors and narrowed by
// filters; eval resets it before applying each segment.
candidates []*Element
scratch []*Element // used by filters
}
// A node represents an element and the remaining path segments that
// should be applied against it by the pather.
type node struct {
// e is the element the next segment is applied to.
e *Element
// segments is the unapplied tail of the path; eval consumes its first
// entry.
segments []segment
}
// newPather returns a pather with empty, non-nil result, candidate and
// scratch lists and an empty deduplication map.
func newPather() *pather {
	p := new(pather)
	p.results = make([]*Element, 0)
	p.inResults = make(map[*Element]bool)
	p.candidates = make([]*Element, 0)
	p.scratch = make([]*Element, 0)
	return p
}
// traverse walks the tree from element e along path and returns every
// element that matches the path's selectors and filters. It seeds the work
// queue with e and the full segment list, then evaluates queued nodes until
// none remain.
func (p *pather) traverse(e *Element, path Path) []*Element {
	p.queue.add(node{e, path.segments})
	for p.queue.len() > 0 {
		next := p.queue.remove().(node)
		p.eval(next)
	}
	return p.results
}
// eval evaluates one path node: it applies the node's first remaining
// segment to the node's element, then either records the resulting
// candidates as results (when no segments remain) or queues each candidate
// with the rest of the segments. A node with no segments matches nothing.
func (p *pather) eval(n node) {
	// A zero-value Path, or the empty Path that CompilePath returns with an
	// error, has no segments. Match nothing instead of indexing past the
	// end of the slice.
	if len(n.segments) == 0 {
		return
	}

	p.candidates = p.candidates[0:0]
	seg, remain := n.segments[0], n.segments[1:]
	seg.apply(n.e, p)

	if len(remain) == 0 {
		// Final segment: collect candidates, skipping any already recorded.
		for _, c := range p.candidates {
			if in := p.inResults[c]; !in {
				p.inResults[c] = true
				p.results = append(p.results, c)
			}
		}
		return
	}

	for _, c := range p.candidates {
		p.queue.add(node{c, remain})
	}
}
// A compiler generates a compiled path from a path string.
type compiler struct {
// err holds the most recent parse error; the empty ErrPath means no error
// has been recorded.
err ErrPath
}
// parsePath parses an XPath-like path string and returns its segments in
// order. Parsing stops at the first segment that records an error in c.err;
// the segments produced so far, including the failing one, are returned.
func (c *compiler) parsePath(path string) []segment {
	// A trailing "//" has no selector after it; make the implied "*"
	// explicit.
	if strings.HasSuffix(path, "//") {
		path += "*"
	}

	var segs []segment

	// A leading "/" makes the path absolute: start from the root.
	if strings.HasPrefix(path, "/") {
		segs = append(segs, segment{new(selectRoot), []filter{}})
		path = path[1:]
	}

	pieces := splitPath(path)
	for i := 0; i < len(pieces); i++ {
		segs = append(segs, c.parseSegment(pieces[i]))
		if c.err != ErrPath("") {
			return segs
		}
	}
	return segs
}
// splitPath splits path at each '/' that is not inside a single- or
// double-quoted run, so quoted filter values may contain slashes. A quote is
// closed only by the same quote character that opened it. The result always
// has at least one piece, and pieces may be empty.
func splitPath(path string) []string {
	var (
		pieces []string
		start  int
		quote  byte // active quote character, or 0 when outside quotes
	)
	for i := 0; i < len(path); i++ {
		ch := path[i]
		switch {
		case quote != 0:
			if ch == quote {
				quote = 0
			}
		case ch == '\'' || ch == '"':
			quote = ch
		case ch == '/':
			pieces = append(pieces, path[start:i])
			start = i + 1
		}
	}
	return append(pieces, path[start:])
}
// parseSegment parses one path segment (the text between "/" characters)
// into a selector plus its filters. The text before the first "[" is the
// selector; each following "["-delimited piece must end in "]" and is
// parsed as a filter. A piece without the closing bracket records an error
// in c.err and stops filter parsing.
func (c *compiler) parseSegment(path string) segment {
	parts := strings.Split(path, "[")
	seg := segment{sel: c.parseSelector(parts[0]), filters: []filter{}}
	for _, raw := range parts[1:] {
		if !strings.HasSuffix(raw, "]") {
			c.err = ErrPath("path has invalid filter [brackets].")
			break
		}
		expr := strings.TrimSuffix(raw, "]")
		seg.filters = append(seg.filters, c.parseFilter(expr))
	}
	return seg
}
// parseSelector maps the selector text at the start of a path segment to
// its selector: "." is self, ".." is the parent, "*" is all children, the
// empty string (produced by "//") is descendants, and anything else is
// treated as a child tag name.
func (c *compiler) parseSelector(path string) selector {
	if path == "." {
		return &selectSelf{}
	}
	if path == ".." {
		return &selectParent{}
	}
	if path == "*" {
		return &selectChildren{}
	}
	if path == "" {
		return &selectDescendants{}
	}
	return newSelectChildrenByTag(path)
}
// fnTable maps the function names accepted in [fn()] and [fn()='val']
// filters to the Element method whose string result the filter tests.
var fnTable = map[string]func(e *Element) string{
"local-name": (*Element).name,
"name": (*Element).FullTag,
"namespace-prefix": (*Element).namespacePrefix,
"namespace-uri": (*Element).NamespaceURI,
"text": (*Element).Text,
}
// parseFilter parses a path filter contained within [brackets]; path is the
// text between the brackets. It returns the matching filter, or nil after
// recording an error in c.err when the expression is empty, has mismatched
// quotes, has an empty key before '=', names an unknown function, or has a
// position that does not fit in an int.
func (c *compiler) parseFilter(path string) filter {
	if len(path) == 0 {
		c.err = ErrPath("path contains an empty filter expression.")
		return nil
	}

	// Filter contains [@attr='val'], [@attr="val"], [fn()='val'],
	// [fn()="val"], [tag='val'] or [tag="val"]?
	eqindex := strings.IndexByte(path, '=')
	if eqindex >= 0 && eqindex+1 < len(path) {
		quote := path[eqindex+1]
		if quote == '\'' || quote == '"' {
			// The closing quote must be the last character of the filter.
			rindex := nextIndex(path, quote, eqindex+2)
			if rindex != len(path)-1 {
				c.err = ErrPath("path has mismatched filter quotes.")
				return nil
			}

			key := path[:eqindex]
			value := path[eqindex+2 : rindex]

			switch {
			case key == "":
				// A filter such as [='val'] has nothing to compare;
				// report it instead of indexing into the empty key.
				c.err = ErrPath("path has a filter with an empty key.")
				return nil
			case key[0] == '@':
				return newFilterAttrVal(key[1:], value)
			case strings.HasSuffix(key, "()"):
				name := key[:len(key)-2]
				if fn, ok := fnTable[name]; ok {
					return newFilterFuncVal(fn, value)
				}
				c.err = ErrPath("path has unknown function " + name)
				return nil
			default:
				return newFilterChildText(key, value)
			}
		}
	}

	// Filter contains [@attr], [N], [tag] or [fn()]
	switch {
	case path[0] == '@':
		return newFilterAttr(path[1:])
	case strings.HasSuffix(path, "()"):
		name := path[:len(path)-2]
		if fn, ok := fnTable[name]; ok {
			return newFilterFunc(fn)
		}
		c.err = ErrPath("path has unknown function " + name)
		return nil
	case isInteger(path):
		pos, err := strconv.Atoi(path)
		if err != nil {
			// The text looked like an integer but could not be converted
			// (for example it is out of range for int).
			c.err = ErrPath("path has an invalid filter position.")
			return nil
		}
		// Positive positions are 1-based in the path and 0-based in the
		// filter; zero and negative positions are passed through as-is.
		if pos > 0 {
			return newFilterPos(pos - 1)
		}
		return newFilterPos(pos)
	default:
		return newFilterChild(path)
	}
}
// selectSelf is the "." selector: it adds the current element itself to the
// candidate list.
type selectSelf struct{}

func (*selectSelf) apply(e *Element, p *pather) {
	p.candidates = append(p.candidates, e)
}
// selectRoot selects the topmost ancestor of the element, found by following
// parent links until an element with no parent is reached.
type selectRoot struct{}

func (*selectRoot) apply(e *Element, p *pather) {
	top := e
	for {
		up := top.parent
		if up == nil {
			break
		}
		top = up
	}
	p.candidates = append(p.candidates, top)
}
// selectParent is the ".." selector: it adds the element's parent to the
// candidate list, or nothing when the element has no parent.
type selectParent struct{}

func (*selectParent) apply(e *Element, p *pather) {
	parent := e.parent
	if parent == nil {
		return
	}
	p.candidates = append(p.candidates, parent)
}
// selectChildren is the "*" selector: it adds every child of the element
// that is itself an element to the candidate list, in child order.
type selectChildren struct{}

func (*selectChildren) apply(e *Element, p *pather) {
	for _, tok := range e.Child {
		child, isElem := tok.(*Element)
		if !isElem {
			continue
		}
		p.candidates = append(p.candidates, child)
	}
}
// selectDescendants adds the element itself and then all of its descendant
// elements to the candidate list. Elements are visited in the order the
// fifo queue yields them (presumably first-in first-out, i.e. level by
// level; see fifo).
type selectDescendants struct{}

func (*selectDescendants) apply(e *Element, p *pather) {
	var pending fifo
	pending.add(e)
	for pending.len() > 0 {
		cur := pending.remove().(*Element)
		p.candidates = append(p.candidates, cur)
		for _, tok := range cur.Child {
			if child, ok := tok.(*Element); ok {
				pending.add(child)
			}
		}
	}
}
// selectChildrenByTag adds to the candidate list every child element whose
// tag equals the selector's tag and whose namespace prefix matches the
// selector's (an empty selector prefix matches any prefix).
type selectChildrenByTag struct {
	space, tag string
}

// newSelectChildrenByTag builds the selector from a "prefix:tag" or plain
// "tag" string.
func newSelectChildrenByTag(path string) *selectChildrenByTag {
	space, tag := spaceDecompose(path)
	return &selectChildrenByTag{space: space, tag: tag}
}

func (s *selectChildrenByTag) apply(e *Element, p *pather) {
	for _, tok := range e.Child {
		child, isElem := tok.(*Element)
		if !isElem || !spaceMatch(s.space, child.Space) || s.tag != child.Tag {
			continue
		}
		p.candidates = append(p.candidates, child)
	}
}
// filterPos filters the candidate list, keeping only the candidate at the
// specified index. A non-negative index counts from the start (0 is the
// first candidate); a negative index counts back from the end (-1 is the
// last). An index outside the list leaves no candidates.
type filterPos struct {
	index int
}

// newFilterPos returns a filterPos for the given zero-based (or negative,
// from-the-end) index.
func newFilterPos(pos int) *filterPos {
	return &filterPos{pos}
}

func (f *filterPos) apply(p *pather) {
	n := len(p.candidates)
	idx := f.index
	if idx < 0 {
		// Convert a from-the-end index by adding the length. Negating the
		// index instead would overflow for the most negative int and let
		// an invalid index through the bounds check.
		idx += n
	}
	if idx >= 0 && idx < n {
		p.scratch = append(p.scratch, p.candidates[idx])
	}
	// Swap the filtered list in and recycle the old one as empty scratch.
	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterAttr keeps only the candidates that carry an attribute with the
// given key and a matching namespace prefix (an empty filter prefix matches
// any prefix).
type filterAttr struct {
	space, key string
}

// newFilterAttr builds the filter from a "prefix:key" or plain "key" string.
func newFilterAttr(str string) *filterAttr {
	space, key := spaceDecompose(str)
	return &filterAttr{space: space, key: key}
}

func (f *filterAttr) apply(p *pather) {
candidates:
	for _, c := range p.candidates {
		for _, a := range c.Attr {
			if f.key != a.Key || !spaceMatch(f.space, a.Space) {
				continue
			}
			p.scratch = append(p.scratch, c)
			continue candidates
		}
	}
	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterAttrVal keeps only the candidates that carry an attribute with the
// given key, a matching namespace prefix (an empty filter prefix matches any
// prefix) and exactly the given value.
type filterAttrVal struct {
	space, key, val string
}

// newFilterAttrVal builds the filter from a "prefix:key" or plain "key"
// string and the value to compare against.
func newFilterAttrVal(str, value string) *filterAttrVal {
	space, key := spaceDecompose(str)
	return &filterAttrVal{space: space, key: key, val: value}
}

func (f *filterAttrVal) apply(p *pather) {
candidates:
	for _, c := range p.candidates {
		for _, a := range c.Attr {
			if f.key != a.Key || f.val != a.Value || !spaceMatch(f.space, a.Space) {
				continue
			}
			p.scratch = append(p.scratch, c)
			continue candidates
		}
	}
	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterFunc keeps only the candidates for which the supplied function
// returns a non-empty string.
type filterFunc struct {
	fn func(e *Element) string
}

// newFilterFunc returns a filterFunc that tests candidates with fn.
func newFilterFunc(fn func(e *Element) string) *filterFunc {
	return &filterFunc{fn: fn}
}

func (f *filterFunc) apply(p *pather) {
	for _, c := range p.candidates {
		if f.fn(c) == "" {
			continue
		}
		p.scratch = append(p.scratch, c)
	}
	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterFuncVal keeps only the candidates for which the supplied function
// returns exactly the given value.
type filterFuncVal struct {
	fn  func(e *Element) string
	val string
}

// newFilterFuncVal returns a filterFuncVal that compares fn's result for
// each candidate against value.
func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal {
	return &filterFuncVal{fn: fn, val: value}
}

func (f *filterFuncVal) apply(p *pather) {
	for _, c := range p.candidates {
		if f.fn(c) != f.val {
			continue
		}
		p.scratch = append(p.scratch, c)
	}
	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterChild filters the candidate list for elements having at least one
// child element with the specified tag and a matching namespace prefix (an
// empty filter prefix matches any prefix). Each qualifying candidate is kept
// exactly once, however many of its children match.
type filterChild struct {
	space, tag string
}

// newFilterChild builds the filter from a "prefix:tag" or plain "tag"
// string.
func newFilterChild(str string) *filterChild {
	s, l := spaceDecompose(str)
	return &filterChild{s, l}
}

func (f *filterChild) apply(p *pather) {
	for _, c := range p.candidates {
		for _, cc := range c.Child {
			if cc, ok := cc.(*Element); ok &&
				spaceMatch(f.space, cc.Space) &&
				f.tag == cc.Tag {
				p.scratch = append(p.scratch, c)
				// One matching child is enough. Without this break a
				// candidate with several matching children would be
				// appended repeatedly and skew later positional filters.
				break
			}
		}
	}
	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
// filterChildText filters the candidate list for elements having at least
// one child element with the specified tag, a matching namespace prefix (an
// empty filter prefix matches any prefix) and exactly the specified text.
// Each qualifying candidate is kept exactly once, however many of its
// children match.
type filterChildText struct {
	space, tag, text string
}

// newFilterChildText builds the filter from a "prefix:tag" or plain "tag"
// string and the text to compare against.
func newFilterChildText(str, text string) *filterChildText {
	s, l := spaceDecompose(str)
	return &filterChildText{s, l, text}
}

func (f *filterChildText) apply(p *pather) {
	for _, c := range p.candidates {
		for _, cc := range c.Child {
			if cc, ok := cc.(*Element); ok &&
				spaceMatch(f.space, cc.Space) &&
				f.tag == cc.Tag &&
				f.text == cc.Text() {
				p.scratch = append(p.scratch, c)
				// One matching child is enough. Without this break a
				// candidate with several matching children would be
				// appended repeatedly and skew later positional filters.
				break
			}
		}
	}
	p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
etree-1.3.0/path_test.go 0000664 0000000 0000000 00000017361 14544351356 0015163 0 ustar 00root root 0000000 0000000 // Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package etree
import "testing"
// testXML is the document that TestPath and TestAbsolutePath parse and
// query.
// NOTE(review): the literal below contains only text content, while the
// tests query elements and attributes (bookstore, book, @category, p:price,
// ...). The XML markup appears to have been stripped from this copy;
// restore it from the upstream file before relying on these tests.
var testXML = `
Everyday ItalianGiada De Laurentiis200530.00Clarkson PotterHarry PotterJ K. Rowling200529.99XQuery Kick StartJames McGovernPer BothnerKurt CagleJames LinnVaidyanathan Nagarajan200349.99Learning XMLErik T. Ray200339.95
`
// test is one path test case: a path string and the result TestPath expects
// from evaluating it against testXML.
type test struct {
// path is the etree path string to compile and evaluate.
path string
// result is the expectation: nil for no match, a string for exactly one
// match with that text, a []string for the texts of all matches in order,
// or an errorResult for the expected compile error message.
result interface{}
}
// errorResult marks a test case whose path is expected to fail compilation;
// its value is the full error message CompilePath should return.
type errorResult string
// tests is the table of path queries run by TestPath against testXML,
// grouped by the path feature each group exercises. See the test type for
// how each result value is interpreted.
var tests = []test{
// basic queries
{"./bookstore/book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
{"./bookstore/book/author", []string{"Giada De Laurentiis", "J K. Rowling", "James McGovern", "Per Bothner", "Kurt Cagle", "James Linn", "Vaidyanathan Nagarajan", "Erik T. Ray"}},
{"./bookstore/book/year", []string{"2005", "2005", "2003", "2003"}},
{"./bookstore/book/p:price", []string{"30.00", "29.99", "39.95"}},
{"./bookstore/book/isbn", nil},
// descendant queries
{"//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
{"//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
{".//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
{".//bookstore//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
{".//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}},
{".//p:price/.", []string{"30.00", "29.99", "39.95"}},
{".//price", []string{"30.00", "29.99", "49.99", "39.95"}},
// positional queries
{"./bookstore/book[1]/title", "Everyday Italian"},
{"./bookstore/book[4]/title", "Learning XML"},
{"./bookstore/book[5]/title", nil},
{"./bookstore/book[3]/author[0]", "James McGovern"},
{"./bookstore/book[3]/author[1]", "James McGovern"},
{"./bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
{"./bookstore/book[3]/author[6]", nil},
{"./bookstore/book[-1]/title", "Learning XML"},
{"./bookstore/book[-4]/title", "Everyday Italian"},
{"./bookstore/book[-5]/title", nil},
// text function queries
{"./bookstore/book[author='James McGovern']/title", "XQuery Kick Start"},
{"./bookstore/book[author='Per Bothner']/title", "XQuery Kick Start"},
{"./bookstore/book[author='Kurt Cagle']/title", "XQuery Kick Start"},
{"./bookstore/book[author='James Linn']/title", "XQuery Kick Start"},
{"./bookstore/book[author='Vaidyanathan Nagarajan']/title", "XQuery Kick Start"},
{"//book[p:price='29.99']/title", "Harry Potter"},
{"//book[price='29.99']/title", "Harry Potter"},
{"//book/price[text()='29.99']", "29.99"},
{"//book/author[text()='Kurt Cagle']", "Kurt Cagle"},
{"//book/editor[text()]", []string{"Clarkson Potter", "\n\t\t"}},
// namespace function queries
{"//*[namespace-uri()]", []string{"30.00", "29.99", "39.95"}},
{"//*[namespace-uri()='urn:books-com:prices']", []string{"30.00", "29.99", "39.95"}},
{"//*[namespace-uri()='foo']", nil},
{"//*[namespace-prefix()]", []string{"30.00", "29.99", "39.95"}},
{"//*[namespace-prefix()='p']", []string{"30.00", "29.99", "39.95"}},
{"//*[name()='p:price']", []string{"30.00", "29.99", "39.95"}},
{"//*[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},
{"//price[namespace-uri()='']", []string{"49.99"}},
{"//price[namespace-prefix()='']", []string{"49.99"}},
{"//price[name()='price']", []string{"49.99"}},
{"//price[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}},
// attribute queries
{"./bookstore/book[@category='WEB']/title", []string{"XQuery Kick Start", "Learning XML"}},
{"./bookstore/book[@path='/books/xml']/title", []string{"Learning XML"}},
{"./bookstore/book[@category='COOKING']/title[@lang='en']", "Everyday Italian"},
{`./bookstore/book[@category="COOKING"]/title[@lang="en"]`, "Everyday Italian"},
{"./bookstore/book/title[@lang='en'][@sku='150']", "Harry Potter"},
{"./bookstore/book/title[@lang='fr']", nil},
{"//p:price[@p:tax='1.99']", []string{"29.99"}},
{"//p:price[@tax='1.99']", []string{"29.99"}},
{"//p:price[@p:tax]", []string{"29.99"}},
{"//p:price[@tax]", []string{"29.99"}},
// parent queries
{"./bookstore/book[@category='COOKING']/title/../../book[4]/title", "Learning XML"},
// root queries
{"/bookstore/book[1]/title", "Everyday Italian"},
{"/bookstore/book[4]/title", "Learning XML"},
{"/bookstore/book[5]/title", nil},
{"/bookstore/book[3]/author[0]", "James McGovern"},
{"/bookstore/book[3]/author[1]", "James McGovern"},
{"/bookstore/book[3]/author[3]/./.", "Kurt Cagle"},
{"/bookstore/book[3]/author[6]", nil},
{"/bookstore/book[-1]/title", "Learning XML"},
{"/bookstore/book[-4]/title", "Everyday Italian"},
{"/bookstore/book[-5]/title", nil},
// bad paths
{"./bookstore/book[]", errorResult("etree: path contains an empty filter expression.")},
{"./bookstore/book[@category='WEB'", errorResult("etree: path has invalid filter [brackets].")},
{"./bookstore/book[@category='WEB]", errorResult("etree: path has mismatched filter quotes.")},
{`./bookstore/book[@category='WEB"]`, errorResult("etree: path has mismatched filter quotes.")},
{`./bookstore/book[@category="WEB']`, errorResult("etree: path has mismatched filter quotes.")},
{"./bookstore/book[author]a", errorResult("etree: path has invalid filter [brackets].")},
{"/][", errorResult("etree: path has invalid filter [brackets].")},
}
// TestPath compiles every path in the tests table and checks the outcome
// against the table's expectation: a compile error message, no match, a
// single match, or an ordered list of matches. Both FindElementPath and
// FindElementsPath are checked for each path that compiles.
func TestPath(t *testing.T) {
	doc := NewDocument()
	if err := doc.ReadFromString(testXML); err != nil {
		t.Error(err)
	}

	for _, tc := range tests {
		path, err := CompilePath(tc.path)
		if err != nil {
			// A compile failure is only acceptable when the table expects
			// exactly this error message.
			want, isErr := tc.result.(errorResult)
			if !isErr || err.Error() != string(want) {
				fail(t, tc)
			}
			continue
		}

		// Test both FindElementsPath and FindElementPath
		first := doc.FindElementPath(path)
		all := doc.FindElementsPath(path)

		switch want := tc.result.(type) {
		case errorResult:
			// The path compiled although an error was expected.
			fail(t, tc)
		case nil:
			if first != nil || len(all) != 0 {
				fail(t, tc)
			}
		case string:
			if first == nil || first.Text() != want ||
				len(all) != 1 || all[0].Text() != want {
				fail(t, tc)
			}
		case []string:
			if first == nil || first.Text() != want[0] || len(all) != len(want) {
				fail(t, tc)
				continue
			}
			for i := range all {
				if all[i].Text() != want[i] {
					fail(t, tc)
					break
				}
			}
		}
	}
}
// fail reports a failed path test case, naming its path. It is marked as a
// test helper so the failure is attributed to the caller's line.
func fail(t *testing.T, test test) {
	t.Helper()
	const format = "etree: failed test '%s'\n"
	t.Errorf(format, test.path)
}
// TestAbsolutePath checks that absolute paths ("/..." and "//...") resolve
// from the document root even when the search starts from a nested element:
// each query is issued from every author element and must find the same
// title.
func TestAbsolutePath(t *testing.T) {
	doc := NewDocument()
	if err := doc.ReadFromString(testXML); err != nil {
		t.Error(err)
	}

	checks := []struct{ path, want string }{
		{"/bookstore/book[1]/title", "Everyday Italian"},
		{"//book[p:price='29.99']/title", "Harry Potter"},
	}

	for _, author := range doc.FindElements("//book/author") {
		for _, chk := range checks {
			title := author.FindElement(chk.path)
			if title == nil || title.Text() != chk.want {
				t.Errorf("etree: absolute path test failed")
			}
		}
	}
}