pax_global_header00006660000000000000000000000064147773460140014527gustar00rootroot0000000000000052 comment=b602ec3889c3af0cd9c6da968689afad5de6a261 etree-1.5.1/000077500000000000000000000000001477734601400126375ustar00rootroot00000000000000etree-1.5.1/.github/000077500000000000000000000000001477734601400141775ustar00rootroot00000000000000etree-1.5.1/.github/workflows/000077500000000000000000000000001477734601400162345ustar00rootroot00000000000000etree-1.5.1/.github/workflows/go.yml000066400000000000000000000026771477734601400174000ustar00rootroot00000000000000name: Go on: [push, pull_request] permissions: contents: read jobs: analyze: name: Analyze runs-on: ubuntu-latest permissions: actions: read contents: read security-events: write strategy: fail-fast: false matrix: language: ["go"] steps: - name: Checkout repository uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - name: Initialize CodeQL uses: github/codeql-action/init@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12 with: languages: ${{ matrix.language }} - name: Autobuild uses: github/codeql-action/autobuild@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12 - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@4fa2a7953630fd2f3fb380f21be14ede0169dd4f # v3.25.12 with: category: "/language:${{matrix.language}}" build: name: Build runs-on: ubuntu-latest strategy: matrix: go-version: [ '1.21', '1.22.x' ] steps: - name: Checkout repository uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - name: Setup Go ${{ matrix.go-version }} uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 with: go-version: ${{ matrix.go-version }} - name: Build run: go build -v ./... - name: Test run: go test -v ./... 
etree-1.5.1/CONTRIBUTORS000066400000000000000000000006261477734601400145230ustar00rootroot00000000000000Brett Vickers (beevik) Felix Geisendörfer (felixge) Kamil Kisiel (kisielk) Graham King (grahamking) Matt Smith (ma314smith) Michal Jemala (michaljemala) Nicolas Piganeau (npiganeau) Chris Brown (ccbrown) Earncef Sequeira (earncef) Gabriel de Labachelerie (wuzuf) Martin Dosch (mdosch) Hugo Wetterberg (hugowetterberg) Tobias Theel (nerzal) Daniel Potapov (dpotapov) Mikhail Ferapontow (MikhailFerapontow) etree-1.5.1/LICENSE000066400000000000000000000024151477734601400136460ustar00rootroot00000000000000Copyright 2015-2024 Brett Vickers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
etree-1.5.1/README.md000066400000000000000000000130001477734601400141100ustar00rootroot00000000000000[![GoDoc](https://godoc.org/github.com/beevik/etree?status.svg)](https://godoc.org/github.com/beevik/etree) [![Go](https://github.com/beevik/etree/actions/workflows/go.yml/badge.svg)](https://github.com/beevik/etree/actions/workflows/go.yml) etree ===== The etree package is a lightweight, pure go package that expresses XML in the form of an element tree. Its design was inspired by the Python [ElementTree](http://docs.python.org/2/library/xml.etree.elementtree.html) module. Some of the package's capabilities and features: * Represents XML documents as trees of elements for easy traversal. * Imports, serializes, modifies or creates XML documents from scratch. * Writes and reads XML to/from files, byte slices, strings and io interfaces. * Performs simple or complex searches with lightweight XPath-like query APIs. * Auto-indents XML using spaces or tabs for better readability. * Implemented in pure go; depends only on standard go libraries. * Built on top of the go [encoding/xml](http://golang.org/pkg/encoding/xml) package. ### Creating an XML document The following example creates an XML document from scratch using the etree package and outputs its indented contents to stdout. ```go doc := etree.NewDocument() doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`) doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) people := doc.CreateElement("People") people.CreateComment("These are all known people") jon := people.CreateElement("Person") jon.CreateAttr("name", "Jon") sally := people.CreateElement("Person") sally.CreateAttr("name", "Sally") doc.Indent(2) doc.WriteTo(os.Stdout) ``` Output: ```xml ``` ### Reading an XML file Suppose you have a file on disk called `bookstore.xml` containing the following data: ```xml Everyday Italian Giada De Laurentiis 2005 30.00 Harry Potter J K. 
Rowling 2005 29.99 XQuery Kick Start James McGovern Per Bothner Kurt Cagle James Linn Vaidyanathan Nagarajan 2003 49.99 Learning XML Erik T. Ray 2003 39.95 ``` This code reads the file's contents into an etree document. ```go doc := etree.NewDocument() if err := doc.ReadFromFile("bookstore.xml"); err != nil { panic(err) } ``` You can also read XML from a string, a byte slice, or an `io.Reader`. ### Processing elements and attributes This example illustrates several ways to access elements and attributes using etree selection queries. ```go root := doc.SelectElement("bookstore") fmt.Println("ROOT element:", root.Tag) for _, book := range root.SelectElements("book") { fmt.Println("CHILD element:", book.Tag) if title := book.SelectElement("title"); title != nil { lang := title.SelectAttrValue("lang", "unknown") fmt.Printf(" TITLE: %s (%s)\n", title.Text(), lang) } for _, attr := range book.Attr { fmt.Printf(" ATTR: %s=%s\n", attr.Key, attr.Value) } } ``` Output: ``` ROOT element: bookstore CHILD element: book TITLE: Everyday Italian (en) ATTR: category=COOKING CHILD element: book TITLE: Harry Potter (en) ATTR: category=CHILDREN CHILD element: book TITLE: XQuery Kick Start (en) ATTR: category=WEB CHILD element: book TITLE: Learning XML (en) ATTR: category=WEB ``` ### Path queries This example uses etree's path functions to select all book titles that fall into the category of 'WEB'. The double-slash prefix in the path causes the search for book elements to occur recursively; book elements may appear at any level of the XML hierarchy. ```go for _, t := range doc.FindElements("//book[@category='WEB']/title") { fmt.Println("Title:", t.Text()) } ``` Output: ``` Title: XQuery Kick Start Title: Learning XML ``` This example finds the first book element under the root bookstore element and outputs the tag and text of each of its child elements. 
```go for _, e := range doc.FindElements("./bookstore/book[1]/*") { fmt.Printf("%s: %s\n", e.Tag, e.Text()) } ``` Output: ``` title: Everyday Italian author: Giada De Laurentiis year: 2005 price: 30.00 ``` This example finds all books with a price of 49.99 and outputs their titles. ```go path := etree.MustCompilePath("./bookstore/book[p:price='49.99']/title") for _, e := range doc.FindElementsPath(path) { fmt.Println(e.Text()) } ``` Output: ``` XQuery Kick Start ``` Note that this example uses the FindElementsPath function, which takes as an argument a pre-compiled path object. Use precompiled paths when you plan to search with the same path more than once. ### Other features These are just a few examples of the things the etree package can do. See the [documentation](http://godoc.org/github.com/beevik/etree) for a complete description of its capabilities. ### Contributing This project accepts contributions. Just fork the repo and submit a pull request! etree-1.5.1/RELEASE_NOTES.md000066400000000000000000000155741477734601400152250ustar00rootroot00000000000000Release 1.5.1 ============= **Fixes** * Fixed a bug in `InsertChildAt`. Release 1.5.0 ============= **Changes** * Added `Element` function `CreateChild`, which calls a continuation function after creating and adding a child element. **Fixes** * Removed a potential conflict between two `ReadSettings` values. When `AttrSingleQuote` is true, `CanonicalAttrVal` is forced to be false. Release 1.4.1 ============= **Changes** * Minimal go version updated to 1.21. * Default-initialized CharsetReader causes same result as NewDocument(). * When reading an XML document, attributes are parsed more efficiently. Release v1.4.0 ============== **New Features** * Add `AutoClose` option to `ReadSettings`. * Add `ValidateInput` to `ReadSettings`. * Add `NotNil` function to `Element`. * Add `NextSibling` and `PrevSibling` functions to `Element`. 
Release v1.3.0 ============== **New Features** * Add support for double-quotes in filter path queries. * Add `PreserveDuplicateAttrs` to `ReadSettings`. * Add `ReindexChildren` to `Element`. Release v1.2.0 ============== **New Features** * Add the ability to write XML fragments using Token WriteTo functions. * Add the ability to re-indent an XML element as though it were the root of the document. * Add a ReadSettings option to preserve CDATA blocks when reading an XML document. Release v1.1.4 ============== **New Features** * Add the ability to preserve whitespace in leaf elements during indent. * Add the ability to suppress a document-trailing newline during indent. * Add choice of XML attribute quoting style (single-quote or double-quote). **Removed Features** * Removed the CDATA preservation change introduced in v1.1.3. It was implemented in a way that broke the ability to process XML documents encoded using non-UTF8 character sets. Release v1.1.3 ============== * XML reads now preserve CDATA sections instead of converting them to standard character data. Release v1.1.2 ============== * Fixed a path parsing bug. * The `Element.Text` function now handles comments embedded between character data spans. Release v1.1.1 ============== * Updated go version in `go.mod` to 1.20 Release v1.1.0 ============== **New Features** * New attribute helpers. * Added the `Element.SortAttrs` method, which lexicographically sorts an element's attributes by key. * New `ReadSettings` properties. * Added `Entity` for the support of custom entity maps. * New `WriteSettings` properties. * Added `UseCRLF` to allow the output of CR-LF newlines instead of the default LF newlines. This is useful on Windows systems. * Additional support for text and CDATA sections. * The `Element.Text` method now returns the concatenation of all consecutive character data tokens immediately following an element's opening tag. 
* Added `Element.SetCData` to replace the character data immediately following an element's opening tag with a CDATA section. * Added `Element.CreateCData` to create and add a CDATA section child `CharData` token to an element. * Added `Element.CreateText` to create and add a child text `CharData` token to an element. * Added `NewCData` to create a parentless CDATA section `CharData` token. * Added `NewText` to create a parentless text `CharData` token. * Added `CharData.IsCData` to detect if the token contains a CDATA section. * Added `CharData.IsWhitespace` to detect if the token contains whitespace inserted by one of the document Indent functions. * Modified `Element.SetText` so that it replaces a run of consecutive character data tokens following the element's opening tag (instead of just the first one). * New "tail text" support. * Added the `Element.Tail` method, which returns the text immediately following an element's closing tag. * Added the `Element.SetTail` method, which modifies the text immediately following an element's closing tag. * New element child insertion and removal methods. * Added the `Element.InsertChildAt` method, which inserts a new child token before the specified child token index. * Added the `Element.RemoveChildAt` method, which removes the child token at the specified child token index. * New element and attribute queries. * Added the `Element.Index` method, which returns the element's index within its parent element's child token list. * Added the `Element.NamespaceURI` method to return the namespace URI associated with an element. * Added the `Attr.NamespaceURI` method to return the namespace URI associated with an attribute. * Added the `Attr.Element` method to return the element that an attribute belongs to. * New Path filter functions. * Added `[local-name()='val']` to keep elements whose unprefixed tag matches the desired value. * Added `[name()='val']` to keep elements whose full tag matches the desired value. 
* Added `[namespace-prefix()='val']` to keep elements whose namespace prefix matches the desired value. * Added `[namespace-uri()='val']` to keep elements whose namespace URI matches the desired value. **Bug Fixes** * A default XML `CharSetReader` is now used to prevent failed parsing of XML documents using certain encodings. ([Issue](https://github.com/beevik/etree/issues/53)). * All characters are now properly escaped according to XML parsing rules. ([Issue](https://github.com/beevik/etree/issues/55)). * The `Document.Indent` and `Document.IndentTabs` functions no longer insert empty string `CharData` tokens. **Deprecated** * `Element` * The `InsertChild` method is deprecated. Use `InsertChildAt` instead. * The `CreateCharData` method is deprecated. Use `CreateText` instead. * `CharData` * The `NewCharData` method is deprecated. Use `NewText` instead. Release v1.0.1 ============== **Changes** * Added support for absolute etree Path queries. An absolute path begins with `/` or `//` and begins its search from the element's document root. * Added [`GetPath`](https://godoc.org/github.com/beevik/etree#Element.GetPath) and [`GetRelativePath`](https://godoc.org/github.com/beevik/etree#Element.GetRelativePath) functions to the [`Element`](https://godoc.org/github.com/beevik/etree#Element) type. **Breaking changes** * A path starting with `//` is now interpreted as an absolute path. Previously, it was interpreted as a relative path starting from the element whose [`FindElement`](https://godoc.org/github.com/beevik/etree#Element.FindElement) method was called. To remain compatible with this release, all paths prefixed with `//` should be prefixed with `.//` when called from any element other than the document's root. * [**edit 2/1/2019**]: Minor releases should not contain breaking changes. Even though this breaking change was very minor, it was a mistake to include it in this minor release. 
In the future, all breaking changes will be limited to major releases (e.g., version 2.0.0). Release v1.0.0 ============== Initial release. etree-1.5.1/etree.go000066400000000000000000001427141477734601400143030ustar00rootroot00000000000000// Copyright 2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package etree provides XML services through an Element Tree // abstraction. package etree import ( "bufio" "bytes" "encoding/xml" "errors" "io" "os" "slices" "strings" ) const ( // NoIndent is used with the IndentSettings record to remove all // indenting. NoIndent = -1 ) // ErrXML is returned when XML parsing fails due to incorrect formatting. var ErrXML = errors.New("etree: invalid XML format") // cdataPrefix is used to detect CDATA text when ReadSettings.PreserveCData is // true. var cdataPrefix = []byte(". If false, XML character references // are also produced for " and '. Default: false. CanonicalText bool // CanonicalAttrVal forces the production of XML character references for // attribute value characters &, < and ". If false, XML character // references are also produced for > and '. Ignored when AttrSingleQuote // is true. Default: false. CanonicalAttrVal bool // AttrSingleQuote causes attributes to use single quotes (attr='example') // instead of double quotes (attr = "example") when set to true. Default: // false. AttrSingleQuote bool // UseCRLF causes the document's Indent* functions to use a carriage return // followed by a linefeed ("\r\n") when outputting a newline. If false, // only a linefeed is used ("\n"). Default: false. // // Deprecated: UseCRLF is deprecated. Use IndentSettings.UseCRLF instead. UseCRLF bool } // dup creates a duplicate of the WriteSettings object. func (s *WriteSettings) dup() WriteSettings { return *s } // IndentSettings determine the behavior of the Document's Indent* functions. 
type IndentSettings struct { // Spaces indicates the number of spaces to insert for each level of // indentation. Set to etree.NoIndent to remove all indentation. Ignored // when UseTabs is true. Default: 4. Spaces int // UseTabs causes tabs to be used instead of spaces when indenting. // Default: false. UseTabs bool // UseCRLF causes newlines to be written as a carriage return followed by // a linefeed ("\r\n"). If false, only a linefeed character is output // for a newline ("\n"). Default: false. UseCRLF bool // PreserveLeafWhitespace causes indent functions to preserve whitespace // within XML elements containing only non-CDATA character data. Default: // false. PreserveLeafWhitespace bool // SuppressTrailingWhitespace suppresses the generation of a trailing // whitespace characters (such as newlines) at the end of the indented // document. Default: false. SuppressTrailingWhitespace bool } // NewIndentSettings creates a default IndentSettings record. func NewIndentSettings() *IndentSettings { return &IndentSettings{ Spaces: 4, UseTabs: false, UseCRLF: false, PreserveLeafWhitespace: false, SuppressTrailingWhitespace: false, } } type indentFunc func(depth int) string func getIndentFunc(s *IndentSettings) indentFunc { if s.UseTabs { if s.UseCRLF { return func(depth int) string { return indentCRLF(depth, indentTabs) } } else { return func(depth int) string { return indentLF(depth, indentTabs) } } } else { if s.Spaces < 0 { return func(depth int) string { return "" } } else if s.UseCRLF { return func(depth int) string { return indentCRLF(depth*s.Spaces, indentSpaces) } } else { return func(depth int) string { return indentLF(depth*s.Spaces, indentSpaces) } } } } // Writer is the interface that wraps the Write* functions called by each token // type's WriteTo function. 
type Writer interface {
	io.StringWriter // WriteString
	io.ByteWriter   // WriteByte
	io.Writer       // Write
}

// A Token is an interface type used to represent XML elements, character
// data, CDATA sections, XML comments, XML directives, and XML processing
// instructions.
type Token interface {
	// Parent returns the element whose child list holds the token, or nil
	// when the token has no parent.
	Parent() *Element

	// Index returns the token's position within its parent's list of child
	// tokens.
	Index() int

	// WriteTo serializes the token to the writer 'w' using the write
	// settings 's'.
	WriteTo(w Writer, s *WriteSettings)

	// dup duplicates the token. NOTE(review): the copy is presumably bound
	// to 'parent'; confirm against the per-type implementations.
	dup(parent *Element) Token

	// setParent and setIndex update the token's parent/index bookkeeping.
	setParent(parent *Element)
	setIndex(index int)
}

// A Document is a container holding a complete XML tree.
//
// A document has a single embedded element, which contains zero or more child
// tokens, one of which is usually the root element. The embedded element may
// include other children such as processing instruction tokens or character
// data tokens. The document's embedded element is never directly serialized;
// only its children are.
//
// A document also contains read and write settings, which influence the way
// the document is deserialized, serialized, and indented.
type Document struct {
	Element                     // embedded container element holding the document's top-level tokens
	ReadSettings  ReadSettings  // settings applied when reading XML into the document
	WriteSettings WriteSettings // settings applied when writing the document out
}

// An Element represents an XML element, its attributes, and its child tokens.
type Element struct {
	Space, Tag string   // namespace prefix and tag
	Attr       []Attr   // key-value attribute pairs
	Child      []Token  // child tokens (elements, comments, etc.)
	parent     *Element // parent element
	index      int      // token index in parent's children
}

// An Attr represents a key-value attribute within an XML element.
type Attr struct {
	Space, Key string   // The attribute's namespace prefix and key
	Value      string   // The attribute value string
	element    *Element // element containing the attribute
}

// charDataFlags are used with CharData tokens to store additional settings.
// The values are single bits so that they can be combined.
type charDataFlags uint8

const (
	// The CharData contains only whitespace. (bit value 1)
	whitespaceFlag charDataFlags = 1 << iota

	// The CharData contains a CDATA section. (bit value 2)
	cdataFlag
)

// CharData may be used to represent simple text data or a CDATA section
// within an XML document.
The Data property should never be modified // directly; use the SetData function instead. type CharData struct { Data string // the simple text or CDATA section content parent *Element index int flags charDataFlags } // A Comment represents an XML comment. type Comment struct { Data string // the comment's text parent *Element index int } // A Directive represents an XML directive. type Directive struct { Data string // the directive string parent *Element index int } // A ProcInst represents an XML processing instruction. type ProcInst struct { Target string // the processing instruction target Inst string // the processing instruction value parent *Element index int } // NewDocument creates an XML document without a root element. func NewDocument() *Document { return &Document{ Element: Element{Child: make([]Token, 0)}, } } // NewDocumentWithRoot creates an XML document and sets the element 'e' as its // root element. If the element 'e' is already part of another document, it is // first removed from its existing document. func NewDocumentWithRoot(e *Element) *Document { d := NewDocument() d.SetRoot(e) return d } // Copy returns a recursive, deep copy of the document. func (d *Document) Copy() *Document { return &Document{ Element: *(d.Element.dup(nil).(*Element)), ReadSettings: d.ReadSettings.dup(), WriteSettings: d.WriteSettings.dup(), } } // Root returns the root element of the document. It returns nil if there is // no root element. func (d *Document) Root() *Element { for _, t := range d.Child { if c, ok := t.(*Element); ok { return c } } return nil } // SetRoot replaces the document's root element with the element 'e'. If the // document already has a root element when this function is called, then the // existing root element is unbound from the document. If the element 'e' is // part of another document, then it is unbound from the other document. 
func (d *Document) SetRoot(e *Element) { if e.parent != nil { e.parent.RemoveChild(e) } // If there is already a root element, replace it. p := &d.Element for i, t := range p.Child { if _, ok := t.(*Element); ok { t.setParent(nil) t.setIndex(-1) p.Child[i] = e e.setParent(p) e.setIndex(i) return } } // No existing root element, so add it. p.addChild(e) } // ReadFrom reads XML from the reader 'r' into this document. The function // returns the number of bytes read and any error encountered. func (d *Document) ReadFrom(r io.Reader) (n int64, err error) { if d.ReadSettings.ValidateInput { b, err := io.ReadAll(r) if err != nil { return 0, err } if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil { return 0, err } r = bytes.NewReader(b) } return d.Element.readFrom(r, d.ReadSettings) } // ReadFromFile reads XML from a local file at path 'filepath' into this // document. func (d *Document) ReadFromFile(filepath string) error { f, err := os.Open(filepath) if err != nil { return err } defer f.Close() _, err = d.ReadFrom(f) return err } // ReadFromBytes reads XML from the byte slice 'b' into the this document. func (d *Document) ReadFromBytes(b []byte) error { if d.ReadSettings.ValidateInput { if err := validateXML(bytes.NewReader(b), d.ReadSettings); err != nil { return err } } _, err := d.Element.readFrom(bytes.NewReader(b), d.ReadSettings) return err } // ReadFromString reads XML from the string 's' into this document. func (d *Document) ReadFromString(s string) error { if d.ReadSettings.ValidateInput { if err := validateXML(strings.NewReader(s), d.ReadSettings); err != nil { return err } } _, err := d.Element.readFrom(strings.NewReader(s), d.ReadSettings) return err } // validateXML determines if the data read from the reader 'r' contains // well-formed XML according to the rules set by the go xml package. 
func validateXML(r io.Reader, settings ReadSettings) error { dec := newDecoder(r, settings) err := dec.Decode(new(interface{})) if err != nil { return err } // If there are any trailing tokens after unmarshalling with Decode(), // then the XML input didn't terminate properly. _, err = dec.Token() if err == io.EOF { return nil } return ErrXML } // newDecoder creates an XML decoder for the reader 'r' configured using // the provided read settings. func newDecoder(r io.Reader, settings ReadSettings) *xml.Decoder { d := xml.NewDecoder(r) d.CharsetReader = settings.CharsetReader if d.CharsetReader == nil { d.CharsetReader = defaultCharsetReader } d.Strict = !settings.Permissive d.Entity = settings.Entity d.AutoClose = settings.AutoClose return d } // WriteTo serializes the document out to the writer 'w'. The function returns // the number of bytes written and any error encountered. func (d *Document) WriteTo(w io.Writer) (n int64, err error) { xw := newXmlWriter(w) b := bufio.NewWriter(xw) for _, c := range d.Child { c.WriteTo(b, &d.WriteSettings) } err, n = b.Flush(), xw.bytes return } // WriteToFile serializes the document out to the file at path 'filepath'. func (d *Document) WriteToFile(filepath string) error { f, err := os.Create(filepath) if err != nil { return err } defer f.Close() _, err = d.WriteTo(f) return err } // WriteToBytes serializes this document into a slice of bytes. func (d *Document) WriteToBytes() (b []byte, err error) { var buf bytes.Buffer if _, err = d.WriteTo(&buf); err != nil { return } return buf.Bytes(), nil } // WriteToString serializes this document into a string. func (d *Document) WriteToString() (s string, err error) { var b []byte if b, err = d.WriteToBytes(); err != nil { return } return string(b), nil } // Indent modifies the document's element tree by inserting character data // tokens containing newlines and spaces for indentation. The amount of // indentation per depth level is given by the 'spaces' parameter. 
Other than // the number of spaces, default IndentSettings are used. func (d *Document) Indent(spaces int) { s := NewIndentSettings() s.Spaces = spaces d.IndentWithSettings(s) } // IndentTabs modifies the document's element tree by inserting CharData // tokens containing newlines and tabs for indentation. One tab is used per // indentation level. Other than the use of tabs, default IndentSettings // are used. func (d *Document) IndentTabs() { s := NewIndentSettings() s.UseTabs = true d.IndentWithSettings(s) } // IndentWithSettings modifies the document's element tree by inserting // character data tokens containing newlines and indentation. The behavior // of the indentation algorithm is configured by the indent settings. func (d *Document) IndentWithSettings(s *IndentSettings) { // WriteSettings.UseCRLF is deprecated. Until removed from the package, it // overrides IndentSettings.UseCRLF when true. if d.WriteSettings.UseCRLF { s.UseCRLF = true } d.Element.indent(0, getIndentFunc(s), s) if s.SuppressTrailingWhitespace { d.Element.stripTrailingWhitespace() } } // Unindent modifies the document's element tree by removing character data // tokens containing only whitespace. Other than the removal of indentation, // default IndentSettings are used. func (d *Document) Unindent() { s := NewIndentSettings() s.Spaces = NoIndent d.IndentWithSettings(s) } // NewElement creates an unparented element with the specified tag (i.e., // name). The tag may include a namespace prefix followed by a colon. func NewElement(tag string) *Element { space, stag := spaceDecompose(tag) return newElement(space, stag, nil) } // newElement is a helper function that creates an element and binds it to // a parent element if possible. 
func newElement(space, tag string, parent *Element) *Element { e := &Element{ Space: space, Tag: tag, Attr: make([]Attr, 0), Child: make([]Token, 0), parent: parent, index: -1, } if parent != nil { parent.addChild(e) } return e } // Copy creates a recursive, deep copy of the element and all its attributes // and children. The returned element has no parent but can be parented to a // another element using AddChild, or added to a document with SetRoot or // NewDocumentWithRoot. func (e *Element) Copy() *Element { return e.dup(nil).(*Element) } // FullTag returns the element e's complete tag, including namespace prefix if // present. func (e *Element) FullTag() string { if e.Space == "" { return e.Tag } return e.Space + ":" + e.Tag } // NamespaceURI returns the XML namespace URI associated with the element. If // the element is part of the XML default namespace, NamespaceURI returns the // empty string. func (e *Element) NamespaceURI() string { if e.Space == "" { return e.findDefaultNamespaceURI() } return e.findLocalNamespaceURI(e.Space) } // findLocalNamespaceURI finds the namespace URI corresponding to the // requested prefix. func (e *Element) findLocalNamespaceURI(prefix string) string { for _, a := range e.Attr { if a.Space == "xmlns" && a.Key == prefix { return a.Value } } if e.parent == nil { return "" } return e.parent.findLocalNamespaceURI(prefix) } // findDefaultNamespaceURI finds the default namespace URI of the element. func (e *Element) findDefaultNamespaceURI() string { for _, a := range e.Attr { if a.Space == "" && a.Key == "xmlns" { return a.Value } } if e.parent == nil { return "" } return e.parent.findDefaultNamespaceURI() } // namespacePrefix returns the namespace prefix associated with the element. func (e *Element) namespacePrefix() string { return e.Space } // name returns the tag associated with the element. func (e *Element) name() string { return e.Tag } // ReindexChildren recalculates the index values of the element's child // tokens. 
This is necessary only if you have manually manipulated the // element's `Child` array. func (e *Element) ReindexChildren() { for i := 0; i < len(e.Child); i++ { e.Child[i].setIndex(i) } } // Text returns all character data immediately following the element's opening // tag. func (e *Element) Text() string { if len(e.Child) == 0 { return "" } text := "" for _, ch := range e.Child { if cd, ok := ch.(*CharData); ok { if text == "" { text = cd.Data } else { text += cd.Data } } else if _, ok := ch.(*Comment); ok { // ignore } else { break } } return text } // SetText replaces all character data immediately following an element's // opening tag with the requested string. func (e *Element) SetText(text string) { e.replaceText(0, text, 0) } // SetCData replaces all character data immediately following an element's // opening tag with a CDATA section. func (e *Element) SetCData(text string) { e.replaceText(0, text, cdataFlag) } // Tail returns all character data immediately following the element's end // tag. func (e *Element) Tail() string { if e.Parent() == nil { return "" } p := e.Parent() i := e.Index() text := "" for _, ch := range p.Child[i+1:] { if cd, ok := ch.(*CharData); ok { if text == "" { text = cd.Data } else { text += cd.Data } } else { break } } return text } // SetTail replaces all character data immediately following the element's end // tag with the requested string. func (e *Element) SetTail(text string) { if e.Parent() == nil { return } p := e.Parent() p.replaceText(e.Index()+1, text, 0) } // replaceText is a helper function that replaces a series of chardata tokens // starting at index i with the requested text. 
func (e *Element) replaceText(i int, text string, flags charDataFlags) { end := e.findTermCharDataIndex(i) switch { case end == i: if text != "" { // insert a new chardata token at index i cd := newCharData(text, flags, nil) e.InsertChildAt(i, cd) } case end == i+1: if text == "" { // remove the chardata token at index i e.RemoveChildAt(i) } else { // replace the first and only character token at index i cd := e.Child[i].(*CharData) cd.Data, cd.flags = text, flags } default: if text == "" { // remove all chardata tokens starting from index i copy(e.Child[i:], e.Child[end:]) removed := end - i e.Child = e.Child[:len(e.Child)-removed] for j := i; j < len(e.Child); j++ { e.Child[j].setIndex(j) } } else { // replace the first chardata token at index i and remove all // subsequent chardata tokens cd := e.Child[i].(*CharData) cd.Data, cd.flags = text, flags copy(e.Child[i+1:], e.Child[end:]) removed := end - (i + 1) e.Child = e.Child[:len(e.Child)-removed] for j := i + 1; j < len(e.Child); j++ { e.Child[j].setIndex(j) } } } } // findTermCharDataIndex finds the index of the first child token that isn't // a CharData token. It starts from the requested start index. func (e *Element) findTermCharDataIndex(start int) int { for i := start; i < len(e.Child); i++ { if _, ok := e.Child[i].(*CharData); !ok { return i } } return len(e.Child) } // CreateElement creates a new element with the specified tag (i.e., name) and // adds it as the last child of element 'e'. The tag may include a prefix // followed by a colon. func (e *Element) CreateElement(tag string) *Element { space, stag := spaceDecompose(tag) return newElement(space, stag, e) } // CreateChild performs the same task as CreateElement but calls a // continuation function after the child element is created, allowing // additional actions to be performed on the child element before returning. // // This method of element creation is particularly useful when building nested // XML documents from code. 
For example: // // org := doc.CreateChild("organization", func(e *Element) { // e.CreateComment("Mary") // e.CreateChild("person", func(e *Element) { // e.CreateAttr("name", "Mary") // e.CreateAttr("age", "30") // e.CreateAttr("hair", "brown") // }) // }) func (e *Element) CreateChild(tag string, cont func(e *Element)) *Element { child := e.CreateElement(tag) cont(child) return child } // AddChild adds the token 't' as the last child of the element. If token 't' // was already the child of another element, it is first removed from its // parent element. func (e *Element) AddChild(t Token) { if t.Parent() != nil { t.Parent().RemoveChild(t) } e.addChild(t) } // InsertChild inserts the token 't' into this element's list of children just // before the element's existing child token 'ex'. If the existing element // 'ex' does not appear in this element's list of child tokens, then 't' is // added to the end of this element's list of child tokens. If token 't' is // already the child of another element, it is first removed from the other // element's list of child tokens. // // Deprecated: InsertChild is deprecated. Use InsertChildAt instead. func (e *Element) InsertChild(ex Token, t Token) { if ex == nil || ex.Parent() != e { e.AddChild(t) return } if t.Parent() != nil { t.Parent().RemoveChild(t) } t.setParent(e) i := ex.Index() e.Child = append(e.Child, nil) copy(e.Child[i+1:], e.Child[i:]) e.Child[i] = t for j := i; j < len(e.Child); j++ { e.Child[j].setIndex(j) } } // InsertChildAt inserts the token 't' into this element's list of child // tokens just before the requested 'index'. If the index is greater than or // equal to the length of the list of child tokens, then the token 't' is // added to the end of the list of child tokens. 
func (e *Element) InsertChildAt(index int, t Token) { if index >= len(e.Child) { e.AddChild(t) return } if t.Parent() != nil { if t.Parent() == e && t.Index() < index { index-- } t.Parent().RemoveChild(t) } t.setParent(e) e.Child = append(e.Child, nil) copy(e.Child[index+1:], e.Child[index:]) e.Child[index] = t for j := index; j < len(e.Child); j++ { e.Child[j].setIndex(j) } } // RemoveChild attempts to remove the token 't' from this element's list of // child tokens. If the token 't' was a child of this element, then it is // removed and returned. Otherwise, nil is returned. func (e *Element) RemoveChild(t Token) Token { if t.Parent() != e { return nil } return e.RemoveChildAt(t.Index()) } // RemoveChildAt removes the child token appearing in slot 'index' of this // element's list of child tokens. The removed child token is then returned. // If the index is out of bounds, no child is removed and nil is returned. func (e *Element) RemoveChildAt(index int) Token { if index >= len(e.Child) { return nil } t := e.Child[index] for j := index + 1; j < len(e.Child); j++ { e.Child[j].setIndex(j - 1) } e.Child = append(e.Child[:index], e.Child[index+1:]...) t.setIndex(-1) t.setParent(nil) return t } // autoClose analyzes the stack's top element and the current token to decide // whether the top element should be closed. func (e *Element) autoClose(stack *stack[*Element], t xml.Token, tags []string) { if stack.empty() { return } top := stack.peek() for _, tag := range tags { if strings.EqualFold(tag, top.FullTag()) { if e, ok := t.(xml.EndElement); !ok || !strings.EqualFold(e.Name.Space, top.Space) || !strings.EqualFold(e.Name.Local, top.Tag) { stack.pop() } break } } } // ReadFrom reads XML from the reader 'ri' and stores the result as a new // child of this element. 
func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) {
	// Pick the reader implementation. The peeking reader is used only when
	// CDATA sections must be preserved; pr stays nil otherwise.
	var r xmlReader
	var pr *xmlPeekReader
	if settings.PreserveCData {
		pr = newXmlPeekReader(ri)
		r = pr
	} else {
		r = newXmlSimpleReader(ri)
	}

	// attrCheck maps an attribute name to its index in the element being
	// built. It is reused (and cleared) for every start element.
	attrCheck := make(map[xml.Name]int)
	dec := newDecoder(r, settings)

	// The stack tracks the chain of currently open elements, starting with
	// the receiver.
	var stack stack[*Element]
	stack.push(e)
	for {
		if pr != nil {
			// NOTE(review): presumably arms the peek reader to capture the
			// first len(cdataPrefix) bytes of the next token — confirm
			// against xmlPeekReader.
			pr.PeekPrepare(dec.InputOffset(), len(cdataPrefix))
		}

		// This 'err' shadows the named result; every return below supplies
		// explicit values.
		t, err := dec.RawToken()

		// Auto-closing runs before the error check, so it also runs for the
		// final (EOF) iteration.
		if settings.Permissive && settings.AutoClose != nil {
			e.autoClose(&stack, t, settings.AutoClose)
		}

		switch {
		case err == io.EOF:
			// Only the receiver may remain on the stack at end of input;
			// anything else means an element was left unclosed.
			if len(stack.data) != 1 {
				return r.Bytes(), ErrXML
			}
			return r.Bytes(), nil
		case err != nil:
			return r.Bytes(), err
		case stack.empty():
			return r.Bytes(), ErrXML
		}

		top := stack.peek()

		switch t := t.(type) {
		case xml.StartElement:
			// 'e' here shadows the receiver and names the new child element.
			e := newElement(t.Name.Space, t.Name.Local, top)
			if settings.PreserveDuplicateAttrs || len(t.Attr) < 2 {
				for _, a := range t.Attr {
					e.addAttr(a.Name.Space, a.Name.Local, a.Value)
				}
			} else {
				// Collapse duplicate attribute names: a later occurrence
				// overwrites the value of the earlier one.
				for _, a := range t.Attr {
					if i, contains := attrCheck[a.Name]; contains {
						e.Attr[i].Value = a.Value
					} else {
						attrCheck[a.Name] = e.addAttr(a.Name.Space, a.Name.Local, a.Value)
					}
				}
				clear(attrCheck)
			}
			stack.push(e)
		case xml.EndElement:
			// The end tag must match the innermost open element exactly.
			if top.Tag != t.Name.Local || top.Space != t.Name.Space {
				return r.Bytes(), ErrXML
			}
			stack.pop()
		case xml.CharData:
			data := string(t)
			var flags charDataFlags
			if pr != nil {
				// When peeking, a token that began with the CDATA prefix is
				// flagged as CDATA; otherwise fall back to the whitespace
				// test.
				peekBuf := pr.PeekFinalize()
				if bytes.Equal(peekBuf, cdataPrefix) {
					flags = cdataFlag
				} else if isWhitespace(data) {
					flags = whitespaceFlag
				}
			} else {
				if isWhitespace(data) {
					flags = whitespaceFlag
				}
			}
			newCharData(data, flags, top)
		case xml.Comment:
			newComment(string(t), top)
		case xml.Directive:
			newDirective(string(t), top)
		case xml.ProcInst:
			newProcInst(t.Target, string(t.Inst), top)
		}
	}
}

// SelectAttr finds an element attribute matching the requested 'key' and, if
// found, returns a pointer to the matching attribute. The function returns
// nil if no matching attribute is found.
The key may include a namespace // prefix followed by a colon. func (e *Element) SelectAttr(key string) *Attr { space, skey := spaceDecompose(key) for i, a := range e.Attr { if spaceMatch(space, a.Space) && skey == a.Key { return &e.Attr[i] } } return nil } // SelectAttrValue finds an element attribute matching the requested 'key' and // returns its value if found. If no matching attribute is found, the function // returns the 'dflt' value instead. The key may include a namespace prefix // followed by a colon. func (e *Element) SelectAttrValue(key, dflt string) string { space, skey := spaceDecompose(key) for _, a := range e.Attr { if spaceMatch(space, a.Space) && skey == a.Key { return a.Value } } return dflt } // ChildElements returns all elements that are children of this element. func (e *Element) ChildElements() []*Element { var elements []*Element for _, t := range e.Child { if c, ok := t.(*Element); ok { elements = append(elements, c) } } return elements } // SelectElement returns the first child element with the given 'tag' (i.e., // name). The function returns nil if no child element matching the tag is // found. The tag may include a namespace prefix followed by a colon. func (e *Element) SelectElement(tag string) *Element { space, stag := spaceDecompose(tag) for _, t := range e.Child { if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag { return c } } return nil } // SelectElements returns a slice of all child elements with the given 'tag' // (i.e., name). The tag may include a namespace prefix followed by a colon. func (e *Element) SelectElements(tag string) []*Element { space, stag := spaceDecompose(tag) var elements []*Element for _, t := range e.Child { if c, ok := t.(*Element); ok && spaceMatch(space, c.Space) && stag == c.Tag { elements = append(elements, c) } } return elements } // FindElement returns the first element matched by the XPath-like 'path' // string. 
The function returns nil if no child element is found using the // path. It panics if an invalid path string is supplied. func (e *Element) FindElement(path string) *Element { return e.FindElementPath(MustCompilePath(path)) } // FindElementPath returns the first element matched by the 'path' object. The // function returns nil if no element is found using the path. func (e *Element) FindElementPath(path Path) *Element { p := newPather() elements := p.traverse(e, path) if len(elements) > 0 { return elements[0] } return nil } // FindElements returns a slice of elements matched by the XPath-like 'path' // string. The function returns nil if no child element is found using the // path. It panics if an invalid path string is supplied. func (e *Element) FindElements(path string) []*Element { return e.FindElementsPath(MustCompilePath(path)) } // FindElementsPath returns a slice of elements matched by the 'path' object. func (e *Element) FindElementsPath(path Path) []*Element { p := newPather() return p.traverse(e, path) } // NotNil returns the receiver element if it isn't nil; otherwise, it returns // an unparented element with an empty string tag. This function simplifies // the task of writing code to ignore not-found results from element queries. // For example, instead of writing this: // // if e := doc.SelectElement("enabled"); e != nil { // e.SetText("true") // } // // You could write this: // // doc.SelectElement("enabled").NotNil().SetText("true") func (e *Element) NotNil() *Element { if e == nil { return NewElement("") } return e } // GetPath returns the absolute path of the element. The absolute path is the // full path from the document's root. func (e *Element) GetPath() string { path := []string{} for seg := e; seg != nil; seg = seg.Parent() { if seg.Tag != "" { path = append(path, seg.Tag) } } // Reverse the path. 
for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { path[i], path[j] = path[j], path[i] } return "/" + strings.Join(path, "/") } // GetRelativePath returns the path of this element relative to the 'source' // element. If the two elements are not part of the same element tree, then // the function returns the empty string. func (e *Element) GetRelativePath(source *Element) string { var path []*Element if source == nil { return "" } // Build a reverse path from the element toward the root. Stop if the // source element is encountered. var seg *Element for seg = e; seg != nil && seg != source; seg = seg.Parent() { path = append(path, seg) } // If we found the source element, reverse the path and compose the // string. if seg == source { if len(path) == 0 { return "." } parts := []string{} for i := len(path) - 1; i >= 0; i-- { parts = append(parts, path[i].Tag) } return "./" + strings.Join(parts, "/") } // The source wasn't encountered, so climb from the source element toward // the root of the tree until an element in the reversed path is // encountered. findPathIndex := func(e *Element, path []*Element) int { for i, ee := range path { if e == ee { return i } } return -1 } climb := 0 for seg = source; seg != nil; seg = seg.Parent() { i := findPathIndex(seg, path) if i >= 0 { path = path[:i] // truncate at found segment break } climb++ } // No element in the reversed path was encountered, so the two elements // must not be part of the same tree. if seg == nil { return "" } // Reverse the (possibly truncated) path and prepend ".." segments to // climb. parts := []string{} for i := 0; i < climb; i++ { parts = append(parts, "..") } for i := len(path) - 1; i >= 0; i-- { parts = append(parts, path[i].Tag) } return strings.Join(parts, "/") } // IndentWithSettings modifies the element and its child tree by inserting // character data tokens containing newlines and indentation. The behavior of // the indentation algorithm is configured by the indent settings. 
Because // this function indents the element as if it were at the root of a document, // it is most useful when called just before writing the element as an XML // fragment using WriteTo. func (e *Element) IndentWithSettings(s *IndentSettings) { e.indent(1, getIndentFunc(s), s) } // indent recursively inserts proper indentation between an XML element's // child tokens. func (e *Element) indent(depth int, indent indentFunc, s *IndentSettings) { e.stripIndent(s) n := len(e.Child) if n == 0 { return } oldChild := e.Child e.Child = make([]Token, 0, n*2+1) isCharData, firstNonCharData := false, true for _, c := range oldChild { // Insert NL+indent before child if it's not character data. // Exceptions: when it's the first non-character-data child, or when // the child is at root depth. _, isCharData = c.(*CharData) if !isCharData { if !firstNonCharData || depth > 0 { s := indent(depth) if s != "" { newCharData(s, whitespaceFlag, e) } } firstNonCharData = false } e.addChild(c) // Recursively process child elements. if ce, ok := c.(*Element); ok { ce.indent(depth+1, indent, s) } } // Insert NL+indent before the last child. if !isCharData { if !firstNonCharData || depth > 0 { s := indent(depth - 1) if s != "" { newCharData(s, whitespaceFlag, e) } } } } // stripIndent removes any previously inserted indentation. func (e *Element) stripIndent(s *IndentSettings) { // Count the number of non-indent child tokens n := len(e.Child) for _, c := range e.Child { if cd, ok := c.(*CharData); ok && cd.IsWhitespace() { n-- } } if n == len(e.Child) { return } if n == 0 && len(e.Child) == 1 && s.PreserveLeafWhitespace { return } // Strip out indent CharData newChild := make([]Token, n) j := 0 for _, c := range e.Child { if cd, ok := c.(*CharData); ok && cd.IsWhitespace() { continue } newChild[j] = c newChild[j].setIndex(j) j++ } e.Child = newChild } // stripTrailingWhitespace removes any trailing whitespace CharData tokens // from the element's children. 
func (e *Element) stripTrailingWhitespace() { for i := len(e.Child) - 1; i >= 0; i-- { if cd, ok := e.Child[i].(*CharData); !ok || !cd.IsWhitespace() { e.Child = e.Child[:i+1] return } } } // dup duplicates the element. func (e *Element) dup(parent *Element) Token { ne := &Element{ Space: e.Space, Tag: e.Tag, Attr: make([]Attr, len(e.Attr)), Child: make([]Token, len(e.Child)), parent: parent, index: e.index, } for i, t := range e.Child { ne.Child[i] = t.dup(ne) } copy(ne.Attr, e.Attr) return ne } // NextSibling returns this element's next sibling element. It returns nil if // there is no next sibling element. func (e *Element) NextSibling() *Element { if e.parent == nil { return nil } for i := e.index + 1; i < len(e.parent.Child); i++ { if s, ok := e.parent.Child[i].(*Element); ok { return s } } return nil } // PrevSibling returns this element's preceding sibling element. It returns // nil if there is no preceding sibling element. func (e *Element) PrevSibling() *Element { if e.parent == nil { return nil } for i := e.index - 1; i >= 0; i-- { if s, ok := e.parent.Child[i].(*Element); ok { return s } } return nil } // Parent returns this element's parent element. It returns nil if this // element has no parent. func (e *Element) Parent() *Element { return e.parent } // Index returns the index of this element within its parent element's // list of child tokens. If this element has no parent, then the function // returns -1. func (e *Element) Index() int { return e.index } // WriteTo serializes the element to the writer w. 
func (e *Element) WriteTo(w Writer, s *WriteSettings) { w.WriteByte('<') w.WriteString(e.FullTag()) for _, a := range e.Attr { w.WriteByte(' ') a.WriteTo(w, s) } if len(e.Child) > 0 { w.WriteByte('>') for _, c := range e.Child { c.WriteTo(w, s) } w.Write([]byte{'<', '/'}) w.WriteString(e.FullTag()) w.WriteByte('>') } else { if s.CanonicalEndTags { w.Write([]byte{'>', '<', '/'}) w.WriteString(e.FullTag()) w.WriteByte('>') } else { w.Write([]byte{'/', '>'}) } } } // setParent replaces this element token's parent. func (e *Element) setParent(parent *Element) { e.parent = parent } // setIndex sets this element token's index within its parent's Child slice. func (e *Element) setIndex(index int) { e.index = index } // addChild adds a child token to the element e. func (e *Element) addChild(t Token) { t.setParent(e) t.setIndex(len(e.Child)) e.Child = append(e.Child, t) } // CreateAttr creates an attribute with the specified 'key' and 'value' and // adds it to this element. If an attribute with same key already exists on // this element, then its value is replaced. The key may include a namespace // prefix followed by a colon. func (e *Element) CreateAttr(key, value string) *Attr { space, skey := spaceDecompose(key) for i, a := range e.Attr { if space == a.Space && skey == a.Key { e.Attr[i].Value = value return &e.Attr[i] } } i := e.addAttr(space, skey, value) return &e.Attr[i] } // addAttr is a helper function that adds an attribute to an element. Returns // the index of the added attribute. func (e *Element) addAttr(space, key, value string) int { a := Attr{ Space: space, Key: key, Value: value, element: e, } e.Attr = append(e.Attr, a) return len(e.Attr) - 1 } // RemoveAttr removes the first attribute of this element whose key matches // 'key'. It returns a copy of the removed attribute if a match is found. If // no match is found, it returns nil. The key may include a namespace prefix // followed by a colon. 
func (e *Element) RemoveAttr(key string) *Attr { space, skey := spaceDecompose(key) for i, a := range e.Attr { if space == a.Space && skey == a.Key { e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...) return &Attr{ Space: a.Space, Key: a.Key, Value: a.Value, element: nil, } } } return nil } // SortAttrs sorts this element's attributes lexicographically by key. func (e *Element) SortAttrs() { slices.SortFunc(e.Attr, func(a, b Attr) int { if v := strings.Compare(a.Space, b.Space); v != 0 { return v } return strings.Compare(a.Key, b.Key) }) } // FullKey returns this attribute's complete key, including namespace prefix // if present. func (a *Attr) FullKey() string { if a.Space == "" { return a.Key } return a.Space + ":" + a.Key } // Element returns a pointer to the element containing this attribute. func (a *Attr) Element() *Element { return a.element } // NamespaceURI returns the XML namespace URI associated with this attribute. // The function returns the empty string if the attribute is unprefixed or // if the attribute is part of the XML default namespace. func (a *Attr) NamespaceURI() string { if a.Space == "" { return "" } return a.element.findLocalNamespaceURI(a.Space) } // WriteTo serializes the attribute to the writer. func (a *Attr) WriteTo(w Writer, s *WriteSettings) { w.WriteString(a.FullKey()) if s.AttrSingleQuote { w.WriteString(`='`) } else { w.WriteString(`="`) } var m escapeMode if s.CanonicalAttrVal && !s.AttrSingleQuote { m = escapeCanonicalAttr } else { m = escapeNormal } escapeString(w, a.Value, m) if s.AttrSingleQuote { w.WriteByte('\'') } else { w.WriteByte('"') } } // NewText creates an unparented CharData token containing simple text data. func NewText(text string) *CharData { return newCharData(text, 0, nil) } // NewCData creates an unparented XML character CDATA section with 'data' as // its content. 
func NewCData(data string) *CharData {
	return newCharData(data, cdataFlag, nil)
}

// NewCharData creates an unparented CharData token containing simple text
// data.
//
// Deprecated: NewCharData is deprecated. Instead, use NewText, which does the
// same thing.
func NewCharData(data string) *CharData {
	return newCharData(data, 0, nil)
}

// newCharData creates a character data token and binds it to a parent
// element. If parent is nil, the CharData token remains unbound.
func newCharData(data string, flags charDataFlags, parent *Element) *CharData {
	cd := &CharData{
		Data:  data,
		index: -1,
		flags: flags,
	}
	if parent == nil {
		return cd
	}
	// addChild fills in the token's parent and index.
	parent.addChild(cd)
	return cd
}

// CreateText creates a CharData token containing simple text data and adds it
// to the end of this element's list of child tokens.
func (e *Element) CreateText(text string) *CharData {
	return newCharData(text, 0, e)
}

// CreateCData creates a CharData token containing a CDATA section with 'data'
// as its content and adds it to the end of this element's list of child
// tokens.
func (e *Element) CreateCData(data string) *CharData {
	return newCharData(data, cdataFlag, e)
}

// CreateCharData creates a CharData token containing simple text data and
// adds it to the end of this element's list of child tokens.
//
// Deprecated: CreateCharData is deprecated. Instead, use CreateText, which
// does the same thing.
func (e *Element) CreateCharData(data string) *CharData {
	return e.CreateText(data)
}

// SetData modifies the content of the CharData token. In the case of a
// CharData token containing simple text, the simple text is modified. In the
// case of a CharData token containing a CDATA section, the CDATA section's
// content is modified. The token's whitespace flag is updated to match the
// new content.
func (c *CharData) SetData(text string) {
	c.Data = text
	if !isWhitespace(text) {
		c.flags &^= whitespaceFlag
		return
	}
	c.flags |= whitespaceFlag
}

// IsCData returns true if this CharData token contains a CDATA section.
It // returns false if the CharData token contains simple text. func (c *CharData) IsCData() bool { return (c.flags & cdataFlag) != 0 } // IsWhitespace returns true if this CharData token contains only whitespace. func (c *CharData) IsWhitespace() bool { return (c.flags & whitespaceFlag) != 0 } // Parent returns this CharData token's parent element, or nil if it has no // parent. func (c *CharData) Parent() *Element { return c.parent } // Index returns the index of this CharData token within its parent element's // list of child tokens. If this CharData token has no parent, then the // function returns -1. func (c *CharData) Index() int { return c.index } // WriteTo serializes character data to the writer. func (c *CharData) WriteTo(w Writer, s *WriteSettings) { if c.IsCData() { w.WriteString(``) } else { var m escapeMode if s.CanonicalText { m = escapeCanonicalText } else { m = escapeNormal } escapeString(w, c.Data, m) } } // dup duplicates the character data. func (c *CharData) dup(parent *Element) Token { return &CharData{ Data: c.Data, flags: c.flags, parent: parent, index: c.index, } } // setParent replaces the character data token's parent. func (c *CharData) setParent(parent *Element) { c.parent = parent } // setIndex sets the CharData token's index within its parent element's Child // slice. func (c *CharData) setIndex(index int) { c.index = index } // NewComment creates an unparented comment token. func NewComment(comment string) *Comment { return newComment(comment, nil) } // NewComment creates a comment token and sets its parent element to 'parent'. func newComment(comment string, parent *Element) *Comment { c := &Comment{ Data: comment, parent: nil, index: -1, } if parent != nil { parent.addChild(c) } return c } // CreateComment creates a comment token using the specified 'comment' string // and adds it as the last child token of this element. 
func (e *Element) CreateComment(comment string) *Comment {
	return newComment(comment, e)
}

// dup duplicates the comment.
func (c *Comment) dup(parent *Element) Token {
	return &Comment{
		Data:   c.Data,
		parent: parent,
		index:  c.index,
	}
}

// Parent returns comment token's parent element, or nil if it has no parent.
func (c *Comment) Parent() *Element {
	return c.parent
}

// Index returns the index of this Comment token within its parent element's
// list of child tokens. If this Comment token has no parent, then the
// function returns -1.
func (c *Comment) Index() int {
	return c.index
}

// WriteTo serializes the comment to the writer, wrapping the comment's data
// in the XML comment delimiters. The data is written as-is.
func (c *Comment) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString("<!--")
	w.WriteString(c.Data)
	w.WriteString("-->")
}

// setParent replaces the comment token's parent.
func (c *Comment) setParent(parent *Element) {
	c.parent = parent
}

// setIndex sets the Comment token's index within its parent element's Child
// slice.
func (c *Comment) setIndex(index int) {
	c.index = index
}

// NewDirective creates an unparented XML directive token.
func NewDirective(data string) *Directive {
	return newDirective(data, nil)
}

// newDirective creates an XML directive and binds it to a parent element. If
// parent is nil, the Directive remains unbound.
func newDirective(data string, parent *Element) *Directive {
	d := &Directive{
		Data:   data,
		parent: nil,
		index:  -1,
	}
	if parent != nil {
		parent.addChild(d)
	}
	return d
}

// CreateDirective creates an XML directive token with the specified 'data'
// value and adds it as the last child token of this element.
func (e *Element) CreateDirective(data string) *Directive {
	return newDirective(data, e)
}

// dup duplicates the directive.
func (d *Directive) dup(parent *Element) Token {
	return &Directive{
		Data:   d.Data,
		parent: parent,
		index:  d.index,
	}
}

// Parent returns directive token's parent element, or nil if it has no
// parent.
func (d *Directive) Parent() *Element {
	return d.parent
}

// Index returns the index of this Directive token within its parent element's
// list of child tokens. If this Directive token has no parent, then the
// function returns -1.
func (d *Directive) Index() int {
	return d.index
}

// WriteTo serializes the XML directive to the writer, wrapping the
// directive's data in the "<!" and ">" delimiters. The data is written
// as-is.
func (d *Directive) WriteTo(w Writer, s *WriteSettings) {
	w.WriteString("<!")
	w.WriteString(d.Data)
	w.WriteString(">")
}

// setParent replaces the directive token's parent.
func (d *Directive) setParent(parent *Element) {
	d.parent = parent
}

// setIndex sets the Directive token's index within its parent element's Child
// slice.
func (d *Directive) setIndex(index int) {
	d.index = index
}

// NewProcInst creates an unparented XML processing instruction.
func NewProcInst(target, inst string) *ProcInst {
	return newProcInst(target, inst, nil)
}

// newProcInst creates an XML processing instruction and binds it to a parent
// element. If parent is nil, the ProcInst remains unbound.
func newProcInst(target, inst string, parent *Element) *ProcInst {
	p := &ProcInst{
		Target: target,
		Inst:   inst,
		parent: nil,
		index:  -1,
	}
	if parent != nil {
		parent.addChild(p)
	}
	return p
}

// CreateProcInst creates an XML processing instruction token with the
// specified 'target' and instruction 'inst'. It is then added as the last
// child token of this element.
func (e *Element) CreateProcInst(target, inst string) *ProcInst {
	return newProcInst(target, inst, e)
}

// dup duplicates the procinst.
func (p *ProcInst) dup(parent *Element) Token {
	return &ProcInst{
		Target: p.Target,
		Inst:   p.Inst,
		parent: parent,
		index:  p.index,
	}
}

// Parent returns processing instruction token's parent element, or nil if it
// has no parent.
func (p *ProcInst) Parent() *Element {
	return p.parent
}

// Index returns the index of this ProcInst token within its parent element's
// list of child tokens. If this ProcInst token has no parent, then the
// function returns -1.
func (p *ProcInst) Index() int { return p.index } // WriteTo serializes the processing instruction to the writer. func (p *ProcInst) WriteTo(w Writer, s *WriteSettings) { w.WriteString("") } // setParent replaces the processing instruction token's parent. func (p *ProcInst) setParent(parent *Element) { p.parent = parent } // setIndex sets the processing instruction token's index within its parent // element's Child slice. func (p *ProcInst) setIndex(index int) { p.index = index } etree-1.5.1/etree_test.go000066400000000000000000001320271477734601400153360ustar00rootroot00000000000000// Copyright 2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package etree import ( "bytes" "encoding/xml" "errors" "io" "io/fs" "math/rand" "os" "path" "strings" "testing" ) func newDocumentFromString(t *testing.T, s string) *Document { return newDocumentFromString2(t, s, ReadSettings{}) } func newDocumentFromString2(t *testing.T, s string, settings ReadSettings) *Document { t.Helper() doc := NewDocument() doc.ReadSettings = settings err := doc.ReadFromString(s) if err != nil { t.Fatal("etree: failed to parse document") } return doc } func checkStrEq(t *testing.T, got, want string) { t.Helper() if got != want { t.Errorf("etree: unexpected result.\nGot:\n%s\nWanted:\n%s\n", got, want) } } func checkStrBinaryEq(t *testing.T, got, want string) { t.Helper() if got != want { t.Errorf("etree: unexpected result.\nGot:\n%v\nWanted:\n%v\n", []byte(got), []byte(want)) } } func checkIntEq(t *testing.T, got, want int) { t.Helper() if got != want { t.Errorf("etree: unexpected integer. Got: %d. Wanted: %d\n", got, want) } } func checkBoolEq(t *testing.T, got, want bool) { t.Helper() if got != want { t.Errorf("etree: unexpected boolean. Got: %v. Wanted: %v\n", got, want) } } func checkElementEq(t *testing.T, got, want *Element) { t.Helper() if got != want { t.Errorf("etree: unexpected element. Got: %v. 
Wanted: %v.\n", got, want) } } func checkDocEq(t *testing.T, doc *Document, expected string) { t.Helper() doc.Indent(NoIndent) s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } if s != expected { t.Errorf("etree: unexpected document.\nGot:\n%s\nWanted:\n%s\n", s, expected) } } func checkIndexes(t *testing.T, e *Element) { t.Helper() for i := 0; i < len(e.Child); i++ { c := e.Child[i] if c.Index() != i { t.Errorf("Child index mismatch. Got %d, expected %d.", c.Index(), i) } if ce, ok := c.(*Element); ok { checkIndexes(t, ce) } } } func TestDocument(t *testing.T) { // Create a document doc := NewDocument() doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`) doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) store := doc.CreateElement("store") store.CreateAttr("xmlns:t", "urn:books-com:titles") store.CreateDirective("Directive") store.CreateComment("This is a comment") book := store.CreateElement("book") book.CreateAttr("lang", "fr") book.CreateAttr("lang", "en") title := book.CreateElement("t:title") title.SetText("Nicholas Nickleby") title.SetText("Great Expectations") author := book.CreateElement("author") author.CreateCharData("Charles Dickens") review := book.CreateElement("review") review.CreateCData("<<< Will be replaced") review.SetCData(">>> Excellent book") doc.IndentTabs() checkIndexes(t, &doc.Element) // Serialize the document to a string s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } // Make sure the serialized XML matches expectation. 
expected := ` Great Expectations Charles Dickens >> Excellent book]]> ` checkStrEq(t, s, expected) // Test the structure of the XML if doc.Root() != store { t.Error("etree: root mismatch") } if len(store.ChildElements()) != 1 || len(store.Child) != 7 { t.Error("etree: incorrect tree structure") } if len(book.ChildElements()) != 3 || len(book.Attr) != 1 || len(book.Child) != 7 { t.Error("etree: incorrect tree structure") } if len(title.ChildElements()) != 0 || len(title.Child) != 1 || len(title.Attr) != 0 { t.Error("etree: incorrect tree structure") } if len(author.ChildElements()) != 0 || len(author.Child) != 1 || len(author.Attr) != 0 { t.Error("etree: incorrect tree structure") } if len(review.ChildElements()) != 0 || len(review.Child) != 1 || len(review.Attr) != 0 { t.Error("etree: incorrect tree structure") } if book.parent != store || store.parent != &doc.Element || doc.parent != nil { t.Error("etree: incorrect tree structure") } if title.parent != book || author.parent != book { t.Error("etree: incorrect tree structure") } // Perform some basic queries on the document elements := doc.SelectElements("store") if len(elements) != 1 || elements[0] != store { t.Error("etree: incorrect SelectElements result") } element := doc.SelectElement("store") if element != store { t.Error("etree: incorrect SelectElement result") } elements = store.SelectElements("book") if len(elements) != 1 || elements[0] != book { t.Error("etree: incorrect SelectElements result") } element = store.SelectElement("book") if element != book { t.Error("etree: incorrect SelectElement result") } attr := book.SelectAttr("lang") if attr == nil || attr.Key != "lang" || attr.Value != "en" { t.Error("etree: incorrect SelectAttr result") } if book.SelectAttrValue("lang", "unknown") != "en" { t.Error("etree: incorrect SelectAttrValue result") } if book.SelectAttrValue("t:missing", "unknown") != "unknown" { t.Error("etree: incorrect SelectAttrValue result") } attr = book.RemoveAttr("lang") if attr.Value 
!= "en" { t.Error("etree: incorrect RemoveAttr result") } book.CreateAttr("lang", "de") attr = book.RemoveAttr("lang") if attr.Value != "de" { t.Error("etree: incorrect RemoveAttr result") } element = book.SelectElement("t:title") if element != title || element.Text() != "Great Expectations" || len(element.Attr) != 0 { t.Error("etree: incorrect SelectElement result") } element = book.SelectElement("title") if element != title { t.Error("etree: incorrect SelectElement result") } element = book.SelectElement("p:title") if element != nil { t.Error("etree: incorrect SelectElement result") } element = book.RemoveChildAt(title.Index()).(*Element) if element != title { t.Error("etree: incorrect RemoveElement result") } element = book.SelectElement("title") if element != nil { t.Error("etree: incorrect SelectElement result") } element = book.SelectElement("review") if element != review || element.Text() != ">>> Excellent book" || len(element.Attr) != 0 { t.Error("etree: incorrect SelectElement result") } } func TestImbalancedXML(t *testing.T) { cases := []string{ ``, ``, ``, ``, ``, `malformed`, `malformed`, ``, ``, ``, ``, } for _, c := range cases { doc := NewDocument() err := doc.ReadFromString(c) if err == nil { t.Errorf("etree: imbalanced XML should have failed:\n%s", c) } } } func TestDocumentCharsetReader(t *testing.T) { s := ` Great Expectations Charles Dickens ` doc := newDocumentFromString2(t, s, ReadSettings{ CharsetReader: func(label string, input io.Reader) (io.Reader, error) { if label == "lowercase" { return &lowercaseCharsetReader{input}, nil } return nil, errors.New("unknown charset") }, }) cases := []struct { path string text string }{ {"/store/book/title", "great expectations"}, {"/store/book/author", "charles dickens"}, } for _, c := range cases { e := doc.FindElement(c.path) if e == nil { t.Errorf("etree: failed to find element '%s'", c.path) } else if e.Text() != c.text { t.Errorf("etree: expected path '%s' to contain '%s', got '%s'", c.path, c.text, 
e.Text()) } } } type lowercaseCharsetReader struct { r io.Reader } func (c *lowercaseCharsetReader) Read(p []byte) (n int, err error) { n, err = c.r.Read(p) if err != nil { return n, err } for i := 0; i < n; i++ { if p[i] >= 'A' && p[i] <= 'Z' { p[i] = p[i] - 'A' + 'a' } } return n, nil } func TestDocumentReadPermissive(t *testing.T) { s := "" doc := NewDocument() err := doc.ReadFromString(s) if err == nil { t.Fatal("etree: incorrect ReadFromString result") } doc.ReadSettings.Permissive = true err = doc.ReadFromString(s) if err != nil { t.Fatal("etree: incorrect ReadFromString result") } } func TestEmbeddedComment(t *testing.T) { s := `123456` doc := NewDocument() err := doc.ReadFromString(s) if err != nil { t.Fatal("etree: incorrect ReadFromString result") } a := doc.SelectElement("a") checkStrEq(t, a.Text(), "123456") } func TestDocumentReadHTMLEntities(t *testing.T) { s := ` → Great Expectations Charles Dickens ` doc := NewDocument() err := doc.ReadFromString(s) if err == nil { t.Fatal("etree: incorrect ReadFromString result") } doc.ReadSettings.Entity = xml.HTMLEntity err = doc.ReadFromString(s) if err != nil { t.Fatal("etree: incorrect ReadFromString result") } } func TestDocumentReadHTMLAutoClose(t *testing.T) { cases := []struct { name string input string want string }{ {"empty", ``, ``}, {"oneSelfClosing", `
`, `
`}, {"twoSelfClosingAndText", `
some text
`, `
some text
`}, { name: "largerExample", input: `
Author: Charles Dickens
Book: Great Expectations
`, want: `
Author: Charles Dickens
Book: Great Expectations
`}, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { doc := NewDocument() doc.ReadSettings.Permissive = true doc.ReadSettings.AutoClose = xml.HTMLAutoClose err := doc.ReadFromString(c.input) if err != nil { t.Fatal("etree: ReadFromString() error = ", err) } s, err := doc.WriteToString() if err != nil { t.Fatal("etree: WriteToString() error = ", err) } checkStrEq(t, s, c.want) }) } } func TestEscapeCodes(t *testing.T) { cases := []struct { input string normal string attrCanonical string textCanonical string }{ { "&<>'\"\t\n\r", "&<>'"\t\n\r", "'" \">&<>'"\t\n\r", "&<>'\"\t\n ", }, { "\x00\x1f\x08\x09\x0a\x0d", "���\t\n\r", "���\t\n\r", "���\t\n ", }, } for _, c := range cases { doc := NewDocument() e := doc.CreateElement("e") e.SetText(c.input) e.CreateAttr("a", c.input) doc.WriteSettings.CanonicalText = false doc.WriteSettings.CanonicalAttrVal = false s, err := doc.WriteToString() if err != nil { t.Error("etree: Escape test produced inocrrect result.") } checkStrEq(t, s, c.normal) doc.WriteSettings.CanonicalText = false doc.WriteSettings.CanonicalAttrVal = true s, err = doc.WriteToString() if err != nil { t.Error("etree: Escape test produced inocrrect result.") } checkStrEq(t, s, c.attrCanonical) doc.WriteSettings.CanonicalText = true doc.WriteSettings.CanonicalAttrVal = false s, err = doc.WriteToString() if err != nil { t.Error("etree: Escape test produced inocrrect result.") } checkStrEq(t, s, c.textCanonical) } } func TestCanonical(t *testing.T) { BOM := "\xef\xbb\xbf" doc := NewDocument() doc.WriteSettings.CanonicalEndTags = true doc.WriteSettings.CanonicalText = true doc.WriteSettings.CanonicalAttrVal = true doc.CreateCharData(BOM) doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) people := doc.CreateElement("People") people.CreateComment("These are all known people") jon := people.CreateElement("Person") jon.CreateAttr("name", "Jon O'Reilly") jon.SetText("\r<'\">&\u0004\u0005\u001f�") sally := 
people.CreateElement("Person") sally.CreateAttr("name", "Sally") sally.CreateAttr("escape", "\r\n\t<'\">&") doc.Indent(2) s, err := doc.WriteToString() if err != nil { t.Error("etree: WriteSettings WriteTo produced incorrect result.") } expected := BOM + ` <'">&���� ` checkStrEq(t, s, expected) } func TestCopy(t *testing.T) { s := ` Great Expectations Charles Dickens ` doc := newDocumentFromString(t, s) s1, err := doc.WriteToString() if err != nil { t.Error("etree: incorrect WriteToString result") } doc2 := doc.Copy() checkIndexes(t, &doc2.Element) s2, err := doc2.WriteToString() if err != nil { t.Error("etree: incorrect Copy result") } if s1 != s2 { t.Error("etree: mismatched Copy result") t.Error("wanted:\n" + s1) t.Error("got:\n" + s2) } e1 := doc.FindElement("./store/book/title") e2 := doc2.FindElement("./store/book/title") if e1 == nil || e2 == nil || e1.parent == nil || e1 == e2 { t.Error("etree: incorrect FindElement result") } e1.parent.RemoveChildAt(e1.Index()) s1, _ = doc.WriteToString() s2, _ = doc2.WriteToString() if s1 == s2 { t.Error("etree: incorrect result after RemoveElement") } } func TestGetPath(t *testing.T) { s := ` ` doc := newDocumentFromString(t, s) cases := []struct { from string to string relpath string topath string }{ {"a", ".", "..", "/"}, {".", "a", "./a", "/a"}, {"a/b1/c1/d1", ".", "../../../..", "/"}, {".", "a/b1/c1/d1", "./a/b1/c1/d1", "/a/b1/c1/d1"}, {"a", "a", ".", "/a"}, {"a/b1", "a/b1/c1", "./c1", "/a/b1/c1"}, {"a/b1/c1", "a/b1", "..", "/a/b1"}, {"a/b1/c1", "a/b1/c1", ".", "/a/b1/c1"}, {"a", "a/b1", "./b1", "/a/b1"}, {"a/b1", "a", "..", "/a"}, {"a", "a/b1/c1", "./b1/c1", "/a/b1/c1"}, {"a/b1/c1", "a", "../..", "/a"}, {"a/b1/c1/d1", "a", "../../..", "/a"}, {"a", "a/b1/c1/d1", "./b1/c1/d1", "/a/b1/c1/d1"}, {"a/b1", "a/b2", "../b2", "/a/b2"}, {"a/b2", "a/b1", "../b1", "/a/b1"}, {"a/b1/c1/d1", "a/b2/c2/d2", "../../../b2/c2/d2", "/a/b2/c2/d2"}, {"a/b2/c2/d2", "a/b1/c1/d1", "../../../b1/c1/d1", "/a/b1/c1/d1"}, {"a/b1/c1/d1", 
"a/b1/c1/d1a", "../d1a", "/a/b1/c1/d1a"}, } for _, c := range cases { fe := doc.FindElement(c.from) te := doc.FindElement(c.to) rp := te.GetRelativePath(fe) if rp != c.relpath { t.Errorf("GetRelativePath from '%s' to '%s'. Expected '%s', got '%s'.\n", c.from, c.to, c.relpath, rp) } p := te.GetPath() if p != c.topath { t.Errorf("GetPath for '%s'. Expected '%s', got '%s'.\n", c.to, c.topath, p) } } } func TestInsertChild(t *testing.T) { s := ` Great Expectations Charles Dickens ` doc := newDocumentFromString(t, s) year := NewElement("year") year.SetText("1861") book := doc.FindElement("//book") book.InsertChildAt(book.SelectElement("t:title").Index(), year) expected1 := ` 1861 Great Expectations Charles Dickens ` doc.Indent(2) s1, _ := doc.WriteToString() checkStrEq(t, s1, expected1) book.RemoveChildAt(year.Index()) book.InsertChildAt(book.SelectElement("author").Index(), year) expected2 := ` Great Expectations 1861 Charles Dickens ` doc.Indent(2) s2, _ := doc.WriteToString() checkStrEq(t, s2, expected2) book.RemoveChildAt(year.Index()) book.InsertChildAt(len(book.Child), year) expected3 := ` Great Expectations Charles Dickens 1861 ` doc.Indent(2) s3, _ := doc.WriteToString() checkStrEq(t, s3, expected3) book.RemoveChildAt(year.Index()) book.InsertChildAt(999, year) expected4 := ` Great Expectations Charles Dickens 1861 ` doc.Indent(2) s4, _ := doc.WriteToString() checkStrEq(t, s4, expected4) year = doc.FindElement("//book/year") book.InsertChildAt(0, year) expected5 := ` 1861 Great Expectations Charles Dickens ` doc.Indent(2) s5, _ := doc.WriteToString() checkStrEq(t, s5, expected5) author := doc.FindElement("//book/author") year = doc.FindElement("//book/year") book.InsertChildAt(author.Index(), year) expected6 := ` Great Expectations 1861 Charles Dickens ` doc.Indent(2) s6, _ := doc.WriteToString() checkStrEq(t, s6, expected6) } func TestCdata(t *testing.T) { var tests = []struct { in, out string }{ {`1234567`, "1234567"}, {``, "1234567"}, {`1357`, "1234567"}, 
{`13457`, "123"}, {`1457`, "1"}, {`457`, "1"}, } for _, test := range tests { doc := NewDocument() err := doc.ReadFromString(test.in) if err != nil { t.Fatal("etree ReadFromString: " + err.Error()) } tag := doc.FindElement("tag") if tag.Text() != test.out { t.Fatalf("etree invalid cdata. Expected: %v. Got: %v\n", test.out, tag.Text()) } } } func TestAddChild(t *testing.T) { s := ` Great Expectations Charles Dickens ` doc1 := newDocumentFromString(t, s) doc2 := NewDocument() root := doc2.CreateElement("root") for _, e := range doc1.FindElements("//book/*") { root.AddChild(e) } expected1 := ` ` doc1.Indent(2) s1, _ := doc1.WriteToString() checkStrEq(t, s1, expected1) expected2 := ` Great Expectations Charles Dickens ` doc2.Indent(2) s2, _ := doc2.WriteToString() checkStrEq(t, s2, expected2) } func TestSetRoot(t *testing.T) { s := ` Great Expectations Charles Dickens ` doc := newDocumentFromString(t, s) origroot := doc.Root() if origroot.Parent() != &doc.Element { t.Error("Root incorrect") } newroot := NewElement("root") doc.SetRoot(newroot) if doc.Root() != newroot { t.Error("doc.Root() != newroot") } if origroot.Parent() != nil { t.Error("origroot.Parent() != nil") } expected1 := ` ` doc.Indent(2) s1, _ := doc.WriteToString() checkStrEq(t, s1, expected1) doc.SetRoot(origroot) doc.Indent(2) expected2 := s s2, _ := doc.WriteToString() checkStrEq(t, s2, expected2) doc2 := NewDocument() doc2.CreateProcInst("test", `a="wow"`) doc2.SetRoot(NewElement("root")) doc2.Indent(2) expected3 := expected1 s3, _ := doc2.WriteToString() checkStrEq(t, s3, expected3) doc2.SetRoot(doc.Root()) doc2.Indent(2) expected4 := s s4, _ := doc2.WriteToString() checkStrEq(t, s4, expected4) expected5 := ` ` doc.Indent(2) s5, _ := doc.WriteToString() checkStrEq(t, s5, expected5) } func TestSortAttrs(t *testing.T) { s := `` doc := newDocumentFromString(t, s) doc.Root().SortAttrs() doc.Indent(2) out, _ := doc.WriteToString() checkStrEq(t, out, ``+"\n") } func TestCharsetReaderDefaultSetting(t 
*testing.T) { // Test encodings where the default pass-through charset conversion // should work for common single-byte character encodings. cases := []string{ ``, ``, ``, ``, ``, } for _, c := range cases { doc := NewDocument() if err := doc.ReadFromBytes([]byte(c)); err != nil { t.Error(err) } } } func TestCharData(t *testing.T) { doc := NewDocument() root := doc.CreateElement("root") root.CreateCharData("This ") root.CreateCData("is ") e1 := NewText("a ") e2 := NewCData("text ") root.AddChild(e1) root.AddChild(e2) root.CreateCharData("Element!!") s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } checkStrEq(t, s, `This a Element!!`) // Check we can parse the output err = doc.ReadFromString(s) if err != nil { t.Fatal("etree: incorrect ReadFromString result") } if doc.Root().Text() != "This is a text Element!!" { t.Error("etree: invalid text") } } func TestIndentSimple(t *testing.T) { doc := NewDocument() root := doc.CreateElement("root") ch1 := root.CreateElement("child1") ch1.CreateElement("child2") // First test Unindent. doc.Unindent() s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } expected := "" checkStrEq(t, s, expected) // Now test Indent with NoIndent (which should produce the same result // as Unindent). doc.Indent(NoIndent) s, err = doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } checkStrEq(t, s, expected) // Run all indent test cases. 
tests := []struct { useTabs, useCRLF bool ws, nl string }{ {false, false, " ", "\n"}, {false, true, " ", "\r\n"}, {true, false, "\t", "\n"}, {true, true, "\t", "\r\n"}, } for _, test := range tests { doc.WriteSettings.UseCRLF = test.useCRLF if test.useTabs { doc.IndentTabs() s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } tab := test.ws expected := "" + test.nl + tab + "" + test.nl + tab + tab + "" + test.nl + tab + "" + test.nl + "" + test.nl checkStrEq(t, s, expected) } else { for i := 0; i < 256; i++ { doc.Indent(i) s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } tab := strings.Repeat(test.ws, i) expected := "" + test.nl + tab + "" + test.nl + tab + tab + "" + test.nl + tab + "" + test.nl + "" + test.nl checkStrEq(t, s, expected) } } } } func TestIndentWithDefaultSettings(t *testing.T) { input := ` ` doc := NewDocument() err := doc.ReadFromString(input) if err != nil { t.Error("etree: failed to read string") } settings := NewIndentSettings() doc.IndentWithSettings(settings) s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } expected := "\n \n \n \n\n" checkStrEq(t, s, expected) } func TestIndentWithSettings(t *testing.T) { doc := NewDocument() root := doc.CreateElement("root") ch1 := root.CreateElement("child1") ch1.CreateElement("child2") // First test with NoIndent. settings := NewIndentSettings() settings.UseCRLF = false settings.UseTabs = false settings.Spaces = NoIndent doc.IndentWithSettings(settings) s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } expected := "" checkStrEq(t, s, expected) // Run all indent test cases. 
tests := []struct { useTabs, useCRLF bool ws, nl string }{ {false, false, " ", "\n"}, {false, true, " ", "\r\n"}, {true, false, "\t", "\n"}, {true, true, "\t", "\r\n"}, } for _, test := range tests { if test.useTabs { settings := NewIndentSettings() settings.UseTabs = true settings.UseCRLF = test.useCRLF doc.IndentWithSettings(settings) s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } tab := test.ws expected := "" + test.nl + tab + "" + test.nl + tab + tab + "" + test.nl + tab + "" + test.nl + "" + test.nl checkStrEq(t, s, expected) } else { for i := 0; i < 256; i++ { settings := NewIndentSettings() settings.Spaces = i settings.UseTabs = false settings.UseCRLF = test.useCRLF doc.IndentWithSettings(settings) s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } tab := strings.Repeat(test.ws, i) expected := "" + test.nl + tab + "" + test.nl + tab + tab + "" + test.nl + tab + "" + test.nl + "" + test.nl checkStrEq(t, s, expected) } } } } func TestIndentPreserveWhitespace(t *testing.T) { tests := []struct { input string expected string }{ {"", ""}, {" ", " "}, {"\t", "\t"}, {"\t\n \t", "\t\n \t"}, {"", " "}, {" ", ""}, {" ", "\n \n"}, } for _, test := range tests { doc := NewDocument() err := doc.ReadFromString(test.input) if err != nil { t.Error("etree: failed to read string") } s := NewIndentSettings() s.Spaces = 2 s.PreserveLeafWhitespace = true s.SuppressTrailingWhitespace = true doc.IndentWithSettings(s) output, err := doc.WriteToString() if err != nil { t.Error("etree: failed to read string") } checkStrEq(t, output, test.expected) } } func TestPreserveCData(t *testing.T) { tests := []struct { input string expectedWithPreserve string expectedWithoutPreserve string }{ { "", "", "x", }, { "foo]]>", "foo]]>", "x <b>foo</b>", }, { " name ", " name ", "My name is", }, } for _, test := range tests { doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: true}) 
output, _ := doc.WriteToString() checkStrEq(t, output, test.expectedWithPreserve) } for _, test := range tests { doc := newDocumentFromString2(t, test.input, ReadSettings{PreserveCData: false}) output, _ := doc.WriteToString() checkStrEq(t, output, test.expectedWithoutPreserve) } } func TestTokenIndexing(t *testing.T) { s := ` Great Expectations Charles Dickens ` doc := newDocumentFromString(t, s) review := doc.FindElement("/store/book/review") review.SetText("Excellent") checkIndexes(t, &doc.Element) doc.Indent(4) checkIndexes(t, &doc.Element) doc.Indent(NoIndent) checkIndexes(t, &doc.Element) e := NewElement("foo") store := doc.SelectElement("store") store.InsertChildAt(0, e) checkIndexes(t, &doc.Element) store.RemoveChildAt(0) checkIndexes(t, &doc.Element) } func TestSetText(t *testing.T) { doc := NewDocument() root := doc.CreateElement("root") checkDocEq(t, doc, ``) checkStrEq(t, root.Text(), "") checkIntEq(t, len(root.Child), 0) root.SetText("foo") checkDocEq(t, doc, `foo`) checkStrEq(t, root.Text(), "foo") checkIntEq(t, len(root.Child), 1) root.SetText("bar") checkDocEq(t, doc, `bar`) checkStrEq(t, root.Text(), "bar") checkIntEq(t, len(root.Child), 1) root.CreateCData("cdata") checkDocEq(t, doc, `bar`) checkStrEq(t, root.Text(), "barcdata") checkIntEq(t, len(root.Child), 2) root.SetText("qux") checkDocEq(t, doc, `qux`) checkStrEq(t, root.Text(), "qux") checkIntEq(t, len(root.Child), 1) root.CreateCData("cdata") checkDocEq(t, doc, `qux`) checkStrEq(t, root.Text(), "quxcdata") checkIntEq(t, len(root.Child), 2) root.SetCData("baz") checkDocEq(t, doc, ``) checkStrEq(t, root.Text(), "baz") checkIntEq(t, len(root.Child), 1) root.CreateText("corge") root.CreateCData("grault") root.CreateText("waldo") root.CreateCData("fred") root.CreateElement("child") checkDocEq(t, doc, `corgewaldo`) checkStrEq(t, root.Text(), "bazcorgegraultwaldofred") checkIntEq(t, len(root.Child), 6) root.SetText("plugh") checkDocEq(t, doc, `plugh`) checkStrEq(t, root.Text(), "plugh") 
checkIntEq(t, len(root.Child), 2) root.SetText("") checkDocEq(t, doc, ``) checkStrEq(t, root.Text(), "") checkIntEq(t, len(root.Child), 1) root.SetText("") checkDocEq(t, doc, ``) checkStrEq(t, root.Text(), "") checkIntEq(t, len(root.Child), 1) root.RemoveChildAt(0) root.CreateText("corge") root.CreateCData("grault") root.CreateText("waldo") root.CreateCData("fred") root.CreateElement("child") checkDocEq(t, doc, `corgewaldo`) checkStrEq(t, root.Text(), "corgegraultwaldofred") checkIntEq(t, len(root.Child), 5) root.SetText("") checkDocEq(t, doc, ``) checkStrEq(t, root.Text(), "") checkIntEq(t, len(root.Child), 1) } func TestSetTail(t *testing.T) { doc := NewDocument() root := doc.CreateElement("root") child := root.CreateElement("child") root.CreateText("\n\t") child.SetText("foo") checkDocEq(t, doc, "foo\n\t") checkStrEq(t, child.Tail(), "\n\t") checkIntEq(t, len(root.Child), 2) checkIntEq(t, len(child.Child), 1) root.CreateCData(" ") checkDocEq(t, doc, "foo\n\t") checkStrEq(t, child.Tail(), "\n\t ") checkIntEq(t, len(root.Child), 3) checkIntEq(t, len(child.Child), 1) child.SetTail("") checkDocEq(t, doc, "foo") checkStrEq(t, child.Tail(), "") checkIntEq(t, len(root.Child), 1) checkIntEq(t, len(child.Child), 1) child.SetTail("\t\t\t") checkDocEq(t, doc, "foo\t\t\t") checkStrEq(t, child.Tail(), "\t\t\t") checkIntEq(t, len(root.Child), 2) checkIntEq(t, len(child.Child), 1) child.SetTail("\t\n\n\t") checkDocEq(t, doc, "foo\t\n\n\t") checkStrEq(t, child.Tail(), "\t\n\n\t") checkIntEq(t, len(root.Child), 2) checkIntEq(t, len(child.Child), 1) child.SetTail("") checkDocEq(t, doc, "foo") checkStrEq(t, child.Tail(), "") checkIntEq(t, len(root.Child), 1) checkIntEq(t, len(child.Child), 1) } func TestAttrParent(t *testing.T) { doc := NewDocument() root := doc.CreateElement("root") attr1 := root.CreateAttr("bar", "1") attr2 := root.CreateAttr("qux", "2") checkIntEq(t, len(root.Attr), 2) checkElementEq(t, attr1.Element(), root) checkElementEq(t, attr2.Element(), root) attr1 = 
root.RemoveAttr("bar") attr2 = root.RemoveAttr("qux") checkElementEq(t, attr1.Element(), nil) checkElementEq(t, attr2.Element(), nil) s := `` err := doc.ReadFromString(s) if err != nil { t.Error("etree: failed to parse document") } root = doc.SelectElement("root") for i := range root.Attr { checkElementEq(t, root.Attr[i].Element(), root) } } func TestDefaultNamespaceURI(t *testing.T) { s := ` ` doc := newDocumentFromString(t, s) root := doc.SelectElement("root") child1 := root.SelectElement("child1") child2 := root.SelectElement("child2") grandchild1 := child1.SelectElement("grandchild1") grandchild2 := child1.SelectElement("grandchild2") greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1") checkStrEq(t, doc.NamespaceURI(), "") checkStrEq(t, root.NamespaceURI(), "https://root.example.com") checkStrEq(t, child1.NamespaceURI(), "https://child.example.com") checkStrEq(t, child2.NamespaceURI(), "https://root.example.com") checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com") checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com") checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://child.example.com") checkStrEq(t, root.Attr[0].NamespaceURI(), "") checkStrEq(t, root.Attr[1].NamespaceURI(), "") checkStrEq(t, root.Attr[2].NamespaceURI(), "https://attrib.example.com") checkStrEq(t, root.Attr[3].NamespaceURI(), "") checkStrEq(t, child1.Attr[0].NamespaceURI(), "") checkStrEq(t, child1.Attr[1].NamespaceURI(), "https://attrib.example.com") checkStrEq(t, child2.Attr[0].NamespaceURI(), "") checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "") checkStrEq(t, grandchild1.Attr[1].NamespaceURI(), "") checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "") checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "https://attrib.example.com") f := doc.FindElements("//*[namespace-uri()='https://root.example.com']") if len(f) != 2 || f[0] != root || f[1] != child2 { t.Error("etree: failed namespace-uri test") } f = 
doc.FindElements("//*[namespace-uri()='https://child.example.com']") if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 { t.Error("etree: failed namespace-uri test") } f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']") if len(f) != 1 || f[0] != grandchild1 { t.Error("etree: failed namespace-uri test") } f = doc.FindElements("//*[namespace-uri()='']") if len(f) != 0 { t.Error("etree: failed namespace-uri test") } f = doc.FindElements("//*[namespace-uri()='foo']") if len(f) != 0 { t.Error("etree: failed namespace-uri test") } } func TestLocalNamespaceURI(t *testing.T) { s := ` ` doc := newDocumentFromString(t, s) root := doc.SelectElement("root") child1 := root.SelectElement("child1") child2 := root.SelectElement("child2") child3 := root.SelectElement("child3") grandchild1 := child1.SelectElement("grandchild1") grandchild2 := child1.SelectElement("grandchild2") grandchild3 := child1.SelectElement("grandchild3") grandchild4 := child1.SelectElement("grandchild4") greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1") checkStrEq(t, doc.NamespaceURI(), "") checkStrEq(t, root.NamespaceURI(), "https://root.example.com") checkStrEq(t, child1.NamespaceURI(), "https://child.example.com") checkStrEq(t, child2.NamespaceURI(), "https://root.example.com") checkStrEq(t, child3.NamespaceURI(), "") checkStrEq(t, grandchild1.NamespaceURI(), "https://grandchild.example.com") checkStrEq(t, grandchild2.NamespaceURI(), "https://child.example.com") checkStrEq(t, grandchild3.NamespaceURI(), "https://root.example.com") checkStrEq(t, grandchild4.NamespaceURI(), "") checkStrEq(t, greatgrandchild1.NamespaceURI(), "https://root.example.com") f := doc.FindElements("//*[namespace-uri()='https://root.example.com']") if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 { t.Error("etree: failed namespace-uri test") } f = 
doc.FindElements("//*[namespace-uri()='https://child.example.com']") if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 { t.Error("etree: failed namespace-uri test") } f = doc.FindElements("//*[namespace-uri()='https://grandchild.example.com']") if len(f) != 1 || f[0] != grandchild1 { t.Error("etree: failed namespace-uri test") } f = doc.FindElements("//*[namespace-uri()='']") if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 { t.Error("etree: failed namespace-uri test") } f = doc.FindElements("//*[namespace-uri()='foo']") if len(f) != 0 { t.Error("etree: failed namespace-uri test") } } func TestWhitespace(t *testing.T) { s := "\n\t\n\t\t x\n \n" doc := newDocumentFromString(t, s) root := doc.Root() checkIntEq(t, len(root.Child), 3) cd := root.Child[0].(*CharData) checkBoolEq(t, cd.IsWhitespace(), true) checkStrBinaryEq(t, cd.Data, "\n\t") cd = root.Child[2].(*CharData) checkBoolEq(t, cd.IsWhitespace(), true) checkStrBinaryEq(t, cd.Data, "\n") child := root.SelectElement("child") checkIntEq(t, len(child.Child), 3) cd = child.Child[0].(*CharData) checkBoolEq(t, cd.IsWhitespace(), true) checkStrBinaryEq(t, cd.Data, "\n\t\t") cd = child.Child[2].(*CharData) checkBoolEq(t, cd.IsWhitespace(), true) checkStrBinaryEq(t, cd.Data, "\n ") grandchild := child.SelectElement("grandchild") checkIntEq(t, len(grandchild.Child), 1) cd = grandchild.Child[0].(*CharData) checkBoolEq(t, cd.IsWhitespace(), false) cd.SetData(" ") checkBoolEq(t, cd.IsWhitespace(), true) cd.SetData(" x") checkBoolEq(t, cd.IsWhitespace(), false) cd.SetData("\t\n\r ") checkBoolEq(t, cd.IsWhitespace(), true) cd.SetData("\uFFFD") checkBoolEq(t, cd.IsWhitespace(), false) cd.SetData("") checkBoolEq(t, cd.IsWhitespace(), true) } func TestTokenWriteTo(t *testing.T) { s := ` Great Expectations ` doc := newDocumentFromString(t, s) writeSettings := WriteSettings{} indentSettings := IndentSettings{UseTabs: true} tests := []struct { path string expected string }{ {"//store", "\n\t\n\t\n\t\tGreat 
Expectations\n\t\n"}, {"//store/book", "\n\tGreat Expectations\n"}, {"//store/book/title", "Great Expectations"}, } for _, test := range tests { var buffer bytes.Buffer c := doc.FindElement(test.path) c.IndentWithSettings(&indentSettings) c.WriteTo(&buffer, &writeSettings) checkStrEq(t, buffer.String(), test.expected) } } func TestReindexChildren(t *testing.T) { s := ` ` doc := newDocumentFromString(t, s) doc.Unindent() root := doc.Root() if root == nil || root.Tag != "root" || len(root.Child) != 5 { t.Error("etree: expected root element not found") } for i := 0; i < len(root.Child); i++ { if root.Child[i].Index() != i { t.Error("etree: incorrect child index found in root element child") } } rand.Shuffle(len(root.Child), func(i, j int) { root.Child[i], root.Child[j] = root.Child[j], root.Child[i] }) root.ReindexChildren() for i := 0; i < len(root.Child); i++ { if root.Child[i].Index() != i { t.Error("etree: incorrect child index found in root element child") } } } func TestPreserveDuplicateAttrs(t *testing.T) { s := `` checkAttrCount := func(e *Element, n int) { if len(e.Attr) != n { t.Errorf("etree: expected %d attributes, got %d", n, len(e.Attr)) } } checkAttr := func(e *Element, i int, key, value string) { if i >= len(e.Attr) { t.Errorf("etree: attr[%d] out of bounds", i) return } if e.Attr[i].Key != key { t.Errorf("etree: attr[%d] expected key %s, got %s", i, key, e.Attr[i].Key) } if e.Attr[i].Value != value { t.Errorf("etree: attr[%d] expected value %s, got %s", i, value, e.Attr[i].Value) } } t.Run("enabled", func(t *testing.T) { doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: true}) e := doc.FindElement("element") checkAttrCount(e, 5) checkAttr(e, 0, "x", "value1") checkAttr(e, 1, "y", "value2") checkAttr(e, 2, "x", "value3") checkAttr(e, 3, "x", "value4") checkAttr(e, 4, "y", "value5") }) t.Run("disabled", func(t *testing.T) { doc := newDocumentFromString2(t, s, ReadSettings{}) e := doc.FindElement("element") checkAttrCount(e, 2) 
checkAttr(e, 0, "x", "value4") checkAttr(e, 1, "y", "value5") }) } func TestNotNil(t *testing.T) { s := `true` doc := newDocumentFromString(t, s) doc.SelectElement("enabled").NotNil().SetText("false") doc.SelectElement("visible").NotNil().SetText("true") want := `false` got, err := doc.WriteToString() if err != nil { t.Fatal("etree: failed to write document to string") } if got != want { t.Error("etree: unexpected NotNil result") t.Error("wanted:\n" + want) t.Error("got:\n" + got) } } func TestValidateInput(t *testing.T) { tests := []struct { s string err string }{ {`x`, ""}, {``, ""}, {`x`, `XML syntax error on line 1: unexpected EOF`}, {``, `XML syntax error on line 1: unexpected end element `}, {`<>`, `XML syntax error on line 1: expected element name after <`}, {`xtrailing`, "etree: invalid XML format"}, {`x<`, "etree: invalid XML format"}, {`x`, `XML syntax error on line 1: element closed by `}, } type readFunc func(doc *Document, s string) error runTests := func(t *testing.T, read readFunc) { for i, test := range tests { doc := NewDocument() doc.ReadSettings.ValidateInput = true err := read(doc, test.s) if err == nil { if test.err != "" { t.Errorf("etree: test #%d:\nExpected error:\n %s\nReceived error:\n nil", i, test.err) } root := doc.Root() if root == nil || root.Tag != "root" { t.Errorf("etree: test #%d: failed to read document after input validation", i) } } else { te := err.Error() if te != test.err { t.Errorf("etree: test #%d:\nExpected error;\n %s\nReceived error:\n %s", i, test.err, te) } } } } readFromString := func(doc *Document, s string) error { return doc.ReadFromString(s) } t.Run("ReadFromString", func(t *testing.T) { runTests(t, readFromString) }) readFromBytes := func(doc *Document, s string) error { return doc.ReadFromBytes([]byte(s)) } t.Run("ReadFromBytes", func(t *testing.T) { runTests(t, readFromBytes) }) readFromFile := func(doc *Document, s string) error { pathtmp := path.Join(t.TempDir(), "etree-test") err := os.WriteFile(pathtmp, 
[]byte(s), fs.ModePerm) if err != nil { return errors.New("unable to write tmp file for input validation") } return doc.ReadFromFile(pathtmp) } t.Run("ReadFromFile", func(t *testing.T) { runTests(t, readFromFile) }) } func TestSiblingElement(t *testing.T) { doc := newDocumentFromString(t, ` `) root := doc.SelectElement("root") a := root.SelectElement("a") b := root.SelectElement("b") c := root.SelectElement("c") b1 := b.SelectElement("b1") tests := []struct { e *Element next *Element prev *Element }{ {root, nil, nil}, {a, b, nil}, {b, c, a}, {c, nil, b}, {b1, nil, nil}, } toString := func(e *Element) string { if e == nil { return "nil" } return e.Tag } for i, test := range tests { next := test.e.NextSibling() if next != test.next { t.Errorf("etree: test #%d unexpected NextSibling result.\n Expected: %s\n Received: %s\n", i, toString(next), toString(test.next)) } prev := test.e.PrevSibling() if prev != test.prev { t.Errorf("etree: test #%d unexpected PrevSibling result.\n Expected: %s\n Received: %s\n", i, toString(prev), toString(test.prev)) } } } func TestContinuations(t *testing.T) { doc := NewDocument() root := doc.CreateChild("root", func(e *Element) { e.CreateChild("child1", func(e *Element) { e.CreateComment("Grandchildren of child #1") e.CreateChild("grandchild1", func(e *Element) { e.CreateAttr("attr1", "1") e.CreateAttr("attr2", "2") }) e.CreateChild("grandchild2", func(e *Element) { e.CreateAttr("attr1", "3") e.CreateAttr("attr2", "4") }) }) e.CreateChild("child2", func(e *Element) { e.CreateComment("Grandchildren of child #2") e.CreateChild("grandchild1", func(e *Element) { e.CreateAttr("attr1", "5") e.CreateAttr("attr2", "6") }) e.CreateChild("grandchild2", func(e *Element) { e.CreateAttr("attr1", "7") e.CreateAttr("attr2", "8") }) }) }) checkStrEq(t, root.Tag, "root") // Serialize the document to a string doc.IndentTabs() s, err := doc.WriteToString() if err != nil { t.Error("etree: failed to serialize document") } // Make sure the serialized XML 
matches expectation. expected := ` ` checkStrEq(t, s, expected) } etree-1.5.1/example_test.go000066400000000000000000000031721477734601400156630ustar00rootroot00000000000000// Copyright 2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package etree import "os" // Create an etree Document, add XML entities to it, and serialize it // to stdout. func ExampleDocument_creating() { doc := NewDocument() doc.CreateProcInst("xml", `version="1.0" encoding="UTF-8"`) doc.CreateProcInst("xml-stylesheet", `type="text/xsl" href="style.xsl"`) people := doc.CreateElement("People") people.CreateComment("These are all known people") jon := people.CreateElement("Person") jon.CreateAttr("name", "Jon O'Reilly") sally := people.CreateElement("Person") sally.CreateAttr("name", "Sally") doc.Indent(2) doc.WriteTo(os.Stdout) // Output: // // // // // // // } func ExampleDocument_reading() { doc := NewDocument() if err := doc.ReadFromFile("document.xml"); err != nil { panic(err) } } func ExamplePath() { xml := ` Great Expectations Charles Dickens Ulysses James Joyce ` doc := NewDocument() doc.ReadFromString(xml) for _, e := range doc.FindElements(".//book[author='Charles Dickens']") { doc := NewDocumentWithRoot(e.Copy()) doc.Indent(2) doc.WriteTo(os.Stdout) } // Output: // // Great Expectations // Charles Dickens // } etree-1.5.1/go.mod000066400000000000000000000000521477734601400137420ustar00rootroot00000000000000module github.com/beevik/etree go 1.21.0 etree-1.5.1/helpers.go000066400000000000000000000204501477734601400146310ustar00rootroot00000000000000// Copyright 2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package etree

import (
	"io"
	"strings"
	"unicode/utf8"
)

// stack is a generic last-in, first-out container backed by a slice.
type stack[E any] struct {
	data []E
}

// empty reports whether the stack holds no values.
func (s *stack[E]) empty() bool {
	return len(s.data) == 0
}

// push places value on top of the stack.
func (s *stack[E]) push(value E) {
	s.data = append(s.data, value)
}

// pop removes and returns the value on top of the stack. It does not check
// for an empty stack; indexing an empty slice panics.
func (s *stack[E]) pop() E {
	value := s.data[len(s.data)-1]
	var empty E
	// Overwrite the vacated slot with the zero value so the backing array
	// no longer holds the popped value.
	s.data[len(s.data)-1] = empty
	s.data = s.data[:len(s.data)-1]
	return value
}

// peek returns the value on top of the stack without removing it. It does
// not check for an empty stack; indexing an empty slice panics.
func (s *stack[E]) peek() E {
	return s.data[len(s.data)-1]
}

// queue is a generic first-in, first-out container stored in a circular
// buffer. head is the index of the oldest value and tail is the index of
// the slot the next added value will occupy; head == tail means the queue
// is empty. The zero value is an empty queue ready for use.
type queue[E any] struct {
	data       []E
	head, tail int
}

// add appends value to the back of the queue, growing the buffer first when
// storing the value would leave no free slot. Keeping one slot free ensures
// that tail never catches up with head on a non-empty queue.
func (f *queue[E]) add(value E) {
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	if f.tail++; f.tail == len(f.data) {
		f.tail = 0 // wrap around to the start of the buffer
	}
}

// remove takes the oldest value off the front of the queue and returns it.
// It does not check that the queue is non-empty.
func (f *queue[E]) remove() E {
	value := f.data[f.head]
	var empty E
	// Overwrite the vacated slot with the zero value so the buffer no
	// longer holds the removed value.
	f.data[f.head] = empty
	if f.head++; f.head == len(f.data) {
		f.head = 0 // wrap around to the start of the buffer
	}
	return value
}

// len returns the number of values currently held in the queue.
func (f *queue[E]) len() int {
	if f.tail >= f.head {
		return f.tail - f.head
	}
	// The stored values wrap past the end of the buffer.
	return len(f.data) - f.head + f.tail
}

// grow replaces the buffer with one of twice the size (4 slots for an empty
// buffer) and copies the stored values, oldest first, to its beginning.
func (f *queue[E]) grow() {
	c := len(f.data) * 2
	if c == 0 {
		c = 4
	}
	buf, count := make([]E, c), f.len()
	if f.tail >= f.head {
		copy(buf[:count], f.data[f.head:f.tail])
	} else {
		// Wrapped contents: copy the run from head to the end of the old
		// buffer, then the run from the start of the old buffer to tail.
		hindex := len(f.data) - f.head
		copy(buf[:hindex], f.data[f.head:])
		copy(buf[hindex:count], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}

// xmlReader provides the interface by which an XML byte stream is
// processed and decoded.
type xmlReader interface {
	Bytes() int64
	Read(p []byte) (n int, err error)
}

// xmlSimpleReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader.
type xmlSimpleReader struct {
	r     io.Reader
	bytes int64
}

// newXmlSimpleReader wraps r in a byte-counting reader whose count starts
// at zero.
func newXmlSimpleReader(r io.Reader) xmlReader {
	return &xmlSimpleReader{r, 0}
}

// Bytes returns the total number of bytes returned by Read so far.
func (xr *xmlSimpleReader) Bytes() int64 {
	return xr.bytes
}

// Read forwards to the encapsulated reader and adds the number of bytes it
// returned to the running total.
func (xr *xmlSimpleReader) Read(p []byte) (n int, err error) {
	n, err = xr.r.Read(p)
	xr.bytes += int64(n)
	return n, err
}

// xmlPeekReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader.
It also allows the caller to // "peek" at the previous portions of the buffer after they have been // parsed. type xmlPeekReader struct { r io.Reader bytes int64 // total bytes read by the Read function buf []byte // internal read buffer bufSize int // total bytes used in the read buffer bufOffset int64 // total bytes read when buf was last filled window []byte // current read buffer window peekBuf []byte // buffer used to store data to be peeked at later peekOffset int64 // total read offset of the start of the peek buffer } func newXmlPeekReader(r io.Reader) *xmlPeekReader { buf := make([]byte, 4096) return &xmlPeekReader{ r: r, bytes: 0, buf: buf, bufSize: 0, bufOffset: 0, window: buf[0:0], peekBuf: make([]byte, 0), peekOffset: -1, } } func (xr *xmlPeekReader) Bytes() int64 { return xr.bytes } func (xr *xmlPeekReader) Read(p []byte) (n int, err error) { if len(xr.window) == 0 { err = xr.fill() if err != nil { return 0, err } if len(xr.window) == 0 { return 0, nil } } if len(xr.window) < len(p) { n = len(xr.window) } else { n = len(p) } copy(p, xr.window) xr.window = xr.window[n:] xr.bytes += int64(n) return n, err } func (xr *xmlPeekReader) PeekPrepare(offset int64, maxLen int) { if maxLen > cap(xr.peekBuf) { xr.peekBuf = make([]byte, 0, maxLen) } xr.peekBuf = xr.peekBuf[0:0] xr.peekOffset = offset xr.updatePeekBuf() } func (xr *xmlPeekReader) PeekFinalize() []byte { xr.updatePeekBuf() return xr.peekBuf } func (xr *xmlPeekReader) fill() error { xr.bufOffset = xr.bytes xr.bufSize = 0 n, err := xr.r.Read(xr.buf) if err != nil { xr.window, xr.bufSize = xr.buf[0:0], 0 return err } xr.window, xr.bufSize = xr.buf[:n], n xr.updatePeekBuf() return nil } func (xr *xmlPeekReader) updatePeekBuf() { peekRemain := cap(xr.peekBuf) - len(xr.peekBuf) if xr.peekOffset >= 0 && peekRemain > 0 { rangeMin := xr.peekOffset rangeMax := xr.peekOffset + int64(cap(xr.peekBuf)) bufMin := xr.bufOffset bufMax := xr.bufOffset + int64(xr.bufSize) if rangeMin < bufMin { rangeMin = bufMin } if 
rangeMax > bufMax { rangeMax = bufMax } if rangeMax > rangeMin { rangeMin -= xr.bufOffset rangeMax -= xr.bufOffset if int(rangeMax-rangeMin) > peekRemain { rangeMax = rangeMin + int64(peekRemain) } xr.peekBuf = append(xr.peekBuf, xr.buf[rangeMin:rangeMax]...) } } } // xmlWriter implements a proxy writer that counts the number of // bytes written by its encapsulated writer. type xmlWriter struct { w io.Writer bytes int64 } func newXmlWriter(w io.Writer) *xmlWriter { return &xmlWriter{w: w} } func (xw *xmlWriter) Write(p []byte) (n int, err error) { n, err = xw.w.Write(p) xw.bytes += int64(n) return n, err } // isWhitespace returns true if the byte slice contains only // whitespace characters. func isWhitespace(s string) bool { for i := 0; i < len(s); i++ { if c := s[i]; c != ' ' && c != '\t' && c != '\n' && c != '\r' { return false } } return true } // spaceMatch returns true if namespace a is the empty string // or if namespace a equals namespace b. func spaceMatch(a, b string) bool { switch { case a == "": return true default: return a == b } } // spaceDecompose breaks a namespace:tag identifier at the ':' // and returns the two parts. func spaceDecompose(str string) (space, key string) { colon := strings.IndexByte(str, ':') if colon == -1 { return "", str } return str[:colon], str[colon+1:] } // Strings used by indentCRLF and indentLF const ( indentSpaces = "\r\n " indentTabs = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" ) // indentCRLF returns a CRLF newline followed by n copies of the first // non-CRLF character in the source string. func indentCRLF(n int, source string) string { switch { case n < 0: return source[:2] case n < len(source)-1: return source[:n+2] default: return source + strings.Repeat(source[2:3], n-len(source)+2) } } // indentLF returns a LF newline followed by n copies of the first non-LF // character in the source string. 
func indentLF(n int, source string) string { switch { case n < 0: return source[1:2] case n < len(source)-1: return source[1 : n+2] default: return source[1:] + strings.Repeat(source[2:3], n-len(source)+2) } } // nextIndex returns the index of the next occurrence of byte ch in s, // starting from offset. It returns -1 if the byte is not found. func nextIndex(s string, ch byte, offset int) int { switch i := strings.IndexByte(s[offset:], ch); i { case -1: return -1 default: return offset + i } } // isInteger returns true if the string s contains an integer. func isInteger(s string) bool { for i := 0; i < len(s); i++ { if (s[i] < '0' || s[i] > '9') && !(i == 0 && s[i] == '-') { return false } } return true } type escapeMode byte const ( escapeNormal escapeMode = iota escapeCanonicalText escapeCanonicalAttr ) // escapeString writes an escaped version of a string to the writer. func escapeString(w Writer, s string, m escapeMode) { var esc []byte last := 0 for i := 0; i < len(s); { r, width := utf8.DecodeRuneInString(s[i:]) i += width switch r { case '&': esc = []byte("&") case '<': esc = []byte("<") case '>': if m == escapeCanonicalAttr { continue } esc = []byte(">") case '\'': if m != escapeNormal { continue } esc = []byte("'") case '"': if m == escapeCanonicalText { continue } esc = []byte(""") case '\t': if m != escapeCanonicalAttr { continue } esc = []byte(" ") case '\n': if m != escapeCanonicalAttr { continue } esc = []byte(" ") case '\r': if m == escapeNormal { continue } esc = []byte(" ") default: if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { esc = []byte("\uFFFD") break } continue } w.WriteString(s[last : i-width]) w.Write(esc) last = i } w.WriteString(s[last:]) } func isInCharacterRange(r rune) bool { return r == 0x09 || r == 0x0A || r == 0x0D || r >= 0x20 && r <= 0xD7FF || r >= 0xE000 && r <= 0xFFFD || r >= 0x10000 && r <= 0x10FFFF } etree-1.5.1/path.go000066400000000000000000000374401477734601400141320ustar00rootroot00000000000000// Copyright 
2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package etree import ( "strconv" "strings" ) /* A Path is a string that represents a search path through an etree starting from the document root or an arbitrary element. Paths are used with the Element object's Find* methods to locate and return desired elements. A Path consists of a series of slash-separated "selectors", each of which may be modified by one or more bracket-enclosed "filters". Selectors are used to traverse the etree from element to element, while filters are used to narrow the list of candidate elements at each node. Although etree Path strings are structurally and behaviorally similar to XPath strings (https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more limited set of selectors and filtering options. The following selectors are supported by etree paths: . Select the current element. .. Select the parent of the current element. * Select all child elements of the current element. / Select the root element when used at the start of a path. // Select all descendants of the current element. tag Select all child elements with a name matching the tag. The following basic filters are supported: [@attrib] Keep elements with an attribute named attrib. [@attrib='val'] Keep elements with an attribute named attrib and value matching val. [tag] Keep elements with a child element named tag. [tag='val'] Keep elements with a child element named tag and text matching val. [n] Keep the n-th element, where n is a numeric index starting from 1. The following function-based filters are supported: [text()] Keep elements with non-empty text. [text()='val'] Keep elements whose text matches val. [local-name()='val'] Keep elements whose un-prefixed tag matches val. [name()='val'] Keep elements whose full tag exactly matches val. [namespace-prefix()] Keep elements with non-empty namespace prefixes. 
[namespace-prefix()='val'] Keep elements whose namespace prefix matches val. [namespace-uri()] Keep elements with non-empty namespace URIs. [namespace-uri()='val'] Keep elements whose namespace URI matches val. Below are some examples of etree path strings. Select the bookstore child element of the root element: /bookstore Beginning from the root element, select the title elements of all descendant book elements having a 'category' attribute of 'WEB': //book[@category='WEB']/title Beginning from the current element, select the first descendant book element with a title child element containing the text 'Great Expectations': .//book[title='Great Expectations'][1] Beginning from the current element, select all child elements of book elements with an attribute 'language' set to 'english': ./book/*[@language='english'] Beginning from the current element, select all child elements of book elements containing the text 'special': ./book/*[text()='special'] Beginning from the current element, select all descendant book elements whose title child element has a 'language' attribute of 'french': .//book/title[@language='french']/.. Beginning from the current element, select all descendant book elements belonging to the http://www.w3.org/TR/html4/ namespace: .//book[namespace-uri()='http://www.w3.org/TR/html4/'] */ type Path struct { segments []segment } // ErrPath is returned by path functions when an invalid etree path is provided. type ErrPath string // Error returns the string describing a path error. func (err ErrPath) Error() string { return "etree: " + string(err) } // CompilePath creates an optimized version of an XPath-like string that // can be used to query elements in an element tree. 
func CompilePath(path string) (Path, error) { var comp compiler segments := comp.parsePath(path) if comp.err != ErrPath("") { return Path{nil}, comp.err } return Path{segments}, nil } // MustCompilePath creates an optimized version of an XPath-like string that // can be used to query elements in an element tree. Panics if an error // occurs. Use this function to create Paths when you know the path is // valid (i.e., if it's hard-coded). func MustCompilePath(path string) Path { p, err := CompilePath(path) if err != nil { panic(err) } return p } // A segment is a portion of a path between "/" characters. // It contains one selector and zero or more [filters]. type segment struct { sel selector filters []filter } func (seg *segment) apply(e *Element, p *pather) { seg.sel.apply(e, p) for _, f := range seg.filters { f.apply(p) } } // A selector selects XML elements for consideration by the // path traversal. type selector interface { apply(e *Element, p *pather) } // A filter pares down a list of candidate XML elements based // on a path filter in [brackets]. type filter interface { apply(p *pather) } // A pather is helper object that traverses an element tree using // a Path object. It collects and deduplicates all elements matching // the path query. type pather struct { queue queue[node] results []*Element inResults map[*Element]bool candidates []*Element scratch []*Element // used by filters } // A node represents an element and the remaining path segments that // should be applied against it by the pather. type node struct { e *Element segments []segment } func newPather() *pather { return &pather{ results: make([]*Element, 0), inResults: make(map[*Element]bool), candidates: make([]*Element, 0), scratch: make([]*Element, 0), } } // traverse follows the path from the element e, collecting // and then returning all elements that match the path's selectors // and filters. 
func (p *pather) traverse(e *Element, path Path) []*Element { for p.queue.add(node{e, path.segments}); p.queue.len() > 0; { p.eval(p.queue.remove()) } return p.results } // eval evaluates the current path node by applying the remaining // path's selector rules against the node's element. func (p *pather) eval(n node) { p.candidates = p.candidates[0:0] seg, remain := n.segments[0], n.segments[1:] seg.apply(n.e, p) if len(remain) == 0 { for _, c := range p.candidates { if in := p.inResults[c]; !in { p.inResults[c] = true p.results = append(p.results, c) } } } else { for _, c := range p.candidates { p.queue.add(node{c, remain}) } } } // A compiler generates a compiled path from a path string. type compiler struct { err ErrPath } // parsePath parses an XPath-like string describing a path // through an element tree and returns a slice of segment // descriptors. func (c *compiler) parsePath(path string) []segment { // If path ends with //, fix it if strings.HasSuffix(path, "//") { path += "*" } var segments []segment // Check for an absolute path if strings.HasPrefix(path, "/") { segments = append(segments, segment{new(selectRoot), []filter{}}) path = path[1:] } // Split path into segments for _, s := range splitPath(path) { segments = append(segments, c.parseSegment(s)) if c.err != ErrPath("") { break } } return segments } func splitPath(path string) []string { var pieces []string start := 0 inquote := false var quote byte for i := 0; i+1 <= len(path); i++ { if !inquote { if path[i] == '\'' || path[i] == '"' { inquote, quote = true, path[i] } else if path[i] == '/' { pieces = append(pieces, path[start:i]) start = i + 1 } } else if path[i] == quote { inquote = false } } return append(pieces, path[start:]) } // parseSegment parses a path segment between / characters. 
func (c *compiler) parseSegment(path string) segment { pieces := strings.Split(path, "[") seg := segment{ sel: c.parseSelector(pieces[0]), filters: []filter{}, } for i := 1; i < len(pieces); i++ { fpath := pieces[i] if len(fpath) == 0 || fpath[len(fpath)-1] != ']' { c.err = ErrPath("path has invalid filter [brackets].") break } seg.filters = append(seg.filters, c.parseFilter(fpath[:len(fpath)-1])) } return seg } // parseSelector parses a selector at the start of a path segment. func (c *compiler) parseSelector(path string) selector { switch path { case ".": return new(selectSelf) case "..": return new(selectParent) case "*": return new(selectChildren) case "": return new(selectDescendants) default: return newSelectChildrenByTag(path) } } var fnTable = map[string]func(e *Element) string{ "local-name": (*Element).name, "name": (*Element).FullTag, "namespace-prefix": (*Element).namespacePrefix, "namespace-uri": (*Element).NamespaceURI, "text": (*Element).Text, } // parseFilter parses a path filter contained within [brackets]. func (c *compiler) parseFilter(path string) filter { if len(path) == 0 { c.err = ErrPath("path contains an empty filter expression.") return nil } // Filter contains [@attr='val'], [@attr="val"], [fn()='val'], // [fn()="val"], [tag='val'] or [tag="val"]? 
eqindex := strings.IndexByte(path, '=') if eqindex >= 0 && eqindex+1 < len(path) { quote := path[eqindex+1] if quote == '\'' || quote == '"' { rindex := nextIndex(path, quote, eqindex+2) if rindex != len(path)-1 { c.err = ErrPath("path has mismatched filter quotes.") return nil } key := path[:eqindex] value := path[eqindex+2 : rindex] switch { case key[0] == '@': return newFilterAttrVal(key[1:], value) case strings.HasSuffix(key, "()"): name := key[:len(key)-2] if fn, ok := fnTable[name]; ok { return newFilterFuncVal(fn, value) } c.err = ErrPath("path has unknown function " + name) return nil default: return newFilterChildText(key, value) } } } // Filter contains [@attr], [N], [tag] or [fn()] switch { case path[0] == '@': return newFilterAttr(path[1:]) case strings.HasSuffix(path, "()"): name := path[:len(path)-2] if fn, ok := fnTable[name]; ok { return newFilterFunc(fn) } c.err = ErrPath("path has unknown function " + name) return nil case isInteger(path): pos, _ := strconv.Atoi(path) switch { case pos > 0: return newFilterPos(pos - 1) default: return newFilterPos(pos) } default: return newFilterChild(path) } } // selectSelf selects the current element into the candidate list. type selectSelf struct{} func (s *selectSelf) apply(e *Element, p *pather) { p.candidates = append(p.candidates, e) } // selectRoot selects the element's root node. type selectRoot struct{} func (s *selectRoot) apply(e *Element, p *pather) { root := e for root.parent != nil { root = root.parent } p.candidates = append(p.candidates, root) } // selectParent selects the element's parent into the candidate list. type selectParent struct{} func (s *selectParent) apply(e *Element, p *pather) { if e.parent != nil { p.candidates = append(p.candidates, e.parent) } } // selectChildren selects the element's child elements into the // candidate list. 
type selectChildren struct{} func (s *selectChildren) apply(e *Element, p *pather) { for _, c := range e.Child { if c, ok := c.(*Element); ok { p.candidates = append(p.candidates, c) } } } // selectDescendants selects all descendant child elements // of the element into the candidate list. type selectDescendants struct{} func (s *selectDescendants) apply(e *Element, p *pather) { var queue queue[*Element] for queue.add(e); queue.len() > 0; { e := queue.remove() p.candidates = append(p.candidates, e) for _, c := range e.Child { if c, ok := c.(*Element); ok { queue.add(c) } } } } // selectChildrenByTag selects into the candidate list all child // elements of the element having the specified tag. type selectChildrenByTag struct { space, tag string } func newSelectChildrenByTag(path string) *selectChildrenByTag { s, l := spaceDecompose(path) return &selectChildrenByTag{s, l} } func (s *selectChildrenByTag) apply(e *Element, p *pather) { for _, c := range e.Child { if c, ok := c.(*Element); ok && spaceMatch(s.space, c.Space) && s.tag == c.Tag { p.candidates = append(p.candidates, c) } } } // filterPos filters the candidate list, keeping only the // candidate at the specified index. type filterPos struct { index int } func newFilterPos(pos int) *filterPos { return &filterPos{pos} } func (f *filterPos) apply(p *pather) { if f.index >= 0 { if f.index < len(p.candidates) { p.scratch = append(p.scratch, p.candidates[f.index]) } } else { if -f.index <= len(p.candidates) { p.scratch = append(p.scratch, p.candidates[len(p.candidates)+f.index]) } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } // filterAttr filters the candidate list for elements having // the specified attribute. 
type filterAttr struct { space, key string } func newFilterAttr(str string) *filterAttr { s, l := spaceDecompose(str) return &filterAttr{s, l} } func (f *filterAttr) apply(p *pather) { for _, c := range p.candidates { for _, a := range c.Attr { if spaceMatch(f.space, a.Space) && f.key == a.Key { p.scratch = append(p.scratch, c) break } } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } // filterAttrVal filters the candidate list for elements having // the specified attribute with the specified value. type filterAttrVal struct { space, key, val string } func newFilterAttrVal(str, value string) *filterAttrVal { s, l := spaceDecompose(str) return &filterAttrVal{s, l, value} } func (f *filterAttrVal) apply(p *pather) { for _, c := range p.candidates { for _, a := range c.Attr { if spaceMatch(f.space, a.Space) && f.key == a.Key && f.val == a.Value { p.scratch = append(p.scratch, c) break } } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } // filterFunc filters the candidate list for elements satisfying a custom // boolean function. type filterFunc struct { fn func(e *Element) string } func newFilterFunc(fn func(e *Element) string) *filterFunc { return &filterFunc{fn} } func (f *filterFunc) apply(p *pather) { for _, c := range p.candidates { if f.fn(c) != "" { p.scratch = append(p.scratch, c) } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } // filterFuncVal filters the candidate list for elements containing a value // matching the result of a custom function. type filterFuncVal struct { fn func(e *Element) string val string } func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal { return &filterFuncVal{fn, value} } func (f *filterFuncVal) apply(p *pather) { for _, c := range p.candidates { if f.fn(c) == f.val { p.scratch = append(p.scratch, c) } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } // filterChild filters the candidate list for elements having // a child element with the specified tag. 
type filterChild struct { space, tag string } func newFilterChild(str string) *filterChild { s, l := spaceDecompose(str) return &filterChild{s, l} } func (f *filterChild) apply(p *pather) { for _, c := range p.candidates { for _, cc := range c.Child { if cc, ok := cc.(*Element); ok && spaceMatch(f.space, cc.Space) && f.tag == cc.Tag { p.scratch = append(p.scratch, c) } } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } // filterChildText filters the candidate list for elements having // a child element with the specified tag and text. type filterChildText struct { space, tag, text string } func newFilterChildText(str, text string) *filterChildText { s, l := spaceDecompose(str) return &filterChildText{s, l, text} } func (f *filterChildText) apply(p *pather) { for _, c := range p.candidates { for _, cc := range c.Child { if cc, ok := cc.(*Element); ok && spaceMatch(f.space, cc.Space) && f.tag == cc.Tag && f.text == cc.Text() { p.scratch = append(p.scratch, c) } } } p.candidates, p.scratch = p.scratch, p.candidates[0:0] } etree-1.5.1/path_test.go000066400000000000000000000173611477734601400151710ustar00rootroot00000000000000// Copyright 2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package etree import "testing" var testXML = ` Everyday Italian Giada De Laurentiis 2005 30.00 Clarkson Potter Harry Potter J K. Rowling 2005 29.99 XQuery Kick Start James McGovern Per Bothner Kurt Cagle James Linn Vaidyanathan Nagarajan 2003 49.99 Learning XML Erik T. Ray 2003 39.95 ` type test struct { path string result interface{} } type errorResult string var tests = []test{ // basic queries {"./bookstore/book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, {"./bookstore/book/author", []string{"Giada De Laurentiis", "J K. Rowling", "James McGovern", "Per Bothner", "Kurt Cagle", "James Linn", "Vaidyanathan Nagarajan", "Erik T. 
Ray"}}, {"./bookstore/book/year", []string{"2005", "2005", "2003", "2003"}}, {"./bookstore/book/p:price", []string{"30.00", "29.99", "39.95"}}, {"./bookstore/book/isbn", nil}, // descendant queries {"//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, {"//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, {".//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, {".//bookstore//title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, {".//book/title", []string{"Everyday Italian", "Harry Potter", "XQuery Kick Start", "Learning XML"}}, {".//p:price/.", []string{"30.00", "29.99", "39.95"}}, {".//price", []string{"30.00", "29.99", "49.99", "39.95"}}, // positional queries {"./bookstore/book[1]/title", "Everyday Italian"}, {"./bookstore/book[4]/title", "Learning XML"}, {"./bookstore/book[5]/title", nil}, {"./bookstore/book[3]/author[0]", "James McGovern"}, {"./bookstore/book[3]/author[1]", "James McGovern"}, {"./bookstore/book[3]/author[3]/./.", "Kurt Cagle"}, {"./bookstore/book[3]/author[6]", nil}, {"./bookstore/book[-1]/title", "Learning XML"}, {"./bookstore/book[-4]/title", "Everyday Italian"}, {"./bookstore/book[-5]/title", nil}, // text function queries {"./bookstore/book[author='James McGovern']/title", "XQuery Kick Start"}, {"./bookstore/book[author='Per Bothner']/title", "XQuery Kick Start"}, {"./bookstore/book[author='Kurt Cagle']/title", "XQuery Kick Start"}, {"./bookstore/book[author='James Linn']/title", "XQuery Kick Start"}, {"./bookstore/book[author='Vaidyanathan Nagarajan']/title", "XQuery Kick Start"}, {"//book[p:price='29.99']/title", "Harry Potter"}, {"//book[price='29.99']/title", "Harry Potter"}, {"//book/price[text()='29.99']", "29.99"}, {"//book/author[text()='Kurt Cagle']", "Kurt Cagle"}, {"//book/editor[text()]", []string{"Clarkson Potter", "\n\t\t"}}, // namespace function queries 
{"//*[namespace-uri()]", []string{"30.00", "29.99", "39.95"}}, {"//*[namespace-uri()='urn:books-com:prices']", []string{"30.00", "29.99", "39.95"}}, {"//*[namespace-uri()='foo']", nil}, {"//*[namespace-prefix()]", []string{"30.00", "29.99", "39.95"}}, {"//*[namespace-prefix()='p']", []string{"30.00", "29.99", "39.95"}}, {"//*[name()='p:price']", []string{"30.00", "29.99", "39.95"}}, {"//*[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}}, {"//price[namespace-uri()='']", []string{"49.99"}}, {"//price[namespace-prefix()='']", []string{"49.99"}}, {"//price[name()='price']", []string{"49.99"}}, {"//price[local-name()='price']", []string{"30.00", "29.99", "49.99", "39.95"}}, // attribute queries {"./bookstore/book[@category='WEB']/title", []string{"XQuery Kick Start", "Learning XML"}}, {"./bookstore/book[@path='/books/xml']/title", []string{"Learning XML"}}, {"./bookstore/book[@category='COOKING']/title[@lang='en']", "Everyday Italian"}, {`./bookstore/book[@category="COOKING"]/title[@lang="en"]`, "Everyday Italian"}, {"./bookstore/book/title[@lang='en'][@sku='150']", "Harry Potter"}, {"./bookstore/book/title[@lang='fr']", nil}, {"//p:price[@p:tax='1.99']", []string{"29.99"}}, {"//p:price[@tax='1.99']", []string{"29.99"}}, {"//p:price[@p:tax]", []string{"29.99"}}, {"//p:price[@tax]", []string{"29.99"}}, // parent queries {"./bookstore/book[@category='COOKING']/title/../../book[4]/title", "Learning XML"}, // root queries {"/bookstore/book[1]/title", "Everyday Italian"}, {"/bookstore/book[4]/title", "Learning XML"}, {"/bookstore/book[5]/title", nil}, {"/bookstore/book[3]/author[0]", "James McGovern"}, {"/bookstore/book[3]/author[1]", "James McGovern"}, {"/bookstore/book[3]/author[3]/./.", "Kurt Cagle"}, {"/bookstore/book[3]/author[6]", nil}, {"/bookstore/book[-1]/title", "Learning XML"}, {"/bookstore/book[-4]/title", "Everyday Italian"}, {"/bookstore/book[-5]/title", nil}, // bad paths {"./bookstore/book[]", errorResult("etree: path contains an empty 
filter expression.")},
	{"./bookstore/book[@category='WEB'", errorResult("etree: path has invalid filter [brackets].")},
	{"./bookstore/book[@category='WEB]", errorResult("etree: path has mismatched filter quotes.")},
	{`./bookstore/book[@category='WEB"]`, errorResult("etree: path has mismatched filter quotes.")},
	{`./bookstore/book[@category="WEB']`, errorResult("etree: path has mismatched filter quotes.")},
	{"./bookstore/book[author]a", errorResult("etree: path has invalid filter [brackets].")},
	{"/][", errorResult("etree: path has invalid filter [brackets].")},
}

// TestPath compiles every path in the tests table and compares the outcome
// of evaluating it against the testXML document with the table's expected
// result. The expected result is one of: an errorResult (the path must fail
// to compile with exactly that message), nil (no element may match), a
// string (exactly one element with that text must match) or a []string (the
// matched elements' texts, in order).
func TestPath(t *testing.T) {
	doc := NewDocument()
	if err := doc.ReadFromString(testXML); err != nil {
		// Without a parsed fixture every table entry would fail for the
		// same reason, so stop here instead of reporting each of them.
		t.Fatal(err)
	}

	for _, test := range tests {
		path, err := CompilePath(test.path)
		if err != nil {
			// A compile error is acceptable only when the table expects
			// exactly this error message.
			if r, ok := test.result.(errorResult); !ok || err.Error() != string(r) {
				fail(t, test)
			}
			continue
		}

		// Test both FindElementsPath and FindElementPath
		element := doc.FindElementPath(path)
		elements := doc.FindElementsPath(path)

		switch s := test.result.(type) {
		case errorResult:
			// The path compiled although an error was expected.
			fail(t, test)
		case nil:
			if element != nil || len(elements) != 0 {
				fail(t, test)
			}
		case string:
			if element == nil || element.Text() != s ||
				len(elements) != 1 || elements[0].Text() != s {
				fail(t, test)
			}
		case []string:
			if element == nil || element.Text() != s[0] || len(elements) != len(s) {
				fail(t, test)
				continue
			}
			for i := 0; i < len(elements); i++ {
				if elements[i].Text() != s[i] {
					fail(t, test)
					break
				}
			}
		}
	}
}

// fail reports a failed table entry, identified by its path, without
// stopping the test. It is marked as a helper so the failure is attributed
// to the caller's line.
func fail(t *testing.T, test test) {
	t.Helper()
	t.Errorf("etree: failed test '%s'\n", test.path)
}

// TestAbsolutePath runs queries beginning with "/" and "//" from each
// author element (rather than from the document) and expects them to
// return the same titles that they produce document-wide.
func TestAbsolutePath(t *testing.T) {
	doc := NewDocument()
	if err := doc.ReadFromString(testXML); err != nil {
		t.Fatal(err)
	}

	elements := doc.FindElements("//book/author")
	if len(elements) == 0 {
		// Otherwise the loop below never runs and the test passes without
		// having checked anything.
		t.Fatal("etree: absolute path test found no author elements")
	}
	for _, e := range elements {
		title := e.FindElement("/bookstore/book[1]/title")
		if title == nil || title.Text() != "Everyday Italian" {
			t.Errorf("etree: absolute path test failed")
		}

		title = e.FindElement("//book[p:price='29.99']/title")
		if title == nil || title.Text() != "Harry Potter" {
			t.Errorf("etree: absolute path test failed")
		}
	}
}