pax_global_header00006660000000000000000000000064137366104740014525gustar00rootroot0000000000000052 comment=5e9685f9dbf17c161d463189a0a5e6c99938bba0 xmlquery-1.3.3/000077500000000000000000000000001373661047400134175ustar00rootroot00000000000000xmlquery-1.3.3/.gitignore000066400000000000000000000004621373661047400154110ustar00rootroot00000000000000# vscode .vscode debug *.test ./build # Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.profxmlquery-1.3.3/.travis.yml000066400000000000000000000004501373661047400155270ustar00rootroot00000000000000language: go go: - 1.9.x - 1.12.x - 1.13.x - 1.14.x - 1.15.x install: - go get golang.org/x/net/html/charset - go get github.com/antchfx/xpath - go get github.com/mattn/goveralls - go get github.com/golang/groupcache script: - $HOME/gopath/bin/goveralls -service=travis-ci xmlquery-1.3.3/LICENSE000066400000000000000000000017761373661047400144370ustar00rootroot00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.xmlquery-1.3.3/README.md000066400000000000000000000146721373661047400147100ustar00rootroot00000000000000xmlquery ==== [![Build Status](https://travis-ci.org/antchfx/xmlquery.svg?branch=master)](https://travis-ci.org/antchfx/xmlquery) [![Coverage Status](https://coveralls.io/repos/github/antchfx/xmlquery/badge.svg?branch=master)](https://coveralls.io/github/antchfx/xmlquery?branch=master) [![GoDoc](https://godoc.org/github.com/antchfx/xmlquery?status.svg)](https://godoc.org/github.com/antchfx/xmlquery) [![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/xmlquery)](https://goreportcard.com/report/github.com/antchfx/xmlquery) Overview === `xmlquery` is an XPath query package for XML documents, allowing you to extract data or evaluate from XML documents with an XPath expression. `xmlquery` has a built-in query object caching feature that caches recently used XPATH query strings. Enabling caching can avoid recompile XPath expression for each query. Change Logs === 2020-08-?? - Add XML stream loading and parsing support. 2019-11-11 - Add XPath query caching. 2019-10-05 - Add new methods compatible with invalid XPath expression error: `QueryAll` and `Query`. - Add `QuerySelector` and `QuerySelectorAll` methods, support for reused query objects. - PR [#12](https://github.com/antchfx/xmlquery/pull/12) (Thanks @FrancescoIlario) - PR [#11](https://github.com/antchfx/xmlquery/pull/11) (Thanks @gjvnq) 2018-12-23 - Added XML output including comment nodes. [#9](https://github.com/antchfx/xmlquery/issues/9) 2018-12-03 - Added support to attribute name with namespace prefix and XML output. [#6](https://github.com/antchfx/xmlquery/issues/6) Installation ==== ``` $ go get github.com/antchfx/xmlquery ``` Getting Started === ### Find specified XPath query. ```go list, err := xmlquery.QueryAll(doc, "a") if err != nil { panic(err) } ``` #### Parse an XML from URL. ```go doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml") ``` #### Parse an XML from string. ```go s := `` doc, err := xmlquery.Parse(strings.NewReader(s)) ``` #### Parse an XML from io.Reader. ```go f, err := os.Open("../books.xml") doc, err := xmlquery.Parse(f) ``` #### Parse an XML in a stream fashion (simple case without elements filtering). ```go f, err := os.Open("../books.xml") p, err := xmlquery.CreateStreamParser(f, "/bookstore/book") for { n, err := p.Read() if err == io.EOF { break } if err != nil { ... } } ``` #### Parse an XML in a stream fashion (simple case advanced element filtering). ```go f, err := os.Open("../books.xml") p, err := xmlquery.CreateStreamParser(f, "/bookstore/book", "/bookstore/book[price>=10]") for { n, err := p.Read() if err == io.EOF { break } if err != nil { ... } } ``` #### Find authors of all books in the bookstore. ```go list := xmlquery.Find(doc, "//book//author") // or list := xmlquery.Find(doc, "//author") ``` #### Find the second book. ```go book := xmlquery.FindOne(doc, "//book[2]") ``` #### Find all book elements and only get `id` attribute. (New Feature) ```go list := xmlquery.Find(doc,"//book/@id") ``` #### Find all books with id `bk104`. ```go list := xmlquery.Find(doc, "//book[@id='bk104']") ``` #### Find all books with price less than 5. ```go list := xmlquery.Find(doc, "//book[price<5]") ``` #### Evaluate total price of all books. ```go expr, err := xpath.Compile("sum(//book/price)") price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64) fmt.Printf("total price: %f\n", price) ``` #### Evaluate number of all book elements. ```go expr, err := xpath.Compile("count(//book)") price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64) ``` FAQ ==== #### `Find()` vs `QueryAll()`, which is better? `Find` and `QueryAll` both do the same thing: searches all of matched XML nodes. `Find` panics if provided with an invalid XPath query, while `QueryAll` returns an error. #### Can I save my query expression object for the next query? Yes, you can. We provide `QuerySelector` and `QuerySelectorAll` methods; they accept your query expression object. Caching a query expression object avoids recompiling the XPath query expression, improving query performance. #### Create XML document. ```go doc := &xmlquery.Node{ Type: xmlquery.DeclarationNode, Data: "xml", Attr: []xml.Attr{ xml.Attr{Name: xml.Name{Local: "version"}, Value: "1.0"}, }, } root := &xmlquery.Node{ Data: "rss", Type: xmlquery.ElementNode, } doc.FirstChild = root channel := &xmlquery.Node{ Data: "channel", Type: xmlquery.ElementNode, } root.FirstChild = channel title := &xmlquery.Node{ Data: "title", Type: xmlquery.ElementNode, } title_text := &xmlquery.Node{ Data: "W3Schools Home Page", Type: xmlquery.TextNode, } title.FirstChild = title_text channel.FirstChild = title fmt.Println(doc.OutputXML(true)) // W3Schools Home Page ``` Quick Tutorial === ```go import ( "github.com/antchfx/xmlquery" ) func main(){ s := ` W3Schools Home Page https://www.w3schools.com Free web building tutorials RSS Tutorial https://www.w3schools.com/xml/xml_rss.asp New RSS tutorial on W3Schools XML Tutorial https://www.w3schools.com/xml New XML tutorial on W3Schools ` doc, err := xmlquery.Parse(strings.NewReader(s)) if err != nil { panic(err) } channel := xmlquery.FindOne(doc, "//channel") if n := channel.SelectElement("title"); n != nil { fmt.Printf("title: %s\n", n.InnerText()) } if n := channel.SelectElement("link"); n != nil { fmt.Printf("link: %s\n", n.InnerText()) } for i, n := range xmlquery.Find(doc, "//item/title") { fmt.Printf("#%d %s\n", i, n.InnerText()) } } ``` List of supported XPath query packages === | Name | Description | | ------------------------------------------------- | ----------------------------------------- | | [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for HTML documents | | [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for XML documents | | [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for JSON documents | Questions === Please let me know if you have any questions xmlquery-1.3.3/books.xml000066400000000000000000000106071373661047400152620ustar00rootroot00000000000000 Gambardella, Matthew XML Developer's Guide Computer 44.95 2000-10-01 An in-depth look at creating applications with XML. Ralls, Kim Midnight Rain Fantasy 5.95 2000-12-16 A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world. Corets, Eva Maeve Ascendant Fantasy 5.95 2000-11-17 After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society. Corets, Eva Oberon's Legacy Fantasy 5.95 2001-03-10 In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant. Corets, Eva The Sundered Grail Fantasy 5.95 2001-09-10 The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy. Randall, Cynthia Lover Birds Romance 4.95 2000-09-02 When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled. Thurman, Paula Splish Splash Romance 4.95 2000-11-02 A deep sea diver finds true love twenty thousand leagues beneath the sea. Knorr, Stefan Creepy Crawlies Horror 4.95 2000-12-06 An anthology of horror stories about roaches, centipedes, scorpions and other insects. Kress, Peter Paradox Lost Science Fiction 6.95 2000-11-02 After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum. O'Brien, Tim Microsoft .NET: The Programming Bible Computer 36.95 2000-12-09 Microsoft's .NET initiative is explored in detail in this deep programmer's reference. O'Brien, Tim MSXML3: A Comprehensive Guide Computer 36.95 2000-12-01 The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more. Galos, Mike Visual Studio 7: A Comprehensive Guide Computer 49.95 2001-04-16 Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment. xmlquery-1.3.3/cache.go000066400000000000000000000016221373661047400150120ustar00rootroot00000000000000package xmlquery import ( "sync" "github.com/golang/groupcache/lru" "github.com/antchfx/xpath" ) // DisableSelectorCache will disable caching for the query selector if value is true. var DisableSelectorCache = false // SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50. // Will disable caching if SelectorCacheMaxEntries <= 0. var SelectorCacheMaxEntries = 50 var ( cacheOnce sync.Once cache *lru.Cache cacheMutex sync.Mutex ) func getQuery(expr string) (*xpath.Expr, error) { if DisableSelectorCache || SelectorCacheMaxEntries <= 0 { return xpath.Compile(expr) } cacheOnce.Do(func() { cache = lru.New(SelectorCacheMaxEntries) }) cacheMutex.Lock() defer cacheMutex.Unlock() if v, ok := cache.Get(expr); ok { return v.(*xpath.Expr), nil } v, err := xpath.Compile(expr) if err != nil { return nil, err } cache.Add(expr, v) return v, nil } xmlquery-1.3.3/cached_reader.go000066400000000000000000000021171373661047400165000ustar00rootroot00000000000000package xmlquery import ( "bufio" ) type cachedReader struct { buffer *bufio.Reader cache []byte cacheCap int cacheLen int caching bool } func newCachedReader(r *bufio.Reader) *cachedReader { return &cachedReader{ buffer: r, cache: make([]byte, 4096), cacheCap: 4096, cacheLen: 0, caching: false, } } func (c *cachedReader) StartCaching() { c.cacheLen = 0 c.caching = true } func (c *cachedReader) ReadByte() (byte, error) { if !c.caching { return c.buffer.ReadByte() } b, err := c.buffer.ReadByte() if err != nil { return b, err } if c.cacheLen < c.cacheCap { c.cache[c.cacheLen] = b c.cacheLen++ } return b, err } func (c *cachedReader) Cache() []byte { return c.cache[:c.cacheLen] } func (c *cachedReader) StopCaching() { c.caching = false } func (c *cachedReader) Read(p []byte) (int, error) { n, err := c.buffer.Read(p) if err != nil { return n, err } if c.caching && c.cacheLen < c.cacheCap { for i := 0; i < n; i++ { c.cache[c.cacheLen] = p[i] c.cacheLen++ if c.cacheLen >= c.cacheCap { break } } } return n, err } xmlquery-1.3.3/cached_reader_test.go000066400000000000000000000014551373661047400175430ustar00rootroot00000000000000package xmlquery import ( "bufio" "bytes" "strings" "testing" ) func TestCaching(t *testing.T) { buf := strings.NewReader(`ABCDEF`) bufReader := bufio.NewReader(buf) cachedReader := newCachedReader(bufReader) b, err := cachedReader.ReadByte() if err != nil { t.Fatal(err.Error()) } if b != 'A' { t.Fatalf("Expected read byte to be A, got %c instead.", b) } cachedReader.StartCaching() tmpBuf := make([]byte, 10) n, err := cachedReader.Read(tmpBuf) if err != nil { t.Fatal(err.Error()) } if n != 5 { t.Fatalf("Expected 5 bytes to be read. Got %d instead.", n) } if !bytes.Equal(tmpBuf[:n], []byte("BCDEF")) { t.Fatalf("Incorrect read buffer value") } cached := cachedReader.Cache() if !bytes.Equal(cached, []byte("BCDEF")) { t.Fatalf("Incorrect cached buffer value") } } xmlquery-1.3.3/go.mod000066400000000000000000000003211373661047400145210ustar00rootroot00000000000000module github.com/antchfx/xmlquery go 1.14 require ( github.com/antchfx/xpath v1.1.10 github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc ) xmlquery-1.3.3/go.sum000066400000000000000000000026001373661047400145500ustar00rootroot00000000000000github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg= github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc h1:zK/HqS5bZxDptfPJNq8v7vJfXtkU7r9TLIoSr1bXaP4= golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= xmlquery-1.3.3/node.go000066400000000000000000000127461373661047400147050ustar00rootroot00000000000000package xmlquery import ( "bytes" "encoding/xml" "fmt" "strings" ) // A NodeType is the type of a Node. type NodeType uint const ( // DocumentNode is a document object that, as the root of the document tree, // provides access to the entire XML document. DocumentNode NodeType = iota // DeclarationNode is the document type declaration, indicated by the // following tag (for example, ). DeclarationNode // ElementNode is an element (for example, ). ElementNode // TextNode is the text content of a node. TextNode // CharDataNode node CharDataNode // CommentNode a comment (for example, ). CommentNode // AttributeNode is an attribute of element. AttributeNode ) // A Node consists of a NodeType and some Data (tag name for // element nodes, content for text) and are part of a tree of Nodes. type Node struct { Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node Type NodeType Data string Prefix string NamespaceURI string Attr []xml.Attr level int // node level in the tree } // InnerText returns the text between the start and end tags of the object. func (n *Node) InnerText() string { var output func(*bytes.Buffer, *Node) output = func(buf *bytes.Buffer, n *Node) { switch n.Type { case TextNode, CharDataNode: buf.WriteString(n.Data) case CommentNode: default: for child := n.FirstChild; child != nil; child = child.NextSibling { output(buf, child) } } } var buf bytes.Buffer output(&buf, n) return buf.String() } func (n *Node) sanitizedData(preserveSpaces bool) string { if preserveSpaces { return strings.Trim(n.Data, "\n\t") } return strings.TrimSpace(n.Data) } func calculatePreserveSpaces(n *Node, pastValue bool) bool { if attr := n.SelectAttr("xml:space"); attr == "preserve" { return true } else if attr == "default" { return false } return pastValue } func outputXML(buf *bytes.Buffer, n *Node, preserveSpaces bool) { preserveSpaces = calculatePreserveSpaces(n, preserveSpaces) switch n.Type { case TextNode: xml.EscapeText(buf, []byte(n.sanitizedData(preserveSpaces))) return case CharDataNode: buf.WriteString("") return case CommentNode: buf.WriteString("") return case DeclarationNode: buf.WriteString("") } else { buf.WriteString(">") } for child := n.FirstChild; child != nil; child = child.NextSibling { outputXML(buf, child, preserveSpaces) } if n.Type != DeclarationNode { if n.Prefix == "" { buf.WriteString(fmt.Sprintf("", n.Data)) } else { buf.WriteString(fmt.Sprintf("", n.Prefix, n.Data)) } } } // OutputXML returns the text that including tags name. func (n *Node) OutputXML(self bool) string { var buf bytes.Buffer if self { outputXML(&buf, n, false) } else { for n := n.FirstChild; n != nil; n = n.NextSibling { outputXML(&buf, n, false) } } return buf.String() } // AddAttr adds a new attribute specified by 'key' and 'val' to a node 'n'. func AddAttr(n *Node, key, val string) { var attr xml.Attr if i := strings.Index(key, ":"); i > 0 { attr = xml.Attr{ Name: xml.Name{Space: key[:i], Local: key[i+1:]}, Value: val, } } else { attr = xml.Attr{ Name: xml.Name{Local: key}, Value: val, } } n.Attr = append(n.Attr, attr) } // AddChild adds a new node 'n' to a node 'parent' as its last child. func AddChild(parent, n *Node) { n.Parent = parent n.NextSibling = nil if parent.FirstChild == nil { parent.FirstChild = n n.PrevSibling = nil } else { parent.LastChild.NextSibling = n n.PrevSibling = parent.LastChild } parent.LastChild = n } // AddSibling adds a new node 'n' as a sibling of a given node 'sibling'. // Note it is not necessarily true that the new node 'n' would be added // immediately after 'sibling'. If 'sibling' isn't the last child of its // parent, then the new node 'n' will be added at the end of the sibling // chain of their parent. func AddSibling(sibling, n *Node) { for t := sibling.NextSibling; t != nil; t = t.NextSibling { sibling = t } n.Parent = sibling.Parent sibling.NextSibling = n n.PrevSibling = sibling n.NextSibling = nil if sibling.Parent != nil { sibling.Parent.LastChild = n } } // RemoveFromTree removes a node and its subtree from the document // tree it is in. If the node is the root of the tree, then it's no-op. func RemoveFromTree(n *Node) { if n.Parent == nil { return } if n.Parent.FirstChild == n { if n.Parent.LastChild == n { n.Parent.FirstChild = nil n.Parent.LastChild = nil } else { n.Parent.FirstChild = n.NextSibling n.NextSibling.PrevSibling = nil } } else { if n.Parent.LastChild == n { n.Parent.LastChild = n.PrevSibling n.PrevSibling.NextSibling = nil } else { n.PrevSibling.NextSibling = n.NextSibling n.NextSibling.PrevSibling = n.PrevSibling } } n.Parent = nil n.PrevSibling = nil n.NextSibling = nil } xmlquery-1.3.3/node_test.go000066400000000000000000000252021373661047400157330ustar00rootroot00000000000000package xmlquery import ( "encoding/xml" "html" "reflect" "strings" "testing" ) func findRoot(n *Node) *Node { if n == nil { return nil } for ; n.Parent != nil; n = n.Parent { } return n } func findNode(root *Node, name string) *Node { node := root.FirstChild for { if node == nil || node.Data == name { break } node = node.NextSibling } return node } func childNodes(root *Node, name string) []*Node { var list []*Node node := root.FirstChild for { if node == nil { break } if node.Data == name { list = append(list, node) } node = node.NextSibling } return list } func testNode(t *testing.T, n *Node, expected string) { if n.Data != expected { t.Fatalf("expected node name is %s,but got %s", expected, n.Data) } } func testAttr(t *testing.T, n *Node, name, expected string) { for _, attr := range n.Attr { if attr.Name.Local == name && attr.Value == expected { return } } t.Fatalf("not found attribute %s in the node %s", name, n.Data) } func testValue(t *testing.T, val, expected interface{}) { if val == expected { return } if reflect.DeepEqual(val, expected) { return } t.Fatalf("expected value is %+v, but got %+v", expected, val) } func testTrue(t *testing.T, v bool) { if v { return } t.Fatal("expected value is true, but got false") } // Given a *Node, verify that all the pointers (parent, first child, next sibling, etc.) of // - the node itself, // - all its child nodes, and // - pointers along the silbling chain // are valid. func verifyNodePointers(t *testing.T, n *Node) { if n == nil { return } if n.FirstChild != nil { testValue(t, n, n.FirstChild.Parent) } if n.LastChild != nil { testValue(t, n, n.LastChild.Parent) } verifyNodePointers(t, n.FirstChild) // There is no need to call verifyNodePointers(t, n.LastChild) // because verifyNodePointers(t, n.FirstChild) will traverse all its // siblings to the end, and if the last one isn't n.LastChild then it will fail. parent := n.Parent // parent could be nil if n is the root of a tree. // Verify the PrevSibling chain cur, prev := n, n.PrevSibling for ; prev != nil; cur, prev = prev, prev.PrevSibling { testValue(t, prev.Parent, parent) testValue(t, prev.NextSibling, cur) } testTrue(t, cur.PrevSibling == nil) testTrue(t, parent == nil || parent.FirstChild == cur) // Verify the NextSibling chain cur, next := n, n.NextSibling for ; next != nil; cur, next = next, next.NextSibling { testValue(t, next.Parent, parent) testValue(t, next.PrevSibling, cur) } testTrue(t, cur.NextSibling == nil) testTrue(t, parent == nil || parent.LastChild == cur) } func TestAddAttr(t *testing.T) { for _, test := range []struct { name string n *Node key string val string expected string }{ { name: "node has no existing attr", n: &Node{Type: AttributeNode}, key: "ns:k1", val: "v1", expected: `< ns:k1="v1">`, }, { name: "node has existing attrs", n: &Node{Type: AttributeNode, Attr: []xml.Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}}, key: "k2", val: "v2", expected: `< k1="v1" k2="v2">`, }, } { t.Run(test.name, func(t *testing.T) { AddAttr(test.n, test.key, test.val) testValue(t, test.n.OutputXML(true), test.expected) }) } } func TestRemoveFromTree(t *testing.T) { xml := ` ` parseXML := func() *Node { doc, err := Parse(strings.NewReader(xml)) testTrue(t, err == nil) return doc } t.Run("remove an elem node that is the only child of its parent", func(t *testing.T) { doc := parseXML() n := FindOne(doc, "//aaa/ddd/eee") testTrue(t, n != nil) RemoveFromTree(n) verifyNodePointers(t, doc) testValue(t, doc.OutputXML(false), ``) }) t.Run("remove an elem node that is the first but not the last child of its parent", func(t *testing.T) { doc := parseXML() n := FindOne(doc, "//aaa/bbb") testTrue(t, n != nil) RemoveFromTree(n) verifyNodePointers(t, doc) testValue(t, doc.OutputXML(false), ``) }) t.Run("remove an elem node that is neither the first nor the last child of its parent", func(t *testing.T) { doc := parseXML() n := FindOne(doc, "//aaa/ddd") testTrue(t, n != nil) RemoveFromTree(n) verifyNodePointers(t, doc) testValue(t, doc.OutputXML(false), ``) }) t.Run("remove an elem node that is the last but not the first child of its parent", func(t *testing.T) { doc := parseXML() n := FindOne(doc, "//aaa/ggg") testTrue(t, n != nil) RemoveFromTree(n) verifyNodePointers(t, doc) testValue(t, doc.OutputXML(false), ``) }) t.Run("remove decl node works", func(t *testing.T) { doc := parseXML() procInst := doc.FirstChild testValue(t, procInst.Type, DeclarationNode) RemoveFromTree(procInst) verifyNodePointers(t, doc) testValue(t, doc.OutputXML(false), ``) }) t.Run("remove comment node works", func(t *testing.T) { doc := parseXML() commentNode := doc.FirstChild.NextSibling.NextSibling // First .NextSibling is an empty text node. testValue(t, commentNode.Type, CommentNode) RemoveFromTree(commentNode) verifyNodePointers(t, doc) testValue(t, doc.OutputXML(false), ``) }) t.Run("remove call on root does nothing", func(t *testing.T) { doc := parseXML() RemoveFromTree(doc) verifyNodePointers(t, doc) testValue(t, doc.OutputXML(false), ``) }) } func TestSelectElement(t *testing.T) { s := ` ` root, err := Parse(strings.NewReader(s)) if err != nil { t.Error(err) } version := root.FirstChild.SelectAttr("version") if version != "1.0" { t.Fatal("version!=1.0") } aaa := findNode(root, "AAA") var n *Node n = aaa.SelectElement("BBB") if n == nil { t.Fatalf("n is nil") } n = aaa.SelectElement("CCC") if n == nil { t.Fatalf("n is nil") } var ns []*Node ns = aaa.SelectElements("CCC") if len(ns) != 2 { t.Fatalf("len(ns)!=2") } } func TestEscapeOutputValue(t *testing.T) { data := `<*>` root, err := Parse(strings.NewReader(data)) if err != nil { t.Error(err) } escapedInnerText := root.OutputXML(true) if !strings.Contains(escapedInnerText, "<*>") { t.Fatal("Inner Text has not been escaped") } } func TestOutputXMLWithNamespacePrefix(t *testing.T) { s := `` doc, _ := Parse(strings.NewReader(s)) if s != doc.OutputXML(false) { t.Fatal("xml document missing some characters") } } func TestOutputXMLWithCommentNode(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) if e, g := "", doc.OutputXML(true); strings.Index(g, e) == -1 { t.Fatal("missing some comment-node.") } n := FindOne(doc, "//class_list") t.Log(n.OutputXML(false)) if e, g := "Lenard", n.OutputXML(false); strings.Index(g, e) == -1 { t.Fatal("missing some comment-node") } } func TestOutputXMLWithSpaceParent(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student/name") expected := " Robert " if g := doc.OutputXML(true); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } func TestOutputXMLWithSpaceDirect(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student/name") expected := ` Robert ` if g := doc.OutputXML(false); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } func TestOutputXMLWithSpaceOverwrittenToPreserve(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student") expected := ` Robert ` if g := n.OutputXML(false); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } func TestOutputXMLWithSpaceOverwrittenToDefault(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student") expected := `Robert` if g := doc.OutputXML(false); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } xmlquery-1.3.3/parse.go000066400000000000000000000253061373661047400150660ustar00rootroot00000000000000package xmlquery import ( "bufio" "encoding/xml" "errors" "fmt" "io" "net/http" "regexp" "strings" "github.com/antchfx/xpath" "golang.org/x/net/html/charset" ) var xmlMIMERegex = regexp.MustCompile(`(?i)((application|image|message|model)/((\w|\.|-)+\+?)?|text/)(wb)?xml`) // LoadURL loads the XML document from the specified URL. func LoadURL(url string) (*Node, error) { resp, err := http.Get(url) if err != nil { return nil, err } defer resp.Body.Close() // Make sure the Content-Type has a valid XML MIME type if xmlMIMERegex.MatchString(resp.Header.Get("Content-Type")) { return Parse(resp.Body) } return nil, fmt.Errorf("invalid XML document(%s)", resp.Header.Get("Content-Type")) } // Parse returns the parse tree for the XML from the given Reader. func Parse(r io.Reader) (*Node, error) { p := createParser(r) for { _, err := p.parse() if err == io.EOF { return p.doc, nil } if err != nil { return nil, err } } } type parser struct { decoder *xml.Decoder doc *Node space2prefix map[string]string level int prev *Node streamElementXPath *xpath.Expr // Under streaming mode, this specifies the xpath to the target element node(s). streamElementFilter *xpath.Expr // If specified, it provides further filtering on the target element. streamNode *Node // Need to remember the last target node So we can clean it up upon next Read() call. streamNodePrev *Node // Need to remember target node's prev so upon target node removal, we can restore correct prev. reader *cachedReader // Need to maintain a reference to the reader, so we can determine whether a node contains CDATA. } func createParser(r io.Reader) *parser { reader := newCachedReader(bufio.NewReader(r)) p := &parser{ decoder: xml.NewDecoder(reader), doc: &Node{Type: DocumentNode}, space2prefix: make(map[string]string), level: 0, reader: reader, } // http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml. p.space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml" p.decoder.CharsetReader = charset.NewReaderLabel p.prev = p.doc return p } func (p *parser) parse() (*Node, error) { var streamElementNodeCounter int for { tok, err := p.decoder.Token() if err != nil { return nil, err } switch tok := tok.(type) { case xml.StartElement: if p.level == 0 { // mising XML declaration node := &Node{Type: DeclarationNode, Data: "xml", level: 1} AddChild(p.prev, node) p.level = 1 p.prev = node } // https://www.w3.org/TR/xml-names/#scoping-defaulting for _, att := range tok.Attr { if att.Name.Local == "xmlns" { p.space2prefix[att.Value] = "" } else if att.Name.Space == "xmlns" { p.space2prefix[att.Value] = att.Name.Local } } if tok.Name.Space != "" { if _, found := p.space2prefix[tok.Name.Space]; !found { return nil, errors.New("xmlquery: invalid XML document, namespace is missing") } } for i := 0; i < len(tok.Attr); i++ { att := &tok.Attr[i] if prefix, ok := p.space2prefix[att.Name.Space]; ok { att.Name.Space = prefix } } node := &Node{ Type: ElementNode, Data: tok.Name.Local, Prefix: p.space2prefix[tok.Name.Space], NamespaceURI: tok.Name.Space, Attr: tok.Attr, level: p.level, } if p.level == p.prev.level { AddSibling(p.prev, node) } else if p.level > p.prev.level { AddChild(p.prev, node) } else if p.level < p.prev.level { for i := p.prev.level - p.level; i > 1; i-- { p.prev = p.prev.Parent } AddSibling(p.prev.Parent, node) } // If we're in the streaming mode, we need to remember the node if it is the target node // so that when we finish processing the node's EndElement, we know how/what to return to // caller. Also we need to remove the target node from the tree upon next Read() call so // memory doesn't grow unbounded. if p.streamElementXPath != nil { if p.streamNode == nil { if QuerySelector(p.doc, p.streamElementXPath) != nil { p.streamNode = node p.streamNodePrev = p.prev streamElementNodeCounter = 1 } } else { streamElementNodeCounter++ } } p.prev = node p.level++ p.reader.StartCaching() case xml.EndElement: p.level-- // If we're in streaming mode, and we already have a potential streaming // target node identified (p.streamNode != nil) then we need to check if // this is the real one we want to return to caller. if p.streamNode != nil { streamElementNodeCounter-- if streamElementNodeCounter == 0 { // Now we know this element node is the at least passing the initial // p.streamElementXPath check and is a potential target node candidate. // We need to have 1 more check with p.streamElementFilter (if given) to // ensure it is really the element node we want. // The reason we need a two-step check process is because the following // situation: // b1 // And say the p.streamElementXPath = "/AAA/BBB[. != 'b1']". Now during // xml.StartElement time, the node is still empty, so it will pass // the p.streamElementXPath check. However, eventually we know this // shouldn't be returned to the caller. Having a second more fine-grained // filter check ensures that. So in this case, the caller should really // setup the stream parser with: // streamElementXPath = "/AAA/BBB[" // streamElementFilter = "/AAA/BBB[. != 'b1']" if p.streamElementFilter == nil || QuerySelector(p.doc, p.streamElementFilter) != nil { return p.streamNode, nil } // otherwise, this isn't our target node, clean things up. // note we also remove the underlying *Node from the node tree, to prevent // future stream node candidate selection error. RemoveFromTree(p.streamNode) p.prev = p.streamNodePrev p.streamNode = nil p.streamNodePrev = nil } } case xml.CharData: p.reader.StopCaching() // First, normalize the cache... cached := strings.ToUpper(string(p.reader.Cache())) nodeType := TextNode if strings.HasPrefix(cached, " p.prev.level { AddChild(p.prev, node) } else if p.level < p.prev.level { for i := p.prev.level - p.level; i > 1; i-- { p.prev = p.prev.Parent } AddSibling(p.prev.Parent, node) } p.reader.StartCaching() case xml.Comment: node := &Node{Type: CommentNode, Data: string(tok), level: p.level} if p.level == p.prev.level { AddSibling(p.prev, node) } else if p.level > p.prev.level { AddChild(p.prev, node) } else if p.level < p.prev.level { for i := p.prev.level - p.level; i > 1; i-- { p.prev = p.prev.Parent } AddSibling(p.prev.Parent, node) } case xml.ProcInst: // Processing Instruction if p.prev.Type != DeclarationNode { p.level++ } node := &Node{Type: DeclarationNode, Data: tok.Target, level: p.level} pairs := strings.Split(string(tok.Inst), " ") for _, pair := range pairs { pair = strings.TrimSpace(pair) if i := strings.Index(pair, "="); i > 0 { AddAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`)) } } if p.level == p.prev.level { AddSibling(p.prev, node) } else if p.level > p.prev.level { AddChild(p.prev, node) } p.prev = node case xml.Directive: } } } // StreamParser enables loading and parsing an XML document in a streaming // fashion. type StreamParser struct { p *parser } // CreateStreamParser creates a StreamParser. Argument streamElementXPath is // required. // Argument streamElementFilter is optional and should only be used in advanced // scenarios. // // Scenario 1: simple case: // xml := `b1b2` // sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB") // if err != nil { // panic(err) // } // for { // n, err := sp.Read() // if err != nil { // break // } // fmt.Println(n.OutputXML(true)) // } // Output will be: // b1 // b2 // // Scenario 2: advanced case: // xml := `b1b2` // sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB", "/AAA/BBB[. != 'b1']") // if err != nil { // panic(err) // } // for { // n, err := sp.Read() // if err != nil { // break // } // fmt.Println(n.OutputXML(true)) // } // Output will be: // b2 // // As the argument names indicate, streamElementXPath should be used for // providing xpath query pointing to the target element node only, no extra // filtering on the element itself or its children; while streamElementFilter, // if needed, can provide additional filtering on the target element and its // children. // // CreateStreamParser returns an error if either streamElementXPath or // streamElementFilter, if provided, cannot be successfully parsed and compiled // into a valid xpath query. func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFilter ...string) (*StreamParser, error) { elemXPath, err := getQuery(streamElementXPath) if err != nil { return nil, fmt.Errorf("invalid streamElementXPath '%s', err: %s", streamElementXPath, err.Error()) } elemFilter := (*xpath.Expr)(nil) if len(streamElementFilter) > 0 { elemFilter, err = getQuery(streamElementFilter[0]) if err != nil { return nil, fmt.Errorf("invalid streamElementFilter '%s', err: %s", streamElementFilter[0], err.Error()) } } sp := &StreamParser{ p: createParser(r), } sp.p.streamElementXPath = elemXPath sp.p.streamElementFilter = elemFilter return sp, nil } // Read returns a target node that satisfies the XPath specified by caller at // StreamParser creation time. If there is no more satisfying target nodes after // reading the rest of the XML document, io.EOF will be returned. At any time, // any XML parsing error encountered will be returned, and the stream parsing // stopped. Calling Read() after an error is returned (including io.EOF) results // undefined behavior. Also note, due to the streaming nature, calling Read() // will automatically remove any previous target node(s) from the document tree. func (sp *StreamParser) Read() (*Node, error) { // Because this is a streaming read, we need to release/remove last // target node from the node tree to free up memory. if sp.p.streamNode != nil { RemoveFromTree(sp.p.streamNode) sp.p.prev = sp.p.streamNodePrev sp.p.streamNode = nil sp.p.streamNodePrev = nil } return sp.p.parse() } xmlquery-1.3.3/parse_test.go000066400000000000000000000310511373661047400161170ustar00rootroot00000000000000package xmlquery import ( "fmt" "io" "net/http" "net/http/httptest" "strings" "testing" ) func TestLoadURLSuccess(t *testing.T) { contentTypes := []string{ "application/vnd.paos.xml", "application/vnd.otps.ct-kip+xml", "application/vnd.openxmlformats-package.core-properties+xml", "application/CDFX+XML", "application/ATXML", "application/3gpdash-qoe-report+xml", "application/vnd.nokia.pcd+wbxml", "image/svg+xml", "message/imdn+xml", "model/vnd.collada+xml", "text/xml-external-parsed-entity", "text/xml", "aPPLIcaTioN/xMl; charset=UTF-8", "application/xhtml+xml", "application/xml", "text/xmL; charset=UTF-8", "application/aTOM+xmL; charset=UTF-8", "application/RsS+xmL; charset=UTF-8", "application/maTHml+xmL; charset=UTF-8", "application/xslt+xmL; charset=UTF-8", } for _, contentType := range contentTypes { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { s := ` ` w.Header().Set("Content-Type", contentType) w.Write([]byte(s)) })) defer server.Close() _, err := LoadURL(server.URL) if err != nil { t.Fatal(err) } } } func TestLoadURLFailure(t *testing.T) { contentTypes := []string{ "application/pdf", "application/json", "application/tlsrpt+gzip", "application/vnd.3gpp.pic-bw-small", "application/vnd.collabio.xodocuments.document-template", "application/vnd.ctc-posml", "application/vnd.gov.sk.e-form+zip", "audio/mp4", "audio/vnd.sealedmedia.softseal.mpeg", "image/png", "image/vnd.adobe.photoshop", "message/example", "message/vnd.wfa.wsc", "model/vnd.usdz+zip", "model/vnd.valve.source.compiled-map", "multipart/signed", "text/css", "text/html", "video/quicktime", "video/JPEG", } for _, contentType := range contentTypes { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", contentType) })) defer server.Close() _, err := LoadURL(server.URL) if err != nil && err.Error() == fmt.Sprintf("invalid XML document(%s)", contentType) { return } t.Fatalf("Want invalid XML document(%s), got %v", contentType, err) } } func TestNamespaceURL(t *testing.T) { s := ` 21|22021348 ` doc, err := Parse(strings.NewReader(s)) if err != nil { t.Fatal(err) } top := FindOne(doc, "//rss") if top == nil { t.Fatal("rss feed invalid") } node := FindOne(top, "dc:creator") if node.Prefix != "dc" { t.Fatalf("expected node prefix name is dc but is=%s", node.Prefix) } if node.NamespaceURI != "https://purl.org/dc/elements/1.1/" { t.Fatalf("dc:creator != %s", node.NamespaceURI) } if strings.Index(top.InnerText(), "author") > 0 { t.Fatalf("InnerText() include comment node text") } if strings.Index(top.OutputXML(true), "author") == -1 { t.Fatal("OutputXML shoud include comment node,but not") } } func TestMultipleProcInst(t *testing.T) { s := ` ` doc, err := Parse(strings.NewReader(s)) if err != nil { t.Fatal(err) } node := doc.FirstChild // if node.Data != "xml" { t.Fatal("node.Data != xml") } node = node.NextSibling // New Line node = node.NextSibling // if node.Data != "xml-stylesheet" { t.Fatal("node.Data != xml-stylesheet") } } func TestParse(t *testing.T) { s := ` Harry Potter 29.99 Learning XML 39.95 ` root, err := Parse(strings.NewReader(s)) if err != nil { t.Error(err) } if root.Type != DocumentNode { t.Fatal("top node of tree is not DocumentNode") } declarNode := root.FirstChild if declarNode.Type != DeclarationNode { t.Fatal("first child node of tree is not DeclarationNode") } if declarNode.Attr[0].Name.Local != "version" && declarNode.Attr[0].Value != "1.0" { t.Fatal("version attribute not expected") } bookstore := root.LastChild if bookstore.Data != "bookstore" { t.Fatal("bookstore elem not found") } if bookstore.FirstChild.Data != "\n" { t.Fatal("first child node of bookstore is not empty node(\n)") } books := childNodes(bookstore, "book") if len(books) != 2 { t.Fatalf("expected book element count is 2, but got %d", len(books)) } // first book element testNode(t, findNode(books[0], "title"), "title") testAttr(t, findNode(books[0], "title"), "lang", "en") testValue(t, findNode(books[0], "price").InnerText(), "29.99") testValue(t, findNode(books[0], "title").InnerText(), "Harry Potter") // second book element testNode(t, findNode(books[1], "title"), "title") testAttr(t, findNode(books[1], "title"), "lang", "en") testValue(t, findNode(books[1], "price").InnerText(), "39.95") testValue(t, books[0].OutputXML(true), `Harry Potter29.99`) } func TestMissDeclaration(t *testing.T) { s := ` ` doc, err := Parse(strings.NewReader(s)) if err != nil { t.Fatal(err) } node := FindOne(doc, "//AAA") if node == nil { t.Fatal("//AAA is nil") } } func TestMissingNamespace(t *testing.T) { s := ` value 1 value 2 ` _, err := Parse(strings.NewReader(s)) if err == nil { t.Fatal("err is nil, want got invalid XML document") } } func TestTooNested(t *testing.T) { s := ` ` root, err := Parse(strings.NewReader(s)) if err != nil { t.Error(err) } aaa := findNode(root, "AAA") if aaa == nil { t.Fatal("AAA node not exists") } ccc := aaa.LastChild.PrevSibling if ccc.Data != "CCC" { t.Fatalf("expected node is CCC,but got %s", ccc.Data) } bbb := ccc.PrevSibling.PrevSibling if bbb.Data != "BBB" { t.Fatalf("expected node is bbb,but got %s", bbb.Data) } ddd := findNode(bbb, "DDD") testNode(t, ddd, "DDD") testNode(t, ddd.LastChild.PrevSibling, "CCC") } func TestAttributeWithNamespace(t *testing.T) { s := ` ` doc, _ := Parse(strings.NewReader(s)) n := FindOne(doc, "//good[@n1:a='2']") if n == nil { t.Fatal("n is nil") } } func TestIllegalAttributeChars(t *testing.T) { s := `` doc, _ := Parse(strings.NewReader(s)) e := "If a ` doc, err := Parse(strings.NewReader(s)) if err != nil { t.Fatal(err) } top := FindOne(doc, "//rss") if top == nil { t.Fatal("rss feed invalid") } node := FindOne(top, "dc:creator") if node.Prefix != "dc" { t.Fatalf("expected node prefix name is dc but is=%s", node.Prefix) } cdata := node.FirstChild if cdata == nil || cdata.Type != CharDataNode { t.Fatalf("expected cdata child, received %d", cdata.Type) } testValue(t, cdata.InnerText(), "Richard Lawler") } func TestStreamParser_InvalidXPath(t *testing.T) { sp, err := CreateStreamParser(strings.NewReader(""), "[invalid") if err == nil || err.Error() != "invalid streamElementXPath '[invalid', err: expression must evaluate to a node-set" { t.Fatalf("got non-expected error: %v", err) } if sp != nil { t.Fatal("expected nil for sp, but got none-nil value") } sp, err = CreateStreamParser(strings.NewReader(""), ".", "[invalid") if err == nil || err.Error() != "invalid streamElementFilter '[invalid', err: expression must evaluate to a node-set" { t.Fatalf("got non-expected error: %v", err) } if sp != nil { t.Fatal("expected nil for sp, but got none-nil value") } } func testOutputXML(t *testing.T, msg string, expectedXML string, n *Node) { if n.OutputXML(true) != expectedXML { t.Fatalf("%s, expected XML: '%s', actual: '%s'", msg, expectedXML, n.OutputXML(true)) } } func TestStreamParser_Success1(t *testing.T) { s := ` c1 b1 d1 b2z1 b3 b4 b5 c3 ` sp, err := CreateStreamParser(strings.NewReader(s), "/ROOT/*/BBB", "/ROOT/*/BBB[. != 'b3']") if err != nil { t.Fatal(err.Error()) } // First `` read n, err := sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "first call result", `b1`, n) testOutputXML(t, "doc after first call", `<>c1b1`, findRoot(n)) // Second `` read n, err = sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "second call result", `b2z1`, n) testOutputXML(t, "doc after second call", `<>c1d1b2z1`, findRoot(n)) // Third `` read (Note we will skip 'b3' since the streamElementFilter excludes it) n, err = sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "third call result", `b4`, n) // Note the inclusion of `b3` in the document? This is because `b3` has // been filtered out and is not our target node, thus it is considered just like any other // non target nodes such as ``` or `` testOutputXML(t, "doc after third call", `<>c1d1b4`, findRoot(n)) // Fourth `` read n, err = sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "fourth call result", `b5`, n) testOutputXML(t, "doc after fourth call", `<>c1d1b5`, findRoot(n)) _, err = sp.Read() if err != io.EOF { t.Fatalf("io.EOF expected, but got %v", err) } } func TestStreamParser_Success2(t *testing.T) { s := ` c1 b1 d1 b2 c2 ` sp, err := CreateStreamParser(strings.NewReader(s), "/AAA/CCC | /AAA/DDD") if err != nil { t.Fatal(err.Error()) } // First Read() should return c1 n, err := sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "first call result", `c1`, n) testOutputXML(t, "doc after first call", `<>c1`, findRoot(n)) // Second Read() should return d1 n, err = sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "second call result", `d1`, n) testOutputXML(t, "doc after second call", `<>b1d1`, findRoot(n)) // Third call should return c2 n, err = sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "third call result", `c2`, n) testOutputXML(t, "doc after third call", `<>b1b2c2`, findRoot(n)) _, err = sp.Read() if err != io.EOF { t.Fatalf("io.EOF expected, but got %v", err) } } func TestCDATA(t *testing.T) { s := ` ` sp, err := CreateStreamParser(strings.NewReader(s), "/AAA/CCC") if err != nil { t.Fatal(err.Error()) } n, err := sp.Read() if err != nil { t.Fatal(err.Error()) } testOutputXML(t, "first call result", ``, n) } xmlquery-1.3.3/query.go000066400000000000000000000144371373661047400151240ustar00rootroot00000000000000/* Package xmlquery provides extract data from XML documents using XPath expression. */ package xmlquery import ( "fmt" "strings" "github.com/antchfx/xpath" ) // SelectElements finds child elements with the specified name. func (n *Node) SelectElements(name string) []*Node { return Find(n, name) } // SelectElement finds child elements with the specified name. func (n *Node) SelectElement(name string) *Node { return FindOne(n, name) } // SelectAttr returns the attribute value with the specified name. func (n *Node) SelectAttr(name string) string { if n.Type == AttributeNode { if n.Data == name { return n.InnerText() } return "" } var local, space string local = name if i := strings.Index(name, ":"); i > 0 { space = name[:i] local = name[i+1:] } for _, attr := range n.Attr { if attr.Name.Local == local && attr.Name.Space == space { return attr.Value } } return "" } var _ xpath.NodeNavigator = &NodeNavigator{} // CreateXPathNavigator creates a new xpath.NodeNavigator for the specified // XML Node. func CreateXPathNavigator(top *Node) *NodeNavigator { return &NodeNavigator{curr: top, root: top, attr: -1} } func getCurrentNode(it *xpath.NodeIterator) *Node { n := it.Current().(*NodeNavigator) if n.NodeType() == xpath.AttributeNode { childNode := &Node{ Type: TextNode, Data: n.Value(), } return &Node{ Parent: n.curr, Type: AttributeNode, Data: n.LocalName(), FirstChild: childNode, LastChild: childNode, } } return n.curr } // Find is like QueryAll but panics if `expr` is not a valid XPath expression. // See `QueryAll()` function. func Find(top *Node, expr string) []*Node { nodes, err := QueryAll(top, expr) if err != nil { panic(err) } return nodes } // FindOne is like Query but panics if `expr` is not a valid XPath expression. // See `Query()` function. func FindOne(top *Node, expr string) *Node { node, err := Query(top, expr) if err != nil { panic(err) } return node } // QueryAll searches the XML Node that matches by the specified XPath expr. // Returns an error if the expression `expr` cannot be parsed. func QueryAll(top *Node, expr string) ([]*Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } return QuerySelectorAll(top, exp), nil } // Query searches the XML Node that matches by the specified XPath expr, // and returns first matched element. func Query(top *Node, expr string) (*Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } return QuerySelector(top, exp), nil } // QuerySelectorAll searches all of the XML Node that matches the specified // XPath selectors. func QuerySelectorAll(top *Node, selector *xpath.Expr) []*Node { t := selector.Select(CreateXPathNavigator(top)) var elems []*Node for t.MoveNext() { elems = append(elems, getCurrentNode(t)) } return elems } // QuerySelector returns the first matched XML Node by the specified XPath // selector. func QuerySelector(top *Node, selector *xpath.Expr) *Node { t := selector.Select(CreateXPathNavigator(top)) if t.MoveNext() { return getCurrentNode(t) } return nil } // FindEach searches the html.Node and calls functions cb. // Important: this method is deprecated, instead, use for .. = range Find(){}. func FindEach(top *Node, expr string, cb func(int, *Node)) { for i, n := range Find(top, expr) { cb(i, n) } } // FindEachWithBreak functions the same as FindEach but allows to break the loop // by returning false from the callback function `cb`. // Important: this method is deprecated, instead, use .. = range Find(){}. func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) { for i, n := range Find(top, expr) { if !cb(i, n) { break } } } type NodeNavigator struct { root, curr *Node attr int } func (x *NodeNavigator) Current() *Node { return x.curr } func (x *NodeNavigator) NodeType() xpath.NodeType { switch x.curr.Type { case CommentNode: return xpath.CommentNode case TextNode, CharDataNode: return xpath.TextNode case DeclarationNode, DocumentNode: return xpath.RootNode case ElementNode: if x.attr != -1 { return xpath.AttributeNode } return xpath.ElementNode } panic(fmt.Sprintf("unknown XML node type: %v", x.curr.Type)) } func (x *NodeNavigator) LocalName() string { if x.attr != -1 { return x.curr.Attr[x.attr].Name.Local } return x.curr.Data } func (x *NodeNavigator) Prefix() string { if x.NodeType() == xpath.AttributeNode { if x.attr != -1 { return x.curr.Attr[x.attr].Name.Space } return "" } return x.curr.Prefix } func (x *NodeNavigator) NamespaceURL() string { return x.curr.NamespaceURI } func (x *NodeNavigator) Value() string { switch x.curr.Type { case CommentNode: return x.curr.Data case ElementNode: if x.attr != -1 { return x.curr.Attr[x.attr].Value } return x.curr.InnerText() case TextNode: return x.curr.Data } return "" } func (x *NodeNavigator) Copy() xpath.NodeNavigator { n := *x return &n } func (x *NodeNavigator) MoveToRoot() { x.curr = x.root } func (x *NodeNavigator) MoveToParent() bool { if x.attr != -1 { x.attr = -1 return true } else if node := x.curr.Parent; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToNextAttribute() bool { if x.attr >= len(x.curr.Attr)-1 { return false } x.attr++ return true } func (x *NodeNavigator) MoveToChild() bool { if x.attr != -1 { return false } if node := x.curr.FirstChild; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToFirst() bool { if x.attr != -1 || x.curr.PrevSibling == nil { return false } for { node := x.curr.PrevSibling if node == nil { break } x.curr = node } return true } func (x *NodeNavigator) String() string { return x.Value() } func (x *NodeNavigator) MoveToNext() bool { if x.attr != -1 { return false } if node := x.curr.NextSibling; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToPrevious() bool { if x.attr != -1 { return false } if node := x.curr.PrevSibling; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool { node, ok := other.(*NodeNavigator) if !ok || node.root != x.root { return false } x.curr = node.curr x.attr = node.attr return true } xmlquery-1.3.3/query_test.go000066400000000000000000000067671373661047400161720ustar00rootroot00000000000000package xmlquery import ( "strings" "testing" ) // https://msdn.microsoft.com/en-us/library/ms762271(v=vs.85).aspx const xmlDoc = ` Gambardella, Matthew XML Developer's Guide Computer 44.95 2000-10-01 An in-depth look at creating applications with XML. Ralls, Kim Midnight Rain Fantasy 5.95 2000-12-16 A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world. Corets, Eva Maeve Ascendant Fantasy 5.95 2000-11-17 After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society. ` var doc = loadXML(xmlDoc) func TestXPath(t *testing.T) { if list := Find(doc, "//book"); len(list) != 3 { t.Fatal("count(//book) != 3") } if node := FindOne(doc, "//book[@id='bk101']"); node == nil { t.Fatal("//book[@id='bk101] is not found") } if node := FindOne(doc, "//book[price>=44.95]"); node == nil { t.Fatal("//book/price>=44.95 is not found") } if list := Find(doc, "//book[genre='Fantasy']"); len(list) != 2 { t.Fatal("//book[genre='Fantasy'] items count is not equal 2") } var c int FindEach(doc, "//book", func(i int, n *Node) { c++ }) l := len(Find(doc, "//book")) if c != l { t.Fatal("count(//book) != 3") } c = 0 FindEachWithBreak(doc, "//book", func(i int, n *Node) bool { if c == l-1 { return false } c++ return true }) if c != l-1 { t.Fatal("FindEachWithBreak failed to stop.") } node := FindOne(doc, "//book[1]") if node.SelectAttr("id") != "bk101" { t.Fatal("//book[1]/@id != bk101") } } func TestXPathCdUp(t *testing.T) { doc := loadXML(``) node := FindOne(doc, "/a/b/@attr/..") t.Logf("node = %#v", node) if node == nil || node.Data != "b" { t.Fatal("//b/@id/.. != ") } } func TestInvalidXPathExpression(t *testing.T) { doc := &Node{} _, err := QueryAll(doc, "//a[@a==1]") if err == nil { t.Fatal("expected a parsed error but nil") } _, err = Query(doc, "//a[@a==1]") if err == nil { t.Fatal("expected a parsed error but nil") } } func TestNavigator(t *testing.T) { nav := &NodeNavigator{curr: doc, root: doc, attr: -1} nav.MoveToChild() // New Line nav.MoveToNext() nav.MoveToNext() // catalog if nav.curr.Data != "catalog" { t.Fatal("current node name != `catalog`") } nav.MoveToChild() // New Line nav.MoveToNext() // comment node if nav.curr.Type != CommentNode { t.Fatal("node type not CommentNode") } nav.Value() nav.MoveToNext() // New Line nav.MoveToNext() //book nav.MoveToChild() nav.MoveToNext() // book/author if nav.LocalName() != "author" { t.Fatalf("node error") } nav.MoveToParent() // book nav.MoveToNext() // next book nav.MoveToNext() // skip some whitespace if nav.curr.SelectAttr("id") != "bk102" { t.Fatal("node error") } } func loadXML(s string) *Node { node, err := Parse(strings.NewReader(s)) if err != nil { panic(err) } return node }