pax_global_header00006660000000000000000000000064135764064510014525gustar00rootroot0000000000000052 comment=33a1048d2e7a425dc35b100b5b06904981b2047b golang-github-antchfx-xmlquery-1.2.1/000077500000000000000000000000001357640645100175725ustar00rootroot00000000000000golang-github-antchfx-xmlquery-1.2.1/.gitignore000066400000000000000000000004621357640645100215640ustar00rootroot00000000000000# vscode .vscode debug *.test ./build # Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.profgolang-github-antchfx-xmlquery-1.2.1/.travis.yml000066400000000000000000000004131357640645100217010ustar00rootroot00000000000000language: go go: - 1.6 - 1.7 - 1.8 install: - go get golang.org/x/net/html/charset - go get github.com/antchfx/xpath - go get github.com/mattn/goveralls - go get github.com/golang/groupcache script: - $HOME/gopath/bin/goveralls -service=travis-cigolang-github-antchfx-xmlquery-1.2.1/LICENSE000066400000000000000000000017761357640645100206120ustar00rootroot00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.golang-github-antchfx-xmlquery-1.2.1/README.md000066400000000000000000000133321357640645100210530ustar00rootroot00000000000000xmlquery ==== [![Build Status](https://travis-ci.org/antchfx/xmlquery.svg?branch=master)](https://travis-ci.org/antchfx/xmlquery) [![Coverage Status](https://coveralls.io/repos/github/antchfx/xmlquery/badge.svg?branch=master)](https://coveralls.io/github/antchfx/xmlquery?branch=master) [![GoDoc](https://godoc.org/github.com/antchfx/xmlquery?status.svg)](https://godoc.org/github.com/antchfx/xmlquery) [![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/xmlquery)](https://goreportcard.com/report/github.com/antchfx/xmlquery) Overview === `xmlquery` is an XPath query package for XML document, lets you extract data or evaluate from XML documents by an XPath expression. `xmlquery` built-in the query object caching feature will caching the recently used XPATH query string. Enable caching can avoid re-compile XPath expression each query. Change Logs === 2019-11-11 - Add XPath query caching. 2019-10-05 - Add new methods that compatible with invalid XPath expression error: `QueryAll` and `Query`. - Add `QuerySelector` and `QuerySelectorAll` methods, supported reused your query object. - PR [#12](https://github.com/antchfx/xmlquery/pull/12) (Thanks @FrancescoIlario) - PR [#11](https://github.com/antchfx/xmlquery/pull/11) (Thanks @gjvnq) 2018-12-23 - added XML output will including comment node. [#9](https://github.com/antchfx/xmlquery/issues/9) 2018-12-03 - added support attribute name with namespace prefix and XML output. 
[#6](https://github.com/antchfx/xmlquery/issues/6) Installation ==== ``` $ go get github.com/antchfx/xmlquery ``` Getting Started === ### Find nodes matching the specified XPath query. ```go list, err := xmlquery.QueryAll(doc, "a") if err != nil { panic(err) } ``` #### Parse an XML document from a URL. ```go doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml") ``` #### Parse an XML document from a string. ```go s := `` doc, err := xmlquery.Parse(strings.NewReader(s)) ``` #### Parse an XML document from an io.Reader. ```go f, err := os.Open("../books.xml") doc, err := xmlquery.Parse(f) ``` #### Find authors of all books in the bookstore. ```go list := xmlquery.Find(doc, "//book//author") // or list := xmlquery.Find(doc, "//author") ``` #### Find the second book. ```go book := xmlquery.FindOne(doc, "//book[2]") ``` #### Find all book elements and get only their `id` attribute. (New Feature) ```go list := xmlquery.Find(doc,"//book/@id") ``` #### Find all books whose id is bk104. ```go list := xmlquery.Find(doc, "//book[@id='bk104']") ``` #### Find all books whose price is less than 5. ```go list := xmlquery.Find(doc, "//book[price<5]") ``` #### Evaluate the total price of all books. ```go expr, err := xpath.Compile("sum(//book/price)") price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64) fmt.Printf("total price: %f\n", price) ``` #### Evaluate the number of all book elements. ```go expr, err := xpath.Compile("count(//book)") price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64) ``` FAQ ==== #### `Find()` vs `QueryAll()`, which is better? `Find` and `QueryAll` do the same thing: they search for all matching XML nodes. `Find` panics if you give it an invalid XPath query, whereas `QueryAll` returns an error instead. #### Can I save my query expression object for the next query? Yes, you can. We offer the `QuerySelector` and `QuerySelectorAll` methods, which accept your query expression object. 
Caching (or reusing) a query expression object avoids re-compiling the XPath query expression and improves your query performance. #### Create XML document. ```go doc := &xmlquery.Node{ Type: xmlquery.DeclarationNode, Data: "xml", Attr: []xml.Attr{ xml.Attr{Name: xml.Name{Local: "version"}, Value: "1.0"}, }, } root := &xmlquery.Node{ Data: "rss", Type: xmlquery.ElementNode, } doc.FirstChild = root channel := &xmlquery.Node{ Data: "channel", Type: xmlquery.ElementNode, } root.FirstChild = channel title := &xmlquery.Node{ Data: "title", Type: xmlquery.ElementNode, } title_text := &xmlquery.Node{ Data: "W3Schools Home Page", Type: xmlquery.TextNode, } title.FirstChild = title_text channel.FirstChild = title fmt.Println(doc.OutputXML(true)) // W3Schools Home Page ``` Quick Tutorial === ```go import ( "github.com/antchfx/xmlquery" ) func main(){ s := ` W3Schools Home Page https://www.w3schools.com Free web building tutorials RSS Tutorial https://www.w3schools.com/xml/xml_rss.asp New RSS tutorial on W3Schools XML Tutorial https://www.w3schools.com/xml New XML tutorial on W3Schools ` doc, err := xmlquery.Parse(strings.NewReader(s)) if err != nil { panic(err) } channel := xmlquery.FindOne(doc, "//channel") if n := channel.SelectElement("title"); n != nil { fmt.Printf("title: %s\n", n.InnerText()) } if n := channel.SelectElement("link"); n != nil { fmt.Printf("link: %s\n", n.InnerText()) } for i, n := range xmlquery.Find(doc, "//item/title") { fmt.Printf("#%d %s\n", i, n.InnerText()) } } ``` List of supported XPath query packages === |Name |Description | |--------------------------|----------------| |[htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document| |[xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document| |[jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document| Questions === Please let me know if you have any questions.
golang-github-antchfx-xmlquery-1.2.1/books.xml000066400000000000000000000106071357640645100214350ustar00rootroot00000000000000 Gambardella, Matthew XML Developer's Guide Computer 44.95 2000-10-01 An in-depth look at creating applications with XML. Ralls, Kim Midnight Rain Fantasy 5.95 2000-12-16 A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world. Corets, Eva Maeve Ascendant Fantasy 5.95 2000-11-17 After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society. Corets, Eva Oberon's Legacy Fantasy 5.95 2001-03-10 In post-apocalypse England, the mysterious agent known only as Oberon helps to create a new life for the inhabitants of London. Sequel to Maeve Ascendant. Corets, Eva The Sundered Grail Fantasy 5.95 2001-09-10 The two daughters of Maeve, half-sisters, battle one another for control of England. Sequel to Oberon's Legacy. Randall, Cynthia Lover Birds Romance 4.95 2000-09-02 When Carla meets Paul at an ornithology conference, tempers fly as feathers get ruffled. Thurman, Paula Splish Splash Romance 4.95 2000-11-02 A deep sea diver finds true love twenty thousand leagues beneath the sea. Knorr, Stefan Creepy Crawlies Horror 4.95 2000-12-06 An anthology of horror stories about roaches, centipedes, scorpions and other insects. Kress, Peter Paradox Lost Science Fiction 6.95 2000-11-02 After an inadvertant trip through a Heisenberg Uncertainty Device, James Salway discovers the problems of being quantum. O'Brien, Tim Microsoft .NET: The Programming Bible Computer 36.95 2000-12-09 Microsoft's .NET initiative is explored in detail in this deep programmer's reference. O'Brien, Tim MSXML3: A Comprehensive Guide Computer 36.95 2000-12-01 The Microsoft MSXML3 parser is covered in detail, with attention to XML DOM interfaces, XSLT processing, SAX and more. 
Galos, Mike Visual Studio 7: A Comprehensive Guide Computer 49.95 2001-04-16 Microsoft Visual Studio 7 is explored in depth, looking at how Visual Basic, Visual C++, C#, and ASP+ are integrated into a comprehensive development environment. golang-github-antchfx-xmlquery-1.2.1/cache.go000066400000000000000000000014661357640645100211730ustar00rootroot00000000000000package xmlquery import ( "sync" "github.com/golang/groupcache/lru" "github.com/antchfx/xpath" ) // DisableSelectorCache will disable caching for the query selector if value is true. var DisableSelectorCache = false // SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50. // Will disable caching if SelectorCacheMaxEntries <= 0. var SelectorCacheMaxEntries = 50 var ( cacheOnce sync.Once cache *lru.Cache ) func getQuery(expr string) (*xpath.Expr, error) { if DisableSelectorCache || SelectorCacheMaxEntries <= 0 { return xpath.Compile(expr) } cacheOnce.Do(func() { cache = lru.New(50) }) if v, ok := cache.Get(expr); ok { return v.(*xpath.Expr), nil } v, err := xpath.Compile(expr) if err != nil { return nil, err } cache.Add(expr, v) return v, nil } golang-github-antchfx-xmlquery-1.2.1/node.go000066400000000000000000000172671357640645100210630ustar00rootroot00000000000000package xmlquery import ( "bytes" "encoding/xml" "errors" "fmt" "io" "net/http" "strings" "golang.org/x/net/html/charset" ) // A NodeType is the type of a Node. type NodeType uint const ( // DocumentNode is a document object that, as the root of the document tree, // provides access to the entire XML document. DocumentNode NodeType = iota // DeclarationNode is the document type declaration, indicated by the following // tag (for example, ). DeclarationNode // ElementNode is an element (for example, ). ElementNode // TextNode is the text content of a node. TextNode // CommentNode a comment (for example, ). CommentNode // AttributeNode is an attribute of element. 
AttributeNode ) // A Node consists of a NodeType and some Data (tag name for // element nodes, content for text) and are part of a tree of Nodes. type Node struct { Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node Type NodeType Data string Prefix string NamespaceURI string Attr []xml.Attr level int // node level in the tree } // InnerText returns the text between the start and end tags of the object. func (n *Node) InnerText() string { var output func(*bytes.Buffer, *Node) output = func(buf *bytes.Buffer, n *Node) { switch n.Type { case TextNode: buf.WriteString(n.Data) return case CommentNode: return } for child := n.FirstChild; child != nil; child = child.NextSibling { output(buf, child) } } var buf bytes.Buffer output(&buf, n) return buf.String() } func (n *Node) sanitizedData(preserveSpaces bool) string { if preserveSpaces { return strings.Trim(n.Data, "\n\t") } return strings.TrimSpace(n.Data) } func calculatePreserveSpaces(n *Node, pastValue bool) bool { if attr := n.SelectAttr("xml:space"); attr == "preserve" { return true } else if attr == "default" { return false } return pastValue } func outputXML(buf *bytes.Buffer, n *Node, preserveSpaces bool) { preserveSpaces = calculatePreserveSpaces(n, preserveSpaces) if n.Type == TextNode { xml.EscapeText(buf, []byte(n.sanitizedData(preserveSpaces))) return } if n.Type == CommentNode { buf.WriteString("") return } if n.Type == DeclarationNode { buf.WriteString("") } else { buf.WriteString(">") } for child := n.FirstChild; child != nil; child = child.NextSibling { outputXML(buf, child, preserveSpaces) } if n.Type != DeclarationNode { if n.Prefix == "" { buf.WriteString(fmt.Sprintf("", n.Data)) } else { buf.WriteString(fmt.Sprintf("", n.Prefix, n.Data)) } } } // OutputXML returns the text that including tags name. 
func (n *Node) OutputXML(self bool) string { var buf bytes.Buffer if self { outputXML(&buf, n, false) } else { for n := n.FirstChild; n != nil; n = n.NextSibling { outputXML(&buf, n, false) } } return buf.String() } func addAttr(n *Node, key, val string) { var attr xml.Attr if i := strings.Index(key, ":"); i > 0 { attr = xml.Attr{ Name: xml.Name{Space: key[:i], Local: key[i+1:]}, Value: val, } } else { attr = xml.Attr{ Name: xml.Name{Local: key}, Value: val, } } n.Attr = append(n.Attr, attr) } func addChild(parent, n *Node) { n.Parent = parent if parent.FirstChild == nil { parent.FirstChild = n } else { parent.LastChild.NextSibling = n n.PrevSibling = parent.LastChild } parent.LastChild = n } func addSibling(sibling, n *Node) { for t := sibling.NextSibling; t != nil; t = t.NextSibling { sibling = t } n.Parent = sibling.Parent sibling.NextSibling = n n.PrevSibling = sibling if sibling.Parent != nil { sibling.Parent.LastChild = n } } // LoadURL loads the XML document from the specified URL. func LoadURL(url string) (*Node, error) { resp, err := http.Get(url) if err != nil { return nil, err } defer resp.Body.Close() return parse(resp.Body) } func parse(r io.Reader) (*Node, error) { var ( decoder = xml.NewDecoder(r) doc = &Node{Type: DocumentNode} space2prefix = make(map[string]string) level = 0 ) // http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml. 
space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml" decoder.CharsetReader = charset.NewReaderLabel prev := doc for { tok, err := decoder.Token() switch { case err == io.EOF: goto quit case err != nil: return nil, err } switch tok := tok.(type) { case xml.StartElement: if level == 0 { // mising XML declaration node := &Node{Type: DeclarationNode, Data: "xml", level: 1} addChild(prev, node) level = 1 prev = node } // https://www.w3.org/TR/xml-names/#scoping-defaulting for _, att := range tok.Attr { if att.Name.Local == "xmlns" { space2prefix[att.Value] = "" } else if att.Name.Space == "xmlns" { space2prefix[att.Value] = att.Name.Local } } if tok.Name.Space != "" { if _, found := space2prefix[tok.Name.Space]; !found { return nil, errors.New("xmlquery: invalid XML document, namespace is missing") } } for i := 0; i < len(tok.Attr); i++ { att := &tok.Attr[i] if prefix, ok := space2prefix[att.Name.Space]; ok { att.Name.Space = prefix } } node := &Node{ Type: ElementNode, Data: tok.Name.Local, Prefix: space2prefix[tok.Name.Space], NamespaceURI: tok.Name.Space, Attr: tok.Attr, level: level, } //fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, level)) if level == prev.level { addSibling(prev, node) } else if level > prev.level { addChild(prev, node) } else if level < prev.level { for i := prev.level - level; i > 1; i-- { prev = prev.Parent } addSibling(prev.Parent, node) } prev = node level++ case xml.EndElement: level-- case xml.CharData: node := &Node{Type: TextNode, Data: string(tok), level: level} if level == prev.level { addSibling(prev, node) } else if level > prev.level { addChild(prev, node) } else if level < prev.level { for i := prev.level - level; i > 1; i-- { prev = prev.Parent } addSibling(prev.Parent, node) } case xml.Comment: node := &Node{Type: CommentNode, Data: string(tok), level: level} if level == prev.level { addSibling(prev, node) } else if level > prev.level { addChild(prev, node) } else if level < prev.level { for i := prev.level - 
level; i > 1; i-- { prev = prev.Parent } addSibling(prev.Parent, node) } case xml.ProcInst: // Processing Instruction if prev.Type != DeclarationNode { level++ } node := &Node{Type: DeclarationNode, Data: tok.Target, level: level} pairs := strings.Split(string(tok.Inst), " ") for _, pair := range pairs { pair = strings.TrimSpace(pair) if i := strings.Index(pair, "="); i > 0 { addAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`)) } } if level == prev.level { addSibling(prev, node) } else if level > prev.level { addChild(prev, node) } prev = node case xml.Directive: } } quit: return doc, nil } // Parse returns the parse tree for the XML from the given Reader. func Parse(r io.Reader) (*Node, error) { return parse(r) } golang-github-antchfx-xmlquery-1.2.1/node_test.go000066400000000000000000000267741357640645100221250ustar00rootroot00000000000000package xmlquery import ( "html" "net/http" "net/http/httptest" "strings" "testing" ) func findNode(root *Node, name string) *Node { node := root.FirstChild for { if node == nil || node.Data == name { break } node = node.NextSibling } return node } func childNodes(root *Node, name string) []*Node { var list []*Node node := root.FirstChild for { if node == nil { break } if node.Data == name { list = append(list, node) } node = node.NextSibling } return list } func testNode(t *testing.T, n *Node, expected string) { if n.Data != expected { t.Fatalf("expected node name is %s,but got %s", expected, n.Data) } } func testAttr(t *testing.T, n *Node, name, expected string) { for _, attr := range n.Attr { if attr.Name.Local == name && attr.Value == expected { return } } t.Fatalf("not found attribute %s in the node %s", name, n.Data) } func testValue(t *testing.T, val, expected string) { if val != expected { t.Fatalf("expected value is %s,but got %s", expected, val) } } func TestLoadURL(t *testing.T) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { s := ` ` w.Header().Set("Content-Type", 
"text/xml") w.Write([]byte(s)) })) defer server.Close() _, err := LoadURL(server.URL) if err != nil { t.Fatal(err) } } func TestNamespaceURL(t *testing.T) { s := ` 21|22021348 ` doc, err := Parse(strings.NewReader(s)) if err != nil { t.Fatal(err) } top := FindOne(doc, "//rss") if top == nil { t.Fatal("rss feed invalid") } node := FindOne(top, "dc:creator") if node.Prefix != "dc" { t.Fatalf("expected node prefix name is dc but is=%s", node.Prefix) } if node.NamespaceURI != "https://purl.org/dc/elements/1.1/" { t.Fatalf("dc:creator != %s", node.NamespaceURI) } if strings.Index(top.InnerText(), "author") > 0 { t.Fatalf("InnerText() include comment node text") } if strings.Index(top.OutputXML(true), "author") == -1 { t.Fatal("OutputXML shoud include comment node,but not") } } func TestMultipleProcInst(t *testing.T) { s := ` ` doc, err := Parse(strings.NewReader(s)) if err != nil { t.Fatal(err) } node := doc.FirstChild // if node.Data != "xml" { t.Fatal("node.Data != xml") } node = node.NextSibling // New Line node = node.NextSibling // if node.Data != "xml-stylesheet" { t.Fatal("node.Data != xml-stylesheet") } } func TestParse(t *testing.T) { s := ` Harry Potter 29.99 Learning XML 39.95 ` root, err := Parse(strings.NewReader(s)) if err != nil { t.Error(err) } if root.Type != DocumentNode { t.Fatal("top node of tree is not DocumentNode") } declarNode := root.FirstChild if declarNode.Type != DeclarationNode { t.Fatal("first child node of tree is not DeclarationNode") } if declarNode.Attr[0].Name.Local != "version" && declarNode.Attr[0].Value != "1.0" { t.Fatal("version attribute not expected") } bookstore := root.LastChild if bookstore.Data != "bookstore" { t.Fatal("bookstore elem not found") } if bookstore.FirstChild.Data != "\n" { t.Fatal("first child node of bookstore is not empty node(\n)") } books := childNodes(bookstore, "book") if len(books) != 2 { t.Fatalf("expected book element count is 2, but got %d", len(books)) } // first book element testNode(t, 
findNode(books[0], "title"), "title") testAttr(t, findNode(books[0], "title"), "lang", "en") testValue(t, findNode(books[0], "price").InnerText(), "29.99") testValue(t, findNode(books[0], "title").InnerText(), "Harry Potter") // second book element testNode(t, findNode(books[1], "title"), "title") testAttr(t, findNode(books[1], "title"), "lang", "en") testValue(t, findNode(books[1], "price").InnerText(), "39.95") testValue(t, books[0].OutputXML(true), `Harry Potter29.99`) } func TestMissDeclaration(t *testing.T) { s := ` ` doc, err := Parse(strings.NewReader(s)) if err != nil { t.Fatal(err) } node := FindOne(doc, "//AAA") if node == nil { t.Fatal("//AAA is nil") } } func TestMissingNamespace(t *testing.T) { s := ` value 1 value 2 ` _, err := Parse(strings.NewReader(s)) if err == nil { t.Fatal("err is nil, want got invalid XML document") } } func TestTooNested(t *testing.T) { s := ` ` root, err := Parse(strings.NewReader(s)) if err != nil { t.Error(err) } aaa := findNode(root, "AAA") if aaa == nil { t.Fatal("AAA node not exists") } ccc := aaa.LastChild.PrevSibling if ccc.Data != "CCC" { t.Fatalf("expected node is CCC,but got %s", ccc.Data) } bbb := ccc.PrevSibling.PrevSibling if bbb.Data != "BBB" { t.Fatalf("expected node is bbb,but got %s", bbb.Data) } ddd := findNode(bbb, "DDD") testNode(t, ddd, "DDD") testNode(t, ddd.LastChild.PrevSibling, "CCC") } func TestSelectElement(t *testing.T) { s := ` ` root, err := Parse(strings.NewReader(s)) if err != nil { t.Error(err) } version := root.FirstChild.SelectAttr("version") if version != "1.0" { t.Fatal("version!=1.0") } aaa := findNode(root, "AAA") var n *Node n = aaa.SelectElement("BBB") if n == nil { t.Fatalf("n is nil") } n = aaa.SelectElement("CCC") if n == nil { t.Fatalf("n is nil") } var ns []*Node ns = aaa.SelectElements("CCC") if len(ns) != 2 { t.Fatalf("len(ns)!=2") } } func TestEscapeOutputValue(t *testing.T) { data := `<*>` root, err := Parse(strings.NewReader(data)) if err != nil { t.Error(err) } 
escapedInnerText := root.OutputXML(true) if !strings.Contains(escapedInnerText, "<*>") { t.Fatal("Inner Text has not been escaped") } } func TestOutputXMLWithNamespacePrefix(t *testing.T) { s := `` doc, _ := Parse(strings.NewReader(s)) if s != doc.OutputXML(false) { t.Fatal("xml document missing some characters") } } func TestAttributeWithNamespace(t *testing.T) { s := ` ` doc, _ := Parse(strings.NewReader(s)) n := FindOne(doc, "//good[@n1:a='2']") if n == nil { t.Fatal("n is nil") } } func TestOutputXMLWithCommentNode(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) if e, g := "", doc.OutputXML(true); strings.Index(g, e) == -1 { t.Fatal("missing some comment-node.") } n := FindOne(doc, "//class_list") t.Log(n.OutputXML(false)) if e, g := "Lenard", n.OutputXML(false); strings.Index(g, e) == -1 { t.Fatal("missing some comment-node") } } func TestOutputXMLWithSpaceParent(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student/name") expected := " Robert " if g := doc.OutputXML(true); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } func TestOutputXMLWithSpaceDirect(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student/name") expected := ` Robert ` if g := doc.OutputXML(false); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } func TestOutputXMLWithSpaceOverwrittenToPreserve(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) 
t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student") expected := ` Robert ` if g := n.OutputXML(false); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } func TestOutputXMLWithSpaceOverwrittenToDefault(t *testing.T) { s := ` Robert A+ ` doc, _ := Parse(strings.NewReader(s)) t.Log(doc.OutputXML(true)) n := FindOne(doc, "/class_list/student") expected := `Robert` if g := doc.OutputXML(false); strings.Index(g, expected) == -1 { t.Errorf(`expected "%s", obtained "%s"`, expected, g) } output := html.UnescapeString(doc.OutputXML(true)) if strings.Contains(output, "\n") { t.Errorf("the outputted xml contains newlines") } t.Log(n.OutputXML(false)) } func TestIllegalAttributeChars(t *testing.T) { s := `` doc, _ := Parse(strings.NewReader(s)) e := "If a 0 { space = name[:i] local = name[i+1:] } for _, attr := range n.Attr { if attr.Name.Local == local && attr.Name.Space == space { return attr.Value } } return "" } var _ xpath.NodeNavigator = &NodeNavigator{} // CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node. func CreateXPathNavigator(top *Node) *NodeNavigator { return &NodeNavigator{curr: top, root: top, attr: -1} } func getCurrentNode(it *xpath.NodeIterator) *Node { n := it.Current().(*NodeNavigator) if n.NodeType() == xpath.AttributeNode { childNode := &Node{ Type: TextNode, Data: n.Value(), } return &Node{ Parent: n.curr, Type: AttributeNode, Data: n.LocalName(), FirstChild: childNode, LastChild: childNode, } } return n.curr } // Find is like QueryAll but it will panics if the `expr` is not a // valid XPath expression. See `QueryAll()` function. 
func Find(top *Node, expr string) []*Node { nodes, err := QueryAll(top, expr) if err != nil { panic(err) } return nodes } // FindOne is like Query but it will panics if the `expr` is not a // valid XPath expression. See `Query()` function. func FindOne(top *Node, expr string) *Node { node, err := Query(top, expr) if err != nil { panic(err) } return node } // QueryAll searches the XML Node that matches by the specified XPath expr. // Return an error if the expression `expr` cannot be parsed. func QueryAll(top *Node, expr string) ([]*Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } return QuerySelectorAll(top, exp), nil } // Query searches the XML Node that matches by the specified XPath expr, // and returns first element of matched. func Query(top *Node, expr string) (*Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } return QuerySelector(top, exp), nil } // QuerySelectorAll searches all of the XML Node that matches the specified XPath selectors. func QuerySelectorAll(top *Node, selector *xpath.Expr) []*Node { t := selector.Select(CreateXPathNavigator(top)) var elems []*Node for t.MoveNext() { elems = append(elems, getCurrentNode(t)) } return elems } // QuerySelector returns the first matched XML Node by the specified XPath selector. func QuerySelector(top *Node, selector *xpath.Expr) *Node { t := selector.Select(CreateXPathNavigator(top)) if t.MoveNext() { return getCurrentNode(t) } return nil } // FindEach searches the html.Node and calls functions cb. // Important: this method has deprecated, recommend use for .. = range Find(){}. func FindEach(top *Node, expr string, cb func(int, *Node)) { for i, n := range Find(top, expr) { cb(i, n) } } // FindEachWithBreak functions the same as FindEach but allows you // to break the loop by returning false from your callback function, cb. // Important: this method has deprecated, recommend use for .. = range Find(){}. 
func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) { for i, n := range Find(top, expr) { if !cb(i, n) { break } } } type NodeNavigator struct { root, curr *Node attr int } func (x *NodeNavigator) Current() *Node { return x.curr } func (x *NodeNavigator) NodeType() xpath.NodeType { switch x.curr.Type { case CommentNode: return xpath.CommentNode case TextNode: return xpath.TextNode case DeclarationNode, DocumentNode: return xpath.RootNode case ElementNode: if x.attr != -1 { return xpath.AttributeNode } return xpath.ElementNode } panic(fmt.Sprintf("unknown XML node type: %v", x.curr.Type)) } func (x *NodeNavigator) LocalName() string { if x.attr != -1 { return x.curr.Attr[x.attr].Name.Local } return x.curr.Data } func (x *NodeNavigator) Prefix() string { if x.NodeType() == xpath.AttributeNode { if x.attr != -1 { return x.curr.Attr[x.attr].Name.Space } return "" } return x.curr.Prefix } func (x *NodeNavigator) NamespaceURL() string { return x.curr.NamespaceURI } func (x *NodeNavigator) Value() string { switch x.curr.Type { case CommentNode: return x.curr.Data case ElementNode: if x.attr != -1 { return x.curr.Attr[x.attr].Value } return x.curr.InnerText() case TextNode: return x.curr.Data } return "" } func (x *NodeNavigator) Copy() xpath.NodeNavigator { n := *x return &n } func (x *NodeNavigator) MoveToRoot() { x.curr = x.root } func (x *NodeNavigator) MoveToParent() bool { if x.attr != -1 { x.attr = -1 return true } else if node := x.curr.Parent; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToNextAttribute() bool { if x.attr >= len(x.curr.Attr)-1 { return false } x.attr++ return true } func (x *NodeNavigator) MoveToChild() bool { if x.attr != -1 { return false } if node := x.curr.FirstChild; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToFirst() bool { if x.attr != -1 || x.curr.PrevSibling == nil { return false } for { node := x.curr.PrevSibling if node == nil { break 
} x.curr = node } return true } func (x *NodeNavigator) String() string { return x.Value() } func (x *NodeNavigator) MoveToNext() bool { if x.attr != -1 { return false } if node := x.curr.NextSibling; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveToPrevious() bool { if x.attr != -1 { return false } if node := x.curr.PrevSibling; node != nil { x.curr = node return true } return false } func (x *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool { node, ok := other.(*NodeNavigator) if !ok || node.root != x.root { return false } x.curr = node.curr x.attr = node.attr return true } golang-github-antchfx-xmlquery-1.2.1/query_test.go000066400000000000000000000067671357640645100223450ustar00rootroot00000000000000package xmlquery import ( "strings" "testing" ) // https://msdn.microsoft.com/en-us/library/ms762271(v=vs.85).aspx const xmlDoc = ` Gambardella, Matthew XML Developer's Guide Computer 44.95 2000-10-01 An in-depth look at creating applications with XML. Ralls, Kim Midnight Rain Fantasy 5.95 2000-12-16 A former architect battles corporate zombies, an evil sorceress, and her own childhood to become queen of the world. Corets, Eva Maeve Ascendant Fantasy 5.95 2000-11-17 After the collapse of a nanotechnology society in England, the young survivors lay the foundation for a new society. 
` var doc = loadXML(xmlDoc) func TestXPath(t *testing.T) { if list := Find(doc, "//book"); len(list) != 3 { t.Fatal("count(//book) != 3") } if node := FindOne(doc, "//book[@id='bk101']"); node == nil { t.Fatal("//book[@id='bk101] is not found") } if node := FindOne(doc, "//book[price>=44.95]"); node == nil { t.Fatal("//book/price>=44.95 is not found") } if list := Find(doc, "//book[genre='Fantasy']"); len(list) != 2 { t.Fatal("//book[genre='Fantasy'] items count is not equal 2") } var c int FindEach(doc, "//book", func(i int, n *Node) { c++ }) l := len(Find(doc, "//book")) if c != l { t.Fatal("count(//book) != 3") } c = 0 FindEachWithBreak(doc, "//book", func(i int, n *Node) bool { if c == l-1 { return false } c++ return true }) if c != l-1 { t.Fatal("FindEachWithBreak failed to stop.") } node := FindOne(doc, "//book[1]") if node.SelectAttr("id") != "bk101" { t.Fatal("//book[1]/@id != bk101") } } func TestXPathCdUp(t *testing.T) { doc := loadXML(``) node := FindOne(doc, "/a/b/@attr/..") t.Logf("node = %#v", node) if node == nil || node.Data != "b" { t.Fatal("//b/@id/.. 
!= ") } } func TestInvalidXPathExpression(t *testing.T) { doc := &Node{} _, err := QueryAll(doc, "//a[@a==1]") if err == nil { t.Fatal("expected a parsed error but nil") } _, err = Query(doc, "//a[@a==1]") if err == nil { t.Fatal("expected a parsed error but nil") } } func TestNavigator(t *testing.T) { nav := &NodeNavigator{curr: doc, root: doc, attr: -1} nav.MoveToChild() // New Line nav.MoveToNext() nav.MoveToNext() // catalog if nav.curr.Data != "catalog" { t.Fatal("current node name != `catalog`") } nav.MoveToChild() // New Line nav.MoveToNext() // comment node if nav.curr.Type != CommentNode { t.Fatal("node type not CommentNode") } nav.Value() nav.MoveToNext() // New Line nav.MoveToNext() //book nav.MoveToChild() nav.MoveToNext() // book/author if nav.LocalName() != "author" { t.Fatalf("node error") } nav.MoveToParent() // book nav.MoveToNext() // next book nav.MoveToNext() // skip some whitespace if nav.curr.SelectAttr("id") != "bk102" { t.Fatal("node error") } } func loadXML(s string) *Node { node, err := Parse(strings.NewReader(s)) if err != nil { panic(err) } return node }