pax_global_header00006660000000000000000000000064145570057700014524gustar00rootroot0000000000000052 comment=ce552259c070454e45ecaf63ff90fd807bc78d90 golang-github-antchfx-htmlquery-1.3.0/000077500000000000000000000000001455700577000177355ustar00rootroot00000000000000golang-github-antchfx-htmlquery-1.3.0/.gitignore000066400000000000000000000004621455700577000217270ustar00rootroot00000000000000# vscode .vscode debug *.test ./build # Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.profgolang-github-antchfx-htmlquery-1.3.0/.travis.yml000066400000000000000000000004621455700577000220500ustar00rootroot00000000000000language: go go: - 1.9.x - 1.12.x - 1.13.x install: - go get golang.org/x/net/html/charset - go get golang.org/x/net/html - go get github.com/antchfx/xpath - go get github.com/mattn/goveralls - go get github.com/golang/groupcache script: - $HOME/gopath/bin/goveralls -service=travis-cigolang-github-antchfx-htmlquery-1.3.0/LICENSE000066400000000000000000000017761455700577000207550ustar00rootroot00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.golang-github-antchfx-htmlquery-1.3.0/README.md000066400000000000000000000105611455700577000212170ustar00rootroot00000000000000htmlquery ==== [![Build Status](https://travis-ci.org/antchfx/htmlquery.svg?branch=master)](https://travis-ci.org/antchfx/htmlquery) [![Coverage Status](https://coveralls.io/repos/github/antchfx/htmlquery/badge.svg?branch=master)](https://coveralls.io/github/antchfx/htmlquery?branch=master) [![GoDoc](https://godoc.org/github.com/antchfx/htmlquery?status.svg)](https://godoc.org/github.com/antchfx/htmlquery) [![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/htmlquery)](https://goreportcard.com/report/github.com/antchfx/htmlquery) Overview ==== `htmlquery` is an XPath query package for HTML that lets you extract data from, or evaluate expressions against, HTML documents using XPath. `htmlquery` has a built-in query object cache based on [LRU](https://godoc.org/github.com/golang/groupcache/lru) that keeps the most recently used XPath query strings. With query caching enabled, an XPath expression is not re-compiled on every query. You can visit this page to learn about the supported XPath (1.0/2.0) syntax:
https://github.com/antchfx/xpath XPath query packages for Go === | Name | Description | | ------------------------------------------------- | ----------------------------------------- | | [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document | | [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document | | [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document | Installation ==== ``` go get github.com/antchfx/htmlquery ``` Getting Started ==== #### Query, returns matched elements or error. ```go nodes, err := htmlquery.QueryAll(doc, "//a") if err != nil { panic(`not a valid XPath expression.`) } ``` #### Load HTML document from URL. ```go doc, err := htmlquery.LoadURL("http://example.com/") ``` #### Load HTML document from file. ```go filePath := "/home/user/sample.html" doc, err := htmlquery.LoadDoc(filePath) ``` #### Load HTML document from string. ```go s := `....` doc, err := htmlquery.Parse(strings.NewReader(s)) ``` #### Find all A elements. ```go list := htmlquery.Find(doc, "//a") ``` #### Find all A elements that have `href` attribute. ```go list := htmlquery.Find(doc, "//a[@href]") ``` #### Find all A elements with `href` attribute and only return `href` value. ```go list := htmlquery.Find(doc, "//a/@href") for _, n := range list { fmt.Println(htmlquery.SelectAttr(n, "href")) // output @href value } ``` #### Find the third A element. ```go a := htmlquery.FindOne(doc, "//a[3]") ``` #### Find the child element (img) under an A element and print its `src`. ```go a := htmlquery.FindOne(doc, "//a") img := htmlquery.FindOne(a, "//img") fmt.Println(htmlquery.SelectAttr(img, "src")) // output @src value ``` #### Evaluate the number of all IMG elements.
```go expr, _ := xpath.Compile("count(//img)") v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64) fmt.Printf("total count is %f", v) ``` Quick Starts === ```go func main() { doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang") if err != nil { panic(err) } // Find all news items. list, err := htmlquery.QueryAll(doc, "//ol/li") if err != nil { panic(err) } for i, n := range list { a := htmlquery.FindOne(n, "//a") if a != nil { fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href")) } } } ``` FAQ ==== #### `Find()` vs `QueryAll()`, which is better? `Find` and `QueryAll` do the same thing: both search for all matching HTML nodes. `Find` panics if you give it an invalid XPath query, whereas `QueryAll` returns an error instead. #### Can I save my query expression object for the next query? Yes, you can. We offer the `QuerySelector` and `QuerySelectorAll` methods, which accept your query expression object. Caching (or reusing) a query expression object avoids re-compiling the XPath query expression and improves query performance. #### XPath query object cache performance ``` goos: windows goarch: amd64 pkg: github.com/antchfx/htmlquery BenchmarkSelectorCache-4 20000000 55.2 ns/op BenchmarkDisableSelectorCache-4 500000 3162 ns/op ``` #### How to disable caching? ``` htmlquery.DisableSelectorCache = true ``` Questions === Please let me know if you have any questions. golang-github-antchfx-htmlquery-1.3.0/cache.go000066400000000000000000000016221455700577000213300ustar00rootroot00000000000000package htmlquery import ( "sync" "github.com/antchfx/xpath" "github.com/golang/groupcache/lru" ) // DisableSelectorCache will disable caching for the query selector if value is true. var DisableSelectorCache = false // SelectorCacheMaxEntries is the maximum number of selector objects that can be cached. Default is 50. // Caching is disabled if SelectorCacheMaxEntries <= 0.
var SelectorCacheMaxEntries = 50 var ( cacheOnce sync.Once cache *lru.Cache cacheMutex sync.Mutex ) func getQuery(expr string) (*xpath.Expr, error) { if DisableSelectorCache || SelectorCacheMaxEntries <= 0 { return xpath.Compile(expr) } cacheOnce.Do(func() { cache = lru.New(SelectorCacheMaxEntries) }) cacheMutex.Lock() defer cacheMutex.Unlock() if v, ok := cache.Get(expr); ok { return v.(*xpath.Expr), nil } v, err := xpath.Compile(expr) if err != nil { return nil, err } cache.Add(expr, v) return v, nil } golang-github-antchfx-htmlquery-1.3.0/go.mod000066400000000000000000000002651455700577000210460ustar00rootroot00000000000000module github.com/antchfx/htmlquery go 1.14 require ( github.com/antchfx/xpath v1.2.3 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da golang.org/x/net v0.5.0 ) golang-github-antchfx-htmlquery-1.3.0/go.sum000066400000000000000000000062251455700577000210750ustar00rootroot00000000000000github.com/antchfx/xpath v1.2.3 h1:CCZWOzv5bAqjVv0offZ2LVgVYFbeldKQVuLNbViZdes= github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net 
v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools 
v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang-github-antchfx-htmlquery-1.3.0/query.go000066400000000000000000000162641455700577000214420ustar00rootroot00000000000000/* Package htmlquery provides extract data from HTML documents using XPath expression. */ package htmlquery import ( "bufio" "fmt" "io" "net/http" "os" "strings" "github.com/antchfx/xpath" "golang.org/x/net/html" "golang.org/x/net/html/charset" ) var _ xpath.NodeNavigator = &NodeNavigator{} // CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node. func CreateXPathNavigator(top *html.Node) *NodeNavigator { return &NodeNavigator{curr: top, root: top, attr: -1} } // Find is like QueryAll but Will panics if the expression `expr` cannot be parsed. // // See `QueryAll()` function. func Find(top *html.Node, expr string) []*html.Node { nodes, err := QueryAll(top, expr) if err != nil { panic(err) } return nodes } // FindOne is like Query but will panics if the expression `expr` cannot be parsed. // See `Query()` function. func FindOne(top *html.Node, expr string) *html.Node { node, err := Query(top, expr) if err != nil { panic(err) } return node } // QueryAll searches the html.Node that matches by the specified XPath expr. // Return an error if the expression `expr` cannot be parsed. func QueryAll(top *html.Node, expr string) ([]*html.Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } nodes := QuerySelectorAll(top, exp) return nodes, nil } // Query runs the given XPath expression against the given html.Node and // returns the first matching html.Node, or nil if no matches are found. // // Returns an error if the expression `expr` cannot be parsed. 
func Query(top *html.Node, expr string) (*html.Node, error) { exp, err := getQuery(expr) if err != nil { return nil, err } return QuerySelector(top, exp), nil } // QuerySelector returns the first matched html.Node by the specified XPath selector. func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node { t := selector.Select(CreateXPathNavigator(top)) if t.MoveNext() { return getCurrentNode(t.Current().(*NodeNavigator)) } return nil } // QuerySelectorAll searches all of the html.Node that matches the specified XPath selectors. func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node { var elems []*html.Node t := selector.Select(CreateXPathNavigator(top)) for t.MoveNext() { nav := t.Current().(*NodeNavigator) n := getCurrentNode(nav) elems = append(elems, n) } return elems } // LoadURL loads the HTML document from the specified URL. func LoadURL(url string) (*html.Node, error) { resp, err := http.Get(url) if err != nil { return nil, err } defer resp.Body.Close() r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type")) if err != nil { return nil, err } return html.Parse(r) } // LoadDoc loads the HTML document from the specified file path. func LoadDoc(path string) (*html.Node, error) { f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() return html.Parse(bufio.NewReader(f)) } func getCurrentNode(n *NodeNavigator) *html.Node { if n.NodeType() == xpath.AttributeNode { childNode := &html.Node{ Type: html.TextNode, Data: n.Value(), } return &html.Node{ Type: html.ElementNode, Data: n.LocalName(), FirstChild: childNode, LastChild: childNode, } } return n.curr } // Parse returns the parse tree for the HTML from the given Reader. func Parse(r io.Reader) (*html.Node, error) { return html.Parse(r) } // InnerText returns the text between the start and end tags of the object. 
func InnerText(n *html.Node) string { var output func(*strings.Builder, *html.Node) output = func(b *strings.Builder, n *html.Node) { switch n.Type { case html.TextNode: b.WriteString(n.Data) return case html.CommentNode: return } for child := n.FirstChild; child != nil; child = child.NextSibling { output(b, child) } } var b strings.Builder output(&b, n) return b.String() } // SelectAttr returns the attribute value with the specified name. func SelectAttr(n *html.Node, name string) (val string) { if n == nil { return } if n.Type == html.ElementNode && n.Parent == nil && name == n.Data { return InnerText(n) } for _, attr := range n.Attr { if attr.Key == name { val = attr.Val break } } return } // ExistsAttr returns whether attribute with specified name exists. func ExistsAttr(n *html.Node, name string) bool { if n == nil { return false } for _, attr := range n.Attr { if attr.Key == name { return true } } return false } // OutputHTML returns the text including tags name. func OutputHTML(n *html.Node, self bool) string { var b strings.Builder if self { html.Render(&b, n) } else { for n := n.FirstChild; n != nil; n = n.NextSibling { html.Render(&b, n) } } return b.String() } type NodeNavigator struct { root, curr *html.Node attr int } func (h *NodeNavigator) Current() *html.Node { return h.curr } func (h *NodeNavigator) NodeType() xpath.NodeType { switch h.curr.Type { case html.CommentNode: return xpath.CommentNode case html.TextNode: return xpath.TextNode case html.DocumentNode: return xpath.RootNode case html.ElementNode: if h.attr != -1 { return xpath.AttributeNode } return xpath.ElementNode case html.DoctypeNode: // ignored declare and as Root-Node type. 
return xpath.RootNode } panic(fmt.Sprintf("unknown HTML node type: %v", h.curr.Type)) } func (h *NodeNavigator) LocalName() string { if h.attr != -1 { return h.curr.Attr[h.attr].Key } return h.curr.Data } func (*NodeNavigator) Prefix() string { return "" } func (h *NodeNavigator) Value() string { switch h.curr.Type { case html.CommentNode: return h.curr.Data case html.ElementNode: if h.attr != -1 { return h.curr.Attr[h.attr].Val } return InnerText(h.curr) case html.TextNode: return h.curr.Data } return "" } func (h *NodeNavigator) Copy() xpath.NodeNavigator { n := *h return &n } func (h *NodeNavigator) MoveToRoot() { h.curr = h.root } func (h *NodeNavigator) MoveToParent() bool { if h.attr != -1 { h.attr = -1 return true } else if node := h.curr.Parent; node != nil { h.curr = node return true } return false } func (h *NodeNavigator) MoveToNextAttribute() bool { if h.attr >= len(h.curr.Attr)-1 { return false } h.attr++ return true } func (h *NodeNavigator) MoveToChild() bool { if h.attr != -1 { return false } if node := h.curr.FirstChild; node != nil { h.curr = node return true } return false } func (h *NodeNavigator) MoveToFirst() bool { if h.attr != -1 || h.curr.PrevSibling == nil { return false } for { node := h.curr.PrevSibling if node == nil { break } h.curr = node } return true } func (h *NodeNavigator) String() string { return h.Value() } func (h *NodeNavigator) MoveToNext() bool { if h.attr != -1 { return false } if node := h.curr.NextSibling; node != nil { h.curr = node return true } return false } func (h *NodeNavigator) MoveToPrevious() bool { if h.attr != -1 { return false } if node := h.curr.PrevSibling; node != nil { h.curr = node return true } return false } func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool { node, ok := other.(*NodeNavigator) if !ok || node.root != h.root { return false } h.curr = node.curr h.attr = node.attr return true } 
golang-github-antchfx-htmlquery-1.3.0/query_test.go000066400000000000000000000103601455700577000224700ustar00rootroot00000000000000package htmlquery import ( "fmt" "io/ioutil" "net/http" "net/http/httptest" "os" "strings" "sync" "testing" "github.com/antchfx/xpath" "golang.org/x/net/html" ) const htmlSample = ` Hello,World!

City Gallery

London

Mountain View

London is the capital city of England. It is the most populous city in the United Kingdom, with a metropolitan area of over 13 million inhabitants.

Standing on the River Thames, London has been a major settlement for two millennia, its history going back to its founding by the Romans, who named it Londinium.

` var testDoc = loadHTML(htmlSample) func BenchmarkSelectorCache(b *testing.B) { DisableSelectorCache = false for i := 0; i < b.N; i++ { getQuery("/AAA/BBB/DDD/CCC/EEE/ancestor::*") } } func BenchmarkDisableSelectorCache(b *testing.B) { DisableSelectorCache = true for i := 0; i < b.N; i++ { getQuery("/AAA/BBB/DDD/CCC/EEE/ancestor::*") } } func TestSelectorCache(t *testing.T) { SelectorCacheMaxEntries = 2 for i := 1; i <= 3; i++ { getQuery(fmt.Sprintf("//a[position()=%d]", i)) } getQuery("//a[position()=3]") } func TestLoadURL(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { fmt.Fprint(w, htmlSample) })) defer ts.Close() _, err := LoadURL(ts.URL) if err != nil { t.Fatal(err) } } func TestLoadDoc(t *testing.T) { tempHTMLdoc, err := ioutil.TempFile("", "sample_*.html") if err != nil { t.Fatal(err) } tempHTMLFilename := tempHTMLdoc.Name() defer func(tempHTMLdoc *os.File, filename string) { tempHTMLdoc.Close() os.Remove(filename) }(tempHTMLdoc, tempHTMLFilename) tempHTMLdoc.Write([]byte(htmlSample)) if _, err := LoadDoc(tempHTMLFilename); err != nil { t.Fatal(err) } } func TestNavigator(t *testing.T) { top := FindOne(testDoc, "//html") nav := &NodeNavigator{curr: top, root: top, attr: -1} nav.MoveToChild() // HEAD nav.MoveToNext() if nav.NodeType() != xpath.TextNode { t.Fatalf("expectd node type is TextNode,but got %vs", nav.NodeType()) } nav.MoveToNext() // if nav.Value() != InnerText(FindOne(testDoc, "//body")) { t.Fatal("body not equal") } nav.MoveToPrevious() // nav.MoveToParent() // if nav.curr != top { t.Fatal("current node is not html node") } nav.MoveToNextAttribute() if nav.LocalName() != "lang" { t.Fatal("node not move to lang attribute") } nav.MoveToParent() nav.MoveToFirst() // if nav.curr.Type != html.DoctypeNode { t.Fatalf("expected node type is DoctypeNode,but got %d", nav.curr.Type) } } func TestXPath(t *testing.T) { node := FindOne(testDoc, "//html") if SelectAttr(node, "lang") != "en-US" { 
t.Fatal("//html[@lang] != en-Us") } node = FindOne(testDoc, "//header") if strings.Index(InnerText(node), "Logo") > 0 { t.Fatal("InnerText() have comment node text") } if !strings.Contains(OutputHTML(node, true), "Logo") { t.Fatal("OutputHTML() shoud have comment node text") } link := FindOne(testDoc, "//a[1]/@href") if link == nil { t.Fatal("link is nil") } if v := InnerText(link); v != "/London" { t.Fatalf("expect value is /London, but got %s", v) } } func TestXPathCdUp(t *testing.T) { doc := loadHTML(``) node := FindOne(doc, "//b/@attr/..") t.Logf("node = %#v", node) if node == nil || node.Data != "b" { t.Fatal("//b/@id/.. != ") } } func TestConcurrentQuery(t *testing.T) { var wg sync.WaitGroup for i := 0; i < 10; i++ { wg.Add(1) go func(i int) { defer wg.Done() s := `
a
` doc := loadHTML(s) if n := FindOne(doc, `//div`); n == nil { t.Fatalf("should find one but got nil [%d]", i) } }(i) } wg.Done() } func loadHTML(str string) *html.Node { node, err := Parse(strings.NewReader(str)) if err != nil { panic(err) } return node }