pax_global_header 0000666 0000000 0000000 00000000064 14733256146 0014525 g ustar 00root root 0000000 0000000 52 comment=80f3d8ca64965ac7833f3eba5d6cfdc6a583fbd1
golang-github-antchfx-xmlquery-1.4.3/ 0000775 0000000 0000000 00000000000 14733256146 0017576 5 ustar 00root root 0000000 0000000 golang-github-antchfx-xmlquery-1.4.3/.github/ 0000775 0000000 0000000 00000000000 14733256146 0021136 5 ustar 00root root 0000000 0000000 golang-github-antchfx-xmlquery-1.4.3/.github/workflows/ 0000775 0000000 0000000 00000000000 14733256146 0023173 5 ustar 00root root 0000000 0000000 golang-github-antchfx-xmlquery-1.4.3/.github/workflows/testing.yml 0000664 0000000 0000000 00000000756 14733256146 0025403 0 ustar 00root root 0000000 0000000 name: Testing
on: [push, pull_request]
jobs:
test:
strategy:
matrix:
go-version: ["1.20", 1.21.x, 1.22.x]
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v4
- name: Test
run: |
go version
go test . -v -cover
golang-github-antchfx-xmlquery-1.4.3/.gitignore 0000664 0000000 0000000 00000000462 14733256146 0021570 0 ustar 00root root 0000000 0000000 # vscode
.vscode
debug
*.test
./build
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof golang-github-antchfx-xmlquery-1.4.3/LICENSE 0000664 0000000 0000000 00000001776 14733256146 0020616 0 ustar 00root root 0000000 0000000 Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. golang-github-antchfx-xmlquery-1.4.3/README.md 0000664 0000000 0000000 00000016402 14733256146 0021060 0 ustar 00root root 0000000 0000000 # xmlquery
[Build Status](https://github.com/antchfx/xmlquery/actions/workflows/testing.yml)
[GoDoc](https://godoc.org/github.com/antchfx/xmlquery)
[Go Report Card](https://goreportcard.com/report/github.com/antchfx/xmlquery)
# Overview
`xmlquery` is an XPath query package for XML documents. It lets you extract
data from an XML document, or evaluate a value over it, using an XPath expression.
`xmlquery` has a built-in query object caching feature that caches recently used
XPath query strings. With caching enabled, an XPath expression does not have to
be recompiled for each query.
You can visit this page to learn about the supported XPath(1.0/2.0) syntax. https://github.com/antchfx/xpath
[htmlquery](https://github.com/antchfx/htmlquery) - Package for the HTML document query.
[xmlquery](https://github.com/antchfx/xmlquery) - Package for the XML document query.
[jsonquery](https://github.com/antchfx/jsonquery) - Package for the JSON document query.
# Installation
```
$ go get github.com/antchfx/xmlquery
```
# Quick Starts
```go
import (
"fmt"
"strings"

"github.com/antchfx/xmlquery"
)
func main(){
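s := `<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
  <title>W3Schools Home Page</title>
  <link>https://www.w3schools.com</link>
  <description>Free web building tutorials</description>
  <item>
    <title>RSS Tutorial</title>
    <link>https://www.w3schools.com/xml/xml_rss.asp</link>
    <description>New RSS tutorial on W3Schools</description>
  </item>
  <item>
    <title>XML Tutorial</title>
    <link>https://www.w3schools.com/xml</link>
    <description>New XML tutorial on W3Schools</description>
  </item>
</channel>
</rss>`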
doc, err := xmlquery.Parse(strings.NewReader(s))
if err != nil {
panic(err)
}
channel := xmlquery.FindOne(doc, "//channel")
if n := channel.SelectElement("title"); n != nil {
fmt.Printf("title: %s\n", n.InnerText())
}
if n := channel.SelectElement("link"); n != nil {
fmt.Printf("link: %s\n", n.InnerText())
}
for i, n := range xmlquery.Find(doc, "//item/title") {
fmt.Printf("#%d %s\n", i, n.InnerText())
}
}
```
# Getting Started
### Find specified XPath query.
```go
list, err := xmlquery.QueryAll(doc, "a")
if err != nil {
panic(err)
}
```
#### Parse an XML from URL.
```go
doc, err := xmlquery.LoadURL("http://www.example.com/sitemap.xml")
```
#### Parse an XML from string.
```go
s := ``
doc, err := xmlquery.Parse(strings.NewReader(s))
```
#### Parse an XML from io.Reader.
```go
f, err := os.Open("../books.xml")
doc, err := xmlquery.Parse(f)
```
#### Parse an XML in a stream fashion (simple case without elements filtering).
```go
f, _ := os.Open("../books.xml")
p, err := xmlquery.CreateStreamParser(f, "/bookstore/book")
for {
n, err := p.Read()
if err == io.EOF {
break
}
if err != nil {
panic(err)
}
fmt.Println(n)
}
```
Note: use `CreateStreamParser()` to save memory when you have a large XML file to parse.
#### Parse an XML in a stream fashion (simple case with advanced element filtering).
```go
f, _ := os.Open("../books.xml")
p, err := xmlquery.CreateStreamParser(f, "/bookstore/book", "/bookstore/book[price>=10]")
for {
n, err := p.Read()
if err == io.EOF {
break
}
if err != nil {
panic(err)
}
fmt.Println(n)
}
```
#### Find authors of all books in the bookstore.
```go
list := xmlquery.Find(doc, "//book//author")
// or
list := xmlquery.Find(doc, "//author")
```
#### Find the second book.
```go
book := xmlquery.FindOne(doc, "//book[2]")
```
#### Find the last book.
```go
book := xmlquery.FindOne(doc, "//book[last()]")
```
#### Find all book elements and only get `id` attribute.
```go
list := xmlquery.Find(doc,"//book/@id")
fmt.Println(list[0].InnerText()) // output @id value
```
#### Find all books with id `bk104`.
```go
list := xmlquery.Find(doc, "//book[@id='bk104']")
```
#### Find all books with price less than 5.
```go
list := xmlquery.Find(doc, "//book[price<5]")
```
#### Evaluate total price of all books.
```go
expr, err := xpath.Compile("sum(//book/price)")
price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
fmt.Printf("total price: %f\n", price)
```
#### Count the number of books.
```go
expr, err := xpath.Compile("count(//book)")
count := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
```
#### Calculate the total price of all book prices.
```go
expr, err := xpath.Compile("sum(//book/price)")
price := expr.Evaluate(xmlquery.CreateXPathNavigator(doc)).(float64)
```
# Advanced Features
### Parse `UTF-16` XML file with `ParseWithOptions()`.
```go
f, _ := os.Open(`UTF-16.XML`)
// Convert UTF-16 XML to UTF-8
utf16ToUtf8Transformer := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()
utf8Reader := transform.NewReader(f, utf16ToUtf8Transformer)
// Sets `CharsetReader`
options := xmlquery.ParserOptions{
Decoder: &xmlquery.DecoderOptions{
CharsetReader: func(charset string, input io.Reader) (io.Reader, error) {
return input, nil
},
},
}
doc, err := xmlquery.ParseWithOptions(utf8Reader, options)
```
### Query with custom namespace prefix.
```go
s := `
RequestReplyActivity
OpClientReqActivity
300
80
`
nsMap := map[string]string{
"q": "http://xmlns.xyz.com/process/2003",
"r": "http://www.w3.org/1999/XSL/Transform",
"s": "http://www.w3.org/2001/XMLSchema",
}
expr, _ := xpath.CompileWithNS("//q:activity", nsMap)
node := xmlquery.QuerySelector(doc, expr)
```
#### Create an XML document without calling `xml.Marshal`.
```go
doc := &xmlquery.Node{
Type: xmlquery.DeclarationNode,
Data: "xml",
Attr: []xmlquery.Attr{
{Name: xml.Name{Local: "version"}, Value: "1.0"},
},
}
root := &xmlquery.Node{
Data: "rss",
Type: xmlquery.ElementNode,
}
doc.FirstChild = root
channel := &xmlquery.Node{
Data: "channel",
Type: xmlquery.ElementNode,
}
root.FirstChild = channel
title := &xmlquery.Node{
Data: "title",
Type: xmlquery.ElementNode,
}
title_text := &xmlquery.Node{
Data: "W3Schools Home Page",
Type: xmlquery.TextNode,
}
title.FirstChild = title_text
channel.FirstChild = title
fmt.Println(doc.OutputXML(true))
fmt.Println(doc.OutputXMLWithOptions(xmlquery.WithOutputSelf()))
```
Output:
```xml
W3Schools Home Page
```
# FAQ
#### `Find()` vs `QueryAll()`, which is better?
`Find` and `QueryAll` both do the same thing: they search for all matching XML nodes.
`Find` panics if provided with an invalid XPath query, while `QueryAll` returns
an error.
#### Can I save my query expression object for the next query?
Yes, you can. We provide `QuerySelector` and `QuerySelectorAll` methods; they
accept your query expression object.
Caching a query expression object avoids recompiling the XPath query
expression, improving query performance.
# Questions
Please let me know if you have any questions
golang-github-antchfx-xmlquery-1.4.3/cache.go 0000664 0000000 0000000 00000001622 14733256146 0021171 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"sync"
"github.com/golang/groupcache/lru"
"github.com/antchfx/xpath"
)
// DisableSelectorCache disables caching of compiled query selectors when set
// to true; getQuery then compiles the expression on every call.
var DisableSelectorCache = false
// SelectorCacheMaxEntries is the maximum number of compiled selectors kept in
// the cache. The default is 50. Caching is disabled when the value is <= 0.
// The cache itself is created only once, on the first cached lookup, using the
// value this variable holds at that moment.
var SelectorCacheMaxEntries = 50
var (
// cacheOnce guards the one-time creation of cache.
cacheOnce sync.Once
// cache holds compiled expressions keyed by their query string.
cache *lru.Cache
// cacheMutex serializes every lookup and insertion done by getQuery.
cacheMutex sync.Mutex
)
// getQuery returns the compiled form of expr.
//
// When caching is switched off (DisableSelectorCache is true or
// SelectorCacheMaxEntries is not positive) the expression is compiled on every
// call. Otherwise the LRU cache is created lazily on first use, consulted
// under cacheMutex, and filled with the freshly compiled expression on a miss.
// Compilation errors are returned as-is and nothing is cached for them.
func getQuery(expr string) (*xpath.Expr, error) {
	cachingEnabled := !DisableSelectorCache && SelectorCacheMaxEntries > 0
	if !cachingEnabled {
		return xpath.Compile(expr)
	}

	cacheOnce.Do(func() { cache = lru.New(SelectorCacheMaxEntries) })

	cacheMutex.Lock()
	defer cacheMutex.Unlock()

	if cached, hit := cache.Get(expr); hit {
		return cached.(*xpath.Expr), nil
	}

	compiled, err := xpath.Compile(expr)
	if err == nil {
		cache.Add(expr, compiled)
		return compiled, nil
	}
	return nil, err
}
golang-github-antchfx-xmlquery-1.4.3/cached_reader.go 0000664 0000000 0000000 00000002117 14733256146 0022657 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"bufio"
)
// cachedReader wraps a *bufio.Reader and, while caching is switched on, keeps
// a copy of the bytes it hands to the caller, up to a fixed capacity.
type cachedReader struct {
	buffer   *bufio.Reader // underlying source of bytes
	cache    []byte        // storage for the recorded bytes
	cacheCap int           // maximum number of bytes that are recorded
	cacheLen int           // number of bytes recorded so far
	caching  bool          // whether reads are currently recorded
}

// newCachedReader returns a cachedReader over r with a 4096-byte cache and
// caching switched off.
func newCachedReader(r *bufio.Reader) *cachedReader {
	const size = 4096
	return &cachedReader{
		buffer:   r,
		cache:    make([]byte, size),
		cacheCap: size,
		cacheLen: 0,
		caching:  false,
	}
}

// StartCaching discards anything recorded so far and starts recording.
func (c *cachedReader) StartCaching() {
	c.cacheLen = 0
	c.caching = true
}

// ReadByte reads one byte from the underlying reader. A successfully read
// byte is recorded when caching is on and the cache still has room.
func (c *cachedReader) ReadByte() (byte, error) {
	b, err := c.buffer.ReadByte()
	if err != nil || !c.caching {
		return b, err
	}
	if c.cacheLen < c.cacheCap {
		c.cache[c.cacheLen] = b
		c.cacheLen++
	}
	return b, nil
}

// Cache returns the bytes recorded since the last StartCaching. The returned
// slice aliases the internal storage and is overwritten by later recording.
func (c *cachedReader) Cache() []byte {
	return c.cache[:c.cacheLen]
}

// StopCaching stops recording; bytes already recorded stay available through
// Cache.
func (c *cachedReader) StopCaching() {
	c.caching = false
}

// Read reads into p from the underlying reader and returns its result
// unchanged. While caching is on, the n bytes that were read are recorded
// (truncated to the remaining cache capacity).
//
// The bytes are recorded even when the read also reports an error: a reader
// may return n > 0 together with a non-nil error, and those bytes were still
// delivered to the caller.
func (c *cachedReader) Read(p []byte) (int, error) {
	n, err := c.buffer.Read(p)
	if c.caching && n > 0 {
		// copy stops at the end of the cache, which enforces the capacity limit.
		c.cacheLen += copy(c.cache[c.cacheLen:c.cacheCap], p[:n])
	}
	return n, err
}
golang-github-antchfx-xmlquery-1.4.3/cached_reader_test.go 0000664 0000000 0000000 00000001455 14733256146 0023722 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"bufio"
"bytes"
"strings"
"testing"
)
// TestCaching checks that a byte read before StartCaching is not recorded and
// that a subsequent Read is both returned to the caller and recorded.
func TestCaching(t *testing.T) {
	cr := newCachedReader(bufio.NewReader(strings.NewReader(`ABCDEF`)))

	first, err := cr.ReadByte()
	if err != nil {
		t.Fatal(err.Error())
	}
	if first != 'A' {
		t.Fatalf("Expected read byte to be A, got %c instead.", first)
	}

	cr.StartCaching()
	scratch := make([]byte, 10)
	got, err := cr.Read(scratch)
	if err != nil {
		t.Fatal(err.Error())
	}
	if got != 5 {
		t.Fatalf("Expected 5 bytes to be read. Got %d instead.", got)
	}

	want := []byte("BCDEF")
	if !bytes.Equal(scratch[:got], want) {
		t.Fatalf("Incorrect read buffer value")
	}
	if recorded := cr.Cache(); !bytes.Equal(recorded, want) {
		t.Fatalf("Incorrect cached buffer value")
	}
}
golang-github-antchfx-xmlquery-1.4.3/doc_test.go 0000664 0000000 0000000 00000003266 14733256146 0021740 0 ustar 00root root 0000000 0000000 package xmlquery_test
import (
"fmt"
"strings"
"github.com/antchfx/xmlquery"
"github.com/antchfx/xpath"
)
// Example parses a small bookstore document from a string and runs several
// XPath queries against it: selecting nodes with Find and FindOne, and
// evaluating a numeric expression through an XPath navigator.
func Example() {
// XPATH syntax and functions see https://github.com/antchfx/xpath
s := `
Gambardella, Matthew
XML Developer's Guide
Computer
44.95
2000-10-01
Ralls, Kim
Midnight Rain
Fantasy
5.95
2000-12-16
Corets, Eva
Maeve Ascendant
Fantasy
5.95
2000-11-17
`
doc, err := xmlquery.Parse(strings.NewReader(s))
if err != nil {
panic(err)
}
// Quick query all books.
books := xmlquery.Find(doc, `/bookstore/book`)
for _, n := range books {
fmt.Println(n)
}
// Find all books with a price less than 10.
books = xmlquery.Find(doc, `//book[price < 10]`)
fmt.Println(len(books))
// Find books with @id=bk102 or @id=bk101
books = xmlquery.Find(doc, `//book[@id = "bk102" or @id = "bk101"]`)
fmt.Println(len(books))
// Find books by author: Corets, Eva
book := xmlquery.FindOne(doc, `//book[author = "Corets, Eva"]`)
fmt.Println(book.SelectElement("title").InnerText()) // > Output: Maeve Ascendant
// Calculate the total prices of all books
nav := xmlquery.CreateXPathNavigator(doc)
prices := xpath.MustCompile(`sum(//book/price)`).Evaluate(nav).(float64)
fmt.Println(prices) // > Output: 56.85
}
golang-github-antchfx-xmlquery-1.4.3/go.mod 0000664 0000000 0000000 00000000265 14733256146 0020707 0 ustar 00root root 0000000 0000000 module github.com/antchfx/xmlquery
go 1.14
require (
github.com/antchfx/xpath v1.3.3
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da
golang.org/x/net v0.33.0
)
golang-github-antchfx-xmlquery-1.4.3/go.sum 0000664 0000000 0000000 00000014325 14733256146 0020736 0 ustar 00root root 0000000 0000000 github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs=
github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang-github-antchfx-xmlquery-1.4.3/node.go 0000664 0000000 0000000 00000022477 14733256146 0021066 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"bufio"
"encoding/xml"
"fmt"
"html"
"io"
"strings"
)
// A NodeType is the type of a Node.
type NodeType uint
const (
// DocumentNode is a document object that, as the root of the document tree,
// provides access to the entire XML document.
DocumentNode NodeType = iota
// DeclarationNode is a declaration whose serialized form is terminated with
// "?>" (for example, <?xml version="1.0"?>).
DeclarationNode
// ElementNode is an element (for example, <item>).
ElementNode
// TextNode is the text content of a node.
TextNode
// CharDataNode is character data; InnerText treats it like a TextNode.
// NOTE(review): presumably this represents a CDATA section — confirm against the parser.
CharDataNode
// CommentNode is a comment (for example, <!-- a comment -->).
CommentNode
// AttributeNode is an attribute of element.
AttributeNode
// NotationNode is a directive that appears in the document.
// NOTE(review): presumably something like <!DOCTYPE ...> — confirm against the parser.
NotationNode
)
// Attr is a single attribute attached to a Node.
type Attr struct {
// Name is the attribute name; for keys of the form "prefix:local",
// newXMLName stores the prefix in Name.Space and the rest in Name.Local.
Name xml.Name
// Value is the attribute value as a string.
Value string
// NamespaceURI is the namespace URI associated with the attribute.
// NOTE(review): nothing in this file sets it; presumably filled in by the parser.
NamespaceURI string
}
// A Node consists of a NodeType and some Data (tag name for
// element nodes, content for text) and are part of a tree of Nodes.
type Node struct {
// Tree links. FirstChild/LastChild delimit the child list, while
// PrevSibling/NextSibling chain the nodes that share the same Parent.
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
// Type is the kind of node.
Type NodeType
// Data is the tag name for element nodes and the content for text nodes.
Data string
// Prefix is the name prefix; when non-empty, outputXML writes the tag as
// "Prefix:Data".
Prefix string
// NamespaceURI is the namespace URI associated with the node.
NamespaceURI string
// Attr is the list of attributes of the node.
Attr []Attr
level int // node level in the tree
}
// outputConfiguration collects the switches that control XML serialization.
// The zero value means: do not print the node itself, trim text, never use
// self-closing tags, keep comments, no indentation.
type outputConfiguration struct {
	printSelf              bool
	preserveSpaces         bool
	emptyElementTagSupport bool
	skipComments           bool
	useIndentation         string
}

// OutputOption mutates an outputConfiguration; options are applied in the
// order they are passed to the output functions.
type OutputOption func(*outputConfiguration)

// WithOutputSelf makes the output include the node the call was made on, not
// only its children.
func WithOutputSelf() OutputOption {
	return func(cfg *outputConfiguration) { cfg.printSelf = true }
}

// WithEmptyTagSupport makes elements without children serialize as a
// self-closing tag (<empty/>) instead of a start/end tag pair
// (<empty></empty>).
func WithEmptyTagSupport() OutputOption {
	return func(cfg *outputConfiguration) { cfg.emptyElementTagSupport = true }
}

// WithoutComments leaves comment nodes out of the output.
func WithoutComments() OutputOption {
	return func(cfg *outputConfiguration) { cfg.skipComments = true }
}

// WithPreserveSpace keeps leading and trailing whitespace of text in the
// output instead of trimming it.
func WithPreserveSpace() OutputOption {
	return func(cfg *outputConfiguration) { cfg.preserveSpaces = true }
}

// WithIndentation sets the string repeated once per nesting level when
// formatting the output. An empty string leaves indentation switched off.
func WithIndentation(indentation string) OutputOption {
	return func(cfg *outputConfiguration) { cfg.useIndentation = indentation }
}
// newXMLName converts a possibly prefixed name into an xml.Name.
//
// The text before the first ':' becomes Space and everything after it becomes
// Local. A name without a colon, or one that starts with a colon, is stored
// unchanged in Local with an empty Space.
func newXMLName(name string) xml.Name {
	colon := strings.IndexByte(name, ':')
	if colon <= 0 {
		return xml.Name{Local: name}
	}
	prefix, local := name[:colon], name[colon+1:]
	return xml.Name{Space: prefix, Local: local}
}
// Level returns the node's level in the tree, as stored in its unexported
// level field.
func (n *Node) Level() int {
return n.level
}
// InnerText returns the concatenated text found in the node and its
// descendants, in document order. Text and character-data nodes contribute
// their Data, comment nodes (including anything below them) are skipped, and
// every other node contributes only through its children.
func (n *Node) InnerText() string {
	var sb strings.Builder

	var collect func(cur *Node)
	collect = func(cur *Node) {
		if cur.Type == CommentNode {
			return
		}
		if cur.Type == TextNode || cur.Type == CharDataNode {
			sb.WriteString(cur.Data)
			return
		}
		for c := cur.FirstChild; c != nil; c = c.NextSibling {
			collect(c)
		}
	}

	collect(n)
	return sb.String()
}
// sanitizedData returns the node's Data, with leading and trailing whitespace
// removed unless preserveSpaces is true.
func (n *Node) sanitizedData(preserveSpaces bool) string {
	data := n.Data
	if !preserveSpaces {
		data = strings.TrimSpace(data)
	}
	return data
}
// calculatePreserveSpaces decides whether whitespace is preserved for n.
// An xml:space attribute of "preserve" turns preservation on, "default" turns
// it off, and any other value (including a missing attribute) keeps pastValue,
// the setting inherited from the caller.
func calculatePreserveSpaces(n *Node, pastValue bool) bool {
	switch n.SelectAttr("xml:space") {
	case "preserve":
		return true
	case "default":
		return false
	default:
		return pastValue
	}
}
// indentation tracks the state needed to pretty-print nested elements.
// All methods accept a nil receiver and then do nothing, which is how
// indentation is switched off.
type indentation struct {
	level    int       // current nesting depth
	hasChild bool      // whether the element being closed contained a child element
	indent   string    // text repeated once per nesting level
	w        io.Writer // destination of the line breaks and indent text
}

// newIndentation returns an indentation writing to w, or nil when indent is
// empty.
func newIndentation(indent string, w io.Writer) *indentation {
	if len(indent) > 0 {
		return &indentation{indent: indent, w: w}
	}
	return nil
}

// NewLine writes a bare line break.
func (i *indentation) NewLine() {
	if i != nil {
		io.WriteString(i.w, "\n")
	}
}

// Open starts a new line indented to the current level, then goes one level
// deeper and marks the newly opened element as having no children yet.
func (i *indentation) Open() {
	if i == nil {
		return
	}
	i.breakLine()
	i.level++
	i.hasChild = false
}

// Close goes back up one level. If the element being closed contained a child
// element, the closing tag is placed on its own indented line. Afterwards the
// enclosing element is marked as having a child.
func (i *indentation) Close() {
	if i == nil {
		return
	}
	i.level--
	if i.hasChild {
		i.breakLine()
	}
	i.hasChild = true
}

// breakLine writes a line break followed by the indent text for the current
// level.
func (i *indentation) breakLine() {
	io.WriteString(i.w, "\n")
	io.WriteString(i.w, strings.Repeat(i.indent, i.level))
}
// outputXML serializes node n to w and, for nodes that are not handled by one
// of the early-return cases below, recurses into its children.
//
// preserveSpaces is the setting inherited from the caller; it is re-evaluated
// for n through calculatePreserveSpaces before anything is written. config
// carries the output options. indent drives optional pretty-printing; its
// methods are no-ops on a nil receiver. Errors from the individual writes are
// not checked here.
func outputXML(w io.Writer, n *Node, preserveSpaces bool, config *outputConfiguration, indent *indentation) {
preserveSpaces = calculatePreserveSpaces(n, preserveSpaces)
switch n.Type {
case TextNode:
// Text is escaped with html.EscapeString; it is trimmed unless spaces are preserved.
io.WriteString(w, html.EscapeString(n.sanitizedData(preserveSpaces)))
return
case CharDataNode:
// NOTE(review): as written this emits an empty string, so n.Data never reaches the output — confirm the literal is complete.
io.WriteString(w, "")
return
case CommentNode:
// Comments are dropped entirely when skipComments is set.
// NOTE(review): as written an empty string is emitted even when comments are kept — confirm the literal is complete.
if !config.skipComments {
io.WriteString(w, "")
}
return
case NotationNode:
indent.NewLine()
// NOTE(review): the format string has no verb for n.Data — confirm the literal is complete.
fmt.Fprintf(w, "", n.Data)
return
case DeclarationNode:
// The declaration is closed with "?>" after its attributes (see below).
io.WriteString(w, "" + n.Data)
default:
// Every remaining node type is written as a start tag, with the prefix when there is one.
indent.Open()
if n.Prefix == "" {
io.WriteString(w, "<" + n.Data)
} else {
fmt.Fprintf(w, "<%s:%s", n.Prefix, n.Data)
}
}
// Attributes are written as name="value", with the value HTML-escaped.
for _, attr := range n.Attr {
if attr.Name.Space != "" {
fmt.Fprintf(w, ` %s:%s=`, attr.Name.Space, attr.Name.Local)
} else {
fmt.Fprintf(w, ` %s=`, attr.Name.Local)
}
fmt.Fprintf(w, `"%v"`, html.EscapeString(attr.Value))
}
if n.Type == DeclarationNode {
io.WriteString(w, "?>")
} else {
// A childless element becomes a self-closing tag only when
// emptyElementTagSupport is enabled; otherwise the start tag is closed with ">".
if n.FirstChild != nil || !config.emptyElementTagSupport {
io.WriteString(w, ">")
} else {
io.WriteString(w, "/>")
indent.Close()
return
}
}
// Children inherit the preserveSpaces value computed for n.
for child := n.FirstChild; child != nil; child = child.NextSibling {
outputXML(w, child, preserveSpaces, config, indent)
}
if n.Type != DeclarationNode {
indent.Close()
// NOTE(review): the text written here is the name followed by ">" with nothing in front of it — confirm the closing-tag literal is complete.
if n.Prefix == "" {
fmt.Fprintf(w, "%s>", n.Data)
} else {
fmt.Fprintf(w, "%s:%s>", n.Prefix, n.Data)
}
}
}
// OutputXML returns the serialized XML of the node. When self is true the
// node itself is included (via WithOutputSelf); otherwise no options are
// passed and only its children are written.
func (n *Node) OutputXML(self bool) string {
	var opts []OutputOption
	if self {
		opts = append(opts, WithOutputSelf())
	}
	return n.OutputXMLWithOptions(opts...)
}
// OutputXMLWithOptions serializes the node with the given options and returns
// the result as a string.
func (n *Node) OutputXMLWithOptions(opts ...OutputOption) string {
	sb := new(strings.Builder)
	n.WriteWithOptions(sb, opts...)
	return sb.String()
}
// Write writes the node's XML to writer exactly once.
//
// When self is true the node itself is included in the output (equivalent to
// WriteWithOptions with WithOutputSelf); otherwise only its children are
// written. This mirrors what OutputXML does for strings.
func (n *Node) Write(writer io.Writer, self bool) {
	if self {
		n.WriteWithOptions(writer, WithOutputSelf())
		// Stop here: falling through would write the children a second time.
		return
	}
	n.WriteWithOptions(writer)
}
// WriteWithOptions serializes the node to writer according to opts.
//
// Output goes through a buffered writer that is flushed before returning.
// With WithOutputSelf (and a node that is not a DocumentNode) the node itself
// is written; otherwise each child is written in turn, each with its own
// fresh indentation state.
func (n *Node) WriteWithOptions(writer io.Writer, opts ...OutputOption) {
	cfg := new(outputConfiguration)
	for i := range opts {
		opts[i](cfg)
	}

	// The configured default can be overridden by an xml:space attribute on n.
	keepSpaces := calculatePreserveSpaces(n, cfg.preserveSpaces)

	out := bufio.NewWriter(writer)
	defer out.Flush()

	if cfg.printSelf && n.Type != DocumentNode {
		outputXML(out, n, keepSpaces, cfg, newIndentation(cfg.useIndentation, out))
		return
	}
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		outputXML(out, child, keepSpaces, cfg, newIndentation(cfg.useIndentation, out))
	}
}
// AddAttr appends an attribute to node n. key may carry a prefix
// ("prefix:local"); val becomes the attribute value. Existing attributes with
// the same name are not checked or replaced.
func AddAttr(n *Node, key, val string) {
	n.Attr = append(n.Attr, Attr{Name: newXMLName(key), Value: val})
}
// SetAttr sets the value of the attribute named key. The first attribute
// whose name matches is updated in place; if none matches, a new attribute is
// appended.
func (n *Node) SetAttr(key, value string) {
	target := newXMLName(key)
	for idx := range n.Attr {
		if n.Attr[idx].Name != target {
			continue
		}
		n.Attr[idx].Value = value
		return
	}
	AddAttr(n, key, value)
}
// RemoveAttr deletes the first attribute whose name matches key. It does
// nothing when no attribute matches.
func (n *Node) RemoveAttr(key string) {
	target := newXMLName(key)
	for idx := range n.Attr {
		if n.Attr[idx].Name == target {
			n.Attr = append(n.Attr[:idx], n.Attr[idx+1:]...)
			return
		}
	}
}
// AddChild appends node n to the end of parent's child list and updates the
// parent and sibling links on both sides.
func AddChild(parent, n *Node) {
	n.Parent = parent
	n.NextSibling = nil
	if tail := parent.LastChild; parent.FirstChild != nil {
		// Non-empty child list: hook n behind the current last child.
		tail.NextSibling = n
		n.PrevSibling = tail
	} else {
		// Empty child list: n becomes the only child.
		parent.FirstChild = n
		n.PrevSibling = nil
	}
	parent.LastChild = n
}
// AddSibling appends node n to the end of the sibling chain that 'sibling'
// belongs to. n is therefore placed directly after 'sibling' only when
// 'sibling' is the last node of that chain; otherwise it ends up after the
// chain's current last node. If the chain has a parent, its LastChild is
// updated to n.
func AddSibling(sibling, n *Node) {
	tail := sibling
	for tail.NextSibling != nil {
		tail = tail.NextSibling
	}

	n.Parent = tail.Parent
	tail.NextSibling = n
	n.PrevSibling = tail
	n.NextSibling = nil

	if p := tail.Parent; p != nil {
		p.LastChild = n
	}
}
// RemoveFromTree detaches node n, together with its subtree, from the tree it
// belongs to. A node without a parent is left untouched. After the call n has
// no parent and no siblings; its children stay attached to it.
func RemoveFromTree(n *Node) {
	parent := n.Parent
	if parent == nil {
		return
	}

	isFirst, isLast := parent.FirstChild == n, parent.LastChild == n
	switch {
	case isFirst && isLast:
		// n was the only child.
		parent.FirstChild, parent.LastChild = nil, nil
	case isFirst:
		parent.FirstChild = n.NextSibling
		n.NextSibling.PrevSibling = nil
	case isLast:
		parent.LastChild = n.PrevSibling
		n.PrevSibling.NextSibling = nil
	default:
		// n sits in the middle: link its neighbours to each other.
		n.PrevSibling.NextSibling = n.NextSibling
		n.NextSibling.PrevSibling = n.PrevSibling
	}

	n.Parent, n.PrevSibling, n.NextSibling = nil, nil, nil
}
golang-github-antchfx-xmlquery-1.4.3/node_test.go 0000664 0000000 0000000 00000047777 14733256146 0022137 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"encoding/xml"
"html"
"reflect"
"strings"
"testing"
)
// findRoot follows Parent links from n up to the node that has no parent and
// returns it. It returns nil for a nil input.
func findRoot(n *Node) *Node {
	if n == nil {
		return nil
	}
	top := n
	for top.Parent != nil {
		top = top.Parent
	}
	return top
}
// findNode returns the first direct child of root whose Data equals name, or
// nil when there is none.
func findNode(root *Node, name string) *Node {
	for cur := root.FirstChild; cur != nil; cur = cur.NextSibling {
		if cur.Data == name {
			return cur
		}
	}
	return nil
}
// childNodes returns every direct child of root whose Data equals name, in
// sibling order. The result is nil when nothing matches.
func childNodes(root *Node, name string) []*Node {
	var matches []*Node
	for cur := root.FirstChild; cur != nil; cur = cur.NextSibling {
		if cur.Data == name {
			matches = append(matches, cur)
		}
	}
	return matches
}
// testNode fails the test immediately unless the node's Data equals expected.
func testNode(t *testing.T, n *Node, expected string) {
	if got := n.Data; got != expected {
		t.Fatalf("expected node name is %s,but got %s", expected, got)
	}
}
// testAttr fails the test immediately unless n carries an attribute whose
// local name is name and whose value is expected.
func testAttr(t *testing.T, n *Node, name, expected string) {
	for i := range n.Attr {
		a := &n.Attr[i]
		if a.Name.Local == name && a.Value == expected {
			return
		}
	}
	t.Fatalf("not found attribute %s in the node %s", name, n.Data)
}
// testValue fails the test immediately unless val and expected are deeply
// equal.
//
// Only reflect.DeepEqual is used for the comparison. Comparing the two
// interface values with == panics at run time when both hold the same
// uncomparable dynamic type (for example two slices or two maps), while
// DeepEqual handles those and also treats identical pointers as equal.
func testValue(t *testing.T, val, expected interface{}) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()
	if reflect.DeepEqual(val, expected) {
		return
	}
	t.Fatalf("expected value is %+v, but got %+v", expected, val)
}
// testTrue fails the test immediately when v is false.
func testTrue(t *testing.T, v bool) {
	if !v {
		t.Fatal("expected value is true, but got false")
	}
}
// verifyNodePointers checks, for a given *Node, that all the pointers
// (parent, first child, next sibling, etc.) of
// - the node itself,
// - all its child nodes, and
// - pointers along the sibling chain
// are valid. It is a no-op for a nil node and fails the test on the first
// inconsistency it finds.
func verifyNodePointers(t *testing.T, n *Node) {
if n == nil {
return
}
// The first and last child must both point back to n as their parent.
if n.FirstChild != nil {
testValue(t, n, n.FirstChild.Parent)
}
if n.LastChild != nil {
testValue(t, n, n.LastChild.Parent)
}
verifyNodePointers(t, n.FirstChild)
// There is no need to call verifyNodePointers(t, n.LastChild)
// because verifyNodePointers(t, n.FirstChild) will traverse all its
// siblings to the end, and if the last one isn't n.LastChild then it will fail.
parent := n.Parent // parent could be nil if n is the root of a tree.
// Verify the PrevSibling chain
cur, prev := n, n.PrevSibling
for ; prev != nil; cur, prev = prev, prev.PrevSibling {
testValue(t, prev.Parent, parent)
testValue(t, prev.NextSibling, cur)
}
// cur is now the head of the chain; it must be the parent's first child.
testTrue(t, cur.PrevSibling == nil)
testTrue(t, parent == nil || parent.FirstChild == cur)
// Verify the NextSibling chain
cur, next := n, n.NextSibling
for ; next != nil; cur, next = next, next.NextSibling {
testValue(t, next.Parent, parent)
testValue(t, next.PrevSibling, cur)
}
// cur is now the tail of the chain; it must be the parent's last child.
testTrue(t, cur.NextSibling == nil)
testTrue(t, parent == nil || parent.LastChild == cur)
}
// TestAddAttr runs AddAttr against a node with and without existing
// attributes and compares the node's OutputXML(true) rendering.
func TestAddAttr(t *testing.T) {
	type addAttrCase struct {
		name     string
		n        *Node
		key      string
		val      string
		expected string
	}
	cases := []addAttrCase{
		{
			"node has no existing attr",
			&Node{Type: AttributeNode},
			"ns:k1",
			"v1",
			`< ns:k1="v1">>`,
		},
		{
			"node has existing attrs",
			&Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}},
			"k2",
			"v2",
			`< k1="v1" k2="v2">>`,
		},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			AddAttr(tc.n, tc.key, tc.val)
			testValue(t, tc.n.OutputXML(true), tc.expected)
		})
	}
}
// TestSetAttr runs Node.SetAttr for new and already-present keys, with and
// without a namespace prefix, and compares the OutputXML(true) rendering.
func TestSetAttr(t *testing.T) {
	type setAttrCase struct {
		name     string
		n        *Node
		key      string
		val      string
		expected string
	}
	cases := []setAttrCase{
		{
			"node has no existing attr",
			&Node{Type: AttributeNode},
			"ns:k1",
			"v1",
			`< ns:k1="v1">>`,
		},
		{
			"node has an existing attr, overwriting",
			&Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Space: "ns", Local: "k1"}, Value: "v1"}}},
			"ns:k1",
			"v2",
			`< ns:k1="v2">>`,
		},
		{
			"node has no existing attr, no ns",
			&Node{Type: AttributeNode},
			"k1",
			"v1",
			`< k1="v1">>`,
		},
		{
			"node has an existing attr, no ns, overwriting",
			&Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}},
			"k1",
			"v2",
			`< k1="v2">>`,
		},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			tc.n.SetAttr(tc.key, tc.val)
			testValue(t, tc.n.OutputXML(true), tc.expected)
		})
	}
}
// TestRemoveAttr runs Node.RemoveAttr for absent and present keys, with and
// without a namespace prefix, and compares the OutputXML(true) rendering.
func TestRemoveAttr(t *testing.T) {
	type removeAttrCase struct {
		name     string
		n        *Node
		key      string
		expected string
	}
	cases := []removeAttrCase{
		{
			"node has no existing attr",
			&Node{Type: AttributeNode},
			"ns:k1",
			`<>>`,
		},
		{
			"node has an existing attr, overwriting",
			&Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Space: "ns", Local: "k1"}, Value: "v1"}}},
			"ns:k1",
			`<>>`,
		},
		{
			"node has no existing attr, no ns",
			&Node{Type: AttributeNode},
			"k1",
			`<>>`,
		},
		{
			"node has an existing attr, no ns, overwriting",
			&Node{Type: AttributeNode, Attr: []Attr{{Name: xml.Name{Local: "k1"}, Value: "v1"}}},
			"k1",
			`<>>`,
		},
	}
	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			tc.n.RemoveAttr(tc.key)
			testValue(t, tc.n.OutputXML(true), tc.expected)
		})
	}
}
// TestRemoveFromTree removes nodes at various positions and checks, after
// each removal, both the pointer invariants (verifyNodePointers) and the
// serialised document.
func TestRemoveFromTree(t *testing.T) {
	// Named src rather than xml so the encoding/xml package is not shadowed.
	src := `
`
	// parseXML takes the subtest's own *testing.T: failing the parent T from
	// inside a subtest goroutine is not allowed by the testing package.
	parseXML := func(t *testing.T) *Node {
		t.Helper()
		doc, err := Parse(strings.NewReader(src))
		if err != nil {
			t.Fatalf("parsing fixture: %v", err)
		}
		return doc
	}

	t.Run("remove an elem node that is the only child of its parent", func(t *testing.T) {
		doc := parseXML(t)
		n := FindOne(doc, "//aaa/ddd/eee")
		testTrue(t, n != nil)
		RemoveFromTree(n)
		verifyNodePointers(t, doc)
		testValue(t, doc.OutputXML(false),
			``)
	})

	t.Run("remove an elem node that is the first but not the last child of its parent", func(t *testing.T) {
		doc := parseXML(t)
		n := FindOne(doc, "//aaa/bbb")
		testTrue(t, n != nil)
		RemoveFromTree(n)
		verifyNodePointers(t, doc)
		testValue(t, doc.OutputXML(false),
			``)
	})

	t.Run("remove an elem node that is neither the first nor the last child of its parent", func(t *testing.T) {
		doc := parseXML(t)
		n := FindOne(doc, "//aaa/ddd")
		testTrue(t, n != nil)
		RemoveFromTree(n)
		verifyNodePointers(t, doc)
		testValue(t, doc.OutputXML(false),
			``)
	})

	t.Run("remove an elem node that is the last but not the first child of its parent", func(t *testing.T) {
		doc := parseXML(t)
		n := FindOne(doc, "//aaa/ggg")
		testTrue(t, n != nil)
		RemoveFromTree(n)
		verifyNodePointers(t, doc)
		testValue(t, doc.OutputXML(false),
			``)
	})

	t.Run("remove decl node works", func(t *testing.T) {
		doc := parseXML(t)
		procInst := doc.FirstChild
		testValue(t, procInst.Type, DeclarationNode)
		RemoveFromTree(procInst)
		verifyNodePointers(t, doc)
		testValue(t, doc.OutputXML(false),
			``)
	})

	t.Run("remove comment node works", func(t *testing.T) {
		doc := parseXML(t)
		commentNode := doc.FirstChild.NextSibling.NextSibling // First .NextSibling is an empty text node.
		testValue(t, commentNode.Type, CommentNode)
		RemoveFromTree(commentNode)
		verifyNodePointers(t, doc)
		testValue(t, doc.OutputXML(false),
			``)
	})

	t.Run("remove call on root does nothing", func(t *testing.T) {
		doc := parseXML(t)
		RemoveFromTree(doc)
		verifyNodePointers(t, doc)
		testValue(t, doc.OutputXML(false),
			``)
	})
}
// TestSelectElement exercises SelectAttr, SelectElement and SelectElements on
// a parsed document.
func TestSelectElement(t *testing.T) {
	s := `
`
	root, err := Parse(strings.NewReader(s))
	if err != nil {
		// Fatal, not Error: root is nil here and is dereferenced below.
		t.Fatal(err)
	}
	if version := root.FirstChild.SelectAttr("version"); version != "1.0" {
		t.Fatal("version!=1.0")
	}
	aaa := findNode(root, "AAA")
	if aaa == nil {
		t.Fatal("AAA node not found")
	}
	if n := aaa.SelectElement("BBB"); n == nil {
		t.Fatalf("n is nil")
	}
	if n := aaa.SelectElement("CCC"); n == nil {
		t.Fatalf("n is nil")
	}
	if ns := aaa.SelectElements("CCC"); len(ns) != 2 {
		t.Fatalf("len(ns)!=2")
	}
}
// TestEscapeOutputValue checks the text produced by OutputXML(true) for the
// expected escaped content.
func TestEscapeOutputValue(t *testing.T) {
	data := `<*>`
	root, err := Parse(strings.NewReader(data))
	if err != nil {
		// Fatal, not Error: root is nil here and is dereferenced below.
		t.Fatal(err)
	}
	escapedInnerText := root.OutputXML(true)
	if !strings.Contains(escapedInnerText, "<*>") {
		t.Fatal("Inner Text has not been escaped")
	}
}
// TestEscapeValueWrite is the Node.Write counterpart of
// TestEscapeOutputValue.
func TestEscapeValueWrite(t *testing.T) {
	data := `<*>`
	root, err := Parse(strings.NewReader(data))
	if err != nil {
		// Fatal, not Error: root is nil here and is dereferenced below.
		t.Fatal(err)
	}
	var b strings.Builder
	root.Write(&b, true)
	escapedInnerText := b.String()
	if !strings.Contains(escapedInnerText, "<*>") {
		t.Fatal("Inner Text has not been escaped")
	}
}
func TestUnnecessaryEscapeOutputValue(t *testing.T) {
data := `
Robert
A+
`
root, err := Parse(strings.NewReader(data))
if err != nil {
t.Error(err)
}
escapedInnerText := root.OutputXML(true)
if strings.Contains(escapedInnerText, " ") {
t.Fatal("\\n has been escaped unnecessarily")
}
if strings.Contains(escapedInnerText, "
") {
t.Fatal("\\t has been escaped unnecessarily")
}
}
func TestUnnecessaryEscapeValueWrite(t *testing.T) {
data := `
Robert
A+
`
root, err := Parse(strings.NewReader(data))
if err != nil {
t.Error(err)
}
var b strings.Builder
root.Write(&b, true)
escapedInnerText := b.String()
if strings.Contains(escapedInnerText, " ") {
t.Fatal("\\n has been escaped unnecessarily")
}
if strings.Contains(escapedInnerText, "
") {
t.Fatal("\\t has been escaped unnecessarily")
}
}
// TestHtmlUnescapeStringOriginString checks the OutputXML(false) rendering of
// a document containing an escaped HTML character and tab characters.
func TestHtmlUnescapeStringOriginString(t *testing.T) {
	// has escape html character and \t
	data := `
0 `
	root, err := Parse(strings.NewReader(data))
	if err != nil {
		// Fatal, not Error: root is nil here and is dereferenced below.
		t.Fatal(err)
	}
	escapedInnerText := root.OutputXML(false)
	unescapeString := html.UnescapeString(escapedInnerText)
	if strings.Contains(unescapeString, "&") {
		t.Fatal("& need unescape")
	}
	if !strings.Contains(escapedInnerText, "0\t\t") {
		t.Fatal("Inner Text should keep plain text")
	}
}
// TestHtmlUnescapeStringOriginStringWrite is the Node.Write counterpart of
// TestHtmlUnescapeStringOriginString.
func TestHtmlUnescapeStringOriginStringWrite(t *testing.T) {
	// has escape html character and \t
	data := `
0 `
	root, err := Parse(strings.NewReader(data))
	if err != nil {
		// Fatal, not Error: root is nil here and is dereferenced below.
		t.Fatal(err)
	}
	var b strings.Builder
	root.Write(&b, false)
	escapedInnerText := b.String()
	unescapeString := html.UnescapeString(escapedInnerText)
	if strings.Contains(unescapeString, "&") {
		t.Fatal("& need unescape")
	}
	if !strings.Contains(escapedInnerText, "0\t\t") {
		t.Fatal("Inner Text should keep plain text")
	}
}
// TestOutputXMLWithNamespacePrefix checks that OutputXML(false) reproduces
// the input document verbatim.
func TestOutputXMLWithNamespacePrefix(t *testing.T) {
	s := ``
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	if s != doc.OutputXML(false) {
		t.Fatal("xml document missing some characters")
	}
}
// TestWriteWithNamespacePrefix checks that Node.Write(w, false) reproduces
// the input document verbatim.
func TestWriteWithNamespacePrefix(t *testing.T) {
	s := ``
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	var b strings.Builder
	doc.Write(&b, false)
	if s != b.String() {
		t.Fatal("xml document missing some characters")
	}
}
// TestQueryWithPrefix runs a prefixed XPath query and checks the inner text
// of the matched node.
func TestQueryWithPrefix(t *testing.T) {
	s := `ns2:ClientThis is a client fault`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise hand a nil document to Query.
		t.Fatal(err)
	}
	n, err := Query(doc, `//S:Envelope/S:Body/ns2:Fault/faultcode`)
	if err != nil {
		t.Fatal(err)
	}
	if n == nil {
		t.Fatalf("should found one but got nil")
	}
	if expected, v := "ns2:Client", n.InnerText(); expected != v {
		t.Fatalf("expected %s but got %s", expected, v)
	}
}
// TestOutputXMLWithCommentNode checks that the expected fragments appear in
// the serialised document and in the serialised //class_list element.
func TestOutputXMLWithCommentNode(t *testing.T) {
	s := `
Robert
A+
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	t.Log(doc.OutputXML(true))
	if e, g := "", doc.OutputXML(true); !strings.Contains(g, e) {
		t.Fatal("missing some comment-node.")
	}
	n := FindOne(doc, "//class_list")
	if n == nil {
		t.Fatal("//class_list not found")
	}
	t.Log(n.OutputXML(false))
	if e, g := "Lenard", n.OutputXML(false); !strings.Contains(g, e) {
		t.Fatal("missing some comment-node")
	}
}
// TestOutputXMLWithSpaceParent checks the whitespace present in the
// serialised document and in the serialised /class_list/student element.
func TestOutputXMLWithSpaceParent(t *testing.T) {
	s := `
Robert
A+
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	t.Log(doc.OutputXML(true))
	expected := " Robert "
	if g := doc.OutputXML(true); !strings.Contains(g, expected) {
		t.Errorf(`expected "%s", obtained "%s"`, expected, g)
	}
	n := FindOne(doc, "/class_list/student")
	if n == nil {
		t.Fatal("/class_list/student not found")
	}
	output := html.UnescapeString(n.OutputXML(false))
	expected = "\n\t\t\t Robert \n\t\t\tA+\n\t\t"
	if output != expected {
		t.Errorf(`expected "%s", obtained "%s"`, expected, output)
	}
	t.Log(n.OutputXML(false))
}
// TestOutputXMLWithSpaceDirect checks that the padded text appears in
// OutputXML(false) and that OutputXML(true) contains no newline.
func TestOutputXMLWithSpaceDirect(t *testing.T) {
	s := `
Robert
A+
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	t.Log(doc.OutputXML(true))
	n := FindOne(doc, "/class_list/student/name")
	if n == nil {
		t.Fatal("/class_list/student/name not found")
	}
	expected := ` Robert `
	if g := doc.OutputXML(false); !strings.Contains(g, expected) {
		t.Errorf(`expected "%s", obtained "%s"`, expected, g)
	}
	output := html.UnescapeString(doc.OutputXML(true))
	if strings.Contains(output, "\n") {
		t.Errorf("the outputted xml contains newlines")
	}
	t.Log(n.OutputXML(false))
}
// TestOutputXMLWithSpaceOverwrittenToPreserve checks that the padded text
// appears in the serialised /class_list/student element and that
// OutputXML(true) on the document contains no newline.
func TestOutputXMLWithSpaceOverwrittenToPreserve(t *testing.T) {
	s := `
Robert
A+
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	t.Log(doc.OutputXML(true))
	n := FindOne(doc, "/class_list/student")
	if n == nil {
		t.Fatal("/class_list/student not found")
	}
	expected := ` Robert `
	if g := n.OutputXML(false); !strings.Contains(g, expected) {
		t.Errorf(`expected "%s", obtained "%s"`, expected, g)
	}
	output := html.UnescapeString(doc.OutputXML(true))
	if strings.Contains(output, "\n") {
		t.Errorf("the outputted xml contains newlines")
	}
	t.Log(n.OutputXML(false))
}
// TestOutputXMLWithSpaceOverwrittenToDefault checks that the unpadded text
// appears in the serialised document and compares the exact serialisation of
// the /class_list/student element.
func TestOutputXMLWithSpaceOverwrittenToDefault(t *testing.T) {
	s := `
Robert
A+
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	t.Log(doc.OutputXML(true))
	expected := `Robert`
	if g := doc.OutputXML(false); !strings.Contains(g, expected) {
		t.Errorf(`expected "%s", obtained "%s"`, expected, g)
	}
	n := FindOne(doc, "/class_list/student")
	if n == nil {
		t.Fatal("/class_list/student not found")
	}
	output := html.UnescapeString(n.OutputXML(false))
	expected = "\n\t\t\tRobert\n\t\t\tA+\n\t\t"
	if output != expected {
		t.Errorf(`expected "%s", obtained "%s"`, expected, output)
	}
	t.Log(n.OutputXML(false))
}
// TestOutputXMLWithXMLInCDATA checks that OutputXML(false) reproduces a
// document containing a CDATA section verbatim.
func TestOutputXMLWithXMLInCDATA(t *testing.T) {
	s := `Hello, world!]]>`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	got := doc.OutputXML(false)
	t.Log(got)
	if got != s {
		t.Errorf("the outputted xml escaped CDATA section")
	}
}
// TestOutputXMLWithDefaultOptions checks the output of OutputXMLWithOptions
// called without any option.
func TestOutputXMLWithDefaultOptions(t *testing.T) {
	s := ``
	expected := ``
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	result := doc.OutputXMLWithOptions()
	t.Log(result)
	if result != expected {
		t.Errorf("output was not expected. expected %v but got %v", expected, result)
	}
}
// TestOutputXMLWithOptions checks the output of OutputXMLWithOptions when
// WithEmptyTagSupport is supplied.
func TestOutputXMLWithOptions(t *testing.T) {
	s := ``
	expected := ``
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	result := doc.OutputXMLWithOptions(WithEmptyTagSupport())
	t.Log(result)
	if result != expected {
		t.Errorf("output was not expected. expected %v but got %v", expected, result)
	}
}
// TestOutputXMLWithPreserveSpaceOption compares OutputXMLWithOptions with and
// without WithPreserveSpace.
func TestOutputXMLWithPreserveSpaceOption(t *testing.T) {
	s := `
Robert
A+
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	resultWithSpace := doc.OutputXMLWithOptions(WithPreserveSpace())
	resultWithoutSpace := doc.OutputXMLWithOptions()
	if !strings.Contains(resultWithSpace, "> Robert <") {
		t.Errorf("output was not expected. expected %v but got %v", " Robert ", resultWithSpace)
	}
	if !strings.Contains(resultWithoutSpace, ">Robert<") {
		// Report the substring actually searched for (no padding).
		t.Errorf("output was not expected. expected %v but got %v", "Robert", resultWithoutSpace)
	}
}
// TestOutputXMLWithIndentation checks the output of OutputXMLWithOptions when
// WithIndentation is supplied.
func TestOutputXMLWithIndentation(t *testing.T) {
	s := `123`
	expected := `
123
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	resultWithIndent := doc.OutputXMLWithOptions(WithIndentation(" "))
	if resultWithIndent != expected {
		t.Errorf("output was not expected. expected %v but got %v", expected, resultWithIndent)
	}
}
// TestNodeLevel checks Node.Level for the document node and for nodes found
// at two different depths.
func TestNodeLevel(t *testing.T) {
	s := `
Robert
A+
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	if doc.Level() != 0 {
		t.Errorf(`expected "%d", obtained "%d"`, 0, doc.Level())
	}
	n := FindOne(doc, "/class_list")
	if n == nil {
		t.Fatal("/class_list not found")
	}
	if n.Level() != 1 {
		t.Errorf(`expected "%d", obtained "%d"`, 1, n.Level())
	}
	n = FindOne(doc, "/class_list/student/name")
	if n == nil {
		t.Fatal("/class_list/student/name not found")
	}
	if n.Level() != 3 {
		t.Errorf(`expected "%d", obtained "%d"`, 3, n.Level())
	}
}
// TestDirectiveNode serialises a hand-built NotationNode with OutputXML(true)
// and compares the result with the expected text.
func TestDirectiveNode(t *testing.T) {
	directive := &Node{
		Type: NotationNode,
		Data: `DOCTYPE people_list SYSTEM "example.dtd"`,
	}
	expected := ``
	got := directive.OutputXML(true)
	if got != expected {
		t.Errorf(`expected "%s", obtained "%s"`, expected, got)
	}
}
// TestOutputXMLWithSingleQuotes compares OutputXML(false) of a parsed
// document with the expected serialisation.
func TestOutputXMLWithSingleQuotes(t *testing.T) {
	s := ``
	expected := ``
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		// A failed parse would otherwise surface as a nil dereference below.
		t.Fatal(err)
	}
	output := doc.OutputXML(false)
	if expected != output {
		t.Errorf(`expected "%s", obtained "%s"`, expected, output)
	}
}
golang-github-antchfx-xmlquery-1.4.3/options.go 0000664 0000000 0000000 00000001453 14733256146 0021623 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"encoding/xml"
"io"
)
// ParserOptions groups the optional settings accepted by ParseWithOptions and
// CreateStreamParserWithOptions.
type ParserOptions struct {
	// Decoder, when non-nil, is applied to the parser's xml.Decoder.
	Decoder *DecoderOptions
}
// apply pushes the decoder options, if any were given, onto the parser's
// decoder. With a nil Decoder it does nothing.
func (options ParserOptions) apply(parser *parser) {
	if options.Decoder == nil {
		return
	}
	options.Decoder.apply(parser.decoder)
}
// DecoderOptions implements the very same options as the standard
// encoding/xml package. Please refer to this documentation:
// https://golang.org/pkg/encoding/xml/#Decoder
type DecoderOptions struct {
	Strict        bool
	AutoClose     []string
	Entity        map[string]string
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}

// apply copies the options onto decoder.
//
// Strict, AutoClose and Entity are always overwritten. CharsetReader is only
// overwritten when the option is non-nil, so a charset reader already
// installed on the decoder is not silently discarded by options that do not
// mention one.
func (options DecoderOptions) apply(decoder *xml.Decoder) {
	decoder.Strict = options.Strict
	decoder.AutoClose = options.AutoClose
	decoder.Entity = options.Entity
	if options.CharsetReader != nil {
		decoder.CharsetReader = options.CharsetReader
	}
}
golang-github-antchfx-xmlquery-1.4.3/options_test.go 0000664 0000000 0000000 00000002271 14733256146 0022661 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"bytes"
"encoding/xml"
"testing"
)
func TestApplyOptions(t *testing.T) {
parser := &parser{
decoder: xml.NewDecoder(bytes.NewReader(make([]byte, 0))),
}
options := ParserOptions{
Decoder: &DecoderOptions{
Strict: false,
AutoClose: []string{"foo"},
Entity: map[string]string{
"bar": "baz",
},
},
}
options.apply(parser)
if parser.decoder.Strict != options.Decoder.Strict {
t.Fatalf("Expected Strict attribute of %v, got %v instead", options.Decoder.Strict, parser.decoder.Strict)
}
if parser.decoder.AutoClose[0] != options.Decoder.AutoClose[0] {
t.Fatalf("Expected AutoClose attribute with %v, got %v instead", options.Decoder.AutoClose, parser.decoder.AutoClose)
}
if parser.decoder.Entity["bar"] != options.Decoder.Entity["bar"] {
t.Fatalf("Expected Entity mode of %v, got %v instead", options.Decoder.Entity, parser.decoder.Entity)
}
}
// TestApplyEmptyOptions applies a ParserOptions whose Decoder is nil. The
// call is expected to be a no-op, so the test only checks that it completes.
func TestApplyEmptyOptions(t *testing.T) {
	p := &parser{decoder: xml.NewDecoder(bytes.NewReader(make([]byte, 0)))}

	var options ParserOptions // zero value: Decoder is nil
	options.apply(p)
}
golang-github-antchfx-xmlquery-1.4.3/parse.go 0000664 0000000 0000000 00000032111 14733256146 0021235 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"bufio"
"encoding/xml"
"fmt"
"io"
"net/http"
"regexp"
"strings"
"sync"
"github.com/antchfx/xpath"
"golang.org/x/net/html/charset"
)
// xmlMIMERegex is matched, case-insensitively, against the Content-Type
// header in LoadURL to decide whether a response is treated as XML.
var xmlMIMERegex = regexp.MustCompile(`(?i)((application|image|message|model)/((\w|\.|-)+\+?)?|text/)(wb)?xml`)
// LoadURL fetches url with http.Get and parses the response body as XML.
// An error is returned when the request fails, when the Content-Type header
// does not match xmlMIMERegex, or when parsing fails.
func LoadURL(url string) (*Node, error) {
	resp, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Refuse anything that does not advertise an XML MIME type.
	contentType := resp.Header.Get("Content-Type")
	if !xmlMIMERegex.MatchString(contentType) {
		return nil, fmt.Errorf("invalid XML document(%s)", contentType)
	}
	return Parse(resp.Body)
}
// Parse reads an XML document from r and returns its parse tree. It is
// ParseWithOptions called with the zero ParserOptions.
func Parse(r io.Reader) (*Node, error) {
	var defaults ParserOptions
	return ParseWithOptions(r, defaults)
}
// ParseWithOptions is like Parse, but applies the given options to the
// parser first. It drives the parser until io.EOF, at which point the
// document node is returned; any other error aborts parsing.
func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
	p := createParser(r)
	options.apply(p)
	for {
		switch _, err := p.parse(); err {
		case nil:
			continue
		case io.EOF:
			return p.doc, nil
		default:
			return nil, err
		}
	}
}
// parser holds the state of one XML parse: the decoder, the tree built so
// far, and, in streaming mode, the bookkeeping for the current target node.
type parser struct {
	decoder *xml.Decoder
	doc *Node
	level int
	prev *Node
	streamElementXPath *xpath.Expr // Under streaming mode, this specifies the xpath to the target element node(s).
	streamElementFilter *xpath.Expr // If specified, it provides further filtering on the target element.
	streamNode *Node // Need to remember the last target node So we can clean it up upon next Read() call.
	streamNodePrev *Node // Need to remember target node's prev so upon target node removal, we can restore correct prev.
	reader *cachedReader // Need to maintain a reference to the reader, so we can determine whether a node contains CDATA.
	once sync.Once
	space2prefix map[string]*xmlnsPrefix
}
// xmlnsPrefix records the prefix bound to a namespace URI together with the
// parser level at which the binding was recorded.
type xmlnsPrefix struct {
	name string // prefix name; empty for a default-namespace (xmlns="...") binding
	level int // value of parser.level when the binding was stored
}
// createParser builds a parser reading from r. The input is buffered and
// wrapped in a cachedReader, the decoder gets charset.NewReaderLabel as its
// CharsetReader when none is set, and both doc and prev start out as a fresh
// DocumentNode.
func createParser(r io.Reader) *parser {
	cr := newCachedReader(bufio.NewReader(r))

	dec := xml.NewDecoder(cr)
	if dec.CharsetReader == nil {
		dec.CharsetReader = charset.NewReaderLabel
	}

	root := &Node{Type: DocumentNode}
	return &parser{
		decoder: dec,
		doc:     root,
		level:   0,
		prev:    root,
		reader:  cr,
	}
}
func (p *parser) parse() (*Node, error) {
p.once.Do(func() {
p.space2prefix = map[string]*xmlnsPrefix{"http://www.w3.org/XML/1998/namespace": {name: "xml", level: 0}}
})
var streamElementNodeCounter int
for {
p.reader.StartCaching()
tok, err := p.decoder.Token()
p.reader.StopCaching()
if err != nil {
return nil, err
}
switch tok := tok.(type) {
case xml.StartElement:
if p.level == 0 {
// mising XML declaration
attributes := make([]Attr, 1)
attributes[0].Name = xml.Name{Local: "version"}
attributes[0].Value = "1.0"
node := &Node{
Type: DeclarationNode,
Data: "xml",
Attr: attributes,
level: 1,
}
AddChild(p.prev, node)
p.level = 1
p.prev = node
}
for _, att := range tok.Attr {
if att.Name.Local == "xmlns" {
// https://github.com/antchfx/xmlquery/issues/67
if prefix, ok := p.space2prefix[att.Value]; !ok || (ok && prefix.level >= p.level) {
p.space2prefix[att.Value] = &xmlnsPrefix{name: "", level: p.level} // reset empty if exist the default namespace
}
} else if att.Name.Space == "xmlns" {
// maybe there are have duplicate NamespaceURL?
p.space2prefix[att.Value] = &xmlnsPrefix{name: att.Name.Local, level: p.level}
}
}
if space := tok.Name.Space; space != "" {
if _, found := p.space2prefix[space]; !found && p.decoder.Strict {
return nil, fmt.Errorf("xmlquery: invalid XML document, namespace %s is missing", space)
}
}
attributes := make([]Attr, len(tok.Attr))
for i, att := range tok.Attr {
name := att.Name
if prefix, ok := p.space2prefix[name.Space]; ok {
name.Space = prefix.name
}
attributes[i] = Attr{
Name: name,
Value: att.Value,
NamespaceURI: att.Name.Space,
}
}
node := &Node{
Type: ElementNode,
Data: tok.Name.Local,
NamespaceURI: tok.Name.Space,
Attr: attributes,
level: p.level,
}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
if node.NamespaceURI != "" {
if v, ok := p.space2prefix[node.NamespaceURI]; ok {
cached := string(p.reader.Cache())
if strings.HasPrefix(cached, fmt.Sprintf("%s:%s", v.name, node.Data)) || strings.HasPrefix(cached, fmt.Sprintf("<%s:%s", v.name, node.Data)) {
node.Prefix = v.name
}
}
}
// If we're in the streaming mode, we need to remember the node if it is the target node
// so that when we finish processing the node's EndElement, we know how/what to return to
// caller. Also we need to remove the target node from the tree upon next Read() call so
// memory doesn't grow unbounded.
if p.streamElementXPath != nil {
if p.streamNode == nil {
if QuerySelector(p.doc, p.streamElementXPath) != nil {
p.streamNode = node
p.streamNodePrev = p.prev
streamElementNodeCounter = 1
}
} else {
streamElementNodeCounter++
}
}
p.prev = node
p.level++
case xml.EndElement:
p.level--
// If we're in streaming mode, and we already have a potential streaming
// target node identified (p.streamNode != nil) then we need to check if
// this is the real one we want to return to caller.
if p.streamNode != nil {
streamElementNodeCounter--
if streamElementNodeCounter == 0 {
// Now we know this element node is the at least passing the initial
// p.streamElementXPath check and is a potential target node candidate.
// We need to have 1 more check with p.streamElementFilter (if given) to
// ensure it is really the element node we want.
// The reason we need a two-step check process is because the following
// situation:
// b1
// And say the p.streamElementXPath = "/AAA/BBB[. != 'b1']". Now during
// xml.StartElement time, the node is still empty, so it will pass
// the p.streamElementXPath check. However, eventually we know this
// shouldn't be returned to the caller. Having a second more fine-grained
// filter check ensures that. So in this case, the caller should really
// setup the stream parser with:
// streamElementXPath = "/AAA/BBB["
// streamElementFilter = "/AAA/BBB[. != 'b1']"
if p.streamElementFilter == nil || QuerySelector(p.doc, p.streamElementFilter) != nil {
return p.streamNode, nil
}
// otherwise, this isn't our target node, clean things up.
// note we also remove the underlying *Node from the node tree, to prevent
// future stream node candidate selection error.
RemoveFromTree(p.streamNode)
p.prev = p.streamNodePrev
p.streamNode = nil
p.streamNodePrev = nil
}
}
case xml.CharData:
// First, normalize the cache...
cached := strings.ToUpper(string(p.reader.Cache()))
nodeType := TextNode
if strings.HasPrefix(cached, " p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
case xml.Comment:
node := &Node{Type: CommentNode, Data: string(tok), level: p.level}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
case xml.ProcInst: // Processing Instruction
if p.prev.Type != DeclarationNode {
p.level++
}
node := &Node{Type: DeclarationNode, Data: tok.Target, level: p.level}
pairs := strings.Split(string(tok.Inst), " ")
for _, pair := range pairs {
pair = strings.TrimSpace(pair)
if i := strings.Index(pair, "="); i > 0 {
AddAttr(node, pair[:i], strings.Trim(pair[i+1:], `"'`))
}
}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
p.prev = node
case xml.Directive:
node := &Node{Type: NotationNode, Data: string(tok), level: p.level}
if p.level == p.prev.level {
AddSibling(p.prev, node)
} else if p.level > p.prev.level {
AddChild(p.prev, node)
} else if p.level < p.prev.level {
for i := p.prev.level - p.level; i > 1; i-- {
p.prev = p.prev.Parent
}
AddSibling(p.prev.Parent, node)
}
}
}
}
// StreamParser enables loading and parsing an XML document in a streaming
// fashion. Create one with CreateStreamParser or
// CreateStreamParserWithOptions and call Read repeatedly.
type StreamParser struct {
	// p is the underlying parser that Read drives.
	p *parser
}
// CreateStreamParser creates a StreamParser reading from r. It is
// CreateStreamParserWithOptions called with the zero ParserOptions.
//
// streamElementXPath is required. streamElementFilter is optional and meant
// for advanced scenarios only.
//
// Scenario 1: simple case:
//
//	xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
//	sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB")
//	if err != nil {
//		panic(err)
//	}
//	for {
//		n, err := sp.Read()
//		if err != nil {
//			break
//		}
//		fmt.Println(n.OutputXML(true))
//	}
//
// Output will be:
//
//	<BBB>b1</BBB>
//	<BBB>b2</BBB>
//
// Scenario 2: advanced case:
//
//	xml := `<AAA><BBB>b1</BBB><BBB>b2</BBB></AAA>`
//	sp, err := CreateStreamParser(strings.NewReader(xml), "/AAA/BBB", "/AAA/BBB[. != 'b1']")
//	if err != nil {
//		panic(err)
//	}
//	for {
//		n, err := sp.Read()
//		if err != nil {
//			break
//		}
//		fmt.Println(n.OutputXML(true))
//	}
//
// Output will be:
//
//	<BBB>b2</BBB>
//
// As the argument names indicate, streamElementXPath should only point at the
// target element node, with no extra filtering on the element itself or its
// children; streamElementFilter, when needed, supplies that additional
// filtering on the target element and its children.
//
// An error is returned when streamElementXPath or the provided
// streamElementFilter cannot be parsed and compiled into a valid xpath query.
func CreateStreamParser(r io.Reader, streamElementXPath string, streamElementFilter ...string) (*StreamParser, error) {
	var defaults ParserOptions
	return CreateStreamParserWithOptions(r, defaults, streamElementXPath, streamElementFilter...)
}
// CreateStreamParserWithOptions is like CreateStreamParser, but applies the
// given options to the underlying parser.
//
// streamElementXPath is compiled first; when at least one streamElementFilter
// is given, only the first is compiled and used. A compilation failure is
// returned as an error that wraps the underlying error, so callers can
// inspect it with errors.Is / errors.As; the message text is unchanged.
func CreateStreamParserWithOptions(
	r io.Reader,
	options ParserOptions,
	streamElementXPath string,
	streamElementFilter ...string,
) (*StreamParser, error) {
	elemXPath, err := getQuery(streamElementXPath)
	if err != nil {
		return nil, fmt.Errorf("invalid streamElementXPath '%s', err: %w", streamElementXPath, err)
	}

	var elemFilter *xpath.Expr
	if len(streamElementFilter) > 0 {
		elemFilter, err = getQuery(streamElementFilter[0])
		if err != nil {
			return nil, fmt.Errorf("invalid streamElementFilter '%s', err: %w", streamElementFilter[0], err)
		}
	}

	p := createParser(r)
	options.apply(p)
	p.streamElementXPath = elemXPath
	p.streamElementFilter = elemFilter
	return &StreamParser{p: p}, nil
}
// Read returns the next target node that satisfies the XPath given when the
// StreamParser was created. Once the rest of the document holds no further
// match, io.EOF is returned. Any XML parsing error is returned as soon as it
// is hit and ends the stream. Calling Read again after any error (io.EOF
// included) results in undefined behavior. Because of the streaming nature,
// each call first removes the previously returned target node from the
// document tree.
func (sp *StreamParser) Read() (*Node, error) {
	p := sp.p
	if last := p.streamNode; last != nil {
		// Drop everything that precedes the previous target among its
		// siblings as well: nodes sitting between targets (for example
		// newline text nodes) would otherwise pile up as leading children
		// and slow the stream down over time.
		for last.PrevSibling != nil {
			RemoveFromTree(last.PrevSibling)
		}
		p.prev = last.Parent
		// Release the previous target itself so memory does not grow.
		RemoveFromTree(last)
		p.streamNode, p.streamNodePrev = nil, nil
	}
	return p.parse()
}
golang-github-antchfx-xmlquery-1.4.3/parse_test.go 0000664 0000000 0000000 00000042143 14733256146 0022302 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// TestLoadURLSuccess serves a small document under a range of XML content
// types and expects LoadURL to accept each of them.
func TestLoadURLSuccess(t *testing.T) {
	contentTypes := []string{
		"application/vnd.paos.xml",
		"application/vnd.otps.ct-kip+xml",
		"application/vnd.openxmlformats-package.core-properties+xml",
		"application/CDFX+XML",
		"application/ATXML",
		"application/3gpdash-qoe-report+xml",
		"application/vnd.nokia.pcd+wbxml",
		"image/svg+xml",
		"message/imdn+xml",
		"model/vnd.collada+xml",
		"text/xml-external-parsed-entity",
		"text/xml",
		"aPPLIcaTioN/xMl; charset=UTF-8",
		"application/xhtml+xml",
		"application/xml",
		"text/xmL; charset=UTF-8",
		"application/aTOM+xmL; charset=UTF-8",
		"application/RsS+xmL; charset=UTF-8",
		"application/maTHml+xmL; charset=UTF-8",
		"application/xslt+xmL; charset=UTF-8",
	}

	// load runs one server per content type inside its own function so the
	// deferred Close fires after each iteration rather than piling up until
	// the whole test returns.
	load := func(contentType string) error {
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			s := `
`
			w.Header().Set("Content-Type", contentType)
			w.Write([]byte(s))
		}))
		defer server.Close()
		_, err := LoadURL(server.URL)
		return err
	}

	for _, contentType := range contentTypes {
		if err := load(contentType); err != nil {
			t.Fatalf("content type %q: %v", contentType, err)
		}
	}
}
// TestLoadURLFailure serves responses under a range of non-XML content types
// and expects LoadURL to reject every one of them with the
// "invalid XML document(<content type>)" error.
func TestLoadURLFailure(t *testing.T) {
	contentTypes := []string{
		"application/pdf",
		"application/json",
		"application/tlsrpt+gzip",
		"application/vnd.3gpp.pic-bw-small",
		"application/vnd.collabio.xodocuments.document-template",
		"application/vnd.ctc-posml",
		"application/vnd.gov.sk.e-form+zip",
		"audio/mp4",
		"audio/vnd.sealedmedia.softseal.mpeg",
		"image/png",
		"image/vnd.adobe.photoshop",
		"message/example",
		"message/vnd.wfa.wsc",
		"model/vnd.usdz+zip",
		"model/vnd.valve.source.compiled-map",
		"multipart/signed",
		"text/css",
		"text/html",
		"video/quicktime",
		"video/JPEG",
	}

	// load runs one server per content type inside its own function so the
	// deferred Close fires after each iteration.
	load := func(contentType string) error {
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			w.Header().Set("Content-Type", contentType)
		}))
		defer server.Close()
		_, err := LoadURL(server.URL)
		return err
	}

	for _, contentType := range contentTypes {
		err := load(contentType)
		// Move on to the next content type on the expected error; returning
		// here would leave every entry after the first unchecked.
		if err != nil && err.Error() == fmt.Sprintf("invalid XML document(%s)", contentType) {
			continue
		}
		t.Fatalf("Want invalid XML document(%s), got %v", contentType, err)
	}
}
// TestDefaultNamespace_1 parses the fixture and expects //svg to match and
// //tspan to match exactly two nodes.
func TestDefaultNamespace_1(t *testing.T) {
	s := `
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		t.Fatal(err)
	}

	if FindOne(doc, "//svg") == nil {
		t.Fatal("should find a `svg` but got nil")
	}

	const expected = 2
	if found := len(Find(doc, "//tspan")); found != expected {
		t.Fatalf("should found %d tspan but found %d", expected, found)
	}
}
// TestDefaultNamespace_3 covers https://github.com/antchfx/xmlquery/issues/67:
// the same xmlns being repeated on child elements. /bk:books/bk:book is
// expected to match exactly two nodes.
func TestDefaultNamespace_3(t *testing.T) {
	s := `
book 2
book 2
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		t.Fatal(err)
	}

	const expected = 2
	if found := len(Find(doc, `/bk:books/bk:book`)); found != expected {
		t.Fatalf("should found %d bk:book but found %d", expected, found)
	}
}
// TestDefaultNamespace_2 parses the fixture and expects //svg to match and
// //tspan to match exactly two nodes.
func TestDefaultNamespace_2(t *testing.T) {
	s := `
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		t.Fatal(err)
	}

	if FindOne(doc, "//svg") == nil {
		t.Fatal("should find a `svg` but got nil")
	}

	const expected = 2
	if found := len(Find(doc, "//tspan")); found != expected {
		t.Fatalf("should found %d tspan but found %d", expected, found)
	}
}
// TestDuplicateNamespaceURL parses the fixture and expects the prefixed path
// //S:Envelope/S:Body/ns2:Fault/faultcode to match a node.
func TestDuplicateNamespaceURL(t *testing.T) {
	s := `
ns2:Client
This is a client fault
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		t.Fatal(err)
	}
	if FindOne(doc, `//S:Envelope/S:Body/ns2:Fault/faultcode`) == nil {
		t.Fatalf("should fount one but nil")
	}
}
// TestNamespaceURL checks the Prefix and NamespaceURI of a prefixed element,
// that the text "author" is absent from InnerText, and that it is present in
// OutputXML(true).
func TestNamespaceURL(t *testing.T) {
	s := `
21|22021348
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		t.Fatal(err)
	}
	top := FindOne(doc, "//rss")
	if top == nil {
		t.Fatal("rss feed invalid")
	}
	node := FindOne(top, "dc:creator")
	if node == nil {
		t.Fatal("dc:creator not found")
	}
	if node.Prefix != "dc" {
		t.Fatalf("expected node prefix name is dc but is=%s", node.Prefix)
	}
	if node.NamespaceURI != "https://purl.org/dc/elements/1.1/" {
		t.Fatalf("dc:creator != %s", node.NamespaceURI)
	}
	// Contains rather than Index(...) > 0, which misses a match at offset 0.
	if strings.Contains(top.InnerText(), "author") {
		t.Fatalf("InnerText() include comment node text")
	}
	if !strings.Contains(top.OutputXML(true), "author") {
		t.Fatal("OutputXML shoud include comment node,but not")
	}
}
// TestMultipleProcInst checks that the first child of the document is the
// "xml" node and that the node two siblings later is "xml-stylesheet".
func TestMultipleProcInst(t *testing.T) {
	s := `
`
	doc, err := Parse(strings.NewReader(s))
	if err != nil {
		t.Fatal(err)
	}

	first := doc.FirstChild
	if first.Data != "xml" {
		t.Fatal("node.Data != xml")
	}

	// The immediate sibling is the new-line text node; step over it.
	newline := first.NextSibling
	second := newline.NextSibling
	if second.Data != "xml-stylesheet" {
		t.Fatal("node.Data != xml-stylesheet")
	}
}
// TestParse parses a small bookstore document and checks the shape of the
// resulting tree: the document node, the declaration node and its version
// attribute, the bookstore element and its two book children.
func TestParse(t *testing.T) {
	s := `
Harry Potter
29.99
Learning XML
39.95
`
	root, err := Parse(strings.NewReader(s))
	if err != nil {
		// Fatal, not Error: root is nil here and is dereferenced below.
		t.Fatal(err)
	}
	if root.Type != DocumentNode {
		t.Fatal("top node of tree is not DocumentNode")
	}

	declarNode := root.FirstChild
	if declarNode.Type != DeclarationNode {
		t.Fatal("first child node of tree is not DeclarationNode")
	}
	// Either a wrong name or a wrong value is a failure, hence ||; with &&
	// the check only fired when both were wrong.
	if len(declarNode.Attr) == 0 || declarNode.Attr[0].Name.Local != "version" || declarNode.Attr[0].Value != "1.0" {
		t.Fatal("version attribute not expected")
	}

	bookstore := root.LastChild
	if bookstore.Data != "bookstore" {
		t.Fatal("bookstore elem not found")
	}
	if bookstore.FirstChild.Data != "\n" {
		t.Fatal("first child node of bookstore is not empty node(\n)")
	}
	books := childNodes(bookstore, "book")
	if len(books) != 2 {
		t.Fatalf("expected book element count is 2, but got %d", len(books))
	}

	// first book element
	testNode(t, findNode(books[0], "title"), "title")
	testAttr(t, findNode(books[0], "title"), "lang", "en")
	testValue(t, findNode(books[0], "price").InnerText(), "29.99")
	testValue(t, findNode(books[0], "title").InnerText(), "Harry Potter")

	// second book element
	testNode(t, findNode(books[1], "title"), "title")
	testAttr(t, findNode(books[1], "title"), "lang", "en")
	testValue(t, findNode(books[1], "price").InnerText(), "39.95")

	testValue(t, books[0].OutputXML(true), `Harry Potter29.99`)
}
// TestMissDeclaration parses the fixture and checks that the XPath //AAA
// finds a node.
func TestMissDeclaration(t *testing.T) {
s := `
`
doc, err := Parse(strings.NewReader(s))
if err != nil {
t.Fatal(err)
}
node := FindOne(doc, "//AAA")
if node == nil {
t.Fatal("//AAA is nil")
}
}
// TestMissingNamespace expects Parse to reject the fixture with a non-nil
// error.
func TestMissingNamespace(t *testing.T) {
s := `
value 1
value 2
`
_, err := Parse(strings.NewReader(s))
if err == nil {
t.Fatal("err is nil, want got invalid XML document")
}
}
// TestTooNested parses a nested document and walks it through sibling and
// child links, checking that the AAA, CCC, BBB and DDD nodes are found where
// expected.
func TestTooNested(t *testing.T) {
	s := `
`
	root, err := Parse(strings.NewReader(s))
	if err != nil {
		// Fatal rather than Error: root is not usable after a parse failure.
		t.Fatal(err)
	}
	aaa := findNode(root, "AAA")
	if aaa == nil {
		t.Fatal("AAA node not exists")
	}
	ccc := aaa.LastChild.PrevSibling
	if ccc.Data != "CCC" {
		t.Fatalf("expected node is CCC,but got %s", ccc.Data)
	}
	bbb := ccc.PrevSibling.PrevSibling
	if bbb.Data != "BBB" {
		t.Fatalf("expected node is bbb,but got %s", bbb.Data)
	}
	ddd := findNode(bbb, "DDD")
	testNode(t, ddd, "DDD")
	testNode(t, ddd.LastChild.PrevSibling, "CCC")
}
// TestAttributeWithNamespace checks that an attribute can be matched through
// its namespace prefix in an XPath predicate.
func TestAttributeWithNamespace(t *testing.T) {
	s := `
`
	doc, err := Parse(strings.NewReader(s))
	// Surface parse failures directly instead of querying a nil document.
	if err != nil {
		t.Fatal(err)
	}
	n := FindOne(doc, "//good[@n1:a='2']")
	if n == nil {
		t.Fatal("n is nil")
	}
}
// NOTE(review): this span looks truncated. The string on the `e :=` line is
// never terminated, and the statements that follow it (rss / dc:creator /
// CharDataNode checks) appear to belong to a different test whose header is
// missing. Restore the original text before changing the code below.
func TestIllegalAttributeChars(t *testing.T) {
s := ``
doc, _ := Parse(strings.NewReader(s))
e := "If a
`
doc, err := Parse(strings.NewReader(s))
if err != nil {
t.Fatal(err)
}
top := FindOne(doc, "//rss")
if top == nil {
t.Fatal("rss feed invalid")
}
node := FindOne(top, "dc:creator")
if node.Prefix != "dc" {
t.Fatalf("expected node prefix name is dc but is=%s", node.Prefix)
}
cdata := node.FirstChild
if cdata == nil || cdata.Type != CharDataNode {
t.Fatalf("expected cdata child, received %d", cdata.Type)
}
testValue(t, cdata.InnerText(), "Richard Lawler")
}
// TestStreamParser_InvalidXPath verifies that CreateStreamParser rejects a
// malformed element XPath and a malformed filter XPath, returning a nil parser
// and the expected error text in both cases.
func TestStreamParser_InvalidXPath(t *testing.T) {
	cases := []struct {
		xpath   string
		filter  []string
		wantErr string
	}{
		{
			xpath:   "[invalid",
			wantErr: "invalid streamElementXPath '[invalid', err: expression must evaluate to a node-set",
		},
		{
			xpath:   ".",
			filter:  []string{"[invalid"},
			wantErr: "invalid streamElementFilter '[invalid', err: expression must evaluate to a node-set",
		},
	}
	for _, tc := range cases {
		sp, err := CreateStreamParser(strings.NewReader(""), tc.xpath, tc.filter...)
		if err == nil || err.Error() != tc.wantErr {
			t.Fatalf("got non-expected error: %v", err)
		}
		if sp != nil {
			t.Fatal("expected nil for sp, but got none-nil value")
		}
	}
}
// testOutputXML fails the test when n.OutputXML(true) differs from
// expectedXML. msg is prefixed to the failure message to identify the call
// site's intent.
func testOutputXML(t *testing.T, msg string, expectedXML string, n *Node) {
	// Mark as a helper so failures are attributed to the calling test line.
	t.Helper()
	// Serialize once and reuse the result for both the comparison and the report.
	actual := n.OutputXML(true)
	if actual != expectedXML {
		t.Fatalf("%s, expected XML: '%s', actual: '%s'", msg, expectedXML, actual)
	}
}
// TestStreamParser_Success1 streams the elements selected by /ROOT/*/BBB,
// restricted by the filter /ROOT/*/BBB[. != 'b3']. After each Read it checks
// the returned node and the document reachable through findRoot, and finally
// expects io.EOF.
func TestStreamParser_Success1(t *testing.T) {
s := `
c1
b1
d1
b2z1
b3
b4
b5
c3
`
sp, err := CreateStreamParser(strings.NewReader(s), "/ROOT/*/BBB", "/ROOT/*/BBB[. != 'b3']")
if err != nil {
t.Fatal(err.Error())
}
// First read: expect the target node containing b1.
n, err := sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "first call result", `b1`, n)
testOutputXML(t, "doc after first call",
`c1b1`, findRoot(n))
// Second read: expect the target node containing b2 and z1.
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "second call result", `b2z1`, n)
testOutputXML(t, "doc after second call",
`d1b2z1`, findRoot(n))
// Third read (note that 'b3' is skipped because the streamElementFilter excludes it).
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "third call result", `b4`, n)
// The node holding 'b3' was rejected by the filter, so it is not a target
// node; it is treated like any other non-target node when the document
// state is checked below.
testOutputXML(t, "doc after third call",
`b4`,
findRoot(n))
// Fourth read: expect the target node containing b5.
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "fourth call result", `b5`, n)
testOutputXML(t, "doc after fourth call",
`b5`,
findRoot(n))
// No more target nodes: the next Read must report io.EOF.
_, err = sp.Read()
if err != io.EOF {
t.Fatalf("io.EOF expected, but got %v", err)
}
}
// TestStreamParser_Success2 streams the elements selected by the union
// expression /AAA/CCC | /AAA/DDD (no filter). After each Read it checks the
// returned node and the document reachable through findRoot, and finally
// expects io.EOF.
func TestStreamParser_Success2(t *testing.T) {
s := `
c1
b1
d1
b2
c2
`
sp, err := CreateStreamParser(strings.NewReader(s), "/AAA/CCC | /AAA/DDD")
if err != nil {
t.Fatal(err.Error())
}
// First Read() should return c1
n, err := sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "first call result", `c1`, n)
testOutputXML(t, "doc after first call", `c1`, findRoot(n))
// Second Read() should return d1
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "second call result", `d1`, n)
testOutputXML(t, "doc after second call",
`b1d1`, findRoot(n))
// Third call should return c2
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "third call result", `c2`, n)
testOutputXML(t, "doc after third call",
`b2c2`, findRoot(n))
// No more target nodes: the next Read must report io.EOF.
_, err = sp.Read()
if err != io.EOF {
t.Fatalf("io.EOF expected, but got %v", err)
}
}
// TestCDATA streams the first /AAA/CCC element from the fixture and compares
// its OutputXML(true) form with the expected string.
func TestCDATA(t *testing.T) {
s := `
`
sp, err := CreateStreamParser(strings.NewReader(s), "/AAA/CCC")
if err != nil {
t.Fatal(err.Error())
}
n, err := sp.Read()
if err != nil {
t.Fatal(err.Error())
}
testOutputXML(t, "first call result", ``, n)
}
// TestXMLPreservation parses the fixture and compares the OutputXML(true)
// form of the whole document with the expected string.
func TestXMLPreservation(t *testing.T) {
s := `
`
doc, err := Parse(strings.NewReader(s))
if err != nil {
t.Fatal(err)
}
testOutputXML(t, "first call result",
``, doc)
}
// TestStreamParser_DefaultNamespace streams children of Objects that are
// selected by namespace-uri() and local-name() rather than by prefix, and
// checks the serialized form of three consecutive Read results.
func TestStreamParser_DefaultNamespace(t *testing.T) {
s := `
`
sp, err := CreateStreamParser(strings.NewReader(s), "//Objects/*[namespace-uri()=\"http://example.com/schema/2007/someschema\" and local-name()=\"Object\"]")
if err != nil {
t.Fatal(err.Error())
}
// First matching element.
n, err := sp.Read()
if err != nil {
t.Fatal(err.Error())
}
var x = ``
testOutputXML(t, "first call result", x, n)
// Second matching element.
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
x = ``
testOutputXML(t, "second call result", x, n)
// Third matching element.
n, err = sp.Read()
if err != nil {
t.Fatal(err.Error())
}
x = ``
testOutputXML(t, "third call result", x, n)
}
// TestDirective parses the fixture and checks that the third top-level node
// is a NotationNode with the expected serialized form, and that the XPath
// //* matches four nodes.
func TestDirective(t *testing.T) {
s := `
Q Light Controller Plus
4.12.3
`
doc, err := Parse(strings.NewReader(s))
if err != nil {
t.Fatal(err.Error())
}
top := doc.FirstChild
// Step over two siblings from the first child to reach the node under test.
n := top.NextSibling.NextSibling
if n == nil {
t.Error("should be not nil, but got nil")
return
}
if v := n.Type; v != NotationNode {
t.Errorf("expected the node type is NotationNode, but got %d", v)
}
if expected, val := ``, n.OutputXML(true); expected != val {
t.Errorf("expected %s but got %s", expected, val)
}
list := Find(doc, `//*`)
if m := len(list); m != 4 {
t.Errorf("expected count is 4 but got %d", m)
}
}
golang-github-antchfx-xmlquery-1.4.3/query.go 0000664 0000000 0000000 00000014675 14733256146 0021307 0 ustar 00root root 0000000 0000000 /*
Package xmlquery extracts data from XML documents using XPath expressions.
*/
package xmlquery
import (
"fmt"
"strings"
"github.com/antchfx/xpath"
)
// SelectElements evaluates name as an XPath expression relative to n and
// returns every matching node. It delegates to Find and therefore panics if
// name is not a valid XPath expression.
func (n *Node) SelectElements(name string) []*Node {
	matches := Find(n, name)
	return matches
}
// SelectElement evaluates name as an XPath expression relative to n and
// returns the first matching node, or nil when nothing matches. It delegates
// to FindOne and therefore panics if name is not a valid XPath expression.
func (n *Node) SelectElement(name string) *Node {
	first := FindOne(n, name)
	return first
}
// SelectAttr returns the value of the attribute called name, or "" when there
// is no such attribute.
//
// When n is itself an AttributeNode, its text is returned if its Data equals
// name. Otherwise n.Attr is searched for an entry whose qualified name equals
// the one produced by newXMLName(name).
func (n *Node) SelectAttr(name string) string {
	if n.Type != AttributeNode {
		want := newXMLName(name)
		for i := range n.Attr {
			if n.Attr[i].Name == want {
				return n.Attr[i].Value
			}
		}
		return ""
	}
	if n.Data != name {
		return ""
	}
	return n.InnerText()
}
// Compile-time check that *NodeNavigator satisfies xpath.NodeNavigator.
var _ xpath.NodeNavigator = (*NodeNavigator)(nil)

// CreateXPathNavigator returns a NodeNavigator whose root and current position
// are both top, with no attribute selected.
func CreateXPathNavigator(top *Node) *NodeNavigator {
	nav := new(NodeNavigator)
	nav.root = top
	nav.curr = top
	nav.attr = -1
	return nav
}
// getCurrentNode converts the iterator's current position into a *Node.
//
// For element, text and other tree nodes the underlying node is returned
// as-is. When the navigator is positioned on an attribute, a new AttributeNode
// is built: its Data is the attribute's local name, its Parent is the owning
// element, and its only child is a TextNode holding the attribute value.
func getCurrentNode(it *xpath.NodeIterator) *Node {
	nav := it.Current().(*NodeNavigator)
	if nav.NodeType() != xpath.AttributeNode {
		return nav.curr
	}
	text := &Node{Type: TextNode, Data: nav.Value()}
	attr := &Node{Type: AttributeNode, Data: nav.LocalName(), Parent: nav.curr}
	attr.FirstChild = text
	attr.LastChild = text
	return attr
}
// Find returns all nodes under top that match the XPath expression expr.
// It behaves like QueryAll, except that it panics instead of returning an
// error when expr cannot be compiled.
func Find(top *Node, expr string) []*Node {
	matches, err := QueryAll(top, expr)
	if err == nil {
		return matches
	}
	panic(err)
}
// FindOne returns the first node under top that matches the XPath expression
// expr, or nil when nothing matches. It behaves like Query, except that it
// panics instead of returning an error when expr cannot be compiled.
func FindOne(top *Node, expr string) *Node {
	match, err := Query(top, expr)
	if err == nil {
		return match
	}
	panic(err)
}
// QueryAll returns every node under top that matches the XPath expression
// expr. The error from getQuery is returned unchanged when expr cannot be
// turned into a selector.
func QueryAll(top *Node, expr string) ([]*Node, error) {
	selector, err := getQuery(expr)
	if err == nil {
		return QuerySelectorAll(top, selector), nil
	}
	return nil, err
}
// Query returns the first node under top that matches the XPath expression
// expr, or nil when nothing matches. The error from getQuery is returned
// unchanged when expr cannot be turned into a selector.
func Query(top *Node, expr string) (*Node, error) {
	selector, err := getQuery(expr)
	if err == nil {
		return QuerySelector(top, selector), nil
	}
	return nil, err
}
// QuerySelectorAll evaluates the compiled selector against top and collects
// every match in iteration order. The result is nil when nothing matches.
func QuerySelectorAll(top *Node, selector *xpath.Expr) []*Node {
	var matches []*Node
	for it := selector.Select(CreateXPathNavigator(top)); it.MoveNext(); {
		matches = append(matches, getCurrentNode(it))
	}
	return matches
}
// QuerySelector evaluates the compiled selector against top and returns the
// first match, or nil when there is none.
func QuerySelector(top *Node, selector *xpath.Expr) *Node {
	it := selector.Select(CreateXPathNavigator(top))
	if !it.MoveNext() {
		return nil
	}
	return getCurrentNode(it)
}
// FindEach evaluates expr against the XML node top and calls cb once per
// match, passing the match index and the node. Like Find, it panics when expr
// is not a valid XPath expression.
//
// Deprecated: range over the result of Find instead.
func FindEach(top *Node, expr string, cb func(int, *Node)) {
	matches := Find(top, expr)
	for i := 0; i < len(matches); i++ {
		cb(i, matches[i])
	}
}
// FindEachWithBreak works like FindEach, but stops iterating as soon as cb
// returns false.
//
// Deprecated: range over the result of Find instead.
func FindEachWithBreak(top *Node, expr string, cb func(int, *Node) bool) {
	matches := Find(top, expr)
	for i := 0; i < len(matches); i++ {
		if keepGoing := cb(i, matches[i]); !keepGoing {
			return
		}
	}
}
// NodeNavigator is a cursor over a Node tree. It is the type this package
// uses to satisfy xpath.NodeNavigator.
type NodeNavigator struct {
	// root is the node the navigator was created on; MoveToRoot returns to it.
	root *Node
	// curr is the node the navigator is currently positioned on.
	curr *Node
	// attr is the index into curr.Attr of the selected attribute, or -1 when
	// the navigator is positioned on curr itself.
	attr int
}
// Current returns the node the navigator is positioned on. When an attribute
// is selected, this is the element that owns the attribute.
func (x *NodeNavigator) Current() *Node {
	node := x.curr
	return node
}
// NodeType maps the current position to the xpath package's node kinds.
//
// Text, character-data and notation nodes are all reported as text; document
// and declaration nodes are reported as the root. An element is reported as an
// attribute while an attribute index is selected. A Node whose own Type is
// AttributeNode (such as the detached nodes getCurrentNode builds for
// attribute matches) is reported as an attribute too, so that such a node can
// be used as the starting point of a further query instead of triggering the
// panic below.
//
// It panics for any node type not listed here.
func (x *NodeNavigator) NodeType() xpath.NodeType {
	switch x.curr.Type {
	case CommentNode:
		return xpath.CommentNode
	case TextNode, CharDataNode, NotationNode:
		return xpath.TextNode
	case DeclarationNode, DocumentNode:
		return xpath.RootNode
	case AttributeNode:
		return xpath.AttributeNode
	case ElementNode:
		if x.attr != -1 {
			return xpath.AttributeNode
		}
		return xpath.ElementNode
	}
	panic(fmt.Sprintf("unknown XML node type: %v", x.curr.Type))
}
// LocalName returns the local name of the selected attribute, or the Data of
// the current node when no attribute is selected.
func (x *NodeNavigator) LocalName() string {
	if x.attr == -1 {
		return x.curr.Data
	}
	return x.curr.Attr[x.attr].Name.Local
}
// Prefix returns the namespace prefix at the current position: the Name.Space
// of the selected attribute when positioned on an attribute, otherwise the
// Prefix of the current node.
func (x *NodeNavigator) Prefix() string {
	if x.NodeType() != xpath.AttributeNode {
		return x.curr.Prefix
	}
	if x.attr == -1 {
		return ""
	}
	return x.curr.Attr[x.attr].Name.Space
}
// NamespaceURL returns the namespace URI of the selected attribute, or of the
// current node when no attribute is selected.
func (x *NodeNavigator) NamespaceURL() string {
	if x.attr == -1 {
		return x.curr.NamespaceURI
	}
	return x.curr.Attr[x.attr].NamespaceURI
}
// Value returns the string value at the current position:
//   - the attribute value when an attribute of an element is selected,
//   - InnerText() for an element,
//   - Data for comment and text nodes,
//   - "" for every other node type.
func (x *NodeNavigator) Value() string {
	node := x.curr
	switch {
	case node.Type == ElementNode && x.attr != -1:
		return node.Attr[x.attr].Value
	case node.Type == ElementNode:
		return node.InnerText()
	case node.Type == CommentNode, node.Type == TextNode:
		return node.Data
	default:
		return ""
	}
}
// Copy returns a new navigator with the same root, current node and attribute
// index as x. Moving the copy does not move x.
func (x *NodeNavigator) Copy() xpath.NodeNavigator {
	return &NodeNavigator{root: x.root, curr: x.curr, attr: x.attr}
}
// MoveToRoot repositions the navigator on the node it was created with.
//
// Any selected attribute is cleared as well. Without that, a navigator that
// was on an attribute would keep its attribute index after the move, so it
// would still refuse MoveToChild and would index the root's Attr slice with
// an index that belonged to a different node.
func (x *NodeNavigator) MoveToRoot() {
	x.curr = x.root
	x.attr = -1
}
// MoveToParent moves one level up and reports whether it moved. From an
// attribute the parent is the owning element, so only the attribute selection
// is cleared. From a node the navigator moves to node.Parent, and false is
// returned when there is none.
func (x *NodeNavigator) MoveToParent() bool {
	if x.attr != -1 {
		x.attr = -1
		return true
	}
	parent := x.curr.Parent
	if parent == nil {
		return false
	}
	x.curr = parent
	return true
}
// MoveToNextAttribute selects the next attribute of the current node and
// reports whether there was one. Starting from no selection (-1) it selects
// the first attribute.
func (x *NodeNavigator) MoveToNextAttribute() bool {
	if next := x.attr + 1; next < len(x.curr.Attr) {
		x.attr = next
		return true
	}
	return false
}
// MoveToChild moves to the first child of the current node and reports
// whether it moved. It returns false when an attribute is selected or when the
// node has no children.
func (x *NodeNavigator) MoveToChild() bool {
	if x.attr == -1 && x.curr.FirstChild != nil {
		x.curr = x.curr.FirstChild
		return true
	}
	return false
}
// MoveToFirst moves to the first sibling of the current node and reports
// whether it moved. It returns false when an attribute is selected or when the
// current node already is the first sibling.
func (x *NodeNavigator) MoveToFirst() bool {
	if x.attr != -1 {
		return false
	}
	first := x.curr
	for first.PrevSibling != nil {
		first = first.PrevSibling
	}
	if first == x.curr {
		return false
	}
	x.curr = first
	return true
}
// String returns the same text as Value.
func (x *NodeNavigator) String() string {
	text := x.Value()
	return text
}
// MoveToNext moves to the next sibling that is not a whitespace-only text
// node and reports whether it moved. It returns false when an attribute is
// selected or when no such sibling exists.
//
// The navigator's position is only changed on success. Previously a failed
// call left the navigator parked on the last whitespace-only text node it had
// skipped over.
func (x *NodeNavigator) MoveToNext() bool {
	if x.attr != -1 {
		return false
	}
	for node := x.curr.NextSibling; node != nil; node = node.NextSibling {
		// Whitespace-only text nodes are skipped, not visited.
		if node.Type != TextNode || strings.TrimSpace(node.Data) != "" {
			x.curr = node
			return true
		}
	}
	return false
}
// MoveToPrevious moves to the closest preceding sibling that is not a
// whitespace-only text node and reports whether it moved. It returns false
// when an attribute is selected or when no such sibling exists.
//
// The navigator's position is only changed on success. Previously a failed
// call left the navigator parked on the last whitespace-only text node it had
// skipped over.
func (x *NodeNavigator) MoveToPrevious() bool {
	if x.attr != -1 {
		return false
	}
	for node := x.curr.PrevSibling; node != nil; node = node.PrevSibling {
		// Whitespace-only text nodes are skipped, not visited.
		if node.Type != TextNode || strings.TrimSpace(node.Data) != "" {
			x.curr = node
			return true
		}
	}
	return false
}
// MoveTo repositions x at the same node and attribute as other and reports
// whether it did. The move is refused when other is not a *NodeNavigator or
// was created on a different root.
func (x *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool {
	if src, ok := other.(*NodeNavigator); ok && src.root == x.root {
		x.curr, x.attr = src.curr, src.attr
		return true
	}
	return false
}
golang-github-antchfx-xmlquery-1.4.3/query_test.go 0000664 0000000 0000000 00000010652 14733256146 0022335 0 ustar 00root root 0000000 0000000 package xmlquery
import (
"fmt"
"strings"
"testing"
)
// xmlDoc is the book-catalog fixture shared by the tests in this file.
// Source: https://msdn.microsoft.com/en-us/library/ms762271(v=vs.85).aspx
const xmlDoc = `
Gambardella, Matthew
XML Developer's Guide
Computer
44.95
2000-10-01
An in-depth look at creating applications
with XML.
Ralls, Kim
Midnight Rain
Fantasy
5.95
2000-12-16
A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.
Corets, Eva
Maeve Ascendant
Fantasy
5.95
2000-11-17
After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.
`
// doc is xmlDoc parsed once at package initialization; loadXML panics if the
// fixture cannot be parsed.
var doc = loadXML(xmlDoc)
// TestXPath runs a set of XPath queries against the shared catalog document:
// element counts, attribute and value predicates, the FindEach and
// FindEachWithBreak callbacks, and a positional predicate.
func TestXPath(t *testing.T) {
	if list := Find(doc, "//book"); len(list) != 3 {
		t.Fatal("count(//book) != 3")
	}
	if node := FindOne(doc, "//book[@id='bk101']"); node == nil {
		t.Fatal("//book[@id='bk101] is not found")
	}
	if node := FindOne(doc, "//book[price>=44.95]"); node == nil {
		t.Fatal("//book/price>=44.95 is not found")
	}
	if list := Find(doc, "//book[genre='Fantasy']"); len(list) != 2 {
		t.Fatal("//book[genre='Fantasy'] items count is not equal 2")
	}
	var c int
	FindEach(doc, "//book", func(i int, n *Node) {
		c++
	})
	l := len(Find(doc, "//book"))
	if c != l {
		// Report the actual mismatch; the old message claimed a count of 3
		// regardless of what was compared.
		t.Fatalf("FindEach visited %d nodes, want %d", c, l)
	}
	c = 0
	FindEachWithBreak(doc, "//book", func(i int, n *Node) bool {
		if c == l-1 {
			return false
		}
		c++
		return true
	})
	if c != l-1 {
		t.Fatal("FindEachWithBreak failed to stop.")
	}
	node := FindOne(doc, "//book[1]")
	// Guard the lookup so a missing match fails the test instead of panicking.
	if node == nil {
		t.Fatal("//book[1] is not found")
	}
	if node.SelectAttr("id") != "bk101" {
		t.Fatal("//book[1]/@id != bk101")
	}
}
// TestXPathCdUp checks that stepping from an attribute to its parent with
// ".." yields the owning element: /a/b/@attr/.. must return the node whose
// Data is "b".
func TestXPathCdUp(t *testing.T) {
doc := loadXML(``)
node := FindOne(doc, "/a/b/@attr/..")
t.Logf("node = %#v", node)
if node == nil || node.Data != "b" {
t.Fatal("//b/@id/.. != ")
}
}
// TestInvalidXPathExpression verifies that both QueryAll and Query return an
// error, rather than succeeding, for a malformed XPath expression.
func TestInvalidXPathExpression(t *testing.T) {
	const badExpr = "//a[@a==1]"
	empty := new(Node)
	if _, err := QueryAll(empty, badExpr); err == nil {
		t.Fatal("expected a parsed error but nil")
	}
	if _, err := Query(empty, badExpr); err == nil {
		t.Fatal("expected a parsed error but nil")
	}
}
// TestNavigator drives a NodeNavigator by hand through the shared catalog
// document and checks the node reached after each group of moves. The moves
// depend on each other, so their order matters.
func TestNavigator(t *testing.T) {
nav := &NodeNavigator{curr: doc, root: doc, attr: -1}
nav.MoveToChild() // New Line
nav.MoveToNext() // catalog
if nav.curr.Data != "catalog" {
t.Fatal("current node name != `catalog`")
}
nav.MoveToChild() // New Line
nav.MoveToNext() // comment node
if nav.curr.Type != CommentNode {
t.Fatal("node type not CommentNode")
}
// The result is discarded; this only exercises Value on a comment node.
nav.Value()
nav.MoveToNext() //book
nav.MoveToChild()
nav.MoveToNext() // book/author
if nav.LocalName() != "author" {
t.Fatalf("node error")
}
nav.MoveToParent() // book
nav.MoveToNext() // next book
if nav.curr.SelectAttr("id") != "bk102" {
t.Fatal("node error")
}
}
// TestAttributesNamespaces selects elements that carry an attribute with
// local name "attr" in the namespace "ns://nested" and checks that the "id"
// values of the matches are 2 and 3.
func TestAttributesNamespaces(t *testing.T) {
doc := loadXML(`
`)
results := Find(doc, "//*[@*[namespace-uri()='ns://nested' and local-name()='attr']]")
parsed := make([]string, 0, 5)
for _, tag := range results {
parsed = append(parsed, tag.SelectAttr("id"))
}
// Compare the formatted slice so one string equality covers length and order.
got := fmt.Sprintf("%v", parsed)
// unsure if 5 should be selected here
if got != "[2 3]" {
t.Fatalf("Expected tags [2 3], got %v", got)
}
}
// loadXML parses s and returns the resulting document node. It panics when s
// cannot be parsed, which keeps fixture setup in tests to a single call.
func loadXML(s string) *Node {
	root, err := Parse(strings.NewReader(s))
	if err == nil {
		return root
	}
	panic(err)
}
// TestMissingTextNodes checks that //text() returns three text nodes for the
// fixture.
func TestMissingTextNodes(t *testing.T) {
doc := loadXML(`
Lorem ipsum dolor
`)
results := Find(doc, "//text()")
if len(results) != 3 {
t.Fatalf("Expected text nodes 3, got %d", len(results))
}
}