pax_global_header 0000666 0000000 0000000 00000000064 14745676166 0014540 g ustar 00root root 0000000 0000000 52 comment=21d47691f2448bd7a870891daf85afa091af9bca
golang-github-johanneskaufmann-dom-0.2.0/ 0000775 0000000 0000000 00000000000 14745676166 0020367 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/.github/ 0000775 0000000 0000000 00000000000 14745676166 0021727 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/.github/dependabot.yml 0000664 0000000 0000000 00000000425 14745676166 0024560 0 ustar 00root root 0000000 0000000 # Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "weekly"
golang-github-johanneskaufmann-dom-0.2.0/.github/workflows/ 0000775 0000000 0000000 00000000000 14745676166 0023764 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/.github/workflows/go.yml 0000664 0000000 0000000 00000002127 14745676166 0025116 0 ustar 00root root 0000000 0000000 name: Go
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
# Test the latest go version
# and upload the test coverage.
test_latest:
name: Go latest stable
runs-on: ubuntu-latest
steps:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: 'stable'
check-latest: true
- name: Checkout code
uses: actions/checkout@v4
- name: Build
run: go build -v .
- name: Test
run: go test ./... -v -race -coverprofile=coverage.txt -covermode=atomic
# Test the Go versions listed in the matrix
# on different operating systems.
test_versions:
strategy:
matrix:
go: ['1.22']
os: [ubuntu-latest, macos-latest, windows-latest]
name: Go ${{ matrix.go }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
steps:
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go }}
- name: Checkout code
uses: actions/checkout@v4
- name: Test
run: go test ./... -v -race -cover
golang-github-johanneskaufmann-dom-0.2.0/.gitignore 0000664 0000000 0000000 00000000752 14745676166 0022363 0 ustar 00root root 0000000 0000000 # If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Go workspace file
go.work
go.work.sum
golang-github-johanneskaufmann-dom-0.2.0/LICENSE 0000664 0000000 0000000 00000002062 14745676166 0021374 0 ustar 00root root 0000000 0000000 MIT License
Copyright (c) 2024 Johannes Kaufmann
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
golang-github-johanneskaufmann-dom-0.2.0/README.md 0000664 0000000 0000000 00000015357 14745676166 0021661 0 ustar 00root root 0000000 0000000 # dom
[](https://pkg.go.dev/github.com/JohannesKaufmann/dom)
Helper functions for "net/html" that make it easier to interact with `*html.Node`.
π [Getting Started](#getting-started) - π [Documentation](#documentation) - π§βπ» [Examples](/examples/)
## Installation
```bash
go get -u github.com/JohannesKaufmann/dom
```
> [!NOTE]
> This "dom" library was developed for the needs of the [html-to-markdown](https://github.com/JohannesKaufmann/html-to-markdown) library.
> That being said, please submit any functions that you need.
## Getting Started
```go
package main
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
func main() {
input := `
`
doc, err := html.Parse(strings.NewReader(input))
if err != nil {
log.Fatal(err)
}
// - - - //
firstLink := dom.FindFirstNode(doc, func(node *html.Node) bool {
return dom.NodeName(node) == "a"
})
fmt.Println("href:", dom.GetAttributeOr(firstLink, "href", ""))
}
```
## Node vs Element
The naming scheme in this library is:
- "Node" means `*html.Node{}`
- This means _any_ node in the tree of nodes.
- "Element" means `*html.Node{Type: html.ElementNode}`
- This means _only_ nodes with the type of `ElementNode`. For example ``, ``, ``, ... but not `#text`, ``, ...
For most functions, there are two versions. For example:
- `FirstChildNode()` and `FirstChildElement()`
- `AllChildNodes()` and `AllChildElements()`
- ...
## Documentation
[](https://pkg.go.dev/github.com/JohannesKaufmann/dom)
### Attributes & Content
You can get the attributes of a node using `GetAttribute`, `GetAttributeOr` or the more specialized `GetClasses` that returns a slice of strings.
For matching nodes, `HasID` and `HasClass` can be used.
If you want to collect the #text of all the child nodes, you can call `CollectText`.
```go
name := dom.NodeName(node)
// "h2"
href := dom.GetAttributeOr(node, "href", "")
// "github.com"
isHeading := dom.HasClass(node, "repo__name")
// `true`
content := dom.CollectText(node)
// "Lorem ipsum"
```
---
### Children & Siblings
You can already use `node.FirstChild` to get the first child _node_. For convenience, we added `FirstChildNode()` and `FirstChildElement()`, which return `*html.Node`.
To get all direct children, use `AllChildNodes` and `AllChildElements` which returns `[]*html.Node`.
- `PrevSiblingNode` and `PrevSiblingElement`
- `NextSiblingNode` and `NextSiblingElement`
### Find Nodes
Searching for nodes deep in the tree is made easier with:
```go
firstParagraph := dom.FindFirstNode(doc, func(node *html.Node) bool {
return dom.NodeName(node) == "p"
})
// *html.Node
allParagraphs := dom.FindAllNodes(doc, func(node *html.Node) bool {
return dom.NodeName(node) == "p"
})
// []*html.Node
```
- 🧑‍💻 [Example code, find](/examples/find/main.go)
- 🧑‍💻 [Example code, selectors](/examples/selectors/main.go)
---
### Get next/previous neighbors
What is special about this? The order!
If you are somewhere in the DOM, you can call `GetNextNeighborNode` to get the next node, even if it is _further up_ the tree. The order is the same as you would see the elements in the DOM.
```go
node := startNode
for node != nil {
fmt.Println(dom.NodeName(node))
node = dom.GetNextNeighborNode(node)
}
```
If we start the `for` loop at the `` and repeatedly call `GetNextNeighborNode` this would be the _order_ that the nodes are _visited_.
```text
#document
├─html
│ ├─head
│ ├─body
│ │ ├─nav
│ │ │ ├─p
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button *️⃣
│ │ │ │ ├─span 0️⃣
│ │ │ │ │ ├─#text "start" 1️⃣
│ │ │ ├─div 2️⃣
│ │ │ │ ├─h3 3️⃣
│ │ │ │ │ ├─#text "heading" 4️⃣
│ │ │ │ ├─p 5️⃣
│ │ │ │ │ ├─#text "description" 6️⃣
│ │ ├─footer 7️⃣
│ │ │ ├─p 8️⃣
│ │ │ │ ├─#text "down" 9️⃣
```
If you only want to visit the ElementNode's (and skip the `#text` Nodes) you can use `GetNextNeighborElement` instead.
If you want to skip the children you can use `GetNextNeighborNodeExcludingOwnChild`. In the example above, when starting at the `` the next node would be the ``.
The same functions also exist for the previous nodes, e.g. `GetPrevNeighborNode`.
- 🧑‍💻 [Example code, next basics](/examples/next_basics/main.go)
- 🧑‍💻 [Example code, next inside a loop](/examples/next_loop/main.go)
---
### Remove & Replace Node
```go
if dom.HasClass(node, "lang__old") {
newNode := &html.Node{
Type: html.TextNode,
Data: "πͺ¦",
}
dom.ReplaceNode(node, newNode)
}
for _, node := range emptyTextNodes {
dom.RemoveNode(node)
}
```
- 🧑‍💻 [Example code, remove and replace](/examples/remove_replace/main.go)
### Unwrap Node
```text
#document
├─html
│ ├─head
│ ├─body
│ │ ├─article *️⃣
│ │ │ ├─h3
│ │ │ │ ├─#text "Heading"
│ │ │ ├─p
│ │ │ │ ├─#text "short description"
```
If we take the input above and run `UnwrapNode(articleNode)` we can "unwrap" the `
`. That means removing the `` while _keeping_ the children (`` and ` `).
```text
#document
├─html
│ ├─head
│ ├─body
│ │ ├─h3
│ │ │ ├─#text "Heading"
│ │ ├─p
│ │ │ ├─#text "short description"
```
For the reverse you can use `WrapNode(existingNode, newNode)`.
---
### RenderRepresentation
```go
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
func main() {
input := `Read More `
doc, err := html.Parse(strings.NewReader(input))
if err != nil {
log.Fatal(err)
}
fmt.Println(dom.RenderRepresentation(doc))
}
```
The tree representation helps to visualize the tree-structure of the DOM.
And the `#text` nodes stand out.
> [!TIP]
> This function could be useful for debugging & testcases.
> For example in [neighbors_test.go](/neighbors_test.go)
```text
#document
├─html
│ ├─head
│ ├─body
│ │ ├─a (href=/about)
│ │ │ ├─#text "Read More"
```
While the normal "net/html" [`Render()`](https://pkg.go.dev/golang.org/x/net/html#Render) function would have produced this:
```
Read More
```
- 🧑‍💻 [Example code, dom representation](/examples/dom_representation/main.go)
golang-github-johanneskaufmann-dom-0.2.0/attr.go 0000664 0000000 0000000 00000002346 14745676166 0021675 0 ustar 00root root 0000000 0000000 package dom
import (
"bytes"
"strings"
"golang.org/x/net/html"
)
// GetAttribute returns the value of the first attribute on node whose key
// equals key, together with true. If node is nil or carries no such
// attribute, it returns an empty string and false.
func GetAttribute(node *html.Node, key string) (string, bool) {
	// A nil node has no attributes; report "not found" instead of panicking.
	if node == nil {
		return "", false
	}
	for _, attr := range node.Attr {
		if attr.Key == key {
			return attr.Val, true
		}
	}
	return "", false
}
// GetAttributeOr returns the value of the first attribute on node whose key
// equals key. If node is nil or carries no such attribute, fallback is
// returned instead.
func GetAttributeOr(node *html.Node, key string, fallback string) string {
	// Lookups such as FindFirstNode may yield nil; treat that like a node
	// without the attribute rather than panicking.
	if node == nil {
		return fallback
	}
	if val, found := GetAttribute(node, key); found {
		return val
	}
	return fallback
}
// GetClasses returns the whitespace-separated entries of the node's "class"
// attribute. It returns nil when the node has no "class" attribute.
func GetClasses(node *html.Node) []string {
	if raw, ok := GetAttribute(node, "class"); ok {
		return strings.Fields(raw)
	}
	return nil
}
// HasID reports whether the node has an "id" attribute whose value, with
// surrounding whitespace trimmed, equals expectedID.
func HasID(node *html.Node, expectedID string) bool {
	id, ok := GetAttribute(node, "id")
	return ok && strings.TrimSpace(id) == expectedID
}
// HasClass reports whether expectedClass is one of the classes returned by
// GetClasses for the node. The comparison is an exact string match.
func HasClass(node *html.Node, expectedClass string) bool {
	names := GetClasses(node)
	for i := range names {
		if names[i] == expectedClass {
			return true
		}
	}
	return false
}
// - - - - //
// collectText appends the Data of n (if n is a text node) and of every text
// node below it to buf, visiting children in sibling order.
func collectText(n *html.Node, buf *bytes.Buffer) {
	if n.Type == html.TextNode {
		buf.WriteString(n.Data)
	}

	child := n.FirstChild
	for child != nil {
		collectText(child, buf)
		child = child.NextSibling
	}
}
// CollectText returns the concatenated Data of all text nodes found at and
// below node, in document order. A nil node yields an empty string.
func CollectText(node *html.Node) string {
	// The result of a failed lookup is nil; there is no text to collect then.
	if node == nil {
		return ""
	}

	var buf bytes.Buffer
	collectText(node, &buf)
	return buf.String()
}
golang-github-johanneskaufmann-dom-0.2.0/attr_test.go 0000664 0000000 0000000 00000006005 14745676166 0022730 0 ustar 00root root 0000000 0000000 package dom
import (
"reflect"
"strings"
"testing"
"golang.org/x/net/html"
)
// TestGetAttribute checks that an existing key yields its value with
// found=true and that an unknown key yields "" with found=false.
func TestGetAttribute(t *testing.T) {
	attrs := []html.Attribute{
		{Key: "previouskey", Val: "previousval"},
		{Key: "mykey", Val: "myval"},
	}
	node := &html.Node{Attr: attrs}

	val, ok := GetAttribute(node, "mykey")
	if !ok {
		t.Error("expected found to be true")
	}
	if val != "myval" {
		t.Error("expected different value")
	}

	val, ok = GetAttribute(node, "unknownkey")
	if ok {
		t.Error("expected found to be false")
	}
	if val != "" {
		t.Error("expected empty value")
	}
}
// TestGetAttributeOr checks that an existing key yields its value and that
// an unknown key yields the supplied fallback.
func TestGetAttributeOr(t *testing.T) {
	attrs := []html.Attribute{
		{Key: "previouskey", Val: "previousval"},
		{Key: "mykey", Val: "myval"},
		{Key: "nextkey", Val: "nextval"},
	}
	node := &html.Node{Attr: attrs}

	if got := GetAttributeOr(node, "mykey", "myfallback"); got != "myval" {
		t.Error("expected different value")
	}

	if got := GetAttributeOr(node, "unknownkey", "myfallback"); got != "myfallback" {
		t.Error("expected different fallback value")
	}
}
// TestGetClasses checks that the class attribute is split into its
// individual class names and that a node without attributes has no classes.
func TestGetClasses(t *testing.T) {
	withClasses := &html.Node{
		Type: html.ElementNode,
		Data: "h1",
		Attr: []html.Attribute{
			{Key: "class", Val: " form form--theme-xmas form--simple"},
		},
	}
	want := []string{"form", "form--theme-xmas", "form--simple"}
	if got := GetClasses(withClasses); !reflect.DeepEqual(got, want) {
		t.Error("the slice of classes dont match")
	}

	withoutClasses := &html.Node{
		Type: html.ElementNode,
		Data: "h1",
		Attr: []html.Attribute{},
	}
	if got := GetClasses(withoutClasses); len(got) != 0 {
		t.Error("expected no classes")
	}
}
// TestHasID checks matching and non-matching ids on a node whose id value
// has surrounding whitespace, and on a node without an id attribute.
func TestHasID(t *testing.T) {
	withID := &html.Node{
		Type: html.ElementNode,
		Data: "h1",
		Attr: []html.Attribute{
			{Key: "id", Val: " city__name "},
		},
	}
	if !HasID(withID, "city__name") {
		t.Error("expected different output")
	}
	if HasID(withID, "city__image") {
		t.Error("expected different output")
	}

	withoutID := &html.Node{
		Type: html.ElementNode,
		Data: "h1",
		Attr: []html.Attribute{},
	}
	if HasID(withoutID, "city__name") {
		t.Error("expected different output")
	}
}
// TestHasClass checks that a full class name matches while a mere substring
// of a class name does not.
func TestHasClass(t *testing.T) {
	heading := &html.Node{
		Type: html.ElementNode,
		Data: "h1",
		Attr: []html.Attribute{
			{Key: "class", Val: " form form--theme-xmas form--simple"},
		},
	}

	if !HasClass(heading, "form--theme-xmas") {
		t.Error("expected different output")
	}
	if HasClass(heading, "xmas") {
		t.Error("expected different output")
	}
}
// TestCollectText parses the input, looks up the first node that has the
// class "article__title" and checks that CollectText returns "Hello world"
// for it.
//
// NOTE(review): the input literal below contains no markup carrying the
// class "article__title"; it looks like the HTML tags were lost. Confirm
// against the upstream source.
func TestCollectText(t *testing.T) {
	input := `
Hello world
Some description
`
	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}
	// Locate the node whose text is collected.
	heading := FindFirstNode(doc, func(node *html.Node) bool {
		return HasClass(node, "article__title")
	})
	expected := "Hello world"
	output := CollectText(heading)
	if output != expected {
		t.Errorf("expected %q but got %q", expected, output)
	}
}
golang-github-johanneskaufmann-dom-0.2.0/change.go 0000664 0000000 0000000 00000002064 14745676166 0022145 0 ustar 00root root 0000000 0000000 package dom
import "golang.org/x/net/html"
// RemoveNode detaches node from its parent. It does nothing when node is
// nil or has no parent.
func RemoveNode(node *html.Node) {
	if node == nil {
		return
	}
	parent := node.Parent
	if parent == nil {
		return
	}
	parent.RemoveChild(node)
}
// ReplaceNode puts newNode into the tree at the position of node and
// detaches node from its parent.
//
// It does nothing when node or newNode is nil, when node has no parent, or
// when node and newNode are the same node.
//
// NOTE(review): per the net/html documentation, InsertBefore panics if
// newNode already has a parent or siblings; callers are expected to pass a
// detached newNode.
func ReplaceNode(node, newNode *html.Node) {
	// Guard against nil arguments, consistent with RemoveNode, UnwrapNode
	// and WrapNode, instead of panicking on a nil dereference.
	if node == nil || newNode == nil {
		return
	}
	if node.Parent == nil || node == newNode {
		return
	}

	// Insert first so that the position of node is still known, then remove.
	node.Parent.InsertBefore(newNode, node)
	node.Parent.RemoveChild(node)
}
// UnwrapNode removes node from the tree while keeping its children: every
// child is moved in front of node (preserving order) and node itself is then
// detached. It does nothing when node is nil or has no parent.
func UnwrapNode(node *html.Node) {
	if node == nil || node.Parent == nil {
		return
	}

	// The first child is re-read on every pass because the previous first
	// child has just been moved out of node.
	for node.FirstChild != nil {
		child := node.FirstChild
		node.RemoveChild(child)
		node.Parent.InsertBefore(child, node)
	}

	node.Parent.RemoveChild(node)
}
// WrapNode wraps the newNode around the existingNode: newNode takes the
// position of existingNode in the tree and existingNode becomes the last
// child of newNode. The wrapper (newNode) is returned.
//
// When nothing can be wrapped — existingNode is nil or has no parent,
// newNode is nil, or both arguments are the same node — the tree is left
// untouched and existingNode is returned.
//
// NOTE(review): per the net/html documentation, InsertBefore panics if
// newNode already has a parent or siblings; callers are expected to pass a
// detached newNode.
func WrapNode(existingNode, newNode *html.Node) *html.Node {
	if existingNode == nil || existingNode.Parent == nil {
		return existingNode
	}
	// A nil wrapper would cause a nil dereference, and wrapping a node in
	// itself would make InsertBefore panic because the node is attached.
	if newNode == nil || newNode == existingNode {
		return existingNode
	}

	existingNode.Parent.InsertBefore(newNode, existingNode)
	existingNode.Parent.RemoveChild(existingNode)
	newNode.AppendChild(existingNode)
	return newNode
}
golang-github-johanneskaufmann-dom-0.2.0/change_test.go 0000664 0000000 0000000 00000007614 14745676166 0023212 0 ustar 00root root 0000000 0000000 package dom
import (
"strings"
"testing"
"golang.org/x/net/html"
)
// TestRemoveNode attaches a child, removes it again and verifies the
// parent/child links before and after. A second removal must be a no-op.
func TestRemoveNode(t *testing.T) {
	doc := &html.Node{Data: "parent"}
	child := &html.Node{Data: "child"}

	// Initially the two nodes are unrelated.
	if doc.FirstChild != nil {
		t.Error("expected FirstChild to be nil")
	}
	if child.Parent != nil {
		t.Error("expected Parent to be nil")
	}

	doc.AppendChild(child)
	if doc.FirstChild == nil {
		t.Error("expected FirstChild not to be nil anymore")
	}
	if child.Parent == nil {
		t.Error("expected Parent not to be nil anymore")
	}

	RemoveNode(child)
	if doc.FirstChild != nil {
		t.Error("expected FirstChild to be nil again")
	}
	if child.Parent != nil {
		t.Error("expected Parent to be nil again")
	}

	// Should not crash if run again...
	RemoveNode(child)
	if doc.Parent != nil {
		t.Error("expected Parent to still be nil")
	}
}
// TestReplaceNode checks that replacing a node with itself changes nothing
// and that replacing it with another node swaps the two in the parent.
func TestReplaceNode(t *testing.T) {
	original := &html.Node{Data: "original"}
	replacement := &html.Node{Data: "replacement"}

	doc := &html.Node{}
	doc.AppendChild(original)

	ReplaceNode(original, original)
	if doc.FirstChild != original {
		t.Error("expected the node1 to still be in place")
	}

	ReplaceNode(original, replacement)
	if doc.FirstChild != replacement {
		t.Error("expected the node2 to take the place")
	}
	if original.Parent != nil {
		t.Error("expected node1 to not have a parent anymore")
	}
}
// TestUnwrapNode builds root > parent > (child1, child2 > child2.1),
// unwraps parent and compares the rendered tree before and after.
// It finally unwraps the parentless root and checks root.Data afterwards.
func TestUnwrapNode(t *testing.T) {
	child1 := &html.Node{
		Type: html.ElementNode,
		Data: "child1",
	}
	child2 := &html.Node{
		Type: html.ElementNode,
		Data: "child2",
	}
	child2.AppendChild(&html.Node{
		Type: html.ElementNode,
		Data: "child2.1",
	})
	parent := &html.Node{
		Type: html.ElementNode,
		Data: "parent",
	}
	parent.AppendChild(child1)
	parent.AppendChild(child2)
	root := &html.Node{
		Type: html.ElementNode,
		Data: "root",
	}
	root.AppendChild(parent)
	// - - - - - //
	// Expected output of RenderRepresentation before the unwrap.
	expectedBefore := strings.TrimSpace(`
root
ββparent
β ββchild1
β ββchild2
β β ββchild2.1
`)
	// Expected output after the unwrap: the children hang directly below root.
	expectedAfter := strings.TrimSpace(`
root
ββchild1
ββchild2
β ββchild2.1
`)
	if RenderRepresentation(root) != expectedBefore {
		t.Error("expected a different initial render")
	}
	if root.FirstChild != parent {
		t.Error("expected the parent to be under root")
	}
	UnwrapNode(parent)
	if root.FirstChild == parent {
		t.Error("expected the parent to not be under root anymore")
	}
	if RenderRepresentation(root) != expectedAfter {
		t.Error("expected a different final render")
	}
	// root has no parent, so this call goes through UnwrapNode's early return.
	UnwrapNode(root)
	if root.Data != "root" {
		t.Error("expected the root to still be the root")
	}
}
// TestWrapNode builds root > parent > (child1, child2 > child2.1), wraps
// parent in a new "wrapper" node and compares the rendered tree before and
// after. It then checks that wrapping the parentless root changes nothing.
func TestWrapNode(t *testing.T) {
	child1 := &html.Node{
		Type: html.ElementNode,
		Data: "child1",
	}
	child2 := &html.Node{
		Type: html.ElementNode,
		Data: "child2",
	}
	child2.AppendChild(&html.Node{
		Type: html.ElementNode,
		Data: "child2.1",
	})
	parent := &html.Node{
		Type: html.ElementNode,
		Data: "parent",
	}
	parent.AppendChild(child1)
	parent.AppendChild(child2)
	root := &html.Node{
		Type: html.ElementNode,
		Data: "root",
	}
	root.AppendChild(parent)
	// - - - - - //
	// The detached node that is placed around parent.
	wrapper := &html.Node{
		Type: html.ElementNode,
		Data: "wrapper",
	}
	// Expected output of RenderRepresentation before the wrap.
	expectedBefore := strings.TrimSpace(`
root
ββparent
β ββchild1
β ββchild2
β β ββchild2.1
`)
	// Expected output after the wrap: wrapper sits between root and parent.
	expectedAfter := strings.TrimSpace(`
root
ββwrapper
β ββparent
β β ββchild1
β β ββchild2
β β β ββchild2.1
`)
	if RenderRepresentation(root) != expectedBefore {
		t.Error("expected a different initial render")
	}
	if root.FirstChild != parent {
		t.Error("expected the parent to be under root")
	}
	WrapNode(parent, wrapper)
	if RenderRepresentation(root) != expectedAfter {
		t.Error("expected a different final render")
	}
	if root.FirstChild != wrapper {
		t.Error("expected the wrapper to be under root")
	}
	// With no parent, there should be no change.
	WrapNode(root, &html.Node{Type: html.ElementNode, Data: "sky"})
	if RenderRepresentation(root) != expectedAfter {
		t.Error("there should be no changes")
	}
}
golang-github-johanneskaufmann-dom-0.2.0/dom.go 0000664 0000000 0000000 00000004400 14745676166 0021473 0 ustar 00root root 0000000 0000000 // dom makes it easier to interact with the html document.
//
// Node = return all the nodes
// Element = return all the nodes that are of type Element. This e.g. excludes #text nodes.
package dom
import "golang.org/x/net/html"
// AllNodes returns startNode followed by all of its descendants. Each node
// is appended before its children, and children are visited in sibling order.
func AllNodes(startNode *html.Node) (allNodes []*html.Node) {
	var visit func(n *html.Node)
	visit = func(n *html.Node) {
		allNodes = append(allNodes, n)

		child := n.FirstChild
		for child != nil {
			visit(child)
			child = child.NextSibling
		}
	}

	visit(startNode)
	return allNodes
}
// - - - - - - - - - - - - - - - //
// AllChildNodes returns the direct children of node in sibling order.
// The result is nil when node has no children.
func AllChildNodes(node *html.Node) (children []*html.Node) {
	cur := node.FirstChild
	for cur != nil {
		children = append(children, cur)
		cur = cur.NextSibling
	}
	return children
}
// AllChildElements returns the direct children of node that are of type
// `ElementNode`, in sibling order. Children of any other type are skipped.
func AllChildElements(node *html.Node) (children []*html.Node) {
	for cur := node.FirstChild; cur != nil; cur = cur.NextSibling {
		if cur.Type != html.ElementNode {
			continue
		}
		children = append(children, cur)
	}
	return children
}
// - - - - - - - - - - - - - - - //
// FirstChildNode returns node.FirstChild, which may be of any node type.
func FirstChildNode(node *html.Node) *html.Node {
	return node.FirstChild
}
// FirstChildElement returns the first direct child of node that is of type
// `ElementNode`, or nil when there is none.
func FirstChildElement(node *html.Node) *html.Node {
	cur := node.FirstChild
	for cur != nil {
		if cur.Type == html.ElementNode {
			return cur
		}
		cur = cur.NextSibling
	}
	return nil
}
// - - - - - - - - - - - - - - - //
// PrevSiblingNode returns node.PrevSibling, which may be of any node type.
func PrevSiblingNode(node *html.Node) *html.Node {
	return node.PrevSibling
}
// PrevSiblingElement walks backwards through the siblings of node and
// returns the first one of type `ElementNode`, or nil when there is none.
func PrevSiblingElement(node *html.Node) *html.Node {
	cur := node.PrevSibling
	for cur != nil {
		if cur.Type == html.ElementNode {
			return cur
		}
		cur = cur.PrevSibling
	}
	return nil
}
// - - - - - - - - - - - - - - - //
// NextSiblingNode returns node.NextSibling, which may be of any node type.
func NextSiblingNode(node *html.Node) *html.Node {
	return node.NextSibling
}
// NextSiblingElement walks forwards through the siblings of node and
// returns the first one of type `ElementNode`, or nil when there is none.
// Unlike `node.NextSibling`, siblings of other types (e.g. text) are skipped.
func NextSiblingElement(node *html.Node) *html.Node {
	cur := node.NextSibling
	for cur != nil {
		if cur.Type == html.ElementNode {
			return cur
		}
		cur = cur.NextSibling
	}
	return nil
}
golang-github-johanneskaufmann-dom-0.2.0/dom_representation.go 0000664 0000000 0000000 00000002335 14745676166 0024622 0 ustar 00root root 0000000 0000000 package dom
import (
"bytes"
"fmt"
"strings"
"golang.org/x/net/html"
)
// writePipeChar writes the indentation prefix for a line at depth index to
// buf: index-1 repetitions of the vertical guide followed by one branch
// marker. Nothing is written for index 0.
func writePipeChar(buf *bytes.Buffer, index int) {
	// Depth 0 gets no prefix at all.
	if index == 0 {
		return
	}
	// One guide segment per ancestor level above the direct parent.
	buf.WriteString(strings.Repeat("β ", index-1))
	// The branch marker that directly precedes the node name.
	buf.WriteString("ββ")
}
// writeNode writes a one-line description of node to buf: the node name,
// then — if the node has attributes — ` (key="val" key2="val2")` with the
// values quoted via %q, and for nodes named "#text" the quoted node.Data.
func writeNode(buf *bytes.Buffer, node *html.Node) {
	name := NodeName(node)
	buf.WriteString(name)

	if len(node.Attr) != 0 {
		buf.WriteString(" (")
		for i, attr := range node.Attr {
			// Separate attributes with a single space, no trailing space.
			if i > 0 {
				buf.WriteString(" ")
			}
			// Format straight into the buffer instead of building an
			// intermediate string with Sprintf.
			fmt.Fprintf(buf, "%s=%q", attr.Key, attr.Val)
		}
		buf.WriteString(")")
	}

	if name == "#text" {
		fmt.Fprintf(buf, " %q", node.Data)
	}
}
// RenderRepresentation renders the *structure* of the dom below startNode
// as text, one node per line, which is useful for debugging.
// Each line consists of the prefix from writePipeChar and the description
// from writeNode; the final result has surrounding whitespace trimmed.
func RenderRepresentation(startNode *html.Node) string {
	var out bytes.Buffer

	var walk func(n *html.Node, depth int)
	walk = func(n *html.Node, depth int) {
		writePipeChar(&out, depth)
		writeNode(&out, n)
		out.WriteRune('\n')

		child := n.FirstChild
		for child != nil {
			walk(child, depth+1)
			child = child.NextSibling
		}
	}

	// A start node without a parent is rendered at depth 0 (no prefix),
	// any other start node at depth 1.
	depth := 1
	if startNode.Parent == nil {
		depth = 0
	}
	walk(startNode, depth)

	return strings.TrimSpace(out.String())
}
golang-github-johanneskaufmann-dom-0.2.0/dom_representation_test.go 0000664 0000000 0000000 00000003101 14745676166 0025651 0 ustar 00root root 0000000 0000000 package dom
import (
"fmt"
"strings"
"testing"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// TestRenderRepresentation_NoAttributes parses the input, renders the tree
// starting at the body node and compares it with the expected text.
func TestRenderRepresentation_NoAttributes(t *testing.T) {
	input := "\nText\n "
	expected := "ββbody\nβ ββa\nβ β ββ#text \"\\nText\\n\""
	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}
	// Start rendering at the body node rather than at the document root.
	body := FindFirstNode(doc, func(node *html.Node) bool {
		return node.DataAtom == atom.Body
	})
	output := RenderRepresentation(body)
	if output != expected {
		t.Errorf("expected %q but got %q", expected, output)
	}
}
// TestRenderRepresentation_MultipleAttributes parses the input, renders the
// tree starting at the body node and expects the attributes of the link to
// be listed in parentheses, separated by single spaces.
func TestRenderRepresentation_MultipleAttributes(t *testing.T) {
	input := `Text `
	expected := `ββbody
β ββa (href="/page.html" target="_blank" class="button primary")
β β ββ#text "Text"`
	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}
	// Start rendering at the body node rather than at the document root.
	body := FindFirstNode(doc, func(node *html.Node) bool {
		return node.DataAtom == atom.Body
	})
	output := RenderRepresentation(body)
	if output != expected {
		t.Errorf("expected %q but got %q", expected, output)
	}
}
// TestRenderRepresentation_Root renders the whole parsed document, starting
// at the node returned by html.Parse, and compares it with the expected text.
func TestRenderRepresentation_Root(t *testing.T) {
	input := ` `
	expected := strings.TrimSpace(`
#document
ββhtml
β ββhead
β ββbody
β β ββimg (src="/img.png")
`)
	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}
	output := RenderRepresentation(doc)
	// Print the rendered tree so that it shows up in verbose test output.
	fmt.Println(output)
	if output != expected {
		t.Errorf("expected %q but got %q", expected, output)
	}
}
golang-github-johanneskaufmann-dom-0.2.0/dom_test.go 0000664 0000000 0000000 00000006607 14745676166 0022545 0 ustar 00root root 0000000 0000000 package dom
import (
"testing"
"golang.org/x/net/html"
)
// TestAllNodes builds a document with two element children, one of which
// has a text child, and expects AllNodes to return all four nodes.
func TestAllNodes(t *testing.T) {
	first := &html.Node{Type: html.ElementNode}
	first.AppendChild(&html.Node{Type: html.TextNode})
	second := &html.Node{Type: html.ElementNode}

	doc := &html.Node{}
	doc.AppendChild(first)
	doc.AppendChild(second)

	if nodes := AllNodes(doc); len(nodes) != 4 {
		t.Errorf("expected different length, but got %d", len(nodes))
	}
}
// - - - - - - - - - - - - - - - //
// TestAllChildNodes expects all three direct children (element, text,
// element) to be returned in the order they were appended.
func TestAllChildNodes(t *testing.T) {
	child0 := &html.Node{Type: html.ElementNode}
	child1 := &html.Node{Type: html.TextNode}
	child2 := &html.Node{Type: html.ElementNode}

	doc := &html.Node{}
	for _, child := range []*html.Node{child0, child1, child2} {
		doc.AppendChild(child)
	}

	nodes := AllChildNodes(doc)
	if len(nodes) != 3 {
		t.Error("expected different length")
	}
	if nodes[0] != child0 || nodes[1] != child1 || nodes[2] != child2 {
		t.Error("expected different nodes")
	}
}
// TestAllChildElements expects only the two element children to be
// returned; the text node between them is skipped.
func TestAllChildElements(t *testing.T) {
	firstChild := &html.Node{Type: html.ElementNode}
	middleChild := &html.Node{Type: html.TextNode}
	lastChild := &html.Node{Type: html.ElementNode}

	doc := &html.Node{}
	for _, child := range []*html.Node{firstChild, middleChild, lastChild} {
		doc.AppendChild(child)
	}

	nodes := AllChildElements(doc)
	if len(nodes) != 2 {
		t.Error("expected different length")
	}
	if nodes[0] != firstChild || nodes[1] != lastChild {
		t.Error("expected different nodes")
	}
}
// - - - - - - - - - - - - - - - //
// TestFirstChildNode expects the text child for the parent element and nil
// for the childless text node.
func TestFirstChildNode(t *testing.T) {
	node := &html.Node{Type: html.ElementNode}
	child := &html.Node{Type: html.TextNode}
	node.AppendChild(child)

	if got := FirstChildNode(node); got != child {
		t.Error("expected the first child node")
	}

	if got := FirstChildNode(child); got != nil {
		t.Error("expected the first child to be nil")
	}
}
// TestFirstChildElement expects the leading text child to be skipped and
// the element child behind it to be returned.
func TestFirstChildElement(t *testing.T) {
	node := &html.Node{Type: html.ElementNode}
	textChild := &html.Node{Type: html.TextNode}
	elementChild := &html.Node{Type: html.ElementNode}

	node.AppendChild(textChild)
	node.AppendChild(elementChild)

	if got := FirstChildElement(node); got != elementChild {
		t.Error("expected the first child node to be child2")
	}
}
// - - - - - - - - - - - - - - - //
// TestPrevElementSibling links three nodes (element "first", text
// "between", element "last") through PrevSibling and checks that
// PrevSiblingElement skips the text node and returns nil at the start.
func TestPrevElementSibling(t *testing.T) {
	first := &html.Node{
		Type: html.ElementNode,
		Data: "first",
	}
	text := &html.Node{
		Type:        html.TextNode,
		Data:        "between",
		PrevSibling: first,
	}
	last := &html.Node{
		Type:        html.ElementNode,
		Data:        "last",
		PrevSibling: text,
	}

	// From "last" the text node is skipped, so "first" is expected.
	output := PrevSiblingElement(last)
	if output != first {
		t.Error("expected 'first' node")
	}

	output = PrevSiblingElement(text)
	if output != first {
		t.Error("expected 'first' node")
	}

	// Nothing precedes "first".
	output = PrevSiblingElement(first)
	if output != nil {
		t.Error("expected nil node")
	}
}
// - - - - - - - - - - - - - - - //
// TestNextElementSibling links three nodes (element "start", text
// "between", element "end") through NextSibling and checks that
// NextSiblingElement skips the text node and returns nil at the end.
func TestNextElementSibling(t *testing.T) {
	end := &html.Node{Type: html.ElementNode, Data: "end"}
	text := &html.Node{Type: html.TextNode, Data: "between", NextSibling: end}
	start := &html.Node{Type: html.ElementNode, Data: "start", NextSibling: text}

	if got := NextSiblingElement(start); got != end {
		t.Error("expected 'end' node")
	}

	if got := NextSiblingElement(text); got != end {
		t.Error("expected 'end' node")
	}

	if got := NextSiblingElement(end); got != nil {
		t.Error("expected nil node")
	}
}
golang-github-johanneskaufmann-dom-0.2.0/example_test.go 0000664 0000000 0000000 00000001171 14745676166 0023410 0 ustar 00root root 0000000 0000000 package dom_test
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
// ExampleFindFirstNode parses the input, looks up the first node named "a"
// and prints the value of its "href" attribute.
//
// NOTE(review): the input literal below contains no markup; it looks like
// the HTML was lost. Confirm against the upstream source.
func ExampleFindFirstNode() {
	input := `
`
	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}
	// - - - //
	// The predicate matches on the node name.
	firstLink := dom.FindFirstNode(doc, func(node *html.Node) bool {
		return dom.NodeName(node) == "a"
	})
	// An empty string is printed when there is no "href" attribute.
	fmt.Println(dom.GetAttributeOr(firstLink, "href", ""))
	// Output: github.com/JohannesKaufmann/dom
}
golang-github-johanneskaufmann-dom-0.2.0/examples/ 0000775 0000000 0000000 00000000000 14745676166 0022205 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/examples/dom_representation/ 0000775 0000000 0000000 00000000000 14745676166 0026106 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/examples/dom_representation/main.go 0000664 0000000 0000000 00000000712 14745676166 0027361 0 ustar 00root root 0000000 0000000 package main
import (
"bytes"
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
// main parses a small snippet and prints it twice: first as the tree
// representation from dom.RenderRepresentation, then as rendered by
// html.Render.
func main() {
	const input = `Read More `

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}

	// - - - //
	fmt.Println(dom.RenderRepresentation(doc))

	// - - - //
	var rendered bytes.Buffer
	if err := html.Render(&rendered, doc); err != nil {
		log.Fatal(err)
	}
	fmt.Println(rendered.String())
}
golang-github-johanneskaufmann-dom-0.2.0/examples/find/ 0000775 0000000 0000000 00000000000 14745676166 0023125 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/examples/find/main.go 0000664 0000000 0000000 00000001102 14745676166 0024372 0 ustar 00root root 0000000 0000000 package main
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
// main parses the input, looks up the first node named "a" and prints the
// value of its "href" attribute (or an empty string if it has none).
func main() {
	const input = `
`

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}

	// - - - //
	isLink := func(node *html.Node) bool {
		return dom.NodeName(node) == "a"
	}
	link := dom.FindFirstNode(doc, isLink)

	href := dom.GetAttributeOr(link, "href", "")
	fmt.Println("href:", href)
}
golang-github-johanneskaufmann-dom-0.2.0/examples/next_basics/ 0000775 0000000 0000000 00000000000 14745676166 0024507 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/examples/next_basics/main.go 0000664 0000000 0000000 00000001074 14745676166 0025764 0 ustar 00root root 0000000 0000000 package main
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
// main parses the input, prints its tree representation, and then prints
// the name and data of the node that follows the first "a" node when that
// node's own children are excluded.
func main() {
	const input = `The library is amazing
`

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(dom.RenderRepresentation(doc))

	// - - - //
	isLink := func(node *html.Node) bool {
		return dom.NodeName(node) == "a"
	}
	link := dom.FindFirstNode(doc, isLink)

	following := dom.GetNextNeighborNodeExcludingOwnChild(link)
	fmt.Printf("next %s node is %q \n", dom.NodeName(following), following.Data)
}
golang-github-johanneskaufmann-dom-0.2.0/examples/next_loop/ 0000775 0000000 0000000 00000000000 14745676166 0024214 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/examples/next_loop/main.go 0000664 0000000 0000000 00000001046 14745676166 0025470 0 ustar 00root root 0000000 0000000 package main
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
// main parses the input, prints its tree representation, and then walks
// the document by repeatedly calling dom.GetNextNeighborNode, printing the
// name of every node visited.
func main() {
	const input = `
The library is amazing
`

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(dom.RenderRepresentation(doc))

	// - - - //
	for cur := doc; cur != nil; cur = dom.GetNextNeighborNode(cur) {
		fmt.Println(dom.NodeName(cur))
	}
	// #document
	// html
	// head
	// body
	// p
	// #text
	// i
	// a
	// #text
	// #text
	// p
}
golang-github-johanneskaufmann-dom-0.2.0/examples/remove_replace/ 0000775 0000000 0000000 00000000000 14745676166 0025175 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/examples/remove_replace/main.go 0000664 0000000 0000000 00000001677 14745676166 0026463 0 ustar 00root root 0000000 0000000 package main
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
// main demonstrates replacing and removing nodes:
//  1. every "i" element carrying the class "lang__old" is replaced by a
//     text node,
//  2. every text node that only contains whitespace is removed,
//  3. the resulting tree representation is printed.
func main() {
	input := `
Javascript
PHP
Golang
`

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}

	// - - - - - //

	italicNodes := dom.FindAllNodes(doc, func(node *html.Node) bool {
		return dom.NodeName(node) == "i"
	})
	for _, node := range italicNodes {
		if !dom.HasClass(node, "lang__old") {
			continue
		}

		newNode := &html.Node{
			Type: html.TextNode,
			// U+1FAA6 (headstone emoji). Written as an escape so that the
			// literal survives being read with the wrong character encoding.
			Data: "\U0001FAA6",
		}
		dom.ReplaceNode(node, newNode)
	}

	// - - - - - //

	emptyTextNodes := dom.FindAllNodes(doc, func(node *html.Node) bool {
		// Check the cheap condition first so that the text is only
		// collected for text nodes.
		if dom.NodeName(node) != "#text" {
			return false
		}
		return strings.TrimSpace(dom.CollectText(node)) == ""
	})
	for _, node := range emptyTextNodes {
		dom.RemoveNode(node)
	}

	// - - - - - //

	fmt.Println(dom.RenderRepresentation(doc))
}
golang-github-johanneskaufmann-dom-0.2.0/examples/selectors/ 0000775 0000000 0000000 00000000000 14745676166 0024210 5 ustar 00root root 0000000 0000000 golang-github-johanneskaufmann-dom-0.2.0/examples/selectors/main.go 0000664 0000000 0000000 00000001375 14745676166 0025471 0 ustar 00root root 0000000 0000000 package main
import (
"fmt"
"log"
"strings"
"github.com/JohannesKaufmann/dom"
"golang.org/x/net/html"
)
// main demonstrates the finder helpers: it counts all heading nodes of the
// parsed document and prints the text of the first node that carries the
// class "repo__name".
func main() {
	input := `
Github
JohannesKaufmann/dom
Code
Issues
`

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}

	// - - - //

	isHeading := func(node *html.Node) bool {
		return dom.NameIsHeading(dom.NodeName(node))
	}
	hasRepoNameClass := func(node *html.Node) bool {
		return dom.HasClass(node, "repo__name")
	}

	headings := dom.FindAllNodes(doc, isHeading)
	repoName := dom.CollectText(dom.FindFirstNode(doc, hasRepoNameClass))

	fmt.Printf("count:%d name:%q\n", len(headings), repoName)
	// count:4 name:"JohannesKaufmann/dom"
}
golang-github-johanneskaufmann-dom-0.2.0/find.go 0000664 0000000 0000000 00000002072 14745676166 0021637 0 ustar 00root root 0000000 0000000 package dom
import "golang.org/x/net/html"
// ContainsNode reports whether FindFirstNode finds a node below startNode
// for which matchFn returns true.
func ContainsNode(startNode *html.Node, matchFn func(node *html.Node) bool) bool {
	match := FindFirstNode(startNode, matchFn)
	return match != nil
}
// FindFirstNode visits the nodes below startNode (descending via
// FirstChildNode, moving sideways via NextSiblingNode) and returns the first
// one for which matchFn reports true.
//
// startNode itself is never passed to matchFn, and the walk stops as soon as
// it would climb back up to startNode. nil is returned when nothing matches
// or when startNode is nil.
func FindFirstNode(startNode *html.Node, matchFn func(node *html.Node) bool) *html.Node {
	if startNode == nil {
		// Nothing to search; startNode.FirstChild below would panic.
		return nil
	}

	nextFunc := UNSTABLE_initGetNeighbor(
		FirstChildNode,
		NextSiblingNode,
		func(node *html.Node) bool {
			// We should not get higher up than the startNode...
			return node == startNode
		},
	)

	for child := startNode.FirstChild; child != nil; child = nextFunc(child) {
		if matchFn(child) {
			return child
		}
	}
	return nil
}
// FindAllNodes visits the nodes below startNode (descending via
// FirstChildNode, moving sideways via NextSiblingNode) and collects every
// node for which matchFn reports true, in the order they were visited.
//
// startNode itself is never passed to matchFn, and the walk stops as soon as
// it would climb back up to startNode. The result is nil when nothing matches
// or when startNode is nil.
func FindAllNodes(startNode *html.Node, matchFn func(node *html.Node) bool) (foundNodes []*html.Node) {
	if startNode == nil {
		// Nothing to search; startNode.FirstChild below would panic.
		return nil
	}

	nextFunc := UNSTABLE_initGetNeighbor(
		FirstChildNode,
		NextSiblingNode,
		func(node *html.Node) bool {
			// We should not get higher up than the startNode...
			return node == startNode
		},
	)

	for child := startNode.FirstChild; child != nil; child = nextFunc(child) {
		if matchFn(child) {
			foundNodes = append(foundNodes, child)
		}
	}
	return foundNodes
}
golang-github-johanneskaufmann-dom-0.2.0/find_test.go 0000664 0000000 0000000 00000004755 14745676166 0022710 0 ustar 00root root 0000000 0000000 package dom
import (
"strings"
"testing"
"golang.org/x/net/html"
)
// TestContainsNode checks that ContainsNode only searches below the node it
// is given: it must stop at the first match, and it must never visit the
// "next" node that lies outside of the searched subtree.
func TestContainsNode(t *testing.T) {
	input := ` `

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}

	node := FindFirstNode(doc, func(node *html.Node) bool {
		return node.Data == "root"
	})
	if node == nil {
		// Without the root node the dereference below would panic.
		t.Fatal("expected to find the root node")
	}

	// A match exists: the search has to stop right after finding it.
	var called1 int
	res1 := ContainsNode(node.FirstChild, func(n *html.Node) bool {
		if n.Data == "next" {
			t.Error("the next node should not have been visited")
		}
		called1++
		return n.Data == "target"
	})
	if !res1 {
		t.Error("expected true")
	}
	if called1 != 2 {
		t.Errorf("expected fn to be called 2 times but got %d", called1)
	}

	// No match exists: the whole subtree is visited, but nothing beyond it.
	var called2 int
	res2 := ContainsNode(node.FirstChild, func(n *html.Node) bool {
		if n.Data == "next" {
			t.Error("the next node should not have been visited")
		}
		called2++
		return n.Data == "else"
	})
	if res2 {
		t.Error("expected false")
	}
	if called2 != 4 {
		t.Errorf("expected fn to be called 4 times but got %d", called2)
	}
}
// TestFindFirstNode checks that FindFirstNode returns the first matching
// node and nil when no node matches.
func TestFindFirstNode(t *testing.T) {
	input := `
Heading
short description
Heading
another description
`

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}

	article := FindFirstNode(doc, func(node *html.Node) bool {
		return NodeName(node) == "article"
	})
	if article == nil || article.Data != "article" {
		// Fatal instead of Error: the searches below start at article
		// and cannot run without it.
		t.Fatal("got different node")
	}

	h3 := FindFirstNode(article, func(node *html.Node) bool {
		return NodeName(node) == "h3"
	})
	if h3 == nil || h3.Data != "h3" {
		t.Error("got different node")
	}

	h4 := FindFirstNode(article, func(node *html.Node) bool {
		return NodeName(node) == "h4"
	})
	if h4 != nil {
		t.Error("expected nil node")
	}
}
// TestFindAllNodes checks that FindAllNodes returns every matching node.
func TestFindAllNodes(t *testing.T) {
	input := `
Heading
short description
Heading
another description
`

	doc, err := html.Parse(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}

	paragraphs := FindAllNodes(doc, func(node *html.Node) bool {
		return NodeName(node) == "p"
	})
	if len(paragraphs) != 2 {
		// Fatal instead of Error: indexing below would panic on a
		// shorter slice.
		t.Fatalf("expected 2 nodes but got %d", len(paragraphs))
	}

	for _, paragraph := range paragraphs {
		if paragraph.Data != "p" {
			t.Error("expected paragraph nodes")
			break
		}
	}
}
golang-github-johanneskaufmann-dom-0.2.0/go.mod 0000664 0000000 0000000 00000000124 14745676166 0021472 0 ustar 00root root 0000000 0000000 module github.com/JohannesKaufmann/dom
go 1.22.1
require golang.org/x/net v0.33.0
golang-github-johanneskaufmann-dom-0.2.0/go.sum 0000664 0000000 0000000 00000000462 14745676166 0021524 0 ustar 00root root 0000000 0000000 golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang-github-johanneskaufmann-dom-0.2.0/neighbors.go 0000664 0000000 0000000 00000005004 14745676166 0022675 0 ustar 00root root 0000000 0000000 package dom
import "golang.org/x/net/html"
// UNSTABLE_initGetNeighbor builds a "get neighbor" function out of three
// building blocks:
//   - firstChildFunc returns the child to descend into (or nil),
//   - prevNextFunc returns the sibling to move to (or nil),
//   - goUpUntilFunc reports whether an ancestor is the boundary at which
//     the upwards search has to give up.
//
// The returned function tries, in this order: the child of the node, the
// sibling of the node, and then the sibling of the closest ancestor that has
// one. It returns nil when the node is nil, when the top of the tree is
// reached, or when goUpUntilFunc stops the climb.
//
// Warning: It is not meant to be called directly and may change signature from release to release!
func UNSTABLE_initGetNeighbor(
	firstChildFunc func(node *html.Node) *html.Node,
	prevNextFunc func(node *html.Node) *html.Node,
	goUpUntilFunc func(node *html.Node) bool,
) func(*html.Node) *html.Node {
	return func(node *html.Node) *html.Node {
		if node == nil {
			// A nil node has no neighbors (and node.Parent below would panic).
			return nil
		}

		// First look at the children
		if child := firstChildFunc(node); child != nil {
			return child
		}

		// Otherwise my prev/next sibling
		if sibling := prevNextFunc(node); sibling != nil {
			return sibling
		}

		// Finally, continuously go upwards until we find an element with a sibling
		for parent := node.Parent; parent != nil; parent = parent.Parent {
			if goUpUntilFunc(parent) {
				// Don't go too far up...
				return nil
			}

			if sibling := prevNextFunc(parent); sibling != nil {
				return sibling
			}
		}

		// We reached the top
		return nil
	}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //
var (
	// goUpForever never reports a boundary, so the upwards search is only
	// stopped by running out of parents.
	goUpForever = func(_ *html.Node) bool {
		return false
	}

	// skipFirstChild never returns a child, so the search goes straight to
	// the siblings.
	skipFirstChild = func(_ *html.Node) *html.Node {
		return nil
	}
)
// GetPrevNeighborNode returns the neighbor found by UNSTABLE_initGetNeighbor
// when descending via FirstChildNode and moving sideways via PrevSiblingNode.
// The climb through the parents is not limited (goUpForever).
func GetPrevNeighborNode(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(FirstChildNode, PrevSiblingNode, goUpForever)
	return getNeighbor(node)
}
// GetPrevNeighborElement returns the neighbor found by
// UNSTABLE_initGetNeighbor when descending via FirstChildElement and moving
// sideways via PrevSiblingElement. The climb through the parents is not
// limited (goUpForever).
func GetPrevNeighborElement(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(FirstChildElement, PrevSiblingElement, goUpForever)
	return getNeighbor(node)
}
// GetPrevNeighborNodeExcludingOwnChild returns the neighbor found by
// UNSTABLE_initGetNeighbor when the children are skipped (skipFirstChild)
// and the search moves sideways via PrevSiblingNode. The climb through the
// parents is not limited (goUpForever).
func GetPrevNeighborNodeExcludingOwnChild(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(skipFirstChild, PrevSiblingNode, goUpForever)
	return getNeighbor(node)
}
// GetPrevNeighborElementExcludingOwnChild returns the neighbor found by
// UNSTABLE_initGetNeighbor when the children are skipped (skipFirstChild)
// and the search moves sideways via PrevSiblingElement. The climb through
// the parents is not limited (goUpForever).
func GetPrevNeighborElementExcludingOwnChild(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(skipFirstChild, PrevSiblingElement, goUpForever)
	return getNeighbor(node)
}
// - - - - - - - - //
// GetNextNeighborNode returns the neighbor found by UNSTABLE_initGetNeighbor
// when descending via FirstChildNode and moving sideways via NextSiblingNode.
// The climb through the parents is not limited (goUpForever).
func GetNextNeighborNode(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(FirstChildNode, NextSiblingNode, goUpForever)
	return getNeighbor(node)
}
// GetNextNeighborElement returns the neighbor found by
// UNSTABLE_initGetNeighbor when descending via FirstChildElement and moving
// sideways via NextSiblingElement. The climb through the parents is not
// limited (goUpForever).
func GetNextNeighborElement(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(FirstChildElement, NextSiblingElement, goUpForever)
	return getNeighbor(node)
}
// GetNextNeighborNodeExcludingOwnChild returns the neighbor found by
// UNSTABLE_initGetNeighbor when the children are skipped (skipFirstChild)
// and the search moves sideways via NextSiblingNode. The climb through the
// parents is not limited (goUpForever).
func GetNextNeighborNodeExcludingOwnChild(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(skipFirstChild, NextSiblingNode, goUpForever)
	return getNeighbor(node)
}
// GetNextNeighborElementExcludingOwnChild returns the neighbor found by
// UNSTABLE_initGetNeighbor when the children are skipped (skipFirstChild)
// and the search moves sideways via NextSiblingElement. The climb through
// the parents is not limited (goUpForever).
func GetNextNeighborElementExcludingOwnChild(node *html.Node) *html.Node {
	getNeighbor := UNSTABLE_initGetNeighbor(skipFirstChild, NextSiblingElement, goUpForever)
	return getNeighbor(node)
}
golang-github-johanneskaufmann-dom-0.2.0/neighbors_test.go 0000664 0000000 0000000 00000014653 14745676166 0023746 0 ustar 00root root 0000000 0000000 package dom
import (
"strconv"
"strings"
"testing"
"golang.org/x/net/html"
)
// inputOneOriginal is the fixture used by TestInitGetNeighbor, which removes
// all newlines and tabs from it before parsing.
var inputOneOriginal = `
up
start
down
`
// TestInitGetNeighbor runs every Get*Neighbor* function repeatedly, starting
// at the "button" element, until it returns nil.
//
// Each visited node gets a "match" attribute holding its position in the
// visiting order. The rendered representation of the whole document is then
// compared with the expected tree, so one string verifies both which nodes
// were visited and in which order.
func TestInitGetNeighbor(t *testing.T) {
	testCases := []struct {
		desc     string
		fn       func(*html.Node) *html.Node
		expected string
	}{
		{
			desc: "GetPrevNeighborNode",
			fn:   GetPrevNeighborNode,
			expected: `
#document
├─html
│ ├─head (match="6")
│ ├─body
│ │ ├─nav (match="3")
│ │ │ ├─p (match="4")
│ │ │ │ ├─#text (match="5") "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span (match="1")
│ │ │ │ │ ├─#text (match="2") "start"
│ │ │ ├─div
│ │ │ │ ├─h3
│ │ │ │ │ ├─#text "heading"
│ │ │ │ ├─p
│ │ │ │ │ ├─#text "description"
│ │ ├─footer
│ │ │ ├─p
│ │ │ │ ├─#text "down"
`,
		},
		{
			desc: "GetPrevNeighborElement",
			fn:   GetPrevNeighborElement,
			expected: `
#document
├─html
│ ├─head (match="4")
│ ├─body
│ │ ├─nav (match="2")
│ │ │ ├─p (match="3")
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span (match="1")
│ │ │ │ │ ├─#text "start"
│ │ │ ├─div
│ │ │ │ ├─h3
│ │ │ │ │ ├─#text "heading"
│ │ │ │ ├─p
│ │ │ │ │ ├─#text "description"
│ │ ├─footer
│ │ │ ├─p
│ │ │ │ ├─#text "down"
`,
		},
		{
			desc: "GetPrevNeighborNodeExcludingOwnChild",
			fn:   GetPrevNeighborNodeExcludingOwnChild,
			expected: `
#document
├─html
│ ├─head (match="2")
│ ├─body
│ │ ├─nav (match="1")
│ │ │ ├─p
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span
│ │ │ │ │ ├─#text "start"
│ │ │ ├─div
│ │ │ │ ├─h3
│ │ │ │ │ ├─#text "heading"
│ │ │ │ ├─p
│ │ │ │ │ ├─#text "description"
│ │ ├─footer
│ │ │ ├─p
│ │ │ │ ├─#text "down"
`,
		},
		{
			desc: "GetPrevNeighborElementExcludingOwnChild",
			fn:   GetPrevNeighborElementExcludingOwnChild,
			expected: `
#document
├─html
│ ├─head (match="2")
│ ├─body
│ │ ├─nav (match="1")
│ │ │ ├─p
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span
│ │ │ │ │ ├─#text "start"
│ │ │ ├─div
│ │ │ │ ├─h3
│ │ │ │ │ ├─#text "heading"
│ │ │ │ ├─p
│ │ │ │ │ ├─#text "description"
│ │ ├─footer
│ │ │ ├─p
│ │ │ │ ├─#text "down"
`,
		},

		// - - - - - - - - - - - - - - - - //

		{
			desc: "GetNextNeighborNode",
			fn:   GetNextNeighborNode,
			expected: `
#document
├─html
│ ├─head
│ ├─body
│ │ ├─nav
│ │ │ ├─p
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span (match="1")
│ │ │ │ │ ├─#text (match="2") "start"
│ │ │ ├─div (match="3")
│ │ │ │ ├─h3 (match="4")
│ │ │ │ │ ├─#text (match="5") "heading"
│ │ │ │ ├─p (match="6")
│ │ │ │ │ ├─#text (match="7") "description"
│ │ ├─footer (match="8")
│ │ │ ├─p (match="9")
│ │ │ │ ├─#text (match="10") "down"
`,
		},
		{
			desc: "GetNextNeighborElement",
			fn:   GetNextNeighborElement,
			expected: `
#document
├─html
│ ├─head
│ ├─body
│ │ ├─nav
│ │ │ ├─p
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span (match="1")
│ │ │ │ │ ├─#text "start"
│ │ │ ├─div (match="2")
│ │ │ │ ├─h3 (match="3")
│ │ │ │ │ ├─#text "heading"
│ │ │ │ ├─p (match="4")
│ │ │ │ │ ├─#text "description"
│ │ ├─footer (match="5")
│ │ │ ├─p (match="6")
│ │ │ │ ├─#text "down"
`,
		},
		{
			desc: "GetNextNeighborNodeExcludingOwnChild",
			fn:   GetNextNeighborNodeExcludingOwnChild,
			expected: `
#document
├─html
│ ├─head
│ ├─body
│ │ ├─nav
│ │ │ ├─p
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span
│ │ │ │ │ ├─#text "start"
│ │ │ ├─div (match="1")
│ │ │ │ ├─h3
│ │ │ │ │ ├─#text "heading"
│ │ │ │ ├─p
│ │ │ │ │ ├─#text "description"
│ │ ├─footer (match="2")
│ │ │ ├─p
│ │ │ │ ├─#text "down"
`,
		},
		{
			desc: "GetNextNeighborElementExcludingOwnChild",
			fn:   GetNextNeighborElementExcludingOwnChild,
			expected: `
#document
├─html
│ ├─head
│ ├─body
│ │ ├─nav
│ │ │ ├─p
│ │ │ │ ├─#text "up"
│ │ ├─main
│ │ │ ├─button (match="0")
│ │ │ │ ├─span
│ │ │ │ │ ├─#text "start"
│ │ │ ├─div (match="1")
│ │ │ │ ├─h3
│ │ │ │ │ ├─#text "heading"
│ │ │ │ ├─p
│ │ │ │ │ ├─#text "description"
│ │ ├─footer (match="2")
│ │ │ ├─p
│ │ │ │ ├─#text "down"
`,
		},
	}

	// The fixture is formatted for readability. The formatting whitespace is
	// removed once, so that it does not end up as text nodes in the tree.
	replacer := strings.NewReplacer(
		"\n", "",
		"\t", "",
	)
	inputOne := replacer.Replace(inputOneOriginal)

	for _, testCase := range testCases {
		t.Run(testCase.desc, func(t *testing.T) {
			// Every subtest parses its own document since the nodes are
			// modified below.
			doc, err := html.Parse(strings.NewReader(inputOne))
			if err != nil {
				t.Fatal(err)
			}

			button := FindFirstNode(doc, func(node *html.Node) bool {
				return NodeName(node) == "button"
			})
			if button == nil {
				t.Fatal("expected to find the button node")
			}

			// - - - - //

			var i int
			for node := button; node != nil; node = testCase.fn(node) {
				// We record at what point each node was visited...
				node.Attr = append(node.Attr, html.Attribute{
					Key: "match",
					Val: strconv.Itoa(i),
				})
				i++
			}

			// - - - - //

			r := RenderRepresentation(doc)
			t.Logf("rendered:\n%s\n", r)

			if r != strings.TrimSpace(testCase.expected) {
				t.Error("the representations dont match")
			}
		})
	}
}
golang-github-johanneskaufmann-dom-0.2.0/tags.go 0000664 0000000 0000000 00000003523 14745676166 0021657 0 ustar 00root root 0000000 0000000 package dom
import "golang.org/x/net/html"
// NodeName returns the name of the node. In order to stay consistent with v1
// of the library, the names follow the naming scheme of goquery.
// E.g. "#text", "div", ...
//
// A nil node, or a node whose type is not handled below, yields "".
func NodeName(node *html.Node) string {
	if node == nil {
		return ""
	}

	switch node.Type {
	case html.ElementNode, html.DoctypeNode:
		// An element reports its tag (e.g. "div" or "p"), a doctype
		// such as `<!DOCTYPE html>` reports "html".
		return node.Data
	case html.TextNode:
		return "#text"
	case html.CommentNode:
		return "#comment"
	case html.DocumentNode:
		return "#document"
	case html.ErrorNode:
		return "#error"
	default:
		return ""
	}
}
// NameIsInlineNode reports whether name (as returned by NodeName) is in the
// list of inline node names below. The comparison is case-sensitive.
func NameIsInlineNode(name string) bool {
	switch name {
	case "#text":
	case "a", "abbr", "acronym", "audio":
	case "b", "bdi", "bdo", "big", "br", "button":
	case "canvas", "cite", "code":
	case "data", "datalist", "del", "dfn":
	case "em", "embed":
	case "i", "iframe", "img", "input", "ins":
	case "kbd":
	case "label":
	case "map", "mark", "meter":
	case "noscript":
	case "object", "output":
	case "picture", "progress":
	case "q":
	case "ruby":
	case "s", "samp", "script", "select", "slot", "small", "span", "strong", "sub", "sup", "svg":
	case "template", "textarea", "time", "tt":
	case "u":
	case "var", "video":
	case "wbr":
	default:
		// Not in the list above.
		return false
	}

	// Every listed case leaves the switch without returning.
	return true
}
// NameIsBlockNode reports whether name (as returned by NodeName) is in the
// list of block node names below. The comparison is case-sensitive.
func NameIsBlockNode(name string) bool {
	switch name {
	case "address", "article", "aside":
	case "blockquote":
	case "details", "dialog", "dd", "div", "dl", "dt":
	case "fieldset", "figcaption", "figure", "footer", "form":
	case "h1", "h2", "h3", "h4", "h5", "h6":
	case "header", "hgroup", "hr":
	case "li":
	case "main":
	case "nav":
	case "ol":
	case "p", "pre":
	case "section":
	case "table":
	case "ul":
	default:
		// Not in the list above.
		return false
	}

	// Every listed case leaves the switch without returning.
	return true
}
// NameIsHeading reports whether name is one of "h1", "h2", "h3", "h4", "h5"
// or "h6". The comparison is case-sensitive.
func NameIsHeading(name string) bool {
	// A heading name is exactly the letter "h" followed by one digit.
	if len(name) != 2 || name[0] != 'h' {
		return false
	}

	level := name[1]
	return '1' <= level && level <= '6'
}
golang-github-johanneskaufmann-dom-0.2.0/tags_test.go 0000664 0000000 0000000 00000004157 14745676166 0022722 0 ustar 00root root 0000000 0000000 package dom
import (
"testing"
"golang.org/x/net/html"
)
// TestNodeName checks the name reported by NodeName for a nil node, for an
// unknown node type and for every known node type.
func TestNodeName(t *testing.T) {
	// newNode keeps the table below compact.
	newNode := func(nodeType html.NodeType, data string) *html.Node {
		return &html.Node{Type: nodeType, Data: data}
	}

	testCases := []struct {
		want  string
		input *html.Node
	}{
		{want: "", input: nil},
		// A node type that NodeName does not know about.
		{want: "", input: newNode(10, "")},
		{want: "#error", input: newNode(html.ErrorNode, "")},
		{want: "#text", input: newNode(html.TextNode, "some boring text")},
		{want: "#document", input: newNode(html.DocumentNode, "")},
		{want: "#comment", input: newNode(html.CommentNode, "")},
		// E.g. for `<!DOCTYPE html>` it would be "html"
		{want: "html", input: newNode(html.DoctypeNode, "html")},

		// - - - - - - - - - - //

		{want: "div", input: newNode(html.ElementNode, "div")},
		{want: "a", input: newNode(html.ElementNode, "a")},
	}

	for _, tc := range testCases {
		t.Run(tc.want, func(t *testing.T) {
			if got := NodeName(tc.input); got != tc.want {
				t.Errorf("expected '%s' but got '%s'", tc.want, got)
			}
		})
	}
}
// TestNameIsInlineNode checks NameIsInlineNode with an inline name, a block
// name and an unknown name.
func TestNameIsInlineNode(t *testing.T) {
	testCases := []struct {
		name string
		want bool
	}{
		{name: "strong", want: true},
		{name: "div", want: false},
		{name: "magic", want: false},
	}

	for _, tc := range testCases {
		if got := NameIsInlineNode(tc.name); got != tc.want {
			// Name the input so that a failure can be traced to its case.
			t.Errorf("NameIsInlineNode(%q) = %t, want %t", tc.name, got, tc.want)
		}
	}
}
// TestNameIsBlockNode checks NameIsBlockNode with a block name, an inline
// name and an unknown name.
func TestNameIsBlockNode(t *testing.T) {
	testCases := []struct {
		name string
		want bool
	}{
		{name: "div", want: true},
		{name: "strong", want: false},
		{name: "magic", want: false},
	}

	for _, tc := range testCases {
		if got := NameIsBlockNode(tc.name); got != tc.want {
			// Name the input so that a failure can be traced to its case.
			t.Errorf("NameIsBlockNode(%q) = %t, want %t", tc.name, got, tc.want)
		}
	}
}
// TestNameIsHeading checks NameIsHeading with a heading name, an inline name
// and an unknown name.
func TestNameIsHeading(t *testing.T) {
	testCases := []struct {
		name string
		want bool
	}{
		{name: "h4", want: true},
		{name: "strong", want: false},
		{name: "magic", want: false},
	}

	for _, tc := range testCases {
		if got := NameIsHeading(tc.name); got != tc.want {
			// Name the input so that a failure can be traced to its case.
			t.Errorf("NameIsHeading(%q) = %t, want %t", tc.name, got, tc.want)
		}
	}
}