pax_global_header00006660000000000000000000000064147310631460014517gustar00rootroot0000000000000052 comment=5263deb988702df34b4de5b8cd2fe53add4bea3d cascadia-1.3.3/000077500000000000000000000000001473106314600132535ustar00rootroot00000000000000cascadia-1.3.3/.travis.yml000066400000000000000000000002151473106314600153620ustar00rootroot00000000000000language: go go: - 1.3 - 1.4 install: - go get github.com/andybalholm/cascadia script: - go test -v notifications: email: false cascadia-1.3.3/LICENSE000077500000000000000000000024151473106314600142650ustar00rootroot00000000000000Copyright (c) 2011 Andy Balholm. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
cascadia-1.3.3/README.md000066400000000000000000000064251473106314600145410ustar00rootroot00000000000000# cascadia [![](https://travis-ci.org/andybalholm/cascadia.svg)](https://travis-ci.org/andybalholm/cascadia) The Cascadia package implements CSS selectors for use with the parse trees produced by the html package. To test CSS selectors without writing Go code, check out [cascadia](https://github.com/suntong/cascadia) the command line tool, a thin wrapper around this package. [Refer to godoc here](https://godoc.org/github.com/andybalholm/cascadia). ## Example The following is an example of how you can use Cascadia. ```go package main import ( "fmt" "log" "strings" "github.com/andybalholm/cascadia" "golang.org/x/net/html" ) var pricingHtml string = `

Free

$0/mo

Pro

$15/mo

Enterprise

$29/mo

` func Query(n *html.Node, query string) *html.Node { sel, err := cascadia.Parse(query) if err != nil { return &html.Node{} } return cascadia.Query(n, sel) } func QueryAll(n *html.Node, query string) []*html.Node { sel, err := cascadia.Parse(query) if err != nil { return []*html.Node{} } return cascadia.QueryAll(n, sel) } func AttrOr(n *html.Node, attrName, or string) string { for _, a := range n.Attr { if a.Key == attrName { return a.Val } } return or } func main() { doc, err := html.Parse(strings.NewReader(pricingHtml)) if err != nil { log.Fatal(err) } fmt.Printf("List of pricing plans:\n\n") for i, p := range QueryAll(doc, "div.card.mb-4.box-shadow") { planName := Query(p, "h4").FirstChild.Data price := Query(p, ".pricing-card-title").FirstChild.Data usersIncluded := Query(p, "li:first-child").FirstChild.Data storage := Query(p, "li:nth-child(2)").FirstChild.Data detailsUrl := AttrOr(Query(p, "li:last-child a"), "href", "(No link available)") fmt.Printf( "Plan #%d\nName: %s\nPrice: %s\nUsers: %s\nStorage: %s\nDetails: %s\n\n", i+1, planName, price, usersIncluded, storage, detailsUrl, ) } } ``` The output is: ``` List of pricing plans: Plan #1 Name: Free Price: $0/mo Users: 10 users included Storage: 2 GB of storage Details: https://example.com Plan #2 Name: Pro Price: $15/mo Users: 20 users included Storage: 10 GB of storage Details: https://example.com Plan #3 Name: Enterprise Price: $29/mo Users: 30 users included Storage: 15 GB of storage Details: (No link available) ``` cascadia-1.3.3/benchmark_test.go000066400000000000000000000020351473106314600165730ustar00rootroot00000000000000package cascadia import ( "strings" "testing" "golang.org/x/net/html" ) func MustParseHTML(doc string) *html.Node { dom, err := html.Parse(strings.NewReader(doc)) if err != nil { panic(err) } return dom } var selector = MustCompile(`div.matched`) var doc = `
` var dom = MustParseHTML(doc) func BenchmarkMatchAll(b *testing.B) { var matches []*html.Node for i := 0; i < b.N; i++ { matches = selector.MatchAll(dom) } _ = matches } cascadia-1.3.3/fuzz/000077500000000000000000000000001473106314600142515ustar00rootroot00000000000000cascadia-1.3.3/fuzz/corpus/000077500000000000000000000000001473106314600155645ustar00rootroot00000000000000cascadia-1.3.3/fuzz/corpus/test0000066400000000000000000000000071473106314600165430ustar00rootroot00000000000000addresscascadia-1.3.3/fuzz/corpus/test1000066400000000000000000000000011473106314600165360ustar00rootroot00000000000000*cascadia-1.3.3/fuzz/corpus/test10000066400000000000000000000000101473106314600166160ustar00rootroot00000000000000p[title]cascadia-1.3.3/fuzz/corpus/test11000066400000000000000000000000241473106314600166240ustar00rootroot00000000000000address[title="foo"]cascadia-1.3.3/fuzz/corpus/test12000066400000000000000000000000441473106314600166270ustar00rootroot00000000000000[ title ~= foo ]cascadia-1.3.3/fuzz/corpus/test13000066400000000000000000000000261473106314600166300ustar00rootroot00000000000000[title~="hello 
world"]cascadia-1.3.3/fuzz/corpus/test14000066400000000000000000000000141473106314600166260ustar00rootroot00000000000000[lang|="en"]cascadia-1.3.3/fuzz/corpus/test15000066400000000000000000000000161473106314600166310ustar00rootroot00000000000000[title^="foo"]cascadia-1.3.3/fuzz/corpus/test16000066400000000000000000000000161473106314600166320ustar00rootroot00000000000000[title$="bar"]cascadia-1.3.3/fuzz/corpus/test17000066400000000000000000000000161473106314600166330ustar00rootroot00000000000000[title*="bar"]cascadia-1.3.3/fuzz/corpus/test18000066400000000000000000000000141473106314600166320ustar00rootroot00000000000000.t1:not(.t2)cascadia-1.3.3/fuzz/corpus/test19000066400000000000000000000000141473106314600166330ustar00rootroot00000000000000div:not(.t1)cascadia-1.3.3/fuzz/corpus/test2000066400000000000000000000000041473106314600165420ustar00rootroot00000000000000#foocascadia-1.3.3/fuzz/corpus/test20000066400000000000000000000000211473106314600166210ustar00rootroot00000000000000li:nth-child(odd)cascadia-1.3.3/fuzz/corpus/test21000066400000000000000000000000221473106314600166230ustar00rootroot00000000000000li:nth-child(even)cascadia-1.3.3/fuzz/corpus/test22000066400000000000000000000000221473106314600166240ustar00rootroot00000000000000li:nth-child(-n+2)cascadia-1.3.3/fuzz/corpus/test23000066400000000000000000000000221473106314600166250ustar00rootroot00000000000000li:nth-child(3n+1)cascadia-1.3.3/fuzz/corpus/test24000066400000000000000000000000261473106314600166320ustar00rootroot00000000000000li:nth-last-child(odd)cascadia-1.3.3/fuzz/corpus/test25000066400000000000000000000000271473106314600166340ustar00rootroot00000000000000li:nth-last-child(even)cascadia-1.3.3/fuzz/corpus/test26000066400000000000000000000000271473106314600166350ustar00rootroot00000000000000li:nth-last-child(-n+2)cascadia-1.3.3/fuzz/corpus/test27000066400000000000000000000000271473106314600166360ustar00rootroot00000000000000li:nth-last-child(3n+1)cascadia-1.3.3/fuzz/corpus/test280000664000000000000000
00000000201473106314600166300ustar00rootroot00000000000000span:first-childcascadia-1.3.3/fuzz/corpus/test29000066400000000000000000000000171473106314600166370ustar00rootroot00000000000000span:last-childcascadia-1.3.3/fuzz/corpus/test3000066400000000000000000000000051473106314600165440ustar00rootroot00000000000000li#t1cascadia-1.3.3/fuzz/corpus/test30000066400000000000000000000000201473106314600166210ustar00rootroot00000000000000p:nth-of-type(2)cascadia-1.3.3/fuzz/corpus/test31000066400000000000000000000000251473106314600166270ustar00rootroot00000000000000p:nth-last-of-type(2)cascadia-1.3.3/fuzz/corpus/test32000066400000000000000000000000161473106314600166300ustar00rootroot00000000000000p:last-of-typecascadia-1.3.3/fuzz/corpus/test33000066400000000000000000000000171473106314600166320ustar00rootroot00000000000000p:first-of-typecascadia-1.3.3/fuzz/corpus/test34000066400000000000000000000000141473106314600166300ustar00rootroot00000000000000p:only-childcascadia-1.3.3/fuzz/corpus/test35000066400000000000000000000000161473106314600166330ustar00rootroot00000000000000p:only-of-typecascadia-1.3.3/fuzz/corpus/test36000066400000000000000000000000061473106314600166330ustar00rootroot00000000000000:emptycascadia-1.3.3/fuzz/corpus/test37000066400000000000000000000000051473106314600166330ustar00rootroot00000000000000div pcascadia-1.3.3/fuzz/corpus/test38000066400000000000000000000000131473106314600166330ustar00rootroot00000000000000div table pcascadia-1.3.3/fuzz/corpus/test39000066400000000000000000000000071473106314600166370ustar00rootroot00000000000000div > pcascadia-1.3.3/fuzz/corpus/test4000066400000000000000000000000041473106314600165440ustar00rootroot00000000000000*#t4cascadia-1.3.3/fuzz/corpus/test40000066400000000000000000000000051473106314600166250ustar00rootroot00000000000000p ~ pcascadia-1.3.3/fuzz/corpus/test41000066400000000000000000000000051473106314600166260ustar00rootroot00000000000000p + 
pcascadia-1.3.3/fuzz/corpus/test42000066400000000000000000000000051473106314600166270ustar00rootroot00000000000000li, pcascadia-1.3.3/fuzz/corpus/test43000066400000000000000000000000321473106314600166300ustar00rootroot00000000000000p +/*This is a comment*/ pcascadia-1.3.3/fuzz/corpus/test44000066400000000000000000000000301473106314600166270ustar00rootroot00000000000000p:contains("that wraps")cascadia-1.3.3/fuzz/corpus/test45000066400000000000000000000000331473106314600166330ustar00rootroot00000000000000p:containsOwn("that wraps")cascadia-1.3.3/fuzz/corpus/test46000066400000000000000000000000251473106314600166350ustar00rootroot00000000000000:containsOwn("inner")cascadia-1.3.3/fuzz/corpus/test47000066400000000000000000000000261473106314600166370ustar00rootroot00000000000000p:containsOwn("block")cascadia-1.3.3/fuzz/corpus/test48000066400000000000000000000000141473106314600166350ustar00rootroot00000000000000div:has(#p1)cascadia-1.3.3/fuzz/corpus/test49000066400000000000000000000000321473106314600166360ustar00rootroot00000000000000div:has(:containsOwn("2"))cascadia-1.3.3/fuzz/corpus/test5000066400000000000000000000000031473106314600165440ustar00rootroot00000000000000.t1cascadia-1.3.3/fuzz/corpus/test50000066400000000000000000000000341473106314600166300ustar00rootroot00000000000000body :has(:containsOwn("2"))cascadia-1.3.3/fuzz/corpus/test51000066400000000000000000000000411473106314600166270ustar00rootroot00000000000000body 
:haschild(:containsOwn("2"))cascadia-1.3.3/fuzz/corpus/test52000066400000000000000000000000171473106314600166330ustar00rootroot00000000000000p:matches([\d])cascadia-1.3.3/fuzz/corpus/test53000066400000000000000000000000201473106314600166260ustar00rootroot00000000000000p:matches([a-z])cascadia-1.3.3/fuzz/corpus/test54000066400000000000000000000000231473106314600166320ustar00rootroot00000000000000p:matches([a-zA-Z])cascadia-1.3.3/fuzz/corpus/test55000066400000000000000000000000201473106314600166300ustar00rootroot00000000000000p:matches([^\d])cascadia-1.3.3/fuzz/corpus/test56000066400000000000000000000000211473106314600166320ustar00rootroot00000000000000p:matches(^(0|a))cascadia-1.3.3/fuzz/corpus/test57000066400000000000000000000000201473106314600166320ustar00rootroot00000000000000p:matches(^\d+$)cascadia-1.3.3/fuzz/corpus/test58000066400000000000000000000000261473106314600166410ustar00rootroot00000000000000p:not(:matches(^\d+$))cascadia-1.3.3/fuzz/corpus/test59000066400000000000000000000000261473106314600166420ustar00rootroot00000000000000div :matchesOwn(^\d+$)cascadia-1.3.3/fuzz/corpus/test6000066400000000000000000000000041473106314600165460ustar00rootroot00000000000000p.t1cascadia-1.3.3/fuzz/corpus/test60000066400000000000000000000000611473106314600166310ustar00rootroot00000000000000[href#=(fina)]:not([href#=(\/\/[^\/]+untrusted)])cascadia-1.3.3/fuzz/corpus/test61000066400000000000000000000000421473106314600166310ustar00rootroot00000000000000[href#=(^https:\/\/[^\/]*\/?news)]cascadia-1.3.3/fuzz/corpus/test7000066400000000000000000000000101473106314600165440ustar00rootroot00000000000000div.teSTcascadia-1.3.3/fuzz/corpus/test8000066400000000000000000000000101473106314600165450ustar00rootroot00000000000000.t1.failcascadia-1.3.3/fuzz/corpus/test9000066400000000000000000000000071473106314600165540ustar00rootroot00000000000000p.t1.t2cascadia-1.3.3/fuzz/fuzz.go000066400000000000000000000004321473106314600155750ustar00rootroot00000000000000package fuzz import 
"github.com/andybalholm/cascadia" // Fuzz is the entrypoint used by the go-fuzz framework func Fuzz(data []byte) int { sel, err := cascadia.Compile(string(data)) if err != nil { if sel != nil { panic("sel != nil on error") } return 0 } return 1 } cascadia-1.3.3/go.mod000066400000000000000000000001221473106314600143540ustar00rootroot00000000000000module github.com/andybalholm/cascadia go 1.16 require golang.org/x/net v0.33.0 cascadia-1.3.3/go.sum000066400000000000000000000144371473106314600144170ustar00rootroot00000000000000github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod 
h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= cascadia-1.3.3/parser.go000066400000000000000000000510151473106314600151000ustar00rootroot00000000000000// Package cascadia is an implementation of CSS selectors. package cascadia import ( "errors" "fmt" "regexp" "strconv" "strings" ) // a parser for CSS selectors type parser struct { s string // the source text i int // the current position // if `false`, parsing a pseudo-element // returns an error. acceptPseudoElements bool } // parseEscape parses a backslash escape. 
func (p *parser) parseEscape() (result string, err error) { if len(p.s) < p.i+2 || p.s[p.i] != '\\' { return "", errors.New("invalid escape sequence") } start := p.i + 1 c := p.s[start] switch { case c == '\r' || c == '\n' || c == '\f': return "", errors.New("escaped line ending outside string") case hexDigit(c): // unicode escape (hex) var i int for i = start; i < start+6 && i < len(p.s) && hexDigit(p.s[i]); i++ { // empty } v, _ := strconv.ParseUint(p.s[start:i], 16, 64) if len(p.s) > i { switch p.s[i] { case '\r': i++ if len(p.s) > i && p.s[i] == '\n' { i++ } case ' ', '\t', '\n', '\f': i++ } } p.i = i return string(rune(v)), nil } // Return the literal character after the backslash. result = p.s[start : start+1] p.i += 2 return result, nil } // toLowerASCII returns s with all ASCII capital letters lowercased. func toLowerASCII(s string) string { var b []byte for i := 0; i < len(s); i++ { if c := s[i]; 'A' <= c && c <= 'Z' { if b == nil { b = make([]byte, len(s)) copy(b, s) } b[i] = s[i] + ('a' - 'A') } } if b == nil { return s } return string(b) } func hexDigit(c byte) bool { return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' } // nameStart returns whether c can be the first character of an identifier // (not counting an initial hyphen, or an escape sequence). func nameStart(c byte) bool { return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 } // nameChar returns whether c can be a character within an identifier // (not counting an escape sequence). func nameChar(c byte) bool { return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' || c > 127 || c == '-' || '0' <= c && c <= '9' } // parseIdentifier parses an identifier. 
func (p *parser) parseIdentifier() (result string, err error) { const prefix = '-' var numPrefix int for len(p.s) > p.i && p.s[p.i] == prefix { p.i++ numPrefix++ } if len(p.s) <= p.i { return "", errors.New("expected identifier, found EOF instead") } if c := p.s[p.i]; !(nameStart(c) || c == '\\') { return "", fmt.Errorf("expected identifier, found %c instead", c) } result, err = p.parseName() if numPrefix > 0 && err == nil { result = strings.Repeat(string(prefix), numPrefix) + result } return } // parseName parses a name (which is like an identifier, but doesn't have // extra restrictions on the first character). func (p *parser) parseName() (result string, err error) { i := p.i loop: for i < len(p.s) { c := p.s[i] switch { case nameChar(c): start := i for i < len(p.s) && nameChar(p.s[i]) { i++ } result += p.s[start:i] case c == '\\': p.i = i val, err := p.parseEscape() if err != nil { return "", err } i = p.i result += val default: break loop } } if result == "" { return "", errors.New("expected name, found EOF instead") } p.i = i return result, nil } // parseString parses a single- or double-quoted string. func (p *parser) parseString() (result string, err error) { i := p.i if len(p.s) < i+2 { return "", errors.New("expected string, found EOF instead") } quote := p.s[i] i++ loop: for i < len(p.s) { switch p.s[i] { case '\\': if len(p.s) > i+1 { switch c := p.s[i+1]; c { case '\r': if len(p.s) > i+2 && p.s[i+2] == '\n' { i += 3 continue loop } fallthrough case '\n', '\f': i += 2 continue loop } } p.i = i val, err := p.parseEscape() if err != nil { return "", err } i = p.i result += val case quote: break loop case '\r', '\n', '\f': return "", errors.New("unexpected end of line in string") default: start := i for i < len(p.s) { if c := p.s[i]; c == quote || c == '\\' || c == '\r' || c == '\n' || c == '\f' { break } i++ } result += p.s[start:i] } } if i >= len(p.s) { return "", errors.New("EOF in string") } // Consume the final quote. 
i++ p.i = i return result, nil } // parseRegex parses a regular expression; the end is defined by encountering an // unmatched closing ')' or ']' which is not consumed func (p *parser) parseRegex() (rx *regexp.Regexp, err error) { i := p.i if len(p.s) < i+2 { return nil, errors.New("expected regular expression, found EOF instead") } // number of open parens or brackets; // when it becomes negative, finished parsing regex open := 0 loop: for i < len(p.s) { switch p.s[i] { case '(', '[': open++ case ')', ']': open-- if open < 0 { break loop } } i++ } if i >= len(p.s) { return nil, errors.New("EOF in regular expression") } rx, err = regexp.Compile(p.s[p.i:i]) p.i = i return rx, err } // skipWhitespace consumes whitespace characters and comments. // It returns true if there was actually anything to skip. func (p *parser) skipWhitespace() bool { i := p.i for i < len(p.s) { switch p.s[i] { case ' ', '\t', '\r', '\n', '\f': i++ continue case '/': if strings.HasPrefix(p.s[i:], "/*") { end := strings.Index(p.s[i+len("/*"):], "*/") if end != -1 { i += end + len("/**/") continue } } } break } if i > p.i { p.i = i return true } return false } // consumeParenthesis consumes an opening parenthesis and any following // whitespace. It returns true if there was actually a parenthesis to skip. func (p *parser) consumeParenthesis() bool { if p.i < len(p.s) && p.s[p.i] == '(' { p.i++ p.skipWhitespace() return true } return false } // consumeClosingParenthesis consumes a closing parenthesis and any preceding // whitespace. It returns true if there was actually a parenthesis to skip. func (p *parser) consumeClosingParenthesis() bool { i := p.i p.skipWhitespace() if p.i < len(p.s) && p.s[p.i] == ')' { p.i++ return true } p.i = i return false } // parseTypeSelector parses a type selector (one that matches by tag name). 
func (p *parser) parseTypeSelector() (result tagSelector, err error) { tag, err := p.parseIdentifier() if err != nil { return } return tagSelector{tag: toLowerASCII(tag)}, nil } // parseIDSelector parses a selector that matches by id attribute. func (p *parser) parseIDSelector() (idSelector, error) { if p.i >= len(p.s) { return idSelector{}, fmt.Errorf("expected id selector (#id), found EOF instead") } if p.s[p.i] != '#' { return idSelector{}, fmt.Errorf("expected id selector (#id), found '%c' instead", p.s[p.i]) } p.i++ id, err := p.parseName() if err != nil { return idSelector{}, err } return idSelector{id: id}, nil } // parseClassSelector parses a selector that matches by class attribute. func (p *parser) parseClassSelector() (classSelector, error) { if p.i >= len(p.s) { return classSelector{}, fmt.Errorf("expected class selector (.class), found EOF instead") } if p.s[p.i] != '.' { return classSelector{}, fmt.Errorf("expected class selector (.class), found '%c' instead", p.s[p.i]) } p.i++ class, err := p.parseIdentifier() if err != nil { return classSelector{}, err } return classSelector{class: class}, nil } // parseAttributeSelector parses a selector that matches by attribute value. 
func (p *parser) parseAttributeSelector() (attrSelector, error) { if p.i >= len(p.s) { return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found EOF instead") } if p.s[p.i] != '[' { return attrSelector{}, fmt.Errorf("expected attribute selector ([attribute]), found '%c' instead", p.s[p.i]) } p.i++ p.skipWhitespace() key, err := p.parseIdentifier() if err != nil { return attrSelector{}, err } key = toLowerASCII(key) p.skipWhitespace() if p.i >= len(p.s) { return attrSelector{}, errors.New("unexpected EOF in attribute selector") } if p.s[p.i] == ']' { p.i++ return attrSelector{key: key, operation: ""}, nil } if p.i+2 >= len(p.s) { return attrSelector{}, errors.New("unexpected EOF in attribute selector") } op := p.s[p.i : p.i+2] if op[0] == '=' { op = "=" } else if op[1] != '=' { return attrSelector{}, fmt.Errorf(`expected equality operator, found "%s" instead`, op) } p.i += len(op) p.skipWhitespace() if p.i >= len(p.s) { return attrSelector{}, errors.New("unexpected EOF in attribute selector") } var val string var rx *regexp.Regexp if op == "#=" { rx, err = p.parseRegex() } else { switch p.s[p.i] { case '\'', '"': val, err = p.parseString() default: val, err = p.parseIdentifier() } } if err != nil { return attrSelector{}, err } p.skipWhitespace() if p.i >= len(p.s) { return attrSelector{}, errors.New("unexpected EOF in attribute selector") } // check if the attribute contains an ignore case flag ignoreCase := false if p.s[p.i] == 'i' || p.s[p.i] == 'I' { ignoreCase = true p.i++ } p.skipWhitespace() if p.i >= len(p.s) { return attrSelector{}, errors.New("unexpected EOF in attribute selector") } if p.s[p.i] != ']' { return attrSelector{}, fmt.Errorf("expected ']', found '%c' instead", p.s[p.i]) } p.i++ switch op { case "=", "!=", "~=", "|=", "^=", "$=", "*=", "#=": return attrSelector{key: key, val: val, operation: op, regexp: rx, insensitive: ignoreCase}, nil default: return attrSelector{}, fmt.Errorf("attribute operator %q is not supported", 
op) } } var ( errExpectedParenthesis = errors.New("expected '(' but didn't find it") errExpectedClosingParenthesis = errors.New("expected ')' but didn't find it") errUnmatchedParenthesis = errors.New("unmatched '('") ) // parsePseudoclassSelector parses a pseudoclass selector like :not(p) or a pseudo-element // For backwards compatibility, both ':' and '::' prefix are allowed for pseudo-elements. // https://drafts.csswg.org/selectors-3/#pseudo-elements // Returning a nil `Sel` (and a nil `error`) means we found a pseudo-element. func (p *parser) parsePseudoclassSelector() (out Sel, pseudoElement string, err error) { if p.i >= len(p.s) { return nil, "", fmt.Errorf("expected pseudoclass selector (:pseudoclass), found EOF instead") } if p.s[p.i] != ':' { return nil, "", fmt.Errorf("expected attribute selector (:pseudoclass), found '%c' instead", p.s[p.i]) } p.i++ var mustBePseudoElement bool if p.i >= len(p.s) { return nil, "", fmt.Errorf("got empty pseudoclass (or pseudoelement)") } if p.s[p.i] == ':' { // we found a pseudo-element mustBePseudoElement = true p.i++ } name, err := p.parseIdentifier() if err != nil { return } name = toLowerASCII(name) if mustBePseudoElement && (name != "after" && name != "backdrop" && name != "before" && name != "cue" && name != "first-letter" && name != "first-line" && name != "grammar-error" && name != "marker" && name != "placeholder" && name != "selection" && name != "spelling-error") { return out, "", fmt.Errorf("unknown pseudoelement :%s", name) } switch name { case "not", "has", "haschild": if !p.consumeParenthesis() { return out, "", errExpectedParenthesis } sel, parseErr := p.parseSelectorGroup() if parseErr != nil { return out, "", parseErr } if !p.consumeClosingParenthesis() { return out, "", errExpectedClosingParenthesis } out = relativePseudoClassSelector{name: name, match: sel} case "contains", "containsown": if !p.consumeParenthesis() { return out, "", errExpectedParenthesis } if p.i == len(p.s) { return out, "", 
errUnmatchedParenthesis } var val string switch p.s[p.i] { case '\'', '"': val, err = p.parseString() default: val, err = p.parseIdentifier() } if err != nil { return out, "", err } val = strings.ToLower(val) p.skipWhitespace() if p.i >= len(p.s) { return out, "", errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { return out, "", errExpectedClosingParenthesis } out = containsPseudoClassSelector{own: name == "containsown", value: val} case "matches", "matchesown": if !p.consumeParenthesis() { return out, "", errExpectedParenthesis } rx, err := p.parseRegex() if err != nil { return out, "", err } if p.i >= len(p.s) { return out, "", errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { return out, "", errExpectedClosingParenthesis } out = regexpPseudoClassSelector{own: name == "matchesown", regexp: rx} case "nth-child", "nth-last-child", "nth-of-type", "nth-last-of-type": if !p.consumeParenthesis() { return out, "", errExpectedParenthesis } a, b, err := p.parseNth() if err != nil { return out, "", err } if !p.consumeClosingParenthesis() { return out, "", errExpectedClosingParenthesis } last := name == "nth-last-child" || name == "nth-last-of-type" ofType := name == "nth-of-type" || name == "nth-last-of-type" out = nthPseudoClassSelector{a: a, b: b, last: last, ofType: ofType} case "first-child": out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: false} case "last-child": out = nthPseudoClassSelector{a: 0, b: 1, ofType: false, last: true} case "first-of-type": out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: false} case "last-of-type": out = nthPseudoClassSelector{a: 0, b: 1, ofType: true, last: true} case "only-child": out = onlyChildPseudoClassSelector{ofType: false} case "only-of-type": out = onlyChildPseudoClassSelector{ofType: true} case "input": out = inputPseudoClassSelector{} case "empty": out = emptyElementPseudoClassSelector{} case "root": out = 
rootPseudoClassSelector{} case "link": out = linkPseudoClassSelector{} case "lang": if !p.consumeParenthesis() { return out, "", errExpectedParenthesis } if p.i == len(p.s) { return out, "", errUnmatchedParenthesis } val, err := p.parseIdentifier() if err != nil { return out, "", err } val = strings.ToLower(val) p.skipWhitespace() if p.i >= len(p.s) { return out, "", errors.New("unexpected EOF in pseudo selector") } if !p.consumeClosingParenthesis() { return out, "", errExpectedClosingParenthesis } out = langPseudoClassSelector{lang: val} case "enabled": out = enabledPseudoClassSelector{} case "disabled": out = disabledPseudoClassSelector{} case "checked": out = checkedPseudoClassSelector{} case "visited", "hover", "active", "focus", "target": // Not applicable in a static context: never match. out = neverMatchSelector{value: ":" + name} case "after", "backdrop", "before", "cue", "first-letter", "first-line", "grammar-error", "marker", "placeholder", "selection", "spelling-error": return nil, name, nil default: return out, "", fmt.Errorf("unknown pseudoclass or pseudoelement :%s", name) } return } // parseInteger parses a decimal integer. func (p *parser) parseInteger() (int, error) { i := p.i start := i for i < len(p.s) && '0' <= p.s[i] && p.s[i] <= '9' { i++ } if i == start { return 0, errors.New("expected integer, but didn't find it") } p.i = i val, err := strconv.Atoi(p.s[start:i]) if err != nil { return 0, err } return val, nil } // parseNth parses the argument for :nth-child (normally of the form an+b). 
func (p *parser) parseNth() (a, b int, err error) { // initial state if p.i >= len(p.s) { goto eof } switch p.s[p.i] { case '-': p.i++ goto negativeA case '+': p.i++ goto positiveA case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': goto positiveA case 'n', 'N': a = 1 p.i++ goto readN case 'o', 'O', 'e', 'E': id, nameErr := p.parseName() if nameErr != nil { return 0, 0, nameErr } id = toLowerASCII(id) if id == "odd" { return 2, 1, nil } if id == "even" { return 2, 0, nil } return 0, 0, fmt.Errorf("expected 'odd' or 'even', but found '%s' instead", id) default: goto invalid } positiveA: if p.i >= len(p.s) { goto eof } switch p.s[p.i] { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': a, err = p.parseInteger() if err != nil { return 0, 0, err } goto readA case 'n', 'N': a = 1 p.i++ goto readN default: goto invalid } negativeA: if p.i >= len(p.s) { goto eof } switch p.s[p.i] { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': a, err = p.parseInteger() if err != nil { return 0, 0, err } a = -a goto readA case 'n', 'N': a = -1 p.i++ goto readN default: goto invalid } readA: if p.i >= len(p.s) { goto eof } switch p.s[p.i] { case 'n', 'N': p.i++ goto readN default: // The number we read as a is actually b. return 0, a, nil } readN: p.skipWhitespace() if p.i >= len(p.s) { goto eof } switch p.s[p.i] { case '+': p.i++ p.skipWhitespace() b, err = p.parseInteger() if err != nil { return 0, 0, err } return a, b, nil case '-': p.i++ p.skipWhitespace() b, err = p.parseInteger() if err != nil { return 0, 0, err } return a, -b, nil default: return a, 0, nil } eof: return 0, 0, errors.New("unexpected EOF while attempting to parse expression of form an+b") invalid: return 0, 0, errors.New("unexpected character while attempting to parse expression of form an+b") } // parseSimpleSelectorSequence parses a selector sequence that applies to // a single element. 
func (p *parser) parseSimpleSelectorSequence() (Sel, error) { var selectors []Sel if p.i >= len(p.s) { return nil, errors.New("expected selector, found EOF instead") } switch p.s[p.i] { case '*': // It's the universal selector. Just skip over it, since it doesn't affect the meaning. p.i++ if p.i+2 < len(p.s) && p.s[p.i:p.i+2] == "|*" { // other version of universal selector p.i += 2 } case '#', '.', '[', ':': // There's no type selector. Wait to process the other till the main loop. default: r, err := p.parseTypeSelector() if err != nil { return nil, err } selectors = append(selectors, r) } var pseudoElement string loop: for p.i < len(p.s) { var ( ns Sel newPseudoElement string err error ) switch p.s[p.i] { case '#': ns, err = p.parseIDSelector() case '.': ns, err = p.parseClassSelector() case '[': ns, err = p.parseAttributeSelector() case ':': ns, newPseudoElement, err = p.parsePseudoclassSelector() default: break loop } if err != nil { return nil, err } // From https://drafts.csswg.org/selectors-3/#pseudo-elements : // "Only one pseudo-element may appear per selector, and if present // it must appear after the sequence of simple selectors that // represents the subjects of the selector."" if ns == nil { // we found a pseudo-element if pseudoElement != "" { return nil, fmt.Errorf("only one pseudo-element is accepted per selector, got %s and %s", pseudoElement, newPseudoElement) } if !p.acceptPseudoElements { return nil, fmt.Errorf("pseudo-element %s found, but pseudo-elements support is disabled", newPseudoElement) } pseudoElement = newPseudoElement } else { if pseudoElement != "" { return nil, fmt.Errorf("pseudo-element %s must be at the end of selector", pseudoElement) } selectors = append(selectors, ns) } } if len(selectors) == 1 && pseudoElement == "" { // no need wrap the selectors in compoundSelector return selectors[0], nil } return compoundSelector{selectors: selectors, pseudoElement: pseudoElement}, nil } // parseSelector parses a selector that may 
include combinators. func (p *parser) parseSelector() (Sel, error) { p.skipWhitespace() result, err := p.parseSimpleSelectorSequence() if err != nil { return nil, err } for { var ( combinator byte c Sel ) if p.skipWhitespace() { combinator = ' ' } if p.i >= len(p.s) { return result, nil } switch p.s[p.i] { case '+', '>', '~': combinator = p.s[p.i] p.i++ p.skipWhitespace() case ',', ')': // These characters can't begin a selector, but they can legally occur after one. return result, nil } if combinator == 0 { return result, nil } c, err = p.parseSimpleSelectorSequence() if err != nil { return nil, err } result = combinedSelector{first: result, combinator: combinator, second: c} } } // parseSelectorGroup parses a group of selectors, separated by commas. func (p *parser) parseSelectorGroup() (SelectorGroup, error) { current, err := p.parseSelector() if err != nil { return nil, err } result := SelectorGroup{current} for p.i < len(p.s) { if p.s[p.i] != ',' { break } p.i++ c, err := p.parseSelector() if err != nil { return nil, err } result = append(result, c) } return result, nil } cascadia-1.3.3/parser_test.go000066400000000000000000000034411473106314600161370ustar00rootroot00000000000000package cascadia import ( "testing" ) var identifierTests = map[string]string{ "x": "x", "96": "", "-x": "-x", `r\e9 sumé`: "résumé", `r\0000e9 sumé`: "résumé", `r\0000e9sumé`: "résumé", `a\"b`: `a"b`, } func TestParseIdentifier(t *testing.T) { for source, want := range identifierTests { p := &parser{s: source} got, err := p.parseIdentifier() if err != nil { if want == "" { // It was supposed to be an error. 
continue } t.Errorf("parsing %q: got error (%s), want %q", source, err, want) continue } if want == "" { if err == nil { t.Errorf("parsing %q: got %q, want error", source, got) } continue } if p.i < len(source) { t.Errorf("parsing %q: %d bytes left over", source, len(source)-p.i) continue } if got != want { t.Errorf("parsing %q: got %q, want %q", source, got, want) } } } var stringTests = map[string]string{ `"x"`: "x", `'x'`: "x", `'x`: "", "'x\\\r\nx'": "xx", `"r\e9 sumé"`: "résumé", `"r\0000e9 sumé"`: "résumé", `"r\0000e9sumé"`: "résumé", `"a\"b"`: `a"b`, } func TestParseString(t *testing.T) { for source, want := range stringTests { p := &parser{s: source} got, err := p.parseString() if err != nil { if want == "" { // It was supposed to be an error. continue } t.Errorf("parsing %q: got error (%s), want %q", source, err, want) continue } if want == "" { if err == nil { t.Errorf("parsing %q: got %q, want error", source, got) } continue } if p.i < len(source) { t.Errorf("parsing %q: %d bytes left over", source, len(source)-p.i) continue } if got != want { t.Errorf("parsing %q: got %q, want %q", source, got, want) } } } cascadia-1.3.3/pseudo_classes.go000066400000000000000000000240161473106314600166210ustar00rootroot00000000000000package cascadia import ( "bytes" "fmt" "regexp" "strings" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) // This file implements the pseudo classes selectors, // which share the implementation of PseudoElement() and Specificity() type abstractPseudoClass struct{} func (s abstractPseudoClass) Specificity() Specificity { return Specificity{0, 1, 0} } func (c abstractPseudoClass) PseudoElement() string { return "" } type relativePseudoClassSelector struct { name string // one of "not", "has", "haschild" match SelectorGroup } func (s relativePseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } switch s.name { case "not": // matches elements that do not match a. 
return !s.match.Match(n) case "has": // matches elements with any descendant that matches a. return hasDescendantMatch(n, s.match) case "haschild": // matches elements with a child that matches a. return hasChildMatch(n, s.match) default: panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name)) } } // hasChildMatch returns whether n has any child that matches a. func hasChildMatch(n *html.Node, a Matcher) bool { for c := n.FirstChild; c != nil; c = c.NextSibling { if a.Match(c) { return true } } return false } // hasDescendantMatch performs a depth-first search of n's descendants, // testing whether any of them match a. It returns true as soon as a match is // found, or false if no match is found. func hasDescendantMatch(n *html.Node, a Matcher) bool { for c := n.FirstChild; c != nil; c = c.NextSibling { if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { return true } } return false } // Specificity returns the specificity of the most specific selectors // in the pseudo-class arguments. // See https://www.w3.org/TR/selectors/#specificity-rules func (s relativePseudoClassSelector) Specificity() Specificity { var max Specificity for _, sel := range s.match { newSpe := sel.Specificity() if max.Less(newSpe) { max = newSpe } } return max } func (c relativePseudoClassSelector) PseudoElement() string { return "" } type containsPseudoClassSelector struct { abstractPseudoClass value string own bool } func (s containsPseudoClassSelector) Match(n *html.Node) bool { var text string if s.own { // matches nodes that directly contain the given text text = strings.ToLower(nodeOwnText(n)) } else { // matches nodes that contain the given text. 
text = strings.ToLower(nodeText(n)) } return strings.Contains(text, s.value) } type regexpPseudoClassSelector struct { abstractPseudoClass regexp *regexp.Regexp own bool } func (s regexpPseudoClassSelector) Match(n *html.Node) bool { var text string if s.own { // matches nodes whose text directly matches the specified regular expression text = nodeOwnText(n) } else { // matches nodes whose text matches the specified regular expression text = nodeText(n) } return s.regexp.MatchString(text) } // writeNodeText writes the text contained in n and its descendants to b. func writeNodeText(n *html.Node, b *bytes.Buffer) { switch n.Type { case html.TextNode: b.WriteString(n.Data) case html.ElementNode: for c := n.FirstChild; c != nil; c = c.NextSibling { writeNodeText(c, b) } } } // nodeText returns the text contained in n and its descendants. func nodeText(n *html.Node) string { var b bytes.Buffer writeNodeText(n, &b) return b.String() } // nodeOwnText returns the contents of the text nodes that are direct // children of n. func nodeOwnText(n *html.Node) string { var b bytes.Buffer for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type == html.TextNode { b.WriteString(c.Data) } } return b.String() } type nthPseudoClassSelector struct { abstractPseudoClass a, b int last, ofType bool } func (s nthPseudoClassSelector) Match(n *html.Node) bool { if s.a == 0 { if s.last { return simpleNthLastChildMatch(s.b, s.ofType, n) } else { return simpleNthChildMatch(s.b, s.ofType, n) } } return nthChildMatch(s.a, s.b, s.last, s.ofType, n) } // nthChildMatch implements :nth-child(an+b). // If last is true, implements :nth-last-child instead. // If ofType is true, implements :nth-of-type instead. 
func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool { if n.Type != html.ElementNode { return false } parent := n.Parent if parent == nil { return false } i := -1 count := 0 for c := parent.FirstChild; c != nil; c = c.NextSibling { if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { continue } count++ if c == n { i = count if !last { break } } } if i == -1 { // This shouldn't happen, since n should always be one of its parent's children. return false } if last { i = count - i + 1 } i -= b if a == 0 { return i == 0 } return i%a == 0 && i/a >= 0 } // simpleNthChildMatch implements :nth-child(b). // If ofType is true, implements :nth-of-type instead. func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool { if n.Type != html.ElementNode { return false } parent := n.Parent if parent == nil { return false } count := 0 for c := parent.FirstChild; c != nil; c = c.NextSibling { if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { continue } count++ if c == n { return count == b } if count >= b { return false } } return false } // simpleNthLastChildMatch implements :nth-last-child(b). // If ofType is true, implements :nth-last-of-type instead. func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool { if n.Type != html.ElementNode { return false } parent := n.Parent if parent == nil { return false } count := 0 for c := parent.LastChild; c != nil; c = c.PrevSibling { if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { continue } count++ if c == n { return count == b } if count >= b { return false } } return false } type onlyChildPseudoClassSelector struct { abstractPseudoClass ofType bool } // Match implements :only-child. // If `ofType` is true, it implements :only-of-type instead. 
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } parent := n.Parent if parent == nil { return false } count := 0 for c := parent.FirstChild; c != nil; c = c.NextSibling { if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) { continue } count++ if count > 1 { return false } } return count == 1 } type inputPseudoClassSelector struct { abstractPseudoClass } // Matches input, select, textarea and button elements. func (s inputPseudoClassSelector) Match(n *html.Node) bool { return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button") } type emptyElementPseudoClassSelector struct { abstractPseudoClass } // Matches empty elements. func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } for c := n.FirstChild; c != nil; c = c.NextSibling { switch c.Type { case html.ElementNode: return false case html.TextNode: if strings.TrimSpace(nodeText(c)) == "" { continue } else { return false } } } return true } type rootPseudoClassSelector struct { abstractPseudoClass } // Match implements :root func (s rootPseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } if n.Parent == nil { return false } return n.Parent.Type == html.DocumentNode } func hasAttr(n *html.Node, attr string) bool { return matchAttribute(n, attr, func(string) bool { return true }) } type linkPseudoClassSelector struct { abstractPseudoClass } // Match implements :link func (s linkPseudoClassSelector) Match(n *html.Node) bool { return (n.DataAtom == atom.A || n.DataAtom == atom.Area || n.DataAtom == atom.Link) && hasAttr(n, "href") } type langPseudoClassSelector struct { abstractPseudoClass lang string } func (s langPseudoClassSelector) Match(n *html.Node) bool { own := matchAttribute(n, "lang", func(val string) bool { return val == s.lang || strings.HasPrefix(val, s.lang+"-") }) 
if n.Parent == nil { return own } return own || s.Match(n.Parent) } type enabledPseudoClassSelector struct { abstractPseudoClass } func (s enabledPseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } switch n.DataAtom { case atom.A, atom.Area, atom.Link: return hasAttr(n, "href") case atom.Optgroup, atom.Menuitem, atom.Fieldset: return !hasAttr(n, "disabled") case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option: return !hasAttr(n, "disabled") && !inDisabledFieldset(n) } return false } type disabledPseudoClassSelector struct { abstractPseudoClass } func (s disabledPseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } switch n.DataAtom { case atom.Optgroup, atom.Menuitem, atom.Fieldset: return hasAttr(n, "disabled") case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option: return hasAttr(n, "disabled") || inDisabledFieldset(n) } return false } func hasLegendInPreviousSiblings(n *html.Node) bool { for s := n.PrevSibling; s != nil; s = s.PrevSibling { if s.DataAtom == atom.Legend { return true } } return false } func inDisabledFieldset(n *html.Node) bool { if n.Parent == nil { return false } if n.Parent.DataAtom == atom.Fieldset && hasAttr(n.Parent, "disabled") && (n.DataAtom != atom.Legend || hasLegendInPreviousSiblings(n)) { return true } return inDisabledFieldset(n.Parent) } type checkedPseudoClassSelector struct { abstractPseudoClass } func (s checkedPseudoClassSelector) Match(n *html.Node) bool { if n.Type != html.ElementNode { return false } switch n.DataAtom { case atom.Input, atom.Menuitem: return hasAttr(n, "checked") && matchAttribute(n, "type", func(val string) bool { t := toLowerASCII(val) return t == "checkbox" || t == "radio" }) case atom.Option: return hasAttr(n, "selected") } return false } cascadia-1.3.3/selector.go000066400000000000000000000337311473106314600154310ustar00rootroot00000000000000package cascadia import ( "fmt" "regexp" 
"strings" "golang.org/x/net/html" ) // Matcher is the interface for basic selector functionality. // Match returns whether a selector matches n. type Matcher interface { Match(n *html.Node) bool } // Sel is the interface for all the functionality provided by selectors. type Sel interface { Matcher Specificity() Specificity // Returns a CSS input compiling to this selector. String() string // Returns a pseudo-element, or an empty string. PseudoElement() string } // Parse parses a selector. Use `ParseWithPseudoElement` // if you need support for pseudo-elements. func Parse(sel string) (Sel, error) { p := &parser{s: sel} compiled, err := p.parseSelector() if err != nil { return nil, err } if p.i < len(sel) { return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) } return compiled, nil } // ParseWithPseudoElement parses a single selector, // with support for pseudo-element. func ParseWithPseudoElement(sel string) (Sel, error) { p := &parser{s: sel, acceptPseudoElements: true} compiled, err := p.parseSelector() if err != nil { return nil, err } if p.i < len(sel) { return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) } return compiled, nil } // ParseGroup parses a selector, or a group of selectors separated by commas. // Use `ParseGroupWithPseudoElements` // if you need support for pseudo-elements. func ParseGroup(sel string) (SelectorGroup, error) { p := &parser{s: sel} compiled, err := p.parseSelectorGroup() if err != nil { return nil, err } if p.i < len(sel) { return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) } return compiled, nil } // ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas. // It supports pseudo-elements. 
func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) { p := &parser{s: sel, acceptPseudoElements: true} compiled, err := p.parseSelectorGroup() if err != nil { return nil, err } if p.i < len(sel) { return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i) } return compiled, nil } // A Selector is a function which tells whether a node matches or not. // // This type is maintained for compatibility; I recommend using the newer and // more idiomatic interfaces Sel and Matcher. type Selector func(*html.Node) bool // Compile parses a selector and returns, if successful, a Selector object // that can be used to match against html.Node objects. func Compile(sel string) (Selector, error) { compiled, err := ParseGroup(sel) if err != nil { return nil, err } return Selector(compiled.Match), nil } // MustCompile is like Compile, but panics instead of returning an error. func MustCompile(sel string) Selector { compiled, err := Compile(sel) if err != nil { panic(err) } return compiled } // MatchAll returns a slice of the nodes that match the selector, // from n and its children. func (s Selector) MatchAll(n *html.Node) []*html.Node { return s.matchAllInto(n, nil) } func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node { if s(n) { storage = append(storage, n) } for child := n.FirstChild; child != nil; child = child.NextSibling { storage = s.matchAllInto(child, storage) } return storage } func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node { for child := n.FirstChild; child != nil; child = child.NextSibling { if m.Match(child) { storage = append(storage, child) } storage = queryInto(child, m, storage) } return storage } // QueryAll returns a slice of all the nodes that match m, from the descendants // of n. func QueryAll(n *html.Node, m Matcher) []*html.Node { return queryInto(n, m, nil) } // Match returns true if the node matches the selector. 
func (s Selector) Match(n *html.Node) bool {
	return s(n)
}

// MatchFirst returns the first node that matches s, from n and its children.
func (s Selector) MatchFirst(n *html.Node) *html.Node {
	if s.Match(n) {
		return n
	}

	// Depth-first search through the children, in document order.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if found := s.MatchFirst(child); found != nil {
			return found
		}
	}
	return nil
}

// Query returns the first node that matches m, from the descendants of n.
// If none matches, it returns nil.
func Query(n *html.Node, m Matcher) *html.Node {
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if m.Match(child) {
			return child
		}
		found := Query(child, m)
		if found != nil {
			return found
		}
	}
	return nil
}

// Filter returns the nodes in nodes that match the selector.
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
	for i := range nodes {
		if node := nodes[i]; s(node) {
			result = append(result, node)
		}
	}
	return result
}

// Filter returns the nodes that match m.
func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
	for i := range nodes {
		if node := nodes[i]; m.Match(node) {
			result = append(result, node)
		}
	}
	return result
}

// tagSelector selects elements by tag name.
type tagSelector struct {
	tag string
}

// Matches elements with a given tag name.
func (t tagSelector) Match(n *html.Node) bool {
	if n.Type != html.ElementNode {
		return false
	}
	return n.Data == t.tag
}

// Specificity counts a tag selector as one type selector.
func (t tagSelector) Specificity() Specificity {
	return Specificity{0, 0, 1}
}

// PseudoElement is always empty for a tag selector.
func (t tagSelector) PseudoElement() string {
	return ""
}

// classSelector selects elements by one of their class names.
type classSelector struct {
	class string
}

// Matches elements by class attribute.
func (t classSelector) Match(n *html.Node) bool {
	// The class attribute is a whitespace-separated list, compared case-sensitively.
	hasClass := func(attrValue string) bool {
		return matchInclude(t.class, attrValue, false)
	}
	return matchAttribute(n, "class", hasClass)
}

// Specificity counts a class selector in the middle component.
func (t classSelector) Specificity() Specificity {
	return Specificity{0, 1, 0}
}

// PseudoElement is always empty for a class selector.
func (t classSelector) PseudoElement() string {
	return ""
}

// idSelector selects elements by their id attribute.
type idSelector struct {
	id string
}

// Matches elements by id attribute.
func (t idSelector) Match(n *html.Node) bool { return matchAttribute(n, "id", func(s string) bool { return s == t.id }) } func (c idSelector) Specificity() Specificity { return Specificity{1, 0, 0} } func (c idSelector) PseudoElement() string { return "" } type attrSelector struct { key, val, operation string regexp *regexp.Regexp insensitive bool } // Matches elements by attribute value. func (t attrSelector) Match(n *html.Node) bool { switch t.operation { case "": return matchAttribute(n, t.key, func(string) bool { return true }) case "=": return matchAttribute(n, t.key, func(s string) bool { return matchInsensitiveValue(s, t.val, t.insensitive) }) case "!=": return attributeNotEqualMatch(t.key, t.val, n, t.insensitive) case "~=": // matches elements where the attribute named key is a whitespace-separated list that includes val. return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s, t.insensitive) }) case "|=": return attributeDashMatch(t.key, t.val, n, t.insensitive) case "^=": return attributePrefixMatch(t.key, t.val, n, t.insensitive) case "$=": return attributeSuffixMatch(t.key, t.val, n, t.insensitive) case "*=": return attributeSubstringMatch(t.key, t.val, n, t.insensitive) case "#=": return attributeRegexMatch(t.key, t.regexp, n) default: panic(fmt.Sprintf("unsuported operation : %s", t.operation)) } } // matches elements where we ignore (or not) the case of the attribute value // the user attribute is the value set by the user to match elements // the real attribute is the attribute value found in the code parsed func matchInsensitiveValue(userAttr string, realAttr string, ignoreCase bool) bool { if ignoreCase { return strings.EqualFold(userAttr, realAttr) } return userAttr == realAttr } // matches elements where the attribute named key satisifes the function f. 
func matchAttribute(n *html.Node, key string, f func(string) bool) bool { if n.Type != html.ElementNode { return false } for _, a := range n.Attr { if a.Key == key && f(a.Val) { return true } } return false } // attributeNotEqualMatch matches elements where // the attribute named key does not have the value val. func attributeNotEqualMatch(key, val string, n *html.Node, ignoreCase bool) bool { if n.Type != html.ElementNode { return false } for _, a := range n.Attr { if a.Key == key && matchInsensitiveValue(a.Val, val, ignoreCase) { return false } } return true } // returns true if s is a whitespace-separated list that includes val. func matchInclude(val string, s string, ignoreCase bool) bool { for s != "" { i := strings.IndexAny(s, " \t\r\n\f") if i == -1 { return matchInsensitiveValue(s, val, ignoreCase) } if matchInsensitiveValue(s[:i], val, ignoreCase) { return true } s = s[i+1:] } return false } // matches elements where the attribute named key equals val or starts with val plus a hyphen. func attributeDashMatch(key, val string, n *html.Node, ignoreCase bool) bool { return matchAttribute(n, key, func(s string) bool { if matchInsensitiveValue(s, val, ignoreCase) { return true } if len(s) <= len(val) { return false } if matchInsensitiveValue(s[:len(val)], val, ignoreCase) && s[len(val)] == '-' { return true } return false }) } // attributePrefixMatch returns a Selector that matches elements where // the attribute named key starts with val. func attributePrefixMatch(key, val string, n *html.Node, ignoreCase bool) bool { return matchAttribute(n, key, func(s string) bool { if strings.TrimSpace(s) == "" { return false } if ignoreCase { return strings.HasPrefix(strings.ToLower(s), strings.ToLower(val)) } return strings.HasPrefix(s, val) }) } // attributeSuffixMatch matches elements where // the attribute named key ends with val. 
func attributeSuffixMatch(key, val string, n *html.Node, ignoreCase bool) bool { return matchAttribute(n, key, func(s string) bool { if strings.TrimSpace(s) == "" { return false } if ignoreCase { return strings.HasSuffix(strings.ToLower(s), strings.ToLower(val)) } return strings.HasSuffix(s, val) }) } // attributeSubstringMatch matches nodes where // the attribute named key contains val. func attributeSubstringMatch(key, val string, n *html.Node, ignoreCase bool) bool { return matchAttribute(n, key, func(s string) bool { if strings.TrimSpace(s) == "" { return false } if ignoreCase { return strings.Contains(strings.ToLower(s), strings.ToLower(val)) } return strings.Contains(s, val) }) } // attributeRegexMatch matches nodes where // the attribute named key matches the regular expression rx func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool { return matchAttribute(n, key, func(s string) bool { return rx.MatchString(s) }) } func (c attrSelector) Specificity() Specificity { return Specificity{0, 1, 0} } func (c attrSelector) PseudoElement() string { return "" } // see pseudo_classes.go for pseudo classes selectors // on a static context, some selectors can't match anything type neverMatchSelector struct { value string } func (s neverMatchSelector) Match(n *html.Node) bool { return false } func (s neverMatchSelector) Specificity() Specificity { return Specificity{0, 0, 0} } func (c neverMatchSelector) PseudoElement() string { return "" } type compoundSelector struct { selectors []Sel pseudoElement string } // Matches elements if each sub-selectors matches. 
func (t compoundSelector) Match(n *html.Node) bool { if len(t.selectors) == 0 { return n.Type == html.ElementNode } for _, sel := range t.selectors { if !sel.Match(n) { return false } } return true } func (s compoundSelector) Specificity() Specificity { var out Specificity for _, sel := range s.selectors { out = out.Add(sel.Specificity()) } if s.pseudoElement != "" { // https://drafts.csswg.org/selectors-3/#specificity out = out.Add(Specificity{0, 0, 1}) } return out } func (c compoundSelector) PseudoElement() string { return c.pseudoElement } type combinedSelector struct { first Sel combinator byte second Sel } func (t combinedSelector) Match(n *html.Node) bool { if t.first == nil { return false // maybe we should panic } switch t.combinator { case 0: return t.first.Match(n) case ' ': return descendantMatch(t.first, t.second, n) case '>': return childMatch(t.first, t.second, n) case '+': return siblingMatch(t.first, t.second, true, n) case '~': return siblingMatch(t.first, t.second, false, n) default: panic("unknown combinator") } } // matches an element if it matches d and has an ancestor that matches a. func descendantMatch(a, d Matcher, n *html.Node) bool { if !d.Match(n) { return false } for p := n.Parent; p != nil; p = p.Parent { if a.Match(p) { return true } } return false } // matches an element if it matches d and its parent matches a. func childMatch(a, d Matcher, n *html.Node) bool { return d.Match(n) && n.Parent != nil && a.Match(n.Parent) } // matches an element if it matches s2 and is preceded by an element that matches s1. // If adjacent is true, the sibling must be immediately before the element. 
func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool { if !s2.Match(n) { return false } if adjacent { for n = n.PrevSibling; n != nil; n = n.PrevSibling { if n.Type == html.TextNode || n.Type == html.CommentNode { continue } return s1.Match(n) } return false } // Walk backwards looking for element that matches s1 for c := n.PrevSibling; c != nil; c = c.PrevSibling { if s1.Match(c) { return true } } return false } func (s combinedSelector) Specificity() Specificity { spec := s.first.Specificity() if s.second != nil { spec = spec.Add(s.second.Specificity()) } return spec } // on combinedSelector, a pseudo-element only makes sens on the last // selector, although others increase specificity. func (c combinedSelector) PseudoElement() string { if c.second == nil { return "" } return c.second.PseudoElement() } // A SelectorGroup is a list of selectors, which matches if any of the // individual selectors matches. type SelectorGroup []Sel // Match returns true if the node matches one of the single selectors. func (s SelectorGroup) Match(n *html.Node) bool { for _, sel := range s { if sel.Match(n) { return true } } return false } cascadia-1.3.3/selector_test.go000066400000000000000000000557241473106314600164760ustar00rootroot00000000000000package cascadia import ( "bytes" "encoding/json" "fmt" "io/ioutil" "log" "reflect" "strings" "testing" "golang.org/x/net/html" ) var validSelectors []validSelector func init() { c, err := ioutil.ReadFile("test_resources/valid_selectors.json") if err != nil { log.Fatal(err) } if err = json.Unmarshal(c, &validSelectors); err != nil { log.Fatal(err) } } type selectorTest struct { HTML, selector string results []string } func nodeString(n *html.Node) string { buf := bytes.NewBufferString("") if err := html.Render(buf, n); err != nil { log.Fatal(err) } return buf.String() } var selectorTests = []selectorTest{ { `
This address...
`, "address", []string{ "
This address...
", }, }, { `text`, "*", []string{ "text", "", "text", }, }, { ``, "*", []string{ "", "", "", }, }, { `

`, "#foo", []string{ `

`, }, }, { `