pax_global_header00006660000000000000000000000064141303344240014510gustar00rootroot0000000000000052 comment=e5bbece50bb885e00f08e6a924a62391af642bf7 golang-mvdan-xurls-2.3.0/000077500000000000000000000000001413033442400152375ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/.github/000077500000000000000000000000001413033442400165775ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/.github/FUNDING.yml000066400000000000000000000000161413033442400204110ustar00rootroot00000000000000github: mvdan golang-mvdan-xurls-2.3.0/.github/workflows/000077500000000000000000000000001413033442400206345ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/.github/workflows/test.yml000066400000000000000000000006521413033442400223410ustar00rootroot00000000000000on: [push, pull_request] name: Test jobs: test: strategy: matrix: go-version: [1.15.x, 1.16.x] os: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.os }} steps: - name: Install Go uses: actions/setup-go@v2 with: go-version: ${{ matrix.go-version }} - name: Checkout code uses: actions/checkout@v2 - name: Test run: go test ./... golang-mvdan-xurls-2.3.0/.gitignore000066400000000000000000000001041413033442400172220ustar00rootroot00000000000000cmd/xurls/xurls generate/tldsgen/tldsgen generate/regexgen/regexgen golang-mvdan-xurls-2.3.0/LICENSE000066400000000000000000000027201413033442400162450ustar00rootroot00000000000000Copyright (c) 2015, Daniel Martí. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. golang-mvdan-xurls-2.3.0/README.md000066400000000000000000000022321413033442400165150ustar00rootroot00000000000000# xurls [![Go Reference](https://pkg.go.dev/badge/mvdan.cc/xurls/v2.svg)](https://pkg.go.dev/mvdan.cc/xurls/v2) Extract urls from text using regular expressions. Requires Go 1.15 or later. ```go import "mvdan.cc/xurls/v2" func main() { rxRelaxed := xurls.Relaxed() rxRelaxed.FindString("Do gophers live in golang.org?") // "golang.org" rxRelaxed.FindString("This string does not have a URL") // "" rxStrict := xurls.Strict() rxStrict.FindAllString("must have scheme: http://foo.com/.", -1) // []string{"http://foo.com/"} rxStrict.FindAllString("no scheme, no match: foo.com", -1) // []string{} } ``` Since API is centered around [regexp.Regexp](https://golang.org/pkg/regexp/#Regexp), many other methods are available, such as finding the [byte indexes](https://golang.org/pkg/regexp/#Regexp.FindAllIndex) for all matches. Note that calling the exposed functions means compiling a regular expression, so repeated calls should be avoided. #### cmd/xurls To install the tool globally: cd $(mktemp -d); go mod init tmp; GO111MODULE=on go get mvdan.cc/xurls/v2/cmd/xurls ```shell $ echo "Do gophers live in http://golang.org?" | xurls http://golang.org ``` golang-mvdan-xurls-2.3.0/cmd/000077500000000000000000000000001413033442400160025ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/cmd/xurls/000077500000000000000000000000001413033442400171575ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/cmd/xurls/main.go000066400000000000000000000071371413033442400204420ustar00rootroot00000000000000// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information package main import ( "bufio" "bytes" "errors" "flag" "fmt" "io" "io/ioutil" "net/http" "net/url" "os" "regexp" "strings" "time" "mvdan.cc/xurls/v2" ) var ( matching = flag.String("m", "", "") relaxed = flag.Bool("r", false, "") fix = flag.Bool("fix", false, "") ) func init() { flag.Usage = func() { p := func(format string, a ...interface{}) { fmt.Fprintf(os.Stderr, format, a...) } p("Usage: xurls [-h] [files]\n\n") p("If no files are given, it reads from standard input.\n\n") p(" -m only match urls whose scheme matches a regexp\n") p(" example: 'https?://|mailto:'\n") p(" -r also match urls without a scheme (relaxed)\n") p(" -fix overwrite urls that redirect\n") } } func scanPath(re *regexp.Regexp, path string) error { f := os.Stdin if path != "-" { var err error f, err = os.Open(path) if err != nil { return err } defer f.Close() } bufr := bufio.NewReader(f) var fixedBuf bytes.Buffer anyFixed := false var broken []string for { line, err := bufr.ReadBytes('\n') offset := 0 for _, pair := range re.FindAllIndex(line, -1) { // The indexes are based on the original line. pair[0] += offset pair[1] += offset match := line[pair[0]:pair[1]] if !*fix { fmt.Printf("%s\n", match) continue } u, err := url.Parse(string(match)) if err != nil { continue } fixed := u.String() switch u.Scheme { case "http", "https": // See if the URL redirects somewhere. client := &http.Client{ Timeout: 10 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error { if len(via) >= 10 { return errors.New("stopped after 10 redirects") } // Keep the fragment around. req.URL.Fragment = u.Fragment fixed = req.URL.String() return nil }, } resp, err := client.Get(fixed) if err != nil { continue } if resp.StatusCode >= 400 { broken = append(broken, string(match)) } resp.Body.Close() } if fixed != string(match) { // Replace the url, and update the offset. newLine := line[:pair[0]] newLine = append(newLine, fixed...) newLine = append(newLine, line[pair[1]:]...) offset += len(newLine) - len(line) line = newLine anyFixed = true } } if *fix { if path == "-" { os.Stdout.Write(line) } else { fixedBuf.Write(line) } } if err == io.EOF { break } else if err != nil { return err } } if anyFixed && path != "-" { f.Close() // Overwrite the file, if we weren't reading stdin. Report its // path too. fmt.Println(path) if err := ioutil.WriteFile(path, fixedBuf.Bytes(), 0o666); err != nil { return err } } if len(broken) > 0 { return fmt.Errorf("found %d broken urls in %q:\n%s", len(broken), path, strings.Join(broken, "\n")) } return nil } func main() { os.Exit(main1()) } func main1() int { flag.Parse() if *relaxed && *matching != "" { fmt.Fprintln(os.Stderr, "-r and -m at the same time don't make much sense") return 1 } var re *regexp.Regexp if *relaxed { re = xurls.Relaxed() } else if *matching != "" { var err error if re, err = xurls.StrictMatchingScheme(*matching); err != nil { fmt.Fprintln(os.Stderr, err) return 1 } } else { re = xurls.Strict() } args := flag.Args() if len(args) == 0 { args = []string{"-"} } for _, path := range args { if err := scanPath(re, path); err != nil { fmt.Fprintln(os.Stderr, err) return 1 } } return 0 } golang-mvdan-xurls-2.3.0/cmd/xurls/main_test.go000066400000000000000000000034041413033442400214720ustar00rootroot00000000000000// Copyright (c) 2019, Daniel Martí // See LICENSE for licensing information package main import ( "context" "fmt" "io/ioutil" "net" "net/http" "os" "path/filepath" "testing" "github.com/rogpeppe/go-internal/testscript" ) func TestMain(m *testing.M) { os.Exit(testscript.RunMain(m, map[string]func() int{ "xurls": main1, })) } func TestScripts(t *testing.T) { t.Parallel() testscript.Run(t, testscript.Params{ Dir: filepath.Join("testdata", "scripts"), Setup: func(env *testscript.Env) error { mux := http.NewServeMux() mux.HandleFunc("/plain", func(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, "plaintext") }) mux.HandleFunc("/redir-1", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/plain", http.StatusMovedPermanently) }) mux.HandleFunc("/redir-2", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/redir-1", http.StatusMovedPermanently) }) ln, err := net.Listen("tcp", ":0") if err != nil { return err } server := &http.Server{Handler: mux} go server.Serve(ln) env.Vars = append(env.Vars, "SERVER=http://"+ln.Addr().String()) env.Defer(func() { if err := server.Shutdown(context.TODO()); err != nil { t.Fatal(err) } }) return nil }, Cmds: map[string]func(ts *testscript.TestScript, neg bool, args []string){ "expand": func(ts *testscript.TestScript, neg bool, args []string) { if neg { ts.Fatalf("unsupported: ! expand") } if len(args) == 0 { ts.Fatalf("usage: expand file...") } for _, arg := range args { data := ts.ReadFile(arg) data = os.Expand(data, ts.Getenv) err := ioutil.WriteFile(ts.MkAbs(arg), []byte(data), 0o666) ts.Check(err) } }, }, }) } golang-mvdan-xurls-2.3.0/cmd/xurls/testdata/000077500000000000000000000000001413033442400207705ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/cmd/xurls/testdata/scripts/000077500000000000000000000000001413033442400224575ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/cmd/xurls/testdata/scripts/basic.txt000066400000000000000000000011531413033442400243010ustar00rootroot00000000000000stdin input xurls stdout 'https://foo.com' ! stdout 'bar.com' ! stdout 'custom://some-data' ! stderr . ! xurls missing ! stdout . stderr 'open missing' xurls input stdout 'https://foo.com' ! stdout 'bar.com' ! stdout 'custom://some-data' ! stderr . xurls -r input stdout 'https://foo.com' stdout 'bar.com' ! stdout 'custom://some-data' ! stderr . xurls -m 'custom://' input ! stdout 'https://foo.com' ! stdout 'bar.com' stdout 'custom://some-data' ! stderr . -- input -- First, a link with a scheme, https://foo.com. Then, one without a scheme, like bar.com. Also, a link with a custom scheme, custom://some-data. golang-mvdan-xurls-2.3.0/cmd/xurls/testdata/scripts/fix.txt000066400000000000000000000022371413033442400240120ustar00rootroot00000000000000expand nothing cp nothing nothing.orig expand redirects expand redirects.golden cp redirects redirects.orig expand broken expand broken.golden cp broken broken.orig xurls -fix nothing ! stdout . ! stderr . cmp nothing nothing.orig stdin redirects xurls -fix cmp stdout redirects.golden cmp redirects redirects.orig ! stderr . xurls -fix redirects stdout '^redirects$' ! stderr . cmp redirects redirects.golden ! xurls -fix broken stdout '^broken$' stderr '1 broken urls' stderr '/404' cmp broken broken.golden -- nothing -- No redirect: ${SERVER}/plain -- redirects -- No redirect: ${SERVER}/plain One redirect: ${SERVER}/redir-1 Two redirects: ${SERVER}/redir-2 Redirect with fragment: ${SERVER}/redir-1#foo Three links in one line: ${SERVER}/redir-1 + ${SERVER}//redir-1 + ${SERVER}///redir-1 -- redirects.golden -- No redirect: ${SERVER}/plain One redirect: ${SERVER}/plain Two redirects: ${SERVER}/plain Redirect with fragment: ${SERVER}/plain#foo Three links in one line: ${SERVER}/plain + ${SERVER}/plain + ${SERVER}/plain -- broken -- One redirect: ${SERVER}/redir-1 404 error: ${SERVER}/404 -- broken.golden -- One redirect: ${SERVER}/plain 404 error: ${SERVER}/404 golang-mvdan-xurls-2.3.0/cmd/xurls/testdata/scripts/flags.txt000066400000000000000000000004471413033442400243210ustar00rootroot00000000000000xurls -h ! stderr 'flag provided but not defined' stderr 'Usage: xurls' ! stderr 'help requested' # don't duplicate usage output ! stderr '-test\.' # don't show the test binary's usage func ! xurls -r -m="whatever" stderr 'at the same time' ! xurls -m="bad(regexp" stderr 'missing closing \)' golang-mvdan-xurls-2.3.0/example_test.go000066400000000000000000000006161413033442400202630ustar00rootroot00000000000000// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information package xurls_test import ( "fmt" "mvdan.cc/xurls/v2" ) func Example() { rx := xurls.Relaxed() fmt.Println(rx.FindString("Do gophers live in http://golang.org?")) fmt.Println(rx.FindAllString("foo.com is http://foo.com/.", -1)) // Output: // http://golang.org // [foo.com http://foo.com/] } golang-mvdan-xurls-2.3.0/generate/000077500000000000000000000000001413033442400170315ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/generate/schemesgen/000077500000000000000000000000001413033442400211525ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/generate/schemesgen/main.go000066400000000000000000000025551413033442400224340ustar00rootroot00000000000000// Copyright (c) 2017, Shreyas Khare // See LICENSE for licensing information package main import ( "encoding/csv" "io" "log" "net/http" "os" "text/template" ) const path = "schemes.go" var schemesTmpl = template.Must(template.New("schemes").Parse(`// Generated by schemesgen package xurls // Schemes is a sorted list of all IANA assigned schemes. // // Source: // https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv var Schemes = []string{ {{range $scheme := .Schemes}}` + "\t`" + `{{$scheme}}` + "`" + `, {{end}}} `)) func schemeList() []string { resp, err := http.Get("https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv") if err != nil { log.Fatal(err) } defer resp.Body.Close() r := csv.NewReader(resp.Body) r.Read() // ignore headers schemes := make([]string, 0) for { record, err := r.Read() if err == io.EOF { break } if err != nil { log.Fatal(err) } schemes = append(schemes, record[0]) } return schemes } func writeSchemes(schemes []string) error { f, err := os.Create(path) if err != nil { return err } defer f.Close() return schemesTmpl.Execute(f, struct { Schemes []string }{ Schemes: schemes, }) } func main() { schemes := schemeList() log.Printf("Generating %s...", path) if err := writeSchemes(schemes); err != nil { log.Fatalf("Could not write path: %v", err) } } golang-mvdan-xurls-2.3.0/generate/tldsgen/000077500000000000000000000000001413033442400204715ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/generate/tldsgen/main.go000066400000000000000000000042231413033442400217450ustar00rootroot00000000000000// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information package main import ( "bufio" "errors" "fmt" "log" "net/http" "os" "regexp" "sort" "strings" "sync" "text/template" ) const path = "tlds.go" var tldsTmpl = template.Must(template.New("tlds").Parse(`// Generated by tldsgen package xurls // TLDs is a sorted list of all public top-level domains. // // Sources:{{range $_, $url := .URLs}} // * {{$url}}{{end}} var TLDs = []string{ {{range $_, $tld := .TLDs}}` + "\t`" + `{{$tld}}` + "`" + `, {{end}}} `)) func cleanTld(tld string) string { tld = strings.ToLower(tld) if strings.HasPrefix(tld, "xn--") { return "" } return tld } func fetchFromURL(wg *sync.WaitGroup, url, pat string, tldSet map[string]bool) { defer wg.Done() log.Printf("Fetching %s", url) resp, err := http.Get(url) if err == nil && resp.StatusCode >= 400 { err = errors.New(resp.Status) } if err != nil { panic(fmt.Errorf("%s: %s", url, err)) } defer resp.Body.Close() scanner := bufio.NewScanner(resp.Body) re := regexp.MustCompile(pat) for scanner.Scan() { line := scanner.Text() tld := re.FindString(line) tld = cleanTld(tld) if tld == "" { continue } tldSet[tld] = true } if err := scanner.Err(); err != nil { panic(fmt.Errorf("%s: %s", url, err)) } } func tldList() ([]string, []string) { var urls []string var wg sync.WaitGroup tldSet := make(map[string]bool) fromURL := func(url, pat string) { urls = append(urls, url) wg.Add(1) go fetchFromURL(&wg, url, pat, tldSet) } fromURL("https://data.iana.org/TLD/tlds-alpha-by-domain.txt", `^[^#]+$`) fromURL("https://publicsuffix.org/list/effective_tld_names.dat", `^[^/.]+$`) wg.Wait() tlds := make([]string, 0, len(tldSet)) for tld := range tldSet { tlds = append(tlds, tld) } sort.Strings(tlds) return tlds, urls } func writeTlds(tlds, urls []string) error { f, err := os.Create(path) if err != nil { panic(err) } defer f.Close() return tldsTmpl.Execute(f, struct { TLDs []string URLs []string }{ TLDs: tlds, URLs: urls, }) } func main() { tlds, urls := tldList() log.Printf("Generating %s...", path) writeTlds(tlds, urls) } golang-mvdan-xurls-2.3.0/generate/unicodegen/000077500000000000000000000000001413033442400211515ustar00rootroot00000000000000golang-mvdan-xurls-2.3.0/generate/unicodegen/main.go000066400000000000000000000020511413033442400224220ustar00rootroot00000000000000// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information package main import ( "log" "os" "regexp" "strconv" "strings" "text/template" "unicode" ) const path = "unicode.go" var tmpl = template.Must(template.New("tlds").Parse(`// Generated by unicodegen package xurls const otherPuncMinusDoubleQuote = {{.}} `)) func visit(rt *unicode.RangeTable, fn func(rune)) { for _, r16 := range rt.R16 { for r := rune(r16.Lo); r <= rune(r16.Hi); r += rune(r16.Stride) { fn(r) } } for _, r32 := range rt.R32 { for r := rune(r32.Lo); r <= rune(r32.Hi); r += rune(r32.Stride) { fn(r) } } } func writeUnicode() error { var b strings.Builder visit(unicode.Po, func(r rune) { if r != '"' { b.WriteRune(r) } }) f, err := os.Create(path) if err != nil { return err } defer f.Close() return tmpl.Execute(f, strconv.Quote(regexp.QuoteMeta(b.String()))) } func main() { log.Printf("Generating %s...", path) if err := writeUnicode(); err != nil { log.Fatalf("Could not write path: %v", err) } } golang-mvdan-xurls-2.3.0/go.mod000066400000000000000000000001221413033442400163400ustar00rootroot00000000000000module mvdan.cc/xurls/v2 go 1.15 require github.com/rogpeppe/go-internal v1.8.0 golang-mvdan-xurls-2.3.0/go.sum000066400000000000000000000021761413033442400164000ustar00rootroot00000000000000github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e h1:aoZm08cpOy4WuID//EZDgcC4zIxODThtZNPirFr42+A= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8= github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0 h1:0vLT13EuvQ0hNvakwLuFZ/jYrLp5F3kcWHXdRggjCE8= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= golang-mvdan-xurls-2.3.0/schemes.go000066400000000000000000000107171413033442400172230ustar00rootroot00000000000000// Generated by schemesgen package xurls // Schemes is a sorted list of all IANA assigned schemes. // // Source: // https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv var Schemes = []string{ `aaa`, `aaas`, `about`, `acap`, `acct`, `acd`, `acr`, `adiumxtra`, `adt`, `afp`, `afs`, `aim`, `amss`, `android`, `appdata`, `apt`, `ark`, `attachment`, `aw`, `barion`, `beshare`, `bitcoin`, `bitcoincash`, `blob`, `bolo`, `browserext`, `cabal`, `calculator`, `callto`, `cap`, `cast`, `casts`, `chrome`, `chrome-extension`, `cid`, `coap`, `coap+tcp`, `coap+ws`, `coaps`, `coaps+tcp`, `coaps+ws`, `com-eventbrite-attendee`, `content`, `content-type`, `conti`, `crid`, `cvs`, `dab`, `dat`, `data`, `dav`, `diaspora`, `dict`, `did`, `dis`, `dlna-playcontainer`, `dlna-playsingle`, `dns`, `dntp`, `doi`, `dpp`, `drm`, `drop`, `dtmi`, `dtn`, `dvb`, `dvx`, `dweb`, `ed2k`, `elsi`, `embedded`, `ens`, `ethereum`, `example`, `facetime`, `fax`, `feed`, `feedready`, `file`, `filesystem`, `finger`, `first-run-pen-experience`, `fish`, `fm`, `ftp`, `fuchsia-pkg`, `geo`, `gg`, `git`, `gizmoproject`, `go`, `gopher`, `graph`, `gtalk`, `h323`, `ham`, `hcap`, `hcp`, `http`, `https`, `hxxp`, `hxxps`, `hydrazone`, `hyper`, `iax`, `icap`, `icon`, `im`, `imap`, `info`, `iotdisco`, `ipfs`, `ipn`, `ipns`, `ipp`, `ipps`, `irc`, `irc6`, `ircs`, `iris`, `iris.beep`, `iris.lwz`, `iris.xpc`, `iris.xpcs`, `isostore`, `itms`, `jabber`, `jar`, `jms`, `keyparc`, `lastfm`, `lbry`, `ldap`, `ldaps`, `leaptofrogans`, `lorawan`, `lvlt`, `magnet`, `mailserver`, `mailto`, `maps`, `market`, `matrix`, `message`, `microsoft.windows.camera`, `microsoft.windows.camera.multipicker`, `microsoft.windows.camera.picker`, `mid`, `mms`, `modem`, `mongodb`, `moz`, `ms-access`, `ms-browser-extension`, `ms-calculator`, `ms-drive-to`, `ms-enrollment`, `ms-excel`, `ms-eyecontrolspeech`, `ms-gamebarservices`, `ms-gamingoverlay`, `ms-getoffice`, `ms-help`, `ms-infopath`, `ms-inputapp`, `ms-lockscreencomponent-config`, `ms-media-stream-id`, `ms-mixedrealitycapture`, `ms-mobileplans`, `ms-officeapp`, `ms-people`, `ms-project`, `ms-powerpoint`, `ms-publisher`, `ms-restoretabcompanion`, `ms-screenclip`, `ms-screensketch`, `ms-search`, `ms-search-repair`, `ms-secondary-screen-controller`, `ms-secondary-screen-setup`, `ms-settings`, `ms-settings-airplanemode`, `ms-settings-bluetooth`, `ms-settings-camera`, `ms-settings-cellular`, `ms-settings-cloudstorage`, `ms-settings-connectabledevices`, `ms-settings-displays-topology`, `ms-settings-emailandaccounts`, `ms-settings-language`, `ms-settings-location`, `ms-settings-lock`, `ms-settings-nfctransactions`, `ms-settings-notifications`, `ms-settings-power`, `ms-settings-privacy`, `ms-settings-proximity`, `ms-settings-screenrotation`, `ms-settings-wifi`, `ms-settings-workplace`, `ms-spd`, `ms-sttoverlay`, `ms-transit-to`, `ms-useractivityset`, `ms-virtualtouchpad`, `ms-visio`, `ms-walk-to`, `ms-whiteboard`, `ms-whiteboard-cmd`, `ms-word`, `msnim`, `msrp`, `msrps`, `mss`, `mt`, `mtqp`, `mumble`, `mupdate`, `mvn`, `news`, `nfs`, `ni`, `nih`, `nntp`, `notes`, `num`, `ocf`, `oid`, `onenote`, `onenote-cmd`, `opaquelocktoken`, `openpgp4fpr`, `otpauth`, `pack`, `palm`, `paparazzi`, `payment`, `payto`, `pkcs11`, `platform`, `pop`, `pres`, `prospero`, `proxy`, `pwid`, `psyc`, `pttp`, `qb`, `query`, `quic-transport`, `redis`, `rediss`, `reload`, `res`, `resource`, `rmi`, `rsync`, `rtmfp`, `rtmp`, `rtsp`, `rtsps`, `rtspu`, `sarif`, `secondlife`, `secret-token`, `service`, `session`, `sftp`, `sgn`, `shc`, `shttp`, `sieve`, `simpleledger`, `sip`, `sips`, `skype`, `smb`, `sms`, `smtp`, `snews`, `snmp`, `soap.beep`, `soap.beeps`, `soldat`, `spiffe`, `spotify`, `ssb`, `ssh`, `steam`, `stun`, `stuns`, `submit`, `swh`, `svn`, `tag`, `teamspeak`, `tel`, `teliaeid`, `telnet`, `tftp`, `things`, `thismessage`, `tip`, `tn3270`, `tool`, `turn`, `turns`, `tv`, `udp`, `unreal`, `upt`, `urn`, `ut2004`, `v-event`, `vemmi`, `ventrilo`, `videotex`, `vnc`, `view-source`, `vscode`, `vscode-insiders`, `vsls`, `wais`, `wcr`, `webcal`, `wifi`, `wpid`, `ws`, `wss`, `wtai`, `wyciwyg`, `xcon`, `xcon-userid`, `xfire`, `xmlrpc.beep`, `xmlrpc.beeps`, `xmpp`, `xri`, `ymsgr`, `z39.50`, `z39.50r`, `z39.50s`, } golang-mvdan-xurls-2.3.0/tlds.go000066400000000000000000000371201413033442400165370ustar00rootroot00000000000000// Generated by tldsgen package xurls // TLDs is a sorted list of all public top-level domains. // // Sources: // * https://data.iana.org/TLD/tlds-alpha-by-domain.txt // * https://publicsuffix.org/list/effective_tld_names.dat var TLDs = []string{ `aaa`, `aarp`, `abarth`, `abb`, `abbott`, `abbvie`, `abc`, `able`, `abogado`, `abudhabi`, `ac`, `academy`, `accenture`, `accountant`, `accountants`, `aco`, `actor`, `ad`, `adac`, `ads`, `adult`, `ae`, `aeg`, `aero`, `aetna`, `af`, `afamilycompany`, `afl`, `africa`, `ag`, `agakhan`, `agency`, `ai`, `aig`, `airbus`, `airforce`, `airtel`, `akdn`, `al`, `alfaromeo`, `alibaba`, `alipay`, `allfinanz`, `allstate`, `ally`, `alsace`, `alstom`, `am`, `amazon`, `americanexpress`, `americanfamily`, `amex`, `amfam`, `amica`, `amsterdam`, `analytics`, `android`, `anquan`, `anz`, `ao`, `aol`, `apartments`, `app`, `apple`, `aq`, `aquarelle`, `ar`, `arab`, `aramco`, `archi`, `army`, `arpa`, `art`, `arte`, `as`, `asda`, `asia`, `associates`, `at`, `athleta`, `attorney`, `au`, `auction`, `audi`, `audible`, `audio`, `auspost`, `author`, `auto`, `autos`, `avianca`, `aw`, `aws`, `ax`, `axa`, `az`, `azure`, `ba`, `baby`, `baidu`, `banamex`, `bananarepublic`, `band`, `bank`, `bar`, `barcelona`, `barclaycard`, `barclays`, `barefoot`, `bargains`, `baseball`, `basketball`, `bauhaus`, `bayern`, `bb`, `bbc`, `bbt`, `bbva`, `bcg`, `bcn`, `bd`, `be`, `beats`, `beauty`, `beer`, `bentley`, `berlin`, `best`, `bestbuy`, `bet`, `bf`, `bg`, `bh`, `bharti`, `bi`, `bible`, `bid`, `bike`, `bing`, `bingo`, `bio`, `biz`, `bj`, `black`, `blackfriday`, `blockbuster`, `blog`, `bloomberg`, `blue`, `bm`, `bms`, `bmw`, `bn`, `bnpparibas`, `bo`, `boats`, `boehringer`, `bofa`, `bom`, `bond`, `boo`, `book`, `booking`, `bosch`, `bostik`, `boston`, `bot`, `boutique`, `box`, `br`, `bradesco`, `bridgestone`, `broadway`, `broker`, `brother`, `brussels`, `bs`, `bt`, `budapest`, `bugatti`, `build`, `builders`, `business`, `buy`, `buzz`, `bv`, `bw`, `by`, `bz`, `bzh`, `ca`, `cab`, `cafe`, `cal`, `call`, `calvinklein`, `cam`, `camera`, `camp`, `cancerresearch`, `canon`, `capetown`, `capital`, `capitalone`, `car`, `caravan`, `cards`, `care`, `career`, `careers`, `cars`, `casa`, `case`, `cash`, `casino`, `cat`, `catering`, `catholic`, `cba`, `cbn`, `cbre`, `cbs`, `cc`, `cd`, `center`, `ceo`, `cern`, `cf`, `cfa`, `cfd`, `cg`, `ch`, `chanel`, `channel`, `charity`, `chase`, `chat`, `cheap`, `chintai`, `christmas`, `chrome`, `church`, `ci`, `cipriani`, `circle`, `cisco`, `citadel`, `citi`, `citic`, `city`, `cityeats`, `ck`, `cl`, `claims`, `cleaning`, `click`, `clinic`, `clinique`, `clothing`, `cloud`, `club`, `clubmed`, `cm`, `cn`, `co`, `coach`, `codes`, `coffee`, `college`, `cologne`, `com`, `comcast`, `commbank`, `community`, `company`, `compare`, `computer`, `comsec`, `condos`, `construction`, `consulting`, `contact`, `contractors`, `cooking`, `cookingchannel`, `cool`, `coop`, `corsica`, `country`, `coupon`, `coupons`, `courses`, `cpa`, `cr`, `credit`, `creditcard`, `creditunion`, `cricket`, `crown`, `crs`, `cruise`, `cruises`, `csc`, `cu`, `cuisinella`, `cv`, `cw`, `cx`, `cy`, `cymru`, `cyou`, `cz`, `dabur`, `dad`, `dance`, `data`, `date`, `dating`, `datsun`, `day`, `dclk`, `dds`, `de`, `deal`, `dealer`, `deals`, `degree`, `delivery`, `dell`, `deloitte`, `delta`, `democrat`, `dental`, `dentist`, `desi`, `design`, `dev`, `dhl`, `diamonds`, `diet`, `digital`, `direct`, `directory`, `discount`, `discover`, `dish`, `diy`, `dj`, `dk`, `dm`, `dnp`, `do`, `docs`, `doctor`, `dog`, `domains`, `dot`, `download`, `drive`, `dtv`, `dubai`, `duck`, `dunlop`, `dupont`, `durban`, `dvag`, `dvr`, `dz`, `earth`, `eat`, `ec`, `eco`, `edeka`, `edu`, `education`, `ee`, `eg`, `email`, `emerck`, `energy`, `engineer`, `engineering`, `enterprises`, `epson`, `equipment`, `er`, `ericsson`, `erni`, `es`, `esq`, `estate`, `et`, `etisalat`, `eu`, `eurovision`, `eus`, `events`, `exchange`, `expert`, `exposed`, `express`, `extraspace`, `fage`, `fail`, `fairwinds`, `faith`, `family`, `fan`, `fans`, `farm`, `farmers`, `fashion`, `fast`, `fedex`, `feedback`, `ferrari`, `ferrero`, `fi`, `fiat`, `fidelity`, `fido`, `film`, `final`, `finance`, `financial`, `fire`, `firestone`, `firmdale`, `fish`, `fishing`, `fit`, `fitness`, `fj`, `fk`, `flickr`, `flights`, `flir`, `florist`, `flowers`, `fly`, `fm`, `fo`, `foo`, `food`, `foodnetwork`, `football`, `ford`, `forex`, `forsale`, `forum`, `foundation`, `fox`, `fr`, `free`, `fresenius`, `frl`, `frogans`, `frontdoor`, `frontier`, `ftr`, `fujitsu`, `fun`, `fund`, `furniture`, `futbol`, `fyi`, `ga`, `gal`, `gallery`, `gallo`, `gallup`, `game`, `games`, `gap`, `garden`, `gay`, `gb`, `gbiz`, `gd`, `gdn`, `ge`, `gea`, `gent`, `genting`, `george`, `gf`, `gg`, `ggee`, `gh`, `gi`, `gift`, `gifts`, `gives`, `giving`, `gl`, `glade`, `glass`, `gle`, `global`, `globo`, `gm`, `gmail`, `gmbh`, `gmo`, `gmx`, `gn`, `godaddy`, `gold`, `goldpoint`, `golf`, `goo`, `goodyear`, `goog`, `google`, `gop`, `got`, `gov`, `gp`, `gq`, `gr`, `grainger`, `graphics`, `gratis`, `green`, `gripe`, `grocery`, `group`, `gs`, `gt`, `gu`, `guardian`, `gucci`, `guge`, `guide`, `guitars`, `guru`, `gw`, `gy`, `hair`, `hamburg`, `hangout`, `haus`, `hbo`, `hdfc`, `hdfcbank`, `health`, `healthcare`, `help`, `helsinki`, `here`, `hermes`, `hgtv`, `hiphop`, `hisamitsu`, `hitachi`, `hiv`, `hk`, `hkt`, `hm`, `hn`, `hockey`, `holdings`, `holiday`, `homedepot`, `homegoods`, `homes`, `homesense`, `honda`, `horse`, `hospital`, `host`, `hosting`, `hot`, `hoteles`, `hotels`, `hotmail`, `house`, `how`, `hr`, `hsbc`, `ht`, `hu`, `hughes`, `hyatt`, `hyundai`, `ibm`, `icbc`, `ice`, `icu`, `id`, `ie`, `ieee`, `ifm`, `ikano`, `il`, `im`, `imamat`, `imdb`, `immo`, `immobilien`, `in`, `inc`, `industries`, `infiniti`, `info`, `ing`, `ink`, `institute`, `insurance`, `insure`, `int`, `international`, `intuit`, `investments`, `io`, `ipiranga`, `iq`, `ir`, `irish`, `is`, `ismaili`, `ist`, `istanbul`, `it`, `itau`, `itv`, `jaguar`, `java`, `jcb`, `je`, `jeep`, `jetzt`, `jewelry`, `jio`, `jll`, `jm`, `jmp`, `jnj`, `jo`, `jobs`, `joburg`, `jot`, `joy`, `jp`, `jpmorgan`, `jprs`, `juegos`, `juniper`, `kaufen`, `kddi`, `ke`, `kerryhotels`, `kerrylogistics`, `kerryproperties`, `kfh`, `kg`, `kh`, `ki`, `kia`, `kim`, `kinder`, `kindle`, `kitchen`, `kiwi`, `km`, `kn`, `koeln`, `komatsu`, `kosher`, `kp`, `kpmg`, `kpn`, `kr`, `krd`, `kred`, `kuokgroup`, `kw`, `ky`, `kyoto`, `kz`, `la`, `lacaixa`, `lamborghini`, `lamer`, `lancaster`, `lancia`, `land`, `landrover`, `lanxess`, `lasalle`, `lat`, `latino`, `latrobe`, `law`, `lawyer`, `lb`, `lc`, `lds`, `lease`, `leclerc`, `lefrak`, `legal`, `lego`, `lexus`, `lgbt`, `li`, `lidl`, `life`, `lifeinsurance`, `lifestyle`, `lighting`, `like`, `lilly`, `limited`, `limo`, `lincoln`, `linde`, `link`, `lipsy`, `live`, `living`, `lixil`, `lk`, `llc`, `llp`, `loan`, `loans`, `locker`, `locus`, `loft`, `lol`, `london`, `lotte`, `lotto`, `love`, `lpl`, `lplfinancial`, `lr`, `ls`, `lt`, `ltd`, `ltda`, `lu`, `lundbeck`, `luxe`, `luxury`, `lv`, `ly`, `ma`, `macys`, `madrid`, `maif`, `maison`, `makeup`, `man`, `management`, `mango`, `map`, `market`, `marketing`, `markets`, `marriott`, `marshalls`, `maserati`, `mattel`, `mba`, `mc`, `mckinsey`, `md`, `me`, `med`, `media`, `meet`, `melbourne`, `meme`, `memorial`, `men`, `menu`, `merckmsd`, `mg`, `mh`, `miami`, `microsoft`, `mil`, `mini`, `mint`, `mit`, `mitsubishi`, `mk`, `ml`, `mlb`, `mls`, `mm`, `mma`, `mn`, `mo`, `mobi`, `mobile`, `moda`, `moe`, `moi`, `mom`, `monash`, `money`, `monster`, `mormon`, `mortgage`, `moscow`, `moto`, `motorcycles`, `mov`, `movie`, `mp`, `mq`, `mr`, `ms`, `msd`, `mt`, `mtn`, `mtr`, `mu`, `museum`, `music`, `mutual`, `mv`, `mw`, `mx`, `my`, `mz`, `na`, `nab`, `nagoya`, `name`, `natura`, `navy`, `nba`, `nc`, `ne`, `nec`, `net`, `netbank`, `netflix`, `network`, `neustar`, `new`, `news`, `next`, `nextdirect`, `nexus`, `nf`, `nfl`, `ng`, `ngo`, `nhk`, `ni`, `nico`, `nike`, `nikon`, `ninja`, `nissan`, `nissay`, `nl`, `no`, `nokia`, `northwesternmutual`, `norton`, `now`, `nowruz`, `nowtv`, `np`, `nr`, `nra`, `nrw`, `ntt`, `nu`, `nyc`, `nz`, `obi`, `observer`, `off`, `office`, `okinawa`, `olayan`, `olayangroup`, `oldnavy`, `ollo`, `om`, `omega`, `one`, `ong`, `onion`, `onl`, `online`, `ooo`, `open`, `oracle`, `orange`, `org`, `organic`, `origins`, `osaka`, `otsuka`, `ott`, `ovh`, `pa`, `page`, `panasonic`, `paris`, `pars`, `partners`, `parts`, `party`, `passagens`, `pay`, `pccw`, `pe`, `pet`, `pf`, `pfizer`, `pg`, `ph`, `pharmacy`, `phd`, `philips`, `phone`, `photo`, `photography`, `photos`, `physio`, `pics`, `pictet`, `pictures`, `pid`, `pin`, `ping`, `pink`, `pioneer`, `pizza`, `pk`, `pl`, `place`, `play`, `playstation`, `plumbing`, `plus`, `pm`, `pn`, `pnc`, `pohl`, `poker`, `politie`, `porn`, `post`, `pr`, `pramerica`, `praxi`, `press`, `prime`, `pro`, `prod`, `productions`, `prof`, `progressive`, `promo`, `properties`, `property`, `protection`, `pru`, `prudential`, `ps`, `pt`, `pub`, `pw`, `pwc`, `py`, `qa`, `qpon`, `quebec`, `quest`, `qvc`, `racing`, `radio`, `raid`, `re`, `read`, `realestate`, `realtor`, `realty`, `recipes`, `red`, `redstone`, `redumbrella`, `rehab`, `reise`, `reisen`, `reit`, `reliance`, `ren`, `rent`, `rentals`, `repair`, `report`, `republican`, `rest`, `restaurant`, `review`, `reviews`, `rexroth`, `rich`, `richardli`, `ricoh`, `ril`, `rio`, `rip`, `rmit`, `ro`, `rocher`, `rocks`, `rodeo`, `rogers`, `room`, `rs`, `rsvp`, `ru`, `rugby`, `ruhr`, `run`, `rw`, `rwe`, `ryukyu`, `sa`, `saarland`, `safe`, `safety`, `sakura`, `sale`, `salon`, `samsclub`, `samsung`, `sandvik`, `sandvikcoromant`, `sanofi`, `sap`, `sarl`, `sas`, `save`, `saxo`, `sb`, `sbi`, `sbs`, `sc`, `sca`, `scb`, `schaeffler`, `schmidt`, `scholarships`, `school`, `schule`, `schwarz`, `science`, `scjohnson`, `scot`, `sd`, `se`, `search`, `seat`, `secure`, `security`, `seek`, `select`, `sener`, `services`, `ses`, `seven`, `sew`, `sex`, `sexy`, `sfr`, `sg`, `sh`, `shangrila`, `sharp`, `shaw`, `shell`, `shia`, `shiksha`, `shoes`, `shop`, `shopping`, `shouji`, `show`, `showtime`, `si`, `silk`, `sina`, `singles`, `site`, `sj`, `sk`, `ski`, `skin`, `sky`, `skype`, `sl`, `sling`, `sm`, `smart`, `smile`, `sn`, `sncf`, `so`, `soccer`, `social`, `softbank`, `software`, `sohu`, `solar`, `solutions`, `song`, `sony`, `soy`, `spa`, `space`, `sport`, `spot`, `sr`, `srl`, `ss`, `st`, `stada`, `staples`, `star`, `statebank`, `statefarm`, `stc`, `stcgroup`, `stockholm`, `storage`, `store`, `stream`, `studio`, `study`, `style`, `su`, `sucks`, `supplies`, `supply`, `support`, `surf`, `surgery`, `suzuki`, `sv`, `swatch`, `swiftcover`, `swiss`, `sx`, `sy`, `sydney`, `systems`, `sz`, `tab`, `taipei`, `talk`, `taobao`, `target`, `tatamotors`, `tatar`, `tattoo`, `tax`, `taxi`, `tc`, `tci`, `td`, `tdk`, `team`, `tech`, `technology`, `tel`, `temasek`, `tennis`, `teva`, `tf`, `tg`, `th`, `thd`, `theater`, `theatre`, `tiaa`, `tickets`, `tienda`, `tiffany`, `tips`, `tires`, `tirol`, `tj`, `tjmaxx`, `tjx`, `tk`, `tkmaxx`, `tl`, `tm`, `tmall`, `tn`, `to`, `today`, `tokyo`, `tools`, `top`, `toray`, `toshiba`, `total`, `tours`, `town`, `toyota`, `toys`, `tr`, `trade`, `trading`, `training`, `travel`, `travelchannel`, `travelers`, `travelersinsurance`, `trust`, `trv`, `tt`, `tube`, `tui`, `tunes`, `tushu`, `tv`, `tvs`, `tw`, `tz`, `ua`, `ubank`, `ubs`, `ug`, `uk`, `unicom`, `university`, `uno`, `uol`, `ups`, `us`, `uy`, `uz`, `va`, `vacations`, `vana`, `vanguard`, `vc`, `ve`, `vegas`, `ventures`, `verisign`, `vermögensberater`, `vermögensberatung`, `versicherung`, `vet`, `vg`, `vi`, `viajes`, `video`, `vig`, `viking`, `villas`, `vin`, `vip`, `virgin`, `visa`, `vision`, `viva`, `vivo`, `vlaanderen`, `vn`, `vodka`, `volkswagen`, `volvo`, `vote`, `voting`, `voto`, `voyage`, `vu`, `vuelos`, `wales`, `walmart`, `walter`, `wang`, `wanggou`, `watch`, `watches`, `weather`, `weatherchannel`, `webcam`, `weber`, `website`, `wed`, `wedding`, `weibo`, `weir`, `wf`, `whoswho`, `wien`, `wiki`, `williamhill`, `win`, `windows`, `wine`, `winners`, `wme`, `wolterskluwer`, `woodside`, `work`, `works`, `world`, `wow`, `ws`, `wtc`, `wtf`, `xbox`, `xerox`, `xfinity`, `xihuan`, `xin`, `xxx`, `xyz`, `yachts`, `yahoo`, `yamaxun`, `yandex`, `ye`, `yodobashi`, `yoga`, `yokohama`, `you`, `youtube`, `yt`, `yun`, `za`, `zappos`, `zara`, `zero`, `zip`, `zm`, `zone`, `zuerich`, `zw`, `ελ`, `ευ`, `бг`, `бел`, `дети`, `ею`, `католик`, `ком`, `мкд`, `мон`, `москва`, `онлайн`, `орг`, `рус`, `рф`, `сайт`, `срб`, `укр`, `қаз`, `հայ`, `קום`, `ابوظبي`, `اتصالات`, `ارامكو`, `الاردن`, `البحرين`, `الجزائر`, `السعودية`, `السعوديه`, `السعودیة`, `السعودیۃ`, `العليان`, `المغرب`, `اليمن`, `امارات`, `ايران`, `ایران`, `بارت`, `بازار`, `بيتك`, `بھارت`, `تونس`, `سودان`, `سوريا`, `سورية`, `شبكة`, `عراق`, `عرب`, `عمان`, `فلسطين`, `قطر`, `كاثوليك`, `كوم`, `مصر`, `مليسيا`, `موريتانيا`, `موقع`, `همراه`, `پاكستان`, `پاکستان`, `ڀارت`, `कॉम`, `नेट`, `भारत`, `भारतम्`, `भारोत`, `संगठन`, `বাংলা`, `ভারত`, `ভাৰত`, `ਭਾਰਤ`, `ભારત`, `ଭାରତ`, `இந்தியா`, `இலங்கை`, `சிங்கப்பூர்`, `భారత్`, `ಭಾರತ`, `ഭാരതം`, `ලංකා`, `คอม`, `ไทย`, `ລາວ`, `გე`, `みんな`, `アマゾン`, `クラウド`, `グーグル`, `コム`, `ストア`, `セール`, `ファッション`, `ポイント`, `世界`, `中信`, `中国`, `中國`, `中文网`, `亚马逊`, `企业`, `佛山`, `信息`, `健康`, `八卦`, `公司`, `公益`, `台湾`, `台灣`, `商城`, `商店`, `商标`, `嘉里`, `嘉里大酒店`, `在线`, `大众汽车`, `大拿`, `天主教`, `娱乐`, `家電`, `广东`, `微博`, `慈善`, `我爱你`, `手机`, `招聘`, `政务`, `政府`, `新加坡`, `新闻`, `时尚`, `書籍`, `机构`, `淡马锡`, `游戏`, `澳門`, `澳门`, `点看`, `移动`, `组织机构`, `网址`, `网店`, `网站`, `网络`, `联通`, `臺灣`, `诺基亚`, `谷歌`, `购物`, `通販`, `集团`, `電訊盈科`, `飞利浦`, `食品`, `餐厅`, `香格里拉`, `香港`, `닷넷`, `닷컴`, `삼성`, `한국`, } golang-mvdan-xurls-2.3.0/tlds_pseudo.go000066400000000000000000000015041413033442400201130ustar00rootroot00000000000000// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information package xurls // PseudoTLDs is a sorted list of some widely used unofficial TLDs. // // Sources: // * https://en.wikipedia.org/wiki/Pseudo-top-level_domain // * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains // * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00 // * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml var PseudoTLDs = []string{ `bit`, // Namecoin `example`, // Example domain `exit`, // Tor exit node `gnu`, // GNS by public key `i2p`, // I2P network `invalid`, // Invalid domain `local`, // Local network `localhost`, // Local network `test`, // Test domain `zkey`, // GNS domain name } golang-mvdan-xurls-2.3.0/unicode.go000066400000000000000000000036671413033442400172300ustar00rootroot00000000000000// Generated by unicodegen package xurls const otherPuncMinusDoubleQuote = "!#%&'\\*,\\./:;\\?@\\\\¡§¶·¿;·՚՛՜՝՞՟։׀׃׆׳״؉؊،؍؛؞؟٪٫٬٭۔܀܁܂܃܄܅܆܇܈܉܊܋܌܍߷߸߹࠰࠱࠲࠳࠴࠵࠶࠷࠸࠹࠺࠻࠼࠽࠾࡞।॥॰৽੶૰౷಄෴๏๚๛༄༅༆༇༈༉༊་༌།༎༏༐༑༒༔྅࿐࿑࿒࿓࿔࿙࿚၊။၌၍၎၏჻፠፡።፣፤፥፦፧፨᙮᛫᛬᛭᜵᜶។៕៖៘៙៚᠀᠁᠂᠃᠄᠅᠇᠈᠉᠊᥄᥅᨞᨟᪠᪡᪢᪣᪤᪥᪦᪨᪩᪪᪫᪬᪭᭚᭛᭜᭝᭞᭟᭠᯼᯽᯾᯿᰻᰼᰽᰾᰿᱾᱿᳀᳁᳂᳃᳄᳅᳆᳇᳓‖‗†‡•‣․‥…‧‰‱′″‴‵‶‷‸※‼‽‾⁁⁂⁃⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁓⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞⳹⳺⳻⳼⳾⳿⵰⸀⸁⸆⸇⸈⸋⸎⸏⸐⸑⸒⸓⸔⸕⸖⸘⸙⸛⸞⸟⸪⸫⸬⸭⸮⸰⸱⸲⸳⸴⸵⸶⸷⸸⸹⸼⸽⸾⸿⹁⹃⹄⹅⹆⹇⹈⹉⹊⹋⹌⹍⹎⹏⹒、。〃〽・꓾꓿꘍꘎꘏꙳꙾꛲꛳꛴꛵꛶꛷꡴꡵꡶꡷꣎꣏꣸꣹꣺꣼꤮꤯꥟꧁꧂꧃꧄꧅꧆꧇꧈꧉꧊꧋꧌꧍꧞꧟꩜꩝꩞꩟꫞꫟꫰꫱꯫︐︑︒︓︔︕︖︙︰﹅﹆﹉﹊﹋﹌﹐﹑﹒﹔﹕﹖﹗﹟﹠﹡﹨﹪﹫!"#%&'*,./:;?@\。、・𐄀𐄁𐄂𐎟𐏐𐕯𐡗𐤟𐤿𐩐𐩑𐩒𐩓𐩔𐩕𐩖𐩗𐩘𐩿𐫰𐫱𐫲𐫳𐫴𐫵𐫶𐬹𐬺𐬻𐬼𐬽𐬾𐬿𐮙𐮚𐮛𐮜𐽕𐽖𐽗𐽘𐽙𑁇𑁈𑁉𑁊𑁋𑁌𑁍𑂻𑂼𑂾𑂿𑃀𑃁𑅀𑅁𑅂𑅃𑅴𑅵𑇅𑇆𑇇𑇈𑇍𑇛𑇝𑇞𑇟𑈸𑈹𑈺𑈻𑈼𑈽𑊩𑑋𑑌𑑍𑑎𑑏𑑚𑑛𑑝𑓆𑗁𑗂𑗃𑗄𑗅𑗆𑗇𑗈𑗉𑗊𑗋𑗌𑗍𑗎𑗏𑗐𑗑𑗒𑗓𑗔𑗕𑗖𑗗𑙁𑙂𑙃𑙠𑙡𑙢𑙣𑙤𑙥𑙦𑙧𑙨𑙩𑙪𑙫𑙬𑜼𑜽𑜾𑠻𑥄𑥅𑥆𑧢𑨿𑩀𑩁𑩂𑩃𑩄𑩅𑩆𑪚𑪛𑪜𑪞𑪟𑪠𑪡𑪢𑱁𑱂𑱃𑱄𑱅𑱰𑱱𑻷𑻸𑿿𒑰𒑱𒑲𒑳𒑴𖩮𖩯𖫵𖬷𖬸𖬹𖬺𖬻𖭄𖺗𖺘𖺙𖺚𖿢𛲟𝪇𝪈𝪉𝪊𝪋𞥞𞥟" golang-mvdan-xurls-2.3.0/xurls.go000066400000000000000000000113761413033442400167530ustar00rootroot00000000000000// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information // Package xurls extracts urls from plain text using regular expressions. package xurls import ( "regexp" "strings" "unicode/utf8" ) //go:generate go run ./generate/tldsgen //go:generate go run ./generate/schemesgen //go:generate go run ./generate/unicodegen const ( letter = `\p{L}` mark = `\p{M}` number = `\p{N}` iriChar = letter + mark + number currency = `\p{Sc}` otherSymb = `\p{So}` endChar = iriChar + `/\-_+&~%=#` + currency + otherSymb midChar = endChar + "_*" + otherPuncMinusDoubleQuote wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)` wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]` wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}` wellAll = wellParen + `|` + wellBrack + `|` + wellBrace pathCont = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+` iri = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?` domain = `(` + iri + `\.)+` octet = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])` ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b` ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:` ipAddr = `(` + ipv4Addr + `|` + ipv6Addr + `)` port = `(:[0-9]*)?` ) // AnyScheme can be passed to StrictMatchingScheme to match any possibly valid // scheme, and not just the known ones. var AnyScheme = `([a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)` // SchemesNoAuthority is a sorted list of some well-known url schemes that are // followed by ":" instead of "://". The list includes both officially // registered and unofficial schemes. var SchemesNoAuthority = []string{ `bitcoin`, // Bitcoin `cid`, // Content-ID `file`, // Files `magnet`, // Torrent magnets `mailto`, // Mail `mid`, // Message-ID `sms`, // SMS `tel`, // Telephone `xmpp`, // XMPP } // SchemesUnofficial is a sorted list of some well-known url schemes which // aren't officially registered just yet. They tend to correspond to software. // // Mostly collected from https://en.wikipedia.org/wiki/List_of_URI_schemes#Unofficial_but_common_URI_schemes. var SchemesUnofficial = []string{ `jdbc`, // Java database Connectivity `postgres`, // PostgreSQL (short form) `postgresql`, // PostgreSQL `slack`, // Slack `zoommtg`, // Zoom (desktop) `zoomus`, // Zoom (mobile) } func anyOf(strs ...string) string { var b strings.Builder b.WriteByte('(') for i, s := range strs { if i != 0 { b.WriteByte('|') } b.WriteString(regexp.QuoteMeta(s)) } b.WriteByte(')') return b.String() } func strictExp() string { schemes := `((` + anyOf(Schemes...) + `|` + anyOf(SchemesUnofficial...) + `)://|` + anyOf(SchemesNoAuthority...) + `:)` return `(?i)` + schemes + `(?-i)` + pathCont } func relaxedExp() string { var asciiTLDs, unicodeTLDs []string for i, tld := range TLDs { if tld[0] >= utf8.RuneSelf { asciiTLDs = TLDs[:i:i] unicodeTLDs = TLDs[i:] break } } punycode := `xn--[a-z0-9-]+` // Use \b to make sure ASCII TLDs are immediately followed by a word break. // We can't do that with unicode TLDs, as they don't see following // whitespace as a word break. tlds := `(?i)(` + punycode + `|` + anyOf(append(asciiTLDs, PseudoTLDs...)...) + `\b|` + anyOf(unicodeTLDs...) + `)(?-i)` site := domain + tlds hostName := `(` + site + `|` + ipAddr + `)` webURL := hostName + port + `(/|/` + pathCont + `)?` email := `[a-zA-Z0-9._%\-+]+@` + site return strictExp() + `|` + webURL + `|` + email } // Strict produces a regexp that matches any URL with a scheme in either the // Schemes or SchemesNoAuthority lists. func Strict() *regexp.Regexp { re := regexp.MustCompile(strictExp()) re.Longest() return re } // Relaxed produces a regexp that matches any URL matched by Strict, plus any // URL with no scheme or email address. func Relaxed() *regexp.Regexp { re := regexp.MustCompile(relaxedExp()) re.Longest() return re } // StrictMatchingScheme produces a regexp similar to Strict, but requiring that // the scheme match the given regular expression. See AnyScheme too. func StrictMatchingScheme(exp string) (*regexp.Regexp, error) { strictMatching := `(?i)(` + exp + `)(?-i)` + pathCont re, err := regexp.Compile(strictMatching) if err != nil { return nil, err } re.Longest() return re, nil } golang-mvdan-xurls-2.3.0/xurls_test.go000066400000000000000000000251031413033442400200030ustar00rootroot00000000000000// Copyright (c) 2015, Daniel Martí // See LICENSE for licensing information package xurls import ( "fmt" "regexp" "testing" ) type testCase struct { in string want interface{} } func wantStr(in string, want interface{}) string { switch x := want.(type) { case string: return x case bool: if x { return in } } return "" } func doTest(t *testing.T, name string, re *regexp.Regexp, cases []testCase) { for i, c := range cases { t.Run(fmt.Sprintf("%s/%03d", name, i), func(t *testing.T) { want := wantStr(c.in, c.want) for _, surround := range []string{"", "\n"} { in := surround + c.in + surround got := re.FindString(in) if got != want { t.Errorf(`FindString(%q) got %q, want %q`, in, got, want) } } }) } } var constantTestCases = []testCase{ {``, nil}, {` `, nil}, {`:`, nil}, {`::`, nil}, {`:::`, nil}, {`::::`, nil}, {`.`, nil}, {`..`, nil}, {`...`, nil}, {`1.1`, nil}, {`.1.`, nil}, {`1.1.1`, nil}, {`1:1`, nil}, {`:1:`, nil}, {`1:1:1`, nil}, {`://`, nil}, {`foo`, nil}, {`foo:`, nil}, {`mailto:`, nil}, {`foo://`, nil}, {`http://`, nil}, {`http:// foo`, nil}, {`http:// foo`, nil}, {`:foo`, nil}, {`://foo`, nil}, {`foorandom:bar`, nil}, {`foo.randombar`, nil}, {`zzz.`, nil}, {`.zzz`, nil}, {`zzz.zzz`, nil}, {`/some/path`, nil}, {`rel/path`, nil}, {`localhost`, nil}, {`com`, nil}, {`.com`, nil}, {`com.`, nil}, {`http`, nil}, {`http://foo`, true}, {`http://FOO`, true}, {`http://FAÀ`, true}, {`https://localhost`, true}, {`mailto:foo`, true}, {`MAILTO:foo`, true}, {`sms:123`, true}, {`xmpp:foo@bar`, true}, {`bitcoin:Addr23?amount=1&message=foo`, true}, {`cid:foo-32x32.v2_fe0f1423.png`, true}, {`mid:960830.1639@XIson.com`, true}, {`http://foo.com`, true}, {`http://foo.co.uk`, true}, {`http://foo.random`, true}, {` http://foo.com/bar `, `http://foo.com/bar`}, {` http://foo.com/bar more`, `http://foo.com/bar`}, {``, `http://foo.com/bar`}, {`more`, `http://foo.com/bar`}, {`.http://foo.com/bar.`, `http://foo.com/bar`}, {`.http://foo.com/bar.more`, `http://foo.com/bar.more`}, {`,http://foo.com/bar,`, `http://foo.com/bar`}, {`,http://foo.com/bar,more`, `http://foo.com/bar,more`}, {`*http://foo.com/bar*`, `http://foo.com/bar`}, {`*http://foo.com/bar*more`, `http://foo.com/bar*more`}, {`_http://foo.com/bar_`, `http://foo.com/bar_`}, {`_http://foo.com/bar_more`, `http://foo.com/bar_more`}, {`(http://foo.com/bar)`, `http://foo.com/bar`}, {`(http://foo.com/bar)more`, `http://foo.com/bar`}, {`[http://foo.com/bar]`, `http://foo.com/bar`}, {`[http://foo.com/bar]more`, `http://foo.com/bar`}, {`'http://foo.com/bar'`, `http://foo.com/bar`}, {`'http://foo.com/bar'more`, `http://foo.com/bar'more`}, {`"http://foo.com/bar"`, `http://foo.com/bar`}, {`"http://foo.com/bar"more`, `http://foo.com/bar`}, {`{"url":"http://foo.com/bar"}`, `http://foo.com/bar`}, {`{"before":"foo","url":"http://foo.com/bar","after":"bar"}`, `http://foo.com/bar`}, {`http://a.b/a0/-+_&~*%=#@.,:;'?![]()a`, true}, {`http://a.b/a0/$€¥`, true}, {`http://✪foo.bar/pa✪th©more`, true}, {`http://foo.bar/path/`, true}, {`http://foo.bar/path-`, true}, {`http://foo.bar/path+`, true}, {`http://foo.bar/path&`, true}, {`http://foo.bar/path~`, true}, {`http://foo.bar/path%`, true}, {`http://foo.bar/path=`, true}, {`http://foo.bar/path#`, true}, {`http://foo.bar/path.`, `http://foo.bar/path`}, {`http://foo.bar/path,`, `http://foo.bar/path`}, {`http://foo.bar/path:`, `http://foo.bar/path`}, {`http://foo.bar/path;`, `http://foo.bar/path`}, {`http://foo.bar/path'`, `http://foo.bar/path`}, {`http://foo.bar/path?`, `http://foo.bar/path`}, {`http://foo.bar/path!`, `http://foo.bar/path`}, {`http://foo.bar/path@`, `http://foo.bar/path`}, {`http://foo.bar/path|`, `http://foo.bar/path`}, {`http://foo.bar/path|more`, `http://foo.bar/path`}, {`http://foo.bar/path<`, `http://foo.bar/path`}, {`http://foo.bar/path`, `foo.com/bar`}, {`more`, `foo.com/bar`}, {`,foo.com/bar.`, `foo.com/bar`}, {`,foo.com/bar.more`, `foo.com/bar.more`}, {`,foo.com/bar,`, `foo.com/bar`}, {`,foo.com/bar,more`, `foo.com/bar,more`}, {`(foo.com/bar)`, `foo.com/bar`}, {`"foo.com/bar'`, `foo.com/bar`}, {`"foo.com/bar'more`, `foo.com/bar'more`}, {`"foo.com/bar"`, `foo.com/bar`}, {`what is foo.com?`, `foo.com`}, {`the foo.com!`, `foo.com`}, {`foo@bar`, nil}, {`foo@bar.a`, nil}, {`foo@bar.com`, true}, {`foo@sub.bar.com`, true}, {`foo@bar.com bar@bar.com`, `foo@bar.com`}, {`foo@bar.onion`, true}, {`foo@中国.中国`, true}, {`foo@test.bar.com`, true}, {`FOO@TEST.BAR.COM`, true}, {`foo@bar.com/path`, `foo@bar.com`}, {`foo+test@bar.com`, true}, {`foo+._%-@bar.com`, true}, }) doTest(t, "Strict2", Strict(), []testCase{ {`http:// foo.com`, nil}, {`foo.a`, nil}, {`foo.com`, nil}, {`foo.com/`, nil}, {`1.1.1.1`, nil}, {`3ffe:2a00:100:7031::1`, nil}, {`test.foo.com:8080/path`, nil}, {`foo@bar.com`, nil}, }) } func TestStrictMatchingSchemeError(t *testing.T) { for _, c := range []struct { exp string wantErr bool }{ {`http://`, false}, {`https?://`, false}, {`http://|mailto:`, false}, {`http://(`, true}, } { _, err := StrictMatchingScheme(c.exp) if c.wantErr && err == nil { t.Errorf(`StrictMatchingScheme("%s") did not error as expected`, c.exp) } else if !c.wantErr && err != nil { t.Errorf(`StrictMatchingScheme("%s") unexpectedly errored`, c.exp) } } } func TestStrictMatchingScheme(t *testing.T) { strictMatching, _ := StrictMatchingScheme("http://|ftps?://|mailto:") doTest(t, "StrictMatchingScheme", strictMatching, []testCase{ {`foo.com`, nil}, {`foo@bar.com`, nil}, {`http://foo`, true}, {`Http://foo`, true}, {`https://foo`, nil}, {`ftp://foo`, true}, {`ftps://foo`, true}, {`mailto:foo`, true}, {`MAILTO:foo`, true}, {`sms:123`, nil}, }) } func TestStrictMatchingSchemeAny(t *testing.T) { strictMatching, _ := StrictMatchingScheme(AnyScheme) doTest(t, "StrictMatchingScheme", strictMatching, []testCase{ {`http://foo`, true}, {`git+https://foo`, true}, {`randomtexthttp://foo.bar/etc`, true}, {`mailto:foo`, true}, }) } func bench(b *testing.B, re *regexp.Regexp, str string) { for i := 0; i < b.N; i++ { re.FindAllString(str, -1) } } func BenchmarkStrictEmpty(b *testing.B) { bench(b, Strict(), "foo") } func BenchmarkStrictSingle(b *testing.B) { bench(b, Strict(), "http://foo.foo foo.com") } func BenchmarkStrictMany(b *testing.B) { bench(b, Strict(), ` foo bar http://foo.foo foo.com bitcoin:address ftp:// xmpp:foo@bar.com`) } func BenchmarkRelaxedEmpty(b *testing.B) { bench(b, Relaxed(), "foo") } func BenchmarkRelaxedSingle(b *testing.B) { bench(b, Relaxed(), "http://foo.foo foo.com") } func BenchmarkRelaxedMany(b *testing.B) { bench(b, Relaxed(), ` foo bar http://foo.foo foo.com bitcoin:address ftp:// xmpp:foo@bar.com`) }